diff --git a/checkpoint-107155/config.json b/checkpoint-107155/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-107155/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-107155/optimizer.pt b/checkpoint-107155/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5d127c4dafbe94b596aacfefa99f3a1367dd683 --- /dev/null +++ b/checkpoint-107155/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a7d686b5754a70d5c75e505c4ea5147740ffa25ba0cac43cf31e301c43e7b0 +size 1847865 diff --git a/checkpoint-107155/preprocessor_config.json b/checkpoint-107155/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-107155/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-107155/pytorch_model.bin b/checkpoint-107155/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e071716bfc0534b83934af42235a6d5adf3604b6 --- /dev/null +++ b/checkpoint-107155/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6f3c820658ca3b508455f5427a1af3951479fe4e2f6841b989596dbafa1216 +size 377656855 diff --git a/checkpoint-107155/rng_state.pth b/checkpoint-107155/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8305db7327b39d05f49499cde4f9e9688cc17775 --- /dev/null +++ b/checkpoint-107155/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acdbbd25619cb7ae5fe3b233f2b577dff65e3e3e681c76e1f9950b3baae7c295 +size 14503 diff --git a/checkpoint-107155/scaler.pt b/checkpoint-107155/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a030941c896e2bbb852d61b83fd857dab68d2b79 --- /dev/null +++ b/checkpoint-107155/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befbef1ac86f28ff304c059d7a80966531e389df823000267a8d2ccbf6b488b3 +size 559 diff --git a/checkpoint-107155/scheduler.pt b/checkpoint-107155/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f5dbeadccf93699d1016cc92c75150593429de --- /dev/null +++ b/checkpoint-107155/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51615ad2fc239d706165a66c41520877ba489ed9ddc3b1248104b22907676ae +size 623 diff --git a/checkpoint-107155/trainer_state.json b/checkpoint-107155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b8c170cf924a5d5dc430479ff408db8ff5d62a9 --- /dev/null +++ b/checkpoint-107155/trainer_state.json @@ -0,0 +1,128652 @@ +{ + "best_metric": 0.03575053811073303, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-64293", + "epoch": 5.0, + "global_step": 107155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + }, + { + "epoch": 1.0, + "learning_rate": 1.6798708322229715e-05, + "loss": 0.2708, + "step": 21435 + }, + { + "epoch": 1.0, + "learning_rate": 1.6797924537174925e-05, + "loss": 0.0632, + "step": 21440 + }, + { + "epoch": 1.0, + "learning_rate": 1.679714075212014e-05, + "loss": 0.0625, + "step": 21445 + }, + { + "epoch": 1.0, + "learning_rate": 1.6796356967065353e-05, + "loss": 0.1169, + "step": 21450 + }, + { + "epoch": 1.0, + "learning_rate": 1.6795573182010567e-05, + "loss": 0.1674, + "step": 21455 + }, + { + "epoch": 1.0, + "learning_rate": 1.679478939695578e-05, + "loss": 0.1875, + "step": 21460 + }, + { + "epoch": 1.0, + "learning_rate": 1.6794005611900995e-05, + "loss": 0.2132, + "step": 21465 + }, + { + "epoch": 1.0, + "learning_rate": 1.6793221826846205e-05, + "loss": 0.2001, + "step": 21470 + }, + { + "epoch": 1.0, + "learning_rate": 1.6792438041791422e-05, + "loss": 0.2851, + "step": 21475 + }, + { + "epoch": 1.0, + "learning_rate": 1.6791654256736633e-05, + "loss": 0.2083, + "step": 21480 + }, + { + "epoch": 1.0, + "learning_rate": 1.6790870471681847e-05, + "loss": 0.4056, + "step": 21485 + }, + { + "epoch": 1.0, + "learning_rate": 1.679008668662706e-05, + "loss": 0.0784, + "step": 21490 + }, + { + "epoch": 1.0, + "learning_rate": 1.6789302901572275e-05, + "loss": 0.1198, + "step": 21495 + }, + { + "epoch": 1.0, + "learning_rate": 1.678851911651749e-05, + "loss": 0.0637, + "step": 21500 + }, + { + "epoch": 1.0, + "learning_rate": 1.67877353314627e-05, + "loss": 0.0893, + "step": 21505 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786951546407913e-05, + "loss": 0.1339, + "step": 21510 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786167761353127e-05, + "loss": 0.2976, + "step": 21515 + }, + { + "epoch": 1.0, + "learning_rate": 1.678538397629834e-05, + "loss": 0.2174, + "step": 21520 + }, + { + "epoch": 1.0, + "learning_rate": 1.6784600191243555e-05, + "loss": 0.2785, + "step": 21525 + }, + { + "epoch": 1.0, + "learning_rate": 1.678381640618877e-05, + "loss": 0.5756, + "step": 21530 + }, + { + "epoch": 1.0, + "learning_rate": 1.6783032621133983e-05, + "loss": 0.3013, + "step": 21535 + }, + { + "epoch": 1.01, + "learning_rate": 1.6782248836079196e-05, + "loss": 0.0721, + "step": 21540 + }, + { + "epoch": 1.01, + "learning_rate": 1.6781465051024407e-05, + "loss": 0.0953, + "step": 21545 + }, + { + "epoch": 1.01, + "learning_rate": 1.6780681265969624e-05, + "loss": 0.0683, + "step": 21550 + }, + { + "epoch": 1.01, + "learning_rate": 1.6779897480914835e-05, + "loss": 0.155, + "step": 21555 + }, + { + "epoch": 1.01, + "learning_rate": 1.677911369586005e-05, + "loss": 0.2342, + "step": 21560 + }, + { + "epoch": 1.01, + "learning_rate": 1.6778329910805263e-05, + "loss": 0.1579, + "step": 21565 + }, + { + "epoch": 1.01, + "learning_rate": 1.6777546125750473e-05, + "loss": 0.2137, + "step": 21570 + }, + { + "epoch": 1.01, + "learning_rate": 1.677676234069569e-05, + "loss": 0.3298, + "step": 21575 + }, + { + "epoch": 1.01, + "learning_rate": 1.67759785556409e-05, + "loss": 0.3294, + "step": 21580 + }, + { + "epoch": 1.01, + "learning_rate": 1.6775194770586115e-05, + "loss": 0.347, + "step": 21585 + }, + { + "epoch": 1.01, + "learning_rate": 1.677441098553133e-05, + "loss": 0.0707, + "step": 21590 + }, + { + "epoch": 1.01, + "learning_rate": 1.6773627200476543e-05, + "loss": 0.0494, + "step": 21595 + }, + { + "epoch": 1.01, + "learning_rate": 1.6772843415421757e-05, + "loss": 0.0614, + "step": 21600 + }, + { + "epoch": 1.01, + "learning_rate": 1.677205963036697e-05, + "loss": 0.1719, + "step": 21605 + }, + { + "epoch": 1.01, + "learning_rate": 1.6771275845312184e-05, + "loss": 0.1739, + "step": 21610 + }, + { + "epoch": 1.01, + "learning_rate": 1.6770492060257398e-05, + "loss": 0.2605, + "step": 21615 + }, + { + "epoch": 1.01, + "learning_rate": 1.676970827520261e-05, + "loss": 0.2799, + "step": 21620 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768924490147823e-05, + "loss": 0.3259, + "step": 21625 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768140705093037e-05, + "loss": 0.3949, + "step": 21630 + }, + { + "epoch": 1.01, + "learning_rate": 1.676735692003825e-05, + "loss": 0.4475, + "step": 21635 + }, + { + "epoch": 1.01, + "learning_rate": 1.6766573134983464e-05, + "loss": 0.067, + "step": 21640 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765789349928675e-05, + "loss": 0.0889, + "step": 21645 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765005564873892e-05, + "loss": 0.0926, + "step": 21650 + }, + { + "epoch": 1.01, + "learning_rate": 1.6764221779819103e-05, + "loss": 0.169, + "step": 21655 + }, + { + "epoch": 1.01, + "learning_rate": 1.6763437994764317e-05, + "loss": 0.1749, + "step": 21660 + }, + { + "epoch": 1.01, + "learning_rate": 1.676265420970953e-05, + "loss": 0.2628, + "step": 21665 + }, + { + "epoch": 1.01, + "learning_rate": 1.6761870424654744e-05, + "loss": 0.2144, + "step": 21670 + }, + { + "epoch": 1.01, + "learning_rate": 1.676108663959996e-05, + "loss": 0.274, + "step": 21675 + }, + { + "epoch": 1.01, + "learning_rate": 1.6760302854545172e-05, + "loss": 0.3579, + "step": 21680 + }, + { + "epoch": 1.01, + "learning_rate": 1.6759519069490383e-05, + "loss": 0.3807, + "step": 21685 + }, + { + "epoch": 1.01, + "learning_rate": 1.67587352844356e-05, + "loss": 0.1214, + "step": 21690 + }, + { + "epoch": 1.01, + "learning_rate": 1.675795149938081e-05, + "loss": 0.1011, + "step": 21695 + }, + { + "epoch": 1.01, + "learning_rate": 1.6757167714326024e-05, + "loss": 0.0696, + "step": 21700 + }, + { + "epoch": 1.01, + "learning_rate": 1.675638392927124e-05, + "loss": 0.1463, + "step": 21705 + }, + { + "epoch": 1.01, + "learning_rate": 1.6755600144216452e-05, + "loss": 0.1416, + "step": 21710 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754816359161666e-05, + "loss": 0.128, + "step": 21715 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754032574106877e-05, + "loss": 0.1893, + "step": 21720 + }, + { + "epoch": 1.01, + "learning_rate": 1.675324878905209e-05, + "loss": 0.3064, + "step": 21725 + }, + { + "epoch": 1.01, + "learning_rate": 1.6752465003997305e-05, + "loss": 0.3098, + "step": 21730 + }, + { + "epoch": 1.01, + "learning_rate": 1.675168121894252e-05, + "loss": 0.3172, + "step": 21735 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750897433887732e-05, + "loss": 0.0526, + "step": 21740 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750113648832946e-05, + "loss": 0.0767, + "step": 21745 + }, + { + "epoch": 1.01, + "learning_rate": 1.674932986377816e-05, + "loss": 0.105, + "step": 21750 + }, + { + "epoch": 1.02, + "learning_rate": 1.6748546078723374e-05, + "loss": 0.128, + "step": 21755 + }, + { + "epoch": 1.02, + "learning_rate": 1.6747762293668585e-05, + "loss": 0.1071, + "step": 21760 + }, + { + "epoch": 1.02, + "learning_rate": 1.67469785086138e-05, + "loss": 0.1624, + "step": 21765 + }, + { + "epoch": 1.02, + "learning_rate": 1.6746194723559012e-05, + "loss": 0.3446, + "step": 21770 + }, + { + "epoch": 1.02, + "learning_rate": 1.6745410938504226e-05, + "loss": 0.3188, + "step": 21775 + }, + { + "epoch": 1.02, + "learning_rate": 1.674462715344944e-05, + "loss": 0.5742, + "step": 21780 + }, + { + "epoch": 1.02, + "learning_rate": 1.674384336839465e-05, + "loss": 0.2419, + "step": 21785 + }, + { + "epoch": 1.02, + "learning_rate": 1.6743059583339868e-05, + "loss": 0.0936, + "step": 21790 + }, + { + "epoch": 1.02, + "learning_rate": 1.674227579828508e-05, + "loss": 0.0601, + "step": 21795 + }, + { + "epoch": 1.02, + "learning_rate": 1.6741492013230292e-05, + "loss": 0.0906, + "step": 21800 + }, + { + "epoch": 1.02, + "learning_rate": 1.6740708228175506e-05, + "loss": 0.104, + "step": 21805 + }, + { + "epoch": 1.02, + "learning_rate": 1.673992444312072e-05, + "loss": 0.1736, + "step": 21810 + }, + { + "epoch": 1.02, + "learning_rate": 1.6739140658065934e-05, + "loss": 0.1642, + "step": 21815 + }, + { + "epoch": 1.02, + "learning_rate": 1.6738356873011148e-05, + "loss": 0.194, + "step": 21820 + }, + { + "epoch": 1.02, + "learning_rate": 1.673757308795636e-05, + "loss": 0.1698, + "step": 21825 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736789302901572e-05, + "loss": 0.5151, + "step": 21830 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736005517846786e-05, + "loss": 0.4388, + "step": 21835 + }, + { + "epoch": 1.02, + "learning_rate": 1.6735221732792e-05, + "loss": 0.0625, + "step": 21840 + }, + { + "epoch": 1.02, + "learning_rate": 1.6734437947737214e-05, + "loss": 0.0551, + "step": 21845 + }, + { + "epoch": 1.02, + "learning_rate": 1.6733654162682428e-05, + "loss": 0.086, + "step": 21850 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732870377627642e-05, + "loss": 0.107, + "step": 21855 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732086592572853e-05, + "loss": 0.1138, + "step": 21860 + }, + { + "epoch": 1.02, + "learning_rate": 1.673130280751807e-05, + "loss": 0.1885, + "step": 21865 + }, + { + "epoch": 1.02, + "learning_rate": 1.673051902246328e-05, + "loss": 0.2082, + "step": 21870 + }, + { + "epoch": 1.02, + "learning_rate": 1.6729735237408494e-05, + "loss": 0.2015, + "step": 21875 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728951452353708e-05, + "loss": 0.4405, + "step": 21880 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728167667298922e-05, + "loss": 0.2858, + "step": 21885 + }, + { + "epoch": 1.02, + "learning_rate": 1.6727383882244136e-05, + "loss": 0.0538, + "step": 21890 + }, + { + "epoch": 1.02, + "learning_rate": 1.6726600097189346e-05, + "loss": 0.0779, + "step": 21895 + }, + { + "epoch": 1.02, + "learning_rate": 1.672581631213456e-05, + "loss": 0.142, + "step": 21900 + }, + { + "epoch": 1.02, + "learning_rate": 1.6725032527079774e-05, + "loss": 0.1888, + "step": 21905 + }, + { + "epoch": 1.02, + "learning_rate": 1.6724248742024988e-05, + "loss": 0.1195, + "step": 21910 + }, + { + "epoch": 1.02, + "learning_rate": 1.6723464956970202e-05, + "loss": 0.2157, + "step": 21915 + }, + { + "epoch": 1.02, + "learning_rate": 1.6722681171915416e-05, + "loss": 0.2258, + "step": 21920 + }, + { + "epoch": 1.02, + "learning_rate": 1.672189738686063e-05, + "loss": 0.3311, + "step": 21925 + }, + { + "epoch": 1.02, + "learning_rate": 1.6721113601805844e-05, + "loss": 0.3652, + "step": 21930 + }, + { + "epoch": 1.02, + "learning_rate": 1.6720329816751054e-05, + "loss": 0.3514, + "step": 21935 + }, + { + "epoch": 1.02, + "learning_rate": 1.6719546031696268e-05, + "loss": 0.0737, + "step": 21940 + }, + { + "epoch": 1.02, + "learning_rate": 1.6718762246641482e-05, + "loss": 0.0702, + "step": 21945 + }, + { + "epoch": 1.02, + "learning_rate": 1.6717978461586696e-05, + "loss": 0.1228, + "step": 21950 + }, + { + "epoch": 1.02, + "learning_rate": 1.671719467653191e-05, + "loss": 0.1717, + "step": 21955 + }, + { + "epoch": 1.02, + "learning_rate": 1.671641089147712e-05, + "loss": 0.1327, + "step": 21960 + }, + { + "epoch": 1.02, + "learning_rate": 1.6715627106422338e-05, + "loss": 0.18, + "step": 21965 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714843321367548e-05, + "loss": 0.1339, + "step": 21970 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714059536312762e-05, + "loss": 0.4464, + "step": 21975 + }, + { + "epoch": 1.03, + "learning_rate": 1.6713275751257976e-05, + "loss": 0.4614, + "step": 21980 + }, + { + "epoch": 1.03, + "learning_rate": 1.671249196620319e-05, + "loss": 0.4731, + "step": 21985 + }, + { + "epoch": 1.03, + "learning_rate": 1.6711708181148404e-05, + "loss": 0.0743, + "step": 21990 + }, + { + "epoch": 1.03, + "learning_rate": 1.6710924396093618e-05, + "loss": 0.0772, + "step": 21995 + }, + { + "epoch": 1.03, + "learning_rate": 1.671014061103883e-05, + "loss": 0.1295, + "step": 22000 + }, + { + "epoch": 1.03, + "learning_rate": 1.6709356825984046e-05, + "loss": 0.1665, + "step": 22005 + }, + { + "epoch": 1.03, + "learning_rate": 1.6708573040929256e-05, + "loss": 0.1362, + "step": 22010 + }, + { + "epoch": 1.03, + "learning_rate": 1.670778925587447e-05, + "loss": 0.1636, + "step": 22015 + }, + { + "epoch": 1.03, + "learning_rate": 1.6707005470819684e-05, + "loss": 0.2899, + "step": 22020 + }, + { + "epoch": 1.03, + "learning_rate": 1.6706221685764898e-05, + "loss": 0.2897, + "step": 22025 + }, + { + "epoch": 1.03, + "learning_rate": 1.6705437900710112e-05, + "loss": 0.3886, + "step": 22030 + }, + { + "epoch": 1.03, + "learning_rate": 1.6704654115655322e-05, + "loss": 0.2882, + "step": 22035 + }, + { + "epoch": 1.03, + "learning_rate": 1.6703870330600536e-05, + "loss": 0.0652, + "step": 22040 + }, + { + "epoch": 1.03, + "learning_rate": 1.670308654554575e-05, + "loss": 0.0749, + "step": 22045 + }, + { + "epoch": 1.03, + "learning_rate": 1.6702302760490964e-05, + "loss": 0.0939, + "step": 22050 + }, + { + "epoch": 1.03, + "learning_rate": 1.6701518975436178e-05, + "loss": 0.1791, + "step": 22055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6700735190381392e-05, + "loss": 0.1139, + "step": 22060 + }, + { + "epoch": 1.03, + "learning_rate": 1.6699951405326606e-05, + "loss": 0.2075, + "step": 22065 + }, + { + "epoch": 1.03, + "learning_rate": 1.669916762027182e-05, + "loss": 0.2175, + "step": 22070 + }, + { + "epoch": 1.03, + "learning_rate": 1.669838383521703e-05, + "loss": 0.2589, + "step": 22075 + }, + { + "epoch": 1.03, + "learning_rate": 1.6697600050162247e-05, + "loss": 0.42, + "step": 22080 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696816265107458e-05, + "loss": 0.3279, + "step": 22085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696032480052672e-05, + "loss": 0.0295, + "step": 22090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6695248694997886e-05, + "loss": 0.0318, + "step": 22095 + }, + { + "epoch": 1.03, + "learning_rate": 1.6694464909943096e-05, + "loss": 0.1887, + "step": 22100 + }, + { + "epoch": 1.03, + "learning_rate": 1.6693681124888314e-05, + "loss": 0.1017, + "step": 22105 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692897339833524e-05, + "loss": 0.145, + "step": 22110 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692113554778738e-05, + "loss": 0.195, + "step": 22115 + }, + { + "epoch": 1.03, + "learning_rate": 1.6691329769723952e-05, + "loss": 0.2584, + "step": 22120 + }, + { + "epoch": 1.03, + "learning_rate": 1.6690545984669166e-05, + "loss": 0.2965, + "step": 22125 + }, + { + "epoch": 1.03, + "learning_rate": 1.668976219961438e-05, + "loss": 0.3912, + "step": 22130 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688978414559594e-05, + "loss": 0.3964, + "step": 22135 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688194629504804e-05, + "loss": 0.0447, + "step": 22140 + }, + { + "epoch": 1.03, + "learning_rate": 1.668741084445002e-05, + "loss": 0.1708, + "step": 22145 + }, + { + "epoch": 1.03, + "learning_rate": 1.6686627059395232e-05, + "loss": 0.1072, + "step": 22150 + }, + { + "epoch": 1.03, + "learning_rate": 1.6685843274340446e-05, + "loss": 0.1223, + "step": 22155 + }, + { + "epoch": 1.03, + "learning_rate": 1.668505948928566e-05, + "loss": 0.1006, + "step": 22160 + }, + { + "epoch": 1.03, + "learning_rate": 1.6684275704230874e-05, + "loss": 0.1096, + "step": 22165 + }, + { + "epoch": 1.03, + "learning_rate": 1.6683491919176088e-05, + "loss": 0.2708, + "step": 22170 + }, + { + "epoch": 1.03, + "learning_rate": 1.6682708134121298e-05, + "loss": 0.3093, + "step": 22175 + }, + { + "epoch": 1.03, + "learning_rate": 1.6681924349066515e-05, + "loss": 0.3701, + "step": 22180 + }, + { + "epoch": 1.04, + "learning_rate": 1.6681140564011726e-05, + "loss": 0.251, + "step": 22185 + }, + { + "epoch": 1.04, + "learning_rate": 1.668035677895694e-05, + "loss": 0.0674, + "step": 22190 + }, + { + "epoch": 1.04, + "learning_rate": 1.6679572993902154e-05, + "loss": 0.1336, + "step": 22195 + }, + { + "epoch": 1.04, + "learning_rate": 1.6678789208847368e-05, + "loss": 0.136, + "step": 22200 + }, + { + "epoch": 1.04, + "learning_rate": 1.667800542379258e-05, + "loss": 0.0998, + "step": 22205 + }, + { + "epoch": 1.04, + "learning_rate": 1.6677221638737795e-05, + "loss": 0.13, + "step": 22210 + }, + { + "epoch": 1.04, + "learning_rate": 1.6676437853683006e-05, + "loss": 0.1497, + "step": 22215 + }, + { + "epoch": 1.04, + "learning_rate": 1.667565406862822e-05, + "loss": 0.1846, + "step": 22220 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674870283573434e-05, + "loss": 0.2499, + "step": 22225 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674086498518648e-05, + "loss": 0.4191, + "step": 22230 + }, + { + "epoch": 1.04, + "learning_rate": 1.667330271346386e-05, + "loss": 0.3157, + "step": 22235 + }, + { + "epoch": 1.04, + "learning_rate": 1.6672518928409075e-05, + "loss": 0.0555, + "step": 22240 + }, + { + "epoch": 1.04, + "learning_rate": 1.667173514335429e-05, + "loss": 0.1057, + "step": 22245 + }, + { + "epoch": 1.04, + "learning_rate": 1.66709513582995e-05, + "loss": 0.0907, + "step": 22250 + }, + { + "epoch": 1.04, + "learning_rate": 1.6670167573244714e-05, + "loss": 0.1564, + "step": 22255 + }, + { + "epoch": 1.04, + "learning_rate": 1.6669383788189928e-05, + "loss": 0.1607, + "step": 22260 + }, + { + "epoch": 1.04, + "learning_rate": 1.666860000313514e-05, + "loss": 0.1959, + "step": 22265 + }, + { + "epoch": 1.04, + "learning_rate": 1.6667816218080356e-05, + "loss": 0.2283, + "step": 22270 + }, + { + "epoch": 1.04, + "learning_rate": 1.666703243302557e-05, + "loss": 0.3232, + "step": 22275 + }, + { + "epoch": 1.04, + "learning_rate": 1.6666248647970783e-05, + "loss": 0.344, + "step": 22280 + }, + { + "epoch": 1.04, + "learning_rate": 1.6665464862915994e-05, + "loss": 0.2879, + "step": 22285 + }, + { + "epoch": 1.04, + "learning_rate": 1.6664681077861208e-05, + "loss": 0.077, + "step": 22290 + }, + { + "epoch": 1.04, + "learning_rate": 1.666389729280642e-05, + "loss": 0.0681, + "step": 22295 + }, + { + "epoch": 1.04, + "learning_rate": 1.6663113507751636e-05, + "loss": 0.0788, + "step": 22300 + }, + { + "epoch": 1.04, + "learning_rate": 1.666232972269685e-05, + "loss": 0.1024, + "step": 22305 + }, + { + "epoch": 1.04, + "learning_rate": 1.6661545937642063e-05, + "loss": 0.1455, + "step": 22310 + }, + { + "epoch": 1.04, + "learning_rate": 1.6660762152587274e-05, + "loss": 0.2212, + "step": 22315 + }, + { + "epoch": 1.04, + "learning_rate": 1.665997836753249e-05, + "loss": 0.1622, + "step": 22320 + }, + { + "epoch": 1.04, + "learning_rate": 1.6659194582477702e-05, + "loss": 0.257, + "step": 22325 + }, + { + "epoch": 1.04, + "learning_rate": 1.6658410797422916e-05, + "loss": 0.386, + "step": 22330 + }, + { + "epoch": 1.04, + "learning_rate": 1.665762701236813e-05, + "loss": 0.3866, + "step": 22335 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656843227313343e-05, + "loss": 0.068, + "step": 22340 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656059442258557e-05, + "loss": 0.1259, + "step": 22345 + }, + { + "epoch": 1.04, + "learning_rate": 1.6655275657203768e-05, + "loss": 0.1199, + "step": 22350 + }, + { + "epoch": 1.04, + "learning_rate": 1.6654491872148982e-05, + "loss": 0.1675, + "step": 22355 + }, + { + "epoch": 1.04, + "learning_rate": 1.6653708087094196e-05, + "loss": 0.1525, + "step": 22360 + }, + { + "epoch": 1.04, + "learning_rate": 1.665292430203941e-05, + "loss": 0.1875, + "step": 22365 + }, + { + "epoch": 1.04, + "learning_rate": 1.6652140516984623e-05, + "loss": 0.2257, + "step": 22370 + }, + { + "epoch": 1.04, + "learning_rate": 1.6651356731929837e-05, + "loss": 0.195, + "step": 22375 + }, + { + "epoch": 1.04, + "learning_rate": 1.665057294687505e-05, + "loss": 0.3703, + "step": 22380 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649789161820265e-05, + "loss": 0.3111, + "step": 22385 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649005376765476e-05, + "loss": 0.0501, + "step": 22390 + }, + { + "epoch": 1.04, + "learning_rate": 1.6648221591710693e-05, + "loss": 0.1009, + "step": 22395 + }, + { + "epoch": 1.05, + "learning_rate": 1.6647437806655904e-05, + "loss": 0.1188, + "step": 22400 + }, + { + "epoch": 1.05, + "learning_rate": 1.6646654021601117e-05, + "loss": 0.1223, + "step": 22405 + }, + { + "epoch": 1.05, + "learning_rate": 1.664587023654633e-05, + "loss": 0.2532, + "step": 22410 + }, + { + "epoch": 1.05, + "learning_rate": 1.6645086451491542e-05, + "loss": 0.2103, + "step": 22415 + }, + { + "epoch": 1.05, + "learning_rate": 1.664430266643676e-05, + "loss": 0.2827, + "step": 22420 + }, + { + "epoch": 1.05, + "learning_rate": 1.664351888138197e-05, + "loss": 0.3525, + "step": 22425 + }, + { + "epoch": 1.05, + "learning_rate": 1.6642735096327184e-05, + "loss": 0.4255, + "step": 22430 + }, + { + "epoch": 1.05, + "learning_rate": 1.6641951311272397e-05, + "loss": 0.3211, + "step": 22435 + }, + { + "epoch": 1.05, + "learning_rate": 1.664116752621761e-05, + "loss": 0.0945, + "step": 22440 + }, + { + "epoch": 1.05, + "learning_rate": 1.6640383741162825e-05, + "loss": 0.0941, + "step": 22445 + }, + { + "epoch": 1.05, + "learning_rate": 1.663959995610804e-05, + "loss": 0.1018, + "step": 22450 + }, + { + "epoch": 1.05, + "learning_rate": 1.663881617105325e-05, + "loss": 0.1377, + "step": 22455 + }, + { + "epoch": 1.05, + "learning_rate": 1.6638032385998467e-05, + "loss": 0.1353, + "step": 22460 + }, + { + "epoch": 1.05, + "learning_rate": 1.6637248600943678e-05, + "loss": 0.1506, + "step": 22465 + }, + { + "epoch": 1.05, + "learning_rate": 1.663646481588889e-05, + "loss": 0.2194, + "step": 22470 + }, + { + "epoch": 1.05, + "learning_rate": 1.6635681030834105e-05, + "loss": 0.2673, + "step": 22475 + }, + { + "epoch": 1.05, + "learning_rate": 1.663489724577932e-05, + "loss": 0.4444, + "step": 22480 + }, + { + "epoch": 1.05, + "learning_rate": 1.6634113460724533e-05, + "loss": 0.4203, + "step": 22485 + }, + { + "epoch": 1.05, + "learning_rate": 1.6633329675669744e-05, + "loss": 0.0998, + "step": 22490 + }, + { + "epoch": 1.05, + "learning_rate": 1.663254589061496e-05, + "loss": 0.0645, + "step": 22495 + }, + { + "epoch": 1.05, + "learning_rate": 1.663176210556017e-05, + "loss": 0.0815, + "step": 22500 + }, + { + "epoch": 1.05, + "learning_rate": 1.6630978320505385e-05, + "loss": 0.1239, + "step": 22505 + }, + { + "epoch": 1.05, + "learning_rate": 1.66301945354506e-05, + "loss": 0.1158, + "step": 22510 + }, + { + "epoch": 1.05, + "learning_rate": 1.6629410750395813e-05, + "loss": 0.1965, + "step": 22515 + }, + { + "epoch": 1.05, + "learning_rate": 1.6628626965341027e-05, + "loss": 0.2295, + "step": 22520 + }, + { + "epoch": 1.05, + "learning_rate": 1.662784318028624e-05, + "loss": 0.3215, + "step": 22525 + }, + { + "epoch": 1.05, + "learning_rate": 1.662705939523145e-05, + "loss": 0.3832, + "step": 22530 + }, + { + "epoch": 1.05, + "learning_rate": 1.662627561017667e-05, + "loss": 0.3457, + "step": 22535 + }, + { + "epoch": 1.05, + "learning_rate": 1.662549182512188e-05, + "loss": 0.1041, + "step": 22540 + }, + { + "epoch": 1.05, + "learning_rate": 1.6624708040067093e-05, + "loss": 0.0946, + "step": 22545 + }, + { + "epoch": 1.05, + "learning_rate": 1.6623924255012307e-05, + "loss": 0.0797, + "step": 22550 + }, + { + "epoch": 1.05, + "learning_rate": 1.662314046995752e-05, + "loss": 0.1227, + "step": 22555 + }, + { + "epoch": 1.05, + "learning_rate": 1.6622356684902735e-05, + "loss": 0.1388, + "step": 22560 + }, + { + "epoch": 1.05, + "learning_rate": 1.6621572899847945e-05, + "loss": 0.1356, + "step": 22565 + }, + { + "epoch": 1.05, + "learning_rate": 1.662078911479316e-05, + "loss": 0.1936, + "step": 22570 + }, + { + "epoch": 1.05, + "learning_rate": 1.6620005329738373e-05, + "loss": 0.3707, + "step": 22575 + }, + { + "epoch": 1.05, + "learning_rate": 1.6619221544683587e-05, + "loss": 0.4417, + "step": 22580 + }, + { + "epoch": 1.05, + "learning_rate": 1.66184377596288e-05, + "loss": 0.4056, + "step": 22585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6617653974574015e-05, + "loss": 0.1009, + "step": 22590 + }, + { + "epoch": 1.05, + "learning_rate": 1.661687018951923e-05, + "loss": 0.1221, + "step": 22595 + }, + { + "epoch": 1.05, + "learning_rate": 1.6616086404464443e-05, + "loss": 0.1144, + "step": 22600 + }, + { + "epoch": 1.05, + "learning_rate": 1.6615302619409653e-05, + "loss": 0.1499, + "step": 22605 + }, + { + "epoch": 1.06, + "learning_rate": 1.6614518834354867e-05, + "loss": 0.1002, + "step": 22610 + }, + { + "epoch": 1.06, + "learning_rate": 1.661373504930008e-05, + "loss": 0.1595, + "step": 22615 + }, + { + "epoch": 1.06, + "learning_rate": 1.6612951264245295e-05, + "loss": 0.2121, + "step": 22620 + }, + { + "epoch": 1.06, + "learning_rate": 1.661216747919051e-05, + "loss": 0.2338, + "step": 22625 + }, + { + "epoch": 1.06, + "learning_rate": 1.661138369413572e-05, + "loss": 0.375, + "step": 22630 + }, + { + "epoch": 1.06, + "learning_rate": 1.6610599909080937e-05, + "loss": 0.2275, + "step": 22635 + }, + { + "epoch": 1.06, + "learning_rate": 1.6609816124026147e-05, + "loss": 0.0468, + "step": 22640 + }, + { + "epoch": 1.06, + "learning_rate": 1.660903233897136e-05, + "loss": 0.0573, + "step": 22645 + }, + { + "epoch": 1.06, + "learning_rate": 1.6608248553916575e-05, + "loss": 0.0989, + "step": 22650 + }, + { + "epoch": 1.06, + "learning_rate": 1.660746476886179e-05, + "loss": 0.1472, + "step": 22655 + }, + { + "epoch": 1.06, + "learning_rate": 1.6606680983807003e-05, + "loss": 0.1244, + "step": 22660 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605897198752217e-05, + "loss": 0.2461, + "step": 22665 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605113413697427e-05, + "loss": 0.1669, + "step": 22670 + }, + { + "epoch": 1.06, + "learning_rate": 1.660432962864264e-05, + "loss": 0.2483, + "step": 22675 + }, + { + "epoch": 1.06, + "learning_rate": 1.6603545843587855e-05, + "loss": 0.3463, + "step": 22680 + }, + { + "epoch": 1.06, + "learning_rate": 1.660276205853307e-05, + "loss": 0.3456, + "step": 22685 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601978273478283e-05, + "loss": 0.0599, + "step": 22690 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601194488423497e-05, + "loss": 0.0639, + "step": 22695 + }, + { + "epoch": 1.06, + "learning_rate": 1.660041070336871e-05, + "loss": 0.1262, + "step": 22700 + }, + { + "epoch": 1.06, + "learning_rate": 1.659962691831392e-05, + "loss": 0.1424, + "step": 22705 + }, + { + "epoch": 1.06, + "learning_rate": 1.659884313325914e-05, + "loss": 0.1273, + "step": 22710 + }, + { + "epoch": 1.06, + "learning_rate": 1.659805934820435e-05, + "loss": 0.2208, + "step": 22715 + }, + { + "epoch": 1.06, + "learning_rate": 1.6597275563149563e-05, + "loss": 0.1814, + "step": 22720 + }, + { + "epoch": 1.06, + "learning_rate": 1.6596491778094777e-05, + "loss": 0.2325, + "step": 22725 + }, + { + "epoch": 1.06, + "learning_rate": 1.659570799303999e-05, + "loss": 0.4139, + "step": 22730 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594924207985205e-05, + "loss": 0.2675, + "step": 22735 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594140422930415e-05, + "loss": 0.0246, + "step": 22740 + }, + { + "epoch": 1.06, + "learning_rate": 1.659335663787563e-05, + "loss": 0.0829, + "step": 22745 + }, + { + "epoch": 1.06, + "learning_rate": 1.6592572852820843e-05, + "loss": 0.1863, + "step": 22750 + }, + { + "epoch": 1.06, + "learning_rate": 1.6591789067766057e-05, + "loss": 0.1843, + "step": 22755 + }, + { + "epoch": 1.06, + "learning_rate": 1.659100528271127e-05, + "loss": 0.1143, + "step": 22760 + }, + { + "epoch": 1.06, + "learning_rate": 1.6590221497656485e-05, + "loss": 0.1305, + "step": 22765 + }, + { + "epoch": 1.06, + "learning_rate": 1.65894377126017e-05, + "loss": 0.229, + "step": 22770 + }, + { + "epoch": 1.06, + "learning_rate": 1.6588653927546913e-05, + "loss": 0.2498, + "step": 22775 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587870142492123e-05, + "loss": 0.3766, + "step": 22780 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587086357437337e-05, + "loss": 0.2961, + "step": 22785 + }, + { + "epoch": 1.06, + "learning_rate": 1.658630257238255e-05, + "loss": 0.069, + "step": 22790 + }, + { + "epoch": 1.06, + "learning_rate": 1.6585518787327765e-05, + "loss": 0.0735, + "step": 22795 + }, + { + "epoch": 1.06, + "learning_rate": 1.658473500227298e-05, + "loss": 0.1352, + "step": 22800 + }, + { + "epoch": 1.06, + "learning_rate": 1.658395121721819e-05, + "loss": 0.1563, + "step": 22805 + }, + { + "epoch": 1.06, + "learning_rate": 1.6583167432163407e-05, + "loss": 0.155, + "step": 22810 + }, + { + "epoch": 1.06, + "learning_rate": 1.6582383647108617e-05, + "loss": 0.2341, + "step": 22815 + }, + { + "epoch": 1.06, + "learning_rate": 1.658159986205383e-05, + "loss": 0.22, + "step": 22820 + }, + { + "epoch": 1.07, + "learning_rate": 1.6580816076999045e-05, + "loss": 0.2533, + "step": 22825 + }, + { + "epoch": 1.07, + "learning_rate": 1.658003229194426e-05, + "loss": 0.5077, + "step": 22830 + }, + { + "epoch": 1.07, + "learning_rate": 1.6579248506889473e-05, + "loss": 0.49, + "step": 22835 + }, + { + "epoch": 1.07, + "learning_rate": 1.6578464721834687e-05, + "loss": 0.166, + "step": 22840 + }, + { + "epoch": 1.07, + "learning_rate": 1.6577680936779897e-05, + "loss": 0.0973, + "step": 22845 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576897151725114e-05, + "loss": 0.0986, + "step": 22850 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576113366670325e-05, + "loss": 0.1956, + "step": 22855 + }, + { + "epoch": 1.07, + "learning_rate": 1.657532958161554e-05, + "loss": 0.2051, + "step": 22860 + }, + { + "epoch": 1.07, + "learning_rate": 1.6574545796560753e-05, + "loss": 0.1668, + "step": 22865 + }, + { + "epoch": 1.07, + "learning_rate": 1.6573762011505967e-05, + "loss": 0.2645, + "step": 22870 + }, + { + "epoch": 1.07, + "learning_rate": 1.657297822645118e-05, + "loss": 0.2412, + "step": 22875 + }, + { + "epoch": 1.07, + "learning_rate": 1.657219444139639e-05, + "loss": 0.36, + "step": 22880 + }, + { + "epoch": 1.07, + "learning_rate": 1.6571410656341605e-05, + "loss": 0.3128, + "step": 22885 + }, + { + "epoch": 1.07, + "learning_rate": 1.657062687128682e-05, + "loss": 0.0542, + "step": 22890 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569843086232033e-05, + "loss": 0.0474, + "step": 22895 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569059301177247e-05, + "loss": 0.1289, + "step": 22900 + }, + { + "epoch": 1.07, + "learning_rate": 1.656827551612246e-05, + "loss": 0.0668, + "step": 22905 + }, + { + "epoch": 1.07, + "learning_rate": 1.6567491731067674e-05, + "loss": 0.2174, + "step": 22910 + }, + { + "epoch": 1.07, + "learning_rate": 1.656670794601289e-05, + "loss": 0.1433, + "step": 22915 + }, + { + "epoch": 1.07, + "learning_rate": 1.65659241609581e-05, + "loss": 0.1519, + "step": 22920 + }, + { + "epoch": 1.07, + "learning_rate": 1.6565140375903316e-05, + "loss": 0.2903, + "step": 22925 + }, + { + "epoch": 1.07, + "learning_rate": 1.6564356590848527e-05, + "loss": 0.4346, + "step": 22930 + }, + { + "epoch": 1.07, + "learning_rate": 1.656357280579374e-05, + "loss": 0.3024, + "step": 22935 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562789020738955e-05, + "loss": 0.031, + "step": 22940 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562005235684165e-05, + "loss": 0.1083, + "step": 22945 + }, + { + "epoch": 1.07, + "learning_rate": 1.6561221450629382e-05, + "loss": 0.0789, + "step": 22950 + }, + { + "epoch": 1.07, + "learning_rate": 1.6560437665574593e-05, + "loss": 0.193, + "step": 22955 + }, + { + "epoch": 1.07, + "learning_rate": 1.6559653880519807e-05, + "loss": 0.2532, + "step": 22960 + }, + { + "epoch": 1.07, + "learning_rate": 1.655887009546502e-05, + "loss": 0.2134, + "step": 22965 + }, + { + "epoch": 1.07, + "learning_rate": 1.6558086310410235e-05, + "loss": 0.2538, + "step": 22970 + }, + { + "epoch": 1.07, + "learning_rate": 1.655730252535545e-05, + "loss": 0.2492, + "step": 22975 + }, + { + "epoch": 1.07, + "learning_rate": 1.6556518740300662e-05, + "loss": 0.4392, + "step": 22980 + }, + { + "epoch": 1.07, + "learning_rate": 1.6555734955245873e-05, + "loss": 0.2968, + "step": 22985 + }, + { + "epoch": 1.07, + "learning_rate": 1.655495117019109e-05, + "loss": 0.0663, + "step": 22990 + }, + { + "epoch": 1.07, + "learning_rate": 1.65541673851363e-05, + "loss": 0.1486, + "step": 22995 + }, + { + "epoch": 1.07, + "learning_rate": 1.6553383600081515e-05, + "loss": 0.0821, + "step": 23000 + }, + { + "epoch": 1.07, + "learning_rate": 1.655259981502673e-05, + "loss": 0.1134, + "step": 23005 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551816029971942e-05, + "loss": 0.2011, + "step": 23010 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551032244917156e-05, + "loss": 0.2416, + "step": 23015 + }, + { + "epoch": 1.07, + "learning_rate": 1.6550248459862367e-05, + "loss": 0.2838, + "step": 23020 + }, + { + "epoch": 1.07, + "learning_rate": 1.6549464674807584e-05, + "loss": 0.285, + "step": 23025 + }, + { + "epoch": 1.07, + "learning_rate": 1.6548680889752795e-05, + "loss": 0.3597, + "step": 23030 + }, + { + "epoch": 1.07, + "learning_rate": 1.654789710469801e-05, + "loss": 0.4682, + "step": 23035 + }, + { + "epoch": 1.08, + "learning_rate": 1.6547113319643222e-05, + "loss": 0.0424, + "step": 23040 + }, + { + "epoch": 1.08, + "learning_rate": 1.6546329534588436e-05, + "loss": 0.0739, + "step": 23045 + }, + { + "epoch": 1.08, + "learning_rate": 1.654554574953365e-05, + "loss": 0.0871, + "step": 23050 + }, + { + "epoch": 1.08, + "learning_rate": 1.6544761964478864e-05, + "loss": 0.0759, + "step": 23055 + }, + { + "epoch": 1.08, + "learning_rate": 1.6543978179424075e-05, + "loss": 0.1438, + "step": 23060 + }, + { + "epoch": 1.08, + "learning_rate": 1.654319439436929e-05, + "loss": 0.1683, + "step": 23065 + }, + { + "epoch": 1.08, + "learning_rate": 1.6542410609314503e-05, + "loss": 0.2168, + "step": 23070 + }, + { + "epoch": 1.08, + "learning_rate": 1.6541626824259716e-05, + "loss": 0.2413, + "step": 23075 + }, + { + "epoch": 1.08, + "learning_rate": 1.654084303920493e-05, + "loss": 0.3585, + "step": 23080 + }, + { + "epoch": 1.08, + "learning_rate": 1.6540059254150144e-05, + "loss": 0.2754, + "step": 23085 + }, + { + "epoch": 1.08, + "learning_rate": 1.6539275469095358e-05, + "loss": 0.0734, + "step": 23090 + }, + { + "epoch": 1.08, + "learning_rate": 1.653849168404057e-05, + "loss": 0.1821, + "step": 23095 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537707898985783e-05, + "loss": 0.136, + "step": 23100 + }, + { + "epoch": 1.08, + "learning_rate": 1.6536924113930996e-05, + "loss": 0.1626, + "step": 23105 + }, + { + "epoch": 1.08, + "learning_rate": 1.653614032887621e-05, + "loss": 0.1497, + "step": 23110 + }, + { + "epoch": 1.08, + "learning_rate": 1.6535356543821424e-05, + "loss": 0.1505, + "step": 23115 + }, + { + "epoch": 1.08, + "learning_rate": 1.6534572758766638e-05, + "loss": 0.2225, + "step": 23120 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533788973711852e-05, + "loss": 0.2445, + "step": 23125 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533005188657063e-05, + "loss": 0.4143, + "step": 23130 + }, + { + "epoch": 1.08, + "learning_rate": 1.6532221403602277e-05, + "loss": 0.322, + "step": 23135 + }, + { + "epoch": 1.08, + "learning_rate": 1.653143761854749e-05, + "loss": 0.0707, + "step": 23140 + }, + { + "epoch": 1.08, + "learning_rate": 1.6530653833492704e-05, + "loss": 0.0468, + "step": 23145 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529870048437918e-05, + "loss": 0.1199, + "step": 23150 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529086263383132e-05, + "loss": 0.1203, + "step": 23155 + }, + { + "epoch": 1.08, + "learning_rate": 1.6528302478328343e-05, + "loss": 0.1562, + "step": 23160 + }, + { + "epoch": 1.08, + "learning_rate": 1.652751869327356e-05, + "loss": 0.1764, + "step": 23165 + }, + { + "epoch": 1.08, + "learning_rate": 1.652673490821877e-05, + "loss": 0.1951, + "step": 23170 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525951123163984e-05, + "loss": 0.3135, + "step": 23175 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525167338109198e-05, + "loss": 0.2562, + "step": 23180 + }, + { + "epoch": 1.08, + "learning_rate": 1.6524383553054412e-05, + "loss": 0.2707, + "step": 23185 + }, + { + "epoch": 1.08, + "learning_rate": 1.6523599767999626e-05, + "loss": 0.0834, + "step": 23190 + }, + { + "epoch": 1.08, + "learning_rate": 1.6522815982944837e-05, + "loss": 0.1467, + "step": 23195 + }, + { + "epoch": 1.08, + "learning_rate": 1.652203219789005e-05, + "loss": 0.093, + "step": 23200 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521248412835264e-05, + "loss": 0.0902, + "step": 23205 + }, + { + "epoch": 1.08, + "learning_rate": 1.652046462778048e-05, + "loss": 0.2016, + "step": 23210 + }, + { + "epoch": 1.08, + "learning_rate": 1.6519680842725692e-05, + "loss": 0.2145, + "step": 23215 + }, + { + "epoch": 1.08, + "learning_rate": 1.6518897057670906e-05, + "loss": 0.2803, + "step": 23220 + }, + { + "epoch": 1.08, + "learning_rate": 1.651811327261612e-05, + "loss": 0.2825, + "step": 23225 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517329487561334e-05, + "loss": 0.3829, + "step": 23230 + }, + { + "epoch": 1.08, + "learning_rate": 1.6516545702506544e-05, + "loss": 0.2447, + "step": 23235 + }, + { + "epoch": 1.08, + "learning_rate": 1.6515761917451762e-05, + "loss": 0.0754, + "step": 23240 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514978132396972e-05, + "loss": 0.0598, + "step": 23245 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514194347342186e-05, + "loss": 0.1389, + "step": 23250 + }, + { + "epoch": 1.09, + "learning_rate": 1.65134105622874e-05, + "loss": 0.156, + "step": 23255 + }, + { + "epoch": 1.09, + "learning_rate": 1.651262677723261e-05, + "loss": 0.1372, + "step": 23260 + }, + { + "epoch": 1.09, + "learning_rate": 1.6511842992177828e-05, + "loss": 0.2297, + "step": 23265 + }, + { + "epoch": 1.09, + "learning_rate": 1.651105920712304e-05, + "loss": 0.2115, + "step": 23270 + }, + { + "epoch": 1.09, + "learning_rate": 1.6510275422068252e-05, + "loss": 0.2469, + "step": 23275 + }, + { + "epoch": 1.09, + "learning_rate": 1.6509491637013466e-05, + "loss": 0.2814, + "step": 23280 + }, + { + "epoch": 1.09, + "learning_rate": 1.650870785195868e-05, + "loss": 0.3825, + "step": 23285 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507924066903894e-05, + "loss": 0.0548, + "step": 23290 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507140281849108e-05, + "loss": 0.0556, + "step": 23295 + }, + { + "epoch": 1.09, + "learning_rate": 1.650635649679432e-05, + "loss": 0.0974, + "step": 23300 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505572711739536e-05, + "loss": 0.1694, + "step": 23305 + }, + { + "epoch": 1.09, + "learning_rate": 1.6504788926684746e-05, + "loss": 0.2936, + "step": 23310 + }, + { + "epoch": 1.09, + "learning_rate": 1.650400514162996e-05, + "loss": 0.1766, + "step": 23315 + }, + { + "epoch": 1.09, + "learning_rate": 1.6503221356575174e-05, + "loss": 0.1843, + "step": 23320 + }, + { + "epoch": 1.09, + "learning_rate": 1.6502437571520388e-05, + "loss": 0.3095, + "step": 23325 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501653786465602e-05, + "loss": 0.3444, + "step": 23330 + }, + { + "epoch": 1.09, + "learning_rate": 1.6500870001410812e-05, + "loss": 0.2731, + "step": 23335 + }, + { + "epoch": 1.09, + "learning_rate": 1.650008621635603e-05, + "loss": 0.0252, + "step": 23340 + }, + { + "epoch": 1.09, + "learning_rate": 1.649930243130124e-05, + "loss": 0.1032, + "step": 23345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6498518646246454e-05, + "loss": 0.1265, + "step": 23350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497734861191668e-05, + "loss": 0.1087, + "step": 23355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496951076136882e-05, + "loss": 0.1427, + "step": 23360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496167291082096e-05, + "loss": 0.2091, + "step": 23365 + }, + { + "epoch": 1.09, + "learning_rate": 1.649538350602731e-05, + "loss": 0.7426, + "step": 23370 + }, + { + "epoch": 1.09, + "learning_rate": 1.649459972097252e-05, + "loss": 0.2259, + "step": 23375 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493815935917738e-05, + "loss": 0.4487, + "step": 23380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493032150862948e-05, + "loss": 0.2265, + "step": 23385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6492248365808162e-05, + "loss": 0.0922, + "step": 23390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6491464580753376e-05, + "loss": 0.1548, + "step": 23395 + }, + { + "epoch": 1.09, + "learning_rate": 1.649068079569859e-05, + "loss": 0.1354, + "step": 23400 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489897010643804e-05, + "loss": 0.1246, + "step": 23405 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489113225589014e-05, + "loss": 0.1558, + "step": 23410 + }, + { + "epoch": 1.09, + "learning_rate": 1.6488329440534228e-05, + "loss": 0.1481, + "step": 23415 + }, + { + "epoch": 1.09, + "learning_rate": 1.6487545655479442e-05, + "loss": 0.2439, + "step": 23420 + }, + { + "epoch": 1.09, + "learning_rate": 1.6486761870424656e-05, + "loss": 0.3442, + "step": 23425 + }, + { + "epoch": 1.09, + "learning_rate": 1.648597808536987e-05, + "loss": 0.3016, + "step": 23430 + }, + { + "epoch": 1.09, + "learning_rate": 1.6485194300315084e-05, + "loss": 0.2837, + "step": 23435 + }, + { + "epoch": 1.09, + "learning_rate": 1.6484410515260298e-05, + "loss": 0.0948, + "step": 23440 + }, + { + "epoch": 1.09, + "learning_rate": 1.648362673020551e-05, + "loss": 0.0681, + "step": 23445 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482842945150722e-05, + "loss": 0.107, + "step": 23450 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482059160095936e-05, + "loss": 0.1297, + "step": 23455 + }, + { + "epoch": 1.09, + "learning_rate": 1.648127537504115e-05, + "loss": 0.1361, + "step": 23460 + }, + { + "epoch": 1.09, + "learning_rate": 1.6480491589986364e-05, + "loss": 0.142, + "step": 23465 + }, + { + "epoch": 1.1, + "learning_rate": 1.6479707804931578e-05, + "loss": 0.2334, + "step": 23470 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478924019876788e-05, + "loss": 0.277, + "step": 23475 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478140234822006e-05, + "loss": 0.341, + "step": 23480 + }, + { + "epoch": 1.1, + "learning_rate": 1.6477356449767216e-05, + "loss": 0.3929, + "step": 23485 + }, + { + "epoch": 1.1, + "learning_rate": 1.647657266471243e-05, + "loss": 0.1686, + "step": 23490 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475788879657644e-05, + "loss": 0.0867, + "step": 23495 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475005094602858e-05, + "loss": 0.0441, + "step": 23500 + }, + { + "epoch": 1.1, + "learning_rate": 1.647422130954807e-05, + "loss": 0.1218, + "step": 23505 + }, + { + "epoch": 1.1, + "learning_rate": 1.6473437524493286e-05, + "loss": 0.1216, + "step": 23510 + }, + { + "epoch": 1.1, + "learning_rate": 1.6472653739438496e-05, + "loss": 0.2273, + "step": 23515 + }, + { + "epoch": 1.1, + "learning_rate": 1.647186995438371e-05, + "loss": 0.3123, + "step": 23520 + }, + { + "epoch": 1.1, + "learning_rate": 1.6471086169328924e-05, + "loss": 0.2573, + "step": 23525 + }, + { + "epoch": 1.1, + "learning_rate": 1.6470302384274138e-05, + "loss": 0.4518, + "step": 23530 + }, + { + "epoch": 1.1, + "learning_rate": 1.6469518599219352e-05, + "loss": 0.2675, + "step": 23535 + }, + { + "epoch": 1.1, + "learning_rate": 1.6468734814164566e-05, + "loss": 0.0457, + "step": 23540 + }, + { + "epoch": 1.1, + "learning_rate": 1.646795102910978e-05, + "loss": 0.0892, + "step": 23545 + }, + { + "epoch": 1.1, + "learning_rate": 1.646716724405499e-05, + "loss": 0.07, + "step": 23550 + }, + { + "epoch": 1.1, + "learning_rate": 1.6466383459000207e-05, + "loss": 0.1443, + "step": 23555 + }, + { + "epoch": 1.1, + "learning_rate": 1.6465599673945418e-05, + "loss": 0.1848, + "step": 23560 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464815888890632e-05, + "loss": 0.1461, + "step": 23565 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464032103835846e-05, + "loss": 0.1837, + "step": 23570 + }, + { + "epoch": 1.1, + "learning_rate": 1.646324831878106e-05, + "loss": 0.178, + "step": 23575 + }, + { + "epoch": 1.1, + "learning_rate": 1.6462464533726273e-05, + "loss": 0.5349, + "step": 23580 + }, + { + "epoch": 1.1, + "learning_rate": 1.6461680748671484e-05, + "loss": 0.3265, + "step": 23585 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460896963616698e-05, + "loss": 0.0742, + "step": 23590 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460113178561912e-05, + "loss": 0.0978, + "step": 23595 + }, + { + "epoch": 1.1, + "learning_rate": 1.6459329393507126e-05, + "loss": 0.1308, + "step": 23600 + }, + { + "epoch": 1.1, + "learning_rate": 1.645854560845234e-05, + "loss": 0.1915, + "step": 23605 + }, + { + "epoch": 1.1, + "learning_rate": 1.6457761823397554e-05, + "loss": 0.094, + "step": 23610 + }, + { + "epoch": 1.1, + "learning_rate": 1.6456978038342764e-05, + "loss": 0.1869, + "step": 23615 + }, + { + "epoch": 1.1, + "learning_rate": 1.645619425328798e-05, + "loss": 0.1502, + "step": 23620 + }, + { + "epoch": 1.1, + "learning_rate": 1.6455410468233192e-05, + "loss": 0.2174, + "step": 23625 + }, + { + "epoch": 1.1, + "learning_rate": 1.6454626683178406e-05, + "loss": 0.4411, + "step": 23630 + }, + { + "epoch": 1.1, + "learning_rate": 1.645384289812362e-05, + "loss": 0.2565, + "step": 23635 + }, + { + "epoch": 1.1, + "learning_rate": 1.6453059113068834e-05, + "loss": 0.0362, + "step": 23640 + }, + { + "epoch": 1.1, + "learning_rate": 1.6452275328014047e-05, + "loss": 0.0526, + "step": 23645 + }, + { + "epoch": 1.1, + "learning_rate": 1.6451491542959258e-05, + "loss": 0.1324, + "step": 23650 + }, + { + "epoch": 1.1, + "learning_rate": 1.6450707757904475e-05, + "loss": 0.0992, + "step": 23655 + }, + { + "epoch": 1.1, + "learning_rate": 1.6449923972849686e-05, + "loss": 0.1667, + "step": 23660 + }, + { + "epoch": 1.1, + "learning_rate": 1.64491401877949e-05, + "loss": 0.2788, + "step": 23665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6448356402740114e-05, + "loss": 0.2025, + "step": 23670 + }, + { + "epoch": 1.1, + "learning_rate": 1.6447572617685328e-05, + "loss": 0.2577, + "step": 23675 + }, + { + "epoch": 1.1, + "learning_rate": 1.644678883263054e-05, + "loss": 0.2843, + "step": 23680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446005047575755e-05, + "loss": 0.2967, + "step": 23685 + }, + { + "epoch": 1.11, + "learning_rate": 1.6445221262520966e-05, + "loss": 0.0835, + "step": 23690 + }, + { + "epoch": 1.11, + "learning_rate": 1.6444437477466183e-05, + "loss": 0.0873, + "step": 23695 + }, + { + "epoch": 1.11, + "learning_rate": 1.6443653692411394e-05, + "loss": 0.1089, + "step": 23700 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442869907356608e-05, + "loss": 0.1707, + "step": 23705 + }, + { + "epoch": 1.11, + "learning_rate": 1.644208612230182e-05, + "loss": 0.191, + "step": 23710 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441302337247035e-05, + "loss": 0.124, + "step": 23715 + }, + { + "epoch": 1.11, + "learning_rate": 1.644051855219225e-05, + "loss": 0.2111, + "step": 23720 + }, + { + "epoch": 1.11, + "learning_rate": 1.643973476713746e-05, + "loss": 0.1918, + "step": 23725 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438950982082674e-05, + "loss": 0.4829, + "step": 23730 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438167197027888e-05, + "loss": 0.3964, + "step": 23735 + }, + { + "epoch": 1.11, + "learning_rate": 1.64373834119731e-05, + "loss": 0.0372, + "step": 23740 + }, + { + "epoch": 1.11, + "learning_rate": 1.6436599626918315e-05, + "loss": 0.0559, + "step": 23745 + }, + { + "epoch": 1.11, + "learning_rate": 1.643581584186353e-05, + "loss": 0.1288, + "step": 23750 + }, + { + "epoch": 1.11, + "learning_rate": 1.6435032056808743e-05, + "loss": 0.1367, + "step": 23755 + }, + { + "epoch": 1.11, + "learning_rate": 1.6434248271753957e-05, + "loss": 0.0864, + "step": 23760 + }, + { + "epoch": 1.11, + "learning_rate": 1.6433464486699168e-05, + "loss": 0.1882, + "step": 23765 + }, + { + "epoch": 1.11, + "learning_rate": 1.6432680701644385e-05, + "loss": 0.2035, + "step": 23770 + }, + { + "epoch": 1.11, + "learning_rate": 1.6431896916589595e-05, + "loss": 0.3376, + "step": 23775 + }, + { + "epoch": 1.11, + "learning_rate": 1.643111313153481e-05, + "loss": 0.5252, + "step": 23780 + }, + { + "epoch": 1.11, + "learning_rate": 1.6430329346480023e-05, + "loss": 0.2832, + "step": 23785 + }, + { + "epoch": 1.11, + "learning_rate": 1.6429545561425234e-05, + "loss": 0.0732, + "step": 23790 + }, + { + "epoch": 1.11, + "learning_rate": 1.642876177637045e-05, + "loss": 0.0599, + "step": 23795 + }, + { + "epoch": 1.11, + "learning_rate": 1.642797799131566e-05, + "loss": 0.1289, + "step": 23800 + }, + { + "epoch": 1.11, + "learning_rate": 1.6427194206260876e-05, + "loss": 0.1547, + "step": 23805 + }, + { + "epoch": 1.11, + "learning_rate": 1.642641042120609e-05, + "loss": 0.0893, + "step": 23810 + }, + { + "epoch": 1.11, + "learning_rate": 1.6425626636151303e-05, + "loss": 0.1896, + "step": 23815 + }, + { + "epoch": 1.11, + "learning_rate": 1.6424842851096517e-05, + "loss": 0.2018, + "step": 23820 + }, + { + "epoch": 1.11, + "learning_rate": 1.642405906604173e-05, + "loss": 0.2314, + "step": 23825 + }, + { + "epoch": 1.11, + "learning_rate": 1.642327528098694e-05, + "loss": 0.6314, + "step": 23830 + }, + { + "epoch": 1.11, + "learning_rate": 1.642249149593216e-05, + "loss": 0.2099, + "step": 23835 + }, + { + "epoch": 1.11, + "learning_rate": 1.642170771087737e-05, + "loss": 0.0935, + "step": 23840 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420923925822583e-05, + "loss": 0.0824, + "step": 23845 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420140140767797e-05, + "loss": 0.1094, + "step": 23850 + }, + { + "epoch": 1.11, + "learning_rate": 1.641935635571301e-05, + "loss": 0.1203, + "step": 23855 + }, + { + "epoch": 1.11, + "learning_rate": 1.6418572570658225e-05, + "loss": 0.0716, + "step": 23860 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417788785603436e-05, + "loss": 0.0886, + "step": 23865 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417005000548653e-05, + "loss": 0.2079, + "step": 23870 + }, + { + "epoch": 1.11, + "learning_rate": 1.6416221215493863e-05, + "loss": 0.2192, + "step": 23875 + }, + { + "epoch": 1.11, + "learning_rate": 1.6415437430439077e-05, + "loss": 0.1609, + "step": 23880 + }, + { + "epoch": 1.11, + "learning_rate": 1.641465364538429e-05, + "loss": 0.427, + "step": 23885 + }, + { + "epoch": 1.11, + "learning_rate": 1.6413869860329505e-05, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.11, + "learning_rate": 1.641308607527472e-05, + "loss": 0.0976, + "step": 23895 + }, + { + "epoch": 1.12, + "learning_rate": 1.6412302290219933e-05, + "loss": 0.1183, + "step": 23900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6411518505165143e-05, + "loss": 0.1117, + "step": 23905 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410734720110357e-05, + "loss": 0.1788, + "step": 23910 + }, + { + "epoch": 1.12, + "learning_rate": 1.640995093505557e-05, + "loss": 0.1834, + "step": 23915 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409167150000785e-05, + "loss": 0.2126, + "step": 23920 + }, + { + "epoch": 1.12, + "learning_rate": 1.6408383364946e-05, + "loss": 0.4336, + "step": 23925 + }, + { + "epoch": 1.12, + "learning_rate": 1.6407599579891213e-05, + "loss": 0.3505, + "step": 23930 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406815794836427e-05, + "loss": 0.3065, + "step": 23935 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406032009781637e-05, + "loss": 0.0399, + "step": 23940 + }, + { + "epoch": 1.12, + "learning_rate": 1.640524822472685e-05, + "loss": 0.0772, + "step": 23945 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404464439672065e-05, + "loss": 0.0448, + "step": 23950 + }, + { + "epoch": 1.12, + "learning_rate": 1.640368065461728e-05, + "loss": 0.0798, + "step": 23955 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402896869562493e-05, + "loss": 0.0925, + "step": 23960 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402113084507707e-05, + "loss": 0.1361, + "step": 23965 + }, + { + "epoch": 1.12, + "learning_rate": 1.640132929945292e-05, + "loss": 0.1452, + "step": 23970 + }, + { + "epoch": 1.12, + "learning_rate": 1.640054551439813e-05, + "loss": 0.2325, + "step": 23975 + }, + { + "epoch": 1.12, + "learning_rate": 1.6399761729343345e-05, + "loss": 0.3527, + "step": 23980 + }, + { + "epoch": 1.12, + "learning_rate": 1.639897794428856e-05, + "loss": 0.2864, + "step": 23985 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398194159233773e-05, + "loss": 0.0622, + "step": 23990 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397410374178987e-05, + "loss": 0.0723, + "step": 23995 + }, + { + "epoch": 1.12, + "learning_rate": 1.63966265891242e-05, + "loss": 0.1408, + "step": 24000 + }, + { + "epoch": 1.12, + "learning_rate": 1.639584280406941e-05, + "loss": 0.1051, + "step": 24005 + }, + { + "epoch": 1.12, + "learning_rate": 1.639505901901463e-05, + "loss": 0.1084, + "step": 24010 + }, + { + "epoch": 1.12, + "learning_rate": 1.639427523395984e-05, + "loss": 0.2103, + "step": 24015 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393491448905053e-05, + "loss": 0.2023, + "step": 24020 + }, + { + "epoch": 1.12, + "learning_rate": 1.6392707663850267e-05, + "loss": 0.2348, + "step": 24025 + }, + { + "epoch": 1.12, + "learning_rate": 1.639192387879548e-05, + "loss": 0.3672, + "step": 24030 + }, + { + "epoch": 1.12, + "learning_rate": 1.6391140093740695e-05, + "loss": 0.3526, + "step": 24035 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390356308685905e-05, + "loss": 0.0778, + "step": 24040 + }, + { + "epoch": 1.12, + "learning_rate": 1.638957252363112e-05, + "loss": 0.1023, + "step": 24045 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388788738576333e-05, + "loss": 0.0873, + "step": 24050 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388004953521547e-05, + "loss": 0.1086, + "step": 24055 + }, + { + "epoch": 1.12, + "learning_rate": 1.638722116846676e-05, + "loss": 0.1551, + "step": 24060 + }, + { + "epoch": 1.12, + "learning_rate": 1.6386437383411975e-05, + "loss": 0.1625, + "step": 24065 + }, + { + "epoch": 1.12, + "learning_rate": 1.638565359835719e-05, + "loss": 0.2734, + "step": 24070 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384869813302403e-05, + "loss": 0.2265, + "step": 24075 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384086028247613e-05, + "loss": 0.2489, + "step": 24080 + }, + { + "epoch": 1.12, + "learning_rate": 1.638330224319283e-05, + "loss": 0.3076, + "step": 24085 + }, + { + "epoch": 1.12, + "learning_rate": 1.638251845813804e-05, + "loss": 0.1064, + "step": 24090 + }, + { + "epoch": 1.12, + "learning_rate": 1.6381734673083255e-05, + "loss": 0.0911, + "step": 24095 + }, + { + "epoch": 1.12, + "learning_rate": 1.638095088802847e-05, + "loss": 0.0852, + "step": 24100 + }, + { + "epoch": 1.12, + "learning_rate": 1.638016710297368e-05, + "loss": 0.1027, + "step": 24105 + }, + { + "epoch": 1.13, + "learning_rate": 1.6379383317918897e-05, + "loss": 0.1949, + "step": 24110 + }, + { + "epoch": 1.13, + "learning_rate": 1.6378599532864107e-05, + "loss": 0.1723, + "step": 24115 + }, + { + "epoch": 1.13, + "learning_rate": 1.637781574780932e-05, + "loss": 0.207, + "step": 24120 + }, + { + "epoch": 1.13, + "learning_rate": 1.6377031962754535e-05, + "loss": 0.2483, + "step": 24125 + }, + { + "epoch": 1.13, + "learning_rate": 1.637624817769975e-05, + "loss": 0.4968, + "step": 24130 + }, + { + "epoch": 1.13, + "learning_rate": 1.6375464392644963e-05, + "loss": 0.3258, + "step": 24135 + }, + { + "epoch": 1.13, + "learning_rate": 1.6374680607590177e-05, + "loss": 0.0432, + "step": 24140 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373896822535387e-05, + "loss": 0.0878, + "step": 24145 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373113037480605e-05, + "loss": 0.163, + "step": 24150 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372329252425815e-05, + "loss": 0.2356, + "step": 24155 + }, + { + "epoch": 1.13, + "learning_rate": 1.637154546737103e-05, + "loss": 0.1573, + "step": 24160 + }, + { + "epoch": 1.13, + "learning_rate": 1.6370761682316243e-05, + "loss": 0.1774, + "step": 24165 + }, + { + "epoch": 1.13, + "learning_rate": 1.6369977897261457e-05, + "loss": 0.1667, + "step": 24170 + }, + { + "epoch": 1.13, + "learning_rate": 1.636919411220667e-05, + "loss": 0.1914, + "step": 24175 + }, + { + "epoch": 1.13, + "learning_rate": 1.636841032715188e-05, + "loss": 0.3366, + "step": 24180 + }, + { + "epoch": 1.13, + "learning_rate": 1.63676265420971e-05, + "loss": 0.389, + "step": 24185 + }, + { + "epoch": 1.13, + "learning_rate": 1.636684275704231e-05, + "loss": 0.0601, + "step": 24190 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366058971987523e-05, + "loss": 0.0784, + "step": 24195 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365275186932737e-05, + "loss": 0.1066, + "step": 24200 + }, + { + "epoch": 1.13, + "learning_rate": 1.636449140187795e-05, + "loss": 0.1525, + "step": 24205 + }, + { + "epoch": 1.13, + "learning_rate": 1.6363707616823165e-05, + "loss": 0.1988, + "step": 24210 + }, + { + "epoch": 1.13, + "learning_rate": 1.636292383176838e-05, + "loss": 0.252, + "step": 24215 + }, + { + "epoch": 1.13, + "learning_rate": 1.636214004671359e-05, + "loss": 0.3322, + "step": 24220 + }, + { + "epoch": 1.13, + "learning_rate": 1.6361356261658806e-05, + "loss": 0.2519, + "step": 24225 + }, + { + "epoch": 1.13, + "learning_rate": 1.6360572476604017e-05, + "loss": 0.2909, + "step": 24230 + }, + { + "epoch": 1.13, + "learning_rate": 1.635978869154923e-05, + "loss": 0.3463, + "step": 24235 + }, + { + "epoch": 1.13, + "learning_rate": 1.6359004906494445e-05, + "loss": 0.0224, + "step": 24240 + }, + { + "epoch": 1.13, + "learning_rate": 1.635822112143966e-05, + "loss": 0.0675, + "step": 24245 + }, + { + "epoch": 1.13, + "learning_rate": 1.6357437336384872e-05, + "loss": 0.1372, + "step": 24250 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356653551330083e-05, + "loss": 0.1832, + "step": 24255 + }, + { + "epoch": 1.13, + "learning_rate": 1.6355869766275297e-05, + "loss": 0.1108, + "step": 24260 + }, + { + "epoch": 1.13, + "learning_rate": 1.635508598122051e-05, + "loss": 0.1944, + "step": 24265 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354302196165725e-05, + "loss": 0.1828, + "step": 24270 + }, + { + "epoch": 1.13, + "learning_rate": 1.635351841111094e-05, + "loss": 0.2393, + "step": 24275 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352734626056153e-05, + "loss": 0.4225, + "step": 24280 + }, + { + "epoch": 1.13, + "learning_rate": 1.6351950841001366e-05, + "loss": 0.3735, + "step": 24285 + }, + { + "epoch": 1.13, + "learning_rate": 1.635116705594658e-05, + "loss": 0.0227, + "step": 24290 + }, + { + "epoch": 1.13, + "learning_rate": 1.635038327089179e-05, + "loss": 0.0606, + "step": 24295 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349599485837005e-05, + "loss": 0.0897, + "step": 24300 + }, + { + "epoch": 1.13, + "learning_rate": 1.634881570078222e-05, + "loss": 0.1893, + "step": 24305 + }, + { + "epoch": 1.13, + "learning_rate": 1.6348031915727433e-05, + "loss": 0.1348, + "step": 24310 + }, + { + "epoch": 1.13, + "learning_rate": 1.6347248130672646e-05, + "loss": 0.1975, + "step": 24315 + }, + { + "epoch": 1.13, + "learning_rate": 1.6346464345617857e-05, + "loss": 0.2427, + "step": 24320 + }, + { + "epoch": 1.14, + "learning_rate": 1.6345680560563074e-05, + "loss": 0.2059, + "step": 24325 + }, + { + "epoch": 1.14, + "learning_rate": 1.6344896775508285e-05, + "loss": 0.4388, + "step": 24330 + }, + { + "epoch": 1.14, + "learning_rate": 1.63441129904535e-05, + "loss": 0.2305, + "step": 24335 + }, + { + "epoch": 1.14, + "learning_rate": 1.6343329205398713e-05, + "loss": 0.0722, + "step": 24340 + }, + { + "epoch": 1.14, + "learning_rate": 1.6342545420343927e-05, + "loss": 0.1261, + "step": 24345 + }, + { + "epoch": 1.14, + "learning_rate": 1.634176163528914e-05, + "loss": 0.0991, + "step": 24350 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340977850234354e-05, + "loss": 0.1313, + "step": 24355 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340194065179565e-05, + "loss": 0.1284, + "step": 24360 + }, + { + "epoch": 1.14, + "learning_rate": 1.633941028012478e-05, + "loss": 0.2371, + "step": 24365 + }, + { + "epoch": 1.14, + "learning_rate": 1.6338626495069993e-05, + "loss": 0.2578, + "step": 24370 + }, + { + "epoch": 1.14, + "learning_rate": 1.6337842710015207e-05, + "loss": 0.2852, + "step": 24375 + }, + { + "epoch": 1.14, + "learning_rate": 1.633705892496042e-05, + "loss": 0.3184, + "step": 24380 + }, + { + "epoch": 1.14, + "learning_rate": 1.6336275139905634e-05, + "loss": 0.2727, + "step": 24385 + }, + { + "epoch": 1.14, + "learning_rate": 1.6335491354850848e-05, + "loss": 0.0995, + "step": 24390 + }, + { + "epoch": 1.14, + "learning_rate": 1.633470756979606e-05, + "loss": 0.075, + "step": 24395 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333923784741276e-05, + "loss": 0.1169, + "step": 24400 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333139999686487e-05, + "loss": 0.1186, + "step": 24405 + }, + { + "epoch": 1.14, + "learning_rate": 1.63323562146317e-05, + "loss": 0.1519, + "step": 24410 + }, + { + "epoch": 1.14, + "learning_rate": 1.6331572429576914e-05, + "loss": 0.1897, + "step": 24415 + }, + { + "epoch": 1.14, + "learning_rate": 1.633078864452213e-05, + "loss": 0.2469, + "step": 24420 + }, + { + "epoch": 1.14, + "learning_rate": 1.6330004859467342e-05, + "loss": 0.2405, + "step": 24425 + }, + { + "epoch": 1.14, + "learning_rate": 1.6329221074412553e-05, + "loss": 0.5484, + "step": 24430 + }, + { + "epoch": 1.14, + "learning_rate": 1.6328437289357767e-05, + "loss": 0.3294, + "step": 24435 + }, + { + "epoch": 1.14, + "learning_rate": 1.632765350430298e-05, + "loss": 0.0427, + "step": 24440 + }, + { + "epoch": 1.14, + "learning_rate": 1.6326869719248194e-05, + "loss": 0.1192, + "step": 24445 + }, + { + "epoch": 1.14, + "learning_rate": 1.632608593419341e-05, + "loss": 0.0817, + "step": 24450 + }, + { + "epoch": 1.14, + "learning_rate": 1.6325302149138622e-05, + "loss": 0.0814, + "step": 24455 + }, + { + "epoch": 1.14, + "learning_rate": 1.6324518364083833e-05, + "loss": 0.1558, + "step": 24460 + }, + { + "epoch": 1.14, + "learning_rate": 1.632373457902905e-05, + "loss": 0.149, + "step": 24465 + }, + { + "epoch": 1.14, + "learning_rate": 1.632295079397426e-05, + "loss": 0.1717, + "step": 24470 + }, + { + "epoch": 1.14, + "learning_rate": 1.6322167008919475e-05, + "loss": 0.2424, + "step": 24475 + }, + { + "epoch": 1.14, + "learning_rate": 1.632138322386469e-05, + "loss": 0.2916, + "step": 24480 + }, + { + "epoch": 1.14, + "learning_rate": 1.6320599438809902e-05, + "loss": 0.3675, + "step": 24485 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319815653755116e-05, + "loss": 0.1092, + "step": 24490 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319031868700327e-05, + "loss": 0.086, + "step": 24495 + }, + { + "epoch": 1.14, + "learning_rate": 1.6318248083645544e-05, + "loss": 0.1483, + "step": 24500 + }, + { + "epoch": 1.14, + "learning_rate": 1.6317464298590755e-05, + "loss": 0.1295, + "step": 24505 + }, + { + "epoch": 1.14, + "learning_rate": 1.631668051353597e-05, + "loss": 0.1388, + "step": 24510 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315896728481182e-05, + "loss": 0.2248, + "step": 24515 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315112943426396e-05, + "loss": 0.229, + "step": 24520 + }, + { + "epoch": 1.14, + "learning_rate": 1.631432915837161e-05, + "loss": 0.2604, + "step": 24525 + }, + { + "epoch": 1.14, + "learning_rate": 1.6313545373316824e-05, + "loss": 0.3488, + "step": 24530 + }, + { + "epoch": 1.14, + "learning_rate": 1.6312761588262035e-05, + "loss": 0.2816, + "step": 24535 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311977803207252e-05, + "loss": 0.0694, + "step": 24540 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311194018152462e-05, + "loss": 0.0728, + "step": 24545 + }, + { + "epoch": 1.15, + "learning_rate": 1.6310410233097676e-05, + "loss": 0.058, + "step": 24550 + }, + { + "epoch": 1.15, + "learning_rate": 1.630962644804289e-05, + "loss": 0.0766, + "step": 24555 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308842662988104e-05, + "loss": 0.1177, + "step": 24560 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308058877933318e-05, + "loss": 0.1314, + "step": 24565 + }, + { + "epoch": 1.15, + "learning_rate": 1.630727509287853e-05, + "loss": 0.1472, + "step": 24570 + }, + { + "epoch": 1.15, + "learning_rate": 1.6306491307823742e-05, + "loss": 0.3199, + "step": 24575 + }, + { + "epoch": 1.15, + "learning_rate": 1.6305707522768956e-05, + "loss": 0.3317, + "step": 24580 + }, + { + "epoch": 1.15, + "learning_rate": 1.630492373771417e-05, + "loss": 0.3662, + "step": 24585 + }, + { + "epoch": 1.15, + "learning_rate": 1.6304139952659384e-05, + "loss": 0.0684, + "step": 24590 + }, + { + "epoch": 1.15, + "learning_rate": 1.6303356167604598e-05, + "loss": 0.0825, + "step": 24595 + }, + { + "epoch": 1.15, + "learning_rate": 1.6302572382549812e-05, + "loss": 0.0647, + "step": 24600 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301788597495026e-05, + "loss": 0.101, + "step": 24605 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301004812440236e-05, + "loss": 0.1126, + "step": 24610 + }, + { + "epoch": 1.15, + "learning_rate": 1.6300221027385454e-05, + "loss": 0.1619, + "step": 24615 + }, + { + "epoch": 1.15, + "learning_rate": 1.6299437242330664e-05, + "loss": 0.248, + "step": 24620 + }, + { + "epoch": 1.15, + "learning_rate": 1.6298653457275878e-05, + "loss": 0.2743, + "step": 24625 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297869672221092e-05, + "loss": 0.4154, + "step": 24630 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297085887166303e-05, + "loss": 0.3563, + "step": 24635 + }, + { + "epoch": 1.15, + "learning_rate": 1.629630210211152e-05, + "loss": 0.0512, + "step": 24640 + }, + { + "epoch": 1.15, + "learning_rate": 1.629551831705673e-05, + "loss": 0.0548, + "step": 24645 + }, + { + "epoch": 1.15, + "learning_rate": 1.6294734532001944e-05, + "loss": 0.0815, + "step": 24650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293950746947158e-05, + "loss": 0.1276, + "step": 24655 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293166961892372e-05, + "loss": 0.141, + "step": 24660 + }, + { + "epoch": 1.15, + "learning_rate": 1.6292383176837586e-05, + "loss": 0.2118, + "step": 24665 + }, + { + "epoch": 1.15, + "learning_rate": 1.62915993917828e-05, + "loss": 0.1749, + "step": 24670 + }, + { + "epoch": 1.15, + "learning_rate": 1.629081560672801e-05, + "loss": 0.2312, + "step": 24675 + }, + { + "epoch": 1.15, + "learning_rate": 1.6290031821673228e-05, + "loss": 0.5181, + "step": 24680 + }, + { + "epoch": 1.15, + "learning_rate": 1.6289248036618438e-05, + "loss": 0.2571, + "step": 24685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6288464251563652e-05, + "loss": 0.0409, + "step": 24690 + }, + { + "epoch": 1.15, + "learning_rate": 1.6287680466508866e-05, + "loss": 0.0747, + "step": 24695 + }, + { + "epoch": 1.15, + "learning_rate": 1.628689668145408e-05, + "loss": 0.1181, + "step": 24700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6286112896399294e-05, + "loss": 0.1395, + "step": 24705 + }, + { + "epoch": 1.15, + "learning_rate": 1.6285329111344504e-05, + "loss": 0.1435, + "step": 24710 + }, + { + "epoch": 1.15, + "learning_rate": 1.628454532628972e-05, + "loss": 0.174, + "step": 24715 + }, + { + "epoch": 1.15, + "learning_rate": 1.6283761541234932e-05, + "loss": 0.1197, + "step": 24720 + }, + { + "epoch": 1.15, + "learning_rate": 1.6282977756180146e-05, + "loss": 0.2038, + "step": 24725 + }, + { + "epoch": 1.15, + "learning_rate": 1.628219397112536e-05, + "loss": 0.3092, + "step": 24730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6281410186070574e-05, + "loss": 0.4175, + "step": 24735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6280626401015788e-05, + "loss": 0.0201, + "step": 24740 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279842615961002e-05, + "loss": 0.1052, + "step": 24745 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279058830906212e-05, + "loss": 0.1185, + "step": 24750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6278275045851426e-05, + "loss": 0.1012, + "step": 24755 + }, + { + "epoch": 1.16, + "learning_rate": 1.627749126079664e-05, + "loss": 0.2098, + "step": 24760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6276707475741854e-05, + "loss": 0.1507, + "step": 24765 + }, + { + "epoch": 1.16, + "learning_rate": 1.6275923690687068e-05, + "loss": 0.1518, + "step": 24770 + }, + { + "epoch": 1.16, + "learning_rate": 1.627513990563228e-05, + "loss": 0.2168, + "step": 24775 + }, + { + "epoch": 1.16, + "learning_rate": 1.6274356120577496e-05, + "loss": 0.3516, + "step": 24780 + }, + { + "epoch": 1.16, + "learning_rate": 1.6273572335522706e-05, + "loss": 0.3077, + "step": 24785 + }, + { + "epoch": 1.16, + "learning_rate": 1.627278855046792e-05, + "loss": 0.0342, + "step": 24790 + }, + { + "epoch": 1.16, + "learning_rate": 1.6272004765413134e-05, + "loss": 0.083, + "step": 24795 + }, + { + "epoch": 1.16, + "learning_rate": 1.6271220980358348e-05, + "loss": 0.0801, + "step": 24800 + }, + { + "epoch": 1.16, + "learning_rate": 1.6270437195303562e-05, + "loss": 0.1694, + "step": 24805 + }, + { + "epoch": 1.16, + "learning_rate": 1.6269653410248776e-05, + "loss": 0.1143, + "step": 24810 + }, + { + "epoch": 1.16, + "learning_rate": 1.626886962519399e-05, + "loss": 0.0924, + "step": 24815 + }, + { + "epoch": 1.16, + "learning_rate": 1.62680858401392e-05, + "loss": 0.237, + "step": 24820 + }, + { + "epoch": 1.16, + "learning_rate": 1.6267302055084414e-05, + "loss": 0.1925, + "step": 24825 + }, + { + "epoch": 1.16, + "learning_rate": 1.6266518270029628e-05, + "loss": 0.3285, + "step": 24830 + }, + { + "epoch": 1.16, + "learning_rate": 1.6265734484974842e-05, + "loss": 0.4066, + "step": 24835 + }, + { + "epoch": 1.16, + "learning_rate": 1.6264950699920056e-05, + "loss": 0.0913, + "step": 24840 + }, + { + "epoch": 1.16, + "learning_rate": 1.626416691486527e-05, + "loss": 0.09, + "step": 24845 + }, + { + "epoch": 1.16, + "learning_rate": 1.626338312981048e-05, + "loss": 0.1174, + "step": 24850 + }, + { + "epoch": 1.16, + "learning_rate": 1.6262599344755697e-05, + "loss": 0.0969, + "step": 24855 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261815559700908e-05, + "loss": 0.1173, + "step": 24860 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261031774646122e-05, + "loss": 0.222, + "step": 24865 + }, + { + "epoch": 1.16, + "learning_rate": 1.6260247989591336e-05, + "loss": 0.2767, + "step": 24870 + }, + { + "epoch": 1.16, + "learning_rate": 1.625946420453655e-05, + "loss": 0.1782, + "step": 24875 + }, + { + "epoch": 1.16, + "learning_rate": 1.6258680419481764e-05, + "loss": 0.2277, + "step": 24880 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257896634426974e-05, + "loss": 0.2343, + "step": 24885 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257112849372188e-05, + "loss": 0.3339, + "step": 24890 + }, + { + "epoch": 1.16, + "learning_rate": 1.6256329064317402e-05, + "loss": 0.0648, + "step": 24895 + }, + { + "epoch": 1.16, + "learning_rate": 1.6255545279262616e-05, + "loss": 0.1105, + "step": 24900 + }, + { + "epoch": 1.16, + "learning_rate": 1.625476149420783e-05, + "loss": 0.1184, + "step": 24905 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253977709153044e-05, + "loss": 0.166, + "step": 24910 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253193924098258e-05, + "loss": 0.098, + "step": 24915 + }, + { + "epoch": 1.16, + "learning_rate": 1.625241013904347e-05, + "loss": 0.2253, + "step": 24920 + }, + { + "epoch": 1.16, + "learning_rate": 1.6251626353988682e-05, + "loss": 0.2517, + "step": 24925 + }, + { + "epoch": 1.16, + "learning_rate": 1.62508425689339e-05, + "loss": 0.287, + "step": 24930 + }, + { + "epoch": 1.16, + "learning_rate": 1.625005878387911e-05, + "loss": 0.3426, + "step": 24935 + }, + { + "epoch": 1.16, + "learning_rate": 1.6249274998824324e-05, + "loss": 0.0747, + "step": 24940 + }, + { + "epoch": 1.16, + "learning_rate": 1.6248491213769538e-05, + "loss": 0.0584, + "step": 24945 + }, + { + "epoch": 1.16, + "learning_rate": 1.6247707428714748e-05, + "loss": 0.1144, + "step": 24950 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246923643659965e-05, + "loss": 0.1062, + "step": 24955 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246139858605176e-05, + "loss": 0.1777, + "step": 24960 + }, + { + "epoch": 1.16, + "learning_rate": 1.624535607355039e-05, + "loss": 0.1773, + "step": 24965 + }, + { + "epoch": 1.17, + "learning_rate": 1.6244572288495604e-05, + "loss": 0.2313, + "step": 24970 + }, + { + "epoch": 1.17, + "learning_rate": 1.6243788503440818e-05, + "loss": 0.2795, + "step": 24975 + }, + { + "epoch": 1.17, + "learning_rate": 1.624300471838603e-05, + "loss": 0.3885, + "step": 24980 + }, + { + "epoch": 1.17, + "learning_rate": 1.6242220933331245e-05, + "loss": 0.3908, + "step": 24985 + }, + { + "epoch": 1.17, + "learning_rate": 1.6241437148276456e-05, + "loss": 0.0605, + "step": 24990 + }, + { + "epoch": 1.17, + "learning_rate": 1.6240653363221673e-05, + "loss": 0.085, + "step": 24995 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239869578166884e-05, + "loss": 0.1085, + "step": 25000 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239085793112098e-05, + "loss": 0.0463, + "step": 25005 + }, + { + "epoch": 1.17, + "learning_rate": 1.623830200805731e-05, + "loss": 0.1531, + "step": 25010 + }, + { + "epoch": 1.17, + "learning_rate": 1.6237518223002526e-05, + "loss": 0.2199, + "step": 25015 + }, + { + "epoch": 1.17, + "learning_rate": 1.623673443794774e-05, + "loss": 0.2393, + "step": 25020 + }, + { + "epoch": 1.17, + "learning_rate": 1.623595065289295e-05, + "loss": 0.3733, + "step": 25025 + }, + { + "epoch": 1.17, + "learning_rate": 1.6235166867838167e-05, + "loss": 0.3225, + "step": 25030 + }, + { + "epoch": 1.17, + "learning_rate": 1.6234383082783378e-05, + "loss": 0.2117, + "step": 25035 + }, + { + "epoch": 1.17, + "learning_rate": 1.623359929772859e-05, + "loss": 0.0419, + "step": 25040 + }, + { + "epoch": 1.17, + "learning_rate": 1.6232815512673806e-05, + "loss": 0.1021, + "step": 25045 + }, + { + "epoch": 1.17, + "learning_rate": 1.623203172761902e-05, + "loss": 0.1235, + "step": 25050 + }, + { + "epoch": 1.17, + "learning_rate": 1.6231247942564233e-05, + "loss": 0.1249, + "step": 25055 + }, + { + "epoch": 1.17, + "learning_rate": 1.6230464157509447e-05, + "loss": 0.192, + "step": 25060 + }, + { + "epoch": 1.17, + "learning_rate": 1.6229680372454658e-05, + "loss": 0.1072, + "step": 25065 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228896587399875e-05, + "loss": 0.1035, + "step": 25070 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228112802345086e-05, + "loss": 0.2934, + "step": 25075 + }, + { + "epoch": 1.17, + "learning_rate": 1.62273290172903e-05, + "loss": 0.4639, + "step": 25080 + }, + { + "epoch": 1.17, + "learning_rate": 1.6226545232235513e-05, + "loss": 0.235, + "step": 25085 + }, + { + "epoch": 1.17, + "learning_rate": 1.6225761447180727e-05, + "loss": 0.0493, + "step": 25090 + }, + { + "epoch": 1.17, + "learning_rate": 1.622497766212594e-05, + "loss": 0.0572, + "step": 25095 + }, + { + "epoch": 1.17, + "learning_rate": 1.6224193877071152e-05, + "loss": 0.0987, + "step": 25100 + }, + { + "epoch": 1.17, + "learning_rate": 1.6223410092016366e-05, + "loss": 0.1056, + "step": 25105 + }, + { + "epoch": 1.17, + "learning_rate": 1.622262630696158e-05, + "loss": 0.1613, + "step": 25110 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221842521906793e-05, + "loss": 0.1629, + "step": 25115 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221058736852007e-05, + "loss": 0.2078, + "step": 25120 + }, + { + "epoch": 1.17, + "learning_rate": 1.622027495179722e-05, + "loss": 0.1947, + "step": 25125 + }, + { + "epoch": 1.17, + "learning_rate": 1.6219491166742435e-05, + "loss": 0.3467, + "step": 25130 + }, + { + "epoch": 1.17, + "learning_rate": 1.621870738168765e-05, + "loss": 0.2284, + "step": 25135 + }, + { + "epoch": 1.17, + "learning_rate": 1.621792359663286e-05, + "loss": 0.0624, + "step": 25140 + }, + { + "epoch": 1.17, + "learning_rate": 1.6217139811578074e-05, + "loss": 0.0692, + "step": 25145 + }, + { + "epoch": 1.17, + "learning_rate": 1.6216356026523287e-05, + "loss": 0.1222, + "step": 25150 + }, + { + "epoch": 1.17, + "learning_rate": 1.62155722414685e-05, + "loss": 0.1692, + "step": 25155 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214788456413715e-05, + "loss": 0.1629, + "step": 25160 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214004671358926e-05, + "loss": 0.1671, + "step": 25165 + }, + { + "epoch": 1.17, + "learning_rate": 1.6213220886304143e-05, + "loss": 0.2507, + "step": 25170 + }, + { + "epoch": 1.17, + "learning_rate": 1.6212437101249354e-05, + "loss": 0.2868, + "step": 25175 + }, + { + "epoch": 1.17, + "learning_rate": 1.6211653316194567e-05, + "loss": 0.3525, + "step": 25180 + }, + { + "epoch": 1.18, + "learning_rate": 1.621086953113978e-05, + "loss": 0.3272, + "step": 25185 + }, + { + "epoch": 1.18, + "learning_rate": 1.6210085746084995e-05, + "loss": 0.0452, + "step": 25190 + }, + { + "epoch": 1.18, + "learning_rate": 1.620930196103021e-05, + "loss": 0.0713, + "step": 25195 + }, + { + "epoch": 1.18, + "learning_rate": 1.6208518175975423e-05, + "loss": 0.0912, + "step": 25200 + }, + { + "epoch": 1.18, + "learning_rate": 1.6207734390920634e-05, + "loss": 0.1543, + "step": 25205 + }, + { + "epoch": 1.18, + "learning_rate": 1.6206950605865848e-05, + "loss": 0.1041, + "step": 25210 + }, + { + "epoch": 1.18, + "learning_rate": 1.620616682081106e-05, + "loss": 0.141, + "step": 25215 + }, + { + "epoch": 1.18, + "learning_rate": 1.6205383035756275e-05, + "loss": 0.1966, + "step": 25220 + }, + { + "epoch": 1.18, + "learning_rate": 1.620459925070149e-05, + "loss": 0.3014, + "step": 25225 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203815465646703e-05, + "loss": 0.4792, + "step": 25230 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203031680591917e-05, + "loss": 0.2327, + "step": 25235 + }, + { + "epoch": 1.18, + "learning_rate": 1.6202247895537128e-05, + "loss": 0.0362, + "step": 25240 + }, + { + "epoch": 1.18, + "learning_rate": 1.6201464110482345e-05, + "loss": 0.1217, + "step": 25245 + }, + { + "epoch": 1.18, + "learning_rate": 1.6200680325427555e-05, + "loss": 0.0556, + "step": 25250 + }, + { + "epoch": 1.18, + "learning_rate": 1.619989654037277e-05, + "loss": 0.1454, + "step": 25255 + }, + { + "epoch": 1.18, + "learning_rate": 1.6199112755317983e-05, + "loss": 0.1041, + "step": 25260 + }, + { + "epoch": 1.18, + "learning_rate": 1.6198328970263197e-05, + "loss": 0.1451, + "step": 25265 + }, + { + "epoch": 1.18, + "learning_rate": 1.619754518520841e-05, + "loss": 0.2237, + "step": 25270 + }, + { + "epoch": 1.18, + "learning_rate": 1.619691815716458e-05, + "loss": 0.2745, + "step": 25275 + }, + { + "epoch": 1.18, + "learning_rate": 1.6196134372109792e-05, + "loss": 0.4215, + "step": 25280 + }, + { + "epoch": 1.18, + "learning_rate": 1.619535058705501e-05, + "loss": 0.481, + "step": 25285 + }, + { + "epoch": 1.18, + "learning_rate": 1.619456680200022e-05, + "loss": 0.0337, + "step": 25290 + }, + { + "epoch": 1.18, + "learning_rate": 1.6193783016945434e-05, + "loss": 0.102, + "step": 25295 + }, + { + "epoch": 1.18, + "learning_rate": 1.6192999231890648e-05, + "loss": 0.0853, + "step": 25300 + }, + { + "epoch": 1.18, + "learning_rate": 1.619221544683586e-05, + "loss": 0.1298, + "step": 25305 + }, + { + "epoch": 1.18, + "learning_rate": 1.6191431661781075e-05, + "loss": 0.151, + "step": 25310 + }, + { + "epoch": 1.18, + "learning_rate": 1.619064787672629e-05, + "loss": 0.1743, + "step": 25315 + }, + { + "epoch": 1.18, + "learning_rate": 1.61898640916715e-05, + "loss": 0.247, + "step": 25320 + }, + { + "epoch": 1.18, + "learning_rate": 1.6189080306616717e-05, + "loss": 0.2403, + "step": 25325 + }, + { + "epoch": 1.18, + "learning_rate": 1.6188296521561928e-05, + "loss": 0.4411, + "step": 25330 + }, + { + "epoch": 1.18, + "learning_rate": 1.618751273650714e-05, + "loss": 0.2575, + "step": 25335 + }, + { + "epoch": 1.18, + "learning_rate": 1.6186728951452355e-05, + "loss": 0.0465, + "step": 25340 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185945166397566e-05, + "loss": 0.0688, + "step": 25345 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185161381342783e-05, + "loss": 0.1129, + "step": 25350 + }, + { + "epoch": 1.18, + "learning_rate": 1.6184377596287994e-05, + "loss": 0.1276, + "step": 25355 + }, + { + "epoch": 1.18, + "learning_rate": 1.6183593811233208e-05, + "loss": 0.0864, + "step": 25360 + }, + { + "epoch": 1.18, + "learning_rate": 1.618281002617842e-05, + "loss": 0.1639, + "step": 25365 + }, + { + "epoch": 1.18, + "learning_rate": 1.6182026241123636e-05, + "loss": 0.1556, + "step": 25370 + }, + { + "epoch": 1.18, + "learning_rate": 1.618124245606885e-05, + "loss": 0.3036, + "step": 25375 + }, + { + "epoch": 1.18, + "learning_rate": 1.6180458671014063e-05, + "loss": 0.3818, + "step": 25380 + }, + { + "epoch": 1.18, + "learning_rate": 1.6179674885959277e-05, + "loss": 0.2293, + "step": 25385 + }, + { + "epoch": 1.18, + "learning_rate": 1.617889110090449e-05, + "loss": 0.0748, + "step": 25390 + }, + { + "epoch": 1.18, + "learning_rate": 1.61781073158497e-05, + "loss": 0.1107, + "step": 25395 + }, + { + "epoch": 1.19, + "learning_rate": 1.6177323530794916e-05, + "loss": 0.166, + "step": 25400 + }, + { + "epoch": 1.19, + "learning_rate": 1.617653974574013e-05, + "loss": 0.0938, + "step": 25405 + }, + { + "epoch": 1.19, + "learning_rate": 1.6175755960685343e-05, + "loss": 0.1765, + "step": 25410 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174972175630557e-05, + "loss": 0.1421, + "step": 25415 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174188390575768e-05, + "loss": 0.169, + "step": 25420 + }, + { + "epoch": 1.19, + "learning_rate": 1.6173404605520985e-05, + "loss": 0.2901, + "step": 25425 + }, + { + "epoch": 1.19, + "learning_rate": 1.6172620820466196e-05, + "loss": 0.4629, + "step": 25430 + }, + { + "epoch": 1.19, + "learning_rate": 1.617183703541141e-05, + "loss": 0.4004, + "step": 25435 + }, + { + "epoch": 1.19, + "learning_rate": 1.6171053250356623e-05, + "loss": 0.0532, + "step": 25440 + }, + { + "epoch": 1.19, + "learning_rate": 1.6170269465301837e-05, + "loss": 0.0672, + "step": 25445 + }, + { + "epoch": 1.19, + "learning_rate": 1.616948568024705e-05, + "loss": 0.1277, + "step": 25450 + }, + { + "epoch": 1.19, + "learning_rate": 1.6168701895192265e-05, + "loss": 0.0931, + "step": 25455 + }, + { + "epoch": 1.19, + "learning_rate": 1.6167918110137476e-05, + "loss": 0.1628, + "step": 25460 + }, + { + "epoch": 1.19, + "learning_rate": 1.616713432508269e-05, + "loss": 0.1642, + "step": 25465 + }, + { + "epoch": 1.19, + "learning_rate": 1.6166350540027903e-05, + "loss": 0.1649, + "step": 25470 + }, + { + "epoch": 1.19, + "learning_rate": 1.6165566754973117e-05, + "loss": 0.2531, + "step": 25475 + }, + { + "epoch": 1.19, + "learning_rate": 1.616478296991833e-05, + "loss": 0.4141, + "step": 25480 + }, + { + "epoch": 1.19, + "learning_rate": 1.6163999184863545e-05, + "loss": 0.3355, + "step": 25485 + }, + { + "epoch": 1.19, + "learning_rate": 1.616321539980876e-05, + "loss": 0.0631, + "step": 25490 + }, + { + "epoch": 1.19, + "learning_rate": 1.616243161475397e-05, + "loss": 0.0915, + "step": 25495 + }, + { + "epoch": 1.19, + "learning_rate": 1.6161647829699187e-05, + "loss": 0.1369, + "step": 25500 + }, + { + "epoch": 1.19, + "learning_rate": 1.6160864044644397e-05, + "loss": 0.1394, + "step": 25505 + }, + { + "epoch": 1.19, + "learning_rate": 1.616008025958961e-05, + "loss": 0.096, + "step": 25510 + }, + { + "epoch": 1.19, + "learning_rate": 1.6159296474534825e-05, + "loss": 0.1094, + "step": 25515 + }, + { + "epoch": 1.19, + "learning_rate": 1.615851268948004e-05, + "loss": 0.1217, + "step": 25520 + }, + { + "epoch": 1.19, + "learning_rate": 1.6157728904425253e-05, + "loss": 0.2451, + "step": 25525 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156945119370464e-05, + "loss": 0.2088, + "step": 25530 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156161334315677e-05, + "loss": 0.2696, + "step": 25535 + }, + { + "epoch": 1.19, + "learning_rate": 1.615537754926089e-05, + "loss": 0.0968, + "step": 25540 + }, + { + "epoch": 1.19, + "learning_rate": 1.6154593764206105e-05, + "loss": 0.0515, + "step": 25545 + }, + { + "epoch": 1.19, + "learning_rate": 1.615380997915132e-05, + "loss": 0.0912, + "step": 25550 + }, + { + "epoch": 1.19, + "learning_rate": 1.6153026194096533e-05, + "loss": 0.1158, + "step": 25555 + }, + { + "epoch": 1.19, + "learning_rate": 1.6152242409041744e-05, + "loss": 0.1445, + "step": 25560 + }, + { + "epoch": 1.19, + "learning_rate": 1.615145862398696e-05, + "loss": 0.1769, + "step": 25565 + }, + { + "epoch": 1.19, + "learning_rate": 1.615067483893217e-05, + "loss": 0.1704, + "step": 25570 + }, + { + "epoch": 1.19, + "learning_rate": 1.6149891053877385e-05, + "loss": 0.2812, + "step": 25575 + }, + { + "epoch": 1.19, + "learning_rate": 1.61491072688226e-05, + "loss": 0.3944, + "step": 25580 + }, + { + "epoch": 1.19, + "learning_rate": 1.6148323483767813e-05, + "loss": 0.2521, + "step": 25585 + }, + { + "epoch": 1.19, + "learning_rate": 1.6147539698713027e-05, + "loss": 0.0945, + "step": 25590 + }, + { + "epoch": 1.19, + "learning_rate": 1.6146755913658238e-05, + "loss": 0.1083, + "step": 25595 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145972128603455e-05, + "loss": 0.0934, + "step": 25600 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145188343548665e-05, + "loss": 0.1016, + "step": 25605 + }, + { + "epoch": 1.19, + "learning_rate": 1.614440455849388e-05, + "loss": 0.1465, + "step": 25610 + }, + { + "epoch": 1.2, + "learning_rate": 1.6143620773439093e-05, + "loss": 0.1531, + "step": 25615 + }, + { + "epoch": 1.2, + "learning_rate": 1.6142836988384307e-05, + "loss": 0.1764, + "step": 25620 + }, + { + "epoch": 1.2, + "learning_rate": 1.614205320332952e-05, + "loss": 0.2381, + "step": 25625 + }, + { + "epoch": 1.2, + "learning_rate": 1.6141269418274735e-05, + "loss": 0.4556, + "step": 25630 + }, + { + "epoch": 1.2, + "learning_rate": 1.6140485633219945e-05, + "loss": 0.3126, + "step": 25635 + }, + { + "epoch": 1.2, + "learning_rate": 1.6139701848165163e-05, + "loss": 0.0332, + "step": 25640 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138918063110373e-05, + "loss": 0.0406, + "step": 25645 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138134278055587e-05, + "loss": 0.1147, + "step": 25650 + }, + { + "epoch": 1.2, + "learning_rate": 1.61373504930008e-05, + "loss": 0.1279, + "step": 25655 + }, + { + "epoch": 1.2, + "learning_rate": 1.613656670794601e-05, + "loss": 0.1488, + "step": 25660 + }, + { + "epoch": 1.2, + "learning_rate": 1.613578292289123e-05, + "loss": 0.0947, + "step": 25665 + }, + { + "epoch": 1.2, + "learning_rate": 1.613499913783644e-05, + "loss": 0.147, + "step": 25670 + }, + { + "epoch": 1.2, + "learning_rate": 1.6134215352781653e-05, + "loss": 0.2196, + "step": 25675 + }, + { + "epoch": 1.2, + "learning_rate": 1.6133431567726867e-05, + "loss": 0.3325, + "step": 25680 + }, + { + "epoch": 1.2, + "learning_rate": 1.613264778267208e-05, + "loss": 0.3548, + "step": 25685 + }, + { + "epoch": 1.2, + "learning_rate": 1.6131863997617295e-05, + "loss": 0.0497, + "step": 25690 + }, + { + "epoch": 1.2, + "learning_rate": 1.613108021256251e-05, + "loss": 0.0752, + "step": 25695 + }, + { + "epoch": 1.2, + "learning_rate": 1.6130296427507723e-05, + "loss": 0.0716, + "step": 25700 + }, + { + "epoch": 1.2, + "learning_rate": 1.6129512642452937e-05, + "loss": 0.1445, + "step": 25705 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128728857398147e-05, + "loss": 0.1371, + "step": 25710 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128101829354318e-05, + "loss": 0.1388, + "step": 25715 + }, + { + "epoch": 1.2, + "learning_rate": 1.6127318044299535e-05, + "loss": 0.1818, + "step": 25720 + }, + { + "epoch": 1.2, + "learning_rate": 1.6126534259244745e-05, + "loss": 0.1747, + "step": 25725 + }, + { + "epoch": 1.2, + "learning_rate": 1.612575047418996e-05, + "loss": 0.2925, + "step": 25730 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124966689135173e-05, + "loss": 0.212, + "step": 25735 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124182904080384e-05, + "loss": 0.07, + "step": 25740 + }, + { + "epoch": 1.2, + "learning_rate": 1.61233991190256e-05, + "loss": 0.0871, + "step": 25745 + }, + { + "epoch": 1.2, + "learning_rate": 1.612261533397081e-05, + "loss": 0.1572, + "step": 25750 + }, + { + "epoch": 1.2, + "learning_rate": 1.6121831548916026e-05, + "loss": 0.1053, + "step": 25755 + }, + { + "epoch": 1.2, + "learning_rate": 1.612104776386124e-05, + "loss": 0.1303, + "step": 25760 + }, + { + "epoch": 1.2, + "learning_rate": 1.6120263978806453e-05, + "loss": 0.2018, + "step": 25765 + }, + { + "epoch": 1.2, + "learning_rate": 1.6119480193751667e-05, + "loss": 0.2664, + "step": 25770 + }, + { + "epoch": 1.2, + "learning_rate": 1.611869640869688e-05, + "loss": 0.2745, + "step": 25775 + }, + { + "epoch": 1.2, + "learning_rate": 1.6117912623642095e-05, + "loss": 0.4014, + "step": 25780 + }, + { + "epoch": 1.2, + "learning_rate": 1.611712883858731e-05, + "loss": 0.2742, + "step": 25785 + }, + { + "epoch": 1.2, + "learning_rate": 1.611634505353252e-05, + "loss": 0.094, + "step": 25790 + }, + { + "epoch": 1.2, + "learning_rate": 1.6115561268477733e-05, + "loss": 0.1958, + "step": 25795 + }, + { + "epoch": 1.2, + "learning_rate": 1.6114777483422947e-05, + "loss": 0.0788, + "step": 25800 + }, + { + "epoch": 1.2, + "learning_rate": 1.611399369836816e-05, + "loss": 0.106, + "step": 25805 + }, + { + "epoch": 1.2, + "learning_rate": 1.6113209913313375e-05, + "loss": 0.1569, + "step": 25810 + }, + { + "epoch": 1.2, + "learning_rate": 1.6112426128258586e-05, + "loss": 0.126, + "step": 25815 + }, + { + "epoch": 1.2, + "learning_rate": 1.6111642343203803e-05, + "loss": 0.1446, + "step": 25820 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110858558149013e-05, + "loss": 0.3583, + "step": 25825 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110074773094227e-05, + "loss": 0.3009, + "step": 25830 + }, + { + "epoch": 1.21, + "learning_rate": 1.610929098803944e-05, + "loss": 0.3158, + "step": 25835 + }, + { + "epoch": 1.21, + "learning_rate": 1.6108507202984655e-05, + "loss": 0.1105, + "step": 25840 + }, + { + "epoch": 1.21, + "learning_rate": 1.610772341792987e-05, + "loss": 0.0567, + "step": 25845 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106939632875083e-05, + "loss": 0.0254, + "step": 25850 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106155847820293e-05, + "loss": 0.1127, + "step": 25855 + }, + { + "epoch": 1.21, + "learning_rate": 1.6105372062765507e-05, + "loss": 0.165, + "step": 25860 + }, + { + "epoch": 1.21, + "learning_rate": 1.610458827771072e-05, + "loss": 0.1691, + "step": 25865 + }, + { + "epoch": 1.21, + "learning_rate": 1.6103804492655935e-05, + "loss": 0.1748, + "step": 25870 + }, + { + "epoch": 1.21, + "learning_rate": 1.610302070760115e-05, + "loss": 0.3087, + "step": 25875 + }, + { + "epoch": 1.21, + "learning_rate": 1.6102236922546363e-05, + "loss": 0.3935, + "step": 25880 + }, + { + "epoch": 1.21, + "learning_rate": 1.6101453137491577e-05, + "loss": 0.3608, + "step": 25885 + }, + { + "epoch": 1.21, + "learning_rate": 1.6100669352436787e-05, + "loss": 0.0722, + "step": 25890 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099885567382005e-05, + "loss": 0.1239, + "step": 25895 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099101782327215e-05, + "loss": 0.0837, + "step": 25900 + }, + { + "epoch": 1.21, + "learning_rate": 1.609831799727243e-05, + "loss": 0.1534, + "step": 25905 + }, + { + "epoch": 1.21, + "learning_rate": 1.6097534212217643e-05, + "loss": 0.1598, + "step": 25910 + }, + { + "epoch": 1.21, + "learning_rate": 1.6096750427162857e-05, + "loss": 0.1712, + "step": 25915 + }, + { + "epoch": 1.21, + "learning_rate": 1.609596664210807e-05, + "loss": 0.1853, + "step": 25920 + }, + { + "epoch": 1.21, + "learning_rate": 1.609518285705328e-05, + "loss": 0.2454, + "step": 25925 + }, + { + "epoch": 1.21, + "learning_rate": 1.6094399071998495e-05, + "loss": 0.303, + "step": 25930 + }, + { + "epoch": 1.21, + "learning_rate": 1.609361528694371e-05, + "loss": 0.3649, + "step": 25935 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092831501888923e-05, + "loss": 0.0425, + "step": 25940 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092047716834137e-05, + "loss": 0.1058, + "step": 25945 + }, + { + "epoch": 1.21, + "learning_rate": 1.609126393177935e-05, + "loss": 0.0823, + "step": 25950 + }, + { + "epoch": 1.21, + "learning_rate": 1.609048014672456e-05, + "loss": 0.0905, + "step": 25955 + }, + { + "epoch": 1.21, + "learning_rate": 1.608969636166978e-05, + "loss": 0.1479, + "step": 25960 + }, + { + "epoch": 1.21, + "learning_rate": 1.608891257661499e-05, + "loss": 0.1194, + "step": 25965 + }, + { + "epoch": 1.21, + "learning_rate": 1.6088128791560203e-05, + "loss": 0.2551, + "step": 25970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6087345006505417e-05, + "loss": 0.2567, + "step": 25975 + }, + { + "epoch": 1.21, + "learning_rate": 1.608656122145063e-05, + "loss": 0.4998, + "step": 25980 + }, + { + "epoch": 1.21, + "learning_rate": 1.6085777436395845e-05, + "loss": 0.3416, + "step": 25985 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084993651341055e-05, + "loss": 0.0866, + "step": 25990 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084209866286273e-05, + "loss": 0.1128, + "step": 25995 + }, + { + "epoch": 1.21, + "learning_rate": 1.6083426081231483e-05, + "loss": 0.1394, + "step": 26000 + }, + { + "epoch": 1.21, + "learning_rate": 1.6082642296176697e-05, + "loss": 0.163, + "step": 26005 + }, + { + "epoch": 1.21, + "learning_rate": 1.608185851112191e-05, + "loss": 0.1127, + "step": 26010 + }, + { + "epoch": 1.21, + "learning_rate": 1.6081074726067125e-05, + "loss": 0.1923, + "step": 26015 + }, + { + "epoch": 1.21, + "learning_rate": 1.608029094101234e-05, + "loss": 0.1738, + "step": 26020 + }, + { + "epoch": 1.21, + "learning_rate": 1.6079507155957553e-05, + "loss": 0.2733, + "step": 26025 + }, + { + "epoch": 1.21, + "learning_rate": 1.6078723370902763e-05, + "loss": 0.3812, + "step": 26030 + }, + { + "epoch": 1.21, + "learning_rate": 1.607793958584798e-05, + "loss": 0.2911, + "step": 26035 + }, + { + "epoch": 1.22, + "learning_rate": 1.607715580079319e-05, + "loss": 0.0426, + "step": 26040 + }, + { + "epoch": 1.22, + "learning_rate": 1.6076372015738405e-05, + "loss": 0.1156, + "step": 26045 + }, + { + "epoch": 1.22, + "learning_rate": 1.607558823068362e-05, + "loss": 0.1092, + "step": 26050 + }, + { + "epoch": 1.22, + "learning_rate": 1.607480444562883e-05, + "loss": 0.1071, + "step": 26055 + }, + { + "epoch": 1.22, + "learning_rate": 1.6074020660574047e-05, + "loss": 0.1825, + "step": 26060 + }, + { + "epoch": 1.22, + "learning_rate": 1.6073236875519257e-05, + "loss": 0.2019, + "step": 26065 + }, + { + "epoch": 1.22, + "learning_rate": 1.607245309046447e-05, + "loss": 0.2312, + "step": 26070 + }, + { + "epoch": 1.22, + "learning_rate": 1.6071669305409685e-05, + "loss": 0.1766, + "step": 26075 + }, + { + "epoch": 1.22, + "learning_rate": 1.60708855203549e-05, + "loss": 0.3091, + "step": 26080 + }, + { + "epoch": 1.22, + "learning_rate": 1.6070101735300113e-05, + "loss": 0.35, + "step": 26085 + }, + { + "epoch": 1.22, + "learning_rate": 1.6069317950245327e-05, + "loss": 0.0862, + "step": 26090 + }, + { + "epoch": 1.22, + "learning_rate": 1.606853416519054e-05, + "loss": 0.0783, + "step": 26095 + }, + { + "epoch": 1.22, + "learning_rate": 1.6067750380135755e-05, + "loss": 0.0974, + "step": 26100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066966595080965e-05, + "loss": 0.0428, + "step": 26105 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066182810026182e-05, + "loss": 0.1272, + "step": 26110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6065399024971393e-05, + "loss": 0.1728, + "step": 26115 + }, + { + "epoch": 1.22, + "learning_rate": 1.6064615239916607e-05, + "loss": 0.2236, + "step": 26120 + }, + { + "epoch": 1.22, + "learning_rate": 1.606383145486182e-05, + "loss": 0.3677, + "step": 26125 + }, + { + "epoch": 1.22, + "learning_rate": 1.606304766980703e-05, + "loss": 0.4215, + "step": 26130 + }, + { + "epoch": 1.22, + "learning_rate": 1.606226388475225e-05, + "loss": 0.3085, + "step": 26135 + }, + { + "epoch": 1.22, + "learning_rate": 1.606148009969746e-05, + "loss": 0.0464, + "step": 26140 + }, + { + "epoch": 1.22, + "learning_rate": 1.6060696314642673e-05, + "loss": 0.0697, + "step": 26145 + }, + { + "epoch": 1.22, + "learning_rate": 1.6059912529587887e-05, + "loss": 0.1091, + "step": 26150 + }, + { + "epoch": 1.22, + "learning_rate": 1.60591287445331e-05, + "loss": 0.1073, + "step": 26155 + }, + { + "epoch": 1.22, + "learning_rate": 1.6058344959478315e-05, + "loss": 0.1585, + "step": 26160 + }, + { + "epoch": 1.22, + "learning_rate": 1.605756117442353e-05, + "loss": 0.1376, + "step": 26165 + }, + { + "epoch": 1.22, + "learning_rate": 1.605677738936874e-05, + "loss": 0.2462, + "step": 26170 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055993604313956e-05, + "loss": 0.1889, + "step": 26175 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055209819259167e-05, + "loss": 0.3838, + "step": 26180 + }, + { + "epoch": 1.22, + "learning_rate": 1.605442603420438e-05, + "loss": 0.3044, + "step": 26185 + }, + { + "epoch": 1.22, + "learning_rate": 1.6053642249149595e-05, + "loss": 0.0562, + "step": 26190 + }, + { + "epoch": 1.22, + "learning_rate": 1.605285846409481e-05, + "loss": 0.0978, + "step": 26195 + }, + { + "epoch": 1.22, + "learning_rate": 1.6052074679040022e-05, + "loss": 0.1215, + "step": 26200 + }, + { + "epoch": 1.22, + "learning_rate": 1.6051290893985233e-05, + "loss": 0.1945, + "step": 26205 + }, + { + "epoch": 1.22, + "learning_rate": 1.605050710893045e-05, + "loss": 0.1485, + "step": 26210 + }, + { + "epoch": 1.22, + "learning_rate": 1.604972332387566e-05, + "loss": 0.0891, + "step": 26215 + }, + { + "epoch": 1.22, + "learning_rate": 1.6048939538820875e-05, + "loss": 0.1399, + "step": 26220 + }, + { + "epoch": 1.22, + "learning_rate": 1.604815575376609e-05, + "loss": 0.205, + "step": 26225 + }, + { + "epoch": 1.22, + "learning_rate": 1.6047371968711303e-05, + "loss": 0.3367, + "step": 26230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6046588183656516e-05, + "loss": 0.2997, + "step": 26235 + }, + { + "epoch": 1.22, + "learning_rate": 1.604580439860173e-05, + "loss": 0.0592, + "step": 26240 + }, + { + "epoch": 1.22, + "learning_rate": 1.604502061354694e-05, + "loss": 0.0551, + "step": 26245 + }, + { + "epoch": 1.22, + "learning_rate": 1.6044236828492155e-05, + "loss": 0.0715, + "step": 26250 + }, + { + "epoch": 1.23, + "learning_rate": 1.604345304343737e-05, + "loss": 0.1212, + "step": 26255 + }, + { + "epoch": 1.23, + "learning_rate": 1.6042669258382583e-05, + "loss": 0.1241, + "step": 26260 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041885473327796e-05, + "loss": 0.1764, + "step": 26265 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041101688273007e-05, + "loss": 0.1995, + "step": 26270 + }, + { + "epoch": 1.23, + "learning_rate": 1.6040317903218224e-05, + "loss": 0.2772, + "step": 26275 + }, + { + "epoch": 1.23, + "learning_rate": 1.6039534118163435e-05, + "loss": 0.3426, + "step": 26280 + }, + { + "epoch": 1.23, + "learning_rate": 1.603875033310865e-05, + "loss": 0.2971, + "step": 26285 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037966548053863e-05, + "loss": 0.065, + "step": 26290 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037182762999077e-05, + "loss": 0.0287, + "step": 26295 + }, + { + "epoch": 1.23, + "learning_rate": 1.603639897794429e-05, + "loss": 0.1137, + "step": 26300 + }, + { + "epoch": 1.23, + "learning_rate": 1.6035615192889504e-05, + "loss": 0.1516, + "step": 26305 + }, + { + "epoch": 1.23, + "learning_rate": 1.6034831407834718e-05, + "loss": 0.0944, + "step": 26310 + }, + { + "epoch": 1.23, + "learning_rate": 1.603404762277993e-05, + "loss": 0.1105, + "step": 26315 + }, + { + "epoch": 1.23, + "learning_rate": 1.6033263837725143e-05, + "loss": 0.1635, + "step": 26320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6032480052670357e-05, + "loss": 0.2127, + "step": 26325 + }, + { + "epoch": 1.23, + "learning_rate": 1.603169626761557e-05, + "loss": 0.4454, + "step": 26330 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030912482560784e-05, + "loss": 0.3167, + "step": 26335 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030128697505998e-05, + "loss": 0.0731, + "step": 26340 + }, + { + "epoch": 1.23, + "learning_rate": 1.602934491245121e-05, + "loss": 0.0499, + "step": 26345 + }, + { + "epoch": 1.23, + "learning_rate": 1.6028561127396426e-05, + "loss": 0.097, + "step": 26350 + }, + { + "epoch": 1.23, + "learning_rate": 1.6027777342341637e-05, + "loss": 0.0544, + "step": 26355 + }, + { + "epoch": 1.23, + "learning_rate": 1.602699355728685e-05, + "loss": 0.1498, + "step": 26360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6026209772232064e-05, + "loss": 0.1206, + "step": 26365 + }, + { + "epoch": 1.23, + "learning_rate": 1.602542598717728e-05, + "loss": 0.2232, + "step": 26370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6024642202122492e-05, + "loss": 0.3615, + "step": 26375 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023858417067703e-05, + "loss": 0.3271, + "step": 26380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023074632012917e-05, + "loss": 0.3493, + "step": 26385 + }, + { + "epoch": 1.23, + "learning_rate": 1.602229084695813e-05, + "loss": 0.0708, + "step": 26390 + }, + { + "epoch": 1.23, + "learning_rate": 1.6021507061903344e-05, + "loss": 0.124, + "step": 26395 + }, + { + "epoch": 1.23, + "learning_rate": 1.602072327684856e-05, + "loss": 0.0928, + "step": 26400 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019939491793772e-05, + "loss": 0.0961, + "step": 26405 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019155706738986e-05, + "loss": 0.1516, + "step": 26410 + }, + { + "epoch": 1.23, + "learning_rate": 1.60183719216842e-05, + "loss": 0.1803, + "step": 26415 + }, + { + "epoch": 1.23, + "learning_rate": 1.601758813662941e-05, + "loss": 0.1668, + "step": 26420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6016804351574628e-05, + "loss": 0.2315, + "step": 26425 + }, + { + "epoch": 1.23, + "learning_rate": 1.601602056651984e-05, + "loss": 0.353, + "step": 26430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6015236781465052e-05, + "loss": 0.319, + "step": 26435 + }, + { + "epoch": 1.23, + "learning_rate": 1.6014452996410266e-05, + "loss": 0.0558, + "step": 26440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6013669211355477e-05, + "loss": 0.1412, + "step": 26445 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012885426300694e-05, + "loss": 0.0911, + "step": 26450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012101641245905e-05, + "loss": 0.0855, + "step": 26455 + }, + { + "epoch": 1.23, + "learning_rate": 1.601131785619112e-05, + "loss": 0.1342, + "step": 26460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010534071136332e-05, + "loss": 0.1502, + "step": 26465 + }, + { + "epoch": 1.24, + "learning_rate": 1.6009750286081546e-05, + "loss": 0.2149, + "step": 26470 + }, + { + "epoch": 1.24, + "learning_rate": 1.600896650102676e-05, + "loss": 0.1451, + "step": 26475 + }, + { + "epoch": 1.24, + "learning_rate": 1.6008182715971974e-05, + "loss": 0.38, + "step": 26480 + }, + { + "epoch": 1.24, + "learning_rate": 1.6007398930917185e-05, + "loss": 0.5451, + "step": 26485 + }, + { + "epoch": 1.24, + "learning_rate": 1.6006615145862402e-05, + "loss": 0.0726, + "step": 26490 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005831360807612e-05, + "loss": 0.0716, + "step": 26495 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005047575752826e-05, + "loss": 0.084, + "step": 26500 + }, + { + "epoch": 1.24, + "learning_rate": 1.600426379069804e-05, + "loss": 0.1345, + "step": 26505 + }, + { + "epoch": 1.24, + "learning_rate": 1.6003480005643254e-05, + "loss": 0.2791, + "step": 26510 + }, + { + "epoch": 1.24, + "learning_rate": 1.6002696220588468e-05, + "loss": 0.1281, + "step": 26515 + }, + { + "epoch": 1.24, + "learning_rate": 1.600191243553368e-05, + "loss": 0.2016, + "step": 26520 + }, + { + "epoch": 1.24, + "learning_rate": 1.6001128650478896e-05, + "loss": 0.3165, + "step": 26525 + }, + { + "epoch": 1.24, + "learning_rate": 1.6000344865424106e-05, + "loss": 0.336, + "step": 26530 + }, + { + "epoch": 1.24, + "learning_rate": 1.599956108036932e-05, + "loss": 0.3234, + "step": 26535 + }, + { + "epoch": 1.24, + "learning_rate": 1.5998777295314534e-05, + "loss": 0.1082, + "step": 26540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997993510259748e-05, + "loss": 0.0455, + "step": 26545 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997209725204962e-05, + "loss": 0.1116, + "step": 26550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5996425940150176e-05, + "loss": 0.0805, + "step": 26555 + }, + { + "epoch": 1.24, + "learning_rate": 1.5995642155095386e-05, + "loss": 0.1438, + "step": 26560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994858370040604e-05, + "loss": 0.1412, + "step": 26565 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994074584985814e-05, + "loss": 0.2316, + "step": 26570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5993290799931028e-05, + "loss": 0.2124, + "step": 26575 + }, + { + "epoch": 1.24, + "learning_rate": 1.5992507014876242e-05, + "loss": 0.3504, + "step": 26580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5991723229821453e-05, + "loss": 0.3047, + "step": 26585 + }, + { + "epoch": 1.24, + "learning_rate": 1.599093944476667e-05, + "loss": 0.0622, + "step": 26590 + }, + { + "epoch": 1.24, + "learning_rate": 1.599015565971188e-05, + "loss": 0.0547, + "step": 26595 + }, + { + "epoch": 1.24, + "learning_rate": 1.5989371874657094e-05, + "loss": 0.0788, + "step": 26600 + }, + { + "epoch": 1.24, + "learning_rate": 1.5988588089602308e-05, + "loss": 0.1388, + "step": 26605 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987804304547522e-05, + "loss": 0.1345, + "step": 26610 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987020519492736e-05, + "loss": 0.1711, + "step": 26615 + }, + { + "epoch": 1.24, + "learning_rate": 1.598623673443795e-05, + "loss": 0.209, + "step": 26620 + }, + { + "epoch": 1.24, + "learning_rate": 1.5985452949383164e-05, + "loss": 0.24, + "step": 26625 + }, + { + "epoch": 1.24, + "learning_rate": 1.5984669164328378e-05, + "loss": 0.2429, + "step": 26630 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983885379273588e-05, + "loss": 0.2346, + "step": 26635 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983101594218802e-05, + "loss": 0.052, + "step": 26640 + }, + { + "epoch": 1.24, + "learning_rate": 1.5982317809164016e-05, + "loss": 0.1405, + "step": 26645 + }, + { + "epoch": 1.24, + "learning_rate": 1.598153402410923e-05, + "loss": 0.0957, + "step": 26650 + }, + { + "epoch": 1.24, + "learning_rate": 1.5980750239054444e-05, + "loss": 0.0875, + "step": 26655 + }, + { + "epoch": 1.24, + "learning_rate": 1.5979966453999654e-05, + "loss": 0.0848, + "step": 26660 + }, + { + "epoch": 1.24, + "learning_rate": 1.597918266894487e-05, + "loss": 0.1606, + "step": 26665 + }, + { + "epoch": 1.24, + "learning_rate": 1.5978398883890082e-05, + "loss": 0.2839, + "step": 26670 + }, + { + "epoch": 1.24, + "learning_rate": 1.5977615098835296e-05, + "loss": 0.2471, + "step": 26675 + }, + { + "epoch": 1.24, + "learning_rate": 1.597683131378051e-05, + "loss": 0.3593, + "step": 26680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5976047528725724e-05, + "loss": 0.3423, + "step": 26685 + }, + { + "epoch": 1.25, + "learning_rate": 1.5975263743670938e-05, + "loss": 0.0586, + "step": 26690 + }, + { + "epoch": 1.25, + "learning_rate": 1.5974479958616152e-05, + "loss": 0.0374, + "step": 26695 + }, + { + "epoch": 1.25, + "learning_rate": 1.5973696173561362e-05, + "loss": 0.1045, + "step": 26700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5972912388506576e-05, + "loss": 0.1342, + "step": 26705 + }, + { + "epoch": 1.25, + "learning_rate": 1.597212860345179e-05, + "loss": 0.2293, + "step": 26710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5971344818397004e-05, + "loss": 0.1674, + "step": 26715 + }, + { + "epoch": 1.25, + "learning_rate": 1.5970561033342218e-05, + "loss": 0.1991, + "step": 26720 + }, + { + "epoch": 1.25, + "learning_rate": 1.5969777248287432e-05, + "loss": 0.2317, + "step": 26725 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968993463232646e-05, + "loss": 0.4123, + "step": 26730 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968209678177856e-05, + "loss": 0.2453, + "step": 26735 + }, + { + "epoch": 1.25, + "learning_rate": 1.5967425893123073e-05, + "loss": 0.083, + "step": 26740 + }, + { + "epoch": 1.25, + "learning_rate": 1.5966642108068284e-05, + "loss": 0.0648, + "step": 26745 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965858323013498e-05, + "loss": 0.1362, + "step": 26750 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965074537958712e-05, + "loss": 0.1406, + "step": 26755 + }, + { + "epoch": 1.25, + "learning_rate": 1.5964290752903926e-05, + "loss": 0.1994, + "step": 26760 + }, + { + "epoch": 1.25, + "learning_rate": 1.596350696784914e-05, + "loss": 0.1565, + "step": 26765 + }, + { + "epoch": 1.25, + "learning_rate": 1.596272318279435e-05, + "loss": 0.2318, + "step": 26770 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961939397739564e-05, + "loss": 0.2744, + "step": 26775 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961155612684778e-05, + "loss": 0.4521, + "step": 26780 + }, + { + "epoch": 1.25, + "learning_rate": 1.5960371827629992e-05, + "loss": 0.2907, + "step": 26785 + }, + { + "epoch": 1.25, + "learning_rate": 1.5959588042575206e-05, + "loss": 0.086, + "step": 26790 + }, + { + "epoch": 1.25, + "learning_rate": 1.595880425752042e-05, + "loss": 0.0616, + "step": 26795 + }, + { + "epoch": 1.25, + "learning_rate": 1.595802047246563e-05, + "loss": 0.0885, + "step": 26800 + }, + { + "epoch": 1.25, + "learning_rate": 1.5957236687410847e-05, + "loss": 0.1038, + "step": 26805 + }, + { + "epoch": 1.25, + "learning_rate": 1.5956452902356058e-05, + "loss": 0.1311, + "step": 26810 + }, + { + "epoch": 1.25, + "learning_rate": 1.5955669117301272e-05, + "loss": 0.2754, + "step": 26815 + }, + { + "epoch": 1.25, + "learning_rate": 1.5954885332246486e-05, + "loss": 0.2622, + "step": 26820 + }, + { + "epoch": 1.25, + "learning_rate": 1.59541015471917e-05, + "loss": 0.3481, + "step": 26825 + }, + { + "epoch": 1.25, + "learning_rate": 1.5953317762136914e-05, + "loss": 0.3888, + "step": 26830 + }, + { + "epoch": 1.25, + "learning_rate": 1.5952533977082124e-05, + "loss": 0.3201, + "step": 26835 + }, + { + "epoch": 1.25, + "learning_rate": 1.595175019202734e-05, + "loss": 0.0431, + "step": 26840 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950966406972552e-05, + "loss": 0.0601, + "step": 26845 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950182621917766e-05, + "loss": 0.0401, + "step": 26850 + }, + { + "epoch": 1.25, + "learning_rate": 1.594939883686298e-05, + "loss": 0.0848, + "step": 26855 + }, + { + "epoch": 1.25, + "learning_rate": 1.5948615051808194e-05, + "loss": 0.1519, + "step": 26860 + }, + { + "epoch": 1.25, + "learning_rate": 1.5947831266753408e-05, + "loss": 0.1502, + "step": 26865 + }, + { + "epoch": 1.25, + "learning_rate": 1.594704748169862e-05, + "loss": 0.1738, + "step": 26870 + }, + { + "epoch": 1.25, + "learning_rate": 1.5946263696643832e-05, + "loss": 0.1806, + "step": 26875 + }, + { + "epoch": 1.25, + "learning_rate": 1.594547991158905e-05, + "loss": 0.359, + "step": 26880 + }, + { + "epoch": 1.25, + "learning_rate": 1.594469612653426e-05, + "loss": 0.3319, + "step": 26885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943912341479474e-05, + "loss": 0.0602, + "step": 26890 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943128556424688e-05, + "loss": 0.0851, + "step": 26895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5942344771369898e-05, + "loss": 0.1058, + "step": 26900 + }, + { + "epoch": 1.26, + "learning_rate": 1.5941560986315115e-05, + "loss": 0.1035, + "step": 26905 + }, + { + "epoch": 1.26, + "learning_rate": 1.5940777201260326e-05, + "loss": 0.1427, + "step": 26910 + }, + { + "epoch": 1.26, + "learning_rate": 1.593999341620554e-05, + "loss": 0.1269, + "step": 26915 + }, + { + "epoch": 1.26, + "learning_rate": 1.5939209631150754e-05, + "loss": 0.1434, + "step": 26920 + }, + { + "epoch": 1.26, + "learning_rate": 1.5938425846095968e-05, + "loss": 0.2233, + "step": 26925 + }, + { + "epoch": 1.26, + "learning_rate": 1.593764206104118e-05, + "loss": 0.4562, + "step": 26930 + }, + { + "epoch": 1.26, + "learning_rate": 1.5936858275986395e-05, + "loss": 0.3848, + "step": 26935 + }, + { + "epoch": 1.26, + "learning_rate": 1.593607449093161e-05, + "loss": 0.0614, + "step": 26940 + }, + { + "epoch": 1.26, + "learning_rate": 1.5935290705876823e-05, + "loss": 0.0845, + "step": 26945 + }, + { + "epoch": 1.26, + "learning_rate": 1.5934506920822034e-05, + "loss": 0.0953, + "step": 26950 + }, + { + "epoch": 1.26, + "learning_rate": 1.593372313576725e-05, + "loss": 0.1694, + "step": 26955 + }, + { + "epoch": 1.26, + "learning_rate": 1.593293935071246e-05, + "loss": 0.1371, + "step": 26960 + }, + { + "epoch": 1.26, + "learning_rate": 1.5932155565657676e-05, + "loss": 0.2797, + "step": 26965 + }, + { + "epoch": 1.26, + "learning_rate": 1.593137178060289e-05, + "loss": 0.2314, + "step": 26970 + }, + { + "epoch": 1.26, + "learning_rate": 1.59305879955481e-05, + "loss": 0.1904, + "step": 26975 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929804210493317e-05, + "loss": 0.3626, + "step": 26980 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929020425438528e-05, + "loss": 0.3654, + "step": 26985 + }, + { + "epoch": 1.26, + "learning_rate": 1.592823664038374e-05, + "loss": 0.0897, + "step": 26990 + }, + { + "epoch": 1.26, + "learning_rate": 1.5927452855328956e-05, + "loss": 0.1229, + "step": 26995 + }, + { + "epoch": 1.26, + "learning_rate": 1.592666907027417e-05, + "loss": 0.1352, + "step": 27000 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925885285219383e-05, + "loss": 0.1724, + "step": 27005 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925101500164597e-05, + "loss": 0.1872, + "step": 27010 + }, + { + "epoch": 1.26, + "learning_rate": 1.5924317715109808e-05, + "loss": 0.211, + "step": 27015 + }, + { + "epoch": 1.26, + "learning_rate": 1.5923533930055025e-05, + "loss": 0.2077, + "step": 27020 + }, + { + "epoch": 1.26, + "learning_rate": 1.5922750145000236e-05, + "loss": 0.2314, + "step": 27025 + }, + { + "epoch": 1.26, + "learning_rate": 1.592196635994545e-05, + "loss": 0.2645, + "step": 27030 + }, + { + "epoch": 1.26, + "learning_rate": 1.5921182574890663e-05, + "loss": 0.3657, + "step": 27035 + }, + { + "epoch": 1.26, + "learning_rate": 1.5920398789835877e-05, + "loss": 0.0485, + "step": 27040 + }, + { + "epoch": 1.26, + "learning_rate": 1.591961500478109e-05, + "loss": 0.0873, + "step": 27045 + }, + { + "epoch": 1.26, + "learning_rate": 1.5918831219726302e-05, + "loss": 0.0935, + "step": 27050 + }, + { + "epoch": 1.26, + "learning_rate": 1.591804743467152e-05, + "loss": 0.0481, + "step": 27055 + }, + { + "epoch": 1.26, + "learning_rate": 1.591726364961673e-05, + "loss": 0.1495, + "step": 27060 + }, + { + "epoch": 1.26, + "learning_rate": 1.5916479864561943e-05, + "loss": 0.2669, + "step": 27065 + }, + { + "epoch": 1.26, + "learning_rate": 1.5915696079507157e-05, + "loss": 0.2449, + "step": 27070 + }, + { + "epoch": 1.26, + "learning_rate": 1.591491229445237e-05, + "loss": 0.2292, + "step": 27075 + }, + { + "epoch": 1.26, + "learning_rate": 1.5914128509397585e-05, + "loss": 0.3765, + "step": 27080 + }, + { + "epoch": 1.26, + "learning_rate": 1.59133447243428e-05, + "loss": 0.4043, + "step": 27085 + }, + { + "epoch": 1.26, + "learning_rate": 1.591256093928801e-05, + "loss": 0.0565, + "step": 27090 + }, + { + "epoch": 1.26, + "learning_rate": 1.5911777154233224e-05, + "loss": 0.1073, + "step": 27095 + }, + { + "epoch": 1.26, + "learning_rate": 1.5910993369178437e-05, + "loss": 0.1031, + "step": 27100 + }, + { + "epoch": 1.26, + "learning_rate": 1.591020958412365e-05, + "loss": 0.1669, + "step": 27105 + }, + { + "epoch": 1.26, + "learning_rate": 1.5909425799068865e-05, + "loss": 0.1159, + "step": 27110 + }, + { + "epoch": 1.27, + "learning_rate": 1.5908642014014076e-05, + "loss": 0.1482, + "step": 27115 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907858228959293e-05, + "loss": 0.2052, + "step": 27120 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907074443904504e-05, + "loss": 0.1452, + "step": 27125 + }, + { + "epoch": 1.27, + "learning_rate": 1.5906290658849717e-05, + "loss": 0.3528, + "step": 27130 + }, + { + "epoch": 1.27, + "learning_rate": 1.590550687379493e-05, + "loss": 0.2911, + "step": 27135 + }, + { + "epoch": 1.27, + "learning_rate": 1.5904723088740145e-05, + "loss": 0.0498, + "step": 27140 + }, + { + "epoch": 1.27, + "learning_rate": 1.590393930368536e-05, + "loss": 0.0742, + "step": 27145 + }, + { + "epoch": 1.27, + "learning_rate": 1.5903155518630573e-05, + "loss": 0.0673, + "step": 27150 + }, + { + "epoch": 1.27, + "learning_rate": 1.5902371733575787e-05, + "loss": 0.1554, + "step": 27155 + }, + { + "epoch": 1.27, + "learning_rate": 1.5901587948520998e-05, + "loss": 0.1423, + "step": 27160 + }, + { + "epoch": 1.27, + "learning_rate": 1.590080416346621e-05, + "loss": 0.2888, + "step": 27165 + }, + { + "epoch": 1.27, + "learning_rate": 1.5900020378411425e-05, + "loss": 0.206, + "step": 27170 + }, + { + "epoch": 1.27, + "learning_rate": 1.589923659335664e-05, + "loss": 0.2769, + "step": 27175 + }, + { + "epoch": 1.27, + "learning_rate": 1.5898452808301853e-05, + "loss": 0.3929, + "step": 27180 + }, + { + "epoch": 1.27, + "learning_rate": 1.5897669023247067e-05, + "loss": 0.2299, + "step": 27185 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896885238192278e-05, + "loss": 0.0616, + "step": 27190 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896101453137495e-05, + "loss": 0.0768, + "step": 27195 + }, + { + "epoch": 1.27, + "learning_rate": 1.5895317668082705e-05, + "loss": 0.066, + "step": 27200 + }, + { + "epoch": 1.27, + "learning_rate": 1.589453388302792e-05, + "loss": 0.1199, + "step": 27205 + }, + { + "epoch": 1.27, + "learning_rate": 1.5893750097973133e-05, + "loss": 0.1225, + "step": 27210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5892966312918347e-05, + "loss": 0.1322, + "step": 27215 + }, + { + "epoch": 1.27, + "learning_rate": 1.589218252786356e-05, + "loss": 0.1938, + "step": 27220 + }, + { + "epoch": 1.27, + "learning_rate": 1.589139874280877e-05, + "loss": 0.1989, + "step": 27225 + }, + { + "epoch": 1.27, + "learning_rate": 1.5890614957753985e-05, + "loss": 0.3783, + "step": 27230 + }, + { + "epoch": 1.27, + "learning_rate": 1.58898311726992e-05, + "loss": 0.2278, + "step": 27235 + }, + { + "epoch": 1.27, + "learning_rate": 1.5889047387644413e-05, + "loss": 0.0426, + "step": 27240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5888263602589627e-05, + "loss": 0.0671, + "step": 27245 + }, + { + "epoch": 1.27, + "learning_rate": 1.588747981753484e-05, + "loss": 0.1248, + "step": 27250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5886696032480055e-05, + "loss": 0.1308, + "step": 27255 + }, + { + "epoch": 1.27, + "learning_rate": 1.588591224742527e-05, + "loss": 0.1626, + "step": 27260 + }, + { + "epoch": 1.27, + "learning_rate": 1.588512846237048e-05, + "loss": 0.0983, + "step": 27265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5884344677315697e-05, + "loss": 0.1998, + "step": 27270 + }, + { + "epoch": 1.27, + "learning_rate": 1.5883560892260907e-05, + "loss": 0.2857, + "step": 27275 + }, + { + "epoch": 1.27, + "learning_rate": 1.588277710720612e-05, + "loss": 0.4716, + "step": 27280 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881993322151335e-05, + "loss": 0.3464, + "step": 27285 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881209537096546e-05, + "loss": 0.0157, + "step": 27290 + }, + { + "epoch": 1.27, + "learning_rate": 1.5880425752041763e-05, + "loss": 0.0292, + "step": 27295 + }, + { + "epoch": 1.27, + "learning_rate": 1.5879641966986973e-05, + "loss": 0.1048, + "step": 27300 + }, + { + "epoch": 1.27, + "learning_rate": 1.5878858181932187e-05, + "loss": 0.1233, + "step": 27305 + }, + { + "epoch": 1.27, + "learning_rate": 1.58780743968774e-05, + "loss": 0.1633, + "step": 27310 + }, + { + "epoch": 1.27, + "learning_rate": 1.5877290611822615e-05, + "loss": 0.213, + "step": 27315 + }, + { + "epoch": 1.27, + "learning_rate": 1.587650682676783e-05, + "loss": 0.1694, + "step": 27320 + }, + { + "epoch": 1.28, + "learning_rate": 1.5875723041713043e-05, + "loss": 0.2286, + "step": 27325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874939256658253e-05, + "loss": 0.4041, + "step": 27330 + }, + { + "epoch": 1.28, + "learning_rate": 1.587415547160347e-05, + "loss": 0.3315, + "step": 27335 + }, + { + "epoch": 1.28, + "learning_rate": 1.587337168654868e-05, + "loss": 0.0725, + "step": 27340 + }, + { + "epoch": 1.28, + "learning_rate": 1.5872587901493895e-05, + "loss": 0.092, + "step": 27345 + }, + { + "epoch": 1.28, + "learning_rate": 1.587180411643911e-05, + "loss": 0.0829, + "step": 27350 + }, + { + "epoch": 1.28, + "learning_rate": 1.5871020331384323e-05, + "loss": 0.1871, + "step": 27355 + }, + { + "epoch": 1.28, + "learning_rate": 1.5870236546329537e-05, + "loss": 0.1217, + "step": 27360 + }, + { + "epoch": 1.28, + "learning_rate": 1.5869452761274747e-05, + "loss": 0.1402, + "step": 27365 + }, + { + "epoch": 1.28, + "learning_rate": 1.5868668976219965e-05, + "loss": 0.2723, + "step": 27370 + }, + { + "epoch": 1.28, + "learning_rate": 1.5867885191165175e-05, + "loss": 0.2456, + "step": 27375 + }, + { + "epoch": 1.28, + "learning_rate": 1.586710140611039e-05, + "loss": 0.3611, + "step": 27380 + }, + { + "epoch": 1.28, + "learning_rate": 1.5866317621055603e-05, + "loss": 0.2447, + "step": 27385 + }, + { + "epoch": 1.28, + "learning_rate": 1.5865533836000817e-05, + "loss": 0.0605, + "step": 27390 + }, + { + "epoch": 1.28, + "learning_rate": 1.586475005094603e-05, + "loss": 0.1235, + "step": 27395 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863966265891245e-05, + "loss": 0.1074, + "step": 27400 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863182480836455e-05, + "loss": 0.1034, + "step": 27405 + }, + { + "epoch": 1.28, + "learning_rate": 1.5862398695781672e-05, + "loss": 0.1398, + "step": 27410 + }, + { + "epoch": 1.28, + "learning_rate": 1.5861614910726883e-05, + "loss": 0.1485, + "step": 27415 + }, + { + "epoch": 1.28, + "learning_rate": 1.5860831125672097e-05, + "loss": 0.184, + "step": 27420 + }, + { + "epoch": 1.28, + "learning_rate": 1.586004734061731e-05, + "loss": 0.301, + "step": 27425 + }, + { + "epoch": 1.28, + "learning_rate": 1.585926355556252e-05, + "loss": 0.3592, + "step": 27430 + }, + { + "epoch": 1.28, + "learning_rate": 1.585847977050774e-05, + "loss": 0.4242, + "step": 27435 + }, + { + "epoch": 1.28, + "learning_rate": 1.585769598545295e-05, + "loss": 0.033, + "step": 27440 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856912200398163e-05, + "loss": 0.077, + "step": 27445 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856128415343377e-05, + "loss": 0.0681, + "step": 27450 + }, + { + "epoch": 1.28, + "learning_rate": 1.585534463028859e-05, + "loss": 0.0868, + "step": 27455 + }, + { + "epoch": 1.28, + "learning_rate": 1.5854560845233805e-05, + "loss": 0.1456, + "step": 27460 + }, + { + "epoch": 1.28, + "learning_rate": 1.585377706017902e-05, + "loss": 0.1694, + "step": 27465 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852993275124233e-05, + "loss": 0.2192, + "step": 27470 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852209490069446e-05, + "loss": 0.2615, + "step": 27475 + }, + { + "epoch": 1.28, + "learning_rate": 1.5851425705014657e-05, + "loss": 0.3738, + "step": 27480 + }, + { + "epoch": 1.28, + "learning_rate": 1.585064191995987e-05, + "loss": 0.4018, + "step": 27485 + }, + { + "epoch": 1.28, + "learning_rate": 1.5849858134905085e-05, + "loss": 0.0784, + "step": 27490 + }, + { + "epoch": 1.28, + "learning_rate": 1.58490743498503e-05, + "loss": 0.0999, + "step": 27495 + }, + { + "epoch": 1.28, + "learning_rate": 1.5848290564795513e-05, + "loss": 0.094, + "step": 27500 + }, + { + "epoch": 1.28, + "learning_rate": 1.5847506779740723e-05, + "loss": 0.1918, + "step": 27505 + }, + { + "epoch": 1.28, + "learning_rate": 1.584672299468594e-05, + "loss": 0.0998, + "step": 27510 + }, + { + "epoch": 1.28, + "learning_rate": 1.584593920963115e-05, + "loss": 0.1329, + "step": 27515 + }, + { + "epoch": 1.28, + "learning_rate": 1.5845155424576365e-05, + "loss": 0.2364, + "step": 27520 + }, + { + "epoch": 1.28, + "learning_rate": 1.584437163952158e-05, + "loss": 0.2244, + "step": 27525 + }, + { + "epoch": 1.28, + "learning_rate": 1.5843587854466793e-05, + "loss": 0.2506, + "step": 27530 + }, + { + "epoch": 1.28, + "learning_rate": 1.5842804069412007e-05, + "loss": 0.2574, + "step": 27535 + }, + { + "epoch": 1.29, + "learning_rate": 1.584202028435722e-05, + "loss": 0.0459, + "step": 27540 + }, + { + "epoch": 1.29, + "learning_rate": 1.584123649930243e-05, + "loss": 0.169, + "step": 27545 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840452714247645e-05, + "loss": 0.0555, + "step": 27550 + }, + { + "epoch": 1.29, + "learning_rate": 1.583966892919286e-05, + "loss": 0.1168, + "step": 27555 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838885144138073e-05, + "loss": 0.1272, + "step": 27560 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838101359083287e-05, + "loss": 0.1271, + "step": 27565 + }, + { + "epoch": 1.29, + "learning_rate": 1.58373175740285e-05, + "loss": 0.2171, + "step": 27570 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836533788973714e-05, + "loss": 0.2344, + "step": 27575 + }, + { + "epoch": 1.29, + "learning_rate": 1.5835750003918925e-05, + "loss": 0.441, + "step": 27580 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834966218864142e-05, + "loss": 0.344, + "step": 27585 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834182433809353e-05, + "loss": 0.0275, + "step": 27590 + }, + { + "epoch": 1.29, + "learning_rate": 1.5833398648754567e-05, + "loss": 0.0879, + "step": 27595 + }, + { + "epoch": 1.29, + "learning_rate": 1.583261486369978e-05, + "loss": 0.0835, + "step": 27600 + }, + { + "epoch": 1.29, + "learning_rate": 1.5831831078644994e-05, + "loss": 0.0898, + "step": 27605 + }, + { + "epoch": 1.29, + "learning_rate": 1.583104729359021e-05, + "loss": 0.1473, + "step": 27610 + }, + { + "epoch": 1.29, + "learning_rate": 1.583026350853542e-05, + "loss": 0.1409, + "step": 27615 + }, + { + "epoch": 1.29, + "learning_rate": 1.5829479723480633e-05, + "loss": 0.1624, + "step": 27620 + }, + { + "epoch": 1.29, + "learning_rate": 1.5828695938425847e-05, + "loss": 0.2715, + "step": 27625 + }, + { + "epoch": 1.29, + "learning_rate": 1.582791215337106e-05, + "loss": 0.24, + "step": 27630 + }, + { + "epoch": 1.29, + "learning_rate": 1.5827128368316275e-05, + "loss": 0.3, + "step": 27635 + }, + { + "epoch": 1.29, + "learning_rate": 1.582634458326149e-05, + "loss": 0.1383, + "step": 27640 + }, + { + "epoch": 1.29, + "learning_rate": 1.58255607982067e-05, + "loss": 0.0718, + "step": 27645 + }, + { + "epoch": 1.29, + "learning_rate": 1.5824777013151916e-05, + "loss": 0.1329, + "step": 27650 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823993228097127e-05, + "loss": 0.0816, + "step": 27655 + }, + { + "epoch": 1.29, + "learning_rate": 1.582320944304234e-05, + "loss": 0.1626, + "step": 27660 + }, + { + "epoch": 1.29, + "learning_rate": 1.5822425657987555e-05, + "loss": 0.1631, + "step": 27665 + }, + { + "epoch": 1.29, + "learning_rate": 1.582164187293277e-05, + "loss": 0.176, + "step": 27670 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820858087877982e-05, + "loss": 0.242, + "step": 27675 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820074302823193e-05, + "loss": 0.3799, + "step": 27680 + }, + { + "epoch": 1.29, + "learning_rate": 1.581929051776841e-05, + "loss": 0.3226, + "step": 27685 + }, + { + "epoch": 1.29, + "learning_rate": 1.581850673271362e-05, + "loss": 0.0577, + "step": 27690 + }, + { + "epoch": 1.29, + "learning_rate": 1.5817722947658835e-05, + "loss": 0.0607, + "step": 27695 + }, + { + "epoch": 1.29, + "learning_rate": 1.581693916260405e-05, + "loss": 0.0799, + "step": 27700 + }, + { + "epoch": 1.29, + "learning_rate": 1.5816155377549262e-05, + "loss": 0.1294, + "step": 27705 + }, + { + "epoch": 1.29, + "learning_rate": 1.5815371592494476e-05, + "loss": 0.1614, + "step": 27710 + }, + { + "epoch": 1.29, + "learning_rate": 1.581458780743969e-05, + "loss": 0.2011, + "step": 27715 + }, + { + "epoch": 1.29, + "learning_rate": 1.58138040223849e-05, + "loss": 0.1317, + "step": 27720 + }, + { + "epoch": 1.29, + "learning_rate": 1.5813020237330118e-05, + "loss": 0.138, + "step": 27725 + }, + { + "epoch": 1.29, + "learning_rate": 1.581239320928629e-05, + "loss": 0.3351, + "step": 27730 + }, + { + "epoch": 1.29, + "learning_rate": 1.58116094242315e-05, + "loss": 0.2869, + "step": 27735 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810825639176713e-05, + "loss": 0.0514, + "step": 27740 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810041854121927e-05, + "loss": 0.0941, + "step": 27745 + }, + { + "epoch": 1.29, + "learning_rate": 1.580925806906714e-05, + "loss": 0.0642, + "step": 27750 + }, + { + "epoch": 1.3, + "learning_rate": 1.5808474284012355e-05, + "loss": 0.1529, + "step": 27755 + }, + { + "epoch": 1.3, + "learning_rate": 1.5807690498957565e-05, + "loss": 0.1682, + "step": 27760 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806906713902782e-05, + "loss": 0.1137, + "step": 27765 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806279685858953e-05, + "loss": 0.2363, + "step": 27770 + }, + { + "epoch": 1.3, + "learning_rate": 1.5805495900804163e-05, + "loss": 0.2647, + "step": 27775 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804712115749377e-05, + "loss": 0.432, + "step": 27780 + }, + { + "epoch": 1.3, + "learning_rate": 1.580392833069459e-05, + "loss": 0.2727, + "step": 27785 + }, + { + "epoch": 1.3, + "learning_rate": 1.5803144545639805e-05, + "loss": 0.0257, + "step": 27790 + }, + { + "epoch": 1.3, + "learning_rate": 1.580236076058502e-05, + "loss": 0.056, + "step": 27795 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801576975530233e-05, + "loss": 0.1739, + "step": 27800 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800793190475447e-05, + "loss": 0.1359, + "step": 27805 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800009405420657e-05, + "loss": 0.1377, + "step": 27810 + }, + { + "epoch": 1.3, + "learning_rate": 1.579922562036587e-05, + "loss": 0.2141, + "step": 27815 + }, + { + "epoch": 1.3, + "learning_rate": 1.5798441835311085e-05, + "loss": 0.2105, + "step": 27820 + }, + { + "epoch": 1.3, + "learning_rate": 1.57976580502563e-05, + "loss": 0.2329, + "step": 27825 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796874265201513e-05, + "loss": 0.2853, + "step": 27830 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796090480146727e-05, + "loss": 0.2378, + "step": 27835 + }, + { + "epoch": 1.3, + "learning_rate": 1.5795306695091937e-05, + "loss": 0.0456, + "step": 27840 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794522910037155e-05, + "loss": 0.0577, + "step": 27845 + }, + { + "epoch": 1.3, + "learning_rate": 1.5793739124982365e-05, + "loss": 0.0971, + "step": 27850 + }, + { + "epoch": 1.3, + "learning_rate": 1.579295533992758e-05, + "loss": 0.1172, + "step": 27855 + }, + { + "epoch": 1.3, + "learning_rate": 1.5792171554872793e-05, + "loss": 0.0872, + "step": 27860 + }, + { + "epoch": 1.3, + "learning_rate": 1.5791387769818007e-05, + "loss": 0.1082, + "step": 27865 + }, + { + "epoch": 1.3, + "learning_rate": 1.579060398476322e-05, + "loss": 0.1545, + "step": 27870 + }, + { + "epoch": 1.3, + "learning_rate": 1.578982019970843e-05, + "loss": 0.2331, + "step": 27875 + }, + { + "epoch": 1.3, + "learning_rate": 1.5789036414653645e-05, + "loss": 0.3144, + "step": 27880 + }, + { + "epoch": 1.3, + "learning_rate": 1.578825262959886e-05, + "loss": 0.2813, + "step": 27885 + }, + { + "epoch": 1.3, + "learning_rate": 1.5787468844544073e-05, + "loss": 0.0546, + "step": 27890 + }, + { + "epoch": 1.3, + "learning_rate": 1.5786685059489287e-05, + "loss": 0.0614, + "step": 27895 + }, + { + "epoch": 1.3, + "learning_rate": 1.57859012744345e-05, + "loss": 0.0767, + "step": 27900 + }, + { + "epoch": 1.3, + "learning_rate": 1.5785117489379715e-05, + "loss": 0.1322, + "step": 27905 + }, + { + "epoch": 1.3, + "learning_rate": 1.578433370432493e-05, + "loss": 0.134, + "step": 27910 + }, + { + "epoch": 1.3, + "learning_rate": 1.578354991927014e-05, + "loss": 0.1844, + "step": 27915 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782766134215357e-05, + "loss": 0.1431, + "step": 27920 + }, + { + "epoch": 1.3, + "learning_rate": 1.5781982349160567e-05, + "loss": 0.2983, + "step": 27925 + }, + { + "epoch": 1.3, + "learning_rate": 1.578119856410578e-05, + "loss": 0.381, + "step": 27930 + }, + { + "epoch": 1.3, + "learning_rate": 1.5780414779050995e-05, + "loss": 0.3145, + "step": 27935 + }, + { + "epoch": 1.3, + "learning_rate": 1.5779630993996205e-05, + "loss": 0.0599, + "step": 27940 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778847208941423e-05, + "loss": 0.1142, + "step": 27945 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778063423886633e-05, + "loss": 0.0946, + "step": 27950 + }, + { + "epoch": 1.3, + "learning_rate": 1.5777279638831847e-05, + "loss": 0.1403, + "step": 27955 + }, + { + "epoch": 1.3, + "learning_rate": 1.577649585377706e-05, + "loss": 0.1777, + "step": 27960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5775712068722275e-05, + "loss": 0.2978, + "step": 27965 + }, + { + "epoch": 1.31, + "learning_rate": 1.577492828366749e-05, + "loss": 0.1597, + "step": 27970 + }, + { + "epoch": 1.31, + "learning_rate": 1.5774144498612703e-05, + "loss": 0.195, + "step": 27975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5773360713557913e-05, + "loss": 0.356, + "step": 27980 + }, + { + "epoch": 1.31, + "learning_rate": 1.577257692850313e-05, + "loss": 0.3127, + "step": 27985 + }, + { + "epoch": 1.31, + "learning_rate": 1.577179314344834e-05, + "loss": 0.0726, + "step": 27990 + }, + { + "epoch": 1.31, + "learning_rate": 1.5771009358393555e-05, + "loss": 0.0738, + "step": 27995 + }, + { + "epoch": 1.31, + "learning_rate": 1.577022557333877e-05, + "loss": 0.0923, + "step": 28000 + }, + { + "epoch": 1.31, + "learning_rate": 1.5769441788283983e-05, + "loss": 0.0928, + "step": 28005 + }, + { + "epoch": 1.31, + "learning_rate": 1.5768658003229197e-05, + "loss": 0.1215, + "step": 28010 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767874218174407e-05, + "loss": 0.161, + "step": 28015 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767090433119624e-05, + "loss": 0.2235, + "step": 28020 + }, + { + "epoch": 1.31, + "learning_rate": 1.5766306648064835e-05, + "loss": 0.2186, + "step": 28025 + }, + { + "epoch": 1.31, + "learning_rate": 1.576552286301005e-05, + "loss": 0.3355, + "step": 28030 + }, + { + "epoch": 1.31, + "learning_rate": 1.5764739077955263e-05, + "loss": 0.3447, + "step": 28035 + }, + { + "epoch": 1.31, + "learning_rate": 1.5763955292900477e-05, + "loss": 0.0388, + "step": 28040 + }, + { + "epoch": 1.31, + "learning_rate": 1.576317150784569e-05, + "loss": 0.074, + "step": 28045 + }, + { + "epoch": 1.31, + "learning_rate": 1.5762387722790905e-05, + "loss": 0.058, + "step": 28050 + }, + { + "epoch": 1.31, + "learning_rate": 1.5761603937736115e-05, + "loss": 0.1904, + "step": 28055 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760820152681332e-05, + "loss": 0.0815, + "step": 28060 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760036367626543e-05, + "loss": 0.1802, + "step": 28065 + }, + { + "epoch": 1.31, + "learning_rate": 1.5759252582571757e-05, + "loss": 0.1224, + "step": 28070 + }, + { + "epoch": 1.31, + "learning_rate": 1.575846879751697e-05, + "loss": 0.2543, + "step": 28075 + }, + { + "epoch": 1.31, + "learning_rate": 1.575768501246218e-05, + "loss": 0.248, + "step": 28080 + }, + { + "epoch": 1.31, + "learning_rate": 1.57569012274074e-05, + "loss": 0.3462, + "step": 28085 + }, + { + "epoch": 1.31, + "learning_rate": 1.575611744235261e-05, + "loss": 0.0377, + "step": 28090 + }, + { + "epoch": 1.31, + "learning_rate": 1.5755333657297823e-05, + "loss": 0.0877, + "step": 28095 + }, + { + "epoch": 1.31, + "learning_rate": 1.5754549872243037e-05, + "loss": 0.1138, + "step": 28100 + }, + { + "epoch": 1.31, + "learning_rate": 1.575376608718825e-05, + "loss": 0.1132, + "step": 28105 + }, + { + "epoch": 1.31, + "learning_rate": 1.5752982302133465e-05, + "loss": 0.1068, + "step": 28110 + }, + { + "epoch": 1.31, + "learning_rate": 1.575219851707868e-05, + "loss": 0.1146, + "step": 28115 + }, + { + "epoch": 1.31, + "learning_rate": 1.5751414732023892e-05, + "loss": 0.1925, + "step": 28120 + }, + { + "epoch": 1.31, + "learning_rate": 1.5750630946969106e-05, + "loss": 0.2804, + "step": 28125 + }, + { + "epoch": 1.31, + "learning_rate": 1.5749847161914317e-05, + "loss": 0.4683, + "step": 28130 + }, + { + "epoch": 1.31, + "learning_rate": 1.574906337685953e-05, + "loss": 0.3226, + "step": 28135 + }, + { + "epoch": 1.31, + "learning_rate": 1.5748279591804745e-05, + "loss": 0.063, + "step": 28140 + }, + { + "epoch": 1.31, + "learning_rate": 1.574749580674996e-05, + "loss": 0.0538, + "step": 28145 + }, + { + "epoch": 1.31, + "learning_rate": 1.5746712021695172e-05, + "loss": 0.0772, + "step": 28150 + }, + { + "epoch": 1.31, + "learning_rate": 1.5745928236640383e-05, + "loss": 0.1166, + "step": 28155 + }, + { + "epoch": 1.31, + "learning_rate": 1.57451444515856e-05, + "loss": 0.1212, + "step": 28160 + }, + { + "epoch": 1.31, + "learning_rate": 1.574436066653081e-05, + "loss": 0.1134, + "step": 28165 + }, + { + "epoch": 1.31, + "learning_rate": 1.5743576881476025e-05, + "loss": 0.2492, + "step": 28170 + }, + { + "epoch": 1.31, + "learning_rate": 1.574279309642124e-05, + "loss": 0.3189, + "step": 28175 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742009311366453e-05, + "loss": 0.2254, + "step": 28180 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741225526311666e-05, + "loss": 0.2818, + "step": 28185 + }, + { + "epoch": 1.32, + "learning_rate": 1.574044174125688e-05, + "loss": 0.0439, + "step": 28190 + }, + { + "epoch": 1.32, + "learning_rate": 1.573965795620209e-05, + "loss": 0.1292, + "step": 28195 + }, + { + "epoch": 1.32, + "learning_rate": 1.5738874171147305e-05, + "loss": 0.1012, + "step": 28200 + }, + { + "epoch": 1.32, + "learning_rate": 1.573809038609252e-05, + "loss": 0.08, + "step": 28205 + }, + { + "epoch": 1.32, + "learning_rate": 1.5737306601037733e-05, + "loss": 0.1199, + "step": 28210 + }, + { + "epoch": 1.32, + "learning_rate": 1.5736522815982946e-05, + "loss": 0.2438, + "step": 28215 + }, + { + "epoch": 1.32, + "learning_rate": 1.573573903092816e-05, + "loss": 0.0924, + "step": 28220 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734955245873374e-05, + "loss": 0.2114, + "step": 28225 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734171460818585e-05, + "loss": 0.323, + "step": 28230 + }, + { + "epoch": 1.32, + "learning_rate": 1.5733387675763802e-05, + "loss": 0.2703, + "step": 28235 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732603890709013e-05, + "loss": 0.0825, + "step": 28240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5731820105654227e-05, + "loss": 0.0734, + "step": 28245 + }, + { + "epoch": 1.32, + "learning_rate": 1.573103632059944e-05, + "loss": 0.0782, + "step": 28250 + }, + { + "epoch": 1.32, + "learning_rate": 1.5730252535544654e-05, + "loss": 0.1712, + "step": 28255 + }, + { + "epoch": 1.32, + "learning_rate": 1.5729468750489868e-05, + "loss": 0.1776, + "step": 28260 + }, + { + "epoch": 1.32, + "learning_rate": 1.572868496543508e-05, + "loss": 0.1425, + "step": 28265 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727901180380293e-05, + "loss": 0.2537, + "step": 28270 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727117395325507e-05, + "loss": 0.2227, + "step": 28275 + }, + { + "epoch": 1.32, + "learning_rate": 1.572633361027072e-05, + "loss": 0.4145, + "step": 28280 + }, + { + "epoch": 1.32, + "learning_rate": 1.5725549825215934e-05, + "loss": 0.4865, + "step": 28285 + }, + { + "epoch": 1.32, + "learning_rate": 1.5724766040161148e-05, + "loss": 0.0633, + "step": 28290 + }, + { + "epoch": 1.32, + "learning_rate": 1.572398225510636e-05, + "loss": 0.0838, + "step": 28295 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723198470051576e-05, + "loss": 0.1071, + "step": 28300 + }, + { + "epoch": 1.32, + "learning_rate": 1.5722414684996787e-05, + "loss": 0.1015, + "step": 28305 + }, + { + "epoch": 1.32, + "learning_rate": 1.5721630899942e-05, + "loss": 0.0966, + "step": 28310 + }, + { + "epoch": 1.32, + "learning_rate": 1.5720847114887214e-05, + "loss": 0.2073, + "step": 28315 + }, + { + "epoch": 1.32, + "learning_rate": 1.572006332983243e-05, + "loss": 0.1782, + "step": 28320 + }, + { + "epoch": 1.32, + "learning_rate": 1.5719279544777642e-05, + "loss": 0.2457, + "step": 28325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5718495759722853e-05, + "loss": 0.3423, + "step": 28330 + }, + { + "epoch": 1.32, + "learning_rate": 1.571771197466807e-05, + "loss": 0.3075, + "step": 28335 + }, + { + "epoch": 1.32, + "learning_rate": 1.571692818961328e-05, + "loss": 0.0612, + "step": 28340 + }, + { + "epoch": 1.32, + "learning_rate": 1.5716144404558494e-05, + "loss": 0.087, + "step": 28345 + }, + { + "epoch": 1.32, + "learning_rate": 1.571536061950371e-05, + "loss": 0.0356, + "step": 28350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5714576834448922e-05, + "loss": 0.1484, + "step": 28355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5713793049394136e-05, + "loss": 0.1245, + "step": 28360 + }, + { + "epoch": 1.32, + "learning_rate": 1.571300926433935e-05, + "loss": 0.117, + "step": 28365 + }, + { + "epoch": 1.32, + "learning_rate": 1.571222547928456e-05, + "loss": 0.187, + "step": 28370 + }, + { + "epoch": 1.32, + "learning_rate": 1.5711441694229778e-05, + "loss": 0.3141, + "step": 28375 + }, + { + "epoch": 1.32, + "learning_rate": 1.571065790917499e-05, + "loss": 0.3338, + "step": 28380 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709874124120202e-05, + "loss": 0.3198, + "step": 28385 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709090339065416e-05, + "loss": 0.0383, + "step": 28390 + }, + { + "epoch": 1.32, + "learning_rate": 1.5708306554010627e-05, + "loss": 0.1249, + "step": 28395 + }, + { + "epoch": 1.33, + "learning_rate": 1.5707522768955844e-05, + "loss": 0.0838, + "step": 28400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5706738983901055e-05, + "loss": 0.1157, + "step": 28405 + }, + { + "epoch": 1.33, + "learning_rate": 1.570595519884627e-05, + "loss": 0.1878, + "step": 28410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5705171413791482e-05, + "loss": 0.1487, + "step": 28415 + }, + { + "epoch": 1.33, + "learning_rate": 1.5704387628736696e-05, + "loss": 0.1422, + "step": 28420 + }, + { + "epoch": 1.33, + "learning_rate": 1.570360384368191e-05, + "loss": 0.1978, + "step": 28425 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702820058627124e-05, + "loss": 0.3651, + "step": 28430 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702036273572338e-05, + "loss": 0.2579, + "step": 28435 + }, + { + "epoch": 1.33, + "learning_rate": 1.5701252488517552e-05, + "loss": 0.0636, + "step": 28440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5700468703462762e-05, + "loss": 0.069, + "step": 28445 + }, + { + "epoch": 1.33, + "learning_rate": 1.569968491840798e-05, + "loss": 0.0931, + "step": 28450 + }, + { + "epoch": 1.33, + "learning_rate": 1.569890113335319e-05, + "loss": 0.1109, + "step": 28455 + }, + { + "epoch": 1.33, + "learning_rate": 1.5698117348298404e-05, + "loss": 0.0914, + "step": 28460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5697333563243618e-05, + "loss": 0.1079, + "step": 28465 + }, + { + "epoch": 1.33, + "learning_rate": 1.569654977818883e-05, + "loss": 0.2067, + "step": 28470 + }, + { + "epoch": 1.33, + "learning_rate": 1.5695765993134046e-05, + "loss": 0.2517, + "step": 28475 + }, + { + "epoch": 1.33, + "learning_rate": 1.5694982208079256e-05, + "loss": 0.3391, + "step": 28480 + }, + { + "epoch": 1.33, + "learning_rate": 1.569419842302447e-05, + "loss": 0.1997, + "step": 28485 + }, + { + "epoch": 1.33, + "learning_rate": 1.5693414637969684e-05, + "loss": 0.0238, + "step": 28490 + }, + { + "epoch": 1.33, + "learning_rate": 1.5692630852914898e-05, + "loss": 0.0778, + "step": 28495 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691847067860112e-05, + "loss": 0.0956, + "step": 28500 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691063282805326e-05, + "loss": 0.1695, + "step": 28505 + }, + { + "epoch": 1.33, + "learning_rate": 1.5690279497750536e-05, + "loss": 0.0927, + "step": 28510 + }, + { + "epoch": 1.33, + "learning_rate": 1.5689495712695754e-05, + "loss": 0.109, + "step": 28515 + }, + { + "epoch": 1.33, + "learning_rate": 1.5688711927640964e-05, + "loss": 0.2123, + "step": 28520 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687928142586178e-05, + "loss": 0.1514, + "step": 28525 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687144357531392e-05, + "loss": 0.4057, + "step": 28530 + }, + { + "epoch": 1.33, + "learning_rate": 1.5686360572476606e-05, + "loss": 0.3171, + "step": 28535 + }, + { + "epoch": 1.33, + "learning_rate": 1.568557678742182e-05, + "loss": 0.0878, + "step": 28540 + }, + { + "epoch": 1.33, + "learning_rate": 1.568479300236703e-05, + "loss": 0.0565, + "step": 28545 + }, + { + "epoch": 1.33, + "learning_rate": 1.5684009217312248e-05, + "loss": 0.1183, + "step": 28550 + }, + { + "epoch": 1.33, + "learning_rate": 1.5683225432257458e-05, + "loss": 0.1481, + "step": 28555 + }, + { + "epoch": 1.33, + "learning_rate": 1.5682441647202672e-05, + "loss": 0.1402, + "step": 28560 + }, + { + "epoch": 1.33, + "learning_rate": 1.5681657862147886e-05, + "loss": 0.1809, + "step": 28565 + }, + { + "epoch": 1.33, + "learning_rate": 1.56808740770931e-05, + "loss": 0.2217, + "step": 28570 + }, + { + "epoch": 1.33, + "learning_rate": 1.5680090292038314e-05, + "loss": 0.3258, + "step": 28575 + }, + { + "epoch": 1.33, + "learning_rate": 1.5679306506983528e-05, + "loss": 0.2286, + "step": 28580 + }, + { + "epoch": 1.33, + "learning_rate": 1.5678522721928738e-05, + "loss": 0.2848, + "step": 28585 + }, + { + "epoch": 1.33, + "learning_rate": 1.5677738936873952e-05, + "loss": 0.0547, + "step": 28590 + }, + { + "epoch": 1.33, + "learning_rate": 1.5676955151819166e-05, + "loss": 0.0618, + "step": 28595 + }, + { + "epoch": 1.33, + "learning_rate": 1.567617136676438e-05, + "loss": 0.0941, + "step": 28600 + }, + { + "epoch": 1.33, + "learning_rate": 1.5675387581709594e-05, + "loss": 0.1233, + "step": 28605 + }, + { + "epoch": 1.33, + "learning_rate": 1.5674603796654804e-05, + "loss": 0.1848, + "step": 28610 + }, + { + "epoch": 1.34, + "learning_rate": 1.567382001160002e-05, + "loss": 0.0776, + "step": 28615 + }, + { + "epoch": 1.34, + "learning_rate": 1.5673036226545232e-05, + "loss": 0.248, + "step": 28620 + }, + { + "epoch": 1.34, + "learning_rate": 1.5672252441490446e-05, + "loss": 0.232, + "step": 28625 + }, + { + "epoch": 1.34, + "learning_rate": 1.567146865643566e-05, + "loss": 0.4364, + "step": 28630 + }, + { + "epoch": 1.34, + "learning_rate": 1.5670684871380874e-05, + "loss": 0.2168, + "step": 28635 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669901086326088e-05, + "loss": 0.049, + "step": 28640 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669117301271302e-05, + "loss": 0.0206, + "step": 28645 + }, + { + "epoch": 1.34, + "learning_rate": 1.5668333516216516e-05, + "loss": 0.1098, + "step": 28650 + }, + { + "epoch": 1.34, + "learning_rate": 1.5667549731161726e-05, + "loss": 0.1082, + "step": 28655 + }, + { + "epoch": 1.34, + "learning_rate": 1.566676594610694e-05, + "loss": 0.1286, + "step": 28660 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665982161052154e-05, + "loss": 0.2045, + "step": 28665 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665198375997368e-05, + "loss": 0.1718, + "step": 28670 + }, + { + "epoch": 1.34, + "learning_rate": 1.5664414590942582e-05, + "loss": 0.3584, + "step": 28675 + }, + { + "epoch": 1.34, + "learning_rate": 1.5663630805887796e-05, + "loss": 0.4569, + "step": 28680 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662847020833006e-05, + "loss": 0.3119, + "step": 28685 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662063235778223e-05, + "loss": 0.0559, + "step": 28690 + }, + { + "epoch": 1.34, + "learning_rate": 1.5661279450723434e-05, + "loss": 0.0382, + "step": 28695 + }, + { + "epoch": 1.34, + "learning_rate": 1.5660495665668648e-05, + "loss": 0.1046, + "step": 28700 + }, + { + "epoch": 1.34, + "learning_rate": 1.5659711880613862e-05, + "loss": 0.1162, + "step": 28705 + }, + { + "epoch": 1.34, + "learning_rate": 1.5658928095559076e-05, + "loss": 0.1435, + "step": 28710 + }, + { + "epoch": 1.34, + "learning_rate": 1.565814431050429e-05, + "loss": 0.203, + "step": 28715 + }, + { + "epoch": 1.34, + "learning_rate": 1.56573605254495e-05, + "loss": 0.2438, + "step": 28720 + }, + { + "epoch": 1.34, + "learning_rate": 1.5656576740394714e-05, + "loss": 0.214, + "step": 28725 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655792955339928e-05, + "loss": 0.369, + "step": 28730 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655009170285142e-05, + "loss": 0.2569, + "step": 28735 + }, + { + "epoch": 1.34, + "learning_rate": 1.5654225385230356e-05, + "loss": 0.0718, + "step": 28740 + }, + { + "epoch": 1.34, + "learning_rate": 1.565344160017557e-05, + "loss": 0.1807, + "step": 28745 + }, + { + "epoch": 1.34, + "learning_rate": 1.5652657815120784e-05, + "loss": 0.0907, + "step": 28750 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651874030065997e-05, + "loss": 0.1447, + "step": 28755 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651090245011208e-05, + "loss": 0.1166, + "step": 28760 + }, + { + "epoch": 1.34, + "learning_rate": 1.5650306459956425e-05, + "loss": 0.1564, + "step": 28765 + }, + { + "epoch": 1.34, + "learning_rate": 1.5649522674901636e-05, + "loss": 0.2352, + "step": 28770 + }, + { + "epoch": 1.34, + "learning_rate": 1.564873888984685e-05, + "loss": 0.2784, + "step": 28775 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647955104792064e-05, + "loss": 0.3881, + "step": 28780 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647171319737274e-05, + "loss": 0.2386, + "step": 28785 + }, + { + "epoch": 1.34, + "learning_rate": 1.564638753468249e-05, + "loss": 0.0621, + "step": 28790 + }, + { + "epoch": 1.34, + "learning_rate": 1.5645603749627702e-05, + "loss": 0.0444, + "step": 28795 + }, + { + "epoch": 1.34, + "learning_rate": 1.5644819964572916e-05, + "loss": 0.0656, + "step": 28800 + }, + { + "epoch": 1.34, + "learning_rate": 1.564403617951813e-05, + "loss": 0.0765, + "step": 28805 + }, + { + "epoch": 1.34, + "learning_rate": 1.5643252394463344e-05, + "loss": 0.2032, + "step": 28810 + }, + { + "epoch": 1.34, + "learning_rate": 1.5642468609408558e-05, + "loss": 0.1976, + "step": 28815 + }, + { + "epoch": 1.34, + "learning_rate": 1.564168482435377e-05, + "loss": 0.1735, + "step": 28820 + }, + { + "epoch": 1.35, + "learning_rate": 1.5640901039298982e-05, + "loss": 0.2373, + "step": 28825 + }, + { + "epoch": 1.35, + "learning_rate": 1.56401172542442e-05, + "loss": 0.3012, + "step": 28830 + }, + { + "epoch": 1.35, + "learning_rate": 1.563933346918941e-05, + "loss": 0.3411, + "step": 28835 + }, + { + "epoch": 1.35, + "learning_rate": 1.5638549684134624e-05, + "loss": 0.0834, + "step": 28840 + }, + { + "epoch": 1.35, + "learning_rate": 1.5637765899079838e-05, + "loss": 0.0505, + "step": 28845 + }, + { + "epoch": 1.35, + "learning_rate": 1.563698211402505e-05, + "loss": 0.076, + "step": 28850 + }, + { + "epoch": 1.35, + "learning_rate": 1.5636198328970265e-05, + "loss": 0.0456, + "step": 28855 + }, + { + "epoch": 1.35, + "learning_rate": 1.5635414543915476e-05, + "loss": 0.0746, + "step": 28860 + }, + { + "epoch": 1.35, + "learning_rate": 1.5634630758860693e-05, + "loss": 0.1093, + "step": 28865 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633846973805904e-05, + "loss": 0.1907, + "step": 28870 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633063188751118e-05, + "loss": 0.1332, + "step": 28875 + }, + { + "epoch": 1.35, + "learning_rate": 1.563227940369633e-05, + "loss": 0.3031, + "step": 28880 + }, + { + "epoch": 1.35, + "learning_rate": 1.5631495618641545e-05, + "loss": 0.3297, + "step": 28885 + }, + { + "epoch": 1.35, + "learning_rate": 1.563071183358676e-05, + "loss": 0.0652, + "step": 28890 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629928048531973e-05, + "loss": 0.0844, + "step": 28895 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629144263477184e-05, + "loss": 0.1097, + "step": 28900 + }, + { + "epoch": 1.35, + "learning_rate": 1.56283604784224e-05, + "loss": 0.115, + "step": 28905 + }, + { + "epoch": 1.35, + "learning_rate": 1.562757669336761e-05, + "loss": 0.1077, + "step": 28910 + }, + { + "epoch": 1.35, + "learning_rate": 1.5626792908312826e-05, + "loss": 0.1605, + "step": 28915 + }, + { + "epoch": 1.35, + "learning_rate": 1.562600912325804e-05, + "loss": 0.2131, + "step": 28920 + }, + { + "epoch": 1.35, + "learning_rate": 1.562522533820325e-05, + "loss": 0.1889, + "step": 28925 + }, + { + "epoch": 1.35, + "learning_rate": 1.5624441553148467e-05, + "loss": 0.3687, + "step": 28930 + }, + { + "epoch": 1.35, + "learning_rate": 1.5623657768093678e-05, + "loss": 0.2025, + "step": 28935 + }, + { + "epoch": 1.35, + "learning_rate": 1.562287398303889e-05, + "loss": 0.0673, + "step": 28940 + }, + { + "epoch": 1.35, + "learning_rate": 1.5622090197984106e-05, + "loss": 0.0705, + "step": 28945 + }, + { + "epoch": 1.35, + "learning_rate": 1.562130641292932e-05, + "loss": 0.1058, + "step": 28950 + }, + { + "epoch": 1.35, + "learning_rate": 1.5620522627874533e-05, + "loss": 0.1207, + "step": 28955 + }, + { + "epoch": 1.35, + "learning_rate": 1.5619738842819747e-05, + "loss": 0.1275, + "step": 28960 + }, + { + "epoch": 1.35, + "learning_rate": 1.561895505776496e-05, + "loss": 0.2169, + "step": 28965 + }, + { + "epoch": 1.35, + "learning_rate": 1.5618171272710175e-05, + "loss": 0.1789, + "step": 28970 + }, + { + "epoch": 1.35, + "learning_rate": 1.5617387487655386e-05, + "loss": 0.2378, + "step": 28975 + }, + { + "epoch": 1.35, + "learning_rate": 1.56166037026006e-05, + "loss": 0.4098, + "step": 28980 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615819917545813e-05, + "loss": 0.2499, + "step": 28985 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615036132491027e-05, + "loss": 0.0793, + "step": 28990 + }, + { + "epoch": 1.35, + "learning_rate": 1.561425234743624e-05, + "loss": 0.067, + "step": 28995 + }, + { + "epoch": 1.35, + "learning_rate": 1.5613468562381452e-05, + "loss": 0.0746, + "step": 29000 + }, + { + "epoch": 1.35, + "learning_rate": 1.561268477732667e-05, + "loss": 0.2106, + "step": 29005 + }, + { + "epoch": 1.35, + "learning_rate": 1.561190099227188e-05, + "loss": 0.1199, + "step": 29010 + }, + { + "epoch": 1.35, + "learning_rate": 1.5611117207217093e-05, + "loss": 0.0969, + "step": 29015 + }, + { + "epoch": 1.35, + "learning_rate": 1.5610333422162307e-05, + "loss": 0.3277, + "step": 29020 + }, + { + "epoch": 1.35, + "learning_rate": 1.560954963710752e-05, + "loss": 0.2931, + "step": 29025 + }, + { + "epoch": 1.35, + "learning_rate": 1.5608765852052735e-05, + "loss": 0.4393, + "step": 29030 + }, + { + "epoch": 1.35, + "learning_rate": 1.560798206699795e-05, + "loss": 0.4498, + "step": 29035 + }, + { + "epoch": 1.36, + "learning_rate": 1.560719828194316e-05, + "loss": 0.0525, + "step": 29040 + }, + { + "epoch": 1.36, + "learning_rate": 1.5606414496888374e-05, + "loss": 0.0493, + "step": 29045 + }, + { + "epoch": 1.36, + "learning_rate": 1.5605630711833587e-05, + "loss": 0.0895, + "step": 29050 + }, + { + "epoch": 1.36, + "learning_rate": 1.56048469267788e-05, + "loss": 0.1352, + "step": 29055 + }, + { + "epoch": 1.36, + "learning_rate": 1.5604063141724015e-05, + "loss": 0.1055, + "step": 29060 + }, + { + "epoch": 1.36, + "learning_rate": 1.560327935666923e-05, + "loss": 0.2887, + "step": 29065 + }, + { + "epoch": 1.36, + "learning_rate": 1.5602495571614443e-05, + "loss": 0.1776, + "step": 29070 + }, + { + "epoch": 1.36, + "learning_rate": 1.5601711786559654e-05, + "loss": 0.1566, + "step": 29075 + }, + { + "epoch": 1.36, + "learning_rate": 1.560092800150487e-05, + "loss": 0.2249, + "step": 29080 + }, + { + "epoch": 1.36, + "learning_rate": 1.560014421645008e-05, + "loss": 0.4125, + "step": 29085 + }, + { + "epoch": 1.36, + "learning_rate": 1.5599360431395295e-05, + "loss": 0.1064, + "step": 29090 + }, + { + "epoch": 1.36, + "learning_rate": 1.559857664634051e-05, + "loss": 0.1015, + "step": 29095 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597792861285723e-05, + "loss": 0.0806, + "step": 29100 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597009076230937e-05, + "loss": 0.1206, + "step": 29105 + }, + { + "epoch": 1.36, + "learning_rate": 1.5596225291176148e-05, + "loss": 0.1109, + "step": 29110 + }, + { + "epoch": 1.36, + "learning_rate": 1.559544150612136e-05, + "loss": 0.1305, + "step": 29115 + }, + { + "epoch": 1.36, + "learning_rate": 1.5594657721066575e-05, + "loss": 0.2071, + "step": 29120 + }, + { + "epoch": 1.36, + "learning_rate": 1.559387393601179e-05, + "loss": 0.1554, + "step": 29125 + }, + { + "epoch": 1.36, + "learning_rate": 1.5593090150957003e-05, + "loss": 0.5038, + "step": 29130 + }, + { + "epoch": 1.36, + "learning_rate": 1.5592306365902217e-05, + "loss": 0.3296, + "step": 29135 + }, + { + "epoch": 1.36, + "learning_rate": 1.5591522580847428e-05, + "loss": 0.1192, + "step": 29140 + }, + { + "epoch": 1.36, + "learning_rate": 1.5590738795792645e-05, + "loss": 0.1061, + "step": 29145 + }, + { + "epoch": 1.36, + "learning_rate": 1.5589955010737855e-05, + "loss": 0.0826, + "step": 29150 + }, + { + "epoch": 1.36, + "learning_rate": 1.558917122568307e-05, + "loss": 0.1131, + "step": 29155 + }, + { + "epoch": 1.36, + "learning_rate": 1.5588387440628283e-05, + "loss": 0.1495, + "step": 29160 + }, + { + "epoch": 1.36, + "learning_rate": 1.5587603655573497e-05, + "loss": 0.2157, + "step": 29165 + }, + { + "epoch": 1.36, + "learning_rate": 1.558681987051871e-05, + "loss": 0.2206, + "step": 29170 + }, + { + "epoch": 1.36, + "learning_rate": 1.558603608546392e-05, + "loss": 0.2772, + "step": 29175 + }, + { + "epoch": 1.36, + "learning_rate": 1.558525230040914e-05, + "loss": 0.5025, + "step": 29180 + }, + { + "epoch": 1.36, + "learning_rate": 1.558446851535435e-05, + "loss": 0.3455, + "step": 29185 + }, + { + "epoch": 1.36, + "learning_rate": 1.5583684730299563e-05, + "loss": 0.0408, + "step": 29190 + }, + { + "epoch": 1.36, + "learning_rate": 1.5582900945244777e-05, + "loss": 0.0804, + "step": 29195 + }, + { + "epoch": 1.36, + "learning_rate": 1.558211716018999e-05, + "loss": 0.1028, + "step": 29200 + }, + { + "epoch": 1.36, + "learning_rate": 1.5581333375135205e-05, + "loss": 0.0895, + "step": 29205 + }, + { + "epoch": 1.36, + "learning_rate": 1.558054959008042e-05, + "loss": 0.151, + "step": 29210 + }, + { + "epoch": 1.36, + "learning_rate": 1.557976580502563e-05, + "loss": 0.1078, + "step": 29215 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578982019970847e-05, + "loss": 0.287, + "step": 29220 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578198234916057e-05, + "loss": 0.2512, + "step": 29225 + }, + { + "epoch": 1.36, + "learning_rate": 1.557741444986127e-05, + "loss": 0.3908, + "step": 29230 + }, + { + "epoch": 1.36, + "learning_rate": 1.5576630664806485e-05, + "loss": 0.3252, + "step": 29235 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575846879751695e-05, + "loss": 0.0572, + "step": 29240 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575063094696913e-05, + "loss": 0.0728, + "step": 29245 + }, + { + "epoch": 1.36, + "learning_rate": 1.5574279309642123e-05, + "loss": 0.1044, + "step": 29250 + }, + { + "epoch": 1.37, + "learning_rate": 1.5573495524587337e-05, + "loss": 0.1306, + "step": 29255 + }, + { + "epoch": 1.37, + "learning_rate": 1.557271173953255e-05, + "loss": 0.1469, + "step": 29260 + }, + { + "epoch": 1.37, + "learning_rate": 1.5571927954477765e-05, + "loss": 0.1307, + "step": 29265 + }, + { + "epoch": 1.37, + "learning_rate": 1.557114416942298e-05, + "loss": 0.2949, + "step": 29270 + }, + { + "epoch": 1.37, + "learning_rate": 1.5570360384368193e-05, + "loss": 0.1483, + "step": 29275 + }, + { + "epoch": 1.37, + "learning_rate": 1.5569576599313407e-05, + "loss": 0.3853, + "step": 29280 + }, + { + "epoch": 1.37, + "learning_rate": 1.556879281425862e-05, + "loss": 0.2308, + "step": 29285 + }, + { + "epoch": 1.37, + "learning_rate": 1.556800902920383e-05, + "loss": 0.0336, + "step": 29290 + }, + { + "epoch": 1.37, + "learning_rate": 1.556722524414905e-05, + "loss": 0.0368, + "step": 29295 + }, + { + "epoch": 1.37, + "learning_rate": 1.556644145909426e-05, + "loss": 0.0633, + "step": 29300 + }, + { + "epoch": 1.37, + "learning_rate": 1.5565657674039473e-05, + "loss": 0.1041, + "step": 29305 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564873888984687e-05, + "loss": 0.2455, + "step": 29310 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564090103929897e-05, + "loss": 0.128, + "step": 29315 + }, + { + "epoch": 1.37, + "learning_rate": 1.5563306318875115e-05, + "loss": 0.1876, + "step": 29320 + }, + { + "epoch": 1.37, + "learning_rate": 1.5562522533820325e-05, + "loss": 0.1958, + "step": 29325 + }, + { + "epoch": 1.37, + "learning_rate": 1.556173874876554e-05, + "loss": 0.3136, + "step": 29330 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560954963710753e-05, + "loss": 0.3116, + "step": 29335 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560171178655967e-05, + "loss": 0.0619, + "step": 29340 + }, + { + "epoch": 1.37, + "learning_rate": 1.555938739360118e-05, + "loss": 0.0497, + "step": 29345 + }, + { + "epoch": 1.37, + "learning_rate": 1.5558603608546395e-05, + "loss": 0.1101, + "step": 29350 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557819823491605e-05, + "loss": 0.1347, + "step": 29355 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557036038436822e-05, + "loss": 0.14, + "step": 29360 + }, + { + "epoch": 1.37, + "learning_rate": 1.5556252253382033e-05, + "loss": 0.2094, + "step": 29365 + }, + { + "epoch": 1.37, + "learning_rate": 1.5555468468327247e-05, + "loss": 0.2115, + "step": 29370 + }, + { + "epoch": 1.37, + "learning_rate": 1.555468468327246e-05, + "loss": 0.23, + "step": 29375 + }, + { + "epoch": 1.37, + "learning_rate": 1.5553900898217675e-05, + "loss": 0.409, + "step": 29380 + }, + { + "epoch": 1.37, + "learning_rate": 1.555311711316289e-05, + "loss": 0.2575, + "step": 29385 + }, + { + "epoch": 1.37, + "learning_rate": 1.55523333281081e-05, + "loss": 0.0701, + "step": 29390 + }, + { + "epoch": 1.37, + "learning_rate": 1.5551549543053316e-05, + "loss": 0.0587, + "step": 29395 + }, + { + "epoch": 1.37, + "learning_rate": 1.5550765757998527e-05, + "loss": 0.1614, + "step": 29400 + }, + { + "epoch": 1.37, + "learning_rate": 1.554998197294374e-05, + "loss": 0.1066, + "step": 29405 + }, + { + "epoch": 1.37, + "learning_rate": 1.5549198187888955e-05, + "loss": 0.1333, + "step": 29410 + }, + { + "epoch": 1.37, + "learning_rate": 1.554841440283417e-05, + "loss": 0.181, + "step": 29415 + }, + { + "epoch": 1.37, + "learning_rate": 1.5547630617779383e-05, + "loss": 0.183, + "step": 29420 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546846832724596e-05, + "loss": 0.2203, + "step": 29425 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546063047669807e-05, + "loss": 0.4626, + "step": 29430 + }, + { + "epoch": 1.37, + "learning_rate": 1.554527926261502e-05, + "loss": 0.2009, + "step": 29435 + }, + { + "epoch": 1.37, + "learning_rate": 1.5544495477560235e-05, + "loss": 0.0562, + "step": 29440 + }, + { + "epoch": 1.37, + "learning_rate": 1.554371169250545e-05, + "loss": 0.1155, + "step": 29445 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542927907450663e-05, + "loss": 0.0612, + "step": 29450 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542144122395873e-05, + "loss": 0.1157, + "step": 29455 + }, + { + "epoch": 1.37, + "learning_rate": 1.554136033734109e-05, + "loss": 0.1621, + "step": 29460 + }, + { + "epoch": 1.37, + "learning_rate": 1.55405765522863e-05, + "loss": 0.0789, + "step": 29465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5539792767231515e-05, + "loss": 0.2351, + "step": 29470 + }, + { + "epoch": 1.38, + "learning_rate": 1.553900898217673e-05, + "loss": 0.2513, + "step": 29475 + }, + { + "epoch": 1.38, + "learning_rate": 1.5538225197121943e-05, + "loss": 0.3753, + "step": 29480 + }, + { + "epoch": 1.38, + "learning_rate": 1.5537441412067157e-05, + "loss": 0.2661, + "step": 29485 + }, + { + "epoch": 1.38, + "learning_rate": 1.553665762701237e-05, + "loss": 0.1098, + "step": 29490 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535873841957584e-05, + "loss": 0.0493, + "step": 29495 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535090056902795e-05, + "loss": 0.1339, + "step": 29500 + }, + { + "epoch": 1.38, + "learning_rate": 1.553430627184801e-05, + "loss": 0.1432, + "step": 29505 + }, + { + "epoch": 1.38, + "learning_rate": 1.5533522486793223e-05, + "loss": 0.107, + "step": 29510 + }, + { + "epoch": 1.38, + "learning_rate": 1.5532738701738437e-05, + "loss": 0.1993, + "step": 29515 + }, + { + "epoch": 1.38, + "learning_rate": 1.553195491668365e-05, + "loss": 0.3002, + "step": 29520 + }, + { + "epoch": 1.38, + "learning_rate": 1.5531171131628864e-05, + "loss": 0.2679, + "step": 29525 + }, + { + "epoch": 1.38, + "learning_rate": 1.5530387346574075e-05, + "loss": 0.3519, + "step": 29530 + }, + { + "epoch": 1.38, + "learning_rate": 1.5529603561519292e-05, + "loss": 0.2758, + "step": 29535 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528819776464503e-05, + "loss": 0.1208, + "step": 29540 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528035991409717e-05, + "loss": 0.0767, + "step": 29545 + }, + { + "epoch": 1.38, + "learning_rate": 1.552725220635493e-05, + "loss": 0.0603, + "step": 29550 + }, + { + "epoch": 1.38, + "learning_rate": 1.5526468421300144e-05, + "loss": 0.0691, + "step": 29555 + }, + { + "epoch": 1.38, + "learning_rate": 1.552568463624536e-05, + "loss": 0.1479, + "step": 29560 + }, + { + "epoch": 1.38, + "learning_rate": 1.552490085119057e-05, + "loss": 0.0891, + "step": 29565 + }, + { + "epoch": 1.38, + "learning_rate": 1.5524117066135783e-05, + "loss": 0.2104, + "step": 29570 + }, + { + "epoch": 1.38, + "learning_rate": 1.5523333281080997e-05, + "loss": 0.2748, + "step": 29575 + }, + { + "epoch": 1.38, + "learning_rate": 1.552254949602621e-05, + "loss": 0.3622, + "step": 29580 + }, + { + "epoch": 1.38, + "learning_rate": 1.5521765710971425e-05, + "loss": 0.252, + "step": 29585 + }, + { + "epoch": 1.38, + "learning_rate": 1.552098192591664e-05, + "loss": 0.079, + "step": 29590 + }, + { + "epoch": 1.38, + "learning_rate": 1.5520198140861852e-05, + "loss": 0.0913, + "step": 29595 + }, + { + "epoch": 1.38, + "learning_rate": 1.5519414355807066e-05, + "loss": 0.1742, + "step": 29600 + }, + { + "epoch": 1.38, + "learning_rate": 1.5518630570752277e-05, + "loss": 0.073, + "step": 29605 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517846785697494e-05, + "loss": 0.091, + "step": 29610 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517063000642705e-05, + "loss": 0.1321, + "step": 29615 + }, + { + "epoch": 1.38, + "learning_rate": 1.551627921558792e-05, + "loss": 0.1131, + "step": 29620 + }, + { + "epoch": 1.38, + "learning_rate": 1.5515495430533132e-05, + "loss": 0.3363, + "step": 29625 + }, + { + "epoch": 1.38, + "learning_rate": 1.5514711645478343e-05, + "loss": 0.3416, + "step": 29630 + }, + { + "epoch": 1.38, + "learning_rate": 1.551392786042356e-05, + "loss": 0.3418, + "step": 29635 + }, + { + "epoch": 1.38, + "learning_rate": 1.551314407536877e-05, + "loss": 0.0653, + "step": 29640 + }, + { + "epoch": 1.38, + "learning_rate": 1.5512360290313985e-05, + "loss": 0.0418, + "step": 29645 + }, + { + "epoch": 1.38, + "learning_rate": 1.55115765052592e-05, + "loss": 0.0941, + "step": 29650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510792720204412e-05, + "loss": 0.1759, + "step": 29655 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510008935149626e-05, + "loss": 0.111, + "step": 29660 + }, + { + "epoch": 1.38, + "learning_rate": 1.550922515009484e-05, + "loss": 0.1351, + "step": 29665 + }, + { + "epoch": 1.38, + "learning_rate": 1.550844136504005e-05, + "loss": 0.1784, + "step": 29670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5507657579985268e-05, + "loss": 0.2896, + "step": 29675 + }, + { + "epoch": 1.38, + "learning_rate": 1.550687379493048e-05, + "loss": 0.3232, + "step": 29680 + }, + { + "epoch": 1.39, + "learning_rate": 1.5506090009875692e-05, + "loss": 0.5113, + "step": 29685 + }, + { + "epoch": 1.39, + "learning_rate": 1.5505306224820906e-05, + "loss": 0.0731, + "step": 29690 + }, + { + "epoch": 1.39, + "learning_rate": 1.550452243976612e-05, + "loss": 0.0491, + "step": 29695 + }, + { + "epoch": 1.39, + "learning_rate": 1.5503738654711334e-05, + "loss": 0.1016, + "step": 29700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502954869656545e-05, + "loss": 0.0946, + "step": 29705 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502171084601762e-05, + "loss": 0.1709, + "step": 29710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5501387299546973e-05, + "loss": 0.2002, + "step": 29715 + }, + { + "epoch": 1.39, + "learning_rate": 1.5500603514492186e-05, + "loss": 0.2524, + "step": 29720 + }, + { + "epoch": 1.39, + "learning_rate": 1.54998197294374e-05, + "loss": 0.2481, + "step": 29725 + }, + { + "epoch": 1.39, + "learning_rate": 1.5499035944382614e-05, + "loss": 0.4631, + "step": 29730 + }, + { + "epoch": 1.39, + "learning_rate": 1.5498252159327828e-05, + "loss": 0.273, + "step": 29735 + }, + { + "epoch": 1.39, + "learning_rate": 1.5497468374273042e-05, + "loss": 0.0508, + "step": 29740 + }, + { + "epoch": 1.39, + "learning_rate": 1.5496684589218253e-05, + "loss": 0.1125, + "step": 29745 + }, + { + "epoch": 1.39, + "learning_rate": 1.549590080416347e-05, + "loss": 0.0327, + "step": 29750 + }, + { + "epoch": 1.39, + "learning_rate": 1.549511701910868e-05, + "loss": 0.1278, + "step": 29755 + }, + { + "epoch": 1.39, + "learning_rate": 1.5494333234053894e-05, + "loss": 0.0829, + "step": 29760 + }, + { + "epoch": 1.39, + "learning_rate": 1.5493549448999108e-05, + "loss": 0.1396, + "step": 29765 + }, + { + "epoch": 1.39, + "learning_rate": 1.549276566394432e-05, + "loss": 0.1894, + "step": 29770 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491981878889536e-05, + "loss": 0.2046, + "step": 29775 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491198093834747e-05, + "loss": 0.2948, + "step": 29780 + }, + { + "epoch": 1.39, + "learning_rate": 1.549041430877996e-05, + "loss": 0.3256, + "step": 29785 + }, + { + "epoch": 1.39, + "learning_rate": 1.5489630523725174e-05, + "loss": 0.09, + "step": 29790 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488846738670388e-05, + "loss": 0.0646, + "step": 29795 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488062953615602e-05, + "loss": 0.1158, + "step": 29800 + }, + { + "epoch": 1.39, + "learning_rate": 1.5487279168560816e-05, + "loss": 0.1228, + "step": 29805 + }, + { + "epoch": 1.39, + "learning_rate": 1.548649538350603e-05, + "loss": 0.1435, + "step": 29810 + }, + { + "epoch": 1.39, + "learning_rate": 1.5485711598451244e-05, + "loss": 0.1631, + "step": 29815 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484927813396454e-05, + "loss": 0.0708, + "step": 29820 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484144028341668e-05, + "loss": 0.4099, + "step": 29825 + }, + { + "epoch": 1.39, + "learning_rate": 1.5483360243286882e-05, + "loss": 0.4474, + "step": 29830 + }, + { + "epoch": 1.39, + "learning_rate": 1.5482576458232096e-05, + "loss": 0.3448, + "step": 29835 + }, + { + "epoch": 1.39, + "learning_rate": 1.548179267317731e-05, + "loss": 0.0606, + "step": 29840 + }, + { + "epoch": 1.39, + "learning_rate": 1.548100888812252e-05, + "loss": 0.1094, + "step": 29845 + }, + { + "epoch": 1.39, + "learning_rate": 1.5480225103067738e-05, + "loss": 0.1081, + "step": 29850 + }, + { + "epoch": 1.39, + "learning_rate": 1.547944131801295e-05, + "loss": 0.1361, + "step": 29855 + }, + { + "epoch": 1.39, + "learning_rate": 1.5478657532958162e-05, + "loss": 0.1611, + "step": 29860 + }, + { + "epoch": 1.39, + "learning_rate": 1.5477873747903376e-05, + "loss": 0.1986, + "step": 29865 + }, + { + "epoch": 1.39, + "learning_rate": 1.547708996284859e-05, + "loss": 0.1848, + "step": 29870 + }, + { + "epoch": 1.39, + "learning_rate": 1.5476306177793804e-05, + "loss": 0.3482, + "step": 29875 + }, + { + "epoch": 1.39, + "learning_rate": 1.5475522392739018e-05, + "loss": 0.4131, + "step": 29880 + }, + { + "epoch": 1.39, + "learning_rate": 1.547473860768423e-05, + "loss": 0.2946, + "step": 29885 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473954822629442e-05, + "loss": 0.0356, + "step": 29890 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473171037574656e-05, + "loss": 0.0547, + "step": 29895 + }, + { + "epoch": 1.4, + "learning_rate": 1.547238725251987e-05, + "loss": 0.1375, + "step": 29900 + }, + { + "epoch": 1.4, + "learning_rate": 1.5471603467465084e-05, + "loss": 0.119, + "step": 29905 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470819682410298e-05, + "loss": 0.107, + "step": 29910 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470035897355512e-05, + "loss": 0.1367, + "step": 29915 + }, + { + "epoch": 1.4, + "learning_rate": 1.5469252112300722e-05, + "loss": 0.147, + "step": 29920 + }, + { + "epoch": 1.4, + "learning_rate": 1.546846832724594e-05, + "loss": 0.2123, + "step": 29925 + }, + { + "epoch": 1.4, + "learning_rate": 1.546768454219115e-05, + "loss": 0.3934, + "step": 29930 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466900757136364e-05, + "loss": 0.2301, + "step": 29935 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466116972081578e-05, + "loss": 0.045, + "step": 29940 + }, + { + "epoch": 1.4, + "learning_rate": 1.5465333187026792e-05, + "loss": 0.092, + "step": 29945 + }, + { + "epoch": 1.4, + "learning_rate": 1.5464549401972006e-05, + "loss": 0.0908, + "step": 29950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5463765616917216e-05, + "loss": 0.1098, + "step": 29955 + }, + { + "epoch": 1.4, + "learning_rate": 1.546298183186243e-05, + "loss": 0.0832, + "step": 29960 + }, + { + "epoch": 1.4, + "learning_rate": 1.5462198046807644e-05, + "loss": 0.1968, + "step": 29965 + }, + { + "epoch": 1.4, + "learning_rate": 1.5461414261752858e-05, + "loss": 0.2016, + "step": 29970 + }, + { + "epoch": 1.4, + "learning_rate": 1.5460630476698072e-05, + "loss": 0.2575, + "step": 29975 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459846691643286e-05, + "loss": 0.4952, + "step": 29980 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459062906588496e-05, + "loss": 0.364, + "step": 29985 + }, + { + "epoch": 1.4, + "learning_rate": 1.5458279121533714e-05, + "loss": 0.0521, + "step": 29990 + }, + { + "epoch": 1.4, + "learning_rate": 1.5457495336478924e-05, + "loss": 0.0457, + "step": 29995 + }, + { + "epoch": 1.4, + "learning_rate": 1.5456711551424138e-05, + "loss": 0.0946, + "step": 30000 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455927766369352e-05, + "loss": 0.1691, + "step": 30005 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455143981314566e-05, + "loss": 0.1704, + "step": 30010 + }, + { + "epoch": 1.4, + "learning_rate": 1.545436019625978e-05, + "loss": 0.1653, + "step": 30015 + }, + { + "epoch": 1.4, + "learning_rate": 1.545357641120499e-05, + "loss": 0.1592, + "step": 30020 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452792626150208e-05, + "loss": 0.2251, + "step": 30025 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452008841095418e-05, + "loss": 0.2223, + "step": 30030 + }, + { + "epoch": 1.4, + "learning_rate": 1.5451225056040632e-05, + "loss": 0.3359, + "step": 30035 + }, + { + "epoch": 1.4, + "learning_rate": 1.5450441270985846e-05, + "loss": 0.0442, + "step": 30040 + }, + { + "epoch": 1.4, + "learning_rate": 1.544965748593106e-05, + "loss": 0.1268, + "step": 30045 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448873700876274e-05, + "loss": 0.1322, + "step": 30050 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448089915821488e-05, + "loss": 0.0535, + "step": 30055 + }, + { + "epoch": 1.4, + "learning_rate": 1.5447306130766698e-05, + "loss": 0.1431, + "step": 30060 + }, + { + "epoch": 1.4, + "learning_rate": 1.5446522345711915e-05, + "loss": 0.1551, + "step": 30065 + }, + { + "epoch": 1.4, + "learning_rate": 1.5445738560657126e-05, + "loss": 0.1445, + "step": 30070 + }, + { + "epoch": 1.4, + "learning_rate": 1.544495477560234e-05, + "loss": 0.2025, + "step": 30075 + }, + { + "epoch": 1.4, + "learning_rate": 1.5444170990547554e-05, + "loss": 0.3375, + "step": 30080 + }, + { + "epoch": 1.4, + "learning_rate": 1.5443387205492764e-05, + "loss": 0.2115, + "step": 30085 + }, + { + "epoch": 1.4, + "learning_rate": 1.544260342043798e-05, + "loss": 0.0324, + "step": 30090 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441819635383192e-05, + "loss": 0.0582, + "step": 30095 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441035850328406e-05, + "loss": 0.0829, + "step": 30100 + }, + { + "epoch": 1.4, + "learning_rate": 1.544025206527362e-05, + "loss": 0.0776, + "step": 30105 + }, + { + "epoch": 1.4, + "learning_rate": 1.5439468280218834e-05, + "loss": 0.1564, + "step": 30110 + }, + { + "epoch": 1.41, + "learning_rate": 1.5438684495164048e-05, + "loss": 0.1717, + "step": 30115 + }, + { + "epoch": 1.41, + "learning_rate": 1.543790071010926e-05, + "loss": 0.217, + "step": 30120 + }, + { + "epoch": 1.41, + "learning_rate": 1.5437116925054476e-05, + "loss": 0.2191, + "step": 30125 + }, + { + "epoch": 1.41, + "learning_rate": 1.543633313999969e-05, + "loss": 0.3155, + "step": 30130 + }, + { + "epoch": 1.41, + "learning_rate": 1.54355493549449e-05, + "loss": 0.3595, + "step": 30135 + }, + { + "epoch": 1.41, + "learning_rate": 1.5434765569890117e-05, + "loss": 0.0571, + "step": 30140 + }, + { + "epoch": 1.41, + "learning_rate": 1.5433981784835328e-05, + "loss": 0.0568, + "step": 30145 + }, + { + "epoch": 1.41, + "learning_rate": 1.543319799978054e-05, + "loss": 0.077, + "step": 30150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5432414214725756e-05, + "loss": 0.1147, + "step": 30155 + }, + { + "epoch": 1.41, + "learning_rate": 1.5431630429670966e-05, + "loss": 0.0826, + "step": 30160 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430846644616183e-05, + "loss": 0.1165, + "step": 30165 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430062859561394e-05, + "loss": 0.1987, + "step": 30170 + }, + { + "epoch": 1.41, + "learning_rate": 1.5429279074506608e-05, + "loss": 0.2379, + "step": 30175 + }, + { + "epoch": 1.41, + "learning_rate": 1.542849528945182e-05, + "loss": 0.5167, + "step": 30180 + }, + { + "epoch": 1.41, + "learning_rate": 1.5427711504397036e-05, + "loss": 0.2545, + "step": 30185 + }, + { + "epoch": 1.41, + "learning_rate": 1.542692771934225e-05, + "loss": 0.0899, + "step": 30190 + }, + { + "epoch": 1.41, + "learning_rate": 1.5426143934287463e-05, + "loss": 0.0671, + "step": 30195 + }, + { + "epoch": 1.41, + "learning_rate": 1.5425360149232674e-05, + "loss": 0.1145, + "step": 30200 + }, + { + "epoch": 1.41, + "learning_rate": 1.542457636417789e-05, + "loss": 0.0848, + "step": 30205 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423792579123102e-05, + "loss": 0.208, + "step": 30210 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423008794068316e-05, + "loss": 0.1277, + "step": 30215 + }, + { + "epoch": 1.41, + "learning_rate": 1.542222500901353e-05, + "loss": 0.182, + "step": 30220 + }, + { + "epoch": 1.41, + "learning_rate": 1.5421441223958743e-05, + "loss": 0.2234, + "step": 30225 + }, + { + "epoch": 1.41, + "learning_rate": 1.5420657438903957e-05, + "loss": 0.3056, + "step": 30230 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419873653849168e-05, + "loss": 0.4144, + "step": 30235 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419089868794385e-05, + "loss": 0.0343, + "step": 30240 + }, + { + "epoch": 1.41, + "learning_rate": 1.5418306083739596e-05, + "loss": 0.0805, + "step": 30245 + }, + { + "epoch": 1.41, + "learning_rate": 1.541752229868481e-05, + "loss": 0.1667, + "step": 30250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5416738513630024e-05, + "loss": 0.1355, + "step": 30255 + }, + { + "epoch": 1.41, + "learning_rate": 1.5415954728575237e-05, + "loss": 0.1269, + "step": 30260 + }, + { + "epoch": 1.41, + "learning_rate": 1.541517094352045e-05, + "loss": 0.246, + "step": 30265 + }, + { + "epoch": 1.41, + "learning_rate": 1.5414387158465665e-05, + "loss": 0.1213, + "step": 30270 + }, + { + "epoch": 1.41, + "learning_rate": 1.5413603373410876e-05, + "loss": 0.169, + "step": 30275 + }, + { + "epoch": 1.41, + "learning_rate": 1.541281958835609e-05, + "loss": 0.3212, + "step": 30280 + }, + { + "epoch": 1.41, + "learning_rate": 1.5412035803301304e-05, + "loss": 0.1963, + "step": 30285 + }, + { + "epoch": 1.41, + "learning_rate": 1.5411252018246517e-05, + "loss": 0.0683, + "step": 30290 + }, + { + "epoch": 1.41, + "learning_rate": 1.541046823319173e-05, + "loss": 0.0797, + "step": 30295 + }, + { + "epoch": 1.41, + "learning_rate": 1.5409684448136942e-05, + "loss": 0.0972, + "step": 30300 + }, + { + "epoch": 1.41, + "learning_rate": 1.540890066308216e-05, + "loss": 0.1827, + "step": 30305 + }, + { + "epoch": 1.41, + "learning_rate": 1.540811687802737e-05, + "loss": 0.0909, + "step": 30310 + }, + { + "epoch": 1.41, + "learning_rate": 1.5407333092972584e-05, + "loss": 0.1743, + "step": 30315 + }, + { + "epoch": 1.41, + "learning_rate": 1.5406549307917798e-05, + "loss": 0.1385, + "step": 30320 + }, + { + "epoch": 1.42, + "learning_rate": 1.540576552286301e-05, + "loss": 0.2003, + "step": 30325 + }, + { + "epoch": 1.42, + "learning_rate": 1.5404981737808225e-05, + "loss": 0.4004, + "step": 30330 + }, + { + "epoch": 1.42, + "learning_rate": 1.540419795275344e-05, + "loss": 0.2143, + "step": 30335 + }, + { + "epoch": 1.42, + "learning_rate": 1.5403414167698653e-05, + "loss": 0.0175, + "step": 30340 + }, + { + "epoch": 1.42, + "learning_rate": 1.5402630382643864e-05, + "loss": 0.1199, + "step": 30345 + }, + { + "epoch": 1.42, + "learning_rate": 1.5401846597589078e-05, + "loss": 0.0549, + "step": 30350 + }, + { + "epoch": 1.42, + "learning_rate": 1.540106281253429e-05, + "loss": 0.1376, + "step": 30355 + }, + { + "epoch": 1.42, + "learning_rate": 1.5400279027479505e-05, + "loss": 0.1743, + "step": 30360 + }, + { + "epoch": 1.42, + "learning_rate": 1.539949524242472e-05, + "loss": 0.2547, + "step": 30365 + }, + { + "epoch": 1.42, + "learning_rate": 1.5398711457369933e-05, + "loss": 0.1769, + "step": 30370 + }, + { + "epoch": 1.42, + "learning_rate": 1.5397927672315144e-05, + "loss": 0.3749, + "step": 30375 + }, + { + "epoch": 1.42, + "learning_rate": 1.539714388726036e-05, + "loss": 0.5307, + "step": 30380 + }, + { + "epoch": 1.42, + "learning_rate": 1.539636010220557e-05, + "loss": 0.3359, + "step": 30385 + }, + { + "epoch": 1.42, + "learning_rate": 1.5395576317150785e-05, + "loss": 0.1245, + "step": 30390 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394792532096e-05, + "loss": 0.0377, + "step": 30395 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394008747041213e-05, + "loss": 0.1, + "step": 30400 + }, + { + "epoch": 1.42, + "learning_rate": 1.5393224961986427e-05, + "loss": 0.146, + "step": 30405 + }, + { + "epoch": 1.42, + "learning_rate": 1.5392441176931638e-05, + "loss": 0.1585, + "step": 30410 + }, + { + "epoch": 1.42, + "learning_rate": 1.539165739187685e-05, + "loss": 0.2002, + "step": 30415 + }, + { + "epoch": 1.42, + "learning_rate": 1.5390873606822065e-05, + "loss": 0.2221, + "step": 30420 + }, + { + "epoch": 1.42, + "learning_rate": 1.539008982176728e-05, + "loss": 0.2473, + "step": 30425 + }, + { + "epoch": 1.42, + "learning_rate": 1.5389306036712493e-05, + "loss": 0.3531, + "step": 30430 + }, + { + "epoch": 1.42, + "learning_rate": 1.5388522251657707e-05, + "loss": 0.3376, + "step": 30435 + }, + { + "epoch": 1.42, + "learning_rate": 1.538773846660292e-05, + "loss": 0.1212, + "step": 30440 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386954681548135e-05, + "loss": 0.0599, + "step": 30445 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386170896493345e-05, + "loss": 0.0923, + "step": 30450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5385387111438563e-05, + "loss": 0.0752, + "step": 30455 + }, + { + "epoch": 1.42, + "learning_rate": 1.5384603326383773e-05, + "loss": 0.074, + "step": 30460 + }, + { + "epoch": 1.42, + "learning_rate": 1.5383819541328987e-05, + "loss": 0.0945, + "step": 30465 + }, + { + "epoch": 1.42, + "learning_rate": 1.53830357562742e-05, + "loss": 0.2006, + "step": 30470 + }, + { + "epoch": 1.42, + "learning_rate": 1.538225197121941e-05, + "loss": 0.2685, + "step": 30475 + }, + { + "epoch": 1.42, + "learning_rate": 1.538146818616463e-05, + "loss": 0.2831, + "step": 30480 + }, + { + "epoch": 1.42, + "learning_rate": 1.538068440110984e-05, + "loss": 0.1803, + "step": 30485 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379900616055053e-05, + "loss": 0.0256, + "step": 30490 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379116831000267e-05, + "loss": 0.0892, + "step": 30495 + }, + { + "epoch": 1.42, + "learning_rate": 1.537833304594548e-05, + "loss": 0.1554, + "step": 30500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5377549260890695e-05, + "loss": 0.1182, + "step": 30505 + }, + { + "epoch": 1.42, + "learning_rate": 1.537676547583591e-05, + "loss": 0.1168, + "step": 30510 + }, + { + "epoch": 1.42, + "learning_rate": 1.537598169078112e-05, + "loss": 0.1539, + "step": 30515 + }, + { + "epoch": 1.42, + "learning_rate": 1.5375197905726337e-05, + "loss": 0.229, + "step": 30520 + }, + { + "epoch": 1.42, + "learning_rate": 1.5374414120671547e-05, + "loss": 0.2612, + "step": 30525 + }, + { + "epoch": 1.42, + "learning_rate": 1.537363033561676e-05, + "loss": 0.4199, + "step": 30530 + }, + { + "epoch": 1.42, + "learning_rate": 1.5372846550561975e-05, + "loss": 0.3006, + "step": 30535 + }, + { + "epoch": 1.43, + "learning_rate": 1.537206276550719e-05, + "loss": 0.0404, + "step": 30540 + }, + { + "epoch": 1.43, + "learning_rate": 1.5371278980452403e-05, + "loss": 0.0872, + "step": 30545 + }, + { + "epoch": 1.43, + "learning_rate": 1.5370495195397613e-05, + "loss": 0.1091, + "step": 30550 + }, + { + "epoch": 1.43, + "learning_rate": 1.536971141034283e-05, + "loss": 0.0654, + "step": 30555 + }, + { + "epoch": 1.43, + "learning_rate": 1.536892762528804e-05, + "loss": 0.1402, + "step": 30560 + }, + { + "epoch": 1.43, + "learning_rate": 1.5368143840233255e-05, + "loss": 0.1676, + "step": 30565 + }, + { + "epoch": 1.43, + "learning_rate": 1.536736005517847e-05, + "loss": 0.1912, + "step": 30570 + }, + { + "epoch": 1.43, + "learning_rate": 1.5366576270123683e-05, + "loss": 0.3351, + "step": 30575 + }, + { + "epoch": 1.43, + "learning_rate": 1.5365792485068897e-05, + "loss": 0.367, + "step": 30580 + }, + { + "epoch": 1.43, + "learning_rate": 1.536500870001411e-05, + "loss": 0.2385, + "step": 30585 + }, + { + "epoch": 1.43, + "learning_rate": 1.536422491495932e-05, + "loss": 0.0754, + "step": 30590 + }, + { + "epoch": 1.43, + "learning_rate": 1.536344112990454e-05, + "loss": 0.0664, + "step": 30595 + }, + { + "epoch": 1.43, + "learning_rate": 1.536265734484975e-05, + "loss": 0.1467, + "step": 30600 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361873559794963e-05, + "loss": 0.0865, + "step": 30605 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361089774740177e-05, + "loss": 0.1208, + "step": 30610 + }, + { + "epoch": 1.43, + "learning_rate": 1.5360305989685387e-05, + "loss": 0.1351, + "step": 30615 + }, + { + "epoch": 1.43, + "learning_rate": 1.5359522204630605e-05, + "loss": 0.1539, + "step": 30620 + }, + { + "epoch": 1.43, + "learning_rate": 1.5358738419575815e-05, + "loss": 0.2201, + "step": 30625 + }, + { + "epoch": 1.43, + "learning_rate": 1.535795463452103e-05, + "loss": 0.3883, + "step": 30630 + }, + { + "epoch": 1.43, + "learning_rate": 1.5357170849466243e-05, + "loss": 0.2628, + "step": 30635 + }, + { + "epoch": 1.43, + "learning_rate": 1.5356387064411457e-05, + "loss": 0.0461, + "step": 30640 + }, + { + "epoch": 1.43, + "learning_rate": 1.535560327935667e-05, + "loss": 0.0915, + "step": 30645 + }, + { + "epoch": 1.43, + "learning_rate": 1.5354819494301885e-05, + "loss": 0.0698, + "step": 30650 + }, + { + "epoch": 1.43, + "learning_rate": 1.53540357092471e-05, + "loss": 0.1448, + "step": 30655 + }, + { + "epoch": 1.43, + "learning_rate": 1.5353251924192313e-05, + "loss": 0.1386, + "step": 30660 + }, + { + "epoch": 1.43, + "learning_rate": 1.5352468139137523e-05, + "loss": 0.169, + "step": 30665 + }, + { + "epoch": 1.43, + "learning_rate": 1.5351684354082737e-05, + "loss": 0.1716, + "step": 30670 + }, + { + "epoch": 1.43, + "learning_rate": 1.535090056902795e-05, + "loss": 0.1982, + "step": 30675 + }, + { + "epoch": 1.43, + "learning_rate": 1.5350116783973165e-05, + "loss": 0.3828, + "step": 30680 + }, + { + "epoch": 1.43, + "learning_rate": 1.534933299891838e-05, + "loss": 0.2975, + "step": 30685 + }, + { + "epoch": 1.43, + "learning_rate": 1.534854921386359e-05, + "loss": 0.0999, + "step": 30690 + }, + { + "epoch": 1.43, + "learning_rate": 1.5347765428808807e-05, + "loss": 0.0746, + "step": 30695 + }, + { + "epoch": 1.43, + "learning_rate": 1.5346981643754017e-05, + "loss": 0.0579, + "step": 30700 + }, + { + "epoch": 1.43, + "learning_rate": 1.534619785869923e-05, + "loss": 0.1278, + "step": 30705 + }, + { + "epoch": 1.43, + "learning_rate": 1.5345414073644445e-05, + "loss": 0.1335, + "step": 30710 + }, + { + "epoch": 1.43, + "learning_rate": 1.534463028858966e-05, + "loss": 0.1177, + "step": 30715 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343846503534873e-05, + "loss": 0.1861, + "step": 30720 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343062718480087e-05, + "loss": 0.2215, + "step": 30725 + }, + { + "epoch": 1.43, + "learning_rate": 1.5342278933425297e-05, + "loss": 0.3913, + "step": 30730 + }, + { + "epoch": 1.43, + "learning_rate": 1.5341495148370514e-05, + "loss": 0.332, + "step": 30735 + }, + { + "epoch": 1.43, + "learning_rate": 1.5340711363315725e-05, + "loss": 0.0479, + "step": 30740 + }, + { + "epoch": 1.43, + "learning_rate": 1.533992757826094e-05, + "loss": 0.0902, + "step": 30745 + }, + { + "epoch": 1.43, + "learning_rate": 1.5339143793206153e-05, + "loss": 0.0383, + "step": 30750 + }, + { + "epoch": 1.44, + "learning_rate": 1.5338360008151367e-05, + "loss": 0.0873, + "step": 30755 + }, + { + "epoch": 1.44, + "learning_rate": 1.533757622309658e-05, + "loss": 0.058, + "step": 30760 + }, + { + "epoch": 1.44, + "learning_rate": 1.533679243804179e-05, + "loss": 0.1621, + "step": 30765 + }, + { + "epoch": 1.44, + "learning_rate": 1.533600865298701e-05, + "loss": 0.1878, + "step": 30770 + }, + { + "epoch": 1.44, + "learning_rate": 1.533522486793222e-05, + "loss": 0.3271, + "step": 30775 + }, + { + "epoch": 1.44, + "learning_rate": 1.5334441082877433e-05, + "loss": 0.2911, + "step": 30780 + }, + { + "epoch": 1.44, + "learning_rate": 1.5333657297822647e-05, + "loss": 0.2349, + "step": 30785 + }, + { + "epoch": 1.44, + "learning_rate": 1.533287351276786e-05, + "loss": 0.1201, + "step": 30790 + }, + { + "epoch": 1.44, + "learning_rate": 1.5332089727713075e-05, + "loss": 0.0789, + "step": 30795 + }, + { + "epoch": 1.44, + "learning_rate": 1.533130594265829e-05, + "loss": 0.0823, + "step": 30800 + }, + { + "epoch": 1.44, + "learning_rate": 1.53305221576035e-05, + "loss": 0.1153, + "step": 30805 + }, + { + "epoch": 1.44, + "learning_rate": 1.5329738372548713e-05, + "loss": 0.1054, + "step": 30810 + }, + { + "epoch": 1.44, + "learning_rate": 1.5328954587493927e-05, + "loss": 0.1376, + "step": 30815 + }, + { + "epoch": 1.44, + "learning_rate": 1.532817080243914e-05, + "loss": 0.1513, + "step": 30820 + }, + { + "epoch": 1.44, + "learning_rate": 1.5327387017384355e-05, + "loss": 0.2324, + "step": 30825 + }, + { + "epoch": 1.44, + "learning_rate": 1.5326603232329565e-05, + "loss": 0.3203, + "step": 30830 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325819447274782e-05, + "loss": 0.4077, + "step": 30835 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325035662219993e-05, + "loss": 0.0783, + "step": 30840 + }, + { + "epoch": 1.44, + "learning_rate": 1.5324251877165207e-05, + "loss": 0.0441, + "step": 30845 + }, + { + "epoch": 1.44, + "learning_rate": 1.532346809211042e-05, + "loss": 0.0616, + "step": 30850 + }, + { + "epoch": 1.44, + "learning_rate": 1.5322684307055635e-05, + "loss": 0.1068, + "step": 30855 + }, + { + "epoch": 1.44, + "learning_rate": 1.532190052200085e-05, + "loss": 0.1137, + "step": 30860 + }, + { + "epoch": 1.44, + "learning_rate": 1.5321116736946062e-05, + "loss": 0.2083, + "step": 30865 + }, + { + "epoch": 1.44, + "learning_rate": 1.5320332951891276e-05, + "loss": 0.1234, + "step": 30870 + }, + { + "epoch": 1.44, + "learning_rate": 1.5319549166836487e-05, + "loss": 0.2183, + "step": 30875 + }, + { + "epoch": 1.44, + "learning_rate": 1.53187653817817e-05, + "loss": 0.3248, + "step": 30880 + }, + { + "epoch": 1.44, + "learning_rate": 1.5317981596726915e-05, + "loss": 0.2879, + "step": 30885 + }, + { + "epoch": 1.44, + "learning_rate": 1.531719781167213e-05, + "loss": 0.0633, + "step": 30890 + }, + { + "epoch": 1.44, + "learning_rate": 1.5316414026617342e-05, + "loss": 0.0492, + "step": 30895 + }, + { + "epoch": 1.44, + "learning_rate": 1.5315630241562556e-05, + "loss": 0.0931, + "step": 30900 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314846456507767e-05, + "loss": 0.0612, + "step": 30905 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314062671452984e-05, + "loss": 0.0813, + "step": 30910 + }, + { + "epoch": 1.44, + "learning_rate": 1.5313278886398195e-05, + "loss": 0.1622, + "step": 30915 + }, + { + "epoch": 1.44, + "learning_rate": 1.531249510134341e-05, + "loss": 0.1378, + "step": 30920 + }, + { + "epoch": 1.44, + "learning_rate": 1.5311711316288623e-05, + "loss": 0.1609, + "step": 30925 + }, + { + "epoch": 1.44, + "learning_rate": 1.5310927531233836e-05, + "loss": 0.5023, + "step": 30930 + }, + { + "epoch": 1.44, + "learning_rate": 1.531014374617905e-05, + "loss": 0.3593, + "step": 30935 + }, + { + "epoch": 1.44, + "learning_rate": 1.530935996112426e-05, + "loss": 0.1545, + "step": 30940 + }, + { + "epoch": 1.44, + "learning_rate": 1.5308576176069475e-05, + "loss": 0.0696, + "step": 30945 + }, + { + "epoch": 1.44, + "learning_rate": 1.530779239101469e-05, + "loss": 0.0694, + "step": 30950 + }, + { + "epoch": 1.44, + "learning_rate": 1.5307008605959903e-05, + "loss": 0.1546, + "step": 30955 + }, + { + "epoch": 1.44, + "learning_rate": 1.5306224820905116e-05, + "loss": 0.1018, + "step": 30960 + }, + { + "epoch": 1.44, + "learning_rate": 1.530544103585033e-05, + "loss": 0.1363, + "step": 30965 + }, + { + "epoch": 1.45, + "learning_rate": 1.5304657250795544e-05, + "loss": 0.3256, + "step": 30970 + }, + { + "epoch": 1.45, + "learning_rate": 1.5303873465740758e-05, + "loss": 0.3606, + "step": 30975 + }, + { + "epoch": 1.45, + "learning_rate": 1.530308968068597e-05, + "loss": 0.331, + "step": 30980 + }, + { + "epoch": 1.45, + "learning_rate": 1.5302305895631186e-05, + "loss": 0.2471, + "step": 30985 + }, + { + "epoch": 1.45, + "learning_rate": 1.5301522110576397e-05, + "loss": 0.042, + "step": 30990 + }, + { + "epoch": 1.45, + "learning_rate": 1.530073832552161e-05, + "loss": 0.0757, + "step": 30995 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299954540466824e-05, + "loss": 0.0695, + "step": 31000 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299170755412035e-05, + "loss": 0.1101, + "step": 31005 + }, + { + "epoch": 1.45, + "learning_rate": 1.5298386970357252e-05, + "loss": 0.1784, + "step": 31010 + }, + { + "epoch": 1.45, + "learning_rate": 1.5297603185302463e-05, + "loss": 0.1233, + "step": 31015 + }, + { + "epoch": 1.45, + "learning_rate": 1.5296819400247677e-05, + "loss": 0.2507, + "step": 31020 + }, + { + "epoch": 1.45, + "learning_rate": 1.529603561519289e-05, + "loss": 0.248, + "step": 31025 + }, + { + "epoch": 1.45, + "learning_rate": 1.5295251830138104e-05, + "loss": 0.4157, + "step": 31030 + }, + { + "epoch": 1.45, + "learning_rate": 1.5294468045083318e-05, + "loss": 0.2699, + "step": 31035 + }, + { + "epoch": 1.45, + "learning_rate": 1.5293684260028532e-05, + "loss": 0.1, + "step": 31040 + }, + { + "epoch": 1.45, + "learning_rate": 1.5292900474973743e-05, + "loss": 0.0886, + "step": 31045 + }, + { + "epoch": 1.45, + "learning_rate": 1.529211668991896e-05, + "loss": 0.0945, + "step": 31050 + }, + { + "epoch": 1.45, + "learning_rate": 1.529133290486417e-05, + "loss": 0.1045, + "step": 31055 + }, + { + "epoch": 1.45, + "learning_rate": 1.5290549119809384e-05, + "loss": 0.1411, + "step": 31060 + }, + { + "epoch": 1.45, + "learning_rate": 1.52897653347546e-05, + "loss": 0.167, + "step": 31065 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288981549699812e-05, + "loss": 0.2069, + "step": 31070 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288197764645026e-05, + "loss": 0.1847, + "step": 31075 + }, + { + "epoch": 1.45, + "learning_rate": 1.5287413979590237e-05, + "loss": 0.2041, + "step": 31080 + }, + { + "epoch": 1.45, + "learning_rate": 1.5286630194535454e-05, + "loss": 0.2017, + "step": 31085 + }, + { + "epoch": 1.45, + "learning_rate": 1.5285846409480664e-05, + "loss": 0.0352, + "step": 31090 + }, + { + "epoch": 1.45, + "learning_rate": 1.528506262442588e-05, + "loss": 0.0676, + "step": 31095 + }, + { + "epoch": 1.45, + "learning_rate": 1.5284278839371092e-05, + "loss": 0.1651, + "step": 31100 + }, + { + "epoch": 1.45, + "learning_rate": 1.5283495054316306e-05, + "loss": 0.095, + "step": 31105 + }, + { + "epoch": 1.45, + "learning_rate": 1.528271126926152e-05, + "loss": 0.1385, + "step": 31110 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281927484206734e-05, + "loss": 0.16, + "step": 31115 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281143699151944e-05, + "loss": 0.2099, + "step": 31120 + }, + { + "epoch": 1.45, + "learning_rate": 1.5280359914097162e-05, + "loss": 0.3167, + "step": 31125 + }, + { + "epoch": 1.45, + "learning_rate": 1.5279576129042372e-05, + "loss": 0.2805, + "step": 31130 + }, + { + "epoch": 1.45, + "learning_rate": 1.5278792343987586e-05, + "loss": 0.2409, + "step": 31135 + }, + { + "epoch": 1.45, + "learning_rate": 1.52780085589328e-05, + "loss": 0.0747, + "step": 31140 + }, + { + "epoch": 1.45, + "learning_rate": 1.527722477387801e-05, + "loss": 0.0464, + "step": 31145 + }, + { + "epoch": 1.45, + "learning_rate": 1.5276440988823228e-05, + "loss": 0.0867, + "step": 31150 + }, + { + "epoch": 1.45, + "learning_rate": 1.527565720376844e-05, + "loss": 0.1238, + "step": 31155 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274873418713652e-05, + "loss": 0.1296, + "step": 31160 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274089633658866e-05, + "loss": 0.1621, + "step": 31165 + }, + { + "epoch": 1.45, + "learning_rate": 1.527330584860408e-05, + "loss": 0.1919, + "step": 31170 + }, + { + "epoch": 1.45, + "learning_rate": 1.5272522063549294e-05, + "loss": 0.3081, + "step": 31175 + }, + { + "epoch": 1.45, + "learning_rate": 1.5271738278494508e-05, + "loss": 0.3428, + "step": 31180 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270954493439722e-05, + "loss": 0.273, + "step": 31185 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270170708384936e-05, + "loss": 0.1131, + "step": 31190 + }, + { + "epoch": 1.46, + "learning_rate": 1.5269386923330146e-05, + "loss": 0.0283, + "step": 31195 + }, + { + "epoch": 1.46, + "learning_rate": 1.526860313827536e-05, + "loss": 0.0582, + "step": 31200 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267819353220574e-05, + "loss": 0.0585, + "step": 31205 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267035568165788e-05, + "loss": 0.0735, + "step": 31210 + }, + { + "epoch": 1.46, + "learning_rate": 1.5266251783111002e-05, + "loss": 0.272, + "step": 31215 + }, + { + "epoch": 1.46, + "learning_rate": 1.5265467998056212e-05, + "loss": 0.1506, + "step": 31220 + }, + { + "epoch": 1.46, + "learning_rate": 1.526468421300143e-05, + "loss": 0.2312, + "step": 31225 + }, + { + "epoch": 1.46, + "learning_rate": 1.526390042794664e-05, + "loss": 0.2993, + "step": 31230 + }, + { + "epoch": 1.46, + "learning_rate": 1.5263116642891854e-05, + "loss": 0.3461, + "step": 31235 + }, + { + "epoch": 1.46, + "learning_rate": 1.5262332857837068e-05, + "loss": 0.0388, + "step": 31240 + }, + { + "epoch": 1.46, + "learning_rate": 1.5261549072782282e-05, + "loss": 0.0356, + "step": 31245 + }, + { + "epoch": 1.46, + "learning_rate": 1.5260765287727496e-05, + "loss": 0.1398, + "step": 31250 + }, + { + "epoch": 1.46, + "learning_rate": 1.525998150267271e-05, + "loss": 0.0815, + "step": 31255 + }, + { + "epoch": 1.46, + "learning_rate": 1.525919771761792e-05, + "loss": 0.1347, + "step": 31260 + }, + { + "epoch": 1.46, + "learning_rate": 1.5258413932563134e-05, + "loss": 0.1489, + "step": 31265 + }, + { + "epoch": 1.46, + "learning_rate": 1.5257630147508348e-05, + "loss": 0.1687, + "step": 31270 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256846362453562e-05, + "loss": 0.1338, + "step": 31275 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256062577398776e-05, + "loss": 0.3636, + "step": 31280 + }, + { + "epoch": 1.46, + "learning_rate": 1.5255278792343988e-05, + "loss": 0.2142, + "step": 31285 + }, + { + "epoch": 1.46, + "learning_rate": 1.5254495007289202e-05, + "loss": 0.0323, + "step": 31290 + }, + { + "epoch": 1.46, + "learning_rate": 1.5253711222234416e-05, + "loss": 0.0875, + "step": 31295 + }, + { + "epoch": 1.46, + "learning_rate": 1.525292743717963e-05, + "loss": 0.0987, + "step": 31300 + }, + { + "epoch": 1.46, + "learning_rate": 1.5252143652124842e-05, + "loss": 0.1468, + "step": 31305 + }, + { + "epoch": 1.46, + "learning_rate": 1.5251359867070058e-05, + "loss": 0.0654, + "step": 31310 + }, + { + "epoch": 1.46, + "learning_rate": 1.525057608201527e-05, + "loss": 0.1286, + "step": 31315 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249792296960484e-05, + "loss": 0.1423, + "step": 31320 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249008511905696e-05, + "loss": 0.2072, + "step": 31325 + }, + { + "epoch": 1.46, + "learning_rate": 1.5248224726850908e-05, + "loss": 0.3649, + "step": 31330 + }, + { + "epoch": 1.46, + "learning_rate": 1.5247440941796124e-05, + "loss": 0.366, + "step": 31335 + }, + { + "epoch": 1.46, + "learning_rate": 1.5246657156741336e-05, + "loss": 0.0274, + "step": 31340 + }, + { + "epoch": 1.46, + "learning_rate": 1.524587337168655e-05, + "loss": 0.0756, + "step": 31345 + }, + { + "epoch": 1.46, + "learning_rate": 1.5245089586631762e-05, + "loss": 0.1292, + "step": 31350 + }, + { + "epoch": 1.46, + "learning_rate": 1.5244305801576978e-05, + "loss": 0.1784, + "step": 31355 + }, + { + "epoch": 1.46, + "learning_rate": 1.524352201652219e-05, + "loss": 0.1282, + "step": 31360 + }, + { + "epoch": 1.46, + "learning_rate": 1.5242738231467404e-05, + "loss": 0.2259, + "step": 31365 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241954446412616e-05, + "loss": 0.2356, + "step": 31370 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241170661357832e-05, + "loss": 0.2162, + "step": 31375 + }, + { + "epoch": 1.46, + "learning_rate": 1.5240386876303044e-05, + "loss": 0.2779, + "step": 31380 + }, + { + "epoch": 1.46, + "learning_rate": 1.5239603091248258e-05, + "loss": 0.3447, + "step": 31385 + }, + { + "epoch": 1.46, + "learning_rate": 1.523881930619347e-05, + "loss": 0.0535, + "step": 31390 + }, + { + "epoch": 1.46, + "learning_rate": 1.5238035521138684e-05, + "loss": 0.1216, + "step": 31395 + }, + { + "epoch": 1.47, + "learning_rate": 1.5237251736083898e-05, + "loss": 0.0795, + "step": 31400 + }, + { + "epoch": 1.47, + "learning_rate": 1.523646795102911e-05, + "loss": 0.1059, + "step": 31405 + }, + { + "epoch": 1.47, + "learning_rate": 1.5235684165974326e-05, + "loss": 0.079, + "step": 31410 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234900380919538e-05, + "loss": 0.1216, + "step": 31415 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234116595864752e-05, + "loss": 0.1635, + "step": 31420 + }, + { + "epoch": 1.47, + "learning_rate": 1.5233332810809964e-05, + "loss": 0.1558, + "step": 31425 + }, + { + "epoch": 1.47, + "learning_rate": 1.523254902575518e-05, + "loss": 0.405, + "step": 31430 + }, + { + "epoch": 1.47, + "learning_rate": 1.5231765240700392e-05, + "loss": 0.4419, + "step": 31435 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230981455645606e-05, + "loss": 0.0325, + "step": 31440 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230197670590818e-05, + "loss": 0.1278, + "step": 31445 + }, + { + "epoch": 1.47, + "learning_rate": 1.5229413885536033e-05, + "loss": 0.1036, + "step": 31450 + }, + { + "epoch": 1.47, + "learning_rate": 1.5228630100481246e-05, + "loss": 0.1107, + "step": 31455 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227846315426458e-05, + "loss": 0.0893, + "step": 31460 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227062530371672e-05, + "loss": 0.1751, + "step": 31465 + }, + { + "epoch": 1.47, + "learning_rate": 1.5226278745316884e-05, + "loss": 0.1661, + "step": 31470 + }, + { + "epoch": 1.47, + "learning_rate": 1.52254949602621e-05, + "loss": 0.1933, + "step": 31475 + }, + { + "epoch": 1.47, + "learning_rate": 1.5224711175207312e-05, + "loss": 0.2263, + "step": 31480 + }, + { + "epoch": 1.47, + "learning_rate": 1.5223927390152526e-05, + "loss": 0.3486, + "step": 31485 + }, + { + "epoch": 1.47, + "learning_rate": 1.522314360509774e-05, + "loss": 0.0391, + "step": 31490 + }, + { + "epoch": 1.47, + "learning_rate": 1.5222359820042954e-05, + "loss": 0.0702, + "step": 31495 + }, + { + "epoch": 1.47, + "learning_rate": 1.5221576034988166e-05, + "loss": 0.0595, + "step": 31500 + }, + { + "epoch": 1.47, + "learning_rate": 1.522079224993338e-05, + "loss": 0.0184, + "step": 31505 + }, + { + "epoch": 1.47, + "learning_rate": 1.5220008464878594e-05, + "loss": 0.1383, + "step": 31510 + }, + { + "epoch": 1.47, + "learning_rate": 1.5219224679823807e-05, + "loss": 0.1519, + "step": 31515 + }, + { + "epoch": 1.47, + "learning_rate": 1.521844089476902e-05, + "loss": 0.1777, + "step": 31520 + }, + { + "epoch": 1.47, + "learning_rate": 1.5217657109714232e-05, + "loss": 0.2411, + "step": 31525 + }, + { + "epoch": 1.47, + "learning_rate": 1.5216873324659448e-05, + "loss": 0.3028, + "step": 31530 + }, + { + "epoch": 1.47, + "learning_rate": 1.521608953960466e-05, + "loss": 0.379, + "step": 31535 + }, + { + "epoch": 1.47, + "learning_rate": 1.5215305754549874e-05, + "loss": 0.0478, + "step": 31540 + }, + { + "epoch": 1.47, + "learning_rate": 1.5214521969495086e-05, + "loss": 0.0623, + "step": 31545 + }, + { + "epoch": 1.47, + "learning_rate": 1.5213738184440301e-05, + "loss": 0.0523, + "step": 31550 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212954399385514e-05, + "loss": 0.1509, + "step": 31555 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212170614330728e-05, + "loss": 0.1149, + "step": 31560 + }, + { + "epoch": 1.47, + "learning_rate": 1.521138682927594e-05, + "loss": 0.1829, + "step": 31565 + }, + { + "epoch": 1.47, + "learning_rate": 1.5210603044221155e-05, + "loss": 0.1609, + "step": 31570 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209819259166368e-05, + "loss": 0.2295, + "step": 31575 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209035474111581e-05, + "loss": 0.486, + "step": 31580 + }, + { + "epoch": 1.47, + "learning_rate": 1.5208251689056794e-05, + "loss": 0.2702, + "step": 31585 + }, + { + "epoch": 1.47, + "learning_rate": 1.5207467904002008e-05, + "loss": 0.0594, + "step": 31590 + }, + { + "epoch": 1.47, + "learning_rate": 1.5206684118947222e-05, + "loss": 0.1273, + "step": 31595 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205900333892434e-05, + "loss": 0.0883, + "step": 31600 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205116548837648e-05, + "loss": 0.0647, + "step": 31605 + }, + { + "epoch": 1.47, + "learning_rate": 1.5204332763782862e-05, + "loss": 0.0872, + "step": 31610 + }, + { + "epoch": 1.48, + "learning_rate": 1.5203548978728075e-05, + "loss": 0.1148, + "step": 31615 + }, + { + "epoch": 1.48, + "learning_rate": 1.5202765193673288e-05, + "loss": 0.1226, + "step": 31620 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201981408618503e-05, + "loss": 0.2385, + "step": 31625 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201197623563715e-05, + "loss": 0.3862, + "step": 31630 + }, + { + "epoch": 1.48, + "learning_rate": 1.520041383850893e-05, + "loss": 0.3206, + "step": 31635 + }, + { + "epoch": 1.48, + "learning_rate": 1.5199630053454142e-05, + "loss": 0.0743, + "step": 31640 + }, + { + "epoch": 1.48, + "learning_rate": 1.5198846268399357e-05, + "loss": 0.0545, + "step": 31645 + }, + { + "epoch": 1.48, + "learning_rate": 1.519806248334457e-05, + "loss": 0.0978, + "step": 31650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5197278698289782e-05, + "loss": 0.0675, + "step": 31655 + }, + { + "epoch": 1.48, + "learning_rate": 1.5196494913234995e-05, + "loss": 0.1666, + "step": 31660 + }, + { + "epoch": 1.48, + "learning_rate": 1.5195711128180208e-05, + "loss": 0.2834, + "step": 31665 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194927343125423e-05, + "loss": 0.1719, + "step": 31670 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194143558070636e-05, + "loss": 0.2468, + "step": 31675 + }, + { + "epoch": 1.48, + "learning_rate": 1.519335977301585e-05, + "loss": 0.3821, + "step": 31680 + }, + { + "epoch": 1.48, + "learning_rate": 1.5192575987961062e-05, + "loss": 0.3466, + "step": 31685 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191792202906277e-05, + "loss": 0.0438, + "step": 31690 + }, + { + "epoch": 1.48, + "learning_rate": 1.519100841785149e-05, + "loss": 0.066, + "step": 31695 + }, + { + "epoch": 1.48, + "learning_rate": 1.5190224632796703e-05, + "loss": 0.083, + "step": 31700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5189440847741916e-05, + "loss": 0.0671, + "step": 31705 + }, + { + "epoch": 1.48, + "learning_rate": 1.5188657062687131e-05, + "loss": 0.1106, + "step": 31710 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187873277632343e-05, + "loss": 0.1968, + "step": 31715 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187089492577556e-05, + "loss": 0.209, + "step": 31720 + }, + { + "epoch": 1.48, + "learning_rate": 1.5186305707522771e-05, + "loss": 0.1886, + "step": 31725 + }, + { + "epoch": 1.48, + "learning_rate": 1.5185521922467983e-05, + "loss": 0.3375, + "step": 31730 + }, + { + "epoch": 1.48, + "learning_rate": 1.5184738137413197e-05, + "loss": 0.2246, + "step": 31735 + }, + { + "epoch": 1.48, + "learning_rate": 1.518395435235841e-05, + "loss": 0.0597, + "step": 31740 + }, + { + "epoch": 1.48, + "learning_rate": 1.5183170567303625e-05, + "loss": 0.0915, + "step": 31745 + }, + { + "epoch": 1.48, + "learning_rate": 1.5182386782248837e-05, + "loss": 0.1168, + "step": 31750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5181602997194051e-05, + "loss": 0.0906, + "step": 31755 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180819212139263e-05, + "loss": 0.0936, + "step": 31760 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180035427084479e-05, + "loss": 0.1309, + "step": 31765 + }, + { + "epoch": 1.48, + "learning_rate": 1.5179251642029691e-05, + "loss": 0.149, + "step": 31770 + }, + { + "epoch": 1.48, + "learning_rate": 1.5178467856974905e-05, + "loss": 0.2683, + "step": 31775 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177684071920117e-05, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 1.48, + "learning_rate": 1.517690028686533e-05, + "loss": 0.2512, + "step": 31785 + }, + { + "epoch": 1.48, + "learning_rate": 1.5176116501810545e-05, + "loss": 0.059, + "step": 31790 + }, + { + "epoch": 1.48, + "learning_rate": 1.5175332716755757e-05, + "loss": 0.0595, + "step": 31795 + }, + { + "epoch": 1.48, + "learning_rate": 1.5174548931700971e-05, + "loss": 0.0786, + "step": 31800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5173765146646185e-05, + "loss": 0.1279, + "step": 31805 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172981361591399e-05, + "loss": 0.2461, + "step": 31810 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172197576536611e-05, + "loss": 0.1358, + "step": 31815 + }, + { + "epoch": 1.48, + "learning_rate": 1.5171413791481825e-05, + "loss": 0.1626, + "step": 31820 + }, + { + "epoch": 1.48, + "learning_rate": 1.5170630006427039e-05, + "loss": 0.1351, + "step": 31825 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169846221372253e-05, + "loss": 0.3434, + "step": 31830 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169062436317465e-05, + "loss": 0.2988, + "step": 31835 + }, + { + "epoch": 1.49, + "learning_rate": 1.5168278651262681e-05, + "loss": 0.0382, + "step": 31840 + }, + { + "epoch": 1.49, + "learning_rate": 1.5167494866207893e-05, + "loss": 0.0455, + "step": 31845 + }, + { + "epoch": 1.49, + "learning_rate": 1.5166711081153105e-05, + "loss": 0.0726, + "step": 31850 + }, + { + "epoch": 1.49, + "learning_rate": 1.516592729609832e-05, + "loss": 0.0954, + "step": 31855 + }, + { + "epoch": 1.49, + "learning_rate": 1.5165143511043531e-05, + "loss": 0.0936, + "step": 31860 + }, + { + "epoch": 1.49, + "learning_rate": 1.5164359725988747e-05, + "loss": 0.1024, + "step": 31865 + }, + { + "epoch": 1.49, + "learning_rate": 1.516357594093396e-05, + "loss": 0.1589, + "step": 31870 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162792155879173e-05, + "loss": 0.2689, + "step": 31875 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162008370824385e-05, + "loss": 0.3105, + "step": 31880 + }, + { + "epoch": 1.49, + "learning_rate": 1.5161224585769601e-05, + "loss": 0.295, + "step": 31885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5160440800714813e-05, + "loss": 0.0471, + "step": 31890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5159657015660027e-05, + "loss": 0.0563, + "step": 31895 + }, + { + "epoch": 1.49, + "learning_rate": 1.515887323060524e-05, + "loss": 0.0521, + "step": 31900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5158089445550455e-05, + "loss": 0.1391, + "step": 31905 + }, + { + "epoch": 1.49, + "learning_rate": 1.5157305660495667e-05, + "loss": 0.0926, + "step": 31910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515652187544088e-05, + "loss": 0.1678, + "step": 31915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5155738090386093e-05, + "loss": 0.1874, + "step": 31920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154954305331307e-05, + "loss": 0.1741, + "step": 31925 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154170520276521e-05, + "loss": 0.362, + "step": 31930 + }, + { + "epoch": 1.49, + "learning_rate": 1.5153386735221733e-05, + "loss": 0.3195, + "step": 31935 + }, + { + "epoch": 1.49, + "learning_rate": 1.5152602950166949e-05, + "loss": 0.0607, + "step": 31940 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151819165112161e-05, + "loss": 0.0734, + "step": 31945 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151035380057375e-05, + "loss": 0.096, + "step": 31950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5150251595002587e-05, + "loss": 0.1064, + "step": 31955 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149467809947803e-05, + "loss": 0.1528, + "step": 31960 + }, + { + "epoch": 1.49, + "learning_rate": 1.5148684024893015e-05, + "loss": 0.2397, + "step": 31965 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147900239838229e-05, + "loss": 0.2079, + "step": 31970 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147116454783441e-05, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.49, + "learning_rate": 1.5146332669728653e-05, + "loss": 0.4375, + "step": 31980 + }, + { + "epoch": 1.49, + "learning_rate": 1.5145548884673869e-05, + "loss": 0.348, + "step": 31985 + }, + { + "epoch": 1.49, + "learning_rate": 1.5144765099619081e-05, + "loss": 0.0462, + "step": 31990 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143981314564295e-05, + "loss": 0.078, + "step": 31995 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143197529509507e-05, + "loss": 0.0692, + "step": 32000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5142413744454723e-05, + "loss": 0.0825, + "step": 32005 + }, + { + "epoch": 1.49, + "learning_rate": 1.5141629959399935e-05, + "loss": 0.138, + "step": 32010 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140846174345149e-05, + "loss": 0.1539, + "step": 32015 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140062389290361e-05, + "loss": 0.2308, + "step": 32020 + }, + { + "epoch": 1.49, + "learning_rate": 1.5139278604235577e-05, + "loss": 0.2213, + "step": 32025 + }, + { + "epoch": 1.49, + "learning_rate": 1.5138494819180789e-05, + "loss": 0.2618, + "step": 32030 + }, + { + "epoch": 1.49, + "learning_rate": 1.5137711034126003e-05, + "loss": 0.3362, + "step": 32035 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136927249071217e-05, + "loss": 0.0699, + "step": 32040 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136143464016429e-05, + "loss": 0.0228, + "step": 32045 + }, + { + "epoch": 1.5, + "learning_rate": 1.5135359678961643e-05, + "loss": 0.046, + "step": 32050 + }, + { + "epoch": 1.5, + "learning_rate": 1.5134575893906855e-05, + "loss": 0.0907, + "step": 32055 + }, + { + "epoch": 1.5, + "learning_rate": 1.513379210885207e-05, + "loss": 0.1112, + "step": 32060 + }, + { + "epoch": 1.5, + "learning_rate": 1.5133008323797283e-05, + "loss": 0.1013, + "step": 32065 + }, + { + "epoch": 1.5, + "learning_rate": 1.5132224538742497e-05, + "loss": 0.1556, + "step": 32070 + }, + { + "epoch": 1.5, + "learning_rate": 1.5131440753687709e-05, + "loss": 0.24, + "step": 32075 + }, + { + "epoch": 1.5, + "learning_rate": 1.5130656968632925e-05, + "loss": 0.5122, + "step": 32080 + }, + { + "epoch": 1.5, + "learning_rate": 1.5129873183578137e-05, + "loss": 0.2973, + "step": 32085 + }, + { + "epoch": 1.5, + "learning_rate": 1.512908939852335e-05, + "loss": 0.0492, + "step": 32090 + }, + { + "epoch": 1.5, + "learning_rate": 1.5128305613468563e-05, + "loss": 0.0999, + "step": 32095 + }, + { + "epoch": 1.5, + "learning_rate": 1.5127521828413779e-05, + "loss": 0.0568, + "step": 32100 + }, + { + "epoch": 1.5, + "learning_rate": 1.512673804335899e-05, + "loss": 0.0834, + "step": 32105 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125954258304203e-05, + "loss": 0.0824, + "step": 32110 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125170473249417e-05, + "loss": 0.2455, + "step": 32115 + }, + { + "epoch": 1.5, + "learning_rate": 1.512438668819463e-05, + "loss": 0.1645, + "step": 32120 + }, + { + "epoch": 1.5, + "learning_rate": 1.5123602903139845e-05, + "loss": 0.2583, + "step": 32125 + }, + { + "epoch": 1.5, + "learning_rate": 1.5122819118085057e-05, + "loss": 0.3227, + "step": 32130 + }, + { + "epoch": 1.5, + "learning_rate": 1.512203533303027e-05, + "loss": 0.3561, + "step": 32135 + }, + { + "epoch": 1.5, + "learning_rate": 1.5121251547975485e-05, + "loss": 0.0648, + "step": 32140 + }, + { + "epoch": 1.5, + "learning_rate": 1.5120467762920699e-05, + "loss": 0.016, + "step": 32145 + }, + { + "epoch": 1.5, + "learning_rate": 1.511968397786591e-05, + "loss": 0.1238, + "step": 32150 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118900192811126e-05, + "loss": 0.0738, + "step": 32155 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118116407756339e-05, + "loss": 0.113, + "step": 32160 + }, + { + "epoch": 1.5, + "learning_rate": 1.5117332622701553e-05, + "loss": 0.0999, + "step": 32165 + }, + { + "epoch": 1.5, + "learning_rate": 1.5116548837646765e-05, + "loss": 0.2245, + "step": 32170 + }, + { + "epoch": 1.5, + "learning_rate": 1.5115765052591977e-05, + "loss": 0.3073, + "step": 32175 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114981267537193e-05, + "loss": 0.2557, + "step": 32180 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114197482482405e-05, + "loss": 0.2607, + "step": 32185 + }, + { + "epoch": 1.5, + "learning_rate": 1.5113413697427619e-05, + "loss": 0.064, + "step": 32190 + }, + { + "epoch": 1.5, + "learning_rate": 1.5112629912372831e-05, + "loss": 0.076, + "step": 32195 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111846127318046e-05, + "loss": 0.1157, + "step": 32200 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111062342263259e-05, + "loss": 0.075, + "step": 32205 + }, + { + "epoch": 1.5, + "learning_rate": 1.5110278557208473e-05, + "loss": 0.1556, + "step": 32210 + }, + { + "epoch": 1.5, + "learning_rate": 1.5109494772153685e-05, + "loss": 0.1983, + "step": 32215 + }, + { + "epoch": 1.5, + "learning_rate": 1.51087109870989e-05, + "loss": 0.1951, + "step": 32220 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107927202044113e-05, + "loss": 0.2283, + "step": 32225 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107143416989327e-05, + "loss": 0.3268, + "step": 32230 + }, + { + "epoch": 1.5, + "learning_rate": 1.5106359631934539e-05, + "loss": 0.2575, + "step": 32235 + }, + { + "epoch": 1.5, + "learning_rate": 1.5105575846879753e-05, + "loss": 0.046, + "step": 32240 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104792061824967e-05, + "loss": 0.0554, + "step": 32245 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104008276770179e-05, + "loss": 0.0833, + "step": 32250 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103224491715394e-05, + "loss": 0.0969, + "step": 32255 + }, + { + "epoch": 1.51, + "learning_rate": 1.5102440706660607e-05, + "loss": 0.1418, + "step": 32260 + }, + { + "epoch": 1.51, + "learning_rate": 1.510165692160582e-05, + "loss": 0.1293, + "step": 32265 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100873136551033e-05, + "loss": 0.0968, + "step": 32270 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100089351496248e-05, + "loss": 0.3261, + "step": 32275 + }, + { + "epoch": 1.51, + "learning_rate": 1.509930556644146e-05, + "loss": 0.283, + "step": 32280 + }, + { + "epoch": 1.51, + "learning_rate": 1.5098521781386674e-05, + "loss": 0.3071, + "step": 32285 + }, + { + "epoch": 1.51, + "learning_rate": 1.5097737996331887e-05, + "loss": 0.072, + "step": 32290 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096954211277102e-05, + "loss": 0.1006, + "step": 32295 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096170426222314e-05, + "loss": 0.1003, + "step": 32300 + }, + { + "epoch": 1.51, + "learning_rate": 1.5095386641167527e-05, + "loss": 0.1151, + "step": 32305 + }, + { + "epoch": 1.51, + "learning_rate": 1.509460285611274e-05, + "loss": 0.1184, + "step": 32310 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093819071057953e-05, + "loss": 0.1667, + "step": 32315 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093035286003168e-05, + "loss": 0.1405, + "step": 32320 + }, + { + "epoch": 1.51, + "learning_rate": 1.509225150094838e-05, + "loss": 0.2462, + "step": 32325 + }, + { + "epoch": 1.51, + "learning_rate": 1.5091467715893594e-05, + "loss": 0.2843, + "step": 32330 + }, + { + "epoch": 1.51, + "learning_rate": 1.5090683930838808e-05, + "loss": 0.2082, + "step": 32335 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089900145784022e-05, + "loss": 0.0373, + "step": 32340 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089116360729235e-05, + "loss": 0.0384, + "step": 32345 + }, + { + "epoch": 1.51, + "learning_rate": 1.5088332575674448e-05, + "loss": 0.0444, + "step": 32350 + }, + { + "epoch": 1.51, + "learning_rate": 1.5087548790619662e-05, + "loss": 0.0799, + "step": 32355 + }, + { + "epoch": 1.51, + "learning_rate": 1.5086765005564876e-05, + "loss": 0.1469, + "step": 32360 + }, + { + "epoch": 1.51, + "learning_rate": 1.5085981220510088e-05, + "loss": 0.1759, + "step": 32365 + }, + { + "epoch": 1.51, + "learning_rate": 1.50851974354553e-05, + "loss": 0.1966, + "step": 32370 + }, + { + "epoch": 1.51, + "learning_rate": 1.5084413650400516e-05, + "loss": 0.2456, + "step": 32375 + }, + { + "epoch": 1.51, + "learning_rate": 1.5083629865345728e-05, + "loss": 0.3569, + "step": 32380 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082846080290942e-05, + "loss": 0.2519, + "step": 32385 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082062295236155e-05, + "loss": 0.0463, + "step": 32390 + }, + { + "epoch": 1.51, + "learning_rate": 1.508127851018137e-05, + "loss": 0.0796, + "step": 32395 + }, + { + "epoch": 1.51, + "learning_rate": 1.5080494725126582e-05, + "loss": 0.0918, + "step": 32400 + }, + { + "epoch": 1.51, + "learning_rate": 1.5079710940071796e-05, + "loss": 0.0762, + "step": 32405 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078927155017009e-05, + "loss": 0.0586, + "step": 32410 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078143369962224e-05, + "loss": 0.1551, + "step": 32415 + }, + { + "epoch": 1.51, + "learning_rate": 1.5077359584907436e-05, + "loss": 0.2989, + "step": 32420 + }, + { + "epoch": 1.51, + "learning_rate": 1.507657579985265e-05, + "loss": 0.2065, + "step": 32425 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075792014797862e-05, + "loss": 0.3864, + "step": 32430 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075008229743076e-05, + "loss": 0.2009, + "step": 32435 + }, + { + "epoch": 1.51, + "learning_rate": 1.507422444468829e-05, + "loss": 0.0476, + "step": 32440 + }, + { + "epoch": 1.51, + "learning_rate": 1.5073440659633502e-05, + "loss": 0.1002, + "step": 32445 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072656874578716e-05, + "loss": 0.1062, + "step": 32450 + }, + { + "epoch": 1.51, + "learning_rate": 1.507187308952393e-05, + "loss": 0.0847, + "step": 32455 + }, + { + "epoch": 1.51, + "learning_rate": 1.5071089304469144e-05, + "loss": 0.0809, + "step": 32460 + }, + { + "epoch": 1.51, + "learning_rate": 1.5070305519414356e-05, + "loss": 0.1638, + "step": 32465 + }, + { + "epoch": 1.52, + "learning_rate": 1.5069521734359572e-05, + "loss": 0.2693, + "step": 32470 + }, + { + "epoch": 1.52, + "learning_rate": 1.5068737949304784e-05, + "loss": 0.2582, + "step": 32475 + }, + { + "epoch": 1.52, + "learning_rate": 1.5067954164249998e-05, + "loss": 0.2734, + "step": 32480 + }, + { + "epoch": 1.52, + "learning_rate": 1.506717037919521e-05, + "loss": 0.2822, + "step": 32485 + }, + { + "epoch": 1.52, + "learning_rate": 1.5066386594140426e-05, + "loss": 0.0517, + "step": 32490 + }, + { + "epoch": 1.52, + "learning_rate": 1.5065602809085638e-05, + "loss": 0.0486, + "step": 32495 + }, + { + "epoch": 1.52, + "learning_rate": 1.506481902403085e-05, + "loss": 0.0478, + "step": 32500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5064035238976064e-05, + "loss": 0.1418, + "step": 32505 + }, + { + "epoch": 1.52, + "learning_rate": 1.5063251453921276e-05, + "loss": 0.1221, + "step": 32510 + }, + { + "epoch": 1.52, + "learning_rate": 1.5062467668866492e-05, + "loss": 0.168, + "step": 32515 + }, + { + "epoch": 1.52, + "learning_rate": 1.5061683883811704e-05, + "loss": 0.196, + "step": 32520 + }, + { + "epoch": 1.52, + "learning_rate": 1.5060900098756918e-05, + "loss": 0.2721, + "step": 32525 + }, + { + "epoch": 1.52, + "learning_rate": 1.506011631370213e-05, + "loss": 0.3576, + "step": 32530 + }, + { + "epoch": 1.52, + "learning_rate": 1.5059332528647346e-05, + "loss": 0.3454, + "step": 32535 + }, + { + "epoch": 1.52, + "learning_rate": 1.5058548743592558e-05, + "loss": 0.0558, + "step": 32540 + }, + { + "epoch": 1.52, + "learning_rate": 1.5057764958537772e-05, + "loss": 0.0572, + "step": 32545 + }, + { + "epoch": 1.52, + "learning_rate": 1.5056981173482984e-05, + "loss": 0.0613, + "step": 32550 + }, + { + "epoch": 1.52, + "learning_rate": 1.50561973884282e-05, + "loss": 0.1226, + "step": 32555 + }, + { + "epoch": 1.52, + "learning_rate": 1.5055413603373412e-05, + "loss": 0.1035, + "step": 32560 + }, + { + "epoch": 1.52, + "learning_rate": 1.5054629818318624e-05, + "loss": 0.0922, + "step": 32565 + }, + { + "epoch": 1.52, + "learning_rate": 1.505384603326384e-05, + "loss": 0.2358, + "step": 32570 + }, + { + "epoch": 1.52, + "learning_rate": 1.5053062248209052e-05, + "loss": 0.1497, + "step": 32575 + }, + { + "epoch": 1.52, + "learning_rate": 1.5052278463154266e-05, + "loss": 0.3272, + "step": 32580 + }, + { + "epoch": 1.52, + "learning_rate": 1.5051494678099478e-05, + "loss": 0.3985, + "step": 32585 + }, + { + "epoch": 1.52, + "learning_rate": 1.5050710893044694e-05, + "loss": 0.044, + "step": 32590 + }, + { + "epoch": 1.52, + "learning_rate": 1.5049927107989906e-05, + "loss": 0.1202, + "step": 32595 + }, + { + "epoch": 1.52, + "learning_rate": 1.504914332293512e-05, + "loss": 0.1429, + "step": 32600 + }, + { + "epoch": 1.52, + "learning_rate": 1.5048359537880332e-05, + "loss": 0.0826, + "step": 32605 + }, + { + "epoch": 1.52, + "learning_rate": 1.5047575752825548e-05, + "loss": 0.1068, + "step": 32610 + }, + { + "epoch": 1.52, + "learning_rate": 1.504679196777076e-05, + "loss": 0.2536, + "step": 32615 + }, + { + "epoch": 1.52, + "learning_rate": 1.5046008182715974e-05, + "loss": 0.2464, + "step": 32620 + }, + { + "epoch": 1.52, + "learning_rate": 1.5045224397661186e-05, + "loss": 0.4023, + "step": 32625 + }, + { + "epoch": 1.52, + "learning_rate": 1.5044440612606398e-05, + "loss": 0.4528, + "step": 32630 + }, + { + "epoch": 1.52, + "learning_rate": 1.5043656827551614e-05, + "loss": 0.2417, + "step": 32635 + }, + { + "epoch": 1.52, + "learning_rate": 1.5042873042496826e-05, + "loss": 0.0292, + "step": 32640 + }, + { + "epoch": 1.52, + "learning_rate": 1.504208925744204e-05, + "loss": 0.0988, + "step": 32645 + }, + { + "epoch": 1.52, + "learning_rate": 1.5041305472387254e-05, + "loss": 0.067, + "step": 32650 + }, + { + "epoch": 1.52, + "learning_rate": 1.5040521687332468e-05, + "loss": 0.1081, + "step": 32655 + }, + { + "epoch": 1.52, + "learning_rate": 1.503973790227768e-05, + "loss": 0.1431, + "step": 32660 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038954117222894e-05, + "loss": 0.1415, + "step": 32665 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038170332168108e-05, + "loss": 0.1489, + "step": 32670 + }, + { + "epoch": 1.52, + "learning_rate": 1.5037386547113322e-05, + "loss": 0.187, + "step": 32675 + }, + { + "epoch": 1.52, + "learning_rate": 1.5036602762058534e-05, + "loss": 0.3941, + "step": 32680 + }, + { + "epoch": 1.53, + "learning_rate": 1.503581897700375e-05, + "loss": 0.267, + "step": 32685 + }, + { + "epoch": 1.53, + "learning_rate": 1.5035035191948962e-05, + "loss": 0.0578, + "step": 32690 + }, + { + "epoch": 1.53, + "learning_rate": 1.5034251406894174e-05, + "loss": 0.0391, + "step": 32695 + }, + { + "epoch": 1.53, + "learning_rate": 1.5033467621839388e-05, + "loss": 0.0686, + "step": 32700 + }, + { + "epoch": 1.53, + "learning_rate": 1.50326838367846e-05, + "loss": 0.1086, + "step": 32705 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031900051729816e-05, + "loss": 0.1141, + "step": 32710 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031116266675028e-05, + "loss": 0.1051, + "step": 32715 + }, + { + "epoch": 1.53, + "learning_rate": 1.5030332481620242e-05, + "loss": 0.2131, + "step": 32720 + }, + { + "epoch": 1.53, + "learning_rate": 1.5029548696565454e-05, + "loss": 0.2887, + "step": 32725 + }, + { + "epoch": 1.53, + "learning_rate": 1.502876491151067e-05, + "loss": 0.4061, + "step": 32730 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027981126455882e-05, + "loss": 0.3098, + "step": 32735 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027197341401096e-05, + "loss": 0.0542, + "step": 32740 + }, + { + "epoch": 1.53, + "learning_rate": 1.5026413556346308e-05, + "loss": 0.0951, + "step": 32745 + }, + { + "epoch": 1.53, + "learning_rate": 1.5025629771291524e-05, + "loss": 0.0702, + "step": 32750 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024845986236736e-05, + "loss": 0.0914, + "step": 32755 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024062201181948e-05, + "loss": 0.1323, + "step": 32760 + }, + { + "epoch": 1.53, + "learning_rate": 1.5023278416127162e-05, + "loss": 0.1518, + "step": 32765 + }, + { + "epoch": 1.53, + "learning_rate": 1.5022494631072376e-05, + "loss": 0.2011, + "step": 32770 + }, + { + "epoch": 1.53, + "learning_rate": 1.502171084601759e-05, + "loss": 0.1865, + "step": 32775 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020927060962802e-05, + "loss": 0.328, + "step": 32780 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020143275908018e-05, + "loss": 0.3728, + "step": 32785 + }, + { + "epoch": 1.53, + "learning_rate": 1.501935949085323e-05, + "loss": 0.0544, + "step": 32790 + }, + { + "epoch": 1.53, + "learning_rate": 1.5018575705798444e-05, + "loss": 0.0543, + "step": 32795 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017791920743656e-05, + "loss": 0.0897, + "step": 32800 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017008135688871e-05, + "loss": 0.1329, + "step": 32805 + }, + { + "epoch": 1.53, + "learning_rate": 1.5016224350634084e-05, + "loss": 0.0871, + "step": 32810 + }, + { + "epoch": 1.53, + "learning_rate": 1.5015440565579298e-05, + "loss": 0.1539, + "step": 32815 + }, + { + "epoch": 1.53, + "learning_rate": 1.501465678052451e-05, + "loss": 0.2018, + "step": 32820 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013872995469722e-05, + "loss": 0.1788, + "step": 32825 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013089210414938e-05, + "loss": 0.4134, + "step": 32830 + }, + { + "epoch": 1.53, + "learning_rate": 1.501230542536015e-05, + "loss": 0.3125, + "step": 32835 + }, + { + "epoch": 1.53, + "learning_rate": 1.5011521640305364e-05, + "loss": 0.0423, + "step": 32840 + }, + { + "epoch": 1.53, + "learning_rate": 1.5010737855250576e-05, + "loss": 0.0694, + "step": 32845 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009954070195792e-05, + "loss": 0.0747, + "step": 32850 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009170285141004e-05, + "loss": 0.1646, + "step": 32855 + }, + { + "epoch": 1.53, + "learning_rate": 1.5008386500086218e-05, + "loss": 0.1413, + "step": 32860 + }, + { + "epoch": 1.53, + "learning_rate": 1.500760271503143e-05, + "loss": 0.1363, + "step": 32865 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006818929976645e-05, + "loss": 0.1389, + "step": 32870 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006035144921858e-05, + "loss": 0.313, + "step": 32875 + }, + { + "epoch": 1.53, + "learning_rate": 1.5005251359867072e-05, + "loss": 0.4249, + "step": 32880 + }, + { + "epoch": 1.53, + "learning_rate": 1.5004467574812286e-05, + "loss": 0.2864, + "step": 32885 + }, + { + "epoch": 1.53, + "learning_rate": 1.5003683789757498e-05, + "loss": 0.0551, + "step": 32890 + }, + { + "epoch": 1.53, + "learning_rate": 1.5002900004702712e-05, + "loss": 0.0842, + "step": 32895 + }, + { + "epoch": 1.54, + "learning_rate": 1.5002116219647924e-05, + "loss": 0.0557, + "step": 32900 + }, + { + "epoch": 1.54, + "learning_rate": 1.500133243459314e-05, + "loss": 0.1035, + "step": 32905 + }, + { + "epoch": 1.54, + "learning_rate": 1.5000548649538352e-05, + "loss": 0.0518, + "step": 32910 + }, + { + "epoch": 1.54, + "learning_rate": 1.4999764864483566e-05, + "loss": 0.1531, + "step": 32915 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998981079428778e-05, + "loss": 0.216, + "step": 32920 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998197294373993e-05, + "loss": 0.1716, + "step": 32925 + }, + { + "epoch": 1.54, + "learning_rate": 1.4997413509319206e-05, + "loss": 0.39, + "step": 32930 + }, + { + "epoch": 1.54, + "learning_rate": 1.499662972426442e-05, + "loss": 0.2803, + "step": 32935 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995845939209632e-05, + "loss": 0.1181, + "step": 32940 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995062154154847e-05, + "loss": 0.0665, + "step": 32945 + }, + { + "epoch": 1.54, + "learning_rate": 1.499427836910006e-05, + "loss": 0.0879, + "step": 32950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4993494584045272e-05, + "loss": 0.111, + "step": 32955 + }, + { + "epoch": 1.54, + "learning_rate": 1.4992710798990486e-05, + "loss": 0.1512, + "step": 32960 + }, + { + "epoch": 1.54, + "learning_rate": 1.49919270139357e-05, + "loss": 0.1847, + "step": 32965 + }, + { + "epoch": 1.54, + "learning_rate": 1.4991143228880913e-05, + "loss": 0.1602, + "step": 32970 + }, + { + "epoch": 1.54, + "learning_rate": 1.4990359443826126e-05, + "loss": 0.3279, + "step": 32975 + }, + { + "epoch": 1.54, + "learning_rate": 1.498957565877134e-05, + "loss": 0.2164, + "step": 32980 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988791873716553e-05, + "loss": 0.2688, + "step": 32985 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988008088661767e-05, + "loss": 0.0506, + "step": 32990 + }, + { + "epoch": 1.54, + "learning_rate": 1.498722430360698e-05, + "loss": 0.0692, + "step": 32995 + }, + { + "epoch": 1.54, + "learning_rate": 1.4986440518552195e-05, + "loss": 0.1243, + "step": 33000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4985656733497407e-05, + "loss": 0.1195, + "step": 33005 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984872948442621e-05, + "loss": 0.1462, + "step": 33010 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984089163387834e-05, + "loss": 0.121, + "step": 33015 + }, + { + "epoch": 1.54, + "learning_rate": 1.4983305378333046e-05, + "loss": 0.1771, + "step": 33020 + }, + { + "epoch": 1.54, + "learning_rate": 1.4982521593278261e-05, + "loss": 0.1746, + "step": 33025 + }, + { + "epoch": 1.54, + "learning_rate": 1.4981737808223474e-05, + "loss": 0.3095, + "step": 33030 + }, + { + "epoch": 1.54, + "learning_rate": 1.4980954023168687e-05, + "loss": 0.2838, + "step": 33035 + }, + { + "epoch": 1.54, + "learning_rate": 1.49801702381139e-05, + "loss": 0.0275, + "step": 33040 + }, + { + "epoch": 1.54, + "learning_rate": 1.4979386453059115e-05, + "loss": 0.0868, + "step": 33045 + }, + { + "epoch": 1.54, + "learning_rate": 1.4978602668004327e-05, + "loss": 0.0809, + "step": 33050 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977818882949541e-05, + "loss": 0.1187, + "step": 33055 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977035097894754e-05, + "loss": 0.1628, + "step": 33060 + }, + { + "epoch": 1.54, + "learning_rate": 1.497625131283997e-05, + "loss": 0.1992, + "step": 33065 + }, + { + "epoch": 1.54, + "learning_rate": 1.4975467527785181e-05, + "loss": 0.2054, + "step": 33070 + }, + { + "epoch": 1.54, + "learning_rate": 1.4974683742730395e-05, + "loss": 0.2641, + "step": 33075 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973899957675608e-05, + "loss": 0.4842, + "step": 33080 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973116172620821e-05, + "loss": 0.4444, + "step": 33085 + }, + { + "epoch": 1.54, + "learning_rate": 1.4972332387566035e-05, + "loss": 0.0517, + "step": 33090 + }, + { + "epoch": 1.54, + "learning_rate": 1.4971548602511248e-05, + "loss": 0.0611, + "step": 33095 + }, + { + "epoch": 1.54, + "learning_rate": 1.4970764817456463e-05, + "loss": 0.0702, + "step": 33100 + }, + { + "epoch": 1.54, + "learning_rate": 1.4969981032401675e-05, + "loss": 0.143, + "step": 33105 + }, + { + "epoch": 1.54, + "learning_rate": 1.496919724734689e-05, + "loss": 0.1304, + "step": 33110 + }, + { + "epoch": 1.55, + "learning_rate": 1.4968413462292101e-05, + "loss": 0.1834, + "step": 33115 + }, + { + "epoch": 1.55, + "learning_rate": 1.4967629677237317e-05, + "loss": 0.1337, + "step": 33120 + }, + { + "epoch": 1.55, + "learning_rate": 1.496684589218253e-05, + "loss": 0.213, + "step": 33125 + }, + { + "epoch": 1.55, + "learning_rate": 1.49662188641387e-05, + "loss": 0.3345, + "step": 33130 + }, + { + "epoch": 1.55, + "learning_rate": 1.4965435079083914e-05, + "loss": 0.307, + "step": 33135 + }, + { + "epoch": 1.55, + "learning_rate": 1.4964651294029126e-05, + "loss": 0.063, + "step": 33140 + }, + { + "epoch": 1.55, + "learning_rate": 1.496386750897434e-05, + "loss": 0.0567, + "step": 33145 + }, + { + "epoch": 1.55, + "learning_rate": 1.4963083723919554e-05, + "loss": 0.0812, + "step": 33150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4962299938864766e-05, + "loss": 0.0735, + "step": 33155 + }, + { + "epoch": 1.55, + "learning_rate": 1.4961516153809981e-05, + "loss": 0.1804, + "step": 33160 + }, + { + "epoch": 1.55, + "learning_rate": 1.4960732368755194e-05, + "loss": 0.125, + "step": 33165 + }, + { + "epoch": 1.55, + "learning_rate": 1.4959948583700408e-05, + "loss": 0.2112, + "step": 33170 + }, + { + "epoch": 1.55, + "learning_rate": 1.495916479864562e-05, + "loss": 0.2541, + "step": 33175 + }, + { + "epoch": 1.55, + "learning_rate": 1.4958381013590835e-05, + "loss": 0.2793, + "step": 33180 + }, + { + "epoch": 1.55, + "learning_rate": 1.4957597228536048e-05, + "loss": 0.242, + "step": 33185 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956813443481262e-05, + "loss": 0.0286, + "step": 33190 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956029658426474e-05, + "loss": 0.057, + "step": 33195 + }, + { + "epoch": 1.55, + "learning_rate": 1.495524587337169e-05, + "loss": 0.087, + "step": 33200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4954462088316902e-05, + "loss": 0.0883, + "step": 33205 + }, + { + "epoch": 1.55, + "learning_rate": 1.4953678303262114e-05, + "loss": 0.1055, + "step": 33210 + }, + { + "epoch": 1.55, + "learning_rate": 1.4952894518207328e-05, + "loss": 0.4157, + "step": 33215 + }, + { + "epoch": 1.55, + "learning_rate": 1.495211073315254e-05, + "loss": 0.1814, + "step": 33220 + }, + { + "epoch": 1.55, + "learning_rate": 1.4951326948097755e-05, + "loss": 0.301, + "step": 33225 + }, + { + "epoch": 1.55, + "learning_rate": 1.4950543163042968e-05, + "loss": 0.3661, + "step": 33230 + }, + { + "epoch": 1.55, + "learning_rate": 1.4949759377988182e-05, + "loss": 0.2502, + "step": 33235 + }, + { + "epoch": 1.55, + "learning_rate": 1.4948975592933394e-05, + "loss": 0.0987, + "step": 33240 + }, + { + "epoch": 1.55, + "learning_rate": 1.494819180787861e-05, + "loss": 0.0505, + "step": 33245 + }, + { + "epoch": 1.55, + "learning_rate": 1.4947408022823822e-05, + "loss": 0.1345, + "step": 33250 + }, + { + "epoch": 1.55, + "learning_rate": 1.4946624237769036e-05, + "loss": 0.1235, + "step": 33255 + }, + { + "epoch": 1.55, + "learning_rate": 1.494584045271425e-05, + "loss": 0.1194, + "step": 33260 + }, + { + "epoch": 1.55, + "learning_rate": 1.4945056667659463e-05, + "loss": 0.1738, + "step": 33265 + }, + { + "epoch": 1.55, + "learning_rate": 1.4944272882604676e-05, + "loss": 0.2238, + "step": 33270 + }, + { + "epoch": 1.55, + "learning_rate": 1.4943489097549888e-05, + "loss": 0.295, + "step": 33275 + }, + { + "epoch": 1.55, + "learning_rate": 1.4942705312495103e-05, + "loss": 0.488, + "step": 33280 + }, + { + "epoch": 1.55, + "learning_rate": 1.4941921527440316e-05, + "loss": 0.392, + "step": 33285 + }, + { + "epoch": 1.55, + "learning_rate": 1.494113774238553e-05, + "loss": 0.0366, + "step": 33290 + }, + { + "epoch": 1.55, + "learning_rate": 1.4940353957330742e-05, + "loss": 0.0348, + "step": 33295 + }, + { + "epoch": 1.55, + "learning_rate": 1.4939570172275957e-05, + "loss": 0.0887, + "step": 33300 + }, + { + "epoch": 1.55, + "learning_rate": 1.493878638722117e-05, + "loss": 0.1132, + "step": 33305 + }, + { + "epoch": 1.55, + "learning_rate": 1.4938002602166383e-05, + "loss": 0.1445, + "step": 33310 + }, + { + "epoch": 1.55, + "learning_rate": 1.4937218817111596e-05, + "loss": 0.0744, + "step": 33315 + }, + { + "epoch": 1.55, + "learning_rate": 1.4936435032056811e-05, + "loss": 0.2349, + "step": 33320 + }, + { + "epoch": 1.55, + "learning_rate": 1.4935651247002023e-05, + "loss": 0.3013, + "step": 33325 + }, + { + "epoch": 1.56, + "learning_rate": 1.4934867461947237e-05, + "loss": 0.3091, + "step": 33330 + }, + { + "epoch": 1.56, + "learning_rate": 1.493408367689245e-05, + "loss": 0.3184, + "step": 33335 + }, + { + "epoch": 1.56, + "learning_rate": 1.4933299891837663e-05, + "loss": 0.0297, + "step": 33340 + }, + { + "epoch": 1.56, + "learning_rate": 1.4932516106782877e-05, + "loss": 0.0657, + "step": 33345 + }, + { + "epoch": 1.56, + "learning_rate": 1.493173232172809e-05, + "loss": 0.0838, + "step": 33350 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930948536673303e-05, + "loss": 0.0415, + "step": 33355 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930164751618517e-05, + "loss": 0.1043, + "step": 33360 + }, + { + "epoch": 1.56, + "learning_rate": 1.4929380966563731e-05, + "loss": 0.1528, + "step": 33365 + }, + { + "epoch": 1.56, + "learning_rate": 1.4928597181508943e-05, + "loss": 0.1359, + "step": 33370 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927813396454159e-05, + "loss": 0.2899, + "step": 33375 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927029611399371e-05, + "loss": 0.3585, + "step": 33380 + }, + { + "epoch": 1.56, + "learning_rate": 1.4926245826344585e-05, + "loss": 0.3395, + "step": 33385 + }, + { + "epoch": 1.56, + "learning_rate": 1.4925462041289797e-05, + "loss": 0.0484, + "step": 33390 + }, + { + "epoch": 1.56, + "learning_rate": 1.4924678256235013e-05, + "loss": 0.0426, + "step": 33395 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923894471180225e-05, + "loss": 0.0988, + "step": 33400 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923110686125437e-05, + "loss": 0.1479, + "step": 33405 + }, + { + "epoch": 1.56, + "learning_rate": 1.4922326901070651e-05, + "loss": 0.1827, + "step": 33410 + }, + { + "epoch": 1.56, + "learning_rate": 1.4921543116015864e-05, + "loss": 0.1848, + "step": 33415 + }, + { + "epoch": 1.56, + "learning_rate": 1.492075933096108e-05, + "loss": 0.1389, + "step": 33420 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919975545906291e-05, + "loss": 0.2672, + "step": 33425 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919191760851505e-05, + "loss": 0.3146, + "step": 33430 + }, + { + "epoch": 1.56, + "learning_rate": 1.4918407975796717e-05, + "loss": 0.206, + "step": 33435 + }, + { + "epoch": 1.56, + "learning_rate": 1.4917624190741933e-05, + "loss": 0.0536, + "step": 33440 + }, + { + "epoch": 1.56, + "learning_rate": 1.4916840405687145e-05, + "loss": 0.0521, + "step": 33445 + }, + { + "epoch": 1.56, + "learning_rate": 1.491605662063236e-05, + "loss": 0.053, + "step": 33450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4915272835577571e-05, + "loss": 0.1253, + "step": 33455 + }, + { + "epoch": 1.56, + "learning_rate": 1.4914489050522787e-05, + "loss": 0.1643, + "step": 33460 + }, + { + "epoch": 1.56, + "learning_rate": 1.4913705265468e-05, + "loss": 0.1678, + "step": 33465 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912921480413211e-05, + "loss": 0.1316, + "step": 33470 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912137695358427e-05, + "loss": 0.3028, + "step": 33475 + }, + { + "epoch": 1.56, + "learning_rate": 1.491135391030364e-05, + "loss": 0.4472, + "step": 33480 + }, + { + "epoch": 1.56, + "learning_rate": 1.4910570125248853e-05, + "loss": 0.2954, + "step": 33485 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909786340194065e-05, + "loss": 0.0151, + "step": 33490 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909002555139281e-05, + "loss": 0.0831, + "step": 33495 + }, + { + "epoch": 1.56, + "learning_rate": 1.4908218770084493e-05, + "loss": 0.0586, + "step": 33500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4907434985029707e-05, + "loss": 0.1487, + "step": 33505 + }, + { + "epoch": 1.56, + "learning_rate": 1.490665119997492e-05, + "loss": 0.0689, + "step": 33510 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905867414920135e-05, + "loss": 0.1569, + "step": 33515 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905083629865347e-05, + "loss": 0.1794, + "step": 33520 + }, + { + "epoch": 1.56, + "learning_rate": 1.4904299844810561e-05, + "loss": 0.2747, + "step": 33525 + }, + { + "epoch": 1.56, + "learning_rate": 1.4903516059755773e-05, + "loss": 0.3703, + "step": 33530 + }, + { + "epoch": 1.56, + "learning_rate": 1.4902732274700985e-05, + "loss": 0.336, + "step": 33535 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901948489646201e-05, + "loss": 0.0809, + "step": 33540 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901164704591413e-05, + "loss": 0.0574, + "step": 33545 + }, + { + "epoch": 1.57, + "learning_rate": 1.4900380919536627e-05, + "loss": 0.0474, + "step": 33550 + }, + { + "epoch": 1.57, + "learning_rate": 1.489959713448184e-05, + "loss": 0.1294, + "step": 33555 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898813349427055e-05, + "loss": 0.1134, + "step": 33560 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898029564372267e-05, + "loss": 0.1304, + "step": 33565 + }, + { + "epoch": 1.57, + "learning_rate": 1.4897245779317481e-05, + "loss": 0.1349, + "step": 33570 + }, + { + "epoch": 1.57, + "learning_rate": 1.4896461994262695e-05, + "loss": 0.1339, + "step": 33575 + }, + { + "epoch": 1.57, + "learning_rate": 1.4895678209207909e-05, + "loss": 0.3101, + "step": 33580 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894894424153121e-05, + "loss": 0.2441, + "step": 33585 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894110639098337e-05, + "loss": 0.0574, + "step": 33590 + }, + { + "epoch": 1.57, + "learning_rate": 1.4893326854043549e-05, + "loss": 0.1494, + "step": 33595 + }, + { + "epoch": 1.57, + "learning_rate": 1.4892543068988761e-05, + "loss": 0.0818, + "step": 33600 + }, + { + "epoch": 1.57, + "learning_rate": 1.4891759283933975e-05, + "loss": 0.0343, + "step": 33605 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890975498879187e-05, + "loss": 0.0497, + "step": 33610 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890191713824403e-05, + "loss": 0.1251, + "step": 33615 + }, + { + "epoch": 1.57, + "learning_rate": 1.4889407928769615e-05, + "loss": 0.1593, + "step": 33620 + }, + { + "epoch": 1.57, + "learning_rate": 1.4888624143714829e-05, + "loss": 0.1829, + "step": 33625 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887840358660041e-05, + "loss": 0.4039, + "step": 33630 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887056573605257e-05, + "loss": 0.2516, + "step": 33635 + }, + { + "epoch": 1.57, + "learning_rate": 1.4886272788550469e-05, + "loss": 0.0328, + "step": 33640 + }, + { + "epoch": 1.57, + "learning_rate": 1.4885489003495683e-05, + "loss": 0.0959, + "step": 33645 + }, + { + "epoch": 1.57, + "learning_rate": 1.4884705218440895e-05, + "loss": 0.0733, + "step": 33650 + }, + { + "epoch": 1.57, + "learning_rate": 1.488392143338611e-05, + "loss": 0.0875, + "step": 33655 + }, + { + "epoch": 1.57, + "learning_rate": 1.4883137648331323e-05, + "loss": 0.0994, + "step": 33660 + }, + { + "epoch": 1.57, + "learning_rate": 1.4882353863276535e-05, + "loss": 0.1776, + "step": 33665 + }, + { + "epoch": 1.57, + "learning_rate": 1.4881570078221749e-05, + "loss": 0.1794, + "step": 33670 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880786293166963e-05, + "loss": 0.2112, + "step": 33675 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880002508112177e-05, + "loss": 0.2123, + "step": 33680 + }, + { + "epoch": 1.57, + "learning_rate": 1.4879218723057389e-05, + "loss": 0.4273, + "step": 33685 + }, + { + "epoch": 1.57, + "learning_rate": 1.4878434938002605e-05, + "loss": 0.0487, + "step": 33690 + }, + { + "epoch": 1.57, + "learning_rate": 1.4877651152947817e-05, + "loss": 0.0653, + "step": 33695 + }, + { + "epoch": 1.57, + "learning_rate": 1.487686736789303e-05, + "loss": 0.0647, + "step": 33700 + }, + { + "epoch": 1.57, + "learning_rate": 1.4876083582838243e-05, + "loss": 0.0774, + "step": 33705 + }, + { + "epoch": 1.57, + "learning_rate": 1.4875299797783459e-05, + "loss": 0.1084, + "step": 33710 + }, + { + "epoch": 1.57, + "learning_rate": 1.487451601272867e-05, + "loss": 0.1575, + "step": 33715 + }, + { + "epoch": 1.57, + "learning_rate": 1.4873732227673885e-05, + "loss": 0.1553, + "step": 33720 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872948442619097e-05, + "loss": 0.2492, + "step": 33725 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872164657564309e-05, + "loss": 0.2886, + "step": 33730 + }, + { + "epoch": 1.57, + "learning_rate": 1.4871380872509525e-05, + "loss": 0.2957, + "step": 33735 + }, + { + "epoch": 1.57, + "learning_rate": 1.4870597087454737e-05, + "loss": 0.0997, + "step": 33740 + }, + { + "epoch": 1.57, + "learning_rate": 1.486981330239995e-05, + "loss": 0.0764, + "step": 33745 + }, + { + "epoch": 1.57, + "learning_rate": 1.4869029517345163e-05, + "loss": 0.0875, + "step": 33750 + }, + { + "epoch": 1.58, + "learning_rate": 1.4868245732290379e-05, + "loss": 0.1655, + "step": 33755 + }, + { + "epoch": 1.58, + "learning_rate": 1.4867461947235591e-05, + "loss": 0.0998, + "step": 33760 + }, + { + "epoch": 1.58, + "learning_rate": 1.4866678162180805e-05, + "loss": 0.1561, + "step": 33765 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865894377126017e-05, + "loss": 0.1504, + "step": 33770 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865110592071233e-05, + "loss": 0.2526, + "step": 33775 + }, + { + "epoch": 1.58, + "learning_rate": 1.4864326807016445e-05, + "loss": 0.3547, + "step": 33780 + }, + { + "epoch": 1.58, + "learning_rate": 1.4863543021961659e-05, + "loss": 0.2778, + "step": 33785 + }, + { + "epoch": 1.58, + "learning_rate": 1.4862759236906873e-05, + "loss": 0.0651, + "step": 33790 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861975451852085e-05, + "loss": 0.0651, + "step": 33795 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861191666797299e-05, + "loss": 0.1012, + "step": 33800 + }, + { + "epoch": 1.58, + "learning_rate": 1.4860407881742511e-05, + "loss": 0.1282, + "step": 33805 + }, + { + "epoch": 1.58, + "learning_rate": 1.4859624096687727e-05, + "loss": 0.163, + "step": 33810 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858840311632939e-05, + "loss": 0.221, + "step": 33815 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858056526578153e-05, + "loss": 0.1405, + "step": 33820 + }, + { + "epoch": 1.58, + "learning_rate": 1.4857272741523365e-05, + "loss": 0.3016, + "step": 33825 + }, + { + "epoch": 1.58, + "learning_rate": 1.485648895646858e-05, + "loss": 0.4393, + "step": 33830 + }, + { + "epoch": 1.58, + "learning_rate": 1.4855705171413793e-05, + "loss": 0.2873, + "step": 33835 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854921386359007e-05, + "loss": 0.0466, + "step": 33840 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854137601304219e-05, + "loss": 0.0364, + "step": 33845 + }, + { + "epoch": 1.58, + "learning_rate": 1.4853353816249434e-05, + "loss": 0.1092, + "step": 33850 + }, + { + "epoch": 1.58, + "learning_rate": 1.4852570031194647e-05, + "loss": 0.1333, + "step": 33855 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851786246139859e-05, + "loss": 0.1323, + "step": 33860 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851002461085073e-05, + "loss": 0.1203, + "step": 33865 + }, + { + "epoch": 1.58, + "learning_rate": 1.4850218676030285e-05, + "loss": 0.1091, + "step": 33870 + }, + { + "epoch": 1.58, + "learning_rate": 1.48494348909755e-05, + "loss": 0.2064, + "step": 33875 + }, + { + "epoch": 1.58, + "learning_rate": 1.4848651105920713e-05, + "loss": 0.3023, + "step": 33880 + }, + { + "epoch": 1.58, + "learning_rate": 1.4847867320865927e-05, + "loss": 0.3641, + "step": 33885 + }, + { + "epoch": 1.58, + "learning_rate": 1.484708353581114e-05, + "loss": 0.0508, + "step": 33890 + }, + { + "epoch": 1.58, + "learning_rate": 1.4846299750756354e-05, + "loss": 0.0378, + "step": 33895 + }, + { + "epoch": 1.58, + "learning_rate": 1.4845515965701567e-05, + "loss": 0.1599, + "step": 33900 + }, + { + "epoch": 1.58, + "learning_rate": 1.4844732180646782e-05, + "loss": 0.0987, + "step": 33905 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843948395591994e-05, + "loss": 0.0775, + "step": 33910 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843164610537208e-05, + "loss": 0.1141, + "step": 33915 + }, + { + "epoch": 1.58, + "learning_rate": 1.484238082548242e-05, + "loss": 0.1908, + "step": 33920 + }, + { + "epoch": 1.58, + "learning_rate": 1.4841597040427633e-05, + "loss": 0.2086, + "step": 33925 + }, + { + "epoch": 1.58, + "learning_rate": 1.4840813255372848e-05, + "loss": 0.3953, + "step": 33930 + }, + { + "epoch": 1.58, + "learning_rate": 1.484002947031806e-05, + "loss": 0.3082, + "step": 33935 + }, + { + "epoch": 1.58, + "learning_rate": 1.4839245685263275e-05, + "loss": 0.0498, + "step": 33940 + }, + { + "epoch": 1.58, + "learning_rate": 1.4838461900208487e-05, + "loss": 0.0308, + "step": 33945 + }, + { + "epoch": 1.58, + "learning_rate": 1.4837678115153702e-05, + "loss": 0.0608, + "step": 33950 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836894330098915e-05, + "loss": 0.1048, + "step": 33955 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836110545044128e-05, + "loss": 0.1423, + "step": 33960 + }, + { + "epoch": 1.58, + "learning_rate": 1.483532675998934e-05, + "loss": 0.175, + "step": 33965 + }, + { + "epoch": 1.59, + "learning_rate": 1.4834542974934556e-05, + "loss": 0.1559, + "step": 33970 + }, + { + "epoch": 1.59, + "learning_rate": 1.4833759189879768e-05, + "loss": 0.2204, + "step": 33975 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832975404824982e-05, + "loss": 0.3428, + "step": 33980 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832191619770195e-05, + "loss": 0.2474, + "step": 33985 + }, + { + "epoch": 1.59, + "learning_rate": 1.4831407834715409e-05, + "loss": 0.0378, + "step": 33990 + }, + { + "epoch": 1.59, + "learning_rate": 1.4830624049660622e-05, + "loss": 0.0739, + "step": 33995 + }, + { + "epoch": 1.59, + "learning_rate": 1.4829840264605835e-05, + "loss": 0.0936, + "step": 34000 + }, + { + "epoch": 1.59, + "learning_rate": 1.482905647955105e-05, + "loss": 0.0705, + "step": 34005 + }, + { + "epoch": 1.59, + "learning_rate": 1.4828272694496262e-05, + "loss": 0.0965, + "step": 34010 + }, + { + "epoch": 1.59, + "learning_rate": 1.4827488909441476e-05, + "loss": 0.0939, + "step": 34015 + }, + { + "epoch": 1.59, + "learning_rate": 1.4826705124386689e-05, + "loss": 0.1264, + "step": 34020 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825921339331904e-05, + "loss": 0.1568, + "step": 34025 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825137554277116e-05, + "loss": 0.2927, + "step": 34030 + }, + { + "epoch": 1.59, + "learning_rate": 1.482435376922233e-05, + "loss": 0.3461, + "step": 34035 + }, + { + "epoch": 1.59, + "learning_rate": 1.4823569984167542e-05, + "loss": 0.0535, + "step": 34040 + }, + { + "epoch": 1.59, + "learning_rate": 1.4822786199112758e-05, + "loss": 0.0372, + "step": 34045 + }, + { + "epoch": 1.59, + "learning_rate": 1.482200241405797e-05, + "loss": 0.0823, + "step": 34050 + }, + { + "epoch": 1.59, + "learning_rate": 1.4821218629003183e-05, + "loss": 0.0993, + "step": 34055 + }, + { + "epoch": 1.59, + "learning_rate": 1.4820434843948396e-05, + "loss": 0.1109, + "step": 34060 + }, + { + "epoch": 1.59, + "learning_rate": 1.4819651058893609e-05, + "loss": 0.1505, + "step": 34065 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818867273838824e-05, + "loss": 0.2167, + "step": 34070 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818083488784036e-05, + "loss": 0.1975, + "step": 34075 + }, + { + "epoch": 1.59, + "learning_rate": 1.481729970372925e-05, + "loss": 0.3396, + "step": 34080 + }, + { + "epoch": 1.59, + "learning_rate": 1.4816515918674463e-05, + "loss": 0.2306, + "step": 34085 + }, + { + "epoch": 1.59, + "learning_rate": 1.4815732133619678e-05, + "loss": 0.0387, + "step": 34090 + }, + { + "epoch": 1.59, + "learning_rate": 1.481494834856489e-05, + "loss": 0.0714, + "step": 34095 + }, + { + "epoch": 1.59, + "learning_rate": 1.4814164563510104e-05, + "loss": 0.1295, + "step": 34100 + }, + { + "epoch": 1.59, + "learning_rate": 1.4813380778455318e-05, + "loss": 0.1515, + "step": 34105 + }, + { + "epoch": 1.59, + "learning_rate": 1.4812596993400532e-05, + "loss": 0.1432, + "step": 34110 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811813208345744e-05, + "loss": 0.1063, + "step": 34115 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811029423290957e-05, + "loss": 0.175, + "step": 34120 + }, + { + "epoch": 1.59, + "learning_rate": 1.4810245638236172e-05, + "loss": 0.2832, + "step": 34125 + }, + { + "epoch": 1.59, + "learning_rate": 1.4809461853181384e-05, + "loss": 0.4135, + "step": 34130 + }, + { + "epoch": 1.59, + "learning_rate": 1.4808678068126598e-05, + "loss": 0.2442, + "step": 34135 + }, + { + "epoch": 1.59, + "learning_rate": 1.480789428307181e-05, + "loss": 0.0886, + "step": 34140 + }, + { + "epoch": 1.59, + "learning_rate": 1.4807110498017026e-05, + "loss": 0.0458, + "step": 34145 + }, + { + "epoch": 1.59, + "learning_rate": 1.4806326712962238e-05, + "loss": 0.1056, + "step": 34150 + }, + { + "epoch": 1.59, + "learning_rate": 1.4805542927907452e-05, + "loss": 0.0775, + "step": 34155 + }, + { + "epoch": 1.59, + "learning_rate": 1.4804759142852664e-05, + "loss": 0.1509, + "step": 34160 + }, + { + "epoch": 1.59, + "learning_rate": 1.480397535779788e-05, + "loss": 0.1604, + "step": 34165 + }, + { + "epoch": 1.59, + "learning_rate": 1.4803191572743092e-05, + "loss": 0.1765, + "step": 34170 + }, + { + "epoch": 1.59, + "learning_rate": 1.4802407787688306e-05, + "loss": 0.1314, + "step": 34175 + }, + { + "epoch": 1.59, + "learning_rate": 1.4801624002633518e-05, + "loss": 0.428, + "step": 34180 + }, + { + "epoch": 1.6, + "learning_rate": 1.480084021757873e-05, + "loss": 0.2183, + "step": 34185 + }, + { + "epoch": 1.6, + "learning_rate": 1.4800056432523946e-05, + "loss": 0.0748, + "step": 34190 + }, + { + "epoch": 1.6, + "learning_rate": 1.4799272647469158e-05, + "loss": 0.0489, + "step": 34195 + }, + { + "epoch": 1.6, + "learning_rate": 1.4798488862414372e-05, + "loss": 0.0639, + "step": 34200 + }, + { + "epoch": 1.6, + "learning_rate": 1.4797705077359586e-05, + "loss": 0.0929, + "step": 34205 + }, + { + "epoch": 1.6, + "learning_rate": 1.47969212923048e-05, + "loss": 0.187, + "step": 34210 + }, + { + "epoch": 1.6, + "learning_rate": 1.4796137507250012e-05, + "loss": 0.1703, + "step": 34215 + }, + { + "epoch": 1.6, + "learning_rate": 1.4795353722195228e-05, + "loss": 0.1433, + "step": 34220 + }, + { + "epoch": 1.6, + "learning_rate": 1.479456993714044e-05, + "loss": 0.2344, + "step": 34225 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793786152085654e-05, + "loss": 0.3602, + "step": 34230 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793002367030866e-05, + "loss": 0.3009, + "step": 34235 + }, + { + "epoch": 1.6, + "learning_rate": 1.4792218581976082e-05, + "loss": 0.107, + "step": 34240 + }, + { + "epoch": 1.6, + "learning_rate": 1.4791434796921294e-05, + "loss": 0.0895, + "step": 34245 + }, + { + "epoch": 1.6, + "learning_rate": 1.4790651011866506e-05, + "loss": 0.0729, + "step": 34250 + }, + { + "epoch": 1.6, + "learning_rate": 1.478986722681172e-05, + "loss": 0.0705, + "step": 34255 + }, + { + "epoch": 1.6, + "learning_rate": 1.4789083441756932e-05, + "loss": 0.1037, + "step": 34260 + }, + { + "epoch": 1.6, + "learning_rate": 1.4788299656702148e-05, + "loss": 0.1372, + "step": 34265 + }, + { + "epoch": 1.6, + "learning_rate": 1.478751587164736e-05, + "loss": 0.2172, + "step": 34270 + }, + { + "epoch": 1.6, + "learning_rate": 1.4786732086592574e-05, + "loss": 0.2641, + "step": 34275 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785948301537786e-05, + "loss": 0.3562, + "step": 34280 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785164516483002e-05, + "loss": 0.2552, + "step": 34285 + }, + { + "epoch": 1.6, + "learning_rate": 1.4784380731428214e-05, + "loss": 0.0604, + "step": 34290 + }, + { + "epoch": 1.6, + "learning_rate": 1.4783596946373428e-05, + "loss": 0.0537, + "step": 34295 + }, + { + "epoch": 1.6, + "learning_rate": 1.478281316131864e-05, + "loss": 0.1428, + "step": 34300 + }, + { + "epoch": 1.6, + "learning_rate": 1.4782029376263856e-05, + "loss": 0.1001, + "step": 34305 + }, + { + "epoch": 1.6, + "learning_rate": 1.4781245591209068e-05, + "loss": 0.0837, + "step": 34310 + }, + { + "epoch": 1.6, + "learning_rate": 1.478046180615428e-05, + "loss": 0.125, + "step": 34315 + }, + { + "epoch": 1.6, + "learning_rate": 1.4779678021099496e-05, + "loss": 0.1296, + "step": 34320 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778894236044708e-05, + "loss": 0.1319, + "step": 34325 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778110450989922e-05, + "loss": 0.2868, + "step": 34330 + }, + { + "epoch": 1.6, + "learning_rate": 1.4777326665935134e-05, + "loss": 0.3055, + "step": 34335 + }, + { + "epoch": 1.6, + "learning_rate": 1.477654288088035e-05, + "loss": 0.0613, + "step": 34340 + }, + { + "epoch": 1.6, + "learning_rate": 1.4775759095825562e-05, + "loss": 0.1232, + "step": 34345 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774975310770776e-05, + "loss": 0.0745, + "step": 34350 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774191525715988e-05, + "loss": 0.128, + "step": 34355 + }, + { + "epoch": 1.6, + "learning_rate": 1.4773407740661204e-05, + "loss": 0.0547, + "step": 34360 + }, + { + "epoch": 1.6, + "learning_rate": 1.4772623955606416e-05, + "loss": 0.1655, + "step": 34365 + }, + { + "epoch": 1.6, + "learning_rate": 1.477184017055163e-05, + "loss": 0.47, + "step": 34370 + }, + { + "epoch": 1.6, + "learning_rate": 1.4771056385496842e-05, + "loss": 0.2424, + "step": 34375 + }, + { + "epoch": 1.6, + "learning_rate": 1.4770272600442054e-05, + "loss": 0.3795, + "step": 34380 + }, + { + "epoch": 1.6, + "learning_rate": 1.476948881538727e-05, + "loss": 0.292, + "step": 34385 + }, + { + "epoch": 1.6, + "learning_rate": 1.4768705030332482e-05, + "loss": 0.0394, + "step": 34390 + }, + { + "epoch": 1.6, + "learning_rate": 1.4767921245277696e-05, + "loss": 0.0294, + "step": 34395 + }, + { + "epoch": 1.61, + "learning_rate": 1.4767137460222908e-05, + "loss": 0.0748, + "step": 34400 + }, + { + "epoch": 1.61, + "learning_rate": 1.4766353675168124e-05, + "loss": 0.1135, + "step": 34405 + }, + { + "epoch": 1.61, + "learning_rate": 1.4765569890113336e-05, + "loss": 0.1248, + "step": 34410 + }, + { + "epoch": 1.61, + "learning_rate": 1.476478610505855e-05, + "loss": 0.1699, + "step": 34415 + }, + { + "epoch": 1.61, + "learning_rate": 1.4764002320003764e-05, + "loss": 0.1724, + "step": 34420 + }, + { + "epoch": 1.61, + "learning_rate": 1.4763218534948978e-05, + "loss": 0.2934, + "step": 34425 + }, + { + "epoch": 1.61, + "learning_rate": 1.476243474989419e-05, + "loss": 0.3888, + "step": 34430 + }, + { + "epoch": 1.61, + "learning_rate": 1.4761650964839405e-05, + "loss": 0.3509, + "step": 34435 + }, + { + "epoch": 1.61, + "learning_rate": 1.4760867179784618e-05, + "loss": 0.0337, + "step": 34440 + }, + { + "epoch": 1.61, + "learning_rate": 1.476008339472983e-05, + "loss": 0.0399, + "step": 34445 + }, + { + "epoch": 1.61, + "learning_rate": 1.4759299609675044e-05, + "loss": 0.071, + "step": 34450 + }, + { + "epoch": 1.61, + "learning_rate": 1.4758515824620256e-05, + "loss": 0.1536, + "step": 34455 + }, + { + "epoch": 1.61, + "learning_rate": 1.4757732039565472e-05, + "loss": 0.0879, + "step": 34460 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756948254510684e-05, + "loss": 0.256, + "step": 34465 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756164469455898e-05, + "loss": 0.1642, + "step": 34470 + }, + { + "epoch": 1.61, + "learning_rate": 1.475538068440111e-05, + "loss": 0.3665, + "step": 34475 + }, + { + "epoch": 1.61, + "learning_rate": 1.4754596899346326e-05, + "loss": 0.5219, + "step": 34480 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753813114291538e-05, + "loss": 0.24, + "step": 34485 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753029329236752e-05, + "loss": 0.0458, + "step": 34490 + }, + { + "epoch": 1.61, + "learning_rate": 1.4752245544181964e-05, + "loss": 0.0644, + "step": 34495 + }, + { + "epoch": 1.61, + "learning_rate": 1.475146175912718e-05, + "loss": 0.0412, + "step": 34500 + }, + { + "epoch": 1.61, + "learning_rate": 1.4750677974072392e-05, + "loss": 0.0782, + "step": 34505 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749894189017604e-05, + "loss": 0.102, + "step": 34510 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749110403962818e-05, + "loss": 0.1028, + "step": 34515 + }, + { + "epoch": 1.61, + "learning_rate": 1.4748326618908032e-05, + "loss": 0.1788, + "step": 34520 + }, + { + "epoch": 1.61, + "learning_rate": 1.4747542833853246e-05, + "loss": 0.1417, + "step": 34525 + }, + { + "epoch": 1.61, + "learning_rate": 1.4746759048798458e-05, + "loss": 0.2783, + "step": 34530 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745975263743673e-05, + "loss": 0.2968, + "step": 34535 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745191478688886e-05, + "loss": 0.0946, + "step": 34540 + }, + { + "epoch": 1.61, + "learning_rate": 1.47444076936341e-05, + "loss": 0.0437, + "step": 34545 + }, + { + "epoch": 1.61, + "learning_rate": 1.4743623908579312e-05, + "loss": 0.0868, + "step": 34550 + }, + { + "epoch": 1.61, + "learning_rate": 1.4742840123524527e-05, + "loss": 0.105, + "step": 34555 + }, + { + "epoch": 1.61, + "learning_rate": 1.474205633846974e-05, + "loss": 0.1074, + "step": 34560 + }, + { + "epoch": 1.61, + "learning_rate": 1.4741272553414953e-05, + "loss": 0.1464, + "step": 34565 + }, + { + "epoch": 1.61, + "learning_rate": 1.4740488768360166e-05, + "loss": 0.1298, + "step": 34570 + }, + { + "epoch": 1.61, + "learning_rate": 1.4739704983305378e-05, + "loss": 0.2425, + "step": 34575 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738921198250593e-05, + "loss": 0.3058, + "step": 34580 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738137413195806e-05, + "loss": 0.2462, + "step": 34585 + }, + { + "epoch": 1.61, + "learning_rate": 1.473735362814102e-05, + "loss": 0.0695, + "step": 34590 + }, + { + "epoch": 1.61, + "learning_rate": 1.4736569843086232e-05, + "loss": 0.0971, + "step": 34595 + }, + { + "epoch": 1.61, + "learning_rate": 1.4735786058031447e-05, + "loss": 0.1426, + "step": 34600 + }, + { + "epoch": 1.61, + "learning_rate": 1.473500227297666e-05, + "loss": 0.0843, + "step": 34605 + }, + { + "epoch": 1.61, + "learning_rate": 1.4734218487921874e-05, + "loss": 0.1106, + "step": 34610 + }, + { + "epoch": 1.62, + "learning_rate": 1.4733434702867086e-05, + "loss": 0.0959, + "step": 34615 + }, + { + "epoch": 1.62, + "learning_rate": 1.4732650917812301e-05, + "loss": 0.1614, + "step": 34620 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731867132757514e-05, + "loss": 0.2626, + "step": 34625 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731083347702727e-05, + "loss": 0.2758, + "step": 34630 + }, + { + "epoch": 1.62, + "learning_rate": 1.4730299562647941e-05, + "loss": 0.3072, + "step": 34635 + }, + { + "epoch": 1.62, + "learning_rate": 1.4729515777593154e-05, + "loss": 0.0358, + "step": 34640 + }, + { + "epoch": 1.62, + "learning_rate": 1.4728731992538367e-05, + "loss": 0.0405, + "step": 34645 + }, + { + "epoch": 1.62, + "learning_rate": 1.472794820748358e-05, + "loss": 0.0772, + "step": 34650 + }, + { + "epoch": 1.62, + "learning_rate": 1.4727164422428795e-05, + "loss": 0.1179, + "step": 34655 + }, + { + "epoch": 1.62, + "learning_rate": 1.4726380637374008e-05, + "loss": 0.0595, + "step": 34660 + }, + { + "epoch": 1.62, + "learning_rate": 1.4725596852319221e-05, + "loss": 0.2746, + "step": 34665 + }, + { + "epoch": 1.62, + "learning_rate": 1.4724813067264434e-05, + "loss": 0.1401, + "step": 34670 + }, + { + "epoch": 1.62, + "learning_rate": 1.472402928220965e-05, + "loss": 0.1435, + "step": 34675 + }, + { + "epoch": 1.62, + "learning_rate": 1.4723245497154861e-05, + "loss": 0.2242, + "step": 34680 + }, + { + "epoch": 1.62, + "learning_rate": 1.4722461712100075e-05, + "loss": 0.3268, + "step": 34685 + }, + { + "epoch": 1.62, + "learning_rate": 1.4721677927045288e-05, + "loss": 0.034, + "step": 34690 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720894141990503e-05, + "loss": 0.1351, + "step": 34695 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720110356935715e-05, + "loss": 0.0795, + "step": 34700 + }, + { + "epoch": 1.62, + "learning_rate": 1.4719326571880928e-05, + "loss": 0.0898, + "step": 34705 + }, + { + "epoch": 1.62, + "learning_rate": 1.4718542786826141e-05, + "loss": 0.096, + "step": 34710 + }, + { + "epoch": 1.62, + "learning_rate": 1.4717759001771354e-05, + "loss": 0.1264, + "step": 34715 + }, + { + "epoch": 1.62, + "learning_rate": 1.471697521671657e-05, + "loss": 0.1767, + "step": 34720 + }, + { + "epoch": 1.62, + "learning_rate": 1.4716191431661782e-05, + "loss": 0.1141, + "step": 34725 + }, + { + "epoch": 1.62, + "learning_rate": 1.4715407646606995e-05, + "loss": 0.3326, + "step": 34730 + }, + { + "epoch": 1.62, + "learning_rate": 1.471462386155221e-05, + "loss": 0.3716, + "step": 34735 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713840076497423e-05, + "loss": 0.0574, + "step": 34740 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713056291442635e-05, + "loss": 0.0426, + "step": 34745 + }, + { + "epoch": 1.62, + "learning_rate": 1.4712272506387851e-05, + "loss": 0.0909, + "step": 34750 + }, + { + "epoch": 1.62, + "learning_rate": 1.4711488721333063e-05, + "loss": 0.1078, + "step": 34755 + }, + { + "epoch": 1.62, + "learning_rate": 1.4710704936278277e-05, + "loss": 0.1061, + "step": 34760 + }, + { + "epoch": 1.62, + "learning_rate": 1.470992115122349e-05, + "loss": 0.1885, + "step": 34765 + }, + { + "epoch": 1.62, + "learning_rate": 1.4709137366168702e-05, + "loss": 0.176, + "step": 34770 + }, + { + "epoch": 1.62, + "learning_rate": 1.4708353581113917e-05, + "loss": 0.1519, + "step": 34775 + }, + { + "epoch": 1.62, + "learning_rate": 1.470756979605913e-05, + "loss": 0.2766, + "step": 34780 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706786011004343e-05, + "loss": 0.2345, + "step": 34785 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706002225949556e-05, + "loss": 0.0734, + "step": 34790 + }, + { + "epoch": 1.62, + "learning_rate": 1.4705218440894771e-05, + "loss": 0.0545, + "step": 34795 + }, + { + "epoch": 1.62, + "learning_rate": 1.4704434655839983e-05, + "loss": 0.0359, + "step": 34800 + }, + { + "epoch": 1.62, + "learning_rate": 1.4703650870785197e-05, + "loss": 0.08, + "step": 34805 + }, + { + "epoch": 1.62, + "learning_rate": 1.470286708573041e-05, + "loss": 0.1442, + "step": 34810 + }, + { + "epoch": 1.62, + "learning_rate": 1.4702083300675625e-05, + "loss": 0.1107, + "step": 34815 + }, + { + "epoch": 1.62, + "learning_rate": 1.4701299515620837e-05, + "loss": 0.1809, + "step": 34820 + }, + { + "epoch": 1.62, + "learning_rate": 1.4700515730566051e-05, + "loss": 0.1639, + "step": 34825 + }, + { + "epoch": 1.63, + "learning_rate": 1.4699731945511263e-05, + "loss": 0.2368, + "step": 34830 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698948160456477e-05, + "loss": 0.2754, + "step": 34835 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698164375401691e-05, + "loss": 0.1172, + "step": 34840 + }, + { + "epoch": 1.63, + "learning_rate": 1.4697380590346903e-05, + "loss": 0.0575, + "step": 34845 + }, + { + "epoch": 1.63, + "learning_rate": 1.4696596805292119e-05, + "loss": 0.1116, + "step": 34850 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695813020237331e-05, + "loss": 0.1418, + "step": 34855 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695029235182545e-05, + "loss": 0.1506, + "step": 34860 + }, + { + "epoch": 1.63, + "learning_rate": 1.4694245450127757e-05, + "loss": 0.1702, + "step": 34865 + }, + { + "epoch": 1.63, + "learning_rate": 1.4693461665072973e-05, + "loss": 0.2012, + "step": 34870 + }, + { + "epoch": 1.63, + "learning_rate": 1.4692677880018185e-05, + "loss": 0.28, + "step": 34875 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691894094963399e-05, + "loss": 0.2179, + "step": 34880 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691110309908611e-05, + "loss": 0.2453, + "step": 34885 + }, + { + "epoch": 1.63, + "learning_rate": 1.4690326524853827e-05, + "loss": 0.0277, + "step": 34890 + }, + { + "epoch": 1.63, + "learning_rate": 1.4689542739799039e-05, + "loss": 0.1151, + "step": 34895 + }, + { + "epoch": 1.63, + "learning_rate": 1.4688758954744251e-05, + "loss": 0.091, + "step": 34900 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687975169689465e-05, + "loss": 0.1309, + "step": 34905 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687191384634677e-05, + "loss": 0.0805, + "step": 34910 + }, + { + "epoch": 1.63, + "learning_rate": 1.4686407599579893e-05, + "loss": 0.1674, + "step": 34915 + }, + { + "epoch": 1.63, + "learning_rate": 1.4685623814525105e-05, + "loss": 0.1859, + "step": 34920 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684840029470319e-05, + "loss": 0.234, + "step": 34925 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684056244415531e-05, + "loss": 0.3353, + "step": 34930 + }, + { + "epoch": 1.63, + "learning_rate": 1.4683272459360747e-05, + "loss": 0.3759, + "step": 34935 + }, + { + "epoch": 1.63, + "learning_rate": 1.4682488674305959e-05, + "loss": 0.0299, + "step": 34940 + }, + { + "epoch": 1.63, + "learning_rate": 1.4681704889251173e-05, + "loss": 0.0724, + "step": 34945 + }, + { + "epoch": 1.63, + "learning_rate": 1.4680921104196387e-05, + "loss": 0.1143, + "step": 34950 + }, + { + "epoch": 1.63, + "learning_rate": 1.46801373191416e-05, + "loss": 0.1031, + "step": 34955 + }, + { + "epoch": 1.63, + "learning_rate": 1.4679353534086813e-05, + "loss": 0.1564, + "step": 34960 + }, + { + "epoch": 1.63, + "learning_rate": 1.4678569749032025e-05, + "loss": 0.1843, + "step": 34965 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677785963977241e-05, + "loss": 0.1421, + "step": 34970 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677002178922453e-05, + "loss": 0.1495, + "step": 34975 + }, + { + "epoch": 1.63, + "learning_rate": 1.4676218393867667e-05, + "loss": 0.3336, + "step": 34980 + }, + { + "epoch": 1.63, + "learning_rate": 1.467543460881288e-05, + "loss": 0.3336, + "step": 34985 + }, + { + "epoch": 1.63, + "learning_rate": 1.4674650823758095e-05, + "loss": 0.0764, + "step": 34990 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673867038703307e-05, + "loss": 0.0414, + "step": 34995 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673083253648521e-05, + "loss": 0.0867, + "step": 35000 + }, + { + "epoch": 1.63, + "learning_rate": 1.4672299468593733e-05, + "loss": 0.0791, + "step": 35005 + }, + { + "epoch": 1.63, + "learning_rate": 1.4671515683538949e-05, + "loss": 0.1432, + "step": 35010 + }, + { + "epoch": 1.63, + "learning_rate": 1.4670731898484161e-05, + "loss": 0.1849, + "step": 35015 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669948113429375e-05, + "loss": 0.1386, + "step": 35020 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669164328374587e-05, + "loss": 0.1576, + "step": 35025 + }, + { + "epoch": 1.63, + "learning_rate": 1.46683805433198e-05, + "loss": 0.3647, + "step": 35030 + }, + { + "epoch": 1.63, + "learning_rate": 1.4667596758265015e-05, + "loss": 0.3103, + "step": 35035 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666812973210227e-05, + "loss": 0.0494, + "step": 35040 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666029188155441e-05, + "loss": 0.0672, + "step": 35045 + }, + { + "epoch": 1.64, + "learning_rate": 1.4665245403100655e-05, + "loss": 0.0995, + "step": 35050 + }, + { + "epoch": 1.64, + "learning_rate": 1.4664461618045869e-05, + "loss": 0.0589, + "step": 35055 + }, + { + "epoch": 1.64, + "learning_rate": 1.4663677832991081e-05, + "loss": 0.1083, + "step": 35060 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662894047936297e-05, + "loss": 0.1836, + "step": 35065 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662110262881509e-05, + "loss": 0.216, + "step": 35070 + }, + { + "epoch": 1.64, + "learning_rate": 1.4661326477826723e-05, + "loss": 0.2673, + "step": 35075 + }, + { + "epoch": 1.64, + "learning_rate": 1.4660542692771935e-05, + "loss": 0.3442, + "step": 35080 + }, + { + "epoch": 1.64, + "learning_rate": 1.465975890771715e-05, + "loss": 0.3217, + "step": 35085 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658975122662363e-05, + "loss": 0.0211, + "step": 35090 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658191337607575e-05, + "loss": 0.0388, + "step": 35095 + }, + { + "epoch": 1.64, + "learning_rate": 1.4657407552552789e-05, + "loss": 0.044, + "step": 35100 + }, + { + "epoch": 1.64, + "learning_rate": 1.4656623767498001e-05, + "loss": 0.1086, + "step": 35105 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655839982443217e-05, + "loss": 0.1599, + "step": 35110 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655056197388429e-05, + "loss": 0.129, + "step": 35115 + }, + { + "epoch": 1.64, + "learning_rate": 1.4654272412333643e-05, + "loss": 0.1483, + "step": 35120 + }, + { + "epoch": 1.64, + "learning_rate": 1.4653488627278855e-05, + "loss": 0.2187, + "step": 35125 + }, + { + "epoch": 1.64, + "learning_rate": 1.465270484222407e-05, + "loss": 0.4357, + "step": 35130 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651921057169283e-05, + "loss": 0.2683, + "step": 35135 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651137272114497e-05, + "loss": 0.0366, + "step": 35140 + }, + { + "epoch": 1.64, + "learning_rate": 1.4650353487059709e-05, + "loss": 0.0448, + "step": 35145 + }, + { + "epoch": 1.64, + "learning_rate": 1.4649569702004925e-05, + "loss": 0.0861, + "step": 35150 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648785916950137e-05, + "loss": 0.0554, + "step": 35155 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648002131895349e-05, + "loss": 0.143, + "step": 35160 + }, + { + "epoch": 1.64, + "learning_rate": 1.4647218346840565e-05, + "loss": 0.1561, + "step": 35165 + }, + { + "epoch": 1.64, + "learning_rate": 1.4646434561785777e-05, + "loss": 0.1867, + "step": 35170 + }, + { + "epoch": 1.64, + "learning_rate": 1.464565077673099e-05, + "loss": 0.1855, + "step": 35175 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644866991676203e-05, + "loss": 0.4092, + "step": 35180 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644083206621418e-05, + "loss": 0.3768, + "step": 35185 + }, + { + "epoch": 1.64, + "learning_rate": 1.464329942156663e-05, + "loss": 0.1151, + "step": 35190 + }, + { + "epoch": 1.64, + "learning_rate": 1.4642515636511845e-05, + "loss": 0.0626, + "step": 35195 + }, + { + "epoch": 1.64, + "learning_rate": 1.4641731851457057e-05, + "loss": 0.0333, + "step": 35200 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640948066402272e-05, + "loss": 0.0781, + "step": 35205 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640164281347485e-05, + "loss": 0.1316, + "step": 35210 + }, + { + "epoch": 1.64, + "learning_rate": 1.4639380496292699e-05, + "loss": 0.0988, + "step": 35215 + }, + { + "epoch": 1.64, + "learning_rate": 1.463859671123791e-05, + "loss": 0.1637, + "step": 35220 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637812926183123e-05, + "loss": 0.2726, + "step": 35225 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637029141128339e-05, + "loss": 0.336, + "step": 35230 + }, + { + "epoch": 1.64, + "learning_rate": 1.463624535607355e-05, + "loss": 0.1921, + "step": 35235 + }, + { + "epoch": 1.64, + "learning_rate": 1.4635461571018765e-05, + "loss": 0.026, + "step": 35240 + }, + { + "epoch": 1.64, + "learning_rate": 1.4634677785963977e-05, + "loss": 0.0511, + "step": 35245 + }, + { + "epoch": 1.64, + "learning_rate": 1.4633894000909192e-05, + "loss": 0.0613, + "step": 35250 + }, + { + "epoch": 1.65, + "learning_rate": 1.4633110215854405e-05, + "loss": 0.116, + "step": 35255 + }, + { + "epoch": 1.65, + "learning_rate": 1.4632326430799619e-05, + "loss": 0.1376, + "step": 35260 + }, + { + "epoch": 1.65, + "learning_rate": 1.4631542645744833e-05, + "loss": 0.1423, + "step": 35265 + }, + { + "epoch": 1.65, + "learning_rate": 1.4630758860690046e-05, + "loss": 0.0715, + "step": 35270 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629975075635259e-05, + "loss": 0.2406, + "step": 35275 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629191290580474e-05, + "loss": 0.2576, + "step": 35280 + }, + { + "epoch": 1.65, + "learning_rate": 1.4628407505525686e-05, + "loss": 0.1546, + "step": 35285 + }, + { + "epoch": 1.65, + "learning_rate": 1.4627623720470899e-05, + "loss": 0.0603, + "step": 35290 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626839935416113e-05, + "loss": 0.0637, + "step": 35295 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626056150361325e-05, + "loss": 0.0723, + "step": 35300 + }, + { + "epoch": 1.65, + "learning_rate": 1.462527236530654e-05, + "loss": 0.0931, + "step": 35305 + }, + { + "epoch": 1.65, + "learning_rate": 1.4624488580251753e-05, + "loss": 0.163, + "step": 35310 + }, + { + "epoch": 1.65, + "learning_rate": 1.4623704795196966e-05, + "loss": 0.2184, + "step": 35315 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622921010142179e-05, + "loss": 0.0844, + "step": 35320 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622137225087394e-05, + "loss": 0.2475, + "step": 35325 + }, + { + "epoch": 1.65, + "learning_rate": 1.4621353440032607e-05, + "loss": 0.3015, + "step": 35330 + }, + { + "epoch": 1.65, + "learning_rate": 1.462056965497782e-05, + "loss": 0.26, + "step": 35335 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619785869923033e-05, + "loss": 0.0676, + "step": 35340 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619002084868248e-05, + "loss": 0.0765, + "step": 35345 + }, + { + "epoch": 1.65, + "learning_rate": 1.461821829981346e-05, + "loss": 0.0856, + "step": 35350 + }, + { + "epoch": 1.65, + "learning_rate": 1.4617434514758673e-05, + "loss": 0.0801, + "step": 35355 + }, + { + "epoch": 1.65, + "learning_rate": 1.4616650729703887e-05, + "loss": 0.1053, + "step": 35360 + }, + { + "epoch": 1.65, + "learning_rate": 1.46158669446491e-05, + "loss": 0.1531, + "step": 35365 + }, + { + "epoch": 1.65, + "learning_rate": 1.4615083159594314e-05, + "loss": 0.2514, + "step": 35370 + }, + { + "epoch": 1.65, + "learning_rate": 1.4614299374539527e-05, + "loss": 0.1658, + "step": 35375 + }, + { + "epoch": 1.65, + "learning_rate": 1.4613515589484742e-05, + "loss": 0.3044, + "step": 35380 + }, + { + "epoch": 1.65, + "learning_rate": 1.4612731804429954e-05, + "loss": 0.2306, + "step": 35385 + }, + { + "epoch": 1.65, + "learning_rate": 1.4611948019375168e-05, + "loss": 0.0903, + "step": 35390 + }, + { + "epoch": 1.65, + "learning_rate": 1.461116423432038e-05, + "loss": 0.0519, + "step": 35395 + }, + { + "epoch": 1.65, + "learning_rate": 1.4610380449265596e-05, + "loss": 0.0563, + "step": 35400 + }, + { + "epoch": 1.65, + "learning_rate": 1.4609596664210808e-05, + "loss": 0.0959, + "step": 35405 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608812879156022e-05, + "loss": 0.0844, + "step": 35410 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608029094101234e-05, + "loss": 0.1355, + "step": 35415 + }, + { + "epoch": 1.65, + "learning_rate": 1.4607245309046447e-05, + "loss": 0.1931, + "step": 35420 + }, + { + "epoch": 1.65, + "learning_rate": 1.4606461523991662e-05, + "loss": 0.257, + "step": 35425 + }, + { + "epoch": 1.65, + "learning_rate": 1.4605677738936874e-05, + "loss": 0.5352, + "step": 35430 + }, + { + "epoch": 1.65, + "learning_rate": 1.4604893953882088e-05, + "loss": 0.3642, + "step": 35435 + }, + { + "epoch": 1.65, + "learning_rate": 1.46041101688273e-05, + "loss": 0.0393, + "step": 35440 + }, + { + "epoch": 1.65, + "learning_rate": 1.4603326383772516e-05, + "loss": 0.106, + "step": 35445 + }, + { + "epoch": 1.65, + "learning_rate": 1.4602542598717728e-05, + "loss": 0.1245, + "step": 35450 + }, + { + "epoch": 1.65, + "learning_rate": 1.4601758813662942e-05, + "loss": 0.1576, + "step": 35455 + }, + { + "epoch": 1.65, + "learning_rate": 1.4600975028608155e-05, + "loss": 0.1734, + "step": 35460 + }, + { + "epoch": 1.65, + "learning_rate": 1.460019124355337e-05, + "loss": 0.0837, + "step": 35465 + }, + { + "epoch": 1.66, + "learning_rate": 1.4599407458498582e-05, + "loss": 0.1933, + "step": 35470 + }, + { + "epoch": 1.66, + "learning_rate": 1.4598623673443796e-05, + "loss": 0.1611, + "step": 35475 + }, + { + "epoch": 1.66, + "learning_rate": 1.459783988838901e-05, + "loss": 0.2965, + "step": 35480 + }, + { + "epoch": 1.66, + "learning_rate": 1.4597056103334222e-05, + "loss": 0.314, + "step": 35485 + }, + { + "epoch": 1.66, + "learning_rate": 1.4596272318279436e-05, + "loss": 0.0355, + "step": 35490 + }, + { + "epoch": 1.66, + "learning_rate": 1.4595488533224648e-05, + "loss": 0.0476, + "step": 35495 + }, + { + "epoch": 1.66, + "learning_rate": 1.4594704748169864e-05, + "loss": 0.0686, + "step": 35500 + }, + { + "epoch": 1.66, + "learning_rate": 1.4593920963115076e-05, + "loss": 0.1746, + "step": 35505 + }, + { + "epoch": 1.66, + "learning_rate": 1.459313717806029e-05, + "loss": 0.1403, + "step": 35510 + }, + { + "epoch": 1.66, + "learning_rate": 1.4592353393005502e-05, + "loss": 0.1285, + "step": 35515 + }, + { + "epoch": 1.66, + "learning_rate": 1.4591569607950718e-05, + "loss": 0.252, + "step": 35520 + }, + { + "epoch": 1.66, + "learning_rate": 1.459078582289593e-05, + "loss": 0.2358, + "step": 35525 + }, + { + "epoch": 1.66, + "learning_rate": 1.4590002037841144e-05, + "loss": 0.2364, + "step": 35530 + }, + { + "epoch": 1.66, + "learning_rate": 1.4589218252786356e-05, + "loss": 0.2942, + "step": 35535 + }, + { + "epoch": 1.66, + "learning_rate": 1.4588434467731572e-05, + "loss": 0.0387, + "step": 35540 + }, + { + "epoch": 1.66, + "learning_rate": 1.4587650682676784e-05, + "loss": 0.1152, + "step": 35545 + }, + { + "epoch": 1.66, + "learning_rate": 1.4586866897621996e-05, + "loss": 0.0824, + "step": 35550 + }, + { + "epoch": 1.66, + "learning_rate": 1.458608311256721e-05, + "loss": 0.1106, + "step": 35555 + }, + { + "epoch": 1.66, + "learning_rate": 1.4585299327512422e-05, + "loss": 0.1017, + "step": 35560 + }, + { + "epoch": 1.66, + "learning_rate": 1.4584515542457638e-05, + "loss": 0.2116, + "step": 35565 + }, + { + "epoch": 1.66, + "learning_rate": 1.458373175740285e-05, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582947972348064e-05, + "loss": 0.2595, + "step": 35575 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582164187293278e-05, + "loss": 0.3131, + "step": 35580 + }, + { + "epoch": 1.66, + "learning_rate": 1.4581380402238492e-05, + "loss": 0.2581, + "step": 35585 + }, + { + "epoch": 1.66, + "learning_rate": 1.4580596617183704e-05, + "loss": 0.0496, + "step": 35590 + }, + { + "epoch": 1.66, + "learning_rate": 1.457981283212892e-05, + "loss": 0.0473, + "step": 35595 + }, + { + "epoch": 1.66, + "learning_rate": 1.4579029047074132e-05, + "loss": 0.1001, + "step": 35600 + }, + { + "epoch": 1.66, + "learning_rate": 1.4578245262019346e-05, + "loss": 0.053, + "step": 35605 + }, + { + "epoch": 1.66, + "learning_rate": 1.4577461476964558e-05, + "loss": 0.1636, + "step": 35610 + }, + { + "epoch": 1.66, + "learning_rate": 1.457667769190977e-05, + "loss": 0.1338, + "step": 35615 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575893906854986e-05, + "loss": 0.2236, + "step": 35620 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575110121800198e-05, + "loss": 0.17, + "step": 35625 + }, + { + "epoch": 1.66, + "learning_rate": 1.4574326336745412e-05, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4573542551690624e-05, + "loss": 0.1904, + "step": 35635 + }, + { + "epoch": 1.66, + "learning_rate": 1.457275876663584e-05, + "loss": 0.0506, + "step": 35640 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571974981581052e-05, + "loss": 0.0974, + "step": 35645 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571191196526266e-05, + "loss": 0.036, + "step": 35650 + }, + { + "epoch": 1.66, + "learning_rate": 1.4570407411471478e-05, + "loss": 0.1061, + "step": 35655 + }, + { + "epoch": 1.66, + "learning_rate": 1.4569623626416694e-05, + "loss": 0.1198, + "step": 35660 + }, + { + "epoch": 1.66, + "learning_rate": 1.4568839841361906e-05, + "loss": 0.244, + "step": 35665 + }, + { + "epoch": 1.66, + "learning_rate": 1.456805605630712e-05, + "loss": 0.1738, + "step": 35670 + }, + { + "epoch": 1.66, + "learning_rate": 1.4567272271252332e-05, + "loss": 0.1297, + "step": 35675 + }, + { + "epoch": 1.66, + "learning_rate": 1.4566488486197546e-05, + "loss": 0.3234, + "step": 35680 + }, + { + "epoch": 1.67, + "learning_rate": 1.456570470114276e-05, + "loss": 0.3038, + "step": 35685 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564920916087972e-05, + "loss": 0.0216, + "step": 35690 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564137131033188e-05, + "loss": 0.0839, + "step": 35695 + }, + { + "epoch": 1.67, + "learning_rate": 1.45633533459784e-05, + "loss": 0.0911, + "step": 35700 + }, + { + "epoch": 1.67, + "learning_rate": 1.4562569560923614e-05, + "loss": 0.1222, + "step": 35705 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561785775868826e-05, + "loss": 0.0914, + "step": 35710 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561001990814042e-05, + "loss": 0.1321, + "step": 35715 + }, + { + "epoch": 1.67, + "learning_rate": 1.4560218205759254e-05, + "loss": 0.1351, + "step": 35720 + }, + { + "epoch": 1.67, + "learning_rate": 1.4559434420704468e-05, + "loss": 0.2113, + "step": 35725 + }, + { + "epoch": 1.67, + "learning_rate": 1.455865063564968e-05, + "loss": 0.3217, + "step": 35730 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557866850594896e-05, + "loss": 0.3432, + "step": 35735 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557083065540108e-05, + "loss": 0.0595, + "step": 35740 + }, + { + "epoch": 1.67, + "learning_rate": 1.455629928048532e-05, + "loss": 0.0583, + "step": 35745 + }, + { + "epoch": 1.67, + "learning_rate": 1.4555515495430534e-05, + "loss": 0.0955, + "step": 35750 + }, + { + "epoch": 1.67, + "learning_rate": 1.4554731710375746e-05, + "loss": 0.0763, + "step": 35755 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553947925320962e-05, + "loss": 0.159, + "step": 35760 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553164140266174e-05, + "loss": 0.1699, + "step": 35765 + }, + { + "epoch": 1.67, + "learning_rate": 1.4552380355211388e-05, + "loss": 0.1226, + "step": 35770 + }, + { + "epoch": 1.67, + "learning_rate": 1.45515965701566e-05, + "loss": 0.233, + "step": 35775 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550812785101816e-05, + "loss": 0.3687, + "step": 35780 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550029000047028e-05, + "loss": 0.2873, + "step": 35785 + }, + { + "epoch": 1.67, + "learning_rate": 1.4549245214992242e-05, + "loss": 0.0251, + "step": 35790 + }, + { + "epoch": 1.67, + "learning_rate": 1.4548461429937456e-05, + "loss": 0.0768, + "step": 35795 + }, + { + "epoch": 1.67, + "learning_rate": 1.454767764488267e-05, + "loss": 0.1486, + "step": 35800 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546893859827882e-05, + "loss": 0.1071, + "step": 35805 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546110074773094e-05, + "loss": 0.115, + "step": 35810 + }, + { + "epoch": 1.67, + "learning_rate": 1.454532628971831e-05, + "loss": 0.1783, + "step": 35815 + }, + { + "epoch": 1.67, + "learning_rate": 1.4544542504663522e-05, + "loss": 0.1416, + "step": 35820 + }, + { + "epoch": 1.67, + "learning_rate": 1.4543758719608736e-05, + "loss": 0.2712, + "step": 35825 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542974934553948e-05, + "loss": 0.3227, + "step": 35830 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542191149499164e-05, + "loss": 0.3159, + "step": 35835 + }, + { + "epoch": 1.67, + "learning_rate": 1.4541407364444376e-05, + "loss": 0.0305, + "step": 35840 + }, + { + "epoch": 1.67, + "learning_rate": 1.454062357938959e-05, + "loss": 0.0921, + "step": 35845 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539839794334802e-05, + "loss": 0.0471, + "step": 35850 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539056009280017e-05, + "loss": 0.1261, + "step": 35855 + }, + { + "epoch": 1.67, + "learning_rate": 1.453827222422523e-05, + "loss": 0.1288, + "step": 35860 + }, + { + "epoch": 1.67, + "learning_rate": 1.4537488439170444e-05, + "loss": 0.1459, + "step": 35865 + }, + { + "epoch": 1.67, + "learning_rate": 1.4536704654115656e-05, + "loss": 0.141, + "step": 35870 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535920869060868e-05, + "loss": 0.2848, + "step": 35875 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535137084006084e-05, + "loss": 0.3458, + "step": 35880 + }, + { + "epoch": 1.67, + "learning_rate": 1.4534353298951296e-05, + "loss": 0.4117, + "step": 35885 + }, + { + "epoch": 1.67, + "learning_rate": 1.453356951389651e-05, + "loss": 0.0384, + "step": 35890 + }, + { + "epoch": 1.67, + "learning_rate": 1.4532785728841724e-05, + "loss": 0.1041, + "step": 35895 + }, + { + "epoch": 1.68, + "learning_rate": 1.4532001943786938e-05, + "loss": 0.1218, + "step": 35900 + }, + { + "epoch": 1.68, + "learning_rate": 1.453121815873215e-05, + "loss": 0.1158, + "step": 35905 + }, + { + "epoch": 1.68, + "learning_rate": 1.4530434373677365e-05, + "loss": 0.0852, + "step": 35910 + }, + { + "epoch": 1.68, + "learning_rate": 1.4529650588622578e-05, + "loss": 0.081, + "step": 35915 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528866803567791e-05, + "loss": 0.198, + "step": 35920 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528083018513004e-05, + "loss": 0.2399, + "step": 35925 + }, + { + "epoch": 1.68, + "learning_rate": 1.452729923345822e-05, + "loss": 0.2144, + "step": 35930 + }, + { + "epoch": 1.68, + "learning_rate": 1.4526515448403432e-05, + "loss": 0.2149, + "step": 35935 + }, + { + "epoch": 1.68, + "learning_rate": 1.4525731663348644e-05, + "loss": 0.0402, + "step": 35940 + }, + { + "epoch": 1.68, + "learning_rate": 1.4524947878293858e-05, + "loss": 0.0718, + "step": 35945 + }, + { + "epoch": 1.68, + "learning_rate": 1.452416409323907e-05, + "loss": 0.0818, + "step": 35950 + }, + { + "epoch": 1.68, + "learning_rate": 1.4523380308184285e-05, + "loss": 0.0695, + "step": 35955 + }, + { + "epoch": 1.68, + "learning_rate": 1.4522596523129498e-05, + "loss": 0.0995, + "step": 35960 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521812738074712e-05, + "loss": 0.1538, + "step": 35965 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521028953019924e-05, + "loss": 0.2184, + "step": 35970 + }, + { + "epoch": 1.68, + "learning_rate": 1.452024516796514e-05, + "loss": 0.1701, + "step": 35975 + }, + { + "epoch": 1.68, + "learning_rate": 1.4519461382910352e-05, + "loss": 0.401, + "step": 35980 + }, + { + "epoch": 1.68, + "learning_rate": 1.4518677597855565e-05, + "loss": 0.3014, + "step": 35985 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517893812800778e-05, + "loss": 0.047, + "step": 35990 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517110027745993e-05, + "loss": 0.0691, + "step": 35995 + }, + { + "epoch": 1.68, + "learning_rate": 1.4516326242691206e-05, + "loss": 0.1155, + "step": 36000 + }, + { + "epoch": 1.68, + "learning_rate": 1.4515542457636418e-05, + "loss": 0.1304, + "step": 36005 + }, + { + "epoch": 1.68, + "learning_rate": 1.4514758672581633e-05, + "loss": 0.1563, + "step": 36010 + }, + { + "epoch": 1.68, + "learning_rate": 1.4513974887526846e-05, + "loss": 0.1016, + "step": 36015 + }, + { + "epoch": 1.68, + "learning_rate": 1.451319110247206e-05, + "loss": 0.186, + "step": 36020 + }, + { + "epoch": 1.68, + "learning_rate": 1.4512407317417272e-05, + "loss": 0.203, + "step": 36025 + }, + { + "epoch": 1.68, + "learning_rate": 1.4511623532362487e-05, + "loss": 0.3696, + "step": 36030 + }, + { + "epoch": 1.68, + "learning_rate": 1.45108397473077e-05, + "loss": 0.2746, + "step": 36035 + }, + { + "epoch": 1.68, + "learning_rate": 1.4510055962252913e-05, + "loss": 0.0431, + "step": 36040 + }, + { + "epoch": 1.68, + "learning_rate": 1.4509272177198126e-05, + "loss": 0.0458, + "step": 36045 + }, + { + "epoch": 1.68, + "learning_rate": 1.4508488392143341e-05, + "loss": 0.068, + "step": 36050 + }, + { + "epoch": 1.68, + "learning_rate": 1.4507704607088553e-05, + "loss": 0.156, + "step": 36055 + }, + { + "epoch": 1.68, + "learning_rate": 1.4506920822033767e-05, + "loss": 0.0859, + "step": 36060 + }, + { + "epoch": 1.68, + "learning_rate": 1.450613703697898e-05, + "loss": 0.1969, + "step": 36065 + }, + { + "epoch": 1.68, + "learning_rate": 1.4505353251924192e-05, + "loss": 0.1335, + "step": 36070 + }, + { + "epoch": 1.68, + "learning_rate": 1.4504569466869407e-05, + "loss": 0.2158, + "step": 36075 + }, + { + "epoch": 1.68, + "learning_rate": 1.450378568181462e-05, + "loss": 0.2714, + "step": 36080 + }, + { + "epoch": 1.68, + "learning_rate": 1.4503001896759833e-05, + "loss": 0.2969, + "step": 36085 + }, + { + "epoch": 1.68, + "learning_rate": 1.4502218111705046e-05, + "loss": 0.0216, + "step": 36090 + }, + { + "epoch": 1.68, + "learning_rate": 1.4501434326650261e-05, + "loss": 0.0438, + "step": 36095 + }, + { + "epoch": 1.68, + "learning_rate": 1.4500650541595473e-05, + "loss": 0.0988, + "step": 36100 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499866756540687e-05, + "loss": 0.0888, + "step": 36105 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499082971485901e-05, + "loss": 0.0665, + "step": 36110 + }, + { + "epoch": 1.69, + "learning_rate": 1.4498299186431115e-05, + "loss": 0.1506, + "step": 36115 + }, + { + "epoch": 1.69, + "learning_rate": 1.4497515401376327e-05, + "loss": 0.1244, + "step": 36120 + }, + { + "epoch": 1.69, + "learning_rate": 1.4496731616321541e-05, + "loss": 0.1407, + "step": 36125 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495947831266755e-05, + "loss": 0.389, + "step": 36130 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495164046211967e-05, + "loss": 0.2841, + "step": 36135 + }, + { + "epoch": 1.69, + "learning_rate": 1.4494380261157181e-05, + "loss": 0.0355, + "step": 36140 + }, + { + "epoch": 1.69, + "learning_rate": 1.4493596476102394e-05, + "loss": 0.1308, + "step": 36145 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492812691047609e-05, + "loss": 0.0888, + "step": 36150 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492028905992821e-05, + "loss": 0.1226, + "step": 36155 + }, + { + "epoch": 1.69, + "learning_rate": 1.4491245120938035e-05, + "loss": 0.1033, + "step": 36160 + }, + { + "epoch": 1.69, + "learning_rate": 1.4490461335883247e-05, + "loss": 0.1986, + "step": 36165 + }, + { + "epoch": 1.69, + "learning_rate": 1.4489677550828463e-05, + "loss": 0.1476, + "step": 36170 + }, + { + "epoch": 1.69, + "learning_rate": 1.4488893765773675e-05, + "loss": 0.3752, + "step": 36175 + }, + { + "epoch": 1.69, + "learning_rate": 1.448810998071889e-05, + "loss": 0.3593, + "step": 36180 + }, + { + "epoch": 1.69, + "learning_rate": 1.4487326195664101e-05, + "loss": 0.3019, + "step": 36185 + }, + { + "epoch": 1.69, + "learning_rate": 1.4486542410609317e-05, + "loss": 0.0502, + "step": 36190 + }, + { + "epoch": 1.69, + "learning_rate": 1.448575862555453e-05, + "loss": 0.0869, + "step": 36195 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484974840499741e-05, + "loss": 0.0769, + "step": 36200 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484191055444955e-05, + "loss": 0.0761, + "step": 36205 + }, + { + "epoch": 1.69, + "learning_rate": 1.448340727039017e-05, + "loss": 0.1277, + "step": 36210 + }, + { + "epoch": 1.69, + "learning_rate": 1.4482623485335383e-05, + "loss": 0.1541, + "step": 36215 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481839700280595e-05, + "loss": 0.1727, + "step": 36220 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481055915225811e-05, + "loss": 0.2071, + "step": 36225 + }, + { + "epoch": 1.69, + "learning_rate": 1.4480272130171023e-05, + "loss": 0.2472, + "step": 36230 + }, + { + "epoch": 1.69, + "learning_rate": 1.4479488345116237e-05, + "loss": 0.1617, + "step": 36235 + }, + { + "epoch": 1.69, + "learning_rate": 1.447870456006145e-05, + "loss": 0.0486, + "step": 36240 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477920775006665e-05, + "loss": 0.029, + "step": 36245 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477136989951877e-05, + "loss": 0.0703, + "step": 36250 + }, + { + "epoch": 1.69, + "learning_rate": 1.4476353204897091e-05, + "loss": 0.0847, + "step": 36255 + }, + { + "epoch": 1.69, + "learning_rate": 1.4475569419842303e-05, + "loss": 0.1484, + "step": 36260 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474785634787515e-05, + "loss": 0.1272, + "step": 36265 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474001849732731e-05, + "loss": 0.1129, + "step": 36270 + }, + { + "epoch": 1.69, + "learning_rate": 1.4473218064677943e-05, + "loss": 0.1698, + "step": 36275 + }, + { + "epoch": 1.69, + "learning_rate": 1.4472434279623157e-05, + "loss": 0.2721, + "step": 36280 + }, + { + "epoch": 1.69, + "learning_rate": 1.447165049456837e-05, + "loss": 0.2507, + "step": 36285 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470866709513585e-05, + "loss": 0.1443, + "step": 36290 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470082924458797e-05, + "loss": 0.0702, + "step": 36295 + }, + { + "epoch": 1.69, + "learning_rate": 1.4469299139404011e-05, + "loss": 0.0486, + "step": 36300 + }, + { + "epoch": 1.69, + "learning_rate": 1.4468515354349223e-05, + "loss": 0.0962, + "step": 36305 + }, + { + "epoch": 1.69, + "learning_rate": 1.4467731569294439e-05, + "loss": 0.1491, + "step": 36310 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466947784239651e-05, + "loss": 0.1473, + "step": 36315 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466163999184865e-05, + "loss": 0.1861, + "step": 36320 + }, + { + "epoch": 1.69, + "learning_rate": 1.4465380214130079e-05, + "loss": 0.2508, + "step": 36325 + }, + { + "epoch": 1.7, + "learning_rate": 1.4464596429075291e-05, + "loss": 0.1919, + "step": 36330 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463812644020505e-05, + "loss": 0.329, + "step": 36335 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463028858965717e-05, + "loss": 0.0241, + "step": 36340 + }, + { + "epoch": 1.7, + "learning_rate": 1.4462245073910933e-05, + "loss": 0.0493, + "step": 36345 + }, + { + "epoch": 1.7, + "learning_rate": 1.4461461288856145e-05, + "loss": 0.0885, + "step": 36350 + }, + { + "epoch": 1.7, + "learning_rate": 1.4460677503801359e-05, + "loss": 0.0676, + "step": 36355 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459893718746571e-05, + "loss": 0.138, + "step": 36360 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459109933691787e-05, + "loss": 0.1602, + "step": 36365 + }, + { + "epoch": 1.7, + "learning_rate": 1.4458326148636999e-05, + "loss": 0.1771, + "step": 36370 + }, + { + "epoch": 1.7, + "learning_rate": 1.4457542363582213e-05, + "loss": 0.207, + "step": 36375 + }, + { + "epoch": 1.7, + "learning_rate": 1.4456758578527425e-05, + "loss": 0.3872, + "step": 36380 + }, + { + "epoch": 1.7, + "learning_rate": 1.445597479347264e-05, + "loss": 0.4241, + "step": 36385 + }, + { + "epoch": 1.7, + "learning_rate": 1.4455191008417853e-05, + "loss": 0.0311, + "step": 36390 + }, + { + "epoch": 1.7, + "learning_rate": 1.4454407223363065e-05, + "loss": 0.0573, + "step": 36395 + }, + { + "epoch": 1.7, + "learning_rate": 1.4453623438308279e-05, + "loss": 0.0677, + "step": 36400 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452839653253491e-05, + "loss": 0.1657, + "step": 36405 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452055868198707e-05, + "loss": 0.0953, + "step": 36410 + }, + { + "epoch": 1.7, + "learning_rate": 1.4451272083143919e-05, + "loss": 0.1371, + "step": 36415 + }, + { + "epoch": 1.7, + "learning_rate": 1.4450488298089133e-05, + "loss": 0.1549, + "step": 36420 + }, + { + "epoch": 1.7, + "learning_rate": 1.4449704513034347e-05, + "loss": 0.1376, + "step": 36425 + }, + { + "epoch": 1.7, + "learning_rate": 1.444892072797956e-05, + "loss": 0.3851, + "step": 36430 + }, + { + "epoch": 1.7, + "learning_rate": 1.4448136942924773e-05, + "loss": 0.2807, + "step": 36435 + }, + { + "epoch": 1.7, + "learning_rate": 1.4447353157869989e-05, + "loss": 0.0163, + "step": 36440 + }, + { + "epoch": 1.7, + "learning_rate": 1.44465693728152e-05, + "loss": 0.0581, + "step": 36445 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445785587760415e-05, + "loss": 0.056, + "step": 36450 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445001802705627e-05, + "loss": 0.1045, + "step": 36455 + }, + { + "epoch": 1.7, + "learning_rate": 1.4444218017650839e-05, + "loss": 0.0543, + "step": 36460 + }, + { + "epoch": 1.7, + "learning_rate": 1.4443434232596055e-05, + "loss": 0.1655, + "step": 36465 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442807204552225e-05, + "loss": 0.1582, + "step": 36470 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442023419497437e-05, + "loss": 0.2369, + "step": 36475 + }, + { + "epoch": 1.7, + "learning_rate": 1.4441239634442651e-05, + "loss": 0.2963, + "step": 36480 + }, + { + "epoch": 1.7, + "learning_rate": 1.4440455849387865e-05, + "loss": 0.2817, + "step": 36485 + }, + { + "epoch": 1.7, + "learning_rate": 1.4439672064333079e-05, + "loss": 0.0662, + "step": 36490 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438888279278291e-05, + "loss": 0.0917, + "step": 36495 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438104494223505e-05, + "loss": 0.0448, + "step": 36500 + }, + { + "epoch": 1.7, + "learning_rate": 1.4437320709168719e-05, + "loss": 0.1574, + "step": 36505 + }, + { + "epoch": 1.7, + "learning_rate": 1.4436536924113933e-05, + "loss": 0.1734, + "step": 36510 + }, + { + "epoch": 1.7, + "learning_rate": 1.4435753139059145e-05, + "loss": 0.1669, + "step": 36515 + }, + { + "epoch": 1.7, + "learning_rate": 1.443496935400436e-05, + "loss": 0.1831, + "step": 36520 + }, + { + "epoch": 1.7, + "learning_rate": 1.4434185568949573e-05, + "loss": 0.1776, + "step": 36525 + }, + { + "epoch": 1.7, + "learning_rate": 1.4433401783894785e-05, + "loss": 0.3592, + "step": 36530 + }, + { + "epoch": 1.7, + "learning_rate": 1.4432617998839999e-05, + "loss": 0.2814, + "step": 36535 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431834213785211e-05, + "loss": 0.0311, + "step": 36540 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431050428730427e-05, + "loss": 0.091, + "step": 36545 + }, + { + "epoch": 1.71, + "learning_rate": 1.443026664367564e-05, + "loss": 0.0954, + "step": 36550 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429482858620853e-05, + "loss": 0.102, + "step": 36555 + }, + { + "epoch": 1.71, + "learning_rate": 1.4428699073566065e-05, + "loss": 0.1249, + "step": 36560 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427915288511281e-05, + "loss": 0.1781, + "step": 36565 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427131503456493e-05, + "loss": 0.122, + "step": 36570 + }, + { + "epoch": 1.71, + "learning_rate": 1.4426347718401707e-05, + "loss": 0.2721, + "step": 36575 + }, + { + "epoch": 1.71, + "learning_rate": 1.442556393334692e-05, + "loss": 0.3878, + "step": 36580 + }, + { + "epoch": 1.71, + "learning_rate": 1.4424780148292135e-05, + "loss": 0.2806, + "step": 36585 + }, + { + "epoch": 1.71, + "learning_rate": 1.4423996363237347e-05, + "loss": 0.0162, + "step": 36590 + }, + { + "epoch": 1.71, + "learning_rate": 1.442321257818256e-05, + "loss": 0.1438, + "step": 36595 + }, + { + "epoch": 1.71, + "learning_rate": 1.4422428793127775e-05, + "loss": 0.0547, + "step": 36600 + }, + { + "epoch": 1.71, + "learning_rate": 1.4421645008072987e-05, + "loss": 0.0748, + "step": 36605 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420861223018201e-05, + "loss": 0.071, + "step": 36610 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420077437963413e-05, + "loss": 0.0877, + "step": 36615 + }, + { + "epoch": 1.71, + "learning_rate": 1.4419293652908629e-05, + "loss": 0.0916, + "step": 36620 + }, + { + "epoch": 1.71, + "learning_rate": 1.4418509867853841e-05, + "loss": 0.29, + "step": 36625 + }, + { + "epoch": 1.71, + "learning_rate": 1.4417726082799055e-05, + "loss": 0.3652, + "step": 36630 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416942297744267e-05, + "loss": 0.3349, + "step": 36635 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416158512689483e-05, + "loss": 0.0536, + "step": 36640 + }, + { + "epoch": 1.71, + "learning_rate": 1.4415374727634695e-05, + "loss": 0.0458, + "step": 36645 + }, + { + "epoch": 1.71, + "learning_rate": 1.4414590942579909e-05, + "loss": 0.085, + "step": 36650 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413807157525121e-05, + "loss": 0.0993, + "step": 36655 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413023372470333e-05, + "loss": 0.1099, + "step": 36660 + }, + { + "epoch": 1.71, + "learning_rate": 1.4412239587415549e-05, + "loss": 0.112, + "step": 36665 + }, + { + "epoch": 1.71, + "learning_rate": 1.4411455802360761e-05, + "loss": 0.2057, + "step": 36670 + }, + { + "epoch": 1.71, + "learning_rate": 1.4410672017305975e-05, + "loss": 0.3171, + "step": 36675 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409888232251187e-05, + "loss": 0.296, + "step": 36680 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409104447196403e-05, + "loss": 0.2658, + "step": 36685 + }, + { + "epoch": 1.71, + "learning_rate": 1.4408320662141615e-05, + "loss": 0.0218, + "step": 36690 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407536877086829e-05, + "loss": 0.0514, + "step": 36695 + }, + { + "epoch": 1.71, + "learning_rate": 1.4406753092032043e-05, + "loss": 0.081, + "step": 36700 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405969306977257e-05, + "loss": 0.0679, + "step": 36705 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405185521922469e-05, + "loss": 0.1526, + "step": 36710 + }, + { + "epoch": 1.71, + "learning_rate": 1.4404401736867683e-05, + "loss": 0.1244, + "step": 36715 + }, + { + "epoch": 1.71, + "learning_rate": 1.4403617951812897e-05, + "loss": 0.1967, + "step": 36720 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402834166758109e-05, + "loss": 0.2807, + "step": 36725 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402050381703323e-05, + "loss": 0.3164, + "step": 36730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4401266596648535e-05, + "loss": 0.2543, + "step": 36735 + }, + { + "epoch": 1.71, + "learning_rate": 1.440048281159375e-05, + "loss": 0.0474, + "step": 36740 + }, + { + "epoch": 1.71, + "learning_rate": 1.4399699026538963e-05, + "loss": 0.0358, + "step": 36745 + }, + { + "epoch": 1.71, + "learning_rate": 1.4398915241484177e-05, + "loss": 0.0577, + "step": 36750 + }, + { + "epoch": 1.72, + "learning_rate": 1.4398131456429389e-05, + "loss": 0.1247, + "step": 36755 + }, + { + "epoch": 1.72, + "learning_rate": 1.4397347671374605e-05, + "loss": 0.0912, + "step": 36760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4396563886319817e-05, + "loss": 0.1512, + "step": 36765 + }, + { + "epoch": 1.72, + "learning_rate": 1.439578010126503e-05, + "loss": 0.187, + "step": 36770 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394996316210243e-05, + "loss": 0.2044, + "step": 36775 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394212531155459e-05, + "loss": 0.2219, + "step": 36780 + }, + { + "epoch": 1.72, + "learning_rate": 1.439342874610067e-05, + "loss": 0.3031, + "step": 36785 + }, + { + "epoch": 1.72, + "learning_rate": 1.4392644961045883e-05, + "loss": 0.0566, + "step": 36790 + }, + { + "epoch": 1.72, + "learning_rate": 1.4391861175991097e-05, + "loss": 0.05, + "step": 36795 + }, + { + "epoch": 1.72, + "learning_rate": 1.439107739093631e-05, + "loss": 0.1119, + "step": 36800 + }, + { + "epoch": 1.72, + "learning_rate": 1.4390293605881525e-05, + "loss": 0.1107, + "step": 36805 + }, + { + "epoch": 1.72, + "learning_rate": 1.4389509820826737e-05, + "loss": 0.198, + "step": 36810 + }, + { + "epoch": 1.72, + "learning_rate": 1.438872603577195e-05, + "loss": 0.1943, + "step": 36815 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387942250717165e-05, + "loss": 0.1967, + "step": 36820 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387158465662379e-05, + "loss": 0.1664, + "step": 36825 + }, + { + "epoch": 1.72, + "learning_rate": 1.438637468060759e-05, + "loss": 0.3422, + "step": 36830 + }, + { + "epoch": 1.72, + "learning_rate": 1.4385590895552806e-05, + "loss": 0.2327, + "step": 36835 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384807110498019e-05, + "loss": 0.0715, + "step": 36840 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384023325443233e-05, + "loss": 0.0976, + "step": 36845 + }, + { + "epoch": 1.72, + "learning_rate": 1.4383239540388445e-05, + "loss": 0.0848, + "step": 36850 + }, + { + "epoch": 1.72, + "learning_rate": 1.4382455755333657e-05, + "loss": 0.0524, + "step": 36855 + }, + { + "epoch": 1.72, + "learning_rate": 1.4381671970278873e-05, + "loss": 0.1459, + "step": 36860 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380888185224085e-05, + "loss": 0.1363, + "step": 36865 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380104400169299e-05, + "loss": 0.1704, + "step": 36870 + }, + { + "epoch": 1.72, + "learning_rate": 1.4379320615114511e-05, + "loss": 0.3359, + "step": 36875 + }, + { + "epoch": 1.72, + "learning_rate": 1.4378536830059726e-05, + "loss": 0.4377, + "step": 36880 + }, + { + "epoch": 1.72, + "learning_rate": 1.4377753045004939e-05, + "loss": 0.1847, + "step": 36885 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376969259950153e-05, + "loss": 0.0775, + "step": 36890 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376185474895365e-05, + "loss": 0.0722, + "step": 36895 + }, + { + "epoch": 1.72, + "learning_rate": 1.437540168984058e-05, + "loss": 0.106, + "step": 36900 + }, + { + "epoch": 1.72, + "learning_rate": 1.4374617904785793e-05, + "loss": 0.1095, + "step": 36905 + }, + { + "epoch": 1.72, + "learning_rate": 1.4373834119731007e-05, + "loss": 0.1045, + "step": 36910 + }, + { + "epoch": 1.72, + "learning_rate": 1.437305033467622e-05, + "loss": 0.1643, + "step": 36915 + }, + { + "epoch": 1.72, + "learning_rate": 1.4372266549621433e-05, + "loss": 0.1888, + "step": 36920 + }, + { + "epoch": 1.72, + "learning_rate": 1.4371482764566647e-05, + "loss": 0.1461, + "step": 36925 + }, + { + "epoch": 1.72, + "learning_rate": 1.4370698979511859e-05, + "loss": 0.36, + "step": 36930 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369915194457074e-05, + "loss": 0.1914, + "step": 36935 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369131409402287e-05, + "loss": 0.0292, + "step": 36940 + }, + { + "epoch": 1.72, + "learning_rate": 1.43683476243475e-05, + "loss": 0.0758, + "step": 36945 + }, + { + "epoch": 1.72, + "learning_rate": 1.4367563839292713e-05, + "loss": 0.074, + "step": 36950 + }, + { + "epoch": 1.72, + "learning_rate": 1.4366780054237928e-05, + "loss": 0.1499, + "step": 36955 + }, + { + "epoch": 1.72, + "learning_rate": 1.436599626918314e-05, + "loss": 0.1167, + "step": 36960 + }, + { + "epoch": 1.72, + "learning_rate": 1.4365212484128354e-05, + "loss": 0.0796, + "step": 36965 + }, + { + "epoch": 1.73, + "learning_rate": 1.4364428699073567e-05, + "loss": 0.0926, + "step": 36970 + }, + { + "epoch": 1.73, + "learning_rate": 1.4363644914018782e-05, + "loss": 0.2108, + "step": 36975 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362861128963994e-05, + "loss": 0.2106, + "step": 36980 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362077343909207e-05, + "loss": 0.2872, + "step": 36985 + }, + { + "epoch": 1.73, + "learning_rate": 1.436129355885442e-05, + "loss": 0.0477, + "step": 36990 + }, + { + "epoch": 1.73, + "learning_rate": 1.4360509773799633e-05, + "loss": 0.0257, + "step": 36995 + }, + { + "epoch": 1.73, + "learning_rate": 1.4359725988744848e-05, + "loss": 0.1252, + "step": 37000 + }, + { + "epoch": 1.73, + "learning_rate": 1.435894220369006e-05, + "loss": 0.1321, + "step": 37005 + }, + { + "epoch": 1.73, + "learning_rate": 1.4358158418635274e-05, + "loss": 0.1789, + "step": 37010 + }, + { + "epoch": 1.73, + "learning_rate": 1.4357374633580488e-05, + "loss": 0.1004, + "step": 37015 + }, + { + "epoch": 1.73, + "learning_rate": 1.4356590848525702e-05, + "loss": 0.1946, + "step": 37020 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355807063470914e-05, + "loss": 0.1881, + "step": 37025 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355023278416128e-05, + "loss": 0.3375, + "step": 37030 + }, + { + "epoch": 1.73, + "learning_rate": 1.4354239493361342e-05, + "loss": 0.2199, + "step": 37035 + }, + { + "epoch": 1.73, + "learning_rate": 1.4353455708306556e-05, + "loss": 0.0575, + "step": 37040 + }, + { + "epoch": 1.73, + "learning_rate": 1.4352671923251768e-05, + "loss": 0.0641, + "step": 37045 + }, + { + "epoch": 1.73, + "learning_rate": 1.435188813819698e-05, + "loss": 0.1354, + "step": 37050 + }, + { + "epoch": 1.73, + "learning_rate": 1.4351104353142196e-05, + "loss": 0.0793, + "step": 37055 + }, + { + "epoch": 1.73, + "learning_rate": 1.4350320568087408e-05, + "loss": 0.0634, + "step": 37060 + }, + { + "epoch": 1.73, + "learning_rate": 1.4349536783032622e-05, + "loss": 0.1773, + "step": 37065 + }, + { + "epoch": 1.73, + "learning_rate": 1.4348752997977835e-05, + "loss": 0.1752, + "step": 37070 + }, + { + "epoch": 1.73, + "learning_rate": 1.434796921292305e-05, + "loss": 0.1668, + "step": 37075 + }, + { + "epoch": 1.73, + "learning_rate": 1.4347185427868262e-05, + "loss": 0.3402, + "step": 37080 + }, + { + "epoch": 1.73, + "learning_rate": 1.4346401642813476e-05, + "loss": 0.2596, + "step": 37085 + }, + { + "epoch": 1.73, + "learning_rate": 1.4345617857758688e-05, + "loss": 0.0693, + "step": 37090 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344834072703904e-05, + "loss": 0.0821, + "step": 37095 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344050287649116e-05, + "loss": 0.0515, + "step": 37100 + }, + { + "epoch": 1.73, + "learning_rate": 1.434326650259433e-05, + "loss": 0.0693, + "step": 37105 + }, + { + "epoch": 1.73, + "learning_rate": 1.4342482717539542e-05, + "loss": 0.0675, + "step": 37110 + }, + { + "epoch": 1.73, + "learning_rate": 1.4341698932484756e-05, + "loss": 0.0614, + "step": 37115 + }, + { + "epoch": 1.73, + "learning_rate": 1.434091514742997e-05, + "loss": 0.1118, + "step": 37120 + }, + { + "epoch": 1.73, + "learning_rate": 1.4340131362375182e-05, + "loss": 0.1841, + "step": 37125 + }, + { + "epoch": 1.73, + "learning_rate": 1.4339347577320396e-05, + "loss": 0.4163, + "step": 37130 + }, + { + "epoch": 1.73, + "learning_rate": 1.433856379226561e-05, + "loss": 0.2525, + "step": 37135 + }, + { + "epoch": 1.73, + "learning_rate": 1.4337780007210824e-05, + "loss": 0.0489, + "step": 37140 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336996222156036e-05, + "loss": 0.0879, + "step": 37145 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336212437101252e-05, + "loss": 0.0851, + "step": 37150 + }, + { + "epoch": 1.73, + "learning_rate": 1.4335428652046464e-05, + "loss": 0.093, + "step": 37155 + }, + { + "epoch": 1.73, + "learning_rate": 1.4334644866991678e-05, + "loss": 0.161, + "step": 37160 + }, + { + "epoch": 1.73, + "learning_rate": 1.433386108193689e-05, + "loss": 0.1694, + "step": 37165 + }, + { + "epoch": 1.73, + "learning_rate": 1.4333077296882106e-05, + "loss": 0.17, + "step": 37170 + }, + { + "epoch": 1.73, + "learning_rate": 1.4332293511827318e-05, + "loss": 0.2063, + "step": 37175 + }, + { + "epoch": 1.73, + "learning_rate": 1.433150972677253e-05, + "loss": 0.2002, + "step": 37180 + }, + { + "epoch": 1.74, + "learning_rate": 1.4330725941717744e-05, + "loss": 0.2128, + "step": 37185 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329942156662956e-05, + "loss": 0.0525, + "step": 37190 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329158371608172e-05, + "loss": 0.1173, + "step": 37195 + }, + { + "epoch": 1.74, + "learning_rate": 1.4328374586553384e-05, + "loss": 0.1373, + "step": 37200 + }, + { + "epoch": 1.74, + "learning_rate": 1.4327590801498598e-05, + "loss": 0.0733, + "step": 37205 + }, + { + "epoch": 1.74, + "learning_rate": 1.432680701644381e-05, + "loss": 0.118, + "step": 37210 + }, + { + "epoch": 1.74, + "learning_rate": 1.4326023231389026e-05, + "loss": 0.1538, + "step": 37215 + }, + { + "epoch": 1.74, + "learning_rate": 1.4325239446334238e-05, + "loss": 0.1563, + "step": 37220 + }, + { + "epoch": 1.74, + "learning_rate": 1.4324455661279452e-05, + "loss": 0.2196, + "step": 37225 + }, + { + "epoch": 1.74, + "learning_rate": 1.4323671876224666e-05, + "loss": 0.3397, + "step": 37230 + }, + { + "epoch": 1.74, + "learning_rate": 1.432288809116988e-05, + "loss": 0.2154, + "step": 37235 + }, + { + "epoch": 1.74, + "learning_rate": 1.4322104306115092e-05, + "loss": 0.0617, + "step": 37240 + }, + { + "epoch": 1.74, + "learning_rate": 1.4321320521060304e-05, + "loss": 0.0488, + "step": 37245 + }, + { + "epoch": 1.74, + "learning_rate": 1.432053673600552e-05, + "loss": 0.1145, + "step": 37250 + }, + { + "epoch": 1.74, + "learning_rate": 1.4319752950950732e-05, + "loss": 0.1049, + "step": 37255 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318969165895946e-05, + "loss": 0.0923, + "step": 37260 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318185380841158e-05, + "loss": 0.1686, + "step": 37265 + }, + { + "epoch": 1.74, + "learning_rate": 1.4317401595786374e-05, + "loss": 0.1347, + "step": 37270 + }, + { + "epoch": 1.74, + "learning_rate": 1.4316617810731586e-05, + "loss": 0.161, + "step": 37275 + }, + { + "epoch": 1.74, + "learning_rate": 1.43158340256768e-05, + "loss": 0.2883, + "step": 37280 + }, + { + "epoch": 1.74, + "learning_rate": 1.4315050240622012e-05, + "loss": 0.2093, + "step": 37285 + }, + { + "epoch": 1.74, + "learning_rate": 1.4314266455567228e-05, + "loss": 0.0615, + "step": 37290 + }, + { + "epoch": 1.74, + "learning_rate": 1.431348267051244e-05, + "loss": 0.058, + "step": 37295 + }, + { + "epoch": 1.74, + "learning_rate": 1.4312698885457654e-05, + "loss": 0.0953, + "step": 37300 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311915100402866e-05, + "loss": 0.0668, + "step": 37305 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311131315348078e-05, + "loss": 0.1407, + "step": 37310 + }, + { + "epoch": 1.74, + "learning_rate": 1.4310347530293294e-05, + "loss": 0.1212, + "step": 37315 + }, + { + "epoch": 1.74, + "learning_rate": 1.4309563745238506e-05, + "loss": 0.1046, + "step": 37320 + }, + { + "epoch": 1.74, + "learning_rate": 1.430877996018372e-05, + "loss": 0.1611, + "step": 37325 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307996175128934e-05, + "loss": 0.3317, + "step": 37330 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307212390074148e-05, + "loss": 0.2498, + "step": 37335 + }, + { + "epoch": 1.74, + "learning_rate": 1.430642860501936e-05, + "loss": 0.0457, + "step": 37340 + }, + { + "epoch": 1.74, + "learning_rate": 1.4305644819964574e-05, + "loss": 0.0306, + "step": 37345 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304861034909788e-05, + "loss": 0.0676, + "step": 37350 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304077249855002e-05, + "loss": 0.0872, + "step": 37355 + }, + { + "epoch": 1.74, + "learning_rate": 1.4303293464800214e-05, + "loss": 0.1829, + "step": 37360 + }, + { + "epoch": 1.74, + "learning_rate": 1.430250967974543e-05, + "loss": 0.1626, + "step": 37365 + }, + { + "epoch": 1.74, + "learning_rate": 1.4301725894690642e-05, + "loss": 0.1964, + "step": 37370 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300942109635854e-05, + "loss": 0.2105, + "step": 37375 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300158324581068e-05, + "loss": 0.3942, + "step": 37380 + }, + { + "epoch": 1.74, + "learning_rate": 1.429937453952628e-05, + "loss": 0.2594, + "step": 37385 + }, + { + "epoch": 1.74, + "learning_rate": 1.4298590754471496e-05, + "loss": 0.0395, + "step": 37390 + }, + { + "epoch": 1.74, + "learning_rate": 1.4297806969416708e-05, + "loss": 0.0307, + "step": 37395 + }, + { + "epoch": 1.75, + "learning_rate": 1.4297023184361922e-05, + "loss": 0.0373, + "step": 37400 + }, + { + "epoch": 1.75, + "learning_rate": 1.4296239399307134e-05, + "loss": 0.06, + "step": 37405 + }, + { + "epoch": 1.75, + "learning_rate": 1.429545561425235e-05, + "loss": 0.1288, + "step": 37410 + }, + { + "epoch": 1.75, + "learning_rate": 1.4294671829197562e-05, + "loss": 0.1277, + "step": 37415 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293888044142776e-05, + "loss": 0.1357, + "step": 37420 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293104259087988e-05, + "loss": 0.1732, + "step": 37425 + }, + { + "epoch": 1.75, + "learning_rate": 1.4292320474033204e-05, + "loss": 0.2856, + "step": 37430 + }, + { + "epoch": 1.75, + "learning_rate": 1.4291536688978416e-05, + "loss": 0.2645, + "step": 37435 + }, + { + "epoch": 1.75, + "learning_rate": 1.4290752903923628e-05, + "loss": 0.0783, + "step": 37440 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289969118868842e-05, + "loss": 0.0332, + "step": 37445 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289185333814056e-05, + "loss": 0.0371, + "step": 37450 + }, + { + "epoch": 1.75, + "learning_rate": 1.428840154875927e-05, + "loss": 0.1575, + "step": 37455 + }, + { + "epoch": 1.75, + "learning_rate": 1.4287617763704482e-05, + "loss": 0.094, + "step": 37460 + }, + { + "epoch": 1.75, + "learning_rate": 1.4286833978649698e-05, + "loss": 0.0949, + "step": 37465 + }, + { + "epoch": 1.75, + "learning_rate": 1.428605019359491e-05, + "loss": 0.1676, + "step": 37470 + }, + { + "epoch": 1.75, + "learning_rate": 1.4285266408540124e-05, + "loss": 0.1947, + "step": 37475 + }, + { + "epoch": 1.75, + "learning_rate": 1.4284482623485336e-05, + "loss": 0.1995, + "step": 37480 + }, + { + "epoch": 1.75, + "learning_rate": 1.4283698838430551e-05, + "loss": 0.2297, + "step": 37485 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282915053375764e-05, + "loss": 0.0635, + "step": 37490 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282131268320978e-05, + "loss": 0.049, + "step": 37495 + }, + { + "epoch": 1.75, + "learning_rate": 1.428134748326619e-05, + "loss": 0.1047, + "step": 37500 + }, + { + "epoch": 1.75, + "learning_rate": 1.4280563698211402e-05, + "loss": 0.125, + "step": 37505 + }, + { + "epoch": 1.75, + "learning_rate": 1.4279779913156618e-05, + "loss": 0.1367, + "step": 37510 + }, + { + "epoch": 1.75, + "learning_rate": 1.427899612810183e-05, + "loss": 0.16, + "step": 37515 + }, + { + "epoch": 1.75, + "learning_rate": 1.4278212343047044e-05, + "loss": 0.2038, + "step": 37520 + }, + { + "epoch": 1.75, + "learning_rate": 1.4277428557992256e-05, + "loss": 0.2235, + "step": 37525 + }, + { + "epoch": 1.75, + "learning_rate": 1.4276644772937472e-05, + "loss": 0.428, + "step": 37530 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275860987882684e-05, + "loss": 0.3868, + "step": 37535 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275077202827898e-05, + "loss": 0.0283, + "step": 37540 + }, + { + "epoch": 1.75, + "learning_rate": 1.4274293417773112e-05, + "loss": 0.0262, + "step": 37545 + }, + { + "epoch": 1.75, + "learning_rate": 1.4273509632718325e-05, + "loss": 0.0667, + "step": 37550 + }, + { + "epoch": 1.75, + "learning_rate": 1.4272725847663538e-05, + "loss": 0.1034, + "step": 37555 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271942062608752e-05, + "loss": 0.1123, + "step": 37560 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271158277553965e-05, + "loss": 0.1533, + "step": 37565 + }, + { + "epoch": 1.75, + "learning_rate": 1.4270374492499178e-05, + "loss": 0.0854, + "step": 37570 + }, + { + "epoch": 1.75, + "learning_rate": 1.4269590707444392e-05, + "loss": 0.2158, + "step": 37575 + }, + { + "epoch": 1.75, + "learning_rate": 1.4268806922389604e-05, + "loss": 0.3784, + "step": 37580 + }, + { + "epoch": 1.75, + "learning_rate": 1.426802313733482e-05, + "loss": 0.2143, + "step": 37585 + }, + { + "epoch": 1.75, + "learning_rate": 1.4267239352280032e-05, + "loss": 0.0696, + "step": 37590 + }, + { + "epoch": 1.75, + "learning_rate": 1.4266455567225246e-05, + "loss": 0.0339, + "step": 37595 + }, + { + "epoch": 1.75, + "learning_rate": 1.4265671782170458e-05, + "loss": 0.0495, + "step": 37600 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264887997115673e-05, + "loss": 0.1684, + "step": 37605 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264104212060886e-05, + "loss": 0.184, + "step": 37610 + }, + { + "epoch": 1.76, + "learning_rate": 1.42633204270061e-05, + "loss": 0.1755, + "step": 37615 + }, + { + "epoch": 1.76, + "learning_rate": 1.4262536641951312e-05, + "loss": 0.1916, + "step": 37620 + }, + { + "epoch": 1.76, + "learning_rate": 1.4261752856896527e-05, + "loss": 0.178, + "step": 37625 + }, + { + "epoch": 1.76, + "learning_rate": 1.426096907184174e-05, + "loss": 0.2962, + "step": 37630 + }, + { + "epoch": 1.76, + "learning_rate": 1.4260185286786952e-05, + "loss": 0.1308, + "step": 37635 + }, + { + "epoch": 1.76, + "learning_rate": 1.4259401501732166e-05, + "loss": 0.0454, + "step": 37640 + }, + { + "epoch": 1.76, + "learning_rate": 1.425861771667738e-05, + "loss": 0.0986, + "step": 37645 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257833931622593e-05, + "loss": 0.0789, + "step": 37650 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257050146567806e-05, + "loss": 0.1297, + "step": 37655 + }, + { + "epoch": 1.76, + "learning_rate": 1.425626636151302e-05, + "loss": 0.0659, + "step": 37660 + }, + { + "epoch": 1.76, + "learning_rate": 1.4255482576458233e-05, + "loss": 0.1486, + "step": 37665 + }, + { + "epoch": 1.76, + "learning_rate": 1.4254698791403447e-05, + "loss": 0.1687, + "step": 37670 + }, + { + "epoch": 1.76, + "learning_rate": 1.425391500634866e-05, + "loss": 0.1958, + "step": 37675 + }, + { + "epoch": 1.76, + "learning_rate": 1.4253131221293875e-05, + "loss": 0.3865, + "step": 37680 + }, + { + "epoch": 1.76, + "learning_rate": 1.4252347436239087e-05, + "loss": 0.3559, + "step": 37685 + }, + { + "epoch": 1.76, + "learning_rate": 1.4251563651184301e-05, + "loss": 0.0624, + "step": 37690 + }, + { + "epoch": 1.76, + "learning_rate": 1.4250779866129513e-05, + "loss": 0.0917, + "step": 37695 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249996081074726e-05, + "loss": 0.0923, + "step": 37700 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249212296019941e-05, + "loss": 0.1211, + "step": 37705 + }, + { + "epoch": 1.76, + "learning_rate": 1.4248428510965153e-05, + "loss": 0.1117, + "step": 37710 + }, + { + "epoch": 1.76, + "learning_rate": 1.4247644725910367e-05, + "loss": 0.2351, + "step": 37715 + }, + { + "epoch": 1.76, + "learning_rate": 1.424686094085558e-05, + "loss": 0.1649, + "step": 37720 + }, + { + "epoch": 1.76, + "learning_rate": 1.4246077155800795e-05, + "loss": 0.1855, + "step": 37725 + }, + { + "epoch": 1.76, + "learning_rate": 1.4245293370746007e-05, + "loss": 0.3319, + "step": 37730 + }, + { + "epoch": 1.76, + "learning_rate": 1.4244509585691221e-05, + "loss": 0.2802, + "step": 37735 + }, + { + "epoch": 1.76, + "learning_rate": 1.4243725800636434e-05, + "loss": 0.0575, + "step": 37740 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242942015581649e-05, + "loss": 0.0562, + "step": 37745 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242158230526861e-05, + "loss": 0.0816, + "step": 37750 + }, + { + "epoch": 1.76, + "learning_rate": 1.4241374445472075e-05, + "loss": 0.0493, + "step": 37755 + }, + { + "epoch": 1.76, + "learning_rate": 1.424059066041729e-05, + "loss": 0.1505, + "step": 37760 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239806875362501e-05, + "loss": 0.1146, + "step": 37765 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239023090307715e-05, + "loss": 0.2125, + "step": 37770 + }, + { + "epoch": 1.76, + "learning_rate": 1.4238239305252927e-05, + "loss": 0.2498, + "step": 37775 + }, + { + "epoch": 1.76, + "learning_rate": 1.4237455520198143e-05, + "loss": 0.3085, + "step": 37780 + }, + { + "epoch": 1.76, + "learning_rate": 1.4236671735143355e-05, + "loss": 0.4248, + "step": 37785 + }, + { + "epoch": 1.76, + "learning_rate": 1.423588795008857e-05, + "loss": 0.0457, + "step": 37790 + }, + { + "epoch": 1.76, + "learning_rate": 1.4235104165033781e-05, + "loss": 0.0464, + "step": 37795 + }, + { + "epoch": 1.76, + "learning_rate": 1.4234320379978997e-05, + "loss": 0.0329, + "step": 37800 + }, + { + "epoch": 1.76, + "learning_rate": 1.423353659492421e-05, + "loss": 0.1443, + "step": 37805 + }, + { + "epoch": 1.76, + "learning_rate": 1.4232752809869423e-05, + "loss": 0.1504, + "step": 37810 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231969024814635e-05, + "loss": 0.1618, + "step": 37815 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231185239759851e-05, + "loss": 0.1992, + "step": 37820 + }, + { + "epoch": 1.76, + "learning_rate": 1.4230401454705063e-05, + "loss": 0.1979, + "step": 37825 + }, + { + "epoch": 1.77, + "learning_rate": 1.4229617669650275e-05, + "loss": 0.3638, + "step": 37830 + }, + { + "epoch": 1.77, + "learning_rate": 1.422883388459549e-05, + "loss": 0.2355, + "step": 37835 + }, + { + "epoch": 1.77, + "learning_rate": 1.4228050099540701e-05, + "loss": 0.0892, + "step": 37840 + }, + { + "epoch": 1.77, + "learning_rate": 1.4227266314485917e-05, + "loss": 0.0729, + "step": 37845 + }, + { + "epoch": 1.77, + "learning_rate": 1.422648252943113e-05, + "loss": 0.1274, + "step": 37850 + }, + { + "epoch": 1.77, + "learning_rate": 1.4225698744376343e-05, + "loss": 0.0896, + "step": 37855 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224914959321557e-05, + "loss": 0.0717, + "step": 37860 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224131174266771e-05, + "loss": 0.1184, + "step": 37865 + }, + { + "epoch": 1.77, + "learning_rate": 1.4223347389211983e-05, + "loss": 0.1207, + "step": 37870 + }, + { + "epoch": 1.77, + "learning_rate": 1.4222563604157197e-05, + "loss": 0.2518, + "step": 37875 + }, + { + "epoch": 1.77, + "learning_rate": 1.4221779819102411e-05, + "loss": 0.3498, + "step": 37880 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220996034047625e-05, + "loss": 0.3115, + "step": 37885 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220212248992837e-05, + "loss": 0.0583, + "step": 37890 + }, + { + "epoch": 1.77, + "learning_rate": 1.421942846393805e-05, + "loss": 0.0557, + "step": 37895 + }, + { + "epoch": 1.77, + "learning_rate": 1.4218644678883265e-05, + "loss": 0.0753, + "step": 37900 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217860893828477e-05, + "loss": 0.0799, + "step": 37905 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217077108773691e-05, + "loss": 0.0979, + "step": 37910 + }, + { + "epoch": 1.77, + "learning_rate": 1.4216293323718903e-05, + "loss": 0.0891, + "step": 37915 + }, + { + "epoch": 1.77, + "learning_rate": 1.4215509538664119e-05, + "loss": 0.1499, + "step": 37920 + }, + { + "epoch": 1.77, + "learning_rate": 1.4214725753609331e-05, + "loss": 0.1411, + "step": 37925 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213941968554545e-05, + "loss": 0.2998, + "step": 37930 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213158183499757e-05, + "loss": 0.2268, + "step": 37935 + }, + { + "epoch": 1.77, + "learning_rate": 1.4212374398444973e-05, + "loss": 0.0357, + "step": 37940 + }, + { + "epoch": 1.77, + "learning_rate": 1.4211590613390185e-05, + "loss": 0.0426, + "step": 37945 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210806828335399e-05, + "loss": 0.0794, + "step": 37950 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210023043280611e-05, + "loss": 0.1651, + "step": 37955 + }, + { + "epoch": 1.77, + "learning_rate": 1.4209239258225825e-05, + "loss": 0.1167, + "step": 37960 + }, + { + "epoch": 1.77, + "learning_rate": 1.4208455473171039e-05, + "loss": 0.1324, + "step": 37965 + }, + { + "epoch": 1.77, + "learning_rate": 1.4207671688116251e-05, + "loss": 0.1587, + "step": 37970 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206887903061465e-05, + "loss": 0.2, + "step": 37975 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206104118006679e-05, + "loss": 0.3415, + "step": 37980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4205320332951893e-05, + "loss": 0.24, + "step": 37985 + }, + { + "epoch": 1.77, + "learning_rate": 1.4204536547897105e-05, + "loss": 0.0173, + "step": 37990 + }, + { + "epoch": 1.77, + "learning_rate": 1.420375276284232e-05, + "loss": 0.0568, + "step": 37995 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202968977787533e-05, + "loss": 0.0666, + "step": 38000 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202185192732747e-05, + "loss": 0.1213, + "step": 38005 + }, + { + "epoch": 1.77, + "learning_rate": 1.4201401407677959e-05, + "loss": 0.1022, + "step": 38010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4200617622623175e-05, + "loss": 0.1468, + "step": 38015 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199833837568387e-05, + "loss": 0.2344, + "step": 38020 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199050052513599e-05, + "loss": 0.2398, + "step": 38025 + }, + { + "epoch": 1.77, + "learning_rate": 1.4198266267458813e-05, + "loss": 0.3248, + "step": 38030 + }, + { + "epoch": 1.77, + "learning_rate": 1.4197482482404025e-05, + "loss": 0.2467, + "step": 38035 + }, + { + "epoch": 1.77, + "learning_rate": 1.419669869734924e-05, + "loss": 0.0681, + "step": 38040 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195914912294453e-05, + "loss": 0.0525, + "step": 38045 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195131127239667e-05, + "loss": 0.0501, + "step": 38050 + }, + { + "epoch": 1.78, + "learning_rate": 1.4194347342184879e-05, + "loss": 0.0514, + "step": 38055 + }, + { + "epoch": 1.78, + "learning_rate": 1.4193563557130095e-05, + "loss": 0.1218, + "step": 38060 + }, + { + "epoch": 1.78, + "learning_rate": 1.4192779772075307e-05, + "loss": 0.1249, + "step": 38065 + }, + { + "epoch": 1.78, + "learning_rate": 1.419199598702052e-05, + "loss": 0.1505, + "step": 38070 + }, + { + "epoch": 1.78, + "learning_rate": 1.4191212201965735e-05, + "loss": 0.2301, + "step": 38075 + }, + { + "epoch": 1.78, + "learning_rate": 1.4190428416910949e-05, + "loss": 0.3129, + "step": 38080 + }, + { + "epoch": 1.78, + "learning_rate": 1.4189644631856161e-05, + "loss": 0.2717, + "step": 38085 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188860846801373e-05, + "loss": 0.0331, + "step": 38090 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188077061746589e-05, + "loss": 0.0624, + "step": 38095 + }, + { + "epoch": 1.78, + "learning_rate": 1.4187293276691801e-05, + "loss": 0.0583, + "step": 38100 + }, + { + "epoch": 1.78, + "learning_rate": 1.4186509491637015e-05, + "loss": 0.0417, + "step": 38105 + }, + { + "epoch": 1.78, + "learning_rate": 1.4185725706582227e-05, + "loss": 0.1927, + "step": 38110 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184941921527443e-05, + "loss": 0.0895, + "step": 38115 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184158136472655e-05, + "loss": 0.1494, + "step": 38120 + }, + { + "epoch": 1.78, + "learning_rate": 1.4183374351417869e-05, + "loss": 0.2075, + "step": 38125 + }, + { + "epoch": 1.78, + "learning_rate": 1.4182590566363081e-05, + "loss": 0.2841, + "step": 38130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181806781308297e-05, + "loss": 0.2523, + "step": 38135 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181022996253509e-05, + "loss": 0.051, + "step": 38140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4180239211198723e-05, + "loss": 0.0424, + "step": 38145 + }, + { + "epoch": 1.78, + "learning_rate": 1.4179455426143935e-05, + "loss": 0.0772, + "step": 38150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4178671641089147e-05, + "loss": 0.0753, + "step": 38155 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177887856034363e-05, + "loss": 0.1035, + "step": 38160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177104070979575e-05, + "loss": 0.226, + "step": 38165 + }, + { + "epoch": 1.78, + "learning_rate": 1.4176320285924789e-05, + "loss": 0.2246, + "step": 38170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4175536500870003e-05, + "loss": 0.1679, + "step": 38175 + }, + { + "epoch": 1.78, + "learning_rate": 1.4174752715815217e-05, + "loss": 0.3673, + "step": 38180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173968930760429e-05, + "loss": 0.2447, + "step": 38185 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173185145705643e-05, + "loss": 0.1178, + "step": 38190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4172401360650857e-05, + "loss": 0.0845, + "step": 38195 + }, + { + "epoch": 1.78, + "learning_rate": 1.417161757559607e-05, + "loss": 0.0427, + "step": 38200 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170833790541283e-05, + "loss": 0.1022, + "step": 38205 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170050005486498e-05, + "loss": 0.0848, + "step": 38210 + }, + { + "epoch": 1.78, + "learning_rate": 1.416926622043171e-05, + "loss": 0.1151, + "step": 38215 + }, + { + "epoch": 1.78, + "learning_rate": 1.4168482435376923e-05, + "loss": 0.1462, + "step": 38220 + }, + { + "epoch": 1.78, + "learning_rate": 1.4167698650322137e-05, + "loss": 0.2352, + "step": 38225 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166914865267349e-05, + "loss": 0.4185, + "step": 38230 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166131080212564e-05, + "loss": 0.2938, + "step": 38235 + }, + { + "epoch": 1.78, + "learning_rate": 1.4165347295157777e-05, + "loss": 0.0908, + "step": 38240 + }, + { + "epoch": 1.78, + "learning_rate": 1.416456351010299e-05, + "loss": 0.0728, + "step": 38245 + }, + { + "epoch": 1.78, + "learning_rate": 1.4163779725048203e-05, + "loss": 0.0581, + "step": 38250 + }, + { + "epoch": 1.79, + "learning_rate": 1.4162995939993418e-05, + "loss": 0.128, + "step": 38255 + }, + { + "epoch": 1.79, + "learning_rate": 1.416221215493863e-05, + "loss": 0.1307, + "step": 38260 + }, + { + "epoch": 1.79, + "learning_rate": 1.4161428369883845e-05, + "loss": 0.1847, + "step": 38265 + }, + { + "epoch": 1.79, + "learning_rate": 1.4160644584829057e-05, + "loss": 0.1701, + "step": 38270 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159860799774272e-05, + "loss": 0.2065, + "step": 38275 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159077014719485e-05, + "loss": 0.4186, + "step": 38280 + }, + { + "epoch": 1.79, + "learning_rate": 1.4158293229664697e-05, + "loss": 0.2782, + "step": 38285 + }, + { + "epoch": 1.79, + "learning_rate": 1.415750944460991e-05, + "loss": 0.0348, + "step": 38290 + }, + { + "epoch": 1.79, + "learning_rate": 1.4156725659555125e-05, + "loss": 0.023, + "step": 38295 + }, + { + "epoch": 1.79, + "learning_rate": 1.4155941874500338e-05, + "loss": 0.0195, + "step": 38300 + }, + { + "epoch": 1.79, + "learning_rate": 1.415515808944555e-05, + "loss": 0.1097, + "step": 38305 + }, + { + "epoch": 1.79, + "learning_rate": 1.4154374304390766e-05, + "loss": 0.1873, + "step": 38310 + }, + { + "epoch": 1.79, + "learning_rate": 1.4153590519335978e-05, + "loss": 0.1415, + "step": 38315 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152806734281192e-05, + "loss": 0.1651, + "step": 38320 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152022949226405e-05, + "loss": 0.2963, + "step": 38325 + }, + { + "epoch": 1.79, + "learning_rate": 1.415123916417162e-05, + "loss": 0.2719, + "step": 38330 + }, + { + "epoch": 1.79, + "learning_rate": 1.4150455379116832e-05, + "loss": 0.2542, + "step": 38335 + }, + { + "epoch": 1.79, + "learning_rate": 1.4149671594062046e-05, + "loss": 0.0304, + "step": 38340 + }, + { + "epoch": 1.79, + "learning_rate": 1.4148887809007259e-05, + "loss": 0.1187, + "step": 38345 + }, + { + "epoch": 1.79, + "learning_rate": 1.414810402395247e-05, + "loss": 0.1246, + "step": 38350 + }, + { + "epoch": 1.79, + "learning_rate": 1.4147320238897686e-05, + "loss": 0.0625, + "step": 38355 + }, + { + "epoch": 1.79, + "learning_rate": 1.4146536453842899e-05, + "loss": 0.182, + "step": 38360 + }, + { + "epoch": 1.79, + "learning_rate": 1.4145752668788112e-05, + "loss": 0.0816, + "step": 38365 + }, + { + "epoch": 1.79, + "learning_rate": 1.4144968883733325e-05, + "loss": 0.1735, + "step": 38370 + }, + { + "epoch": 1.79, + "learning_rate": 1.414418509867854e-05, + "loss": 0.1373, + "step": 38375 + }, + { + "epoch": 1.79, + "learning_rate": 1.4143401313623752e-05, + "loss": 0.4016, + "step": 38380 + }, + { + "epoch": 1.79, + "learning_rate": 1.4142617528568966e-05, + "loss": 0.2637, + "step": 38385 + }, + { + "epoch": 1.79, + "learning_rate": 1.414183374351418e-05, + "loss": 0.0964, + "step": 38390 + }, + { + "epoch": 1.79, + "learning_rate": 1.4141049958459394e-05, + "loss": 0.0635, + "step": 38395 + }, + { + "epoch": 1.79, + "learning_rate": 1.4140266173404606e-05, + "loss": 0.0482, + "step": 38400 + }, + { + "epoch": 1.79, + "learning_rate": 1.413948238834982e-05, + "loss": 0.0768, + "step": 38405 + }, + { + "epoch": 1.79, + "learning_rate": 1.4138698603295034e-05, + "loss": 0.0976, + "step": 38410 + }, + { + "epoch": 1.79, + "learning_rate": 1.4137914818240246e-05, + "loss": 0.2089, + "step": 38415 + }, + { + "epoch": 1.79, + "learning_rate": 1.413713103318546e-05, + "loss": 0.2171, + "step": 38420 + }, + { + "epoch": 1.79, + "learning_rate": 1.4136347248130673e-05, + "loss": 0.2064, + "step": 38425 + }, + { + "epoch": 1.79, + "learning_rate": 1.4135563463075888e-05, + "loss": 0.4382, + "step": 38430 + }, + { + "epoch": 1.79, + "learning_rate": 1.41347796780211e-05, + "loss": 0.2063, + "step": 38435 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133995892966314e-05, + "loss": 0.0299, + "step": 38440 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133212107911526e-05, + "loss": 0.08, + "step": 38445 + }, + { + "epoch": 1.79, + "learning_rate": 1.4132428322856742e-05, + "loss": 0.1102, + "step": 38450 + }, + { + "epoch": 1.79, + "learning_rate": 1.4131644537801954e-05, + "loss": 0.0615, + "step": 38455 + }, + { + "epoch": 1.79, + "learning_rate": 1.4130860752747168e-05, + "loss": 0.0704, + "step": 38460 + }, + { + "epoch": 1.79, + "learning_rate": 1.413007696769238e-05, + "loss": 0.0914, + "step": 38465 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129293182637596e-05, + "loss": 0.1401, + "step": 38470 + }, + { + "epoch": 1.8, + "learning_rate": 1.4128509397582808e-05, + "loss": 0.2028, + "step": 38475 + }, + { + "epoch": 1.8, + "learning_rate": 1.412772561252802e-05, + "loss": 0.2902, + "step": 38480 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126941827473234e-05, + "loss": 0.2409, + "step": 38485 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126158042418448e-05, + "loss": 0.0215, + "step": 38490 + }, + { + "epoch": 1.8, + "learning_rate": 1.4125374257363662e-05, + "loss": 0.0641, + "step": 38495 + }, + { + "epoch": 1.8, + "learning_rate": 1.4124590472308874e-05, + "loss": 0.1115, + "step": 38500 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123806687254088e-05, + "loss": 0.099, + "step": 38505 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123022902199302e-05, + "loss": 0.0935, + "step": 38510 + }, + { + "epoch": 1.8, + "learning_rate": 1.4122239117144516e-05, + "loss": 0.1694, + "step": 38515 + }, + { + "epoch": 1.8, + "learning_rate": 1.4121455332089728e-05, + "loss": 0.234, + "step": 38520 + }, + { + "epoch": 1.8, + "learning_rate": 1.4120671547034944e-05, + "loss": 0.237, + "step": 38525 + }, + { + "epoch": 1.8, + "learning_rate": 1.4119887761980156e-05, + "loss": 0.3852, + "step": 38530 + }, + { + "epoch": 1.8, + "learning_rate": 1.411910397692537e-05, + "loss": 0.2284, + "step": 38535 + }, + { + "epoch": 1.8, + "learning_rate": 1.4118320191870582e-05, + "loss": 0.0491, + "step": 38540 + }, + { + "epoch": 1.8, + "learning_rate": 1.4117536406815794e-05, + "loss": 0.0762, + "step": 38545 + }, + { + "epoch": 1.8, + "learning_rate": 1.411675262176101e-05, + "loss": 0.1074, + "step": 38550 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115968836706222e-05, + "loss": 0.1303, + "step": 38555 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115185051651436e-05, + "loss": 0.1395, + "step": 38560 + }, + { + "epoch": 1.8, + "learning_rate": 1.4114401266596648e-05, + "loss": 0.156, + "step": 38565 + }, + { + "epoch": 1.8, + "learning_rate": 1.4113617481541864e-05, + "loss": 0.1675, + "step": 38570 + }, + { + "epoch": 1.8, + "learning_rate": 1.4112833696487076e-05, + "loss": 0.1753, + "step": 38575 + }, + { + "epoch": 1.8, + "learning_rate": 1.411204991143229e-05, + "loss": 0.3437, + "step": 38580 + }, + { + "epoch": 1.8, + "learning_rate": 1.4111266126377502e-05, + "loss": 0.3223, + "step": 38585 + }, + { + "epoch": 1.8, + "learning_rate": 1.4110482341322718e-05, + "loss": 0.0702, + "step": 38590 + }, + { + "epoch": 1.8, + "learning_rate": 1.410969855626793e-05, + "loss": 0.0645, + "step": 38595 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108914771213144e-05, + "loss": 0.1382, + "step": 38600 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108130986158356e-05, + "loss": 0.1124, + "step": 38605 + }, + { + "epoch": 1.8, + "learning_rate": 1.410734720110357e-05, + "loss": 0.0886, + "step": 38610 + }, + { + "epoch": 1.8, + "learning_rate": 1.4106563416048784e-05, + "loss": 0.1948, + "step": 38615 + }, + { + "epoch": 1.8, + "learning_rate": 1.4105779630993996e-05, + "loss": 0.2204, + "step": 38620 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104995845939212e-05, + "loss": 0.1517, + "step": 38625 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104212060884424e-05, + "loss": 0.3109, + "step": 38630 + }, + { + "epoch": 1.8, + "learning_rate": 1.4103428275829638e-05, + "loss": 0.1802, + "step": 38635 + }, + { + "epoch": 1.8, + "learning_rate": 1.410264449077485e-05, + "loss": 0.028, + "step": 38640 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101860705720066e-05, + "loss": 0.1063, + "step": 38645 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101076920665278e-05, + "loss": 0.0521, + "step": 38650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4100293135610492e-05, + "loss": 0.091, + "step": 38655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099509350555704e-05, + "loss": 0.0967, + "step": 38660 + }, + { + "epoch": 1.8, + "learning_rate": 1.409872556550092e-05, + "loss": 0.117, + "step": 38665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097941780446132e-05, + "loss": 0.1346, + "step": 38670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097157995391344e-05, + "loss": 0.1494, + "step": 38675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4096374210336558e-05, + "loss": 0.2843, + "step": 38680 + }, + { + "epoch": 1.81, + "learning_rate": 1.409559042528177e-05, + "loss": 0.2728, + "step": 38685 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094806640226986e-05, + "loss": 0.0535, + "step": 38690 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094022855172198e-05, + "loss": 0.0669, + "step": 38695 + }, + { + "epoch": 1.81, + "learning_rate": 1.4093239070117412e-05, + "loss": 0.1214, + "step": 38700 + }, + { + "epoch": 1.81, + "learning_rate": 1.4092455285062626e-05, + "loss": 0.0523, + "step": 38705 + }, + { + "epoch": 1.81, + "learning_rate": 1.409167150000784e-05, + "loss": 0.1108, + "step": 38710 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090887714953052e-05, + "loss": 0.1358, + "step": 38715 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090103929898266e-05, + "loss": 0.1182, + "step": 38720 + }, + { + "epoch": 1.81, + "learning_rate": 1.408932014484348e-05, + "loss": 0.1505, + "step": 38725 + }, + { + "epoch": 1.81, + "learning_rate": 1.4088536359788694e-05, + "loss": 0.3261, + "step": 38730 + }, + { + "epoch": 1.81, + "learning_rate": 1.4087752574733906e-05, + "loss": 0.3337, + "step": 38735 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086968789679118e-05, + "loss": 0.0553, + "step": 38740 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086185004624334e-05, + "loss": 0.0685, + "step": 38745 + }, + { + "epoch": 1.81, + "learning_rate": 1.4085401219569546e-05, + "loss": 0.0481, + "step": 38750 + }, + { + "epoch": 1.81, + "learning_rate": 1.408461743451476e-05, + "loss": 0.095, + "step": 38755 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083833649459972e-05, + "loss": 0.1435, + "step": 38760 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083049864405188e-05, + "loss": 0.0965, + "step": 38765 + }, + { + "epoch": 1.81, + "learning_rate": 1.40822660793504e-05, + "loss": 0.1779, + "step": 38770 + }, + { + "epoch": 1.81, + "learning_rate": 1.4081482294295614e-05, + "loss": 0.2204, + "step": 38775 + }, + { + "epoch": 1.81, + "learning_rate": 1.4080698509240826e-05, + "loss": 0.3111, + "step": 38780 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079914724186042e-05, + "loss": 0.2863, + "step": 38785 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079130939131254e-05, + "loss": 0.0378, + "step": 38790 + }, + { + "epoch": 1.81, + "learning_rate": 1.4078347154076468e-05, + "loss": 0.1025, + "step": 38795 + }, + { + "epoch": 1.81, + "learning_rate": 1.407756336902168e-05, + "loss": 0.0496, + "step": 38800 + }, + { + "epoch": 1.81, + "learning_rate": 1.4076779583966894e-05, + "loss": 0.0819, + "step": 38805 + }, + { + "epoch": 1.81, + "learning_rate": 1.4075995798912108e-05, + "loss": 0.071, + "step": 38810 + }, + { + "epoch": 1.81, + "learning_rate": 1.407521201385732e-05, + "loss": 0.1571, + "step": 38815 + }, + { + "epoch": 1.81, + "learning_rate": 1.4074428228802534e-05, + "loss": 0.262, + "step": 38820 + }, + { + "epoch": 1.81, + "learning_rate": 1.4073644443747748e-05, + "loss": 0.2983, + "step": 38825 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072860658692962e-05, + "loss": 0.228, + "step": 38830 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072076873638174e-05, + "loss": 0.1873, + "step": 38835 + }, + { + "epoch": 1.81, + "learning_rate": 1.407129308858339e-05, + "loss": 0.018, + "step": 38840 + }, + { + "epoch": 1.81, + "learning_rate": 1.4070509303528602e-05, + "loss": 0.0565, + "step": 38845 + }, + { + "epoch": 1.81, + "learning_rate": 1.4069725518473816e-05, + "loss": 0.1004, + "step": 38850 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068941733419028e-05, + "loss": 0.0656, + "step": 38855 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068157948364243e-05, + "loss": 0.2027, + "step": 38860 + }, + { + "epoch": 1.81, + "learning_rate": 1.4067374163309456e-05, + "loss": 0.2106, + "step": 38865 + }, + { + "epoch": 1.81, + "learning_rate": 1.4066590378254668e-05, + "loss": 0.1221, + "step": 38870 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065806593199882e-05, + "loss": 0.2289, + "step": 38875 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065022808145094e-05, + "loss": 0.311, + "step": 38880 + }, + { + "epoch": 1.81, + "learning_rate": 1.406423902309031e-05, + "loss": 0.3, + "step": 38885 + }, + { + "epoch": 1.81, + "learning_rate": 1.4063455238035522e-05, + "loss": 0.0561, + "step": 38890 + }, + { + "epoch": 1.81, + "learning_rate": 1.4062671452980736e-05, + "loss": 0.0676, + "step": 38895 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061887667925948e-05, + "loss": 0.1541, + "step": 38900 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061103882871163e-05, + "loss": 0.0841, + "step": 38905 + }, + { + "epoch": 1.82, + "learning_rate": 1.4060320097816376e-05, + "loss": 0.1486, + "step": 38910 + }, + { + "epoch": 1.82, + "learning_rate": 1.405953631276159e-05, + "loss": 0.1742, + "step": 38915 + }, + { + "epoch": 1.82, + "learning_rate": 1.4058752527706803e-05, + "loss": 0.153, + "step": 38920 + }, + { + "epoch": 1.82, + "learning_rate": 1.4057968742652017e-05, + "loss": 0.201, + "step": 38925 + }, + { + "epoch": 1.82, + "learning_rate": 1.405718495759723e-05, + "loss": 0.3103, + "step": 38930 + }, + { + "epoch": 1.82, + "learning_rate": 1.4056401172542442e-05, + "loss": 0.1888, + "step": 38935 + }, + { + "epoch": 1.82, + "learning_rate": 1.4055617387487657e-05, + "loss": 0.035, + "step": 38940 + }, + { + "epoch": 1.82, + "learning_rate": 1.405483360243287e-05, + "loss": 0.02, + "step": 38945 + }, + { + "epoch": 1.82, + "learning_rate": 1.4054049817378084e-05, + "loss": 0.0971, + "step": 38950 + }, + { + "epoch": 1.82, + "learning_rate": 1.4053266032323296e-05, + "loss": 0.1084, + "step": 38955 + }, + { + "epoch": 1.82, + "learning_rate": 1.4052482247268511e-05, + "loss": 0.0948, + "step": 38960 + }, + { + "epoch": 1.82, + "learning_rate": 1.4051698462213724e-05, + "loss": 0.1653, + "step": 38965 + }, + { + "epoch": 1.82, + "learning_rate": 1.4050914677158937e-05, + "loss": 0.1645, + "step": 38970 + }, + { + "epoch": 1.82, + "learning_rate": 1.405013089210415e-05, + "loss": 0.1154, + "step": 38975 + }, + { + "epoch": 1.82, + "learning_rate": 1.4049347107049365e-05, + "loss": 0.4433, + "step": 38980 + }, + { + "epoch": 1.82, + "learning_rate": 1.4048563321994577e-05, + "loss": 0.2112, + "step": 38985 + }, + { + "epoch": 1.82, + "learning_rate": 1.4047779536939791e-05, + "loss": 0.0622, + "step": 38990 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046995751885004e-05, + "loss": 0.0382, + "step": 38995 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046211966830216e-05, + "loss": 0.1102, + "step": 39000 + }, + { + "epoch": 1.82, + "learning_rate": 1.4045428181775431e-05, + "loss": 0.1516, + "step": 39005 + }, + { + "epoch": 1.82, + "learning_rate": 1.4044644396720644e-05, + "loss": 0.1424, + "step": 39010 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043860611665858e-05, + "loss": 0.1871, + "step": 39015 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043076826611071e-05, + "loss": 0.2085, + "step": 39020 + }, + { + "epoch": 1.82, + "learning_rate": 1.4042293041556285e-05, + "loss": 0.2028, + "step": 39025 + }, + { + "epoch": 1.82, + "learning_rate": 1.4041509256501498e-05, + "loss": 0.3885, + "step": 39030 + }, + { + "epoch": 1.82, + "learning_rate": 1.4040725471446711e-05, + "loss": 0.3049, + "step": 39035 + }, + { + "epoch": 1.82, + "learning_rate": 1.4039941686391925e-05, + "loss": 0.0252, + "step": 39040 + }, + { + "epoch": 1.82, + "learning_rate": 1.403915790133714e-05, + "loss": 0.0376, + "step": 39045 + }, + { + "epoch": 1.82, + "learning_rate": 1.4038374116282351e-05, + "loss": 0.029, + "step": 39050 + }, + { + "epoch": 1.82, + "learning_rate": 1.4037590331227567e-05, + "loss": 0.1269, + "step": 39055 + }, + { + "epoch": 1.82, + "learning_rate": 1.403680654617278e-05, + "loss": 0.1043, + "step": 39060 + }, + { + "epoch": 1.82, + "learning_rate": 1.4036022761117992e-05, + "loss": 0.1261, + "step": 39065 + }, + { + "epoch": 1.82, + "learning_rate": 1.4035238976063205e-05, + "loss": 0.2339, + "step": 39070 + }, + { + "epoch": 1.82, + "learning_rate": 1.4034455191008418e-05, + "loss": 0.1286, + "step": 39075 + }, + { + "epoch": 1.82, + "learning_rate": 1.4033671405953633e-05, + "loss": 0.2755, + "step": 39080 + }, + { + "epoch": 1.82, + "learning_rate": 1.4032887620898845e-05, + "loss": 0.3312, + "step": 39085 + }, + { + "epoch": 1.82, + "learning_rate": 1.403210383584406e-05, + "loss": 0.0087, + "step": 39090 + }, + { + "epoch": 1.82, + "learning_rate": 1.4031320050789272e-05, + "loss": 0.0794, + "step": 39095 + }, + { + "epoch": 1.82, + "learning_rate": 1.4030536265734487e-05, + "loss": 0.0557, + "step": 39100 + }, + { + "epoch": 1.82, + "learning_rate": 1.40297524806797e-05, + "loss": 0.136, + "step": 39105 + }, + { + "epoch": 1.82, + "learning_rate": 1.4028968695624913e-05, + "loss": 0.1002, + "step": 39110 + }, + { + "epoch": 1.83, + "learning_rate": 1.4028184910570125e-05, + "loss": 0.1967, + "step": 39115 + }, + { + "epoch": 1.83, + "learning_rate": 1.4027401125515341e-05, + "loss": 0.1752, + "step": 39120 + }, + { + "epoch": 1.83, + "learning_rate": 1.4026617340460553e-05, + "loss": 0.1226, + "step": 39125 + }, + { + "epoch": 1.83, + "learning_rate": 1.4025833555405766e-05, + "loss": 0.2543, + "step": 39130 + }, + { + "epoch": 1.83, + "learning_rate": 1.402504977035098e-05, + "loss": 0.3523, + "step": 39135 + }, + { + "epoch": 1.83, + "learning_rate": 1.4024265985296193e-05, + "loss": 0.086, + "step": 39140 + }, + { + "epoch": 1.83, + "learning_rate": 1.4023482200241407e-05, + "loss": 0.1465, + "step": 39145 + }, + { + "epoch": 1.83, + "learning_rate": 1.402269841518662e-05, + "loss": 0.1037, + "step": 39150 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021914630131835e-05, + "loss": 0.0747, + "step": 39155 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021130845077047e-05, + "loss": 0.1403, + "step": 39160 + }, + { + "epoch": 1.83, + "learning_rate": 1.4020347060022261e-05, + "loss": 0.1251, + "step": 39165 + }, + { + "epoch": 1.83, + "learning_rate": 1.4019563274967473e-05, + "loss": 0.1805, + "step": 39170 + }, + { + "epoch": 1.83, + "learning_rate": 1.4018779489912689e-05, + "loss": 0.1899, + "step": 39175 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017995704857901e-05, + "loss": 0.3973, + "step": 39180 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017368676814072e-05, + "loss": 0.2354, + "step": 39185 + }, + { + "epoch": 1.83, + "learning_rate": 1.4016584891759286e-05, + "loss": 0.0266, + "step": 39190 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015801106704498e-05, + "loss": 0.0359, + "step": 39195 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015017321649712e-05, + "loss": 0.0703, + "step": 39200 + }, + { + "epoch": 1.83, + "learning_rate": 1.4014233536594926e-05, + "loss": 0.1166, + "step": 39205 + }, + { + "epoch": 1.83, + "learning_rate": 1.4013449751540138e-05, + "loss": 0.1409, + "step": 39210 + }, + { + "epoch": 1.83, + "learning_rate": 1.4012665966485353e-05, + "loss": 0.129, + "step": 39215 + }, + { + "epoch": 1.83, + "learning_rate": 1.4011882181430566e-05, + "loss": 0.1237, + "step": 39220 + }, + { + "epoch": 1.83, + "learning_rate": 1.401109839637578e-05, + "loss": 0.1638, + "step": 39225 + }, + { + "epoch": 1.83, + "learning_rate": 1.4010314611320992e-05, + "loss": 0.2936, + "step": 39230 + }, + { + "epoch": 1.83, + "learning_rate": 1.4009530826266207e-05, + "loss": 0.2029, + "step": 39235 + }, + { + "epoch": 1.83, + "learning_rate": 1.400874704121142e-05, + "loss": 0.0323, + "step": 39240 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007963256156633e-05, + "loss": 0.1204, + "step": 39245 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007179471101846e-05, + "loss": 0.0603, + "step": 39250 + }, + { + "epoch": 1.83, + "learning_rate": 1.4006395686047061e-05, + "loss": 0.1161, + "step": 39255 + }, + { + "epoch": 1.83, + "learning_rate": 1.4005611900992273e-05, + "loss": 0.1051, + "step": 39260 + }, + { + "epoch": 1.83, + "learning_rate": 1.4004828115937486e-05, + "loss": 0.1097, + "step": 39265 + }, + { + "epoch": 1.83, + "learning_rate": 1.40040443308827e-05, + "loss": 0.1569, + "step": 39270 + }, + { + "epoch": 1.83, + "learning_rate": 1.4003260545827912e-05, + "loss": 0.2331, + "step": 39275 + }, + { + "epoch": 1.83, + "learning_rate": 1.4002476760773127e-05, + "loss": 0.4034, + "step": 39280 + }, + { + "epoch": 1.83, + "learning_rate": 1.400169297571834e-05, + "loss": 0.2077, + "step": 39285 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000909190663553e-05, + "loss": 0.0452, + "step": 39290 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000125405608766e-05, + "loss": 0.0357, + "step": 39295 + }, + { + "epoch": 1.83, + "learning_rate": 1.3999341620553981e-05, + "loss": 0.0787, + "step": 39300 + }, + { + "epoch": 1.83, + "learning_rate": 1.3998557835499194e-05, + "loss": 0.1583, + "step": 39305 + }, + { + "epoch": 1.83, + "learning_rate": 1.3997774050444407e-05, + "loss": 0.1058, + "step": 39310 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996990265389621e-05, + "loss": 0.0733, + "step": 39315 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996206480334835e-05, + "loss": 0.1355, + "step": 39320 + }, + { + "epoch": 1.83, + "learning_rate": 1.3995422695280047e-05, + "loss": 0.1527, + "step": 39325 + }, + { + "epoch": 1.84, + "learning_rate": 1.399463891022526e-05, + "loss": 0.2326, + "step": 39330 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993855125170475e-05, + "loss": 0.2507, + "step": 39335 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993071340115687e-05, + "loss": 0.0268, + "step": 39340 + }, + { + "epoch": 1.84, + "learning_rate": 1.3992287555060901e-05, + "loss": 0.0515, + "step": 39345 + }, + { + "epoch": 1.84, + "learning_rate": 1.3991503770006114e-05, + "loss": 0.0863, + "step": 39350 + }, + { + "epoch": 1.84, + "learning_rate": 1.399071998495133e-05, + "loss": 0.1121, + "step": 39355 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989936199896541e-05, + "loss": 0.1218, + "step": 39360 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989152414841755e-05, + "loss": 0.0561, + "step": 39365 + }, + { + "epoch": 1.84, + "learning_rate": 1.3988368629786968e-05, + "loss": 0.1979, + "step": 39370 + }, + { + "epoch": 1.84, + "learning_rate": 1.3987584844732183e-05, + "loss": 0.3349, + "step": 39375 + }, + { + "epoch": 1.84, + "learning_rate": 1.3986801059677395e-05, + "loss": 0.2712, + "step": 39380 + }, + { + "epoch": 1.84, + "learning_rate": 1.398601727462261e-05, + "loss": 0.21, + "step": 39385 + }, + { + "epoch": 1.84, + "learning_rate": 1.3985233489567821e-05, + "loss": 0.0712, + "step": 39390 + }, + { + "epoch": 1.84, + "learning_rate": 1.3984449704513035e-05, + "loss": 0.1168, + "step": 39395 + }, + { + "epoch": 1.84, + "learning_rate": 1.398366591945825e-05, + "loss": 0.0892, + "step": 39400 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982882134403461e-05, + "loss": 0.1336, + "step": 39405 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982098349348675e-05, + "loss": 0.172, + "step": 39410 + }, + { + "epoch": 1.84, + "learning_rate": 1.398131456429389e-05, + "loss": 0.1663, + "step": 39415 + }, + { + "epoch": 1.84, + "learning_rate": 1.3980530779239103e-05, + "loss": 0.1677, + "step": 39420 + }, + { + "epoch": 1.84, + "learning_rate": 1.3979746994184315e-05, + "loss": 0.1407, + "step": 39425 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978963209129531e-05, + "loss": 0.4581, + "step": 39430 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978179424074743e-05, + "loss": 0.4789, + "step": 39435 + }, + { + "epoch": 1.84, + "learning_rate": 1.3977395639019957e-05, + "loss": 0.0543, + "step": 39440 + }, + { + "epoch": 1.84, + "learning_rate": 1.397661185396517e-05, + "loss": 0.0469, + "step": 39445 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975828068910385e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975044283855597e-05, + "loss": 0.0445, + "step": 39455 + }, + { + "epoch": 1.84, + "learning_rate": 1.397426049880081e-05, + "loss": 0.1107, + "step": 39460 + }, + { + "epoch": 1.84, + "learning_rate": 1.3973476713746023e-05, + "loss": 0.0865, + "step": 39465 + }, + { + "epoch": 1.84, + "learning_rate": 1.3972692928691235e-05, + "loss": 0.1738, + "step": 39470 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971909143636451e-05, + "loss": 0.3184, + "step": 39475 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971125358581663e-05, + "loss": 0.2718, + "step": 39480 + }, + { + "epoch": 1.84, + "learning_rate": 1.3970341573526877e-05, + "loss": 0.3997, + "step": 39485 + }, + { + "epoch": 1.84, + "learning_rate": 1.396955778847209e-05, + "loss": 0.0962, + "step": 39490 + }, + { + "epoch": 1.84, + "learning_rate": 1.3968774003417305e-05, + "loss": 0.0429, + "step": 39495 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967990218362517e-05, + "loss": 0.119, + "step": 39500 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967206433307731e-05, + "loss": 0.065, + "step": 39505 + }, + { + "epoch": 1.84, + "learning_rate": 1.3966422648252943e-05, + "loss": 0.2045, + "step": 39510 + }, + { + "epoch": 1.84, + "learning_rate": 1.3965638863198159e-05, + "loss": 0.1923, + "step": 39515 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964855078143371e-05, + "loss": 0.1101, + "step": 39520 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964071293088583e-05, + "loss": 0.201, + "step": 39525 + }, + { + "epoch": 1.84, + "learning_rate": 1.3963287508033799e-05, + "loss": 0.2879, + "step": 39530 + }, + { + "epoch": 1.84, + "learning_rate": 1.3962503722979011e-05, + "loss": 0.1518, + "step": 39535 + }, + { + "epoch": 1.84, + "learning_rate": 1.3961719937924225e-05, + "loss": 0.0882, + "step": 39540 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960936152869437e-05, + "loss": 0.0576, + "step": 39545 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960152367814653e-05, + "loss": 0.0711, + "step": 39550 + }, + { + "epoch": 1.85, + "learning_rate": 1.3959368582759865e-05, + "loss": 0.1663, + "step": 39555 + }, + { + "epoch": 1.85, + "learning_rate": 1.3958584797705079e-05, + "loss": 0.1107, + "step": 39560 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957801012650291e-05, + "loss": 0.1376, + "step": 39565 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957017227595507e-05, + "loss": 0.2258, + "step": 39570 + }, + { + "epoch": 1.85, + "learning_rate": 1.3956233442540719e-05, + "loss": 0.1872, + "step": 39575 + }, + { + "epoch": 1.85, + "learning_rate": 1.3955449657485933e-05, + "loss": 0.3751, + "step": 39580 + }, + { + "epoch": 1.85, + "learning_rate": 1.3954665872431145e-05, + "loss": 0.3203, + "step": 39585 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953882087376357e-05, + "loss": 0.0157, + "step": 39590 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953098302321573e-05, + "loss": 0.0469, + "step": 39595 + }, + { + "epoch": 1.85, + "learning_rate": 1.3952314517266785e-05, + "loss": 0.0631, + "step": 39600 + }, + { + "epoch": 1.85, + "learning_rate": 1.3951530732211999e-05, + "loss": 0.0862, + "step": 39605 + }, + { + "epoch": 1.85, + "learning_rate": 1.3950746947157211e-05, + "loss": 0.1194, + "step": 39610 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949963162102427e-05, + "loss": 0.1458, + "step": 39615 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949179377047639e-05, + "loss": 0.2322, + "step": 39620 + }, + { + "epoch": 1.85, + "learning_rate": 1.3948395591992853e-05, + "loss": 0.363, + "step": 39625 + }, + { + "epoch": 1.85, + "learning_rate": 1.3947611806938067e-05, + "loss": 0.2981, + "step": 39630 + }, + { + "epoch": 1.85, + "learning_rate": 1.394682802188328e-05, + "loss": 0.2648, + "step": 39635 + }, + { + "epoch": 1.85, + "learning_rate": 1.3946044236828493e-05, + "loss": 0.0346, + "step": 39640 + }, + { + "epoch": 1.85, + "learning_rate": 1.3945260451773709e-05, + "loss": 0.0576, + "step": 39645 + }, + { + "epoch": 1.85, + "learning_rate": 1.394447666671892e-05, + "loss": 0.1158, + "step": 39650 + }, + { + "epoch": 1.85, + "learning_rate": 1.3943692881664133e-05, + "loss": 0.0324, + "step": 39655 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942909096609347e-05, + "loss": 0.1207, + "step": 39660 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942125311554559e-05, + "loss": 0.1835, + "step": 39665 + }, + { + "epoch": 1.85, + "learning_rate": 1.3941341526499775e-05, + "loss": 0.1337, + "step": 39670 + }, + { + "epoch": 1.85, + "learning_rate": 1.3940557741444987e-05, + "loss": 0.1672, + "step": 39675 + }, + { + "epoch": 1.85, + "learning_rate": 1.3939773956390201e-05, + "loss": 0.2736, + "step": 39680 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938990171335413e-05, + "loss": 0.2847, + "step": 39685 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938206386280629e-05, + "loss": 0.044, + "step": 39690 + }, + { + "epoch": 1.85, + "learning_rate": 1.3937422601225841e-05, + "loss": 0.0497, + "step": 39695 + }, + { + "epoch": 1.85, + "learning_rate": 1.3936638816171055e-05, + "loss": 0.1422, + "step": 39700 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935855031116267e-05, + "loss": 0.0669, + "step": 39705 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935071246061483e-05, + "loss": 0.0804, + "step": 39710 + }, + { + "epoch": 1.85, + "learning_rate": 1.3934287461006695e-05, + "loss": 0.0735, + "step": 39715 + }, + { + "epoch": 1.85, + "learning_rate": 1.3933503675951907e-05, + "loss": 0.1105, + "step": 39720 + }, + { + "epoch": 1.85, + "learning_rate": 1.3932719890897121e-05, + "loss": 0.1518, + "step": 39725 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931936105842335e-05, + "loss": 0.3293, + "step": 39730 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931152320787549e-05, + "loss": 0.2068, + "step": 39735 + }, + { + "epoch": 1.85, + "learning_rate": 1.3930368535732761e-05, + "loss": 0.0597, + "step": 39740 + }, + { + "epoch": 1.85, + "learning_rate": 1.3929584750677977e-05, + "loss": 0.0848, + "step": 39745 + }, + { + "epoch": 1.85, + "learning_rate": 1.3928800965623189e-05, + "loss": 0.0794, + "step": 39750 + }, + { + "epoch": 1.86, + "learning_rate": 1.3928017180568403e-05, + "loss": 0.0735, + "step": 39755 + }, + { + "epoch": 1.86, + "learning_rate": 1.3927233395513615e-05, + "loss": 0.1662, + "step": 39760 + }, + { + "epoch": 1.86, + "learning_rate": 1.392644961045883e-05, + "loss": 0.1268, + "step": 39765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3925665825404043e-05, + "loss": 0.1259, + "step": 39770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924882040349257e-05, + "loss": 0.2228, + "step": 39775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924098255294469e-05, + "loss": 0.2452, + "step": 39780 + }, + { + "epoch": 1.86, + "learning_rate": 1.3923314470239681e-05, + "loss": 0.3537, + "step": 39785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3922530685184897e-05, + "loss": 0.0579, + "step": 39790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3921746900130109e-05, + "loss": 0.0819, + "step": 39795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920963115075323e-05, + "loss": 0.117, + "step": 39800 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920179330020535e-05, + "loss": 0.0466, + "step": 39805 + }, + { + "epoch": 1.86, + "learning_rate": 1.391939554496575e-05, + "loss": 0.1153, + "step": 39810 + }, + { + "epoch": 1.86, + "learning_rate": 1.3918611759910963e-05, + "loss": 0.1227, + "step": 39815 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917827974856177e-05, + "loss": 0.1416, + "step": 39820 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917044189801389e-05, + "loss": 0.1812, + "step": 39825 + }, + { + "epoch": 1.86, + "learning_rate": 1.3916260404746604e-05, + "loss": 0.3345, + "step": 39830 + }, + { + "epoch": 1.86, + "learning_rate": 1.3915476619691817e-05, + "loss": 0.359, + "step": 39835 + }, + { + "epoch": 1.86, + "learning_rate": 1.391469283463703e-05, + "loss": 0.019, + "step": 39840 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913909049582245e-05, + "loss": 0.037, + "step": 39845 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913125264527457e-05, + "loss": 0.0897, + "step": 39850 + }, + { + "epoch": 1.86, + "learning_rate": 1.391234147947267e-05, + "loss": 0.0943, + "step": 39855 + }, + { + "epoch": 1.86, + "learning_rate": 1.3911557694417883e-05, + "loss": 0.1399, + "step": 39860 + }, + { + "epoch": 1.86, + "learning_rate": 1.3910773909363098e-05, + "loss": 0.1403, + "step": 39865 + }, + { + "epoch": 1.86, + "learning_rate": 1.390999012430831e-05, + "loss": 0.2049, + "step": 39870 + }, + { + "epoch": 1.86, + "learning_rate": 1.3909206339253525e-05, + "loss": 0.2028, + "step": 39875 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908422554198737e-05, + "loss": 0.281, + "step": 39880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3907638769143952e-05, + "loss": 0.2856, + "step": 39885 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906854984089165e-05, + "loss": 0.0383, + "step": 39890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906071199034378e-05, + "loss": 0.0449, + "step": 39895 + }, + { + "epoch": 1.86, + "learning_rate": 1.390528741397959e-05, + "loss": 0.0715, + "step": 39900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3904503628924806e-05, + "loss": 0.0983, + "step": 39905 + }, + { + "epoch": 1.86, + "learning_rate": 1.3903719843870019e-05, + "loss": 0.1431, + "step": 39910 + }, + { + "epoch": 1.86, + "learning_rate": 1.390293605881523e-05, + "loss": 0.1682, + "step": 39915 + }, + { + "epoch": 1.86, + "learning_rate": 1.3902152273760445e-05, + "loss": 0.1312, + "step": 39920 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901368488705659e-05, + "loss": 0.1199, + "step": 39925 + }, + { + "epoch": 1.86, + "learning_rate": 1.3900584703650872e-05, + "loss": 0.1829, + "step": 39930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899800918596085e-05, + "loss": 0.2109, + "step": 39935 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899017133541299e-05, + "loss": 0.0469, + "step": 39940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3898233348486512e-05, + "loss": 0.056, + "step": 39945 + }, + { + "epoch": 1.86, + "learning_rate": 1.3897449563431726e-05, + "loss": 0.0796, + "step": 39950 + }, + { + "epoch": 1.86, + "learning_rate": 1.3896665778376939e-05, + "loss": 0.0767, + "step": 39955 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895881993322154e-05, + "loss": 0.0807, + "step": 39960 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895098208267366e-05, + "loss": 0.1694, + "step": 39965 + }, + { + "epoch": 1.87, + "learning_rate": 1.389431442321258e-05, + "loss": 0.1686, + "step": 39970 + }, + { + "epoch": 1.87, + "learning_rate": 1.3893530638157793e-05, + "loss": 0.1735, + "step": 39975 + }, + { + "epoch": 1.87, + "learning_rate": 1.3892746853103005e-05, + "loss": 0.3052, + "step": 39980 + }, + { + "epoch": 1.87, + "learning_rate": 1.389196306804822e-05, + "loss": 0.315, + "step": 39985 + }, + { + "epoch": 1.87, + "learning_rate": 1.3891179282993433e-05, + "loss": 0.0689, + "step": 39990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3890395497938646e-05, + "loss": 0.0481, + "step": 39995 + }, + { + "epoch": 1.87, + "learning_rate": 1.3889611712883859e-05, + "loss": 0.0813, + "step": 40000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888827927829074e-05, + "loss": 0.0883, + "step": 40005 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888044142774286e-05, + "loss": 0.0955, + "step": 40010 + }, + { + "epoch": 1.87, + "learning_rate": 1.38872603577195e-05, + "loss": 0.1675, + "step": 40015 + }, + { + "epoch": 1.87, + "learning_rate": 1.3886476572664713e-05, + "loss": 0.1361, + "step": 40020 + }, + { + "epoch": 1.87, + "learning_rate": 1.3885692787609928e-05, + "loss": 0.1712, + "step": 40025 + }, + { + "epoch": 1.87, + "learning_rate": 1.388490900255514e-05, + "loss": 0.3516, + "step": 40030 + }, + { + "epoch": 1.87, + "learning_rate": 1.3884125217500354e-05, + "loss": 0.233, + "step": 40035 + }, + { + "epoch": 1.87, + "learning_rate": 1.3883341432445567e-05, + "loss": 0.0227, + "step": 40040 + }, + { + "epoch": 1.87, + "learning_rate": 1.388255764739078e-05, + "loss": 0.0551, + "step": 40045 + }, + { + "epoch": 1.87, + "learning_rate": 1.3881773862335994e-05, + "loss": 0.0955, + "step": 40050 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880990077281207e-05, + "loss": 0.1336, + "step": 40055 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880206292226422e-05, + "loss": 0.0766, + "step": 40060 + }, + { + "epoch": 1.87, + "learning_rate": 1.3879422507171634e-05, + "loss": 0.0847, + "step": 40065 + }, + { + "epoch": 1.87, + "learning_rate": 1.3878638722116848e-05, + "loss": 0.143, + "step": 40070 + }, + { + "epoch": 1.87, + "learning_rate": 1.387785493706206e-05, + "loss": 0.1564, + "step": 40075 + }, + { + "epoch": 1.87, + "learning_rate": 1.3877071152007276e-05, + "loss": 0.3278, + "step": 40080 + }, + { + "epoch": 1.87, + "learning_rate": 1.3876287366952488e-05, + "loss": 0.2336, + "step": 40085 + }, + { + "epoch": 1.87, + "learning_rate": 1.3875503581897702e-05, + "loss": 0.0916, + "step": 40090 + }, + { + "epoch": 1.87, + "learning_rate": 1.3874719796842914e-05, + "loss": 0.11, + "step": 40095 + }, + { + "epoch": 1.87, + "learning_rate": 1.387393601178813e-05, + "loss": 0.0472, + "step": 40100 + }, + { + "epoch": 1.87, + "learning_rate": 1.3873152226733342e-05, + "loss": 0.0719, + "step": 40105 + }, + { + "epoch": 1.87, + "learning_rate": 1.3872368441678554e-05, + "loss": 0.1545, + "step": 40110 + }, + { + "epoch": 1.87, + "learning_rate": 1.3871584656623768e-05, + "loss": 0.1102, + "step": 40115 + }, + { + "epoch": 1.87, + "learning_rate": 1.387080087156898e-05, + "loss": 0.2047, + "step": 40120 + }, + { + "epoch": 1.87, + "learning_rate": 1.3870017086514196e-05, + "loss": 0.241, + "step": 40125 + }, + { + "epoch": 1.87, + "learning_rate": 1.3869233301459408e-05, + "loss": 0.3618, + "step": 40130 + }, + { + "epoch": 1.87, + "learning_rate": 1.3868449516404622e-05, + "loss": 0.3678, + "step": 40135 + }, + { + "epoch": 1.87, + "learning_rate": 1.3867665731349834e-05, + "loss": 0.0394, + "step": 40140 + }, + { + "epoch": 1.87, + "learning_rate": 1.386688194629505e-05, + "loss": 0.0337, + "step": 40145 + }, + { + "epoch": 1.87, + "learning_rate": 1.3866098161240262e-05, + "loss": 0.0898, + "step": 40150 + }, + { + "epoch": 1.87, + "learning_rate": 1.3865314376185476e-05, + "loss": 0.074, + "step": 40155 + }, + { + "epoch": 1.87, + "learning_rate": 1.386453059113069e-05, + "loss": 0.1356, + "step": 40160 + }, + { + "epoch": 1.87, + "learning_rate": 1.3863746806075904e-05, + "loss": 0.1607, + "step": 40165 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862963021021116e-05, + "loss": 0.1426, + "step": 40170 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862179235966328e-05, + "loss": 0.2663, + "step": 40175 + }, + { + "epoch": 1.87, + "learning_rate": 1.3861395450911544e-05, + "loss": 0.3673, + "step": 40180 + }, + { + "epoch": 1.88, + "learning_rate": 1.3860611665856756e-05, + "loss": 0.3103, + "step": 40185 + }, + { + "epoch": 1.88, + "learning_rate": 1.385982788080197e-05, + "loss": 0.0458, + "step": 40190 + }, + { + "epoch": 1.88, + "learning_rate": 1.3859044095747182e-05, + "loss": 0.0491, + "step": 40195 + }, + { + "epoch": 1.88, + "learning_rate": 1.3858260310692398e-05, + "loss": 0.0837, + "step": 40200 + }, + { + "epoch": 1.88, + "learning_rate": 1.385747652563761e-05, + "loss": 0.1077, + "step": 40205 + }, + { + "epoch": 1.88, + "learning_rate": 1.3856692740582824e-05, + "loss": 0.1128, + "step": 40210 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855908955528036e-05, + "loss": 0.2105, + "step": 40215 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855125170473252e-05, + "loss": 0.169, + "step": 40220 + }, + { + "epoch": 1.88, + "learning_rate": 1.3854341385418464e-05, + "loss": 0.1373, + "step": 40225 + }, + { + "epoch": 1.88, + "learning_rate": 1.3853557600363678e-05, + "loss": 0.2803, + "step": 40230 + }, + { + "epoch": 1.88, + "learning_rate": 1.385277381530889e-05, + "loss": 0.2458, + "step": 40235 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851990030254104e-05, + "loss": 0.067, + "step": 40240 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851206245199318e-05, + "loss": 0.0656, + "step": 40245 + }, + { + "epoch": 1.88, + "learning_rate": 1.385042246014453e-05, + "loss": 0.0975, + "step": 40250 + }, + { + "epoch": 1.88, + "learning_rate": 1.3849638675089744e-05, + "loss": 0.0711, + "step": 40255 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848854890034958e-05, + "loss": 0.1479, + "step": 40260 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848071104980172e-05, + "loss": 0.0855, + "step": 40265 + }, + { + "epoch": 1.88, + "learning_rate": 1.3847287319925384e-05, + "loss": 0.2257, + "step": 40270 + }, + { + "epoch": 1.88, + "learning_rate": 1.38465035348706e-05, + "loss": 0.2097, + "step": 40275 + }, + { + "epoch": 1.88, + "learning_rate": 1.3845719749815812e-05, + "loss": 0.4084, + "step": 40280 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844935964761026e-05, + "loss": 0.2848, + "step": 40285 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844152179706238e-05, + "loss": 0.104, + "step": 40290 + }, + { + "epoch": 1.88, + "learning_rate": 1.3843368394651454e-05, + "loss": 0.0429, + "step": 40295 + }, + { + "epoch": 1.88, + "learning_rate": 1.3842584609596666e-05, + "loss": 0.086, + "step": 40300 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841800824541878e-05, + "loss": 0.0811, + "step": 40305 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841017039487092e-05, + "loss": 0.058, + "step": 40310 + }, + { + "epoch": 1.88, + "learning_rate": 1.3840233254432304e-05, + "loss": 0.1156, + "step": 40315 + }, + { + "epoch": 1.88, + "learning_rate": 1.383944946937752e-05, + "loss": 0.2159, + "step": 40320 + }, + { + "epoch": 1.88, + "learning_rate": 1.3838665684322732e-05, + "loss": 0.2478, + "step": 40325 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837881899267946e-05, + "loss": 0.2005, + "step": 40330 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837098114213158e-05, + "loss": 0.3357, + "step": 40335 + }, + { + "epoch": 1.88, + "learning_rate": 1.3836314329158374e-05, + "loss": 0.0394, + "step": 40340 + }, + { + "epoch": 1.88, + "learning_rate": 1.3835530544103586e-05, + "loss": 0.0648, + "step": 40345 + }, + { + "epoch": 1.88, + "learning_rate": 1.38347467590488e-05, + "loss": 0.0823, + "step": 40350 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833962973994012e-05, + "loss": 0.0483, + "step": 40355 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833179188939228e-05, + "loss": 0.193, + "step": 40360 + }, + { + "epoch": 1.88, + "learning_rate": 1.383239540388444e-05, + "loss": 0.1007, + "step": 40365 + }, + { + "epoch": 1.88, + "learning_rate": 1.3831611618829652e-05, + "loss": 0.1092, + "step": 40370 + }, + { + "epoch": 1.88, + "learning_rate": 1.3830827833774868e-05, + "loss": 0.2738, + "step": 40375 + }, + { + "epoch": 1.88, + "learning_rate": 1.383004404872008e-05, + "loss": 0.2871, + "step": 40380 + }, + { + "epoch": 1.88, + "learning_rate": 1.3829260263665294e-05, + "loss": 0.2501, + "step": 40385 + }, + { + "epoch": 1.88, + "learning_rate": 1.3828476478610506e-05, + "loss": 0.0473, + "step": 40390 + }, + { + "epoch": 1.88, + "learning_rate": 1.3827692693555722e-05, + "loss": 0.0597, + "step": 40395 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826908908500934e-05, + "loss": 0.0646, + "step": 40400 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826125123446148e-05, + "loss": 0.1314, + "step": 40405 + }, + { + "epoch": 1.89, + "learning_rate": 1.382534133839136e-05, + "loss": 0.1628, + "step": 40410 + }, + { + "epoch": 1.89, + "learning_rate": 1.3824557553336576e-05, + "loss": 0.1858, + "step": 40415 + }, + { + "epoch": 1.89, + "learning_rate": 1.3823773768281788e-05, + "loss": 0.2056, + "step": 40420 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822989983227002e-05, + "loss": 0.1649, + "step": 40425 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822206198172214e-05, + "loss": 0.1464, + "step": 40430 + }, + { + "epoch": 1.89, + "learning_rate": 1.3821422413117426e-05, + "loss": 0.3024, + "step": 40435 + }, + { + "epoch": 1.89, + "learning_rate": 1.3820638628062642e-05, + "loss": 0.0702, + "step": 40440 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819854843007854e-05, + "loss": 0.0468, + "step": 40445 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819071057953068e-05, + "loss": 0.0472, + "step": 40450 + }, + { + "epoch": 1.89, + "learning_rate": 1.381828727289828e-05, + "loss": 0.0957, + "step": 40455 + }, + { + "epoch": 1.89, + "learning_rate": 1.3817503487843496e-05, + "loss": 0.1398, + "step": 40460 + }, + { + "epoch": 1.89, + "learning_rate": 1.3816719702788708e-05, + "loss": 0.109, + "step": 40465 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815935917733922e-05, + "loss": 0.2193, + "step": 40470 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815152132679136e-05, + "loss": 0.2995, + "step": 40475 + }, + { + "epoch": 1.89, + "learning_rate": 1.381436834762435e-05, + "loss": 0.336, + "step": 40480 + }, + { + "epoch": 1.89, + "learning_rate": 1.3813584562569562e-05, + "loss": 0.2809, + "step": 40485 + }, + { + "epoch": 1.89, + "learning_rate": 1.3812800777514777e-05, + "loss": 0.0376, + "step": 40490 + }, + { + "epoch": 1.89, + "learning_rate": 1.381201699245999e-05, + "loss": 0.0265, + "step": 40495 + }, + { + "epoch": 1.89, + "learning_rate": 1.3811233207405202e-05, + "loss": 0.0517, + "step": 40500 + }, + { + "epoch": 1.89, + "learning_rate": 1.3810449422350416e-05, + "loss": 0.0746, + "step": 40505 + }, + { + "epoch": 1.89, + "learning_rate": 1.3809665637295628e-05, + "loss": 0.0954, + "step": 40510 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808881852240844e-05, + "loss": 0.1229, + "step": 40515 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808098067186056e-05, + "loss": 0.1508, + "step": 40520 + }, + { + "epoch": 1.89, + "learning_rate": 1.380731428213127e-05, + "loss": 0.2266, + "step": 40525 + }, + { + "epoch": 1.89, + "learning_rate": 1.3806530497076482e-05, + "loss": 0.3108, + "step": 40530 + }, + { + "epoch": 1.89, + "learning_rate": 1.3805746712021697e-05, + "loss": 0.2541, + "step": 40535 + }, + { + "epoch": 1.89, + "learning_rate": 1.380496292696691e-05, + "loss": 0.0478, + "step": 40540 + }, + { + "epoch": 1.89, + "learning_rate": 1.3804179141912124e-05, + "loss": 0.0496, + "step": 40545 + }, + { + "epoch": 1.89, + "learning_rate": 1.3803395356857336e-05, + "loss": 0.0953, + "step": 40550 + }, + { + "epoch": 1.89, + "learning_rate": 1.3802611571802551e-05, + "loss": 0.1024, + "step": 40555 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801827786747764e-05, + "loss": 0.0996, + "step": 40560 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801044001692976e-05, + "loss": 0.1564, + "step": 40565 + }, + { + "epoch": 1.89, + "learning_rate": 1.380026021663819e-05, + "loss": 0.2166, + "step": 40570 + }, + { + "epoch": 1.89, + "learning_rate": 1.3799476431583404e-05, + "loss": 0.226, + "step": 40575 + }, + { + "epoch": 1.89, + "learning_rate": 1.3798692646528618e-05, + "loss": 0.219, + "step": 40580 + }, + { + "epoch": 1.89, + "learning_rate": 1.379790886147383e-05, + "loss": 0.1555, + "step": 40585 + }, + { + "epoch": 1.89, + "learning_rate": 1.3797125076419045e-05, + "loss": 0.0567, + "step": 40590 + }, + { + "epoch": 1.89, + "learning_rate": 1.3796341291364258e-05, + "loss": 0.0816, + "step": 40595 + }, + { + "epoch": 1.89, + "learning_rate": 1.3795557506309471e-05, + "loss": 0.0672, + "step": 40600 + }, + { + "epoch": 1.89, + "learning_rate": 1.3794773721254684e-05, + "loss": 0.0293, + "step": 40605 + }, + { + "epoch": 1.89, + "learning_rate": 1.37939899361999e-05, + "loss": 0.1691, + "step": 40610 + }, + { + "epoch": 1.9, + "learning_rate": 1.3793206151145111e-05, + "loss": 0.1475, + "step": 40615 + }, + { + "epoch": 1.9, + "learning_rate": 1.3792422366090325e-05, + "loss": 0.1439, + "step": 40620 + }, + { + "epoch": 1.9, + "learning_rate": 1.3791638581035538e-05, + "loss": 0.2063, + "step": 40625 + }, + { + "epoch": 1.9, + "learning_rate": 1.379085479598075e-05, + "loss": 0.2501, + "step": 40630 + }, + { + "epoch": 1.9, + "learning_rate": 1.3790071010925965e-05, + "loss": 0.24, + "step": 40635 + }, + { + "epoch": 1.9, + "learning_rate": 1.3789287225871178e-05, + "loss": 0.0474, + "step": 40640 + }, + { + "epoch": 1.9, + "learning_rate": 1.3788503440816392e-05, + "loss": 0.0137, + "step": 40645 + }, + { + "epoch": 1.9, + "learning_rate": 1.3787719655761604e-05, + "loss": 0.0644, + "step": 40650 + }, + { + "epoch": 1.9, + "learning_rate": 1.378693587070682e-05, + "loss": 0.0847, + "step": 40655 + }, + { + "epoch": 1.9, + "learning_rate": 1.3786152085652032e-05, + "loss": 0.136, + "step": 40660 + }, + { + "epoch": 1.9, + "learning_rate": 1.3785368300597245e-05, + "loss": 0.1225, + "step": 40665 + }, + { + "epoch": 1.9, + "learning_rate": 1.3784584515542458e-05, + "loss": 0.1019, + "step": 40670 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783800730487673e-05, + "loss": 0.2475, + "step": 40675 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783016945432885e-05, + "loss": 0.2234, + "step": 40680 + }, + { + "epoch": 1.9, + "learning_rate": 1.37822331603781e-05, + "loss": 0.2204, + "step": 40685 + }, + { + "epoch": 1.9, + "learning_rate": 1.3781449375323313e-05, + "loss": 0.0354, + "step": 40690 + }, + { + "epoch": 1.9, + "learning_rate": 1.3780665590268525e-05, + "loss": 0.0904, + "step": 40695 + }, + { + "epoch": 1.9, + "learning_rate": 1.377988180521374e-05, + "loss": 0.0707, + "step": 40700 + }, + { + "epoch": 1.9, + "learning_rate": 1.3779098020158952e-05, + "loss": 0.0884, + "step": 40705 + }, + { + "epoch": 1.9, + "learning_rate": 1.3778314235104167e-05, + "loss": 0.1006, + "step": 40710 + }, + { + "epoch": 1.9, + "learning_rate": 1.377753045004938e-05, + "loss": 0.0914, + "step": 40715 + }, + { + "epoch": 1.9, + "learning_rate": 1.3776746664994593e-05, + "loss": 0.2008, + "step": 40720 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775962879939806e-05, + "loss": 0.1749, + "step": 40725 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775179094885021e-05, + "loss": 0.3868, + "step": 40730 + }, + { + "epoch": 1.9, + "learning_rate": 1.3774395309830233e-05, + "loss": 0.3199, + "step": 40735 + }, + { + "epoch": 1.9, + "learning_rate": 1.3773611524775447e-05, + "loss": 0.065, + "step": 40740 + }, + { + "epoch": 1.9, + "learning_rate": 1.377282773972066e-05, + "loss": 0.071, + "step": 40745 + }, + { + "epoch": 1.9, + "learning_rate": 1.3772043954665875e-05, + "loss": 0.0642, + "step": 40750 + }, + { + "epoch": 1.9, + "learning_rate": 1.3771260169611087e-05, + "loss": 0.1533, + "step": 40755 + }, + { + "epoch": 1.9, + "learning_rate": 1.37704763845563e-05, + "loss": 0.0883, + "step": 40760 + }, + { + "epoch": 1.9, + "learning_rate": 1.3769692599501513e-05, + "loss": 0.2085, + "step": 40765 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768908814446726e-05, + "loss": 0.1509, + "step": 40770 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768125029391941e-05, + "loss": 0.1984, + "step": 40775 + }, + { + "epoch": 1.9, + "learning_rate": 1.3767341244337153e-05, + "loss": 0.4016, + "step": 40780 + }, + { + "epoch": 1.9, + "learning_rate": 1.3766557459282367e-05, + "loss": 0.2388, + "step": 40785 + }, + { + "epoch": 1.9, + "learning_rate": 1.3765773674227581e-05, + "loss": 0.0885, + "step": 40790 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764989889172795e-05, + "loss": 0.0829, + "step": 40795 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764206104118007e-05, + "loss": 0.0609, + "step": 40800 + }, + { + "epoch": 1.9, + "learning_rate": 1.3763422319063223e-05, + "loss": 0.0371, + "step": 40805 + }, + { + "epoch": 1.9, + "learning_rate": 1.3762638534008435e-05, + "loss": 0.0782, + "step": 40810 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761854748953649e-05, + "loss": 0.1198, + "step": 40815 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761070963898861e-05, + "loss": 0.1064, + "step": 40820 + }, + { + "epoch": 1.9, + "learning_rate": 1.3760287178844073e-05, + "loss": 0.2126, + "step": 40825 + }, + { + "epoch": 1.91, + "learning_rate": 1.3759503393789289e-05, + "loss": 0.2644, + "step": 40830 + }, + { + "epoch": 1.91, + "learning_rate": 1.3758719608734501e-05, + "loss": 0.2017, + "step": 40835 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757935823679715e-05, + "loss": 0.0589, + "step": 40840 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757152038624927e-05, + "loss": 0.0665, + "step": 40845 + }, + { + "epoch": 1.91, + "learning_rate": 1.3756368253570143e-05, + "loss": 0.0637, + "step": 40850 + }, + { + "epoch": 1.91, + "learning_rate": 1.3755584468515355e-05, + "loss": 0.1055, + "step": 40855 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754800683460569e-05, + "loss": 0.0967, + "step": 40860 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754016898405781e-05, + "loss": 0.1057, + "step": 40865 + }, + { + "epoch": 1.91, + "learning_rate": 1.3753233113350997e-05, + "loss": 0.1226, + "step": 40870 + }, + { + "epoch": 1.91, + "learning_rate": 1.3752449328296209e-05, + "loss": 0.1455, + "step": 40875 + }, + { + "epoch": 1.91, + "learning_rate": 1.3751665543241423e-05, + "loss": 0.4124, + "step": 40880 + }, + { + "epoch": 1.91, + "learning_rate": 1.3750881758186635e-05, + "loss": 0.2321, + "step": 40885 + }, + { + "epoch": 1.91, + "learning_rate": 1.375009797313185e-05, + "loss": 0.0824, + "step": 40890 + }, + { + "epoch": 1.91, + "learning_rate": 1.3749314188077063e-05, + "loss": 0.0691, + "step": 40895 + }, + { + "epoch": 1.91, + "learning_rate": 1.3748530403022275e-05, + "loss": 0.0824, + "step": 40900 + }, + { + "epoch": 1.91, + "learning_rate": 1.3747746617967491e-05, + "loss": 0.1582, + "step": 40905 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746962832912703e-05, + "loss": 0.104, + "step": 40910 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746179047857917e-05, + "loss": 0.1756, + "step": 40915 + }, + { + "epoch": 1.91, + "learning_rate": 1.374539526280313e-05, + "loss": 0.1663, + "step": 40920 + }, + { + "epoch": 1.91, + "learning_rate": 1.3744611477748345e-05, + "loss": 0.2132, + "step": 40925 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743827692693557e-05, + "loss": 0.2881, + "step": 40930 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743043907638771e-05, + "loss": 0.1533, + "step": 40935 + }, + { + "epoch": 1.91, + "learning_rate": 1.3742260122583983e-05, + "loss": 0.034, + "step": 40940 + }, + { + "epoch": 1.91, + "learning_rate": 1.3741476337529199e-05, + "loss": 0.0692, + "step": 40945 + }, + { + "epoch": 1.91, + "learning_rate": 1.3740692552474411e-05, + "loss": 0.0526, + "step": 40950 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739908767419623e-05, + "loss": 0.1364, + "step": 40955 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739124982364837e-05, + "loss": 0.0757, + "step": 40960 + }, + { + "epoch": 1.91, + "learning_rate": 1.373834119731005e-05, + "loss": 0.1443, + "step": 40965 + }, + { + "epoch": 1.91, + "learning_rate": 1.3737557412255265e-05, + "loss": 0.2183, + "step": 40970 + }, + { + "epoch": 1.91, + "learning_rate": 1.3736773627200477e-05, + "loss": 0.2046, + "step": 40975 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735989842145691e-05, + "loss": 0.3461, + "step": 40980 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735206057090903e-05, + "loss": 0.268, + "step": 40985 + }, + { + "epoch": 1.91, + "learning_rate": 1.3734422272036119e-05, + "loss": 0.0231, + "step": 40990 + }, + { + "epoch": 1.91, + "learning_rate": 1.3733638486981331e-05, + "loss": 0.1039, + "step": 40995 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732854701926545e-05, + "loss": 0.0878, + "step": 41000 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732070916871759e-05, + "loss": 0.0818, + "step": 41005 + }, + { + "epoch": 1.91, + "learning_rate": 1.3731287131816973e-05, + "loss": 0.108, + "step": 41010 + }, + { + "epoch": 1.91, + "learning_rate": 1.3730503346762185e-05, + "loss": 0.1776, + "step": 41015 + }, + { + "epoch": 1.91, + "learning_rate": 1.3729719561707397e-05, + "loss": 0.0533, + "step": 41020 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728935776652613e-05, + "loss": 0.1883, + "step": 41025 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728151991597825e-05, + "loss": 0.3926, + "step": 41030 + }, + { + "epoch": 1.91, + "learning_rate": 1.3727368206543039e-05, + "loss": 0.2241, + "step": 41035 + }, + { + "epoch": 1.91, + "learning_rate": 1.3726584421488251e-05, + "loss": 0.0445, + "step": 41040 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725800636433467e-05, + "loss": 0.087, + "step": 41045 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725016851378679e-05, + "loss": 0.0322, + "step": 41050 + }, + { + "epoch": 1.92, + "learning_rate": 1.3724233066323893e-05, + "loss": 0.0478, + "step": 41055 + }, + { + "epoch": 1.92, + "learning_rate": 1.3723449281269105e-05, + "loss": 0.1294, + "step": 41060 + }, + { + "epoch": 1.92, + "learning_rate": 1.372266549621432e-05, + "loss": 0.1646, + "step": 41065 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721881711159533e-05, + "loss": 0.1664, + "step": 41070 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721097926104747e-05, + "loss": 0.2329, + "step": 41075 + }, + { + "epoch": 1.92, + "learning_rate": 1.3720314141049959e-05, + "loss": 0.2623, + "step": 41080 + }, + { + "epoch": 1.92, + "learning_rate": 1.3719530355995173e-05, + "loss": 0.2138, + "step": 41085 + }, + { + "epoch": 1.92, + "learning_rate": 1.3718746570940387e-05, + "loss": 0.0504, + "step": 41090 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717962785885599e-05, + "loss": 0.0839, + "step": 41095 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717179000830813e-05, + "loss": 0.0589, + "step": 41100 + }, + { + "epoch": 1.92, + "learning_rate": 1.3716395215776027e-05, + "loss": 0.1023, + "step": 41105 + }, + { + "epoch": 1.92, + "learning_rate": 1.371561143072124e-05, + "loss": 0.1489, + "step": 41110 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714827645666453e-05, + "loss": 0.1497, + "step": 41115 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714043860611669e-05, + "loss": 0.1494, + "step": 41120 + }, + { + "epoch": 1.92, + "learning_rate": 1.371326007555688e-05, + "loss": 0.2351, + "step": 41125 + }, + { + "epoch": 1.92, + "learning_rate": 1.3712476290502095e-05, + "loss": 0.3537, + "step": 41130 + }, + { + "epoch": 1.92, + "learning_rate": 1.3711692505447307e-05, + "loss": 0.3639, + "step": 41135 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710908720392522e-05, + "loss": 0.0352, + "step": 41140 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710124935337735e-05, + "loss": 0.0505, + "step": 41145 + }, + { + "epoch": 1.92, + "learning_rate": 1.3709341150282947e-05, + "loss": 0.0559, + "step": 41150 + }, + { + "epoch": 1.92, + "learning_rate": 1.370855736522816e-05, + "loss": 0.0432, + "step": 41155 + }, + { + "epoch": 1.92, + "learning_rate": 1.3707773580173373e-05, + "loss": 0.2254, + "step": 41160 + }, + { + "epoch": 1.92, + "learning_rate": 1.3706989795118589e-05, + "loss": 0.1569, + "step": 41165 + }, + { + "epoch": 1.92, + "learning_rate": 1.37062060100638e-05, + "loss": 0.0941, + "step": 41170 + }, + { + "epoch": 1.92, + "learning_rate": 1.3705422225009015e-05, + "loss": 0.2558, + "step": 41175 + }, + { + "epoch": 1.92, + "learning_rate": 1.3704638439954227e-05, + "loss": 0.2511, + "step": 41180 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703854654899443e-05, + "loss": 0.3308, + "step": 41185 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703070869844655e-05, + "loss": 0.0499, + "step": 41190 + }, + { + "epoch": 1.92, + "learning_rate": 1.3702287084789869e-05, + "loss": 0.0471, + "step": 41195 + }, + { + "epoch": 1.92, + "learning_rate": 1.370150329973508e-05, + "loss": 0.063, + "step": 41200 + }, + { + "epoch": 1.92, + "learning_rate": 1.3700719514680296e-05, + "loss": 0.0849, + "step": 41205 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699935729625509e-05, + "loss": 0.1009, + "step": 41210 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699151944570721e-05, + "loss": 0.1324, + "step": 41215 + }, + { + "epoch": 1.92, + "learning_rate": 1.3698368159515936e-05, + "loss": 0.145, + "step": 41220 + }, + { + "epoch": 1.92, + "learning_rate": 1.3697584374461149e-05, + "loss": 0.206, + "step": 41225 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696800589406363e-05, + "loss": 0.3881, + "step": 41230 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696016804351575e-05, + "loss": 0.3881, + "step": 41235 + }, + { + "epoch": 1.92, + "learning_rate": 1.369523301929679e-05, + "loss": 0.1123, + "step": 41240 + }, + { + "epoch": 1.92, + "learning_rate": 1.3694449234242003e-05, + "loss": 0.0195, + "step": 41245 + }, + { + "epoch": 1.92, + "learning_rate": 1.3693665449187217e-05, + "loss": 0.1247, + "step": 41250 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692881664132429e-05, + "loss": 0.0557, + "step": 41255 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692097879077644e-05, + "loss": 0.0885, + "step": 41260 + }, + { + "epoch": 1.93, + "learning_rate": 1.3691314094022857e-05, + "loss": 0.1122, + "step": 41265 + }, + { + "epoch": 1.93, + "learning_rate": 1.369053030896807e-05, + "loss": 0.2121, + "step": 41270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3689746523913283e-05, + "loss": 0.1881, + "step": 41275 + }, + { + "epoch": 1.93, + "learning_rate": 1.3688962738858495e-05, + "loss": 0.2472, + "step": 41280 + }, + { + "epoch": 1.93, + "learning_rate": 1.368817895380371e-05, + "loss": 0.1782, + "step": 41285 + }, + { + "epoch": 1.93, + "learning_rate": 1.3687395168748923e-05, + "loss": 0.0595, + "step": 41290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3686611383694137e-05, + "loss": 0.0678, + "step": 41295 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685827598639349e-05, + "loss": 0.1161, + "step": 41300 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685043813584564e-05, + "loss": 0.0635, + "step": 41305 + }, + { + "epoch": 1.93, + "learning_rate": 1.3684260028529777e-05, + "loss": 0.1394, + "step": 41310 + }, + { + "epoch": 1.93, + "learning_rate": 1.368347624347499e-05, + "loss": 0.1814, + "step": 41315 + }, + { + "epoch": 1.93, + "learning_rate": 1.3682692458420204e-05, + "loss": 0.3026, + "step": 41320 + }, + { + "epoch": 1.93, + "learning_rate": 1.3681908673365418e-05, + "loss": 0.2154, + "step": 41325 + }, + { + "epoch": 1.93, + "learning_rate": 1.368112488831063e-05, + "loss": 0.2504, + "step": 41330 + }, + { + "epoch": 1.93, + "learning_rate": 1.3680341103255846e-05, + "loss": 0.3073, + "step": 41335 + }, + { + "epoch": 1.93, + "learning_rate": 1.3679557318201058e-05, + "loss": 0.0638, + "step": 41340 + }, + { + "epoch": 1.93, + "learning_rate": 1.367877353314627e-05, + "loss": 0.0279, + "step": 41345 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677989748091484e-05, + "loss": 0.1001, + "step": 41350 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677205963036697e-05, + "loss": 0.0529, + "step": 41355 + }, + { + "epoch": 1.93, + "learning_rate": 1.3676422177981912e-05, + "loss": 0.1054, + "step": 41360 + }, + { + "epoch": 1.93, + "learning_rate": 1.3675638392927124e-05, + "loss": 0.111, + "step": 41365 + }, + { + "epoch": 1.93, + "learning_rate": 1.3674854607872338e-05, + "loss": 0.0676, + "step": 41370 + }, + { + "epoch": 1.93, + "learning_rate": 1.367407082281755e-05, + "loss": 0.243, + "step": 41375 + }, + { + "epoch": 1.93, + "learning_rate": 1.3673287037762766e-05, + "loss": 0.4077, + "step": 41380 + }, + { + "epoch": 1.93, + "learning_rate": 1.3672503252707978e-05, + "loss": 0.3552, + "step": 41385 + }, + { + "epoch": 1.93, + "learning_rate": 1.3671719467653192e-05, + "loss": 0.0411, + "step": 41390 + }, + { + "epoch": 1.93, + "learning_rate": 1.3670935682598405e-05, + "loss": 0.039, + "step": 41395 + }, + { + "epoch": 1.93, + "learning_rate": 1.367015189754362e-05, + "loss": 0.0688, + "step": 41400 + }, + { + "epoch": 1.93, + "learning_rate": 1.3669368112488832e-05, + "loss": 0.1222, + "step": 41405 + }, + { + "epoch": 1.93, + "learning_rate": 1.3668584327434045e-05, + "loss": 0.073, + "step": 41410 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667800542379258e-05, + "loss": 0.0939, + "step": 41415 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667016757324472e-05, + "loss": 0.1917, + "step": 41420 + }, + { + "epoch": 1.93, + "learning_rate": 1.3666232972269686e-05, + "loss": 0.1409, + "step": 41425 + }, + { + "epoch": 1.93, + "learning_rate": 1.3665449187214898e-05, + "loss": 0.3746, + "step": 41430 + }, + { + "epoch": 1.93, + "learning_rate": 1.3664665402160114e-05, + "loss": 0.3048, + "step": 41435 + }, + { + "epoch": 1.93, + "learning_rate": 1.3663881617105326e-05, + "loss": 0.0518, + "step": 41440 + }, + { + "epoch": 1.93, + "learning_rate": 1.366309783205054e-05, + "loss": 0.0805, + "step": 41445 + }, + { + "epoch": 1.93, + "learning_rate": 1.3662314046995752e-05, + "loss": 0.0876, + "step": 41450 + }, + { + "epoch": 1.93, + "learning_rate": 1.3661530261940968e-05, + "loss": 0.0906, + "step": 41455 + }, + { + "epoch": 1.93, + "learning_rate": 1.366074647688618e-05, + "loss": 0.0911, + "step": 41460 + }, + { + "epoch": 1.93, + "learning_rate": 1.3659962691831394e-05, + "loss": 0.1121, + "step": 41465 + }, + { + "epoch": 1.94, + "learning_rate": 1.3659178906776606e-05, + "loss": 0.1549, + "step": 41470 + }, + { + "epoch": 1.94, + "learning_rate": 1.3658395121721819e-05, + "loss": 0.2158, + "step": 41475 + }, + { + "epoch": 1.94, + "learning_rate": 1.3657611336667034e-05, + "loss": 0.3177, + "step": 41480 + }, + { + "epoch": 1.94, + "learning_rate": 1.3656827551612246e-05, + "loss": 0.2616, + "step": 41485 + }, + { + "epoch": 1.94, + "learning_rate": 1.365604376655746e-05, + "loss": 0.0809, + "step": 41490 + }, + { + "epoch": 1.94, + "learning_rate": 1.3655259981502672e-05, + "loss": 0.0507, + "step": 41495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3654476196447888e-05, + "loss": 0.0408, + "step": 41500 + }, + { + "epoch": 1.94, + "learning_rate": 1.36536924113931e-05, + "loss": 0.079, + "step": 41505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652908626338314e-05, + "loss": 0.1017, + "step": 41510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652124841283526e-05, + "loss": 0.0745, + "step": 41515 + }, + { + "epoch": 1.94, + "learning_rate": 1.3651341056228742e-05, + "loss": 0.1234, + "step": 41520 + }, + { + "epoch": 1.94, + "learning_rate": 1.3650557271173954e-05, + "loss": 0.23, + "step": 41525 + }, + { + "epoch": 1.94, + "learning_rate": 1.3649773486119168e-05, + "loss": 0.2216, + "step": 41530 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648989701064382e-05, + "loss": 0.2246, + "step": 41535 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648205916009594e-05, + "loss": 0.0395, + "step": 41540 + }, + { + "epoch": 1.94, + "learning_rate": 1.3647422130954808e-05, + "loss": 0.0559, + "step": 41545 + }, + { + "epoch": 1.94, + "learning_rate": 1.364663834590002e-05, + "loss": 0.0517, + "step": 41550 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645854560845236e-05, + "loss": 0.1465, + "step": 41555 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645070775790448e-05, + "loss": 0.2362, + "step": 41560 + }, + { + "epoch": 1.94, + "learning_rate": 1.3644286990735662e-05, + "loss": 0.1821, + "step": 41565 + }, + { + "epoch": 1.94, + "learning_rate": 1.3643503205680874e-05, + "loss": 0.1984, + "step": 41570 + }, + { + "epoch": 1.94, + "learning_rate": 1.364271942062609e-05, + "loss": 0.2761, + "step": 41575 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641935635571302e-05, + "loss": 0.4789, + "step": 41580 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641151850516516e-05, + "loss": 0.2259, + "step": 41585 + }, + { + "epoch": 1.94, + "learning_rate": 1.3640368065461728e-05, + "loss": 0.0482, + "step": 41590 + }, + { + "epoch": 1.94, + "learning_rate": 1.3639584280406944e-05, + "loss": 0.0146, + "step": 41595 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638800495352156e-05, + "loss": 0.1254, + "step": 41600 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638016710297368e-05, + "loss": 0.0778, + "step": 41605 + }, + { + "epoch": 1.94, + "learning_rate": 1.3637232925242582e-05, + "loss": 0.0651, + "step": 41610 + }, + { + "epoch": 1.94, + "learning_rate": 1.3636449140187794e-05, + "loss": 0.1359, + "step": 41615 + }, + { + "epoch": 1.94, + "learning_rate": 1.363566535513301e-05, + "loss": 0.2115, + "step": 41620 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634881570078222e-05, + "loss": 0.1439, + "step": 41625 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634097785023436e-05, + "loss": 0.3997, + "step": 41630 + }, + { + "epoch": 1.94, + "learning_rate": 1.363331399996865e-05, + "loss": 0.2421, + "step": 41635 + }, + { + "epoch": 1.94, + "learning_rate": 1.3632530214913864e-05, + "loss": 0.0331, + "step": 41640 + }, + { + "epoch": 1.94, + "learning_rate": 1.3631746429859076e-05, + "loss": 0.1243, + "step": 41645 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630962644804292e-05, + "loss": 0.147, + "step": 41650 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630178859749504e-05, + "loss": 0.0483, + "step": 41655 + }, + { + "epoch": 1.94, + "learning_rate": 1.3629395074694718e-05, + "loss": 0.0896, + "step": 41660 + }, + { + "epoch": 1.94, + "learning_rate": 1.362861128963993e-05, + "loss": 0.1532, + "step": 41665 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627827504585142e-05, + "loss": 0.1593, + "step": 41670 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627043719530358e-05, + "loss": 0.1917, + "step": 41675 + }, + { + "epoch": 1.94, + "learning_rate": 1.362625993447557e-05, + "loss": 0.318, + "step": 41680 + }, + { + "epoch": 1.95, + "learning_rate": 1.3625476149420784e-05, + "loss": 0.3726, + "step": 41685 + }, + { + "epoch": 1.95, + "learning_rate": 1.3624692364365996e-05, + "loss": 0.0432, + "step": 41690 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623908579311212e-05, + "loss": 0.0398, + "step": 41695 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623124794256424e-05, + "loss": 0.0579, + "step": 41700 + }, + { + "epoch": 1.95, + "learning_rate": 1.3622341009201638e-05, + "loss": 0.0813, + "step": 41705 + }, + { + "epoch": 1.95, + "learning_rate": 1.362155722414685e-05, + "loss": 0.0798, + "step": 41710 + }, + { + "epoch": 1.95, + "learning_rate": 1.3620773439092066e-05, + "loss": 0.0768, + "step": 41715 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619989654037278e-05, + "loss": 0.1886, + "step": 41720 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619205868982492e-05, + "loss": 0.1185, + "step": 41725 + }, + { + "epoch": 1.95, + "learning_rate": 1.3618422083927704e-05, + "loss": 0.2939, + "step": 41730 + }, + { + "epoch": 1.95, + "learning_rate": 1.3617638298872918e-05, + "loss": 0.295, + "step": 41735 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616854513818132e-05, + "loss": 0.028, + "step": 41740 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616070728763344e-05, + "loss": 0.0461, + "step": 41745 + }, + { + "epoch": 1.95, + "learning_rate": 1.361528694370856e-05, + "loss": 0.0892, + "step": 41750 + }, + { + "epoch": 1.95, + "learning_rate": 1.3614503158653772e-05, + "loss": 0.0907, + "step": 41755 + }, + { + "epoch": 1.95, + "learning_rate": 1.3613719373598986e-05, + "loss": 0.0892, + "step": 41760 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612935588544198e-05, + "loss": 0.1427, + "step": 41765 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612151803489414e-05, + "loss": 0.108, + "step": 41770 + }, + { + "epoch": 1.95, + "learning_rate": 1.3611368018434626e-05, + "loss": 0.206, + "step": 41775 + }, + { + "epoch": 1.95, + "learning_rate": 1.361058423337984e-05, + "loss": 0.2341, + "step": 41780 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609800448325052e-05, + "loss": 0.2855, + "step": 41785 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609016663270268e-05, + "loss": 0.0509, + "step": 41790 + }, + { + "epoch": 1.95, + "learning_rate": 1.360823287821548e-05, + "loss": 0.0388, + "step": 41795 + }, + { + "epoch": 1.95, + "learning_rate": 1.3607449093160692e-05, + "loss": 0.0668, + "step": 41800 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606665308105906e-05, + "loss": 0.067, + "step": 41805 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605881523051118e-05, + "loss": 0.1153, + "step": 41810 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605097737996334e-05, + "loss": 0.183, + "step": 41815 + }, + { + "epoch": 1.95, + "learning_rate": 1.3604313952941546e-05, + "loss": 0.2191, + "step": 41820 + }, + { + "epoch": 1.95, + "learning_rate": 1.360353016788676e-05, + "loss": 0.1736, + "step": 41825 + }, + { + "epoch": 1.95, + "learning_rate": 1.3602746382831972e-05, + "loss": 0.3504, + "step": 41830 + }, + { + "epoch": 1.95, + "learning_rate": 1.3601962597777188e-05, + "loss": 0.2775, + "step": 41835 + }, + { + "epoch": 1.95, + "learning_rate": 1.36011788127224e-05, + "loss": 0.0406, + "step": 41840 + }, + { + "epoch": 1.95, + "learning_rate": 1.3600395027667614e-05, + "loss": 0.0676, + "step": 41845 + }, + { + "epoch": 1.95, + "learning_rate": 1.3599611242612828e-05, + "loss": 0.0566, + "step": 41850 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598827457558042e-05, + "loss": 0.0765, + "step": 41855 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598043672503254e-05, + "loss": 0.1108, + "step": 41860 + }, + { + "epoch": 1.95, + "learning_rate": 1.3597259887448466e-05, + "loss": 0.1186, + "step": 41865 + }, + { + "epoch": 1.95, + "learning_rate": 1.3596476102393682e-05, + "loss": 0.1556, + "step": 41870 + }, + { + "epoch": 1.95, + "learning_rate": 1.3595692317338894e-05, + "loss": 0.16, + "step": 41875 + }, + { + "epoch": 1.95, + "learning_rate": 1.3594908532284108e-05, + "loss": 0.3715, + "step": 41880 + }, + { + "epoch": 1.95, + "learning_rate": 1.359412474722932e-05, + "loss": 0.272, + "step": 41885 + }, + { + "epoch": 1.95, + "learning_rate": 1.3593340962174535e-05, + "loss": 0.0219, + "step": 41890 + }, + { + "epoch": 1.95, + "learning_rate": 1.3592557177119748e-05, + "loss": 0.0154, + "step": 41895 + }, + { + "epoch": 1.96, + "learning_rate": 1.3591773392064962e-05, + "loss": 0.1363, + "step": 41900 + }, + { + "epoch": 1.96, + "learning_rate": 1.3590989607010174e-05, + "loss": 0.074, + "step": 41905 + }, + { + "epoch": 1.96, + "learning_rate": 1.359020582195539e-05, + "loss": 0.1535, + "step": 41910 + }, + { + "epoch": 1.96, + "learning_rate": 1.3589422036900602e-05, + "loss": 0.0951, + "step": 41915 + }, + { + "epoch": 1.96, + "learning_rate": 1.3588638251845816e-05, + "loss": 0.1372, + "step": 41920 + }, + { + "epoch": 1.96, + "learning_rate": 1.3587854466791028e-05, + "loss": 0.2103, + "step": 41925 + }, + { + "epoch": 1.96, + "learning_rate": 1.358707068173624e-05, + "loss": 0.1851, + "step": 41930 + }, + { + "epoch": 1.96, + "learning_rate": 1.3586286896681456e-05, + "loss": 0.3416, + "step": 41935 + }, + { + "epoch": 1.96, + "learning_rate": 1.3585503111626668e-05, + "loss": 0.0271, + "step": 41940 + }, + { + "epoch": 1.96, + "learning_rate": 1.3584719326571882e-05, + "loss": 0.0605, + "step": 41945 + }, + { + "epoch": 1.96, + "learning_rate": 1.3583935541517096e-05, + "loss": 0.0437, + "step": 41950 + }, + { + "epoch": 1.96, + "learning_rate": 1.358315175646231e-05, + "loss": 0.0778, + "step": 41955 + }, + { + "epoch": 1.96, + "learning_rate": 1.3582367971407522e-05, + "loss": 0.0917, + "step": 41960 + }, + { + "epoch": 1.96, + "learning_rate": 1.3581584186352737e-05, + "loss": 0.1232, + "step": 41965 + }, + { + "epoch": 1.96, + "learning_rate": 1.358080040129795e-05, + "loss": 0.236, + "step": 41970 + }, + { + "epoch": 1.96, + "learning_rate": 1.3580016616243163e-05, + "loss": 0.2185, + "step": 41975 + }, + { + "epoch": 1.96, + "learning_rate": 1.3579232831188376e-05, + "loss": 0.393, + "step": 41980 + }, + { + "epoch": 1.96, + "learning_rate": 1.3578449046133591e-05, + "loss": 0.3043, + "step": 41985 + }, + { + "epoch": 1.96, + "learning_rate": 1.3577665261078803e-05, + "loss": 0.0506, + "step": 41990 + }, + { + "epoch": 1.96, + "learning_rate": 1.3576881476024016e-05, + "loss": 0.0796, + "step": 41995 + }, + { + "epoch": 1.96, + "learning_rate": 1.357609769096923e-05, + "loss": 0.0657, + "step": 42000 + }, + { + "epoch": 1.96, + "learning_rate": 1.3575313905914442e-05, + "loss": 0.0712, + "step": 42005 + }, + { + "epoch": 1.96, + "learning_rate": 1.3574530120859657e-05, + "loss": 0.1189, + "step": 42010 + }, + { + "epoch": 1.96, + "learning_rate": 1.357374633580487e-05, + "loss": 0.157, + "step": 42015 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572962550750083e-05, + "loss": 0.1085, + "step": 42020 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572178765695296e-05, + "loss": 0.2305, + "step": 42025 + }, + { + "epoch": 1.96, + "learning_rate": 1.3571394980640511e-05, + "loss": 0.3907, + "step": 42030 + }, + { + "epoch": 1.96, + "learning_rate": 1.3570611195585723e-05, + "loss": 0.2425, + "step": 42035 + }, + { + "epoch": 1.96, + "learning_rate": 1.3569827410530937e-05, + "loss": 0.039, + "step": 42040 + }, + { + "epoch": 1.96, + "learning_rate": 1.356904362547615e-05, + "loss": 0.0644, + "step": 42045 + }, + { + "epoch": 1.96, + "learning_rate": 1.3568259840421365e-05, + "loss": 0.0367, + "step": 42050 + }, + { + "epoch": 1.96, + "learning_rate": 1.3567476055366577e-05, + "loss": 0.0325, + "step": 42055 + }, + { + "epoch": 1.96, + "learning_rate": 1.356669227031179e-05, + "loss": 0.1135, + "step": 42060 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565908485257005e-05, + "loss": 0.1177, + "step": 42065 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565124700202217e-05, + "loss": 0.1744, + "step": 42070 + }, + { + "epoch": 1.96, + "learning_rate": 1.3564340915147431e-05, + "loss": 0.267, + "step": 42075 + }, + { + "epoch": 1.96, + "learning_rate": 1.3563557130092644e-05, + "loss": 0.2601, + "step": 42080 + }, + { + "epoch": 1.96, + "learning_rate": 1.3562773345037859e-05, + "loss": 0.3815, + "step": 42085 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561989559983071e-05, + "loss": 0.0435, + "step": 42090 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561205774928285e-05, + "loss": 0.0452, + "step": 42095 + }, + { + "epoch": 1.96, + "learning_rate": 1.3560421989873497e-05, + "loss": 0.1247, + "step": 42100 + }, + { + "epoch": 1.96, + "learning_rate": 1.3559638204818713e-05, + "loss": 0.0745, + "step": 42105 + }, + { + "epoch": 1.96, + "learning_rate": 1.3558854419763925e-05, + "loss": 0.1184, + "step": 42110 + }, + { + "epoch": 1.97, + "learning_rate": 1.355807063470914e-05, + "loss": 0.1522, + "step": 42115 + }, + { + "epoch": 1.97, + "learning_rate": 1.3557286849654351e-05, + "loss": 0.1299, + "step": 42120 + }, + { + "epoch": 1.97, + "learning_rate": 1.3556503064599564e-05, + "loss": 0.2057, + "step": 42125 + }, + { + "epoch": 1.97, + "learning_rate": 1.355571927954478e-05, + "loss": 0.2441, + "step": 42130 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554935494489991e-05, + "loss": 0.2413, + "step": 42135 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554151709435205e-05, + "loss": 0.063, + "step": 42140 + }, + { + "epoch": 1.97, + "learning_rate": 1.3553367924380418e-05, + "loss": 0.0286, + "step": 42145 + }, + { + "epoch": 1.97, + "learning_rate": 1.3552584139325633e-05, + "loss": 0.0437, + "step": 42150 + }, + { + "epoch": 1.97, + "learning_rate": 1.3551800354270845e-05, + "loss": 0.0742, + "step": 42155 + }, + { + "epoch": 1.97, + "learning_rate": 1.355101656921606e-05, + "loss": 0.1081, + "step": 42160 + }, + { + "epoch": 1.97, + "learning_rate": 1.3550232784161273e-05, + "loss": 0.1142, + "step": 42165 + }, + { + "epoch": 1.97, + "learning_rate": 1.3549448999106487e-05, + "loss": 0.1755, + "step": 42170 + }, + { + "epoch": 1.97, + "learning_rate": 1.35486652140517e-05, + "loss": 0.2252, + "step": 42175 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547881428996915e-05, + "loss": 0.2728, + "step": 42180 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547097643942127e-05, + "loss": 0.3548, + "step": 42185 + }, + { + "epoch": 1.97, + "learning_rate": 1.354631385888734e-05, + "loss": 0.1084, + "step": 42190 + }, + { + "epoch": 1.97, + "learning_rate": 1.3545530073832553e-05, + "loss": 0.0751, + "step": 42195 + }, + { + "epoch": 1.97, + "learning_rate": 1.3544746288777765e-05, + "loss": 0.1289, + "step": 42200 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543962503722981e-05, + "loss": 0.1643, + "step": 42205 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543178718668193e-05, + "loss": 0.1434, + "step": 42210 + }, + { + "epoch": 1.97, + "learning_rate": 1.3542394933613407e-05, + "loss": 0.1457, + "step": 42215 + }, + { + "epoch": 1.97, + "learning_rate": 1.354161114855862e-05, + "loss": 0.1197, + "step": 42220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540827363503835e-05, + "loss": 0.3008, + "step": 42225 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540043578449047e-05, + "loss": 0.2925, + "step": 42230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3539259793394261e-05, + "loss": 0.3225, + "step": 42235 + }, + { + "epoch": 1.97, + "learning_rate": 1.3538476008339473e-05, + "loss": 0.0174, + "step": 42240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3537692223284689e-05, + "loss": 0.0662, + "step": 42245 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536908438229901e-05, + "loss": 0.0883, + "step": 42250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536124653175113e-05, + "loss": 0.1134, + "step": 42255 + }, + { + "epoch": 1.97, + "learning_rate": 1.3535340868120327e-05, + "loss": 0.0576, + "step": 42260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3534557083065541e-05, + "loss": 0.1248, + "step": 42265 + }, + { + "epoch": 1.97, + "learning_rate": 1.3533773298010755e-05, + "loss": 0.1739, + "step": 42270 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532989512955967e-05, + "loss": 0.1615, + "step": 42275 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532205727901183e-05, + "loss": 0.2108, + "step": 42280 + }, + { + "epoch": 1.97, + "learning_rate": 1.3531421942846395e-05, + "loss": 0.1906, + "step": 42285 + }, + { + "epoch": 1.97, + "learning_rate": 1.3530638157791609e-05, + "loss": 0.0301, + "step": 42290 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529854372736821e-05, + "loss": 0.0248, + "step": 42295 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529070587682037e-05, + "loss": 0.0653, + "step": 42300 + }, + { + "epoch": 1.97, + "learning_rate": 1.3528286802627249e-05, + "loss": 0.1097, + "step": 42305 + }, + { + "epoch": 1.97, + "learning_rate": 1.3527503017572463e-05, + "loss": 0.1149, + "step": 42310 + }, + { + "epoch": 1.97, + "learning_rate": 1.3526719232517675e-05, + "loss": 0.1353, + "step": 42315 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525935447462887e-05, + "loss": 0.1043, + "step": 42320 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525151662408103e-05, + "loss": 0.3242, + "step": 42325 + }, + { + "epoch": 1.98, + "learning_rate": 1.3524367877353315e-05, + "loss": 0.2849, + "step": 42330 + }, + { + "epoch": 1.98, + "learning_rate": 1.3523584092298529e-05, + "loss": 0.3416, + "step": 42335 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522800307243741e-05, + "loss": 0.0547, + "step": 42340 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522016522188957e-05, + "loss": 0.0568, + "step": 42345 + }, + { + "epoch": 1.98, + "learning_rate": 1.3521232737134169e-05, + "loss": 0.0978, + "step": 42350 + }, + { + "epoch": 1.98, + "learning_rate": 1.3520448952079383e-05, + "loss": 0.1457, + "step": 42355 + }, + { + "epoch": 1.98, + "learning_rate": 1.3519665167024595e-05, + "loss": 0.0819, + "step": 42360 + }, + { + "epoch": 1.98, + "learning_rate": 1.351888138196981e-05, + "loss": 0.1787, + "step": 42365 + }, + { + "epoch": 1.98, + "learning_rate": 1.3518097596915023e-05, + "loss": 0.1941, + "step": 42370 + }, + { + "epoch": 1.98, + "learning_rate": 1.3517313811860237e-05, + "loss": 0.2267, + "step": 42375 + }, + { + "epoch": 1.98, + "learning_rate": 1.351653002680545e-05, + "loss": 0.377, + "step": 42380 + }, + { + "epoch": 1.98, + "learning_rate": 1.3515746241750663e-05, + "loss": 0.2612, + "step": 42385 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514962456695877e-05, + "loss": 0.0406, + "step": 42390 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514178671641089e-05, + "loss": 0.0252, + "step": 42395 + }, + { + "epoch": 1.98, + "learning_rate": 1.3513394886586305e-05, + "loss": 0.0102, + "step": 42400 + }, + { + "epoch": 1.98, + "learning_rate": 1.3512611101531517e-05, + "loss": 0.1246, + "step": 42405 + }, + { + "epoch": 1.98, + "learning_rate": 1.351182731647673e-05, + "loss": 0.1436, + "step": 42410 + }, + { + "epoch": 1.98, + "learning_rate": 1.3511043531421943e-05, + "loss": 0.1284, + "step": 42415 + }, + { + "epoch": 1.98, + "learning_rate": 1.3510259746367159e-05, + "loss": 0.1715, + "step": 42420 + }, + { + "epoch": 1.98, + "learning_rate": 1.3509475961312371e-05, + "loss": 0.2565, + "step": 42425 + }, + { + "epoch": 1.98, + "learning_rate": 1.3508692176257585e-05, + "loss": 0.2379, + "step": 42430 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507908391202797e-05, + "loss": 0.1811, + "step": 42435 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507124606148013e-05, + "loss": 0.0796, + "step": 42440 + }, + { + "epoch": 1.98, + "learning_rate": 1.3506340821093225e-05, + "loss": 0.0504, + "step": 42445 + }, + { + "epoch": 1.98, + "learning_rate": 1.3505557036038437e-05, + "loss": 0.1026, + "step": 42450 + }, + { + "epoch": 1.98, + "learning_rate": 1.3504773250983651e-05, + "loss": 0.0687, + "step": 42455 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503989465928863e-05, + "loss": 0.0637, + "step": 42460 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503205680874079e-05, + "loss": 0.0864, + "step": 42465 + }, + { + "epoch": 1.98, + "learning_rate": 1.3502421895819291e-05, + "loss": 0.1525, + "step": 42470 + }, + { + "epoch": 1.98, + "learning_rate": 1.3501638110764505e-05, + "loss": 0.1946, + "step": 42475 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500854325709719e-05, + "loss": 0.4185, + "step": 42480 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500070540654933e-05, + "loss": 0.2679, + "step": 42485 + }, + { + "epoch": 1.98, + "learning_rate": 1.3499286755600145e-05, + "loss": 0.0422, + "step": 42490 + }, + { + "epoch": 1.98, + "learning_rate": 1.349850297054536e-05, + "loss": 0.1082, + "step": 42495 + }, + { + "epoch": 1.98, + "learning_rate": 1.3497719185490573e-05, + "loss": 0.0823, + "step": 42500 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496935400435787e-05, + "loss": 0.0491, + "step": 42505 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496151615380999e-05, + "loss": 0.1773, + "step": 42510 + }, + { + "epoch": 1.98, + "learning_rate": 1.3495367830326211e-05, + "loss": 0.1344, + "step": 42515 + }, + { + "epoch": 1.98, + "learning_rate": 1.3494584045271427e-05, + "loss": 0.0834, + "step": 42520 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493800260216639e-05, + "loss": 0.2854, + "step": 42525 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493016475161853e-05, + "loss": 0.3165, + "step": 42530 + }, + { + "epoch": 1.98, + "learning_rate": 1.3492232690107065e-05, + "loss": 0.2865, + "step": 42535 + }, + { + "epoch": 1.98, + "learning_rate": 1.349144890505228e-05, + "loss": 0.0551, + "step": 42540 + }, + { + "epoch": 1.99, + "learning_rate": 1.3490665119997493e-05, + "loss": 0.0634, + "step": 42545 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489881334942707e-05, + "loss": 0.0897, + "step": 42550 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489097549887919e-05, + "loss": 0.1488, + "step": 42555 + }, + { + "epoch": 1.99, + "learning_rate": 1.3488313764833134e-05, + "loss": 0.1682, + "step": 42560 + }, + { + "epoch": 1.99, + "learning_rate": 1.3487529979778347e-05, + "loss": 0.1566, + "step": 42565 + }, + { + "epoch": 1.99, + "learning_rate": 1.348674619472356e-05, + "loss": 0.1352, + "step": 42570 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485962409668773e-05, + "loss": 0.2381, + "step": 42575 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485178624613987e-05, + "loss": 0.3136, + "step": 42580 + }, + { + "epoch": 1.99, + "learning_rate": 1.34843948395592e-05, + "loss": 0.2361, + "step": 42585 + }, + { + "epoch": 1.99, + "learning_rate": 1.3483611054504413e-05, + "loss": 0.0656, + "step": 42590 + }, + { + "epoch": 1.99, + "learning_rate": 1.3482827269449628e-05, + "loss": 0.0353, + "step": 42595 + }, + { + "epoch": 1.99, + "learning_rate": 1.348204348439484e-05, + "loss": 0.0433, + "step": 42600 + }, + { + "epoch": 1.99, + "learning_rate": 1.3481259699340055e-05, + "loss": 0.0876, + "step": 42605 + }, + { + "epoch": 1.99, + "learning_rate": 1.3480475914285267e-05, + "loss": 0.1082, + "step": 42610 + }, + { + "epoch": 1.99, + "learning_rate": 1.3479692129230482e-05, + "loss": 0.1904, + "step": 42615 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478908344175695e-05, + "loss": 0.2017, + "step": 42620 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478124559120908e-05, + "loss": 0.1698, + "step": 42625 + }, + { + "epoch": 1.99, + "learning_rate": 1.347734077406612e-05, + "loss": 0.4722, + "step": 42630 + }, + { + "epoch": 1.99, + "learning_rate": 1.3476556989011336e-05, + "loss": 0.2895, + "step": 42635 + }, + { + "epoch": 1.99, + "learning_rate": 1.3475773203956548e-05, + "loss": 0.0633, + "step": 42640 + }, + { + "epoch": 1.99, + "learning_rate": 1.347498941890176e-05, + "loss": 0.0354, + "step": 42645 + }, + { + "epoch": 1.99, + "learning_rate": 1.3474205633846975e-05, + "loss": 0.0921, + "step": 42650 + }, + { + "epoch": 1.99, + "learning_rate": 1.3473421848792187e-05, + "loss": 0.0774, + "step": 42655 + }, + { + "epoch": 1.99, + "learning_rate": 1.3472638063737402e-05, + "loss": 0.1163, + "step": 42660 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471854278682615e-05, + "loss": 0.1109, + "step": 42665 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471070493627829e-05, + "loss": 0.1748, + "step": 42670 + }, + { + "epoch": 1.99, + "learning_rate": 1.347028670857304e-05, + "loss": 0.1627, + "step": 42675 + }, + { + "epoch": 1.99, + "learning_rate": 1.3469502923518256e-05, + "loss": 0.3621, + "step": 42680 + }, + { + "epoch": 1.99, + "learning_rate": 1.3468719138463469e-05, + "loss": 0.193, + "step": 42685 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467935353408682e-05, + "loss": 0.0512, + "step": 42690 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467151568353896e-05, + "loss": 0.0666, + "step": 42695 + }, + { + "epoch": 1.99, + "learning_rate": 1.346636778329911e-05, + "loss": 0.1495, + "step": 42700 + }, + { + "epoch": 1.99, + "learning_rate": 1.3465583998244322e-05, + "loss": 0.13, + "step": 42705 + }, + { + "epoch": 1.99, + "learning_rate": 1.3464800213189535e-05, + "loss": 0.0941, + "step": 42710 + }, + { + "epoch": 1.99, + "learning_rate": 1.346401642813475e-05, + "loss": 0.1118, + "step": 42715 + }, + { + "epoch": 1.99, + "learning_rate": 1.3463232643079963e-05, + "loss": 0.1672, + "step": 42720 + }, + { + "epoch": 1.99, + "learning_rate": 1.3462448858025176e-05, + "loss": 0.2903, + "step": 42725 + }, + { + "epoch": 1.99, + "learning_rate": 1.3461665072970389e-05, + "loss": 0.5016, + "step": 42730 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460881287915604e-05, + "loss": 0.3474, + "step": 42735 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460097502860816e-05, + "loss": 0.0593, + "step": 42740 + }, + { + "epoch": 1.99, + "learning_rate": 1.345931371780603e-05, + "loss": 0.0545, + "step": 42745 + }, + { + "epoch": 1.99, + "learning_rate": 1.3458529932751243e-05, + "loss": 0.0924, + "step": 42750 + }, + { + "epoch": 2.0, + "learning_rate": 1.3457746147696458e-05, + "loss": 0.0801, + "step": 42755 + }, + { + "epoch": 2.0, + "learning_rate": 1.345696236264167e-05, + "loss": 0.0866, + "step": 42760 + }, + { + "epoch": 2.0, + "learning_rate": 1.3456178577586884e-05, + "loss": 0.1346, + "step": 42765 + }, + { + "epoch": 2.0, + "learning_rate": 1.3455394792532096e-05, + "loss": 0.1853, + "step": 42770 + }, + { + "epoch": 2.0, + "learning_rate": 1.3454611007477309e-05, + "loss": 0.1966, + "step": 42775 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453827222422524e-05, + "loss": 0.238, + "step": 42780 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453043437367737e-05, + "loss": 0.2965, + "step": 42785 + }, + { + "epoch": 2.0, + "learning_rate": 1.345225965231295e-05, + "loss": 0.1616, + "step": 42790 + }, + { + "epoch": 2.0, + "learning_rate": 1.3451475867258164e-05, + "loss": 0.0177, + "step": 42795 + }, + { + "epoch": 2.0, + "learning_rate": 1.3450692082203378e-05, + "loss": 0.0763, + "step": 42800 + }, + { + "epoch": 2.0, + "learning_rate": 1.344990829714859e-05, + "loss": 0.108, + "step": 42805 + }, + { + "epoch": 2.0, + "learning_rate": 1.3449124512093806e-05, + "loss": 0.0828, + "step": 42810 + }, + { + "epoch": 2.0, + "learning_rate": 1.3448340727039018e-05, + "loss": 0.0873, + "step": 42815 + }, + { + "epoch": 2.0, + "learning_rate": 1.3447556941984232e-05, + "loss": 0.1054, + "step": 42820 + }, + { + "epoch": 2.0, + "learning_rate": 1.3446773156929444e-05, + "loss": 0.2872, + "step": 42825 + }, + { + "epoch": 2.0, + "learning_rate": 1.344598937187466e-05, + "loss": 0.2569, + "step": 42830 + }, + { + "epoch": 2.0, + "learning_rate": 1.3445205586819872e-05, + "loss": 0.2157, + "step": 42835 + }, + { + "epoch": 2.0, + "learning_rate": 1.3444421801765084e-05, + "loss": 0.074, + "step": 42840 + }, + { + "epoch": 2.0, + "learning_rate": 1.3443638016710298e-05, + "loss": 0.1238, + "step": 42845 + }, + { + "epoch": 2.0, + "learning_rate": 1.344285423165551e-05, + "loss": 0.1821, + "step": 42850 + }, + { + "epoch": 2.0, + "learning_rate": 1.3442070446600726e-05, + "loss": 0.0898, + "step": 42855 + }, + { + "epoch": 2.0, + "learning_rate": 1.3441286661545938e-05, + "loss": 0.269, + "step": 42860 + }, + { + "epoch": 2.0, + "eval_cer": 0.015201216458283512, + "eval_loss": 0.34811559319496155, + "eval_runtime": 472.4357, + "eval_samples_per_second": 40.323, + "eval_steps_per_second": 5.042, + "eval_wer": 0.12934863064396743, + "step": 42862 + }, + { + "epoch": 2.0, + "learning_rate": 1.3440502876491152e-05, + "loss": 0.3587, + "step": 42865 + }, + { + "epoch": 2.0, + "learning_rate": 1.3439719091436364e-05, + "loss": 0.1501, + "step": 42870 + }, + { + "epoch": 2.0, + "learning_rate": 1.343893530638158e-05, + "loss": 0.0647, + "step": 42875 + }, + { + "epoch": 2.0, + "learning_rate": 1.3438151521326792e-05, + "loss": 0.049, + "step": 42880 + }, + { + "epoch": 2.0, + "learning_rate": 1.3437367736272006e-05, + "loss": 0.0998, + "step": 42885 + }, + { + "epoch": 2.0, + "learning_rate": 1.3436583951217218e-05, + "loss": 0.0791, + "step": 42890 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435800166162434e-05, + "loss": 0.2884, + "step": 42895 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435016381107646e-05, + "loss": 0.1765, + "step": 42900 + }, + { + "epoch": 2.0, + "learning_rate": 1.3434232596052858e-05, + "loss": 0.1953, + "step": 42905 + }, + { + "epoch": 2.0, + "learning_rate": 1.3433448810998074e-05, + "loss": 0.2493, + "step": 42910 + }, + { + "epoch": 2.0, + "learning_rate": 1.3432665025943286e-05, + "loss": 0.2425, + "step": 42915 + }, + { + "epoch": 2.0, + "learning_rate": 1.34318812408885e-05, + "loss": 0.0484, + "step": 42920 + }, + { + "epoch": 2.0, + "learning_rate": 1.3431097455833712e-05, + "loss": 0.0687, + "step": 42925 + }, + { + "epoch": 2.0, + "learning_rate": 1.3430313670778928e-05, + "loss": 0.0949, + "step": 42930 + }, + { + "epoch": 2.0, + "learning_rate": 1.342952988572414e-05, + "loss": 0.0677, + "step": 42935 + }, + { + "epoch": 2.0, + "learning_rate": 1.3428746100669354e-05, + "loss": 0.1255, + "step": 42940 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427962315614566e-05, + "loss": 0.1747, + "step": 42945 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427178530559782e-05, + "loss": 0.1615, + "step": 42950 + }, + { + "epoch": 2.0, + "learning_rate": 1.3426394745504994e-05, + "loss": 0.2272, + "step": 42955 + }, + { + "epoch": 2.0, + "learning_rate": 1.3425610960450208e-05, + "loss": 0.4022, + "step": 42960 + }, + { + "epoch": 2.0, + "learning_rate": 1.342482717539542e-05, + "loss": 0.3077, + "step": 42965 + }, + { + "epoch": 2.01, + "learning_rate": 1.3424043390340632e-05, + "loss": 0.0336, + "step": 42970 + }, + { + "epoch": 2.01, + "learning_rate": 1.3423259605285848e-05, + "loss": 0.0245, + "step": 42975 + }, + { + "epoch": 2.01, + "learning_rate": 1.342247582023106e-05, + "loss": 0.0466, + "step": 42980 + }, + { + "epoch": 2.01, + "learning_rate": 1.3421692035176274e-05, + "loss": 0.049, + "step": 42985 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420908250121486e-05, + "loss": 0.081, + "step": 42990 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420124465066702e-05, + "loss": 0.2433, + "step": 42995 + }, + { + "epoch": 2.01, + "learning_rate": 1.3419340680011914e-05, + "loss": 0.0825, + "step": 43000 + }, + { + "epoch": 2.01, + "learning_rate": 1.3418556894957128e-05, + "loss": 0.176, + "step": 43005 + }, + { + "epoch": 2.01, + "learning_rate": 1.3417773109902342e-05, + "loss": 0.3964, + "step": 43010 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416989324847556e-05, + "loss": 0.2797, + "step": 43015 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416205539792768e-05, + "loss": 0.0541, + "step": 43020 + }, + { + "epoch": 2.01, + "learning_rate": 1.3415421754737984e-05, + "loss": 0.1003, + "step": 43025 + }, + { + "epoch": 2.01, + "learning_rate": 1.3414637969683196e-05, + "loss": 0.0925, + "step": 43030 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413854184628408e-05, + "loss": 0.0643, + "step": 43035 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413070399573622e-05, + "loss": 0.0862, + "step": 43040 + }, + { + "epoch": 2.01, + "learning_rate": 1.3412286614518834e-05, + "loss": 0.1016, + "step": 43045 + }, + { + "epoch": 2.01, + "learning_rate": 1.341150282946405e-05, + "loss": 0.1259, + "step": 43050 + }, + { + "epoch": 2.01, + "learning_rate": 1.3410719044409262e-05, + "loss": 0.1814, + "step": 43055 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409935259354476e-05, + "loss": 0.2568, + "step": 43060 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409151474299688e-05, + "loss": 0.3472, + "step": 43065 + }, + { + "epoch": 2.01, + "learning_rate": 1.3408367689244904e-05, + "loss": 0.0966, + "step": 43070 + }, + { + "epoch": 2.01, + "learning_rate": 1.3407583904190116e-05, + "loss": 0.0378, + "step": 43075 + }, + { + "epoch": 2.01, + "learning_rate": 1.340680011913533e-05, + "loss": 0.066, + "step": 43080 + }, + { + "epoch": 2.01, + "learning_rate": 1.3406016334080542e-05, + "loss": 0.1326, + "step": 43085 + }, + { + "epoch": 2.01, + "learning_rate": 1.3405232549025758e-05, + "loss": 0.0976, + "step": 43090 + }, + { + "epoch": 2.01, + "learning_rate": 1.340444876397097e-05, + "loss": 0.1264, + "step": 43095 + }, + { + "epoch": 2.01, + "learning_rate": 1.3403664978916182e-05, + "loss": 0.1619, + "step": 43100 + }, + { + "epoch": 2.01, + "learning_rate": 1.3402881193861396e-05, + "loss": 0.2066, + "step": 43105 + }, + { + "epoch": 2.01, + "learning_rate": 1.340209740880661e-05, + "loss": 0.2903, + "step": 43110 + }, + { + "epoch": 2.01, + "learning_rate": 1.3401313623751824e-05, + "loss": 0.2423, + "step": 43115 + }, + { + "epoch": 2.01, + "learning_rate": 1.3400529838697036e-05, + "loss": 0.0823, + "step": 43120 + }, + { + "epoch": 2.01, + "learning_rate": 1.3399746053642252e-05, + "loss": 0.0537, + "step": 43125 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398962268587464e-05, + "loss": 0.061, + "step": 43130 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398178483532678e-05, + "loss": 0.1014, + "step": 43135 + }, + { + "epoch": 2.01, + "learning_rate": 1.339739469847789e-05, + "loss": 0.1187, + "step": 43140 + }, + { + "epoch": 2.01, + "learning_rate": 1.3396610913423106e-05, + "loss": 0.0802, + "step": 43145 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395827128368318e-05, + "loss": 0.1449, + "step": 43150 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395043343313532e-05, + "loss": 0.2398, + "step": 43155 + }, + { + "epoch": 2.01, + "learning_rate": 1.3394259558258744e-05, + "loss": 0.1875, + "step": 43160 + }, + { + "epoch": 2.01, + "learning_rate": 1.3393475773203956e-05, + "loss": 0.3293, + "step": 43165 + }, + { + "epoch": 2.01, + "learning_rate": 1.3392691988149172e-05, + "loss": 0.0328, + "step": 43170 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391908203094384e-05, + "loss": 0.0352, + "step": 43175 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391124418039598e-05, + "loss": 0.0326, + "step": 43180 + }, + { + "epoch": 2.02, + "learning_rate": 1.339034063298481e-05, + "loss": 0.1078, + "step": 43185 + }, + { + "epoch": 2.02, + "learning_rate": 1.3389556847930026e-05, + "loss": 0.0485, + "step": 43190 + }, + { + "epoch": 2.02, + "learning_rate": 1.3388773062875238e-05, + "loss": 0.1721, + "step": 43195 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387989277820452e-05, + "loss": 0.2037, + "step": 43200 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387205492765664e-05, + "loss": 0.1506, + "step": 43205 + }, + { + "epoch": 2.02, + "learning_rate": 1.338642170771088e-05, + "loss": 0.2595, + "step": 43210 + }, + { + "epoch": 2.02, + "learning_rate": 1.3385637922656092e-05, + "loss": 0.2567, + "step": 43215 + }, + { + "epoch": 2.02, + "learning_rate": 1.3384854137601306e-05, + "loss": 0.0354, + "step": 43220 + }, + { + "epoch": 2.02, + "learning_rate": 1.338407035254652e-05, + "loss": 0.0445, + "step": 43225 + }, + { + "epoch": 2.02, + "learning_rate": 1.3383286567491732e-05, + "loss": 0.1386, + "step": 43230 + }, + { + "epoch": 2.02, + "learning_rate": 1.3382502782436946e-05, + "loss": 0.1286, + "step": 43235 + }, + { + "epoch": 2.02, + "learning_rate": 1.3381718997382158e-05, + "loss": 0.1383, + "step": 43240 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380935212327373e-05, + "loss": 0.164, + "step": 43245 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380151427272586e-05, + "loss": 0.1469, + "step": 43250 + }, + { + "epoch": 2.02, + "learning_rate": 1.33793676422178e-05, + "loss": 0.1615, + "step": 43255 + }, + { + "epoch": 2.02, + "learning_rate": 1.3378583857163012e-05, + "loss": 0.2817, + "step": 43260 + }, + { + "epoch": 2.02, + "learning_rate": 1.3377800072108227e-05, + "loss": 0.3087, + "step": 43265 + }, + { + "epoch": 2.02, + "learning_rate": 1.337701628705344e-05, + "loss": 0.0857, + "step": 43270 + }, + { + "epoch": 2.02, + "learning_rate": 1.3376232501998654e-05, + "loss": 0.0569, + "step": 43275 + }, + { + "epoch": 2.02, + "learning_rate": 1.3375448716943866e-05, + "loss": 0.0329, + "step": 43280 + }, + { + "epoch": 2.02, + "learning_rate": 1.3374664931889081e-05, + "loss": 0.0912, + "step": 43285 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373881146834294e-05, + "loss": 0.0769, + "step": 43290 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373097361779506e-05, + "loss": 0.0714, + "step": 43295 + }, + { + "epoch": 2.02, + "learning_rate": 1.337231357672472e-05, + "loss": 0.1705, + "step": 43300 + }, + { + "epoch": 2.02, + "learning_rate": 1.3371529791669932e-05, + "loss": 0.3426, + "step": 43305 + }, + { + "epoch": 2.02, + "learning_rate": 1.3370746006615147e-05, + "loss": 0.3034, + "step": 43310 + }, + { + "epoch": 2.02, + "learning_rate": 1.336996222156036e-05, + "loss": 0.2828, + "step": 43315 + }, + { + "epoch": 2.02, + "learning_rate": 1.3369178436505574e-05, + "loss": 0.0257, + "step": 43320 + }, + { + "epoch": 2.02, + "learning_rate": 1.3368394651450788e-05, + "loss": 0.0729, + "step": 43325 + }, + { + "epoch": 2.02, + "learning_rate": 1.3367610866396001e-05, + "loss": 0.132, + "step": 43330 + }, + { + "epoch": 2.02, + "learning_rate": 1.3366827081341214e-05, + "loss": 0.1212, + "step": 43335 + }, + { + "epoch": 2.02, + "learning_rate": 1.336604329628643e-05, + "loss": 0.1476, + "step": 43340 + }, + { + "epoch": 2.02, + "learning_rate": 1.3365259511231641e-05, + "loss": 0.1749, + "step": 43345 + }, + { + "epoch": 2.02, + "learning_rate": 1.3364475726176855e-05, + "loss": 0.2094, + "step": 43350 + }, + { + "epoch": 2.02, + "learning_rate": 1.3363691941122068e-05, + "loss": 0.1269, + "step": 43355 + }, + { + "epoch": 2.02, + "learning_rate": 1.336290815606728e-05, + "loss": 0.397, + "step": 43360 + }, + { + "epoch": 2.02, + "learning_rate": 1.3362124371012495e-05, + "loss": 0.3699, + "step": 43365 + }, + { + "epoch": 2.02, + "learning_rate": 1.3361340585957708e-05, + "loss": 0.0477, + "step": 43370 + }, + { + "epoch": 2.02, + "learning_rate": 1.3360556800902921e-05, + "loss": 0.1156, + "step": 43375 + }, + { + "epoch": 2.02, + "learning_rate": 1.3359773015848134e-05, + "loss": 0.0311, + "step": 43380 + }, + { + "epoch": 2.02, + "learning_rate": 1.335898923079335e-05, + "loss": 0.3633, + "step": 43385 + }, + { + "epoch": 2.02, + "learning_rate": 1.3358205445738562e-05, + "loss": 0.1132, + "step": 43390 + }, + { + "epoch": 2.02, + "learning_rate": 1.3357421660683775e-05, + "loss": 0.0839, + "step": 43395 + }, + { + "epoch": 2.03, + "learning_rate": 1.3356637875628988e-05, + "loss": 0.1844, + "step": 43400 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355854090574203e-05, + "loss": 0.2091, + "step": 43405 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355070305519415e-05, + "loss": 0.2798, + "step": 43410 + }, + { + "epoch": 2.03, + "learning_rate": 1.335428652046463e-05, + "loss": 0.2263, + "step": 43415 + }, + { + "epoch": 2.03, + "learning_rate": 1.3353502735409842e-05, + "loss": 0.0277, + "step": 43420 + }, + { + "epoch": 2.03, + "learning_rate": 1.3352718950355055e-05, + "loss": 0.0383, + "step": 43425 + }, + { + "epoch": 2.03, + "learning_rate": 1.335193516530027e-05, + "loss": 0.0466, + "step": 43430 + }, + { + "epoch": 2.03, + "learning_rate": 1.3351151380245482e-05, + "loss": 0.0454, + "step": 43435 + }, + { + "epoch": 2.03, + "learning_rate": 1.3350367595190697e-05, + "loss": 0.1619, + "step": 43440 + }, + { + "epoch": 2.03, + "learning_rate": 1.3349740567146868e-05, + "loss": 0.0808, + "step": 43445 + }, + { + "epoch": 2.03, + "learning_rate": 1.334895678209208e-05, + "loss": 0.1816, + "step": 43450 + }, + { + "epoch": 2.03, + "learning_rate": 1.3348172997037294e-05, + "loss": 0.2489, + "step": 43455 + }, + { + "epoch": 2.03, + "learning_rate": 1.3347389211982506e-05, + "loss": 0.2021, + "step": 43460 + }, + { + "epoch": 2.03, + "learning_rate": 1.3346762183938678e-05, + "loss": 0.3291, + "step": 43465 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345978398883892e-05, + "loss": 0.0624, + "step": 43470 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345194613829104e-05, + "loss": 0.0649, + "step": 43475 + }, + { + "epoch": 2.03, + "learning_rate": 1.334441082877432e-05, + "loss": 0.093, + "step": 43480 + }, + { + "epoch": 2.03, + "learning_rate": 1.3343627043719532e-05, + "loss": 0.124, + "step": 43485 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342843258664744e-05, + "loss": 0.0791, + "step": 43490 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342059473609958e-05, + "loss": 0.1554, + "step": 43495 + }, + { + "epoch": 2.03, + "learning_rate": 1.334127568855517e-05, + "loss": 0.1531, + "step": 43500 + }, + { + "epoch": 2.03, + "learning_rate": 1.3340491903500386e-05, + "loss": 0.1726, + "step": 43505 + }, + { + "epoch": 2.03, + "learning_rate": 1.3339708118445598e-05, + "loss": 0.3568, + "step": 43510 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338924333390812e-05, + "loss": 0.3171, + "step": 43515 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338140548336024e-05, + "loss": 0.0196, + "step": 43520 + }, + { + "epoch": 2.03, + "learning_rate": 1.333735676328124e-05, + "loss": 0.0832, + "step": 43525 + }, + { + "epoch": 2.03, + "learning_rate": 1.3336572978226452e-05, + "loss": 0.0747, + "step": 43530 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335789193171666e-05, + "loss": 0.0519, + "step": 43535 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335005408116878e-05, + "loss": 0.1477, + "step": 43540 + }, + { + "epoch": 2.03, + "learning_rate": 1.3334221623062094e-05, + "loss": 0.231, + "step": 43545 + }, + { + "epoch": 2.03, + "learning_rate": 1.3333437838007306e-05, + "loss": 0.2418, + "step": 43550 + }, + { + "epoch": 2.03, + "learning_rate": 1.3332654052952518e-05, + "loss": 0.2191, + "step": 43555 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331870267897732e-05, + "loss": 0.396, + "step": 43560 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331086482842946e-05, + "loss": 0.3743, + "step": 43565 + }, + { + "epoch": 2.03, + "learning_rate": 1.333030269778816e-05, + "loss": 0.0503, + "step": 43570 + }, + { + "epoch": 2.03, + "learning_rate": 1.3329518912733372e-05, + "loss": 0.0244, + "step": 43575 + }, + { + "epoch": 2.03, + "learning_rate": 1.3328735127678588e-05, + "loss": 0.0543, + "step": 43580 + }, + { + "epoch": 2.03, + "learning_rate": 1.33279513426238e-05, + "loss": 0.0519, + "step": 43585 + }, + { + "epoch": 2.03, + "learning_rate": 1.3327167557569014e-05, + "loss": 0.0916, + "step": 43590 + }, + { + "epoch": 2.03, + "learning_rate": 1.3326383772514226e-05, + "loss": 0.167, + "step": 43595 + }, + { + "epoch": 2.03, + "learning_rate": 1.3325599987459442e-05, + "loss": 0.1703, + "step": 43600 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324816202404654e-05, + "loss": 0.1012, + "step": 43605 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324032417349868e-05, + "loss": 0.1885, + "step": 43610 + }, + { + "epoch": 2.04, + "learning_rate": 1.332324863229508e-05, + "loss": 0.2534, + "step": 43615 + }, + { + "epoch": 2.04, + "learning_rate": 1.3322464847240292e-05, + "loss": 0.0711, + "step": 43620 + }, + { + "epoch": 2.04, + "learning_rate": 1.3321681062185508e-05, + "loss": 0.0885, + "step": 43625 + }, + { + "epoch": 2.04, + "learning_rate": 1.332089727713072e-05, + "loss": 0.0919, + "step": 43630 + }, + { + "epoch": 2.04, + "learning_rate": 1.3320113492075934e-05, + "loss": 0.0972, + "step": 43635 + }, + { + "epoch": 2.04, + "learning_rate": 1.3319329707021146e-05, + "loss": 0.0946, + "step": 43640 + }, + { + "epoch": 2.04, + "learning_rate": 1.3318545921966362e-05, + "loss": 0.0836, + "step": 43645 + }, + { + "epoch": 2.04, + "learning_rate": 1.3317762136911574e-05, + "loss": 0.1282, + "step": 43650 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316978351856788e-05, + "loss": 0.1321, + "step": 43655 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316194566802002e-05, + "loss": 0.266, + "step": 43660 + }, + { + "epoch": 2.04, + "learning_rate": 1.3315410781747216e-05, + "loss": 0.3727, + "step": 43665 + }, + { + "epoch": 2.04, + "learning_rate": 1.3314626996692428e-05, + "loss": 0.0841, + "step": 43670 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313843211637642e-05, + "loss": 0.0567, + "step": 43675 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313059426582856e-05, + "loss": 0.0792, + "step": 43680 + }, + { + "epoch": 2.04, + "learning_rate": 1.3312275641528068e-05, + "loss": 0.0857, + "step": 43685 + }, + { + "epoch": 2.04, + "learning_rate": 1.3311491856473282e-05, + "loss": 0.0897, + "step": 43690 + }, + { + "epoch": 2.04, + "learning_rate": 1.3310708071418494e-05, + "loss": 0.1323, + "step": 43695 + }, + { + "epoch": 2.04, + "learning_rate": 1.330992428636371e-05, + "loss": 0.3455, + "step": 43700 + }, + { + "epoch": 2.04, + "learning_rate": 1.3309140501308922e-05, + "loss": 0.1129, + "step": 43705 + }, + { + "epoch": 2.04, + "learning_rate": 1.3308356716254136e-05, + "loss": 0.2136, + "step": 43710 + }, + { + "epoch": 2.04, + "learning_rate": 1.3307572931199348e-05, + "loss": 0.3236, + "step": 43715 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306789146144564e-05, + "loss": 0.0699, + "step": 43720 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306005361089776e-05, + "loss": 0.063, + "step": 43725 + }, + { + "epoch": 2.04, + "learning_rate": 1.330522157603499e-05, + "loss": 0.0609, + "step": 43730 + }, + { + "epoch": 2.04, + "learning_rate": 1.3304437790980202e-05, + "loss": 0.0902, + "step": 43735 + }, + { + "epoch": 2.04, + "learning_rate": 1.3303654005925417e-05, + "loss": 0.0967, + "step": 43740 + }, + { + "epoch": 2.04, + "learning_rate": 1.330287022087063e-05, + "loss": 0.1716, + "step": 43745 + }, + { + "epoch": 2.04, + "learning_rate": 1.3302086435815842e-05, + "loss": 0.117, + "step": 43750 + }, + { + "epoch": 2.04, + "learning_rate": 1.3301302650761056e-05, + "loss": 0.2584, + "step": 43755 + }, + { + "epoch": 2.04, + "learning_rate": 1.330051886570627e-05, + "loss": 0.3534, + "step": 43760 + }, + { + "epoch": 2.04, + "learning_rate": 1.3299735080651484e-05, + "loss": 0.2967, + "step": 43765 + }, + { + "epoch": 2.04, + "learning_rate": 1.3298951295596696e-05, + "loss": 0.0437, + "step": 43770 + }, + { + "epoch": 2.04, + "learning_rate": 1.329816751054191e-05, + "loss": 0.0362, + "step": 43775 + }, + { + "epoch": 2.04, + "learning_rate": 1.3297383725487124e-05, + "loss": 0.0681, + "step": 43780 + }, + { + "epoch": 2.04, + "learning_rate": 1.3296599940432338e-05, + "loss": 0.0636, + "step": 43785 + }, + { + "epoch": 2.04, + "learning_rate": 1.329581615537755e-05, + "loss": 0.1009, + "step": 43790 + }, + { + "epoch": 2.04, + "learning_rate": 1.3295032370322765e-05, + "loss": 0.1697, + "step": 43795 + }, + { + "epoch": 2.04, + "learning_rate": 1.3294248585267978e-05, + "loss": 0.1545, + "step": 43800 + }, + { + "epoch": 2.04, + "learning_rate": 1.3293464800213191e-05, + "loss": 0.1749, + "step": 43805 + }, + { + "epoch": 2.04, + "learning_rate": 1.3292681015158404e-05, + "loss": 0.2246, + "step": 43810 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291897230103616e-05, + "loss": 0.2266, + "step": 43815 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291113445048832e-05, + "loss": 0.0738, + "step": 43820 + }, + { + "epoch": 2.04, + "learning_rate": 1.3290329659994044e-05, + "loss": 0.0333, + "step": 43825 + }, + { + "epoch": 2.05, + "learning_rate": 1.3289545874939258e-05, + "loss": 0.0439, + "step": 43830 + }, + { + "epoch": 2.05, + "learning_rate": 1.328876208988447e-05, + "loss": 0.0997, + "step": 43835 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287978304829685e-05, + "loss": 0.0818, + "step": 43840 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287194519774898e-05, + "loss": 0.0876, + "step": 43845 + }, + { + "epoch": 2.05, + "learning_rate": 1.3286410734720112e-05, + "loss": 0.1397, + "step": 43850 + }, + { + "epoch": 2.05, + "learning_rate": 1.3285626949665324e-05, + "loss": 0.1035, + "step": 43855 + }, + { + "epoch": 2.05, + "learning_rate": 1.328484316461054e-05, + "loss": 0.3954, + "step": 43860 + }, + { + "epoch": 2.05, + "learning_rate": 1.3284059379555752e-05, + "loss": 0.4025, + "step": 43865 + }, + { + "epoch": 2.05, + "learning_rate": 1.3283275594500965e-05, + "loss": 0.0292, + "step": 43870 + }, + { + "epoch": 2.05, + "learning_rate": 1.328249180944618e-05, + "loss": 0.091, + "step": 43875 + }, + { + "epoch": 2.05, + "learning_rate": 1.3281708024391392e-05, + "loss": 0.0692, + "step": 43880 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280924239336606e-05, + "loss": 0.0826, + "step": 43885 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280140454281818e-05, + "loss": 0.1142, + "step": 43890 + }, + { + "epoch": 2.05, + "learning_rate": 1.3279356669227033e-05, + "loss": 0.1234, + "step": 43895 + }, + { + "epoch": 2.05, + "learning_rate": 1.3278572884172246e-05, + "loss": 0.1834, + "step": 43900 + }, + { + "epoch": 2.05, + "learning_rate": 1.327778909911746e-05, + "loss": 0.227, + "step": 43905 + }, + { + "epoch": 2.05, + "learning_rate": 1.3277005314062672e-05, + "loss": 0.336, + "step": 43910 + }, + { + "epoch": 2.05, + "learning_rate": 1.3276221529007887e-05, + "loss": 0.2137, + "step": 43915 + }, + { + "epoch": 2.05, + "learning_rate": 1.32754377439531e-05, + "loss": 0.08, + "step": 43920 + }, + { + "epoch": 2.05, + "learning_rate": 1.3274653958898313e-05, + "loss": 0.0813, + "step": 43925 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273870173843526e-05, + "loss": 0.0463, + "step": 43930 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273086388788741e-05, + "loss": 0.0811, + "step": 43935 + }, + { + "epoch": 2.05, + "learning_rate": 1.3272302603733953e-05, + "loss": 0.0861, + "step": 43940 + }, + { + "epoch": 2.05, + "learning_rate": 1.3271518818679166e-05, + "loss": 0.1037, + "step": 43945 + }, + { + "epoch": 2.05, + "learning_rate": 1.327073503362438e-05, + "loss": 0.2025, + "step": 43950 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269951248569592e-05, + "loss": 0.2942, + "step": 43955 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269167463514807e-05, + "loss": 0.1693, + "step": 43960 + }, + { + "epoch": 2.05, + "learning_rate": 1.326838367846002e-05, + "loss": 0.269, + "step": 43965 + }, + { + "epoch": 2.05, + "learning_rate": 1.3267599893405233e-05, + "loss": 0.1356, + "step": 43970 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266816108350447e-05, + "loss": 0.016, + "step": 43975 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266032323295661e-05, + "loss": 0.0608, + "step": 43980 + }, + { + "epoch": 2.05, + "learning_rate": 1.3265248538240873e-05, + "loss": 0.0614, + "step": 43985 + }, + { + "epoch": 2.05, + "learning_rate": 1.3264464753186087e-05, + "loss": 0.0645, + "step": 43990 + }, + { + "epoch": 2.05, + "learning_rate": 1.3263680968131301e-05, + "loss": 0.1768, + "step": 43995 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262897183076515e-05, + "loss": 0.0816, + "step": 44000 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262113398021727e-05, + "loss": 0.121, + "step": 44005 + }, + { + "epoch": 2.05, + "learning_rate": 1.326132961296694e-05, + "loss": 0.1856, + "step": 44010 + }, + { + "epoch": 2.05, + "learning_rate": 1.3260545827912155e-05, + "loss": 0.2582, + "step": 44015 + }, + { + "epoch": 2.05, + "learning_rate": 1.3259762042857367e-05, + "loss": 0.0342, + "step": 44020 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258978257802581e-05, + "loss": 0.0482, + "step": 44025 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258194472747794e-05, + "loss": 0.066, + "step": 44030 + }, + { + "epoch": 2.05, + "learning_rate": 1.3257410687693009e-05, + "loss": 0.1002, + "step": 44035 + }, + { + "epoch": 2.05, + "learning_rate": 1.3256626902638221e-05, + "loss": 0.0774, + "step": 44040 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255843117583435e-05, + "loss": 0.2158, + "step": 44045 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255059332528647e-05, + "loss": 0.1574, + "step": 44050 + }, + { + "epoch": 2.06, + "learning_rate": 1.3254275547473863e-05, + "loss": 0.2037, + "step": 44055 + }, + { + "epoch": 2.06, + "learning_rate": 1.3253491762419075e-05, + "loss": 0.3914, + "step": 44060 + }, + { + "epoch": 2.06, + "learning_rate": 1.325270797736429e-05, + "loss": 0.299, + "step": 44065 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251924192309501e-05, + "loss": 0.0783, + "step": 44070 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251140407254715e-05, + "loss": 0.0358, + "step": 44075 + }, + { + "epoch": 2.06, + "learning_rate": 1.325035662219993e-05, + "loss": 0.0511, + "step": 44080 + }, + { + "epoch": 2.06, + "learning_rate": 1.3249572837145141e-05, + "loss": 0.0737, + "step": 44085 + }, + { + "epoch": 2.06, + "learning_rate": 1.3248789052090355e-05, + "loss": 0.0319, + "step": 44090 + }, + { + "epoch": 2.06, + "learning_rate": 1.324800526703557e-05, + "loss": 0.0438, + "step": 44095 + }, + { + "epoch": 2.06, + "learning_rate": 1.3247221481980783e-05, + "loss": 0.1289, + "step": 44100 + }, + { + "epoch": 2.06, + "learning_rate": 1.3246437696925995e-05, + "loss": 0.2054, + "step": 44105 + }, + { + "epoch": 2.06, + "learning_rate": 1.3245653911871211e-05, + "loss": 0.2511, + "step": 44110 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244870126816423e-05, + "loss": 0.3578, + "step": 44115 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244086341761637e-05, + "loss": 0.0513, + "step": 44120 + }, + { + "epoch": 2.06, + "learning_rate": 1.324330255670685e-05, + "loss": 0.0485, + "step": 44125 + }, + { + "epoch": 2.06, + "learning_rate": 1.3242518771652065e-05, + "loss": 0.0457, + "step": 44130 + }, + { + "epoch": 2.06, + "learning_rate": 1.3241734986597277e-05, + "loss": 0.0965, + "step": 44135 + }, + { + "epoch": 2.06, + "learning_rate": 1.324095120154249e-05, + "loss": 0.0919, + "step": 44140 + }, + { + "epoch": 2.06, + "learning_rate": 1.3240167416487703e-05, + "loss": 0.1236, + "step": 44145 + }, + { + "epoch": 2.06, + "learning_rate": 1.3239383631432915e-05, + "loss": 0.1037, + "step": 44150 + }, + { + "epoch": 2.06, + "learning_rate": 1.3238599846378131e-05, + "loss": 0.1805, + "step": 44155 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237816061323343e-05, + "loss": 0.2902, + "step": 44160 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237032276268557e-05, + "loss": 0.2851, + "step": 44165 + }, + { + "epoch": 2.06, + "learning_rate": 1.323624849121377e-05, + "loss": 0.0588, + "step": 44170 + }, + { + "epoch": 2.06, + "learning_rate": 1.3235464706158985e-05, + "loss": 0.1677, + "step": 44175 + }, + { + "epoch": 2.06, + "learning_rate": 1.3234680921104197e-05, + "loss": 0.0701, + "step": 44180 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233897136049411e-05, + "loss": 0.0714, + "step": 44185 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233113350994625e-05, + "loss": 0.0526, + "step": 44190 + }, + { + "epoch": 2.06, + "learning_rate": 1.3232329565939839e-05, + "loss": 0.0875, + "step": 44195 + }, + { + "epoch": 2.06, + "learning_rate": 1.3231545780885051e-05, + "loss": 0.1067, + "step": 44200 + }, + { + "epoch": 2.06, + "learning_rate": 1.3230761995830263e-05, + "loss": 0.1859, + "step": 44205 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229978210775479e-05, + "loss": 0.2862, + "step": 44210 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229194425720691e-05, + "loss": 0.3919, + "step": 44215 + }, + { + "epoch": 2.06, + "learning_rate": 1.3228410640665905e-05, + "loss": 0.0165, + "step": 44220 + }, + { + "epoch": 2.06, + "learning_rate": 1.3227626855611117e-05, + "loss": 0.0282, + "step": 44225 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226843070556333e-05, + "loss": 0.1177, + "step": 44230 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226059285501545e-05, + "loss": 0.0937, + "step": 44235 + }, + { + "epoch": 2.06, + "learning_rate": 1.3225275500446759e-05, + "loss": 0.1239, + "step": 44240 + }, + { + "epoch": 2.06, + "learning_rate": 1.3224491715391971e-05, + "loss": 0.1821, + "step": 44245 + }, + { + "epoch": 2.06, + "learning_rate": 1.3223707930337187e-05, + "loss": 0.205, + "step": 44250 + }, + { + "epoch": 2.06, + "learning_rate": 1.3222924145282399e-05, + "loss": 0.1652, + "step": 44255 + }, + { + "epoch": 2.07, + "learning_rate": 1.3222140360227613e-05, + "loss": 0.1785, + "step": 44260 + }, + { + "epoch": 2.07, + "learning_rate": 1.3221356575172825e-05, + "loss": 0.2491, + "step": 44265 + }, + { + "epoch": 2.07, + "learning_rate": 1.3220572790118037e-05, + "loss": 0.026, + "step": 44270 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219789005063253e-05, + "loss": 0.0235, + "step": 44275 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219005220008465e-05, + "loss": 0.0774, + "step": 44280 + }, + { + "epoch": 2.07, + "learning_rate": 1.3218221434953679e-05, + "loss": 0.08, + "step": 44285 + }, + { + "epoch": 2.07, + "learning_rate": 1.3217437649898893e-05, + "loss": 0.0742, + "step": 44290 + }, + { + "epoch": 2.07, + "learning_rate": 1.3216653864844107e-05, + "loss": 0.1891, + "step": 44295 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215870079789319e-05, + "loss": 0.1566, + "step": 44300 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215086294734533e-05, + "loss": 0.2266, + "step": 44305 + }, + { + "epoch": 2.07, + "learning_rate": 1.3214302509679747e-05, + "loss": 0.2815, + "step": 44310 + }, + { + "epoch": 2.07, + "learning_rate": 1.321351872462496e-05, + "loss": 0.2934, + "step": 44315 + }, + { + "epoch": 2.07, + "learning_rate": 1.3212734939570173e-05, + "loss": 0.0386, + "step": 44320 + }, + { + "epoch": 2.07, + "learning_rate": 1.3211951154515389e-05, + "loss": 0.079, + "step": 44325 + }, + { + "epoch": 2.07, + "learning_rate": 1.32111673694606e-05, + "loss": 0.0483, + "step": 44330 + }, + { + "epoch": 2.07, + "learning_rate": 1.3210383584405813e-05, + "loss": 0.12, + "step": 44335 + }, + { + "epoch": 2.07, + "learning_rate": 1.3209599799351027e-05, + "loss": 0.1331, + "step": 44340 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208816014296239e-05, + "loss": 0.1102, + "step": 44345 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208032229241455e-05, + "loss": 0.116, + "step": 44350 + }, + { + "epoch": 2.07, + "learning_rate": 1.3207248444186667e-05, + "loss": 0.1937, + "step": 44355 + }, + { + "epoch": 2.07, + "learning_rate": 1.320646465913188e-05, + "loss": 0.2692, + "step": 44360 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205837631088051e-05, + "loss": 0.3699, + "step": 44365 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205053846033265e-05, + "loss": 0.0153, + "step": 44370 + }, + { + "epoch": 2.07, + "learning_rate": 1.3204270060978479e-05, + "loss": 0.0688, + "step": 44375 + }, + { + "epoch": 2.07, + "learning_rate": 1.3203486275923691e-05, + "loss": 0.0807, + "step": 44380 + }, + { + "epoch": 2.07, + "learning_rate": 1.3202702490868907e-05, + "loss": 0.0753, + "step": 44385 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201918705814119e-05, + "loss": 0.083, + "step": 44390 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201134920759333e-05, + "loss": 0.165, + "step": 44395 + }, + { + "epoch": 2.07, + "learning_rate": 1.3200351135704545e-05, + "loss": 0.169, + "step": 44400 + }, + { + "epoch": 2.07, + "learning_rate": 1.3199567350649757e-05, + "loss": 0.2277, + "step": 44405 + }, + { + "epoch": 2.07, + "learning_rate": 1.3198783565594973e-05, + "loss": 0.3473, + "step": 44410 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197999780540185e-05, + "loss": 0.2985, + "step": 44415 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197215995485399e-05, + "loss": 0.0606, + "step": 44420 + }, + { + "epoch": 2.07, + "learning_rate": 1.3196432210430611e-05, + "loss": 0.0483, + "step": 44425 + }, + { + "epoch": 2.07, + "learning_rate": 1.3195648425375827e-05, + "loss": 0.0787, + "step": 44430 + }, + { + "epoch": 2.07, + "learning_rate": 1.319486464032104e-05, + "loss": 0.1182, + "step": 44435 + }, + { + "epoch": 2.07, + "learning_rate": 1.3194080855266253e-05, + "loss": 0.152, + "step": 44440 + }, + { + "epoch": 2.07, + "learning_rate": 1.3193297070211465e-05, + "loss": 0.1204, + "step": 44445 + }, + { + "epoch": 2.07, + "learning_rate": 1.3192513285156681e-05, + "loss": 0.0873, + "step": 44450 + }, + { + "epoch": 2.07, + "learning_rate": 1.3191729500101893e-05, + "loss": 0.1337, + "step": 44455 + }, + { + "epoch": 2.07, + "learning_rate": 1.3190945715047107e-05, + "loss": 0.2727, + "step": 44460 + }, + { + "epoch": 2.07, + "learning_rate": 1.319016192999232e-05, + "loss": 0.2811, + "step": 44465 + }, + { + "epoch": 2.08, + "learning_rate": 1.3189378144937533e-05, + "loss": 0.0753, + "step": 44470 + }, + { + "epoch": 2.08, + "learning_rate": 1.3188594359882747e-05, + "loss": 0.0589, + "step": 44475 + }, + { + "epoch": 2.08, + "learning_rate": 1.318781057482796e-05, + "loss": 0.1141, + "step": 44480 + }, + { + "epoch": 2.08, + "learning_rate": 1.3187026789773175e-05, + "loss": 0.0495, + "step": 44485 + }, + { + "epoch": 2.08, + "learning_rate": 1.3186243004718387e-05, + "loss": 0.0716, + "step": 44490 + }, + { + "epoch": 2.08, + "learning_rate": 1.3185459219663601e-05, + "loss": 0.1369, + "step": 44495 + }, + { + "epoch": 2.08, + "learning_rate": 1.3184675434608813e-05, + "loss": 0.1567, + "step": 44500 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183891649554029e-05, + "loss": 0.1988, + "step": 44505 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183107864499241e-05, + "loss": 0.3542, + "step": 44510 + }, + { + "epoch": 2.08, + "learning_rate": 1.3182324079444455e-05, + "loss": 0.3402, + "step": 44515 + }, + { + "epoch": 2.08, + "learning_rate": 1.3181540294389667e-05, + "loss": 0.0784, + "step": 44520 + }, + { + "epoch": 2.08, + "learning_rate": 1.3180756509334883e-05, + "loss": 0.0768, + "step": 44525 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179972724280095e-05, + "loss": 0.0386, + "step": 44530 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179188939225307e-05, + "loss": 0.0284, + "step": 44535 + }, + { + "epoch": 2.08, + "learning_rate": 1.3178405154170521e-05, + "loss": 0.0668, + "step": 44540 + }, + { + "epoch": 2.08, + "learning_rate": 1.3177621369115733e-05, + "loss": 0.1517, + "step": 44545 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176837584060949e-05, + "loss": 0.1934, + "step": 44550 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176053799006161e-05, + "loss": 0.321, + "step": 44555 + }, + { + "epoch": 2.08, + "learning_rate": 1.3175270013951375e-05, + "loss": 0.3407, + "step": 44560 + }, + { + "epoch": 2.08, + "learning_rate": 1.3174486228896587e-05, + "loss": 0.2514, + "step": 44565 + }, + { + "epoch": 2.08, + "learning_rate": 1.3173702443841803e-05, + "loss": 0.041, + "step": 44570 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172918658787015e-05, + "loss": 0.0766, + "step": 44575 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172134873732229e-05, + "loss": 0.0465, + "step": 44580 + }, + { + "epoch": 2.08, + "learning_rate": 1.3171351088677443e-05, + "loss": 0.1087, + "step": 44585 + }, + { + "epoch": 2.08, + "learning_rate": 1.3170567303622657e-05, + "loss": 0.1005, + "step": 44590 + }, + { + "epoch": 2.08, + "learning_rate": 1.3169783518567869e-05, + "loss": 0.1296, + "step": 44595 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168999733513081e-05, + "loss": 0.1509, + "step": 44600 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168215948458297e-05, + "loss": 0.1615, + "step": 44605 + }, + { + "epoch": 2.08, + "learning_rate": 1.3167432163403509e-05, + "loss": 0.1695, + "step": 44610 + }, + { + "epoch": 2.08, + "learning_rate": 1.3166648378348723e-05, + "loss": 0.2675, + "step": 44615 + }, + { + "epoch": 2.08, + "learning_rate": 1.3165864593293935e-05, + "loss": 0.0326, + "step": 44620 + }, + { + "epoch": 2.08, + "learning_rate": 1.316508080823915e-05, + "loss": 0.065, + "step": 44625 + }, + { + "epoch": 2.08, + "learning_rate": 1.3164297023184363e-05, + "loss": 0.0504, + "step": 44630 + }, + { + "epoch": 2.08, + "learning_rate": 1.3163513238129577e-05, + "loss": 0.0747, + "step": 44635 + }, + { + "epoch": 2.08, + "learning_rate": 1.3162729453074789e-05, + "loss": 0.064, + "step": 44640 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161945668020005e-05, + "loss": 0.1414, + "step": 44645 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161161882965217e-05, + "loss": 0.2267, + "step": 44650 + }, + { + "epoch": 2.08, + "learning_rate": 1.316037809791043e-05, + "loss": 0.1493, + "step": 44655 + }, + { + "epoch": 2.08, + "learning_rate": 1.3159594312855643e-05, + "loss": 0.3172, + "step": 44660 + }, + { + "epoch": 2.08, + "learning_rate": 1.3158810527800857e-05, + "loss": 0.2474, + "step": 44665 + }, + { + "epoch": 2.08, + "learning_rate": 1.315802674274607e-05, + "loss": 0.0665, + "step": 44670 + }, + { + "epoch": 2.08, + "learning_rate": 1.3157242957691283e-05, + "loss": 0.0464, + "step": 44675 + }, + { + "epoch": 2.08, + "learning_rate": 1.3156459172636497e-05, + "loss": 0.0733, + "step": 44680 + }, + { + "epoch": 2.09, + "learning_rate": 1.315567538758171e-05, + "loss": 0.1709, + "step": 44685 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154891602526925e-05, + "loss": 0.0666, + "step": 44690 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154107817472137e-05, + "loss": 0.0916, + "step": 44695 + }, + { + "epoch": 2.09, + "learning_rate": 1.3153324032417352e-05, + "loss": 0.0939, + "step": 44700 + }, + { + "epoch": 2.09, + "learning_rate": 1.3152540247362565e-05, + "loss": 0.155, + "step": 44705 + }, + { + "epoch": 2.09, + "learning_rate": 1.3151756462307779e-05, + "loss": 0.2603, + "step": 44710 + }, + { + "epoch": 2.09, + "learning_rate": 1.315097267725299e-05, + "loss": 0.2802, + "step": 44715 + }, + { + "epoch": 2.09, + "learning_rate": 1.3150188892198206e-05, + "loss": 0.0517, + "step": 44720 + }, + { + "epoch": 2.09, + "learning_rate": 1.3149405107143419e-05, + "loss": 0.0623, + "step": 44725 + }, + { + "epoch": 2.09, + "learning_rate": 1.314862132208863e-05, + "loss": 0.0714, + "step": 44730 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147837537033845e-05, + "loss": 0.0585, + "step": 44735 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147053751979057e-05, + "loss": 0.1013, + "step": 44740 + }, + { + "epoch": 2.09, + "learning_rate": 1.3146269966924273e-05, + "loss": 0.0922, + "step": 44745 + }, + { + "epoch": 2.09, + "learning_rate": 1.3145486181869485e-05, + "loss": 0.1085, + "step": 44750 + }, + { + "epoch": 2.09, + "learning_rate": 1.3144702396814699e-05, + "loss": 0.1422, + "step": 44755 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143918611759911e-05, + "loss": 0.2372, + "step": 44760 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143134826705126e-05, + "loss": 0.3727, + "step": 44765 + }, + { + "epoch": 2.09, + "learning_rate": 1.3142351041650339e-05, + "loss": 0.1013, + "step": 44770 + }, + { + "epoch": 2.09, + "learning_rate": 1.3141567256595553e-05, + "loss": 0.0361, + "step": 44775 + }, + { + "epoch": 2.09, + "learning_rate": 1.3140783471540765e-05, + "loss": 0.088, + "step": 44780 + }, + { + "epoch": 2.09, + "learning_rate": 1.313999968648598e-05, + "loss": 0.0731, + "step": 44785 + }, + { + "epoch": 2.09, + "learning_rate": 1.3139215901431193e-05, + "loss": 0.0661, + "step": 44790 + }, + { + "epoch": 2.09, + "learning_rate": 1.3138432116376405e-05, + "loss": 0.1273, + "step": 44795 + }, + { + "epoch": 2.09, + "learning_rate": 1.313764833132162e-05, + "loss": 0.1032, + "step": 44800 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136864546266833e-05, + "loss": 0.2088, + "step": 44805 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136080761212047e-05, + "loss": 0.3149, + "step": 44810 + }, + { + "epoch": 2.09, + "learning_rate": 1.3135296976157259e-05, + "loss": 0.3955, + "step": 44815 + }, + { + "epoch": 2.09, + "learning_rate": 1.3134513191102474e-05, + "loss": 0.0629, + "step": 44820 + }, + { + "epoch": 2.09, + "learning_rate": 1.3133729406047687e-05, + "loss": 0.0434, + "step": 44825 + }, + { + "epoch": 2.09, + "learning_rate": 1.31329456209929e-05, + "loss": 0.1228, + "step": 44830 + }, + { + "epoch": 2.09, + "learning_rate": 1.3132161835938113e-05, + "loss": 0.0403, + "step": 44835 + }, + { + "epoch": 2.09, + "learning_rate": 1.3131378050883328e-05, + "loss": 0.0699, + "step": 44840 + }, + { + "epoch": 2.09, + "learning_rate": 1.313059426582854e-05, + "loss": 0.1441, + "step": 44845 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129810480773754e-05, + "loss": 0.1262, + "step": 44850 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129026695718967e-05, + "loss": 0.1689, + "step": 44855 + }, + { + "epoch": 2.09, + "learning_rate": 1.3128242910664179e-05, + "loss": 0.2219, + "step": 44860 + }, + { + "epoch": 2.09, + "learning_rate": 1.3127459125609394e-05, + "loss": 0.3262, + "step": 44865 + }, + { + "epoch": 2.09, + "learning_rate": 1.3126675340554607e-05, + "loss": 0.0307, + "step": 44870 + }, + { + "epoch": 2.09, + "learning_rate": 1.312589155549982e-05, + "loss": 0.0441, + "step": 44875 + }, + { + "epoch": 2.09, + "learning_rate": 1.3125107770445034e-05, + "loss": 0.0356, + "step": 44880 + }, + { + "epoch": 2.09, + "learning_rate": 1.3124323985390248e-05, + "loss": 0.0829, + "step": 44885 + }, + { + "epoch": 2.09, + "learning_rate": 1.312354020033546e-05, + "loss": 0.0608, + "step": 44890 + }, + { + "epoch": 2.09, + "learning_rate": 1.3122756415280674e-05, + "loss": 0.1429, + "step": 44895 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121972630225888e-05, + "loss": 0.125, + "step": 44900 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121188845171102e-05, + "loss": 0.1752, + "step": 44905 + }, + { + "epoch": 2.1, + "learning_rate": 1.3120405060116314e-05, + "loss": 0.1865, + "step": 44910 + }, + { + "epoch": 2.1, + "learning_rate": 1.311962127506153e-05, + "loss": 0.358, + "step": 44915 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118837490006742e-05, + "loss": 0.0342, + "step": 44920 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118053704951955e-05, + "loss": 0.0759, + "step": 44925 + }, + { + "epoch": 2.1, + "learning_rate": 1.3117269919897168e-05, + "loss": 0.0653, + "step": 44930 + }, + { + "epoch": 2.1, + "learning_rate": 1.311648613484238e-05, + "loss": 0.0713, + "step": 44935 + }, + { + "epoch": 2.1, + "learning_rate": 1.3115702349787596e-05, + "loss": 0.0867, + "step": 44940 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114918564732808e-05, + "loss": 0.0849, + "step": 44945 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114134779678022e-05, + "loss": 0.175, + "step": 44950 + }, + { + "epoch": 2.1, + "learning_rate": 1.3113350994623235e-05, + "loss": 0.1791, + "step": 44955 + }, + { + "epoch": 2.1, + "learning_rate": 1.311256720956845e-05, + "loss": 0.1874, + "step": 44960 + }, + { + "epoch": 2.1, + "learning_rate": 1.3111783424513662e-05, + "loss": 0.2178, + "step": 44965 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110999639458876e-05, + "loss": 0.0445, + "step": 44970 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110215854404088e-05, + "loss": 0.0422, + "step": 44975 + }, + { + "epoch": 2.1, + "learning_rate": 1.3109432069349304e-05, + "loss": 0.0326, + "step": 44980 + }, + { + "epoch": 2.1, + "learning_rate": 1.3108648284294516e-05, + "loss": 0.0743, + "step": 44985 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107864499239729e-05, + "loss": 0.1022, + "step": 44990 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107080714184942e-05, + "loss": 0.1278, + "step": 44995 + }, + { + "epoch": 2.1, + "learning_rate": 1.3106296929130156e-05, + "loss": 0.1631, + "step": 45000 + }, + { + "epoch": 2.1, + "learning_rate": 1.310551314407537e-05, + "loss": 0.1475, + "step": 45005 + }, + { + "epoch": 2.1, + "learning_rate": 1.3104729359020582e-05, + "loss": 0.2213, + "step": 45010 + }, + { + "epoch": 2.1, + "learning_rate": 1.3103945573965798e-05, + "loss": 0.3505, + "step": 45015 + }, + { + "epoch": 2.1, + "learning_rate": 1.310316178891101e-05, + "loss": 0.0454, + "step": 45020 + }, + { + "epoch": 2.1, + "learning_rate": 1.3102378003856224e-05, + "loss": 0.0718, + "step": 45025 + }, + { + "epoch": 2.1, + "learning_rate": 1.3101594218801436e-05, + "loss": 0.0849, + "step": 45030 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100810433746652e-05, + "loss": 0.0966, + "step": 45035 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100026648691864e-05, + "loss": 0.1208, + "step": 45040 + }, + { + "epoch": 2.1, + "learning_rate": 1.3099242863637078e-05, + "loss": 0.1753, + "step": 45045 + }, + { + "epoch": 2.1, + "learning_rate": 1.309845907858229e-05, + "loss": 0.2323, + "step": 45050 + }, + { + "epoch": 2.1, + "learning_rate": 1.3097675293527503e-05, + "loss": 0.2094, + "step": 45055 + }, + { + "epoch": 2.1, + "learning_rate": 1.3096891508472718e-05, + "loss": 0.2613, + "step": 45060 + }, + { + "epoch": 2.1, + "learning_rate": 1.309610772341793e-05, + "loss": 0.3646, + "step": 45065 + }, + { + "epoch": 2.1, + "learning_rate": 1.3095323938363144e-05, + "loss": 0.0468, + "step": 45070 + }, + { + "epoch": 2.1, + "learning_rate": 1.3094540153308356e-05, + "loss": 0.0398, + "step": 45075 + }, + { + "epoch": 2.1, + "learning_rate": 1.3093756368253572e-05, + "loss": 0.0819, + "step": 45080 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092972583198784e-05, + "loss": 0.0739, + "step": 45085 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092188798143998e-05, + "loss": 0.1802, + "step": 45090 + }, + { + "epoch": 2.1, + "learning_rate": 1.309140501308921e-05, + "loss": 0.1384, + "step": 45095 + }, + { + "epoch": 2.1, + "learning_rate": 1.3090621228034426e-05, + "loss": 0.1445, + "step": 45100 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089837442979638e-05, + "loss": 0.1752, + "step": 45105 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089053657924852e-05, + "loss": 0.4362, + "step": 45110 + }, + { + "epoch": 2.11, + "learning_rate": 1.3088269872870066e-05, + "loss": 0.3223, + "step": 45115 + }, + { + "epoch": 2.11, + "learning_rate": 1.3087486087815278e-05, + "loss": 0.0303, + "step": 45120 + }, + { + "epoch": 2.11, + "learning_rate": 1.3086702302760492e-05, + "loss": 0.047, + "step": 45125 + }, + { + "epoch": 2.11, + "learning_rate": 1.3085918517705704e-05, + "loss": 0.0458, + "step": 45130 + }, + { + "epoch": 2.11, + "learning_rate": 1.308513473265092e-05, + "loss": 0.097, + "step": 45135 + }, + { + "epoch": 2.11, + "learning_rate": 1.3084350947596132e-05, + "loss": 0.1045, + "step": 45140 + }, + { + "epoch": 2.11, + "learning_rate": 1.3083567162541346e-05, + "loss": 0.1566, + "step": 45145 + }, + { + "epoch": 2.11, + "learning_rate": 1.3082783377486558e-05, + "loss": 0.098, + "step": 45150 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081999592431774e-05, + "loss": 0.1235, + "step": 45155 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081215807376986e-05, + "loss": 0.2617, + "step": 45160 + }, + { + "epoch": 2.11, + "learning_rate": 1.30804320223222e-05, + "loss": 0.2604, + "step": 45165 + }, + { + "epoch": 2.11, + "learning_rate": 1.3079648237267412e-05, + "loss": 0.0638, + "step": 45170 + }, + { + "epoch": 2.11, + "learning_rate": 1.3078864452212628e-05, + "loss": 0.0437, + "step": 45175 + }, + { + "epoch": 2.11, + "learning_rate": 1.307808066715784e-05, + "loss": 0.125, + "step": 45180 + }, + { + "epoch": 2.11, + "learning_rate": 1.3077296882103052e-05, + "loss": 0.0794, + "step": 45185 + }, + { + "epoch": 2.11, + "learning_rate": 1.3076513097048266e-05, + "loss": 0.0686, + "step": 45190 + }, + { + "epoch": 2.11, + "learning_rate": 1.307572931199348e-05, + "loss": 0.1249, + "step": 45195 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074945526938694e-05, + "loss": 0.1045, + "step": 45200 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074161741883906e-05, + "loss": 0.151, + "step": 45205 + }, + { + "epoch": 2.11, + "learning_rate": 1.307337795682912e-05, + "loss": 0.2116, + "step": 45210 + }, + { + "epoch": 2.11, + "learning_rate": 1.3072594171774334e-05, + "loss": 0.3223, + "step": 45215 + }, + { + "epoch": 2.11, + "learning_rate": 1.3071810386719548e-05, + "loss": 0.1557, + "step": 45220 + }, + { + "epoch": 2.11, + "learning_rate": 1.307102660166476e-05, + "loss": 0.0382, + "step": 45225 + }, + { + "epoch": 2.11, + "learning_rate": 1.3070242816609976e-05, + "loss": 0.0262, + "step": 45230 + }, + { + "epoch": 2.11, + "learning_rate": 1.3069459031555188e-05, + "loss": 0.1138, + "step": 45235 + }, + { + "epoch": 2.11, + "learning_rate": 1.3068675246500402e-05, + "loss": 0.0946, + "step": 45240 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067891461445614e-05, + "loss": 0.0577, + "step": 45245 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067107676390826e-05, + "loss": 0.125, + "step": 45250 + }, + { + "epoch": 2.11, + "learning_rate": 1.3066323891336042e-05, + "loss": 0.0984, + "step": 45255 + }, + { + "epoch": 2.11, + "learning_rate": 1.3065540106281254e-05, + "loss": 0.3112, + "step": 45260 + }, + { + "epoch": 2.11, + "learning_rate": 1.3064756321226468e-05, + "loss": 0.4456, + "step": 45265 + }, + { + "epoch": 2.11, + "learning_rate": 1.306397253617168e-05, + "loss": 0.1024, + "step": 45270 + }, + { + "epoch": 2.11, + "learning_rate": 1.3063188751116896e-05, + "loss": 0.0654, + "step": 45275 + }, + { + "epoch": 2.11, + "learning_rate": 1.3062404966062108e-05, + "loss": 0.063, + "step": 45280 + }, + { + "epoch": 2.11, + "learning_rate": 1.3061621181007322e-05, + "loss": 0.073, + "step": 45285 + }, + { + "epoch": 2.11, + "learning_rate": 1.3060837395952534e-05, + "loss": 0.0433, + "step": 45290 + }, + { + "epoch": 2.11, + "learning_rate": 1.306005361089775e-05, + "loss": 0.0807, + "step": 45295 + }, + { + "epoch": 2.11, + "learning_rate": 1.3059269825842962e-05, + "loss": 0.1388, + "step": 45300 + }, + { + "epoch": 2.11, + "learning_rate": 1.3058486040788176e-05, + "loss": 0.1288, + "step": 45305 + }, + { + "epoch": 2.11, + "learning_rate": 1.3057702255733388e-05, + "loss": 0.1973, + "step": 45310 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056918470678602e-05, + "loss": 0.2249, + "step": 45315 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056134685623816e-05, + "loss": 0.0417, + "step": 45320 + }, + { + "epoch": 2.11, + "learning_rate": 1.3055350900569028e-05, + "loss": 0.0328, + "step": 45325 + }, + { + "epoch": 2.12, + "learning_rate": 1.3054567115514244e-05, + "loss": 0.092, + "step": 45330 + }, + { + "epoch": 2.12, + "learning_rate": 1.3053783330459456e-05, + "loss": 0.1243, + "step": 45335 + }, + { + "epoch": 2.12, + "learning_rate": 1.305299954540467e-05, + "loss": 0.056, + "step": 45340 + }, + { + "epoch": 2.12, + "learning_rate": 1.3052215760349882e-05, + "loss": 0.1449, + "step": 45345 + }, + { + "epoch": 2.12, + "learning_rate": 1.3051431975295098e-05, + "loss": 0.2336, + "step": 45350 + }, + { + "epoch": 2.12, + "learning_rate": 1.305064819024031e-05, + "loss": 0.1807, + "step": 45355 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049864405185524e-05, + "loss": 0.373, + "step": 45360 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049080620130736e-05, + "loss": 0.3324, + "step": 45365 + }, + { + "epoch": 2.12, + "learning_rate": 1.3048296835075951e-05, + "loss": 0.0863, + "step": 45370 + }, + { + "epoch": 2.12, + "learning_rate": 1.3047513050021164e-05, + "loss": 0.0827, + "step": 45375 + }, + { + "epoch": 2.12, + "learning_rate": 1.3046729264966376e-05, + "loss": 0.053, + "step": 45380 + }, + { + "epoch": 2.12, + "learning_rate": 1.304594547991159e-05, + "loss": 0.0841, + "step": 45385 + }, + { + "epoch": 2.12, + "learning_rate": 1.3045161694856802e-05, + "loss": 0.1131, + "step": 45390 + }, + { + "epoch": 2.12, + "learning_rate": 1.3044377909802018e-05, + "loss": 0.2027, + "step": 45395 + }, + { + "epoch": 2.12, + "learning_rate": 1.304359412474723e-05, + "loss": 0.1405, + "step": 45400 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042810339692444e-05, + "loss": 0.1859, + "step": 45405 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042026554637656e-05, + "loss": 0.2386, + "step": 45410 + }, + { + "epoch": 2.12, + "learning_rate": 1.3041242769582872e-05, + "loss": 0.3155, + "step": 45415 + }, + { + "epoch": 2.12, + "learning_rate": 1.3040458984528084e-05, + "loss": 0.0233, + "step": 45420 + }, + { + "epoch": 2.12, + "learning_rate": 1.3039675199473298e-05, + "loss": 0.0834, + "step": 45425 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038891414418512e-05, + "loss": 0.0897, + "step": 45430 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038107629363725e-05, + "loss": 0.0855, + "step": 45435 + }, + { + "epoch": 2.12, + "learning_rate": 1.3037323844308938e-05, + "loss": 0.0508, + "step": 45440 + }, + { + "epoch": 2.12, + "learning_rate": 1.303654005925415e-05, + "loss": 0.0879, + "step": 45445 + }, + { + "epoch": 2.12, + "learning_rate": 1.3035756274199365e-05, + "loss": 0.1952, + "step": 45450 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034972489144578e-05, + "loss": 0.1725, + "step": 45455 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034188704089792e-05, + "loss": 0.3064, + "step": 45460 + }, + { + "epoch": 2.12, + "learning_rate": 1.3033404919035004e-05, + "loss": 0.2897, + "step": 45465 + }, + { + "epoch": 2.12, + "learning_rate": 1.303262113398022e-05, + "loss": 0.0456, + "step": 45470 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031837348925432e-05, + "loss": 0.0907, + "step": 45475 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031053563870646e-05, + "loss": 0.0953, + "step": 45480 + }, + { + "epoch": 2.12, + "learning_rate": 1.3030269778815858e-05, + "loss": 0.0623, + "step": 45485 + }, + { + "epoch": 2.12, + "learning_rate": 1.3029485993761073e-05, + "loss": 0.0422, + "step": 45490 + }, + { + "epoch": 2.12, + "learning_rate": 1.3028702208706286e-05, + "loss": 0.1251, + "step": 45495 + }, + { + "epoch": 2.12, + "learning_rate": 1.30279184236515e-05, + "loss": 0.2241, + "step": 45500 + }, + { + "epoch": 2.12, + "learning_rate": 1.3027134638596712e-05, + "loss": 0.206, + "step": 45505 + }, + { + "epoch": 2.12, + "learning_rate": 1.3026350853541926e-05, + "loss": 0.1835, + "step": 45510 + }, + { + "epoch": 2.12, + "learning_rate": 1.302556706848714e-05, + "loss": 0.2396, + "step": 45515 + }, + { + "epoch": 2.12, + "learning_rate": 1.3024783283432352e-05, + "loss": 0.0602, + "step": 45520 + }, + { + "epoch": 2.12, + "learning_rate": 1.3023999498377566e-05, + "loss": 0.0384, + "step": 45525 + }, + { + "epoch": 2.12, + "learning_rate": 1.302321571332278e-05, + "loss": 0.0227, + "step": 45530 + }, + { + "epoch": 2.12, + "learning_rate": 1.3022431928267993e-05, + "loss": 0.0598, + "step": 45535 + }, + { + "epoch": 2.12, + "learning_rate": 1.3021648143213206e-05, + "loss": 0.1411, + "step": 45540 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020864358158421e-05, + "loss": 0.1612, + "step": 45545 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020080573103633e-05, + "loss": 0.218, + "step": 45550 + }, + { + "epoch": 2.13, + "learning_rate": 1.3019296788048847e-05, + "loss": 0.2523, + "step": 45555 + }, + { + "epoch": 2.13, + "learning_rate": 1.301851300299406e-05, + "loss": 0.1978, + "step": 45560 + }, + { + "epoch": 2.13, + "learning_rate": 1.3017729217939275e-05, + "loss": 0.3117, + "step": 45565 + }, + { + "epoch": 2.13, + "learning_rate": 1.3016945432884487e-05, + "loss": 0.1062, + "step": 45570 + }, + { + "epoch": 2.13, + "learning_rate": 1.30161616478297e-05, + "loss": 0.1145, + "step": 45575 + }, + { + "epoch": 2.13, + "learning_rate": 1.3015377862774913e-05, + "loss": 0.1031, + "step": 45580 + }, + { + "epoch": 2.13, + "learning_rate": 1.3014594077720126e-05, + "loss": 0.0727, + "step": 45585 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013810292665341e-05, + "loss": 0.0766, + "step": 45590 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013026507610554e-05, + "loss": 0.118, + "step": 45595 + }, + { + "epoch": 2.13, + "learning_rate": 1.3012242722555767e-05, + "loss": 0.0794, + "step": 45600 + }, + { + "epoch": 2.13, + "learning_rate": 1.301145893750098e-05, + "loss": 0.2502, + "step": 45605 + }, + { + "epoch": 2.13, + "learning_rate": 1.3010675152446195e-05, + "loss": 0.2556, + "step": 45610 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009891367391407e-05, + "loss": 0.2407, + "step": 45615 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009107582336621e-05, + "loss": 0.0709, + "step": 45620 + }, + { + "epoch": 2.13, + "learning_rate": 1.3008323797281834e-05, + "loss": 0.0743, + "step": 45625 + }, + { + "epoch": 2.13, + "learning_rate": 1.3007540012227049e-05, + "loss": 0.0364, + "step": 45630 + }, + { + "epoch": 2.13, + "learning_rate": 1.3006756227172261e-05, + "loss": 0.0777, + "step": 45635 + }, + { + "epoch": 2.13, + "learning_rate": 1.3005972442117474e-05, + "loss": 0.0727, + "step": 45640 + }, + { + "epoch": 2.13, + "learning_rate": 1.300518865706269e-05, + "loss": 0.1977, + "step": 45645 + }, + { + "epoch": 2.13, + "learning_rate": 1.3004404872007901e-05, + "loss": 0.1681, + "step": 45650 + }, + { + "epoch": 2.13, + "learning_rate": 1.3003621086953115e-05, + "loss": 0.2623, + "step": 45655 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002837301898328e-05, + "loss": 0.3067, + "step": 45660 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002053516843543e-05, + "loss": 0.3166, + "step": 45665 + }, + { + "epoch": 2.13, + "learning_rate": 1.3001269731788755e-05, + "loss": 0.1028, + "step": 45670 + }, + { + "epoch": 2.13, + "learning_rate": 1.300048594673397e-05, + "loss": 0.0572, + "step": 45675 + }, + { + "epoch": 2.13, + "learning_rate": 1.2999702161679181e-05, + "loss": 0.0974, + "step": 45680 + }, + { + "epoch": 2.13, + "learning_rate": 1.2998918376624397e-05, + "loss": 0.1084, + "step": 45685 + }, + { + "epoch": 2.13, + "learning_rate": 1.299813459156961e-05, + "loss": 0.1394, + "step": 45690 + }, + { + "epoch": 2.13, + "learning_rate": 1.2997350806514823e-05, + "loss": 0.1306, + "step": 45695 + }, + { + "epoch": 2.13, + "learning_rate": 1.2996567021460035e-05, + "loss": 0.1705, + "step": 45700 + }, + { + "epoch": 2.13, + "learning_rate": 1.2995783236405248e-05, + "loss": 0.1305, + "step": 45705 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994999451350463e-05, + "loss": 0.3169, + "step": 45710 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994215666295675e-05, + "loss": 0.2909, + "step": 45715 + }, + { + "epoch": 2.13, + "learning_rate": 1.299343188124089e-05, + "loss": 0.0292, + "step": 45720 + }, + { + "epoch": 2.13, + "learning_rate": 1.2992648096186102e-05, + "loss": 0.0243, + "step": 45725 + }, + { + "epoch": 2.13, + "learning_rate": 1.2991864311131317e-05, + "loss": 0.0665, + "step": 45730 + }, + { + "epoch": 2.13, + "learning_rate": 1.299108052607653e-05, + "loss": 0.0384, + "step": 45735 + }, + { + "epoch": 2.13, + "learning_rate": 1.2990296741021743e-05, + "loss": 0.0984, + "step": 45740 + }, + { + "epoch": 2.13, + "learning_rate": 1.2989512955966957e-05, + "loss": 0.0985, + "step": 45745 + }, + { + "epoch": 2.13, + "learning_rate": 1.2988729170912171e-05, + "loss": 0.1137, + "step": 45750 + }, + { + "epoch": 2.13, + "learning_rate": 1.2987945385857383e-05, + "loss": 0.2239, + "step": 45755 + }, + { + "epoch": 2.14, + "learning_rate": 1.2987161600802599e-05, + "loss": 0.1643, + "step": 45760 + }, + { + "epoch": 2.14, + "learning_rate": 1.2986377815747811e-05, + "loss": 0.4238, + "step": 45765 + }, + { + "epoch": 2.14, + "learning_rate": 1.2985594030693023e-05, + "loss": 0.0549, + "step": 45770 + }, + { + "epoch": 2.14, + "learning_rate": 1.2984810245638237e-05, + "loss": 0.0319, + "step": 45775 + }, + { + "epoch": 2.14, + "learning_rate": 1.298402646058345e-05, + "loss": 0.0423, + "step": 45780 + }, + { + "epoch": 2.14, + "learning_rate": 1.2983242675528665e-05, + "loss": 0.0657, + "step": 45785 + }, + { + "epoch": 2.14, + "learning_rate": 1.2982458890473877e-05, + "loss": 0.065, + "step": 45790 + }, + { + "epoch": 2.14, + "learning_rate": 1.2981675105419091e-05, + "loss": 0.1418, + "step": 45795 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980891320364303e-05, + "loss": 0.1648, + "step": 45800 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980107535309519e-05, + "loss": 0.1982, + "step": 45805 + }, + { + "epoch": 2.14, + "learning_rate": 1.2979323750254731e-05, + "loss": 0.196, + "step": 45810 + }, + { + "epoch": 2.14, + "learning_rate": 1.2978539965199945e-05, + "loss": 0.3101, + "step": 45815 + }, + { + "epoch": 2.14, + "learning_rate": 1.2977756180145157e-05, + "loss": 0.0599, + "step": 45820 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976972395090373e-05, + "loss": 0.0809, + "step": 45825 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976188610035585e-05, + "loss": 0.1309, + "step": 45830 + }, + { + "epoch": 2.14, + "learning_rate": 1.2975404824980797e-05, + "loss": 0.0561, + "step": 45835 + }, + { + "epoch": 2.14, + "learning_rate": 1.2974621039926011e-05, + "loss": 0.0666, + "step": 45840 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973837254871225e-05, + "loss": 0.1195, + "step": 45845 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973053469816439e-05, + "loss": 0.1992, + "step": 45850 + }, + { + "epoch": 2.14, + "learning_rate": 1.2972269684761651e-05, + "loss": 0.1405, + "step": 45855 + }, + { + "epoch": 2.14, + "learning_rate": 1.2971485899706867e-05, + "loss": 0.2539, + "step": 45860 + }, + { + "epoch": 2.14, + "learning_rate": 1.2970702114652079e-05, + "loss": 0.3003, + "step": 45865 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969918329597293e-05, + "loss": 0.0268, + "step": 45870 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969134544542505e-05, + "loss": 0.029, + "step": 45875 + }, + { + "epoch": 2.14, + "learning_rate": 1.296835075948772e-05, + "loss": 0.0899, + "step": 45880 + }, + { + "epoch": 2.14, + "learning_rate": 1.2967566974432933e-05, + "loss": 0.0464, + "step": 45885 + }, + { + "epoch": 2.14, + "learning_rate": 1.2966783189378147e-05, + "loss": 0.1088, + "step": 45890 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965999404323359e-05, + "loss": 0.1264, + "step": 45895 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965215619268571e-05, + "loss": 0.1743, + "step": 45900 + }, + { + "epoch": 2.14, + "learning_rate": 1.2964431834213787e-05, + "loss": 0.2256, + "step": 45905 + }, + { + "epoch": 2.14, + "learning_rate": 1.2963648049158999e-05, + "loss": 0.235, + "step": 45910 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962864264104213e-05, + "loss": 0.4299, + "step": 45915 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962080479049425e-05, + "loss": 0.0647, + "step": 45920 + }, + { + "epoch": 2.14, + "learning_rate": 1.296129669399464e-05, + "loss": 0.0445, + "step": 45925 + }, + { + "epoch": 2.14, + "learning_rate": 1.2960512908939853e-05, + "loss": 0.0616, + "step": 45930 + }, + { + "epoch": 2.14, + "learning_rate": 1.2959729123885067e-05, + "loss": 0.0822, + "step": 45935 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958945338830279e-05, + "loss": 0.1084, + "step": 45940 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958161553775495e-05, + "loss": 0.1383, + "step": 45945 + }, + { + "epoch": 2.14, + "learning_rate": 1.2957377768720707e-05, + "loss": 0.309, + "step": 45950 + }, + { + "epoch": 2.14, + "learning_rate": 1.295659398366592e-05, + "loss": 0.1302, + "step": 45955 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955810198611135e-05, + "loss": 0.4292, + "step": 45960 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955026413556347e-05, + "loss": 0.3065, + "step": 45965 + }, + { + "epoch": 2.15, + "learning_rate": 1.2954242628501561e-05, + "loss": 0.0508, + "step": 45970 + }, + { + "epoch": 2.15, + "learning_rate": 1.2953458843446773e-05, + "loss": 0.0602, + "step": 45975 + }, + { + "epoch": 2.15, + "learning_rate": 1.2952675058391989e-05, + "loss": 0.08, + "step": 45980 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951891273337201e-05, + "loss": 0.0903, + "step": 45985 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951107488282415e-05, + "loss": 0.0842, + "step": 45990 + }, + { + "epoch": 2.15, + "learning_rate": 1.2950323703227627e-05, + "loss": 0.115, + "step": 45995 + }, + { + "epoch": 2.15, + "learning_rate": 1.2949539918172843e-05, + "loss": 0.0675, + "step": 46000 + }, + { + "epoch": 2.15, + "learning_rate": 1.2948756133118055e-05, + "loss": 0.1959, + "step": 46005 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947972348063269e-05, + "loss": 0.2618, + "step": 46010 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947188563008481e-05, + "loss": 0.291, + "step": 46015 + }, + { + "epoch": 2.15, + "learning_rate": 1.2946404777953697e-05, + "loss": 0.0569, + "step": 46020 + }, + { + "epoch": 2.15, + "learning_rate": 1.2945620992898909e-05, + "loss": 0.0237, + "step": 46025 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944837207844121e-05, + "loss": 0.0394, + "step": 46030 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944053422789335e-05, + "loss": 0.029, + "step": 46035 + }, + { + "epoch": 2.15, + "learning_rate": 1.2943269637734549e-05, + "loss": 0.069, + "step": 46040 + }, + { + "epoch": 2.15, + "learning_rate": 1.2942485852679763e-05, + "loss": 0.0995, + "step": 46045 + }, + { + "epoch": 2.15, + "learning_rate": 1.2941702067624975e-05, + "loss": 0.0975, + "step": 46050 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940918282570189e-05, + "loss": 0.1855, + "step": 46055 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940134497515403e-05, + "loss": 0.2035, + "step": 46060 + }, + { + "epoch": 2.15, + "learning_rate": 1.2939350712460617e-05, + "loss": 0.3937, + "step": 46065 + }, + { + "epoch": 2.15, + "learning_rate": 1.2938566927405829e-05, + "loss": 0.1026, + "step": 46070 + }, + { + "epoch": 2.15, + "learning_rate": 1.2937783142351044e-05, + "loss": 0.0271, + "step": 46075 + }, + { + "epoch": 2.15, + "learning_rate": 1.2936999357296257e-05, + "loss": 0.0701, + "step": 46080 + }, + { + "epoch": 2.15, + "learning_rate": 1.293621557224147e-05, + "loss": 0.0962, + "step": 46085 + }, + { + "epoch": 2.15, + "learning_rate": 1.2935431787186683e-05, + "loss": 0.0695, + "step": 46090 + }, + { + "epoch": 2.15, + "learning_rate": 1.2934648002131895e-05, + "loss": 0.0941, + "step": 46095 + }, + { + "epoch": 2.15, + "learning_rate": 1.293386421707711e-05, + "loss": 0.1758, + "step": 46100 + }, + { + "epoch": 2.15, + "learning_rate": 1.2933080432022323e-05, + "loss": 0.2329, + "step": 46105 + }, + { + "epoch": 2.15, + "learning_rate": 1.2932296646967537e-05, + "loss": 0.257, + "step": 46110 + }, + { + "epoch": 2.15, + "learning_rate": 1.2931512861912749e-05, + "loss": 0.1914, + "step": 46115 + }, + { + "epoch": 2.15, + "learning_rate": 1.2930729076857964e-05, + "loss": 0.0743, + "step": 46120 + }, + { + "epoch": 2.15, + "learning_rate": 1.2929945291803177e-05, + "loss": 0.0692, + "step": 46125 + }, + { + "epoch": 2.15, + "learning_rate": 1.292916150674839e-05, + "loss": 0.0397, + "step": 46130 + }, + { + "epoch": 2.15, + "learning_rate": 1.2928377721693603e-05, + "loss": 0.0593, + "step": 46135 + }, + { + "epoch": 2.15, + "learning_rate": 1.2927593936638818e-05, + "loss": 0.1479, + "step": 46140 + }, + { + "epoch": 2.15, + "learning_rate": 1.292681015158403e-05, + "loss": 0.0926, + "step": 46145 + }, + { + "epoch": 2.15, + "learning_rate": 1.2926026366529245e-05, + "loss": 0.1446, + "step": 46150 + }, + { + "epoch": 2.15, + "learning_rate": 1.2925242581474457e-05, + "loss": 0.1745, + "step": 46155 + }, + { + "epoch": 2.15, + "learning_rate": 1.292445879641967e-05, + "loss": 0.2465, + "step": 46160 + }, + { + "epoch": 2.15, + "learning_rate": 1.2923675011364885e-05, + "loss": 0.2131, + "step": 46165 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922891226310097e-05, + "loss": 0.0662, + "step": 46170 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922107441255312e-05, + "loss": 0.0392, + "step": 46175 + }, + { + "epoch": 2.15, + "learning_rate": 1.2921323656200525e-05, + "loss": 0.1003, + "step": 46180 + }, + { + "epoch": 2.16, + "learning_rate": 1.2920539871145738e-05, + "loss": 0.1232, + "step": 46185 + }, + { + "epoch": 2.16, + "learning_rate": 1.291975608609095e-05, + "loss": 0.1044, + "step": 46190 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918972301036166e-05, + "loss": 0.0763, + "step": 46195 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918188515981379e-05, + "loss": 0.1681, + "step": 46200 + }, + { + "epoch": 2.16, + "learning_rate": 1.2917404730926592e-05, + "loss": 0.1915, + "step": 46205 + }, + { + "epoch": 2.16, + "learning_rate": 1.2916620945871805e-05, + "loss": 0.2404, + "step": 46210 + }, + { + "epoch": 2.16, + "learning_rate": 1.291583716081702e-05, + "loss": 0.2095, + "step": 46215 + }, + { + "epoch": 2.16, + "learning_rate": 1.2915053375762232e-05, + "loss": 0.0508, + "step": 46220 + }, + { + "epoch": 2.16, + "learning_rate": 1.2914269590707445e-05, + "loss": 0.0292, + "step": 46225 + }, + { + "epoch": 2.16, + "learning_rate": 1.2913485805652659e-05, + "loss": 0.0615, + "step": 46230 + }, + { + "epoch": 2.16, + "learning_rate": 1.291270202059787e-05, + "loss": 0.0614, + "step": 46235 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911918235543086e-05, + "loss": 0.1748, + "step": 46240 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911134450488299e-05, + "loss": 0.0778, + "step": 46245 + }, + { + "epoch": 2.16, + "learning_rate": 1.2910350665433512e-05, + "loss": 0.1938, + "step": 46250 + }, + { + "epoch": 2.16, + "learning_rate": 1.2909566880378725e-05, + "loss": 0.22, + "step": 46255 + }, + { + "epoch": 2.16, + "learning_rate": 1.290878309532394e-05, + "loss": 0.2227, + "step": 46260 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907999310269153e-05, + "loss": 0.2721, + "step": 46265 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907215525214366e-05, + "loss": 0.0752, + "step": 46270 + }, + { + "epoch": 2.16, + "learning_rate": 1.290643174015958e-05, + "loss": 0.0638, + "step": 46275 + }, + { + "epoch": 2.16, + "learning_rate": 1.2905647955104794e-05, + "loss": 0.0867, + "step": 46280 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904864170050006e-05, + "loss": 0.0472, + "step": 46285 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904080384995219e-05, + "loss": 0.0876, + "step": 46290 + }, + { + "epoch": 2.16, + "learning_rate": 1.2903296599940434e-05, + "loss": 0.1803, + "step": 46295 + }, + { + "epoch": 2.16, + "learning_rate": 1.2902512814885646e-05, + "loss": 0.1082, + "step": 46300 + }, + { + "epoch": 2.16, + "learning_rate": 1.290172902983086e-05, + "loss": 0.203, + "step": 46305 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900945244776073e-05, + "loss": 0.3482, + "step": 46310 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900161459721288e-05, + "loss": 0.31, + "step": 46315 + }, + { + "epoch": 2.16, + "learning_rate": 1.28993776746665e-05, + "loss": 0.0936, + "step": 46320 + }, + { + "epoch": 2.16, + "learning_rate": 1.2898593889611714e-05, + "loss": 0.0706, + "step": 46325 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897810104556927e-05, + "loss": 0.0416, + "step": 46330 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897026319502142e-05, + "loss": 0.1093, + "step": 46335 + }, + { + "epoch": 2.16, + "learning_rate": 1.2896242534447354e-05, + "loss": 0.1354, + "step": 46340 + }, + { + "epoch": 2.16, + "learning_rate": 1.2895458749392568e-05, + "loss": 0.0767, + "step": 46345 + }, + { + "epoch": 2.16, + "learning_rate": 1.289467496433778e-05, + "loss": 0.1904, + "step": 46350 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893891179282994e-05, + "loss": 0.1699, + "step": 46355 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893107394228208e-05, + "loss": 0.1509, + "step": 46360 + }, + { + "epoch": 2.16, + "learning_rate": 1.289232360917342e-05, + "loss": 0.3889, + "step": 46365 + }, + { + "epoch": 2.16, + "learning_rate": 1.2891539824118634e-05, + "loss": 0.0727, + "step": 46370 + }, + { + "epoch": 2.16, + "learning_rate": 1.2890756039063848e-05, + "loss": 0.0465, + "step": 46375 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889972254009062e-05, + "loss": 0.0655, + "step": 46380 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889188468954274e-05, + "loss": 0.0616, + "step": 46385 + }, + { + "epoch": 2.16, + "learning_rate": 1.288840468389949e-05, + "loss": 0.1318, + "step": 46390 + }, + { + "epoch": 2.16, + "learning_rate": 1.2887620898844702e-05, + "loss": 0.1106, + "step": 46395 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886837113789916e-05, + "loss": 0.175, + "step": 46400 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886053328735128e-05, + "loss": 0.2635, + "step": 46405 + }, + { + "epoch": 2.17, + "learning_rate": 1.2885269543680344e-05, + "loss": 0.2759, + "step": 46410 + }, + { + "epoch": 2.17, + "learning_rate": 1.2884485758625556e-05, + "loss": 0.2409, + "step": 46415 + }, + { + "epoch": 2.17, + "learning_rate": 1.2883701973570768e-05, + "loss": 0.0379, + "step": 46420 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882918188515982e-05, + "loss": 0.0526, + "step": 46425 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882134403461194e-05, + "loss": 0.0494, + "step": 46430 + }, + { + "epoch": 2.17, + "learning_rate": 1.288135061840641e-05, + "loss": 0.0765, + "step": 46435 + }, + { + "epoch": 2.17, + "learning_rate": 1.2880566833351622e-05, + "loss": 0.1189, + "step": 46440 + }, + { + "epoch": 2.17, + "learning_rate": 1.2879783048296836e-05, + "loss": 0.1203, + "step": 46445 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878999263242048e-05, + "loss": 0.1393, + "step": 46450 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878215478187264e-05, + "loss": 0.1572, + "step": 46455 + }, + { + "epoch": 2.17, + "learning_rate": 1.2877431693132476e-05, + "loss": 0.3012, + "step": 46460 + }, + { + "epoch": 2.17, + "learning_rate": 1.287664790807769e-05, + "loss": 0.2512, + "step": 46465 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875864123022902e-05, + "loss": 0.0924, + "step": 46470 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875080337968118e-05, + "loss": 0.0359, + "step": 46475 + }, + { + "epoch": 2.17, + "learning_rate": 1.287429655291333e-05, + "loss": 0.079, + "step": 46480 + }, + { + "epoch": 2.17, + "learning_rate": 1.2873512767858542e-05, + "loss": 0.0759, + "step": 46485 + }, + { + "epoch": 2.17, + "learning_rate": 1.2872728982803758e-05, + "loss": 0.103, + "step": 46490 + }, + { + "epoch": 2.17, + "learning_rate": 1.287194519774897e-05, + "loss": 0.0971, + "step": 46495 + }, + { + "epoch": 2.17, + "learning_rate": 1.2871161412694184e-05, + "loss": 0.1746, + "step": 46500 + }, + { + "epoch": 2.17, + "learning_rate": 1.2870377627639396e-05, + "loss": 0.1998, + "step": 46505 + }, + { + "epoch": 2.17, + "learning_rate": 1.2869593842584612e-05, + "loss": 0.2048, + "step": 46510 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868810057529824e-05, + "loss": 0.3011, + "step": 46515 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868026272475038e-05, + "loss": 0.0584, + "step": 46520 + }, + { + "epoch": 2.17, + "learning_rate": 1.286724248742025e-05, + "loss": 0.064, + "step": 46525 + }, + { + "epoch": 2.17, + "learning_rate": 1.2866458702365466e-05, + "loss": 0.0913, + "step": 46530 + }, + { + "epoch": 2.17, + "learning_rate": 1.2865674917310678e-05, + "loss": 0.1153, + "step": 46535 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864891132255892e-05, + "loss": 0.069, + "step": 46540 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864107347201104e-05, + "loss": 0.068, + "step": 46545 + }, + { + "epoch": 2.17, + "learning_rate": 1.2863323562146316e-05, + "loss": 0.1295, + "step": 46550 + }, + { + "epoch": 2.17, + "learning_rate": 1.2862539777091532e-05, + "loss": 0.229, + "step": 46555 + }, + { + "epoch": 2.17, + "learning_rate": 1.2861755992036744e-05, + "loss": 0.2234, + "step": 46560 + }, + { + "epoch": 2.17, + "learning_rate": 1.2860972206981958e-05, + "loss": 0.2418, + "step": 46565 + }, + { + "epoch": 2.17, + "learning_rate": 1.286018842192717e-05, + "loss": 0.0448, + "step": 46570 + }, + { + "epoch": 2.17, + "learning_rate": 1.2859404636872386e-05, + "loss": 0.0333, + "step": 46575 + }, + { + "epoch": 2.17, + "learning_rate": 1.2858620851817598e-05, + "loss": 0.0544, + "step": 46580 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857837066762812e-05, + "loss": 0.126, + "step": 46585 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857053281708026e-05, + "loss": 0.0373, + "step": 46590 + }, + { + "epoch": 2.17, + "learning_rate": 1.285626949665324e-05, + "loss": 0.0796, + "step": 46595 + }, + { + "epoch": 2.17, + "learning_rate": 1.2855485711598452e-05, + "loss": 0.1411, + "step": 46600 + }, + { + "epoch": 2.17, + "learning_rate": 1.2854701926543668e-05, + "loss": 0.1499, + "step": 46605 + }, + { + "epoch": 2.17, + "learning_rate": 1.285391814148888e-05, + "loss": 0.259, + "step": 46610 + }, + { + "epoch": 2.18, + "learning_rate": 1.2853134356434092e-05, + "loss": 0.2371, + "step": 46615 + }, + { + "epoch": 2.18, + "learning_rate": 1.2852350571379306e-05, + "loss": 0.0754, + "step": 46620 + }, + { + "epoch": 2.18, + "learning_rate": 1.2851566786324518e-05, + "loss": 0.0368, + "step": 46625 + }, + { + "epoch": 2.18, + "learning_rate": 1.2850783001269734e-05, + "loss": 0.0324, + "step": 46630 + }, + { + "epoch": 2.18, + "learning_rate": 1.2849999216214946e-05, + "loss": 0.0577, + "step": 46635 + }, + { + "epoch": 2.18, + "learning_rate": 1.284921543116016e-05, + "loss": 0.0888, + "step": 46640 + }, + { + "epoch": 2.18, + "learning_rate": 1.2848431646105372e-05, + "loss": 0.1034, + "step": 46645 + }, + { + "epoch": 2.18, + "learning_rate": 1.2847647861050588e-05, + "loss": 0.1378, + "step": 46650 + }, + { + "epoch": 2.18, + "learning_rate": 1.28468640759958e-05, + "loss": 0.2142, + "step": 46655 + }, + { + "epoch": 2.18, + "learning_rate": 1.2846080290941014e-05, + "loss": 0.3083, + "step": 46660 + }, + { + "epoch": 2.18, + "learning_rate": 1.2845296505886226e-05, + "loss": 0.193, + "step": 46665 + }, + { + "epoch": 2.18, + "learning_rate": 1.2844512720831442e-05, + "loss": 0.0664, + "step": 46670 + }, + { + "epoch": 2.18, + "learning_rate": 1.2843728935776654e-05, + "loss": 0.04, + "step": 46675 + }, + { + "epoch": 2.18, + "learning_rate": 1.2842945150721866e-05, + "loss": 0.0385, + "step": 46680 + }, + { + "epoch": 2.18, + "learning_rate": 1.284216136566708e-05, + "loss": 0.0412, + "step": 46685 + }, + { + "epoch": 2.18, + "learning_rate": 1.2841377580612294e-05, + "loss": 0.0962, + "step": 46690 + }, + { + "epoch": 2.18, + "learning_rate": 1.2840593795557508e-05, + "loss": 0.1109, + "step": 46695 + }, + { + "epoch": 2.18, + "learning_rate": 1.283981001050272e-05, + "loss": 0.1748, + "step": 46700 + }, + { + "epoch": 2.18, + "learning_rate": 1.2839026225447936e-05, + "loss": 0.1363, + "step": 46705 + }, + { + "epoch": 2.18, + "learning_rate": 1.2838242440393148e-05, + "loss": 0.3242, + "step": 46710 + }, + { + "epoch": 2.18, + "learning_rate": 1.2837458655338362e-05, + "loss": 0.2394, + "step": 46715 + }, + { + "epoch": 2.18, + "learning_rate": 1.2836674870283574e-05, + "loss": 0.0867, + "step": 46720 + }, + { + "epoch": 2.18, + "learning_rate": 1.283589108522879e-05, + "loss": 0.053, + "step": 46725 + }, + { + "epoch": 2.18, + "learning_rate": 1.2835107300174002e-05, + "loss": 0.0816, + "step": 46730 + }, + { + "epoch": 2.18, + "learning_rate": 1.2834323515119216e-05, + "loss": 0.0848, + "step": 46735 + }, + { + "epoch": 2.18, + "learning_rate": 1.2833539730064428e-05, + "loss": 0.1253, + "step": 46740 + }, + { + "epoch": 2.18, + "learning_rate": 1.283275594500964e-05, + "loss": 0.1279, + "step": 46745 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831972159954856e-05, + "loss": 0.1643, + "step": 46750 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831188374900068e-05, + "loss": 0.1052, + "step": 46755 + }, + { + "epoch": 2.18, + "learning_rate": 1.2830404589845282e-05, + "loss": 0.414, + "step": 46760 + }, + { + "epoch": 2.18, + "learning_rate": 1.2829620804790494e-05, + "loss": 0.3846, + "step": 46765 + }, + { + "epoch": 2.18, + "learning_rate": 1.282883701973571e-05, + "loss": 0.0487, + "step": 46770 + }, + { + "epoch": 2.18, + "learning_rate": 1.2828053234680922e-05, + "loss": 0.049, + "step": 46775 + }, + { + "epoch": 2.18, + "learning_rate": 1.2827269449626136e-05, + "loss": 0.0539, + "step": 46780 + }, + { + "epoch": 2.18, + "learning_rate": 1.2826485664571348e-05, + "loss": 0.0454, + "step": 46785 + }, + { + "epoch": 2.18, + "learning_rate": 1.2825701879516563e-05, + "loss": 0.1597, + "step": 46790 + }, + { + "epoch": 2.18, + "learning_rate": 1.2824918094461776e-05, + "loss": 0.1002, + "step": 46795 + }, + { + "epoch": 2.18, + "learning_rate": 1.282413430940699e-05, + "loss": 0.1373, + "step": 46800 + }, + { + "epoch": 2.18, + "learning_rate": 1.2823350524352204e-05, + "loss": 0.2601, + "step": 46805 + }, + { + "epoch": 2.18, + "learning_rate": 1.2822566739297416e-05, + "loss": 0.2318, + "step": 46810 + }, + { + "epoch": 2.18, + "learning_rate": 1.282178295424263e-05, + "loss": 0.3056, + "step": 46815 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820999169187842e-05, + "loss": 0.026, + "step": 46820 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820215384133057e-05, + "loss": 0.103, + "step": 46825 + }, + { + "epoch": 2.19, + "learning_rate": 1.281943159907827e-05, + "loss": 0.0383, + "step": 46830 + }, + { + "epoch": 2.19, + "learning_rate": 1.2818647814023484e-05, + "loss": 0.0978, + "step": 46835 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817864028968696e-05, + "loss": 0.0945, + "step": 46840 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817080243913911e-05, + "loss": 0.1017, + "step": 46845 + }, + { + "epoch": 2.19, + "learning_rate": 1.2816296458859124e-05, + "loss": 0.0924, + "step": 46850 + }, + { + "epoch": 2.19, + "learning_rate": 1.2815512673804337e-05, + "loss": 0.156, + "step": 46855 + }, + { + "epoch": 2.19, + "learning_rate": 1.281472888874955e-05, + "loss": 0.2734, + "step": 46860 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813945103694765e-05, + "loss": 0.3109, + "step": 46865 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813161318639978e-05, + "loss": 0.0688, + "step": 46870 + }, + { + "epoch": 2.19, + "learning_rate": 1.281237753358519e-05, + "loss": 0.0376, + "step": 46875 + }, + { + "epoch": 2.19, + "learning_rate": 1.2811593748530404e-05, + "loss": 0.0537, + "step": 46880 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810809963475616e-05, + "loss": 0.084, + "step": 46885 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810026178420831e-05, + "loss": 0.1507, + "step": 46890 + }, + { + "epoch": 2.19, + "learning_rate": 1.2809242393366044e-05, + "loss": 0.144, + "step": 46895 + }, + { + "epoch": 2.19, + "learning_rate": 1.2808458608311258e-05, + "loss": 0.1558, + "step": 46900 + }, + { + "epoch": 2.19, + "learning_rate": 1.2807674823256471e-05, + "loss": 0.1787, + "step": 46905 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806891038201685e-05, + "loss": 0.2776, + "step": 46910 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806107253146898e-05, + "loss": 0.3282, + "step": 46915 + }, + { + "epoch": 2.19, + "learning_rate": 1.2805323468092113e-05, + "loss": 0.0975, + "step": 46920 + }, + { + "epoch": 2.19, + "learning_rate": 1.2804539683037325e-05, + "loss": 0.0326, + "step": 46925 + }, + { + "epoch": 2.19, + "learning_rate": 1.280375589798254e-05, + "loss": 0.0807, + "step": 46930 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802972112927752e-05, + "loss": 0.1076, + "step": 46935 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802188327872964e-05, + "loss": 0.123, + "step": 46940 + }, + { + "epoch": 2.19, + "learning_rate": 1.280140454281818e-05, + "loss": 0.1297, + "step": 46945 + }, + { + "epoch": 2.19, + "learning_rate": 1.2800620757763392e-05, + "loss": 0.13, + "step": 46950 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799836972708605e-05, + "loss": 0.1437, + "step": 46955 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799053187653818e-05, + "loss": 0.1865, + "step": 46960 + }, + { + "epoch": 2.19, + "learning_rate": 1.2798269402599033e-05, + "loss": 0.2441, + "step": 46965 + }, + { + "epoch": 2.19, + "learning_rate": 1.2797485617544245e-05, + "loss": 0.0758, + "step": 46970 + }, + { + "epoch": 2.19, + "learning_rate": 1.279670183248946e-05, + "loss": 0.0861, + "step": 46975 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795918047434672e-05, + "loss": 0.0704, + "step": 46980 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795134262379887e-05, + "loss": 0.0482, + "step": 46985 + }, + { + "epoch": 2.19, + "learning_rate": 1.27943504773251e-05, + "loss": 0.0407, + "step": 46990 + }, + { + "epoch": 2.19, + "learning_rate": 1.2793566692270313e-05, + "loss": 0.1309, + "step": 46995 + }, + { + "epoch": 2.19, + "learning_rate": 1.2792782907215526e-05, + "loss": 0.1658, + "step": 47000 + }, + { + "epoch": 2.19, + "learning_rate": 1.279199912216074e-05, + "loss": 0.1571, + "step": 47005 + }, + { + "epoch": 2.19, + "learning_rate": 1.2791215337105953e-05, + "loss": 0.2098, + "step": 47010 + }, + { + "epoch": 2.19, + "learning_rate": 1.2790431552051166e-05, + "loss": 0.2532, + "step": 47015 + }, + { + "epoch": 2.19, + "learning_rate": 1.2789647766996381e-05, + "loss": 0.0152, + "step": 47020 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788863981941593e-05, + "loss": 0.0505, + "step": 47025 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788080196886807e-05, + "loss": 0.0752, + "step": 47030 + }, + { + "epoch": 2.19, + "learning_rate": 1.278729641183202e-05, + "loss": 0.0727, + "step": 47035 + }, + { + "epoch": 2.19, + "learning_rate": 1.2786512626777235e-05, + "loss": 0.1866, + "step": 47040 + }, + { + "epoch": 2.2, + "learning_rate": 1.2785728841722447e-05, + "loss": 0.0927, + "step": 47045 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784945056667661e-05, + "loss": 0.0914, + "step": 47050 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784161271612873e-05, + "loss": 0.2837, + "step": 47055 + }, + { + "epoch": 2.2, + "learning_rate": 1.2783377486558089e-05, + "loss": 0.3236, + "step": 47060 + }, + { + "epoch": 2.2, + "learning_rate": 1.2782593701503301e-05, + "loss": 0.3262, + "step": 47065 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781809916448513e-05, + "loss": 0.0784, + "step": 47070 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781026131393727e-05, + "loss": 0.0751, + "step": 47075 + }, + { + "epoch": 2.2, + "learning_rate": 1.278024234633894e-05, + "loss": 0.0486, + "step": 47080 + }, + { + "epoch": 2.2, + "learning_rate": 1.2779458561284155e-05, + "loss": 0.1281, + "step": 47085 + }, + { + "epoch": 2.2, + "learning_rate": 1.2778674776229367e-05, + "loss": 0.1138, + "step": 47090 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777890991174581e-05, + "loss": 0.1101, + "step": 47095 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777107206119793e-05, + "loss": 0.1547, + "step": 47100 + }, + { + "epoch": 2.2, + "learning_rate": 1.2776323421065009e-05, + "loss": 0.1095, + "step": 47105 + }, + { + "epoch": 2.2, + "learning_rate": 1.2775539636010221e-05, + "loss": 0.3924, + "step": 47110 + }, + { + "epoch": 2.2, + "learning_rate": 1.2774755850955435e-05, + "loss": 0.314, + "step": 47115 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773972065900649e-05, + "loss": 0.0539, + "step": 47120 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773188280845863e-05, + "loss": 0.039, + "step": 47125 + }, + { + "epoch": 2.2, + "learning_rate": 1.2772404495791075e-05, + "loss": 0.0658, + "step": 47130 + }, + { + "epoch": 2.2, + "learning_rate": 1.2771620710736287e-05, + "loss": 0.0625, + "step": 47135 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770836925681503e-05, + "loss": 0.115, + "step": 47140 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770053140626715e-05, + "loss": 0.1128, + "step": 47145 + }, + { + "epoch": 2.2, + "learning_rate": 1.2769269355571929e-05, + "loss": 0.1195, + "step": 47150 + }, + { + "epoch": 2.2, + "learning_rate": 1.2768485570517141e-05, + "loss": 0.1494, + "step": 47155 + }, + { + "epoch": 2.2, + "learning_rate": 1.2767701785462357e-05, + "loss": 0.1993, + "step": 47160 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766918000407569e-05, + "loss": 0.2361, + "step": 47165 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766134215352783e-05, + "loss": 0.0823, + "step": 47170 + }, + { + "epoch": 2.2, + "learning_rate": 1.2765350430297995e-05, + "loss": 0.0557, + "step": 47175 + }, + { + "epoch": 2.2, + "learning_rate": 1.2764566645243211e-05, + "loss": 0.0295, + "step": 47180 + }, + { + "epoch": 2.2, + "learning_rate": 1.2763782860188423e-05, + "loss": 0.0804, + "step": 47185 + }, + { + "epoch": 2.2, + "learning_rate": 1.2762999075133637e-05, + "loss": 0.0986, + "step": 47190 + }, + { + "epoch": 2.2, + "learning_rate": 1.276221529007885e-05, + "loss": 0.1192, + "step": 47195 + }, + { + "epoch": 2.2, + "learning_rate": 1.2761431505024063e-05, + "loss": 0.1397, + "step": 47200 + }, + { + "epoch": 2.2, + "learning_rate": 1.2760647719969277e-05, + "loss": 0.2456, + "step": 47205 + }, + { + "epoch": 2.2, + "learning_rate": 1.275986393491449e-05, + "loss": 0.267, + "step": 47210 + }, + { + "epoch": 2.2, + "learning_rate": 1.2759080149859703e-05, + "loss": 0.3058, + "step": 47215 + }, + { + "epoch": 2.2, + "learning_rate": 1.2758296364804917e-05, + "loss": 0.0419, + "step": 47220 + }, + { + "epoch": 2.2, + "learning_rate": 1.2757512579750131e-05, + "loss": 0.0535, + "step": 47225 + }, + { + "epoch": 2.2, + "learning_rate": 1.2756728794695343e-05, + "loss": 0.0522, + "step": 47230 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755945009640559e-05, + "loss": 0.0544, + "step": 47235 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755161224585771e-05, + "loss": 0.1465, + "step": 47240 + }, + { + "epoch": 2.2, + "learning_rate": 1.2754377439530985e-05, + "loss": 0.1576, + "step": 47245 + }, + { + "epoch": 2.2, + "learning_rate": 1.2753593654476197e-05, + "loss": 0.1764, + "step": 47250 + }, + { + "epoch": 2.2, + "learning_rate": 1.2752809869421413e-05, + "loss": 0.2072, + "step": 47255 + }, + { + "epoch": 2.21, + "learning_rate": 1.2752026084366625e-05, + "loss": 0.2698, + "step": 47260 + }, + { + "epoch": 2.21, + "learning_rate": 1.2751242299311837e-05, + "loss": 0.2404, + "step": 47265 + }, + { + "epoch": 2.21, + "learning_rate": 1.2750458514257051e-05, + "loss": 0.1002, + "step": 47270 + }, + { + "epoch": 2.21, + "learning_rate": 1.2749674729202263e-05, + "loss": 0.0465, + "step": 47275 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748890944147479e-05, + "loss": 0.0432, + "step": 47280 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748107159092691e-05, + "loss": 0.0784, + "step": 47285 + }, + { + "epoch": 2.21, + "learning_rate": 1.2747323374037905e-05, + "loss": 0.0798, + "step": 47290 + }, + { + "epoch": 2.21, + "learning_rate": 1.2746539588983117e-05, + "loss": 0.0903, + "step": 47295 + }, + { + "epoch": 2.21, + "learning_rate": 1.2745755803928333e-05, + "loss": 0.197, + "step": 47300 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744972018873545e-05, + "loss": 0.2127, + "step": 47305 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744188233818759e-05, + "loss": 0.2387, + "step": 47310 + }, + { + "epoch": 2.21, + "learning_rate": 1.2743404448763971e-05, + "loss": 0.2397, + "step": 47315 + }, + { + "epoch": 2.21, + "learning_rate": 1.2742620663709187e-05, + "loss": 0.0321, + "step": 47320 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741836878654399e-05, + "loss": 0.0406, + "step": 47325 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741053093599611e-05, + "loss": 0.0561, + "step": 47330 + }, + { + "epoch": 2.21, + "learning_rate": 1.2740269308544827e-05, + "loss": 0.1, + "step": 47335 + }, + { + "epoch": 2.21, + "learning_rate": 1.2739485523490039e-05, + "loss": 0.0816, + "step": 47340 + }, + { + "epoch": 2.21, + "learning_rate": 1.2738701738435253e-05, + "loss": 0.1594, + "step": 47345 + }, + { + "epoch": 2.21, + "learning_rate": 1.2737917953380465e-05, + "loss": 0.1174, + "step": 47350 + }, + { + "epoch": 2.21, + "learning_rate": 1.273713416832568e-05, + "loss": 0.2226, + "step": 47355 + }, + { + "epoch": 2.21, + "learning_rate": 1.2736350383270893e-05, + "loss": 0.2778, + "step": 47360 + }, + { + "epoch": 2.21, + "learning_rate": 1.2735566598216107e-05, + "loss": 0.3245, + "step": 47365 + }, + { + "epoch": 2.21, + "learning_rate": 1.2734782813161319e-05, + "loss": 0.0526, + "step": 47370 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733999028106535e-05, + "loss": 0.0418, + "step": 47375 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733215243051747e-05, + "loss": 0.1414, + "step": 47380 + }, + { + "epoch": 2.21, + "learning_rate": 1.273243145799696e-05, + "loss": 0.0651, + "step": 47385 + }, + { + "epoch": 2.21, + "learning_rate": 1.2731647672942173e-05, + "loss": 0.1081, + "step": 47390 + }, + { + "epoch": 2.21, + "learning_rate": 1.2730863887887385e-05, + "loss": 0.1513, + "step": 47395 + }, + { + "epoch": 2.21, + "learning_rate": 1.27300801028326e-05, + "loss": 0.1419, + "step": 47400 + }, + { + "epoch": 2.21, + "learning_rate": 1.2729296317777813e-05, + "loss": 0.3379, + "step": 47405 + }, + { + "epoch": 2.21, + "learning_rate": 1.2728512532723027e-05, + "loss": 0.2274, + "step": 47410 + }, + { + "epoch": 2.21, + "learning_rate": 1.2727728747668239e-05, + "loss": 0.3456, + "step": 47415 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726944962613455e-05, + "loss": 0.0683, + "step": 47420 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726161177558667e-05, + "loss": 0.0611, + "step": 47425 + }, + { + "epoch": 2.21, + "learning_rate": 1.272537739250388e-05, + "loss": 0.0673, + "step": 47430 + }, + { + "epoch": 2.21, + "learning_rate": 1.2724593607449095e-05, + "loss": 0.1342, + "step": 47435 + }, + { + "epoch": 2.21, + "learning_rate": 1.2723809822394309e-05, + "loss": 0.0678, + "step": 47440 + }, + { + "epoch": 2.21, + "learning_rate": 1.272302603733952e-05, + "loss": 0.254, + "step": 47445 + }, + { + "epoch": 2.21, + "learning_rate": 1.2722242252284736e-05, + "loss": 0.1418, + "step": 47450 + }, + { + "epoch": 2.21, + "learning_rate": 1.2721458467229949e-05, + "loss": 0.1771, + "step": 47455 + }, + { + "epoch": 2.21, + "learning_rate": 1.272067468217516e-05, + "loss": 0.2067, + "step": 47460 + }, + { + "epoch": 2.21, + "learning_rate": 1.2719890897120375e-05, + "loss": 0.302, + "step": 47465 + }, + { + "epoch": 2.22, + "learning_rate": 1.2719107112065587e-05, + "loss": 0.0339, + "step": 47470 + }, + { + "epoch": 2.22, + "learning_rate": 1.2718323327010803e-05, + "loss": 0.068, + "step": 47475 + }, + { + "epoch": 2.22, + "learning_rate": 1.2717539541956015e-05, + "loss": 0.101, + "step": 47480 + }, + { + "epoch": 2.22, + "learning_rate": 1.2716755756901229e-05, + "loss": 0.0721, + "step": 47485 + }, + { + "epoch": 2.22, + "learning_rate": 1.271597197184644e-05, + "loss": 0.0679, + "step": 47490 + }, + { + "epoch": 2.22, + "learning_rate": 1.2715188186791656e-05, + "loss": 0.155, + "step": 47495 + }, + { + "epoch": 2.22, + "learning_rate": 1.2714404401736869e-05, + "loss": 0.0995, + "step": 47500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2713620616682083e-05, + "loss": 0.2467, + "step": 47505 + }, + { + "epoch": 2.22, + "learning_rate": 1.2712836831627295e-05, + "loss": 0.2413, + "step": 47510 + }, + { + "epoch": 2.22, + "learning_rate": 1.271205304657251e-05, + "loss": 0.2587, + "step": 47515 + }, + { + "epoch": 2.22, + "learning_rate": 1.2711269261517723e-05, + "loss": 0.0649, + "step": 47520 + }, + { + "epoch": 2.22, + "learning_rate": 1.2710485476462935e-05, + "loss": 0.0333, + "step": 47525 + }, + { + "epoch": 2.22, + "learning_rate": 1.2709701691408149e-05, + "loss": 0.0575, + "step": 47530 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708917906353363e-05, + "loss": 0.1551, + "step": 47535 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708134121298577e-05, + "loss": 0.2555, + "step": 47540 + }, + { + "epoch": 2.22, + "learning_rate": 1.2707350336243789e-05, + "loss": 0.0763, + "step": 47545 + }, + { + "epoch": 2.22, + "learning_rate": 1.2706566551189004e-05, + "loss": 0.1238, + "step": 47550 + }, + { + "epoch": 2.22, + "learning_rate": 1.2705782766134217e-05, + "loss": 0.1277, + "step": 47555 + }, + { + "epoch": 2.22, + "learning_rate": 1.270499898107943e-05, + "loss": 0.2487, + "step": 47560 + }, + { + "epoch": 2.22, + "learning_rate": 1.2704215196024643e-05, + "loss": 0.2534, + "step": 47565 + }, + { + "epoch": 2.22, + "learning_rate": 1.2703431410969858e-05, + "loss": 0.0228, + "step": 47570 + }, + { + "epoch": 2.22, + "learning_rate": 1.270264762591507e-05, + "loss": 0.0238, + "step": 47575 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701863840860284e-05, + "loss": 0.0532, + "step": 47580 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701080055805497e-05, + "loss": 0.1343, + "step": 47585 + }, + { + "epoch": 2.22, + "learning_rate": 1.2700296270750709e-05, + "loss": 0.1061, + "step": 47590 + }, + { + "epoch": 2.22, + "learning_rate": 1.2699512485695924e-05, + "loss": 0.1548, + "step": 47595 + }, + { + "epoch": 2.22, + "learning_rate": 1.2698728700641137e-05, + "loss": 0.2443, + "step": 47600 + }, + { + "epoch": 2.22, + "learning_rate": 1.269794491558635e-05, + "loss": 0.1995, + "step": 47605 + }, + { + "epoch": 2.22, + "learning_rate": 1.2697161130531563e-05, + "loss": 0.4085, + "step": 47610 + }, + { + "epoch": 2.22, + "learning_rate": 1.2696377345476778e-05, + "loss": 0.2198, + "step": 47615 + }, + { + "epoch": 2.22, + "learning_rate": 1.269559356042199e-05, + "loss": 0.0253, + "step": 47620 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694809775367204e-05, + "loss": 0.0511, + "step": 47625 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694025990312417e-05, + "loss": 0.0841, + "step": 47630 + }, + { + "epoch": 2.22, + "learning_rate": 1.2693242205257632e-05, + "loss": 0.0448, + "step": 47635 + }, + { + "epoch": 2.22, + "learning_rate": 1.2692458420202844e-05, + "loss": 0.0793, + "step": 47640 + }, + { + "epoch": 2.22, + "learning_rate": 1.2691674635148058e-05, + "loss": 0.0846, + "step": 47645 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690890850093272e-05, + "loss": 0.1674, + "step": 47650 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690107065038484e-05, + "loss": 0.129, + "step": 47655 + }, + { + "epoch": 2.22, + "learning_rate": 1.2689323279983698e-05, + "loss": 0.2383, + "step": 47660 + }, + { + "epoch": 2.22, + "learning_rate": 1.268853949492891e-05, + "loss": 0.3317, + "step": 47665 + }, + { + "epoch": 2.22, + "learning_rate": 1.2687755709874126e-05, + "loss": 0.0892, + "step": 47670 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686971924819338e-05, + "loss": 0.0223, + "step": 47675 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686188139764552e-05, + "loss": 0.077, + "step": 47680 + }, + { + "epoch": 2.23, + "learning_rate": 1.2685404354709765e-05, + "loss": 0.0558, + "step": 47685 + }, + { + "epoch": 2.23, + "learning_rate": 1.268462056965498e-05, + "loss": 0.0593, + "step": 47690 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683836784600192e-05, + "loss": 0.0725, + "step": 47695 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683052999545406e-05, + "loss": 0.2018, + "step": 47700 + }, + { + "epoch": 2.23, + "learning_rate": 1.2682269214490618e-05, + "loss": 0.1803, + "step": 47705 + }, + { + "epoch": 2.23, + "learning_rate": 1.2681485429435834e-05, + "loss": 0.2223, + "step": 47710 + }, + { + "epoch": 2.23, + "learning_rate": 1.2680701644381046e-05, + "loss": 0.4095, + "step": 47715 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679917859326258e-05, + "loss": 0.0528, + "step": 47720 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679134074271472e-05, + "loss": 0.0447, + "step": 47725 + }, + { + "epoch": 2.23, + "learning_rate": 1.2678350289216685e-05, + "loss": 0.0256, + "step": 47730 + }, + { + "epoch": 2.23, + "learning_rate": 1.26775665041619e-05, + "loss": 0.0814, + "step": 47735 + }, + { + "epoch": 2.23, + "learning_rate": 1.2676782719107112e-05, + "loss": 0.1283, + "step": 47740 + }, + { + "epoch": 2.23, + "learning_rate": 1.2675998934052326e-05, + "loss": 0.0823, + "step": 47745 + }, + { + "epoch": 2.23, + "learning_rate": 1.267521514899754e-05, + "loss": 0.0804, + "step": 47750 + }, + { + "epoch": 2.23, + "learning_rate": 1.2674431363942754e-05, + "loss": 0.1365, + "step": 47755 + }, + { + "epoch": 2.23, + "learning_rate": 1.2673647578887966e-05, + "loss": 0.2962, + "step": 47760 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672863793833182e-05, + "loss": 0.3267, + "step": 47765 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672080008778394e-05, + "loss": 0.0293, + "step": 47770 + }, + { + "epoch": 2.23, + "learning_rate": 1.2671296223723608e-05, + "loss": 0.0746, + "step": 47775 + }, + { + "epoch": 2.23, + "learning_rate": 1.267051243866882e-05, + "loss": 0.0665, + "step": 47780 + }, + { + "epoch": 2.23, + "learning_rate": 1.2669728653614032e-05, + "loss": 0.0971, + "step": 47785 + }, + { + "epoch": 2.23, + "learning_rate": 1.2668944868559248e-05, + "loss": 0.0418, + "step": 47790 + }, + { + "epoch": 2.23, + "learning_rate": 1.266816108350446e-05, + "loss": 0.1123, + "step": 47795 + }, + { + "epoch": 2.23, + "learning_rate": 1.2667377298449674e-05, + "loss": 0.0995, + "step": 47800 + }, + { + "epoch": 2.23, + "learning_rate": 1.2666593513394886e-05, + "loss": 0.1129, + "step": 47805 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665809728340102e-05, + "loss": 0.2582, + "step": 47810 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665025943285314e-05, + "loss": 0.3506, + "step": 47815 + }, + { + "epoch": 2.23, + "learning_rate": 1.2664242158230528e-05, + "loss": 0.0258, + "step": 47820 + }, + { + "epoch": 2.23, + "learning_rate": 1.266345837317574e-05, + "loss": 0.0343, + "step": 47825 + }, + { + "epoch": 2.23, + "learning_rate": 1.2662674588120956e-05, + "loss": 0.0366, + "step": 47830 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661890803066168e-05, + "loss": 0.1132, + "step": 47835 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661107018011382e-05, + "loss": 0.0819, + "step": 47840 + }, + { + "epoch": 2.23, + "learning_rate": 1.2660323232956594e-05, + "loss": 0.1235, + "step": 47845 + }, + { + "epoch": 2.23, + "learning_rate": 1.2659539447901808e-05, + "loss": 0.1482, + "step": 47850 + }, + { + "epoch": 2.23, + "learning_rate": 1.2658755662847022e-05, + "loss": 0.1346, + "step": 47855 + }, + { + "epoch": 2.23, + "learning_rate": 1.2657971877792234e-05, + "loss": 0.1981, + "step": 47860 + }, + { + "epoch": 2.23, + "learning_rate": 1.265718809273745e-05, + "loss": 0.2022, + "step": 47865 + }, + { + "epoch": 2.23, + "learning_rate": 1.2656404307682662e-05, + "loss": 0.0667, + "step": 47870 + }, + { + "epoch": 2.23, + "learning_rate": 1.2655620522627876e-05, + "loss": 0.0178, + "step": 47875 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654836737573088e-05, + "loss": 0.1099, + "step": 47880 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654052952518304e-05, + "loss": 0.0823, + "step": 47885 + }, + { + "epoch": 2.23, + "learning_rate": 1.2653269167463516e-05, + "loss": 0.0406, + "step": 47890 + }, + { + "epoch": 2.23, + "learning_rate": 1.265248538240873e-05, + "loss": 0.1551, + "step": 47895 + }, + { + "epoch": 2.24, + "learning_rate": 1.2651701597353942e-05, + "loss": 0.1111, + "step": 47900 + }, + { + "epoch": 2.24, + "learning_rate": 1.2650917812299158e-05, + "loss": 0.1663, + "step": 47905 + }, + { + "epoch": 2.24, + "learning_rate": 1.265013402724437e-05, + "loss": 0.2386, + "step": 47910 + }, + { + "epoch": 2.24, + "learning_rate": 1.2649350242189582e-05, + "loss": 0.2496, + "step": 47915 + }, + { + "epoch": 2.24, + "learning_rate": 1.2648566457134796e-05, + "loss": 0.0195, + "step": 47920 + }, + { + "epoch": 2.24, + "learning_rate": 1.2647782672080008e-05, + "loss": 0.0671, + "step": 47925 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646998887025224e-05, + "loss": 0.0558, + "step": 47930 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646215101970436e-05, + "loss": 0.0917, + "step": 47935 + }, + { + "epoch": 2.24, + "learning_rate": 1.264543131691565e-05, + "loss": 0.1752, + "step": 47940 + }, + { + "epoch": 2.24, + "learning_rate": 1.2644647531860862e-05, + "loss": 0.1103, + "step": 47945 + }, + { + "epoch": 2.24, + "learning_rate": 1.2643863746806078e-05, + "loss": 0.1002, + "step": 47950 + }, + { + "epoch": 2.24, + "learning_rate": 1.264307996175129e-05, + "loss": 0.1335, + "step": 47955 + }, + { + "epoch": 2.24, + "learning_rate": 1.2642296176696504e-05, + "loss": 0.3335, + "step": 47960 + }, + { + "epoch": 2.24, + "learning_rate": 1.2641512391641718e-05, + "loss": 0.3093, + "step": 47965 + }, + { + "epoch": 2.24, + "learning_rate": 1.2640728606586932e-05, + "loss": 0.0686, + "step": 47970 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639944821532144e-05, + "loss": 0.0387, + "step": 47975 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639161036477356e-05, + "loss": 0.0357, + "step": 47980 + }, + { + "epoch": 2.24, + "learning_rate": 1.2638377251422572e-05, + "loss": 0.0849, + "step": 47985 + }, + { + "epoch": 2.24, + "learning_rate": 1.2637593466367784e-05, + "loss": 0.1276, + "step": 47990 + }, + { + "epoch": 2.24, + "learning_rate": 1.2636809681312998e-05, + "loss": 0.1694, + "step": 47995 + }, + { + "epoch": 2.24, + "learning_rate": 1.263602589625821e-05, + "loss": 0.1722, + "step": 48000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2635242111203426e-05, + "loss": 0.1639, + "step": 48005 + }, + { + "epoch": 2.24, + "learning_rate": 1.2634458326148638e-05, + "loss": 0.326, + "step": 48010 + }, + { + "epoch": 2.24, + "learning_rate": 1.2633674541093852e-05, + "loss": 0.3992, + "step": 48015 + }, + { + "epoch": 2.24, + "learning_rate": 1.2632890756039064e-05, + "loss": 0.0436, + "step": 48020 + }, + { + "epoch": 2.24, + "learning_rate": 1.263210697098428e-05, + "loss": 0.0531, + "step": 48025 + }, + { + "epoch": 2.24, + "learning_rate": 1.2631323185929492e-05, + "loss": 0.0581, + "step": 48030 + }, + { + "epoch": 2.24, + "learning_rate": 1.2630539400874706e-05, + "loss": 0.0561, + "step": 48035 + }, + { + "epoch": 2.24, + "learning_rate": 1.2629755615819918e-05, + "loss": 0.1014, + "step": 48040 + }, + { + "epoch": 2.24, + "learning_rate": 1.262897183076513e-05, + "loss": 0.1809, + "step": 48045 + }, + { + "epoch": 2.24, + "learning_rate": 1.2628188045710346e-05, + "loss": 0.1019, + "step": 48050 + }, + { + "epoch": 2.24, + "learning_rate": 1.2627404260655558e-05, + "loss": 0.1508, + "step": 48055 + }, + { + "epoch": 2.24, + "learning_rate": 1.2626620475600772e-05, + "loss": 0.309, + "step": 48060 + }, + { + "epoch": 2.24, + "learning_rate": 1.2625836690545986e-05, + "loss": 0.2341, + "step": 48065 + }, + { + "epoch": 2.24, + "learning_rate": 1.26250529054912e-05, + "loss": 0.0517, + "step": 48070 + }, + { + "epoch": 2.24, + "learning_rate": 1.2624269120436412e-05, + "loss": 0.039, + "step": 48075 + }, + { + "epoch": 2.24, + "learning_rate": 1.2623485335381628e-05, + "loss": 0.0392, + "step": 48080 + }, + { + "epoch": 2.24, + "learning_rate": 1.262270155032684e-05, + "loss": 0.0337, + "step": 48085 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621917765272054e-05, + "loss": 0.0574, + "step": 48090 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621133980217266e-05, + "loss": 0.0952, + "step": 48095 + }, + { + "epoch": 2.24, + "learning_rate": 1.2620350195162481e-05, + "loss": 0.1344, + "step": 48100 + }, + { + "epoch": 2.24, + "learning_rate": 1.2619566410107694e-05, + "loss": 0.235, + "step": 48105 + }, + { + "epoch": 2.24, + "learning_rate": 1.2618782625052906e-05, + "loss": 0.2454, + "step": 48110 + }, + { + "epoch": 2.25, + "learning_rate": 1.261799883999812e-05, + "loss": 0.26, + "step": 48115 + }, + { + "epoch": 2.25, + "learning_rate": 1.2617215054943332e-05, + "loss": 0.0369, + "step": 48120 + }, + { + "epoch": 2.25, + "learning_rate": 1.2616431269888548e-05, + "loss": 0.0334, + "step": 48125 + }, + { + "epoch": 2.25, + "learning_rate": 1.261564748483376e-05, + "loss": 0.0459, + "step": 48130 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614863699778974e-05, + "loss": 0.0905, + "step": 48135 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614079914724186e-05, + "loss": 0.1017, + "step": 48140 + }, + { + "epoch": 2.25, + "learning_rate": 1.2613296129669402e-05, + "loss": 0.1289, + "step": 48145 + }, + { + "epoch": 2.25, + "learning_rate": 1.2612512344614614e-05, + "loss": 0.1654, + "step": 48150 + }, + { + "epoch": 2.25, + "learning_rate": 1.2611728559559828e-05, + "loss": 0.1671, + "step": 48155 + }, + { + "epoch": 2.25, + "learning_rate": 1.261094477450504e-05, + "loss": 0.2027, + "step": 48160 + }, + { + "epoch": 2.25, + "learning_rate": 1.2610160989450255e-05, + "loss": 0.2967, + "step": 48165 + }, + { + "epoch": 2.25, + "learning_rate": 1.2609377204395468e-05, + "loss": 0.0546, + "step": 48170 + }, + { + "epoch": 2.25, + "learning_rate": 1.260859341934068e-05, + "loss": 0.0423, + "step": 48175 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607809634285895e-05, + "loss": 0.0787, + "step": 48180 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607025849231108e-05, + "loss": 0.1554, + "step": 48185 + }, + { + "epoch": 2.25, + "learning_rate": 1.2606242064176322e-05, + "loss": 0.0672, + "step": 48190 + }, + { + "epoch": 2.25, + "learning_rate": 1.2605458279121534e-05, + "loss": 0.1474, + "step": 48195 + }, + { + "epoch": 2.25, + "learning_rate": 1.260467449406675e-05, + "loss": 0.0994, + "step": 48200 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603890709011962e-05, + "loss": 0.1914, + "step": 48205 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603106923957176e-05, + "loss": 0.2273, + "step": 48210 + }, + { + "epoch": 2.25, + "learning_rate": 1.2602323138902388e-05, + "loss": 0.3731, + "step": 48215 + }, + { + "epoch": 2.25, + "learning_rate": 1.2601539353847603e-05, + "loss": 0.0643, + "step": 48220 + }, + { + "epoch": 2.25, + "learning_rate": 1.2600755568792816e-05, + "loss": 0.0238, + "step": 48225 + }, + { + "epoch": 2.25, + "learning_rate": 1.259997178373803e-05, + "loss": 0.0542, + "step": 48230 + }, + { + "epoch": 2.25, + "learning_rate": 1.2599187998683242e-05, + "loss": 0.0509, + "step": 48235 + }, + { + "epoch": 2.25, + "learning_rate": 1.2598404213628454e-05, + "loss": 0.1615, + "step": 48240 + }, + { + "epoch": 2.25, + "learning_rate": 1.259762042857367e-05, + "loss": 0.0994, + "step": 48245 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596836643518882e-05, + "loss": 0.1109, + "step": 48250 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596052858464096e-05, + "loss": 0.1553, + "step": 48255 + }, + { + "epoch": 2.25, + "learning_rate": 1.2595269073409308e-05, + "loss": 0.3124, + "step": 48260 + }, + { + "epoch": 2.25, + "learning_rate": 1.2594485288354523e-05, + "loss": 0.2899, + "step": 48265 + }, + { + "epoch": 2.25, + "learning_rate": 1.2593701503299736e-05, + "loss": 0.0501, + "step": 48270 + }, + { + "epoch": 2.25, + "learning_rate": 1.259291771824495e-05, + "loss": 0.0855, + "step": 48275 + }, + { + "epoch": 2.25, + "learning_rate": 1.2592133933190163e-05, + "loss": 0.0823, + "step": 48280 + }, + { + "epoch": 2.25, + "learning_rate": 1.2591350148135377e-05, + "loss": 0.145, + "step": 48285 + }, + { + "epoch": 2.25, + "learning_rate": 1.259056636308059e-05, + "loss": 0.0703, + "step": 48290 + }, + { + "epoch": 2.25, + "learning_rate": 1.2589782578025805e-05, + "loss": 0.143, + "step": 48295 + }, + { + "epoch": 2.25, + "learning_rate": 1.2588998792971017e-05, + "loss": 0.1223, + "step": 48300 + }, + { + "epoch": 2.25, + "learning_rate": 1.258821500791623e-05, + "loss": 0.1272, + "step": 48305 + }, + { + "epoch": 2.25, + "learning_rate": 1.2587431222861443e-05, + "loss": 0.1794, + "step": 48310 + }, + { + "epoch": 2.25, + "learning_rate": 1.2586647437806656e-05, + "loss": 0.2009, + "step": 48315 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585863652751871e-05, + "loss": 0.0941, + "step": 48320 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585079867697083e-05, + "loss": 0.0456, + "step": 48325 + }, + { + "epoch": 2.26, + "learning_rate": 1.2584296082642297e-05, + "loss": 0.0458, + "step": 48330 + }, + { + "epoch": 2.26, + "learning_rate": 1.258351229758751e-05, + "loss": 0.0858, + "step": 48335 + }, + { + "epoch": 2.26, + "learning_rate": 1.2582728512532725e-05, + "loss": 0.1233, + "step": 48340 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581944727477937e-05, + "loss": 0.0849, + "step": 48345 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581160942423151e-05, + "loss": 0.1166, + "step": 48350 + }, + { + "epoch": 2.26, + "learning_rate": 1.2580377157368364e-05, + "loss": 0.2759, + "step": 48355 + }, + { + "epoch": 2.26, + "learning_rate": 1.2579593372313579e-05, + "loss": 0.2935, + "step": 48360 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578809587258791e-05, + "loss": 0.3687, + "step": 48365 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578025802204004e-05, + "loss": 0.0169, + "step": 48370 + }, + { + "epoch": 2.26, + "learning_rate": 1.2577242017149217e-05, + "loss": 0.0272, + "step": 48375 + }, + { + "epoch": 2.26, + "learning_rate": 1.2576458232094431e-05, + "loss": 0.0452, + "step": 48380 + }, + { + "epoch": 2.26, + "learning_rate": 1.2575674447039645e-05, + "loss": 0.0606, + "step": 48385 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574890661984857e-05, + "loss": 0.1621, + "step": 48390 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574106876930073e-05, + "loss": 0.0805, + "step": 48395 + }, + { + "epoch": 2.26, + "learning_rate": 1.2573323091875285e-05, + "loss": 0.1328, + "step": 48400 + }, + { + "epoch": 2.26, + "learning_rate": 1.25725393068205e-05, + "loss": 0.1362, + "step": 48405 + }, + { + "epoch": 2.26, + "learning_rate": 1.2571755521765711e-05, + "loss": 0.1753, + "step": 48410 + }, + { + "epoch": 2.26, + "learning_rate": 1.2570971736710927e-05, + "loss": 0.2823, + "step": 48415 + }, + { + "epoch": 2.26, + "learning_rate": 1.257018795165614e-05, + "loss": 0.0633, + "step": 48420 + }, + { + "epoch": 2.26, + "learning_rate": 1.2569404166601353e-05, + "loss": 0.0347, + "step": 48425 + }, + { + "epoch": 2.26, + "learning_rate": 1.2568620381546565e-05, + "loss": 0.0578, + "step": 48430 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567836596491778e-05, + "loss": 0.0631, + "step": 48435 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567052811436993e-05, + "loss": 0.0828, + "step": 48440 + }, + { + "epoch": 2.26, + "learning_rate": 1.2566269026382205e-05, + "loss": 0.1166, + "step": 48445 + }, + { + "epoch": 2.26, + "learning_rate": 1.256548524132742e-05, + "loss": 0.1153, + "step": 48450 + }, + { + "epoch": 2.26, + "learning_rate": 1.2564701456272631e-05, + "loss": 0.1634, + "step": 48455 + }, + { + "epoch": 2.26, + "learning_rate": 1.2563917671217847e-05, + "loss": 0.2301, + "step": 48460 + }, + { + "epoch": 2.26, + "learning_rate": 1.256313388616306e-05, + "loss": 0.2951, + "step": 48465 + }, + { + "epoch": 2.26, + "learning_rate": 1.2562350101108273e-05, + "loss": 0.046, + "step": 48470 + }, + { + "epoch": 2.26, + "learning_rate": 1.2561566316053485e-05, + "loss": 0.0414, + "step": 48475 + }, + { + "epoch": 2.26, + "learning_rate": 1.2560782530998701e-05, + "loss": 0.0748, + "step": 48480 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559998745943913e-05, + "loss": 0.0706, + "step": 48485 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559214960889127e-05, + "loss": 0.1279, + "step": 48490 + }, + { + "epoch": 2.26, + "learning_rate": 1.2558431175834341e-05, + "loss": 0.0743, + "step": 48495 + }, + { + "epoch": 2.26, + "learning_rate": 1.2557647390779553e-05, + "loss": 0.1205, + "step": 48500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2556863605724767e-05, + "loss": 0.1911, + "step": 48505 + }, + { + "epoch": 2.26, + "learning_rate": 1.255607982066998e-05, + "loss": 0.1648, + "step": 48510 + }, + { + "epoch": 2.26, + "learning_rate": 1.2555296035615195e-05, + "loss": 0.2687, + "step": 48515 + }, + { + "epoch": 2.26, + "learning_rate": 1.2554512250560407e-05, + "loss": 0.0847, + "step": 48520 + }, + { + "epoch": 2.26, + "learning_rate": 1.2553728465505621e-05, + "loss": 0.1152, + "step": 48525 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552944680450833e-05, + "loss": 0.0977, + "step": 48530 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552160895396049e-05, + "loss": 0.0585, + "step": 48535 + }, + { + "epoch": 2.26, + "learning_rate": 1.2551377110341261e-05, + "loss": 0.1386, + "step": 48540 + }, + { + "epoch": 2.27, + "learning_rate": 1.2550593325286475e-05, + "loss": 0.1517, + "step": 48545 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549809540231687e-05, + "loss": 0.0852, + "step": 48550 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549025755176903e-05, + "loss": 0.1604, + "step": 48555 + }, + { + "epoch": 2.27, + "learning_rate": 1.2548241970122115e-05, + "loss": 0.3075, + "step": 48560 + }, + { + "epoch": 2.27, + "learning_rate": 1.2547458185067327e-05, + "loss": 0.3901, + "step": 48565 + }, + { + "epoch": 2.27, + "learning_rate": 1.2546674400012541e-05, + "loss": 0.0896, + "step": 48570 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545890614957753e-05, + "loss": 0.0526, + "step": 48575 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545106829902969e-05, + "loss": 0.0586, + "step": 48580 + }, + { + "epoch": 2.27, + "learning_rate": 1.2544323044848181e-05, + "loss": 0.0928, + "step": 48585 + }, + { + "epoch": 2.27, + "learning_rate": 1.2543539259793395e-05, + "loss": 0.1047, + "step": 48590 + }, + { + "epoch": 2.27, + "learning_rate": 1.2542755474738609e-05, + "loss": 0.144, + "step": 48595 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541971689683823e-05, + "loss": 0.1413, + "step": 48600 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541187904629035e-05, + "loss": 0.1345, + "step": 48605 + }, + { + "epoch": 2.27, + "learning_rate": 1.254040411957425e-05, + "loss": 0.2603, + "step": 48610 + }, + { + "epoch": 2.27, + "learning_rate": 1.2539620334519463e-05, + "loss": 0.3291, + "step": 48615 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538836549464677e-05, + "loss": 0.0751, + "step": 48620 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538052764409889e-05, + "loss": 0.0066, + "step": 48625 + }, + { + "epoch": 2.27, + "learning_rate": 1.2537268979355101e-05, + "loss": 0.076, + "step": 48630 + }, + { + "epoch": 2.27, + "learning_rate": 1.2536485194300317e-05, + "loss": 0.131, + "step": 48635 + }, + { + "epoch": 2.27, + "learning_rate": 1.2535701409245529e-05, + "loss": 0.0862, + "step": 48640 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534917624190743e-05, + "loss": 0.0589, + "step": 48645 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534133839135955e-05, + "loss": 0.1927, + "step": 48650 + }, + { + "epoch": 2.27, + "learning_rate": 1.253335005408117e-05, + "loss": 0.103, + "step": 48655 + }, + { + "epoch": 2.27, + "learning_rate": 1.2532566269026383e-05, + "loss": 0.305, + "step": 48660 + }, + { + "epoch": 2.27, + "learning_rate": 1.2531782483971597e-05, + "loss": 0.3637, + "step": 48665 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530998698916809e-05, + "loss": 0.0596, + "step": 48670 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530214913862025e-05, + "loss": 0.0414, + "step": 48675 + }, + { + "epoch": 2.27, + "learning_rate": 1.2529431128807237e-05, + "loss": 0.0615, + "step": 48680 + }, + { + "epoch": 2.27, + "learning_rate": 1.252864734375245e-05, + "loss": 0.1236, + "step": 48685 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527863558697663e-05, + "loss": 0.1301, + "step": 48690 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527079773642877e-05, + "loss": 0.0993, + "step": 48695 + }, + { + "epoch": 2.27, + "learning_rate": 1.252629598858809e-05, + "loss": 0.1616, + "step": 48700 + }, + { + "epoch": 2.27, + "learning_rate": 1.2525512203533303e-05, + "loss": 0.1623, + "step": 48705 + }, + { + "epoch": 2.27, + "learning_rate": 1.2524728418478519e-05, + "loss": 0.3241, + "step": 48710 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523944633423731e-05, + "loss": 0.2883, + "step": 48715 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523160848368945e-05, + "loss": 0.1131, + "step": 48720 + }, + { + "epoch": 2.27, + "learning_rate": 1.2522377063314157e-05, + "loss": 0.0587, + "step": 48725 + }, + { + "epoch": 2.27, + "learning_rate": 1.2521593278259373e-05, + "loss": 0.0835, + "step": 48730 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520809493204585e-05, + "loss": 0.0798, + "step": 48735 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520025708149799e-05, + "loss": 0.112, + "step": 48740 + }, + { + "epoch": 2.27, + "learning_rate": 1.2519241923095011e-05, + "loss": 0.0651, + "step": 48745 + }, + { + "epoch": 2.27, + "learning_rate": 1.2518458138040227e-05, + "loss": 0.248, + "step": 48750 + }, + { + "epoch": 2.27, + "learning_rate": 1.2517674352985439e-05, + "loss": 0.2368, + "step": 48755 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516890567930651e-05, + "loss": 0.1879, + "step": 48760 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516106782875865e-05, + "loss": 0.267, + "step": 48765 + }, + { + "epoch": 2.28, + "learning_rate": 1.2515322997821077e-05, + "loss": 0.0323, + "step": 48770 + }, + { + "epoch": 2.28, + "learning_rate": 1.2514539212766293e-05, + "loss": 0.0736, + "step": 48775 + }, + { + "epoch": 2.28, + "learning_rate": 1.2513755427711505e-05, + "loss": 0.0625, + "step": 48780 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512971642656719e-05, + "loss": 0.089, + "step": 48785 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512187857601931e-05, + "loss": 0.0695, + "step": 48790 + }, + { + "epoch": 2.28, + "learning_rate": 1.2511404072547147e-05, + "loss": 0.1172, + "step": 48795 + }, + { + "epoch": 2.28, + "learning_rate": 1.2510620287492359e-05, + "loss": 0.2187, + "step": 48800 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509836502437573e-05, + "loss": 0.1755, + "step": 48805 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509052717382787e-05, + "loss": 0.3494, + "step": 48810 + }, + { + "epoch": 2.28, + "learning_rate": 1.2508268932328e-05, + "loss": 0.3253, + "step": 48815 + }, + { + "epoch": 2.28, + "learning_rate": 1.2507485147273213e-05, + "loss": 0.1197, + "step": 48820 + }, + { + "epoch": 2.28, + "learning_rate": 1.2506701362218425e-05, + "loss": 0.0672, + "step": 48825 + }, + { + "epoch": 2.28, + "learning_rate": 1.250591757716364e-05, + "loss": 0.0997, + "step": 48830 + }, + { + "epoch": 2.28, + "learning_rate": 1.2505133792108853e-05, + "loss": 0.0682, + "step": 48835 + }, + { + "epoch": 2.28, + "learning_rate": 1.2504350007054067e-05, + "loss": 0.0378, + "step": 48840 + }, + { + "epoch": 2.28, + "learning_rate": 1.2503566221999279e-05, + "loss": 0.0569, + "step": 48845 + }, + { + "epoch": 2.28, + "learning_rate": 1.2502782436944494e-05, + "loss": 0.0817, + "step": 48850 + }, + { + "epoch": 2.28, + "learning_rate": 1.2501998651889707e-05, + "loss": 0.1632, + "step": 48855 + }, + { + "epoch": 2.28, + "learning_rate": 1.250121486683492e-05, + "loss": 0.2643, + "step": 48860 + }, + { + "epoch": 2.28, + "learning_rate": 1.2500431081780133e-05, + "loss": 0.419, + "step": 48865 + }, + { + "epoch": 2.28, + "learning_rate": 1.2499647296725348e-05, + "loss": 0.0458, + "step": 48870 + }, + { + "epoch": 2.28, + "learning_rate": 1.249886351167056e-05, + "loss": 0.0664, + "step": 48875 + }, + { + "epoch": 2.28, + "learning_rate": 1.2498079726615774e-05, + "loss": 0.0878, + "step": 48880 + }, + { + "epoch": 2.28, + "learning_rate": 1.2497295941560987e-05, + "loss": 0.069, + "step": 48885 + }, + { + "epoch": 2.28, + "learning_rate": 1.2496512156506199e-05, + "loss": 0.1751, + "step": 48890 + }, + { + "epoch": 2.28, + "learning_rate": 1.2495728371451415e-05, + "loss": 0.1207, + "step": 48895 + }, + { + "epoch": 2.28, + "learning_rate": 1.2494944586396627e-05, + "loss": 0.1929, + "step": 48900 + }, + { + "epoch": 2.28, + "learning_rate": 1.249416080134184e-05, + "loss": 0.2412, + "step": 48905 + }, + { + "epoch": 2.28, + "learning_rate": 1.2493377016287055e-05, + "loss": 0.2499, + "step": 48910 + }, + { + "epoch": 2.28, + "learning_rate": 1.2492593231232268e-05, + "loss": 0.3758, + "step": 48915 + }, + { + "epoch": 2.28, + "learning_rate": 1.249180944617748e-05, + "loss": 0.0861, + "step": 48920 + }, + { + "epoch": 2.28, + "learning_rate": 1.2491025661122696e-05, + "loss": 0.051, + "step": 48925 + }, + { + "epoch": 2.28, + "learning_rate": 1.2490241876067908e-05, + "loss": 0.0319, + "step": 48930 + }, + { + "epoch": 2.28, + "learning_rate": 1.2489458091013122e-05, + "loss": 0.0455, + "step": 48935 + }, + { + "epoch": 2.28, + "learning_rate": 1.2488674305958335e-05, + "loss": 0.0583, + "step": 48940 + }, + { + "epoch": 2.28, + "learning_rate": 1.248789052090355e-05, + "loss": 0.0818, + "step": 48945 + }, + { + "epoch": 2.28, + "learning_rate": 1.2487106735848762e-05, + "loss": 0.1859, + "step": 48950 + }, + { + "epoch": 2.28, + "learning_rate": 1.2486322950793975e-05, + "loss": 0.1899, + "step": 48955 + }, + { + "epoch": 2.28, + "learning_rate": 1.2485539165739189e-05, + "loss": 0.4047, + "step": 48960 + }, + { + "epoch": 2.28, + "learning_rate": 1.24847553806844e-05, + "loss": 0.3151, + "step": 48965 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483971595629616e-05, + "loss": 0.0545, + "step": 48970 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483187810574829e-05, + "loss": 0.0155, + "step": 48975 + }, + { + "epoch": 2.29, + "learning_rate": 1.2482404025520042e-05, + "loss": 0.0754, + "step": 48980 + }, + { + "epoch": 2.29, + "learning_rate": 1.2481620240465255e-05, + "loss": 0.039, + "step": 48985 + }, + { + "epoch": 2.29, + "learning_rate": 1.248083645541047e-05, + "loss": 0.08, + "step": 48990 + }, + { + "epoch": 2.29, + "learning_rate": 1.2480052670355682e-05, + "loss": 0.1233, + "step": 48995 + }, + { + "epoch": 2.29, + "learning_rate": 1.2479268885300896e-05, + "loss": 0.1165, + "step": 49000 + }, + { + "epoch": 2.29, + "learning_rate": 1.2478485100246109e-05, + "loss": 0.1813, + "step": 49005 + }, + { + "epoch": 2.29, + "learning_rate": 1.2477701315191324e-05, + "loss": 0.2639, + "step": 49010 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476917530136536e-05, + "loss": 0.3061, + "step": 49015 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476133745081749e-05, + "loss": 0.0448, + "step": 49020 + }, + { + "epoch": 2.29, + "learning_rate": 1.2475349960026964e-05, + "loss": 0.0651, + "step": 49025 + }, + { + "epoch": 2.29, + "learning_rate": 1.2474566174972176e-05, + "loss": 0.0556, + "step": 49030 + }, + { + "epoch": 2.29, + "learning_rate": 1.247378238991739e-05, + "loss": 0.0711, + "step": 49035 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472998604862603e-05, + "loss": 0.0944, + "step": 49040 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472214819807818e-05, + "loss": 0.1723, + "step": 49045 + }, + { + "epoch": 2.29, + "learning_rate": 1.247143103475303e-05, + "loss": 0.1437, + "step": 49050 + }, + { + "epoch": 2.29, + "learning_rate": 1.2470647249698244e-05, + "loss": 0.1611, + "step": 49055 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469863464643456e-05, + "loss": 0.2474, + "step": 49060 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469079679588672e-05, + "loss": 0.3262, + "step": 49065 + }, + { + "epoch": 2.29, + "learning_rate": 1.2468295894533884e-05, + "loss": 0.0379, + "step": 49070 + }, + { + "epoch": 2.29, + "learning_rate": 1.2467512109479098e-05, + "loss": 0.0395, + "step": 49075 + }, + { + "epoch": 2.29, + "learning_rate": 1.246672832442431e-05, + "loss": 0.0432, + "step": 49080 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465944539369523e-05, + "loss": 0.075, + "step": 49085 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465160754314738e-05, + "loss": 0.051, + "step": 49090 + }, + { + "epoch": 2.29, + "learning_rate": 1.246437696925995e-05, + "loss": 0.1434, + "step": 49095 + }, + { + "epoch": 2.29, + "learning_rate": 1.2463593184205164e-05, + "loss": 0.1986, + "step": 49100 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462809399150377e-05, + "loss": 0.1596, + "step": 49105 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462025614095592e-05, + "loss": 0.3813, + "step": 49110 + }, + { + "epoch": 2.29, + "learning_rate": 1.2461241829040804e-05, + "loss": 0.2625, + "step": 49115 + }, + { + "epoch": 2.29, + "learning_rate": 1.2460458043986018e-05, + "loss": 0.0344, + "step": 49120 + }, + { + "epoch": 2.29, + "learning_rate": 1.2459674258931232e-05, + "loss": 0.034, + "step": 49125 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458890473876446e-05, + "loss": 0.0638, + "step": 49130 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458106688821658e-05, + "loss": 0.045, + "step": 49135 + }, + { + "epoch": 2.29, + "learning_rate": 1.2457322903766872e-05, + "loss": 0.0743, + "step": 49140 + }, + { + "epoch": 2.29, + "learning_rate": 1.2456539118712086e-05, + "loss": 0.1166, + "step": 49145 + }, + { + "epoch": 2.29, + "learning_rate": 1.2455755333657298e-05, + "loss": 0.1078, + "step": 49150 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454971548602512e-05, + "loss": 0.1802, + "step": 49155 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454187763547724e-05, + "loss": 0.3078, + "step": 49160 + }, + { + "epoch": 2.29, + "learning_rate": 1.245340397849294e-05, + "loss": 0.3489, + "step": 49165 + }, + { + "epoch": 2.29, + "learning_rate": 1.2452620193438152e-05, + "loss": 0.0332, + "step": 49170 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451836408383366e-05, + "loss": 0.0592, + "step": 49175 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451052623328578e-05, + "loss": 0.0291, + "step": 49180 + }, + { + "epoch": 2.3, + "learning_rate": 1.2450268838273794e-05, + "loss": 0.0695, + "step": 49185 + }, + { + "epoch": 2.3, + "learning_rate": 1.2449485053219006e-05, + "loss": 0.081, + "step": 49190 + }, + { + "epoch": 2.3, + "learning_rate": 1.244870126816422e-05, + "loss": 0.1311, + "step": 49195 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447917483109432e-05, + "loss": 0.1058, + "step": 49200 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447133698054648e-05, + "loss": 0.1388, + "step": 49205 + }, + { + "epoch": 2.3, + "learning_rate": 1.244634991299986e-05, + "loss": 0.3073, + "step": 49210 + }, + { + "epoch": 2.3, + "learning_rate": 1.2445566127945072e-05, + "loss": 0.2426, + "step": 49215 + }, + { + "epoch": 2.3, + "learning_rate": 1.2444782342890286e-05, + "loss": 0.0957, + "step": 49220 + }, + { + "epoch": 2.3, + "learning_rate": 1.24439985578355e-05, + "loss": 0.055, + "step": 49225 + }, + { + "epoch": 2.3, + "learning_rate": 1.2443214772780714e-05, + "loss": 0.0305, + "step": 49230 + }, + { + "epoch": 2.3, + "learning_rate": 1.2442430987725926e-05, + "loss": 0.1408, + "step": 49235 + }, + { + "epoch": 2.3, + "learning_rate": 1.2441647202671142e-05, + "loss": 0.0908, + "step": 49240 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440863417616354e-05, + "loss": 0.0978, + "step": 49245 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440079632561568e-05, + "loss": 0.1482, + "step": 49250 + }, + { + "epoch": 2.3, + "learning_rate": 1.243929584750678e-05, + "loss": 0.2039, + "step": 49255 + }, + { + "epoch": 2.3, + "learning_rate": 1.2438512062451996e-05, + "loss": 0.3077, + "step": 49260 + }, + { + "epoch": 2.3, + "learning_rate": 1.2437728277397208e-05, + "loss": 0.1668, + "step": 49265 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436944492342422e-05, + "loss": 0.0675, + "step": 49270 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436160707287634e-05, + "loss": 0.0325, + "step": 49275 + }, + { + "epoch": 2.3, + "learning_rate": 1.2435376922232846e-05, + "loss": 0.0352, + "step": 49280 + }, + { + "epoch": 2.3, + "learning_rate": 1.2434593137178062e-05, + "loss": 0.0273, + "step": 49285 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433809352123274e-05, + "loss": 0.1357, + "step": 49290 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433025567068488e-05, + "loss": 0.1806, + "step": 49295 + }, + { + "epoch": 2.3, + "learning_rate": 1.24322417820137e-05, + "loss": 0.1102, + "step": 49300 + }, + { + "epoch": 2.3, + "learning_rate": 1.2431457996958916e-05, + "loss": 0.2882, + "step": 49305 + }, + { + "epoch": 2.3, + "learning_rate": 1.2430674211904128e-05, + "loss": 0.301, + "step": 49310 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429890426849342e-05, + "loss": 0.2907, + "step": 49315 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429106641794554e-05, + "loss": 0.0655, + "step": 49320 + }, + { + "epoch": 2.3, + "learning_rate": 1.242832285673977e-05, + "loss": 0.0572, + "step": 49325 + }, + { + "epoch": 2.3, + "learning_rate": 1.2427539071684982e-05, + "loss": 0.0607, + "step": 49330 + }, + { + "epoch": 2.3, + "learning_rate": 1.2426755286630196e-05, + "loss": 0.0779, + "step": 49335 + }, + { + "epoch": 2.3, + "learning_rate": 1.242597150157541e-05, + "loss": 0.0683, + "step": 49340 + }, + { + "epoch": 2.3, + "learning_rate": 1.2425187716520622e-05, + "loss": 0.1186, + "step": 49345 + }, + { + "epoch": 2.3, + "learning_rate": 1.2424403931465836e-05, + "loss": 0.1542, + "step": 49350 + }, + { + "epoch": 2.3, + "learning_rate": 1.2423620146411048e-05, + "loss": 0.11, + "step": 49355 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422836361356264e-05, + "loss": 0.2608, + "step": 49360 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422052576301476e-05, + "loss": 0.3212, + "step": 49365 + }, + { + "epoch": 2.3, + "learning_rate": 1.242126879124669e-05, + "loss": 0.031, + "step": 49370 + }, + { + "epoch": 2.3, + "learning_rate": 1.2420485006191902e-05, + "loss": 0.0297, + "step": 49375 + }, + { + "epoch": 2.3, + "learning_rate": 1.2419701221137118e-05, + "loss": 0.1169, + "step": 49380 + }, + { + "epoch": 2.3, + "learning_rate": 1.241891743608233e-05, + "loss": 0.0557, + "step": 49385 + }, + { + "epoch": 2.3, + "learning_rate": 1.2418133651027544e-05, + "loss": 0.1283, + "step": 49390 + }, + { + "epoch": 2.3, + "learning_rate": 1.2417349865972756e-05, + "loss": 0.114, + "step": 49395 + }, + { + "epoch": 2.31, + "learning_rate": 1.2416566080917972e-05, + "loss": 0.0548, + "step": 49400 + }, + { + "epoch": 2.31, + "learning_rate": 1.2415782295863184e-05, + "loss": 0.1379, + "step": 49405 + }, + { + "epoch": 2.31, + "learning_rate": 1.2414998510808396e-05, + "loss": 0.2058, + "step": 49410 + }, + { + "epoch": 2.31, + "learning_rate": 1.241421472575361e-05, + "loss": 0.2836, + "step": 49415 + }, + { + "epoch": 2.31, + "learning_rate": 1.2413430940698822e-05, + "loss": 0.0669, + "step": 49420 + }, + { + "epoch": 2.31, + "learning_rate": 1.2412647155644038e-05, + "loss": 0.0242, + "step": 49425 + }, + { + "epoch": 2.31, + "learning_rate": 1.241186337058925e-05, + "loss": 0.0977, + "step": 49430 + }, + { + "epoch": 2.31, + "learning_rate": 1.2411079585534464e-05, + "loss": 0.0507, + "step": 49435 + }, + { + "epoch": 2.31, + "learning_rate": 1.2410295800479678e-05, + "loss": 0.063, + "step": 49440 + }, + { + "epoch": 2.31, + "learning_rate": 1.2409512015424892e-05, + "loss": 0.1527, + "step": 49445 + }, + { + "epoch": 2.31, + "learning_rate": 1.2408728230370104e-05, + "loss": 0.0736, + "step": 49450 + }, + { + "epoch": 2.31, + "learning_rate": 1.240794444531532e-05, + "loss": 0.2219, + "step": 49455 + }, + { + "epoch": 2.31, + "learning_rate": 1.2407160660260532e-05, + "loss": 0.2611, + "step": 49460 + }, + { + "epoch": 2.31, + "learning_rate": 1.2406376875205746e-05, + "loss": 0.3693, + "step": 49465 + }, + { + "epoch": 2.31, + "learning_rate": 1.2405593090150958e-05, + "loss": 0.1247, + "step": 49470 + }, + { + "epoch": 2.31, + "learning_rate": 1.240480930509617e-05, + "loss": 0.0177, + "step": 49475 + }, + { + "epoch": 2.31, + "learning_rate": 1.2404025520041386e-05, + "loss": 0.0873, + "step": 49480 + }, + { + "epoch": 2.31, + "learning_rate": 1.2403241734986598e-05, + "loss": 0.0888, + "step": 49485 + }, + { + "epoch": 2.31, + "learning_rate": 1.2402457949931812e-05, + "loss": 0.0582, + "step": 49490 + }, + { + "epoch": 2.31, + "learning_rate": 1.2401674164877024e-05, + "loss": 0.1075, + "step": 49495 + }, + { + "epoch": 2.31, + "learning_rate": 1.240089037982224e-05, + "loss": 0.1859, + "step": 49500 + }, + { + "epoch": 2.31, + "learning_rate": 1.2400106594767452e-05, + "loss": 0.1537, + "step": 49505 + }, + { + "epoch": 2.31, + "learning_rate": 1.2399322809712666e-05, + "loss": 0.2297, + "step": 49510 + }, + { + "epoch": 2.31, + "learning_rate": 1.2398695781668836e-05, + "loss": 0.2882, + "step": 49515 + }, + { + "epoch": 2.31, + "learning_rate": 1.239791199661405e-05, + "loss": 0.0205, + "step": 49520 + }, + { + "epoch": 2.31, + "learning_rate": 1.2397128211559264e-05, + "loss": 0.0498, + "step": 49525 + }, + { + "epoch": 2.31, + "learning_rate": 1.2396344426504476e-05, + "loss": 0.0858, + "step": 49530 + }, + { + "epoch": 2.31, + "learning_rate": 1.2395560641449692e-05, + "loss": 0.1835, + "step": 49535 + }, + { + "epoch": 2.31, + "learning_rate": 1.2394776856394904e-05, + "loss": 0.1171, + "step": 49540 + }, + { + "epoch": 2.31, + "learning_rate": 1.2393993071340116e-05, + "loss": 0.0876, + "step": 49545 + }, + { + "epoch": 2.31, + "learning_rate": 1.239320928628533e-05, + "loss": 0.0962, + "step": 49550 + }, + { + "epoch": 2.31, + "learning_rate": 1.2392425501230542e-05, + "loss": 0.0861, + "step": 49555 + }, + { + "epoch": 2.31, + "learning_rate": 1.2391641716175758e-05, + "loss": 0.2315, + "step": 49560 + }, + { + "epoch": 2.31, + "learning_rate": 1.239085793112097e-05, + "loss": 0.278, + "step": 49565 + }, + { + "epoch": 2.31, + "learning_rate": 1.2390074146066184e-05, + "loss": 0.0506, + "step": 49570 + }, + { + "epoch": 2.31, + "learning_rate": 1.2389290361011396e-05, + "loss": 0.0443, + "step": 49575 + }, + { + "epoch": 2.31, + "learning_rate": 1.2388506575956612e-05, + "loss": 0.1289, + "step": 49580 + }, + { + "epoch": 2.31, + "learning_rate": 1.2387722790901824e-05, + "loss": 0.0433, + "step": 49585 + }, + { + "epoch": 2.31, + "learning_rate": 1.2386939005847038e-05, + "loss": 0.1379, + "step": 49590 + }, + { + "epoch": 2.31, + "learning_rate": 1.238615522079225e-05, + "loss": 0.108, + "step": 49595 + }, + { + "epoch": 2.31, + "learning_rate": 1.2385371435737466e-05, + "loss": 0.1276, + "step": 49600 + }, + { + "epoch": 2.31, + "learning_rate": 1.2384587650682678e-05, + "loss": 0.1717, + "step": 49605 + }, + { + "epoch": 2.31, + "learning_rate": 1.238380386562789e-05, + "loss": 0.2584, + "step": 49610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2383020080573106e-05, + "loss": 0.2265, + "step": 49615 + }, + { + "epoch": 2.32, + "learning_rate": 1.2382236295518318e-05, + "loss": 0.1069, + "step": 49620 + }, + { + "epoch": 2.32, + "learning_rate": 1.2381452510463532e-05, + "loss": 0.0404, + "step": 49625 + }, + { + "epoch": 2.32, + "learning_rate": 1.2380668725408744e-05, + "loss": 0.0704, + "step": 49630 + }, + { + "epoch": 2.32, + "learning_rate": 1.237988494035396e-05, + "loss": 0.0985, + "step": 49635 + }, + { + "epoch": 2.32, + "learning_rate": 1.2379101155299172e-05, + "loss": 0.084, + "step": 49640 + }, + { + "epoch": 2.32, + "learning_rate": 1.2378317370244386e-05, + "loss": 0.2373, + "step": 49645 + }, + { + "epoch": 2.32, + "learning_rate": 1.2377533585189598e-05, + "loss": 0.0689, + "step": 49650 + }, + { + "epoch": 2.32, + "learning_rate": 1.2376749800134814e-05, + "loss": 0.1609, + "step": 49655 + }, + { + "epoch": 2.32, + "learning_rate": 1.2375966015080026e-05, + "loss": 0.2887, + "step": 49660 + }, + { + "epoch": 2.32, + "learning_rate": 1.237518223002524e-05, + "loss": 0.2715, + "step": 49665 + }, + { + "epoch": 2.32, + "learning_rate": 1.2374398444970452e-05, + "loss": 0.0749, + "step": 49670 + }, + { + "epoch": 2.32, + "learning_rate": 1.2373614659915664e-05, + "loss": 0.0717, + "step": 49675 + }, + { + "epoch": 2.32, + "learning_rate": 1.237283087486088e-05, + "loss": 0.0777, + "step": 49680 + }, + { + "epoch": 2.32, + "learning_rate": 1.2372047089806092e-05, + "loss": 0.0669, + "step": 49685 + }, + { + "epoch": 2.32, + "learning_rate": 1.2371263304751306e-05, + "loss": 0.0836, + "step": 49690 + }, + { + "epoch": 2.32, + "learning_rate": 1.2370479519696518e-05, + "loss": 0.2553, + "step": 49695 + }, + { + "epoch": 2.32, + "learning_rate": 1.2369695734641734e-05, + "loss": 0.1672, + "step": 49700 + }, + { + "epoch": 2.32, + "learning_rate": 1.2368911949586946e-05, + "loss": 0.1128, + "step": 49705 + }, + { + "epoch": 2.32, + "learning_rate": 1.236812816453216e-05, + "loss": 0.2423, + "step": 49710 + }, + { + "epoch": 2.32, + "learning_rate": 1.2367344379477374e-05, + "loss": 0.3124, + "step": 49715 + }, + { + "epoch": 2.32, + "learning_rate": 1.2366560594422588e-05, + "loss": 0.0414, + "step": 49720 + }, + { + "epoch": 2.32, + "learning_rate": 1.23657768093678e-05, + "loss": 0.0981, + "step": 49725 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364993024313014e-05, + "loss": 0.1131, + "step": 49730 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364209239258228e-05, + "loss": 0.0668, + "step": 49735 + }, + { + "epoch": 2.32, + "learning_rate": 1.236342545420344e-05, + "loss": 0.1405, + "step": 49740 + }, + { + "epoch": 2.32, + "learning_rate": 1.2362641669148654e-05, + "loss": 0.0965, + "step": 49745 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361857884093866e-05, + "loss": 0.1463, + "step": 49750 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361074099039082e-05, + "loss": 0.2175, + "step": 49755 + }, + { + "epoch": 2.32, + "learning_rate": 1.2360290313984294e-05, + "loss": 0.1888, + "step": 49760 + }, + { + "epoch": 2.32, + "learning_rate": 1.2359506528929508e-05, + "loss": 0.3132, + "step": 49765 + }, + { + "epoch": 2.32, + "learning_rate": 1.235872274387472e-05, + "loss": 0.1664, + "step": 49770 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357938958819935e-05, + "loss": 0.0261, + "step": 49775 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357155173765148e-05, + "loss": 0.015, + "step": 49780 + }, + { + "epoch": 2.32, + "learning_rate": 1.2356371388710362e-05, + "loss": 0.0923, + "step": 49785 + }, + { + "epoch": 2.32, + "learning_rate": 1.2355587603655574e-05, + "loss": 0.0871, + "step": 49790 + }, + { + "epoch": 2.32, + "learning_rate": 1.235480381860079e-05, + "loss": 0.1367, + "step": 49795 + }, + { + "epoch": 2.32, + "learning_rate": 1.2354020033546002e-05, + "loss": 0.1528, + "step": 49800 + }, + { + "epoch": 2.32, + "learning_rate": 1.2353236248491214e-05, + "loss": 0.1766, + "step": 49805 + }, + { + "epoch": 2.32, + "learning_rate": 1.2352452463436428e-05, + "loss": 0.2554, + "step": 49810 + }, + { + "epoch": 2.32, + "learning_rate": 1.2351668678381642e-05, + "loss": 0.3166, + "step": 49815 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350884893326856e-05, + "loss": 0.0502, + "step": 49820 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350101108272068e-05, + "loss": 0.0437, + "step": 49825 + }, + { + "epoch": 2.33, + "learning_rate": 1.2349317323217282e-05, + "loss": 0.0936, + "step": 49830 + }, + { + "epoch": 2.33, + "learning_rate": 1.2348533538162496e-05, + "loss": 0.0694, + "step": 49835 + }, + { + "epoch": 2.33, + "learning_rate": 1.234774975310771e-05, + "loss": 0.0366, + "step": 49840 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346965968052922e-05, + "loss": 0.0825, + "step": 49845 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346182182998137e-05, + "loss": 0.1329, + "step": 49850 + }, + { + "epoch": 2.33, + "learning_rate": 1.234539839794335e-05, + "loss": 0.0959, + "step": 49855 + }, + { + "epoch": 2.33, + "learning_rate": 1.2344614612888563e-05, + "loss": 0.1717, + "step": 49860 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343830827833776e-05, + "loss": 0.2998, + "step": 49865 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343047042778988e-05, + "loss": 0.123, + "step": 49870 + }, + { + "epoch": 2.33, + "learning_rate": 1.2342263257724203e-05, + "loss": 0.0147, + "step": 49875 + }, + { + "epoch": 2.33, + "learning_rate": 1.2341479472669416e-05, + "loss": 0.0361, + "step": 49880 + }, + { + "epoch": 2.33, + "learning_rate": 1.234069568761463e-05, + "loss": 0.089, + "step": 49885 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339911902559842e-05, + "loss": 0.057, + "step": 49890 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339128117505057e-05, + "loss": 0.066, + "step": 49895 + }, + { + "epoch": 2.33, + "learning_rate": 1.233834433245027e-05, + "loss": 0.2305, + "step": 49900 + }, + { + "epoch": 2.33, + "learning_rate": 1.2337560547395483e-05, + "loss": 0.2467, + "step": 49905 + }, + { + "epoch": 2.33, + "learning_rate": 1.2336776762340696e-05, + "loss": 0.226, + "step": 49910 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335992977285911e-05, + "loss": 0.2258, + "step": 49915 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335209192231123e-05, + "loss": 0.0611, + "step": 49920 + }, + { + "epoch": 2.33, + "learning_rate": 1.2334425407176337e-05, + "loss": 0.0755, + "step": 49925 + }, + { + "epoch": 2.33, + "learning_rate": 1.2333641622121551e-05, + "loss": 0.0437, + "step": 49930 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332857837066764e-05, + "loss": 0.1033, + "step": 49935 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332074052011977e-05, + "loss": 0.0961, + "step": 49940 + }, + { + "epoch": 2.33, + "learning_rate": 1.233129026695719e-05, + "loss": 0.0845, + "step": 49945 + }, + { + "epoch": 2.33, + "learning_rate": 1.2330506481902405e-05, + "loss": 0.1404, + "step": 49950 + }, + { + "epoch": 2.33, + "learning_rate": 1.2329722696847617e-05, + "loss": 0.2093, + "step": 49955 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328938911792831e-05, + "loss": 0.1984, + "step": 49960 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328155126738044e-05, + "loss": 0.2746, + "step": 49965 + }, + { + "epoch": 2.33, + "learning_rate": 1.232737134168326e-05, + "loss": 0.0631, + "step": 49970 + }, + { + "epoch": 2.33, + "learning_rate": 1.2326587556628471e-05, + "loss": 0.0268, + "step": 49975 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325803771573685e-05, + "loss": 0.0344, + "step": 49980 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325019986518897e-05, + "loss": 0.0861, + "step": 49985 + }, + { + "epoch": 2.33, + "learning_rate": 1.2324236201464113e-05, + "loss": 0.1821, + "step": 49990 + }, + { + "epoch": 2.33, + "learning_rate": 1.2323452416409325e-05, + "loss": 0.1076, + "step": 49995 + }, + { + "epoch": 2.33, + "learning_rate": 1.2322668631354538e-05, + "loss": 0.1184, + "step": 50000 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321884846299751e-05, + "loss": 0.0798, + "step": 50005 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321101061244964e-05, + "loss": 0.2843, + "step": 50010 + }, + { + "epoch": 2.33, + "learning_rate": 1.232031727619018e-05, + "loss": 0.1617, + "step": 50015 + }, + { + "epoch": 2.33, + "learning_rate": 1.2319533491135391e-05, + "loss": 0.0382, + "step": 50020 + }, + { + "epoch": 2.33, + "learning_rate": 1.2318749706080605e-05, + "loss": 0.0547, + "step": 50025 + }, + { + "epoch": 2.33, + "learning_rate": 1.231796592102582e-05, + "loss": 0.0349, + "step": 50030 + }, + { + "epoch": 2.33, + "learning_rate": 1.2317182135971033e-05, + "loss": 0.0498, + "step": 50035 + }, + { + "epoch": 2.33, + "learning_rate": 1.2316398350916245e-05, + "loss": 0.0729, + "step": 50040 + }, + { + "epoch": 2.34, + "learning_rate": 1.231561456586146e-05, + "loss": 0.1601, + "step": 50045 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314830780806673e-05, + "loss": 0.1798, + "step": 50050 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314046995751887e-05, + "loss": 0.1981, + "step": 50055 + }, + { + "epoch": 2.34, + "learning_rate": 1.23132632106971e-05, + "loss": 0.3657, + "step": 50060 + }, + { + "epoch": 2.34, + "learning_rate": 1.2312479425642312e-05, + "loss": 0.3816, + "step": 50065 + }, + { + "epoch": 2.34, + "learning_rate": 1.2311695640587527e-05, + "loss": 0.0421, + "step": 50070 + }, + { + "epoch": 2.34, + "learning_rate": 1.231091185553274e-05, + "loss": 0.0836, + "step": 50075 + }, + { + "epoch": 2.34, + "learning_rate": 1.2310128070477953e-05, + "loss": 0.0849, + "step": 50080 + }, + { + "epoch": 2.34, + "learning_rate": 1.2309344285423165e-05, + "loss": 0.1037, + "step": 50085 + }, + { + "epoch": 2.34, + "learning_rate": 1.2308560500368381e-05, + "loss": 0.1266, + "step": 50090 + }, + { + "epoch": 2.34, + "learning_rate": 1.2307776715313593e-05, + "loss": 0.1167, + "step": 50095 + }, + { + "epoch": 2.34, + "learning_rate": 1.2306992930258807e-05, + "loss": 0.1929, + "step": 50100 + }, + { + "epoch": 2.34, + "learning_rate": 1.230620914520402e-05, + "loss": 0.2002, + "step": 50105 + }, + { + "epoch": 2.34, + "learning_rate": 1.2305425360149235e-05, + "loss": 0.3381, + "step": 50110 + }, + { + "epoch": 2.34, + "learning_rate": 1.2304641575094447e-05, + "loss": 0.3687, + "step": 50115 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303857790039661e-05, + "loss": 0.0699, + "step": 50120 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303074004984873e-05, + "loss": 0.0607, + "step": 50125 + }, + { + "epoch": 2.34, + "learning_rate": 1.2302290219930087e-05, + "loss": 0.0523, + "step": 50130 + }, + { + "epoch": 2.34, + "learning_rate": 1.2301506434875301e-05, + "loss": 0.0631, + "step": 50135 + }, + { + "epoch": 2.34, + "learning_rate": 1.2300722649820513e-05, + "loss": 0.1482, + "step": 50140 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299938864765727e-05, + "loss": 0.1021, + "step": 50145 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299155079710941e-05, + "loss": 0.1413, + "step": 50150 + }, + { + "epoch": 2.34, + "learning_rate": 1.2298371294656155e-05, + "loss": 0.1853, + "step": 50155 + }, + { + "epoch": 2.34, + "learning_rate": 1.2297587509601367e-05, + "loss": 0.2019, + "step": 50160 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296803724546583e-05, + "loss": 0.3643, + "step": 50165 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296019939491795e-05, + "loss": 0.0704, + "step": 50170 + }, + { + "epoch": 2.34, + "learning_rate": 1.2295236154437009e-05, + "loss": 0.0761, + "step": 50175 + }, + { + "epoch": 2.34, + "learning_rate": 1.2294452369382221e-05, + "loss": 0.0657, + "step": 50180 + }, + { + "epoch": 2.34, + "learning_rate": 1.2293668584327437e-05, + "loss": 0.0956, + "step": 50185 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292884799272649e-05, + "loss": 0.057, + "step": 50190 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292101014217861e-05, + "loss": 0.0977, + "step": 50195 + }, + { + "epoch": 2.34, + "learning_rate": 1.2291317229163075e-05, + "loss": 0.1418, + "step": 50200 + }, + { + "epoch": 2.34, + "learning_rate": 1.2290533444108287e-05, + "loss": 0.2145, + "step": 50205 + }, + { + "epoch": 2.34, + "learning_rate": 1.2289749659053503e-05, + "loss": 0.1492, + "step": 50210 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288965873998715e-05, + "loss": 0.3733, + "step": 50215 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288182088943929e-05, + "loss": 0.0474, + "step": 50220 + }, + { + "epoch": 2.34, + "learning_rate": 1.2287398303889141e-05, + "loss": 0.0178, + "step": 50225 + }, + { + "epoch": 2.34, + "learning_rate": 1.2286614518834357e-05, + "loss": 0.1194, + "step": 50230 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285830733779569e-05, + "loss": 0.0799, + "step": 50235 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285046948724783e-05, + "loss": 0.1206, + "step": 50240 + }, + { + "epoch": 2.34, + "learning_rate": 1.2284263163669997e-05, + "loss": 0.1841, + "step": 50245 + }, + { + "epoch": 2.34, + "learning_rate": 1.228347937861521e-05, + "loss": 0.1276, + "step": 50250 + }, + { + "epoch": 2.34, + "learning_rate": 1.2282695593560423e-05, + "loss": 0.2027, + "step": 50255 + }, + { + "epoch": 2.35, + "learning_rate": 1.2281911808505635e-05, + "loss": 0.1799, + "step": 50260 + }, + { + "epoch": 2.35, + "learning_rate": 1.228112802345085e-05, + "loss": 0.375, + "step": 50265 + }, + { + "epoch": 2.35, + "learning_rate": 1.2280344238396063e-05, + "loss": 0.0396, + "step": 50270 + }, + { + "epoch": 2.35, + "learning_rate": 1.2279560453341277e-05, + "loss": 0.0545, + "step": 50275 + }, + { + "epoch": 2.35, + "learning_rate": 1.2278776668286489e-05, + "loss": 0.0606, + "step": 50280 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277992883231705e-05, + "loss": 0.0914, + "step": 50285 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277209098176917e-05, + "loss": 0.0412, + "step": 50290 + }, + { + "epoch": 2.35, + "learning_rate": 1.2276425313122131e-05, + "loss": 0.1434, + "step": 50295 + }, + { + "epoch": 2.35, + "learning_rate": 1.2275641528067343e-05, + "loss": 0.2036, + "step": 50300 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274857743012559e-05, + "loss": 0.219, + "step": 50305 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274073957957771e-05, + "loss": 0.263, + "step": 50310 + }, + { + "epoch": 2.35, + "learning_rate": 1.2273290172902985e-05, + "loss": 0.2972, + "step": 50315 + }, + { + "epoch": 2.35, + "learning_rate": 1.2272506387848197e-05, + "loss": 0.0396, + "step": 50320 + }, + { + "epoch": 2.35, + "learning_rate": 1.227172260279341e-05, + "loss": 0.0763, + "step": 50325 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270938817738625e-05, + "loss": 0.0389, + "step": 50330 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270155032683837e-05, + "loss": 0.0813, + "step": 50335 + }, + { + "epoch": 2.35, + "learning_rate": 1.2269371247629051e-05, + "loss": 0.1306, + "step": 50340 + }, + { + "epoch": 2.35, + "learning_rate": 1.2268587462574265e-05, + "loss": 0.0779, + "step": 50345 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267803677519479e-05, + "loss": 0.187, + "step": 50350 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267019892464691e-05, + "loss": 0.1718, + "step": 50355 + }, + { + "epoch": 2.35, + "learning_rate": 1.2266236107409905e-05, + "loss": 0.3207, + "step": 50360 + }, + { + "epoch": 2.35, + "learning_rate": 1.2265452322355119e-05, + "loss": 0.2648, + "step": 50365 + }, + { + "epoch": 2.35, + "learning_rate": 1.2264668537300333e-05, + "loss": 0.0264, + "step": 50370 + }, + { + "epoch": 2.35, + "learning_rate": 1.2263884752245545e-05, + "loss": 0.0418, + "step": 50375 + }, + { + "epoch": 2.35, + "learning_rate": 1.226310096719076e-05, + "loss": 0.0412, + "step": 50380 + }, + { + "epoch": 2.35, + "learning_rate": 1.2262317182135973e-05, + "loss": 0.0641, + "step": 50385 + }, + { + "epoch": 2.35, + "learning_rate": 1.2261533397081185e-05, + "loss": 0.1354, + "step": 50390 + }, + { + "epoch": 2.35, + "learning_rate": 1.2260749612026399e-05, + "loss": 0.0955, + "step": 50395 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259965826971611e-05, + "loss": 0.1032, + "step": 50400 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259182041916827e-05, + "loss": 0.2132, + "step": 50405 + }, + { + "epoch": 2.35, + "learning_rate": 1.2258398256862039e-05, + "loss": 0.2025, + "step": 50410 + }, + { + "epoch": 2.35, + "learning_rate": 1.2257614471807253e-05, + "loss": 0.2278, + "step": 50415 + }, + { + "epoch": 2.35, + "learning_rate": 1.2256830686752465e-05, + "loss": 0.0977, + "step": 50420 + }, + { + "epoch": 2.35, + "learning_rate": 1.225604690169768e-05, + "loss": 0.0473, + "step": 50425 + }, + { + "epoch": 2.35, + "learning_rate": 1.2255263116642893e-05, + "loss": 0.0606, + "step": 50430 + }, + { + "epoch": 2.35, + "learning_rate": 1.2254479331588107e-05, + "loss": 0.0981, + "step": 50435 + }, + { + "epoch": 2.35, + "learning_rate": 1.2253695546533319e-05, + "loss": 0.0818, + "step": 50440 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252911761478534e-05, + "loss": 0.1135, + "step": 50445 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252127976423747e-05, + "loss": 0.1628, + "step": 50450 + }, + { + "epoch": 2.35, + "learning_rate": 1.2251344191368959e-05, + "loss": 0.1521, + "step": 50455 + }, + { + "epoch": 2.35, + "learning_rate": 1.2250560406314174e-05, + "loss": 0.3401, + "step": 50460 + }, + { + "epoch": 2.35, + "learning_rate": 1.2249776621259387e-05, + "loss": 0.2756, + "step": 50465 + }, + { + "epoch": 2.35, + "learning_rate": 1.22489928362046e-05, + "loss": 0.0554, + "step": 50470 + }, + { + "epoch": 2.36, + "learning_rate": 1.2248209051149813e-05, + "loss": 0.0253, + "step": 50475 + }, + { + "epoch": 2.36, + "learning_rate": 1.2247425266095028e-05, + "loss": 0.1059, + "step": 50480 + }, + { + "epoch": 2.36, + "learning_rate": 1.224664148104024e-05, + "loss": 0.1775, + "step": 50485 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245857695985455e-05, + "loss": 0.0703, + "step": 50490 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245073910930667e-05, + "loss": 0.0463, + "step": 50495 + }, + { + "epoch": 2.36, + "learning_rate": 1.2244290125875882e-05, + "loss": 0.1483, + "step": 50500 + }, + { + "epoch": 2.36, + "learning_rate": 1.2243506340821095e-05, + "loss": 0.2113, + "step": 50505 + }, + { + "epoch": 2.36, + "learning_rate": 1.2242722555766308e-05, + "loss": 0.2143, + "step": 50510 + }, + { + "epoch": 2.36, + "learning_rate": 1.224193877071152e-05, + "loss": 0.3792, + "step": 50515 + }, + { + "epoch": 2.36, + "learning_rate": 1.2241154985656733e-05, + "loss": 0.0368, + "step": 50520 + }, + { + "epoch": 2.36, + "learning_rate": 1.2240371200601948e-05, + "loss": 0.0232, + "step": 50525 + }, + { + "epoch": 2.36, + "learning_rate": 1.223958741554716e-05, + "loss": 0.0197, + "step": 50530 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238803630492375e-05, + "loss": 0.1343, + "step": 50535 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238019845437587e-05, + "loss": 0.1147, + "step": 50540 + }, + { + "epoch": 2.36, + "learning_rate": 1.2237236060382802e-05, + "loss": 0.135, + "step": 50545 + }, + { + "epoch": 2.36, + "learning_rate": 1.2236452275328015e-05, + "loss": 0.1731, + "step": 50550 + }, + { + "epoch": 2.36, + "learning_rate": 1.2235668490273229e-05, + "loss": 0.1098, + "step": 50555 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234884705218442e-05, + "loss": 0.2273, + "step": 50560 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234100920163656e-05, + "loss": 0.2623, + "step": 50565 + }, + { + "epoch": 2.36, + "learning_rate": 1.2233317135108869e-05, + "loss": 0.0419, + "step": 50570 + }, + { + "epoch": 2.36, + "learning_rate": 1.2232533350054082e-05, + "loss": 0.0451, + "step": 50575 + }, + { + "epoch": 2.36, + "learning_rate": 1.2231749564999296e-05, + "loss": 0.0982, + "step": 50580 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230965779944509e-05, + "loss": 0.0518, + "step": 50585 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230181994889722e-05, + "loss": 0.0991, + "step": 50590 + }, + { + "epoch": 2.36, + "learning_rate": 1.2229398209834935e-05, + "loss": 0.0705, + "step": 50595 + }, + { + "epoch": 2.36, + "learning_rate": 1.222861442478015e-05, + "loss": 0.1053, + "step": 50600 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227830639725363e-05, + "loss": 0.1624, + "step": 50605 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227046854670576e-05, + "loss": 0.2793, + "step": 50610 + }, + { + "epoch": 2.36, + "learning_rate": 1.2226263069615789e-05, + "loss": 0.2738, + "step": 50615 + }, + { + "epoch": 2.36, + "learning_rate": 1.2225479284561004e-05, + "loss": 0.0397, + "step": 50620 + }, + { + "epoch": 2.36, + "learning_rate": 1.2224695499506216e-05, + "loss": 0.0552, + "step": 50625 + }, + { + "epoch": 2.36, + "learning_rate": 1.222391171445143e-05, + "loss": 0.1016, + "step": 50630 + }, + { + "epoch": 2.36, + "learning_rate": 1.2223127929396643e-05, + "loss": 0.1722, + "step": 50635 + }, + { + "epoch": 2.36, + "learning_rate": 1.2222344144341858e-05, + "loss": 0.1134, + "step": 50640 + }, + { + "epoch": 2.36, + "learning_rate": 1.222156035928707e-05, + "loss": 0.1319, + "step": 50645 + }, + { + "epoch": 2.36, + "learning_rate": 1.2220776574232283e-05, + "loss": 0.1036, + "step": 50650 + }, + { + "epoch": 2.36, + "learning_rate": 1.2219992789177496e-05, + "loss": 0.1984, + "step": 50655 + }, + { + "epoch": 2.36, + "learning_rate": 1.221920900412271e-05, + "loss": 0.2497, + "step": 50660 + }, + { + "epoch": 2.36, + "learning_rate": 1.2218425219067924e-05, + "loss": 0.3259, + "step": 50665 + }, + { + "epoch": 2.36, + "learning_rate": 1.2217641434013137e-05, + "loss": 0.0318, + "step": 50670 + }, + { + "epoch": 2.36, + "learning_rate": 1.221685764895835e-05, + "loss": 0.0643, + "step": 50675 + }, + { + "epoch": 2.36, + "learning_rate": 1.2216073863903564e-05, + "loss": 0.1131, + "step": 50680 + }, + { + "epoch": 2.37, + "learning_rate": 1.2215290078848778e-05, + "loss": 0.073, + "step": 50685 + }, + { + "epoch": 2.37, + "learning_rate": 1.221450629379399e-05, + "loss": 0.1513, + "step": 50690 + }, + { + "epoch": 2.37, + "learning_rate": 1.2213722508739206e-05, + "loss": 0.1274, + "step": 50695 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212938723684418e-05, + "loss": 0.1174, + "step": 50700 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212154938629632e-05, + "loss": 0.196, + "step": 50705 + }, + { + "epoch": 2.37, + "learning_rate": 1.2211371153574844e-05, + "loss": 0.2789, + "step": 50710 + }, + { + "epoch": 2.37, + "learning_rate": 1.2210587368520057e-05, + "loss": 0.2517, + "step": 50715 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209803583465272e-05, + "loss": 0.0358, + "step": 50720 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209019798410484e-05, + "loss": 0.0353, + "step": 50725 + }, + { + "epoch": 2.37, + "learning_rate": 1.2208236013355698e-05, + "loss": 0.116, + "step": 50730 + }, + { + "epoch": 2.37, + "learning_rate": 1.220745222830091e-05, + "loss": 0.0738, + "step": 50735 + }, + { + "epoch": 2.37, + "learning_rate": 1.2206668443246126e-05, + "loss": 0.0956, + "step": 50740 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205884658191338e-05, + "loss": 0.0734, + "step": 50745 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205100873136552e-05, + "loss": 0.1592, + "step": 50750 + }, + { + "epoch": 2.37, + "learning_rate": 1.2204317088081764e-05, + "loss": 0.2333, + "step": 50755 + }, + { + "epoch": 2.37, + "learning_rate": 1.220353330302698e-05, + "loss": 0.1929, + "step": 50760 + }, + { + "epoch": 2.37, + "learning_rate": 1.2202749517972192e-05, + "loss": 0.5826, + "step": 50765 + }, + { + "epoch": 2.37, + "learning_rate": 1.2201965732917406e-05, + "loss": 0.0678, + "step": 50770 + }, + { + "epoch": 2.37, + "learning_rate": 1.220118194786262e-05, + "loss": 0.0588, + "step": 50775 + }, + { + "epoch": 2.37, + "learning_rate": 1.2200398162807832e-05, + "loss": 0.0301, + "step": 50780 + }, + { + "epoch": 2.37, + "learning_rate": 1.2199614377753046e-05, + "loss": 0.0743, + "step": 50785 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198830592698258e-05, + "loss": 0.0594, + "step": 50790 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198046807643474e-05, + "loss": 0.0979, + "step": 50795 + }, + { + "epoch": 2.37, + "learning_rate": 1.2197263022588686e-05, + "loss": 0.0678, + "step": 50800 + }, + { + "epoch": 2.37, + "learning_rate": 1.21964792375339e-05, + "loss": 0.1468, + "step": 50805 + }, + { + "epoch": 2.37, + "learning_rate": 1.2195695452479112e-05, + "loss": 0.238, + "step": 50810 + }, + { + "epoch": 2.37, + "learning_rate": 1.2194911667424328e-05, + "loss": 0.185, + "step": 50815 + }, + { + "epoch": 2.37, + "learning_rate": 1.219412788236954e-05, + "loss": 0.0528, + "step": 50820 + }, + { + "epoch": 2.37, + "learning_rate": 1.2193344097314754e-05, + "loss": 0.0391, + "step": 50825 + }, + { + "epoch": 2.37, + "learning_rate": 1.2192560312259966e-05, + "loss": 0.0602, + "step": 50830 + }, + { + "epoch": 2.37, + "learning_rate": 1.2191776527205182e-05, + "loss": 0.0745, + "step": 50835 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190992742150394e-05, + "loss": 0.0926, + "step": 50840 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190208957095606e-05, + "loss": 0.1335, + "step": 50845 + }, + { + "epoch": 2.37, + "learning_rate": 1.218942517204082e-05, + "loss": 0.1373, + "step": 50850 + }, + { + "epoch": 2.37, + "learning_rate": 1.2188641386986032e-05, + "loss": 0.204, + "step": 50855 + }, + { + "epoch": 2.37, + "learning_rate": 1.2187857601931248e-05, + "loss": 0.2939, + "step": 50860 + }, + { + "epoch": 2.37, + "learning_rate": 1.218707381687646e-05, + "loss": 0.2599, + "step": 50865 + }, + { + "epoch": 2.37, + "learning_rate": 1.2186290031821674e-05, + "loss": 0.1391, + "step": 50870 + }, + { + "epoch": 2.37, + "learning_rate": 1.2185506246766888e-05, + "loss": 0.0675, + "step": 50875 + }, + { + "epoch": 2.37, + "learning_rate": 1.2184722461712102e-05, + "loss": 0.0415, + "step": 50880 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183938676657314e-05, + "loss": 0.0662, + "step": 50885 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183154891602528e-05, + "loss": 0.0667, + "step": 50890 + }, + { + "epoch": 2.37, + "learning_rate": 1.2182371106547742e-05, + "loss": 0.0808, + "step": 50895 + }, + { + "epoch": 2.38, + "learning_rate": 1.2181587321492956e-05, + "loss": 0.1179, + "step": 50900 + }, + { + "epoch": 2.38, + "learning_rate": 1.2180803536438168e-05, + "loss": 0.1444, + "step": 50905 + }, + { + "epoch": 2.38, + "learning_rate": 1.218001975138338e-05, + "loss": 0.3121, + "step": 50910 + }, + { + "epoch": 2.38, + "learning_rate": 1.2179235966328596e-05, + "loss": 0.1573, + "step": 50915 + }, + { + "epoch": 2.38, + "learning_rate": 1.2178452181273808e-05, + "loss": 0.1081, + "step": 50920 + }, + { + "epoch": 2.38, + "learning_rate": 1.2177668396219022e-05, + "loss": 0.0486, + "step": 50925 + }, + { + "epoch": 2.38, + "learning_rate": 1.2176884611164234e-05, + "loss": 0.0354, + "step": 50930 + }, + { + "epoch": 2.38, + "learning_rate": 1.217610082610945e-05, + "loss": 0.1187, + "step": 50935 + }, + { + "epoch": 2.38, + "learning_rate": 1.2175317041054662e-05, + "loss": 0.0868, + "step": 50940 + }, + { + "epoch": 2.38, + "learning_rate": 1.2174533255999876e-05, + "loss": 0.1052, + "step": 50945 + }, + { + "epoch": 2.38, + "learning_rate": 1.2173749470945088e-05, + "loss": 0.1715, + "step": 50950 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172965685890304e-05, + "loss": 0.2318, + "step": 50955 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172181900835516e-05, + "loss": 0.2057, + "step": 50960 + }, + { + "epoch": 2.38, + "learning_rate": 1.217139811578073e-05, + "loss": 0.2589, + "step": 50965 + }, + { + "epoch": 2.38, + "learning_rate": 1.2170614330725942e-05, + "loss": 0.0572, + "step": 50970 + }, + { + "epoch": 2.38, + "learning_rate": 1.2169830545671156e-05, + "loss": 0.0639, + "step": 50975 + }, + { + "epoch": 2.38, + "learning_rate": 1.216904676061637e-05, + "loss": 0.0619, + "step": 50980 + }, + { + "epoch": 2.38, + "learning_rate": 1.2168262975561582e-05, + "loss": 0.0854, + "step": 50985 + }, + { + "epoch": 2.38, + "learning_rate": 1.2167479190506796e-05, + "loss": 0.0625, + "step": 50990 + }, + { + "epoch": 2.38, + "learning_rate": 1.216669540545201e-05, + "loss": 0.0908, + "step": 50995 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165911620397224e-05, + "loss": 0.1432, + "step": 51000 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165127835342436e-05, + "loss": 0.1444, + "step": 51005 + }, + { + "epoch": 2.38, + "learning_rate": 1.2164344050287652e-05, + "loss": 0.1782, + "step": 51010 + }, + { + "epoch": 2.38, + "learning_rate": 1.2163560265232864e-05, + "loss": 0.2457, + "step": 51015 + }, + { + "epoch": 2.38, + "learning_rate": 1.2162776480178078e-05, + "loss": 0.0601, + "step": 51020 + }, + { + "epoch": 2.38, + "learning_rate": 1.216199269512329e-05, + "loss": 0.1218, + "step": 51025 + }, + { + "epoch": 2.38, + "learning_rate": 1.2161208910068506e-05, + "loss": 0.059, + "step": 51030 + }, + { + "epoch": 2.38, + "learning_rate": 1.2160425125013718e-05, + "loss": 0.0545, + "step": 51035 + }, + { + "epoch": 2.38, + "learning_rate": 1.215964133995893e-05, + "loss": 0.1509, + "step": 51040 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158857554904144e-05, + "loss": 0.0776, + "step": 51045 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158073769849356e-05, + "loss": 0.2111, + "step": 51050 + }, + { + "epoch": 2.38, + "learning_rate": 1.2157289984794572e-05, + "loss": 0.1845, + "step": 51055 + }, + { + "epoch": 2.38, + "learning_rate": 1.2156506199739784e-05, + "loss": 0.295, + "step": 51060 + }, + { + "epoch": 2.38, + "learning_rate": 1.2155722414684998e-05, + "loss": 0.3386, + "step": 51065 + }, + { + "epoch": 2.38, + "learning_rate": 1.215493862963021e-05, + "loss": 0.0308, + "step": 51070 + }, + { + "epoch": 2.38, + "learning_rate": 1.2154154844575426e-05, + "loss": 0.0535, + "step": 51075 + }, + { + "epoch": 2.38, + "learning_rate": 1.2153371059520638e-05, + "loss": 0.0747, + "step": 51080 + }, + { + "epoch": 2.38, + "learning_rate": 1.2152587274465852e-05, + "loss": 0.0781, + "step": 51085 + }, + { + "epoch": 2.38, + "learning_rate": 1.2151803489411066e-05, + "loss": 0.074, + "step": 51090 + }, + { + "epoch": 2.38, + "learning_rate": 1.215101970435628e-05, + "loss": 0.0889, + "step": 51095 + }, + { + "epoch": 2.38, + "learning_rate": 1.2150235919301492e-05, + "loss": 0.0867, + "step": 51100 + }, + { + "epoch": 2.38, + "learning_rate": 1.2149452134246704e-05, + "loss": 0.1321, + "step": 51105 + }, + { + "epoch": 2.38, + "learning_rate": 1.214866834919192e-05, + "loss": 0.1562, + "step": 51110 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147884564137132e-05, + "loss": 0.632, + "step": 51115 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147100779082346e-05, + "loss": 0.0545, + "step": 51120 + }, + { + "epoch": 2.39, + "learning_rate": 1.2146316994027558e-05, + "loss": 0.0554, + "step": 51125 + }, + { + "epoch": 2.39, + "learning_rate": 1.2145533208972773e-05, + "loss": 0.0916, + "step": 51130 + }, + { + "epoch": 2.39, + "learning_rate": 1.2144749423917986e-05, + "loss": 0.0778, + "step": 51135 + }, + { + "epoch": 2.39, + "learning_rate": 1.21439656388632e-05, + "loss": 0.1447, + "step": 51140 + }, + { + "epoch": 2.39, + "learning_rate": 1.2143181853808412e-05, + "loss": 0.062, + "step": 51145 + }, + { + "epoch": 2.39, + "learning_rate": 1.2142398068753627e-05, + "loss": 0.1337, + "step": 51150 + }, + { + "epoch": 2.39, + "learning_rate": 1.214161428369884e-05, + "loss": 0.1622, + "step": 51155 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140830498644054e-05, + "loss": 0.2186, + "step": 51160 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140046713589266e-05, + "loss": 0.2537, + "step": 51165 + }, + { + "epoch": 2.39, + "learning_rate": 1.2139262928534478e-05, + "loss": 0.0788, + "step": 51170 + }, + { + "epoch": 2.39, + "learning_rate": 1.2138479143479694e-05, + "loss": 0.0364, + "step": 51175 + }, + { + "epoch": 2.39, + "learning_rate": 1.2137695358424906e-05, + "loss": 0.1155, + "step": 51180 + }, + { + "epoch": 2.39, + "learning_rate": 1.213691157337012e-05, + "loss": 0.0683, + "step": 51185 + }, + { + "epoch": 2.39, + "learning_rate": 1.2136127788315334e-05, + "loss": 0.1416, + "step": 51190 + }, + { + "epoch": 2.39, + "learning_rate": 1.2135344003260547e-05, + "loss": 0.0975, + "step": 51195 + }, + { + "epoch": 2.39, + "learning_rate": 1.213456021820576e-05, + "loss": 0.0998, + "step": 51200 + }, + { + "epoch": 2.39, + "learning_rate": 1.2133776433150974e-05, + "loss": 0.2315, + "step": 51205 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132992648096188e-05, + "loss": 0.2483, + "step": 51210 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132208863041401e-05, + "loss": 0.2255, + "step": 51215 + }, + { + "epoch": 2.39, + "learning_rate": 1.2131425077986614e-05, + "loss": 0.1239, + "step": 51220 + }, + { + "epoch": 2.39, + "learning_rate": 1.213064129293183e-05, + "loss": 0.0421, + "step": 51225 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129857507877041e-05, + "loss": 0.0431, + "step": 51230 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129073722822254e-05, + "loss": 0.0604, + "step": 51235 + }, + { + "epoch": 2.39, + "learning_rate": 1.2128289937767468e-05, + "loss": 0.0692, + "step": 51240 + }, + { + "epoch": 2.39, + "learning_rate": 1.212750615271268e-05, + "loss": 0.0819, + "step": 51245 + }, + { + "epoch": 2.39, + "learning_rate": 1.2126722367657895e-05, + "loss": 0.0985, + "step": 51250 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125938582603108e-05, + "loss": 0.1348, + "step": 51255 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125154797548321e-05, + "loss": 0.184, + "step": 51260 + }, + { + "epoch": 2.39, + "learning_rate": 1.2124371012493534e-05, + "loss": 0.3057, + "step": 51265 + }, + { + "epoch": 2.39, + "learning_rate": 1.212358722743875e-05, + "loss": 0.088, + "step": 51270 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122803442383962e-05, + "loss": 0.0724, + "step": 51275 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122019657329175e-05, + "loss": 0.0378, + "step": 51280 + }, + { + "epoch": 2.39, + "learning_rate": 1.2121235872274388e-05, + "loss": 0.0781, + "step": 51285 + }, + { + "epoch": 2.39, + "learning_rate": 1.2120452087219603e-05, + "loss": 0.1908, + "step": 51290 + }, + { + "epoch": 2.39, + "learning_rate": 1.2119668302164815e-05, + "loss": 0.1843, + "step": 51295 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118884517110028e-05, + "loss": 0.085, + "step": 51300 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118100732055242e-05, + "loss": 0.1861, + "step": 51305 + }, + { + "epoch": 2.39, + "learning_rate": 1.2117316947000455e-05, + "loss": 0.252, + "step": 51310 + }, + { + "epoch": 2.39, + "learning_rate": 1.211653316194567e-05, + "loss": 0.341, + "step": 51315 + }, + { + "epoch": 2.39, + "learning_rate": 1.2115749376890882e-05, + "loss": 0.054, + "step": 51320 + }, + { + "epoch": 2.39, + "learning_rate": 1.2114965591836097e-05, + "loss": 0.0834, + "step": 51325 + }, + { + "epoch": 2.4, + "learning_rate": 1.211418180678131e-05, + "loss": 0.0253, + "step": 51330 + }, + { + "epoch": 2.4, + "learning_rate": 1.2113398021726523e-05, + "loss": 0.0387, + "step": 51335 + }, + { + "epoch": 2.4, + "learning_rate": 1.2112614236671736e-05, + "loss": 0.058, + "step": 51340 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111830451616951e-05, + "loss": 0.0874, + "step": 51345 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111046666562163e-05, + "loss": 0.0858, + "step": 51350 + }, + { + "epoch": 2.4, + "learning_rate": 1.2110262881507377e-05, + "loss": 0.1828, + "step": 51355 + }, + { + "epoch": 2.4, + "learning_rate": 1.210947909645259e-05, + "loss": 0.2306, + "step": 51360 + }, + { + "epoch": 2.4, + "learning_rate": 1.2108695311397802e-05, + "loss": 0.3866, + "step": 51365 + }, + { + "epoch": 2.4, + "learning_rate": 1.2107911526343017e-05, + "loss": 0.0208, + "step": 51370 + }, + { + "epoch": 2.4, + "learning_rate": 1.210712774128823e-05, + "loss": 0.0327, + "step": 51375 + }, + { + "epoch": 2.4, + "learning_rate": 1.2106343956233443e-05, + "loss": 0.0503, + "step": 51380 + }, + { + "epoch": 2.4, + "learning_rate": 1.2105560171178656e-05, + "loss": 0.1202, + "step": 51385 + }, + { + "epoch": 2.4, + "learning_rate": 1.2104776386123871e-05, + "loss": 0.1188, + "step": 51390 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103992601069083e-05, + "loss": 0.0703, + "step": 51395 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103208816014297e-05, + "loss": 0.1258, + "step": 51400 + }, + { + "epoch": 2.4, + "learning_rate": 1.2102425030959511e-05, + "loss": 0.2325, + "step": 51405 + }, + { + "epoch": 2.4, + "learning_rate": 1.2101641245904725e-05, + "loss": 0.1977, + "step": 51410 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100857460849937e-05, + "loss": 0.3474, + "step": 51415 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100073675795151e-05, + "loss": 0.077, + "step": 51420 + }, + { + "epoch": 2.4, + "learning_rate": 1.2099289890740365e-05, + "loss": 0.0395, + "step": 51425 + }, + { + "epoch": 2.4, + "learning_rate": 1.2098506105685577e-05, + "loss": 0.0626, + "step": 51430 + }, + { + "epoch": 2.4, + "learning_rate": 1.2097722320630791e-05, + "loss": 0.0527, + "step": 51435 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096938535576003e-05, + "loss": 0.1469, + "step": 51440 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096154750521219e-05, + "loss": 0.1648, + "step": 51445 + }, + { + "epoch": 2.4, + "learning_rate": 1.2095370965466431e-05, + "loss": 0.1252, + "step": 51450 + }, + { + "epoch": 2.4, + "learning_rate": 1.2094587180411645e-05, + "loss": 0.199, + "step": 51455 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093803395356857e-05, + "loss": 0.252, + "step": 51460 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093019610302073e-05, + "loss": 0.2616, + "step": 51465 + }, + { + "epoch": 2.4, + "learning_rate": 1.2092235825247285e-05, + "loss": 0.0359, + "step": 51470 + }, + { + "epoch": 2.4, + "learning_rate": 1.2091452040192499e-05, + "loss": 0.0569, + "step": 51475 + }, + { + "epoch": 2.4, + "learning_rate": 1.2090668255137711e-05, + "loss": 0.0824, + "step": 51480 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089884470082927e-05, + "loss": 0.065, + "step": 51485 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089100685028139e-05, + "loss": 0.0876, + "step": 51490 + }, + { + "epoch": 2.4, + "learning_rate": 1.2088316899973351e-05, + "loss": 0.1345, + "step": 51495 + }, + { + "epoch": 2.4, + "learning_rate": 1.2087533114918565e-05, + "loss": 0.0907, + "step": 51500 + }, + { + "epoch": 2.4, + "learning_rate": 1.2086749329863779e-05, + "loss": 0.3917, + "step": 51505 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085965544808993e-05, + "loss": 0.3124, + "step": 51510 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085181759754205e-05, + "loss": 0.3424, + "step": 51515 + }, + { + "epoch": 2.4, + "learning_rate": 1.208439797469942e-05, + "loss": 0.0339, + "step": 51520 + }, + { + "epoch": 2.4, + "learning_rate": 1.2083614189644633e-05, + "loss": 0.0568, + "step": 51525 + }, + { + "epoch": 2.4, + "learning_rate": 1.2082830404589847e-05, + "loss": 0.1319, + "step": 51530 + }, + { + "epoch": 2.4, + "learning_rate": 1.208204661953506e-05, + "loss": 0.0855, + "step": 51535 + }, + { + "epoch": 2.4, + "learning_rate": 1.2081262834480275e-05, + "loss": 0.0986, + "step": 51540 + }, + { + "epoch": 2.41, + "learning_rate": 1.2080479049425487e-05, + "loss": 0.1848, + "step": 51545 + }, + { + "epoch": 2.41, + "learning_rate": 1.2079695264370701e-05, + "loss": 0.1471, + "step": 51550 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078911479315913e-05, + "loss": 0.1345, + "step": 51555 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078127694261125e-05, + "loss": 0.1926, + "step": 51560 + }, + { + "epoch": 2.41, + "learning_rate": 1.2077343909206341e-05, + "loss": 0.3924, + "step": 51565 + }, + { + "epoch": 2.41, + "learning_rate": 1.2076560124151553e-05, + "loss": 0.0675, + "step": 51570 + }, + { + "epoch": 2.41, + "learning_rate": 1.2075776339096767e-05, + "loss": 0.0411, + "step": 51575 + }, + { + "epoch": 2.41, + "learning_rate": 1.207499255404198e-05, + "loss": 0.0543, + "step": 51580 + }, + { + "epoch": 2.41, + "learning_rate": 1.2074208768987195e-05, + "loss": 0.148, + "step": 51585 + }, + { + "epoch": 2.41, + "learning_rate": 1.2073424983932407e-05, + "loss": 0.0893, + "step": 51590 + }, + { + "epoch": 2.41, + "learning_rate": 1.2072641198877621e-05, + "loss": 0.1574, + "step": 51595 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071857413822833e-05, + "loss": 0.1502, + "step": 51600 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071073628768049e-05, + "loss": 0.2346, + "step": 51605 + }, + { + "epoch": 2.41, + "learning_rate": 1.2070289843713261e-05, + "loss": 0.2326, + "step": 51610 + }, + { + "epoch": 2.41, + "learning_rate": 1.2069506058658475e-05, + "loss": 0.3411, + "step": 51615 + }, + { + "epoch": 2.41, + "learning_rate": 1.2068722273603689e-05, + "loss": 0.0766, + "step": 51620 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067938488548901e-05, + "loss": 0.022, + "step": 51625 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067154703494115e-05, + "loss": 0.0682, + "step": 51630 + }, + { + "epoch": 2.41, + "learning_rate": 1.2066370918439327e-05, + "loss": 0.0439, + "step": 51635 + }, + { + "epoch": 2.41, + "learning_rate": 1.2065587133384543e-05, + "loss": 0.0751, + "step": 51640 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064803348329755e-05, + "loss": 0.0661, + "step": 51645 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064019563274969e-05, + "loss": 0.215, + "step": 51650 + }, + { + "epoch": 2.41, + "learning_rate": 1.2063235778220181e-05, + "loss": 0.2804, + "step": 51655 + }, + { + "epoch": 2.41, + "learning_rate": 1.2062451993165397e-05, + "loss": 0.3444, + "step": 51660 + }, + { + "epoch": 2.41, + "learning_rate": 1.2061668208110609e-05, + "loss": 0.2795, + "step": 51665 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060884423055823e-05, + "loss": 0.0677, + "step": 51670 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060100638001035e-05, + "loss": 0.0437, + "step": 51675 + }, + { + "epoch": 2.41, + "learning_rate": 1.205931685294625e-05, + "loss": 0.0713, + "step": 51680 + }, + { + "epoch": 2.41, + "learning_rate": 1.2058533067891463e-05, + "loss": 0.032, + "step": 51685 + }, + { + "epoch": 2.41, + "learning_rate": 1.2057749282836675e-05, + "loss": 0.0629, + "step": 51690 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056965497781889e-05, + "loss": 0.1758, + "step": 51695 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056181712727101e-05, + "loss": 0.0785, + "step": 51700 + }, + { + "epoch": 2.41, + "learning_rate": 1.2055397927672317e-05, + "loss": 0.1533, + "step": 51705 + }, + { + "epoch": 2.41, + "learning_rate": 1.2054614142617529e-05, + "loss": 0.249, + "step": 51710 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053830357562743e-05, + "loss": 0.186, + "step": 51715 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053046572507957e-05, + "loss": 0.01, + "step": 51720 + }, + { + "epoch": 2.41, + "learning_rate": 1.205226278745317e-05, + "loss": 0.0311, + "step": 51725 + }, + { + "epoch": 2.41, + "learning_rate": 1.2051479002398383e-05, + "loss": 0.0707, + "step": 51730 + }, + { + "epoch": 2.41, + "learning_rate": 1.2050695217343597e-05, + "loss": 0.0547, + "step": 51735 + }, + { + "epoch": 2.41, + "learning_rate": 1.204991143228881e-05, + "loss": 0.1202, + "step": 51740 + }, + { + "epoch": 2.41, + "learning_rate": 1.2049127647234025e-05, + "loss": 0.0915, + "step": 51745 + }, + { + "epoch": 2.41, + "learning_rate": 1.2048343862179237e-05, + "loss": 0.2305, + "step": 51750 + }, + { + "epoch": 2.41, + "learning_rate": 1.2047560077124449e-05, + "loss": 0.2337, + "step": 51755 + }, + { + "epoch": 2.42, + "learning_rate": 1.2046776292069665e-05, + "loss": 0.2104, + "step": 51760 + }, + { + "epoch": 2.42, + "learning_rate": 1.2045992507014877e-05, + "loss": 0.2833, + "step": 51765 + }, + { + "epoch": 2.42, + "learning_rate": 1.204520872196009e-05, + "loss": 0.022, + "step": 51770 + }, + { + "epoch": 2.42, + "learning_rate": 1.2044424936905303e-05, + "loss": 0.0689, + "step": 51775 + }, + { + "epoch": 2.42, + "learning_rate": 1.2043641151850519e-05, + "loss": 0.142, + "step": 51780 + }, + { + "epoch": 2.42, + "learning_rate": 1.204285736679573e-05, + "loss": 0.1489, + "step": 51785 + }, + { + "epoch": 2.42, + "learning_rate": 1.2042073581740945e-05, + "loss": 0.0738, + "step": 51790 + }, + { + "epoch": 2.42, + "learning_rate": 1.2041289796686157e-05, + "loss": 0.114, + "step": 51795 + }, + { + "epoch": 2.42, + "learning_rate": 1.2040506011631372e-05, + "loss": 0.1686, + "step": 51800 + }, + { + "epoch": 2.42, + "learning_rate": 1.2039722226576585e-05, + "loss": 0.1656, + "step": 51805 + }, + { + "epoch": 2.42, + "learning_rate": 1.2038938441521799e-05, + "loss": 0.1686, + "step": 51810 + }, + { + "epoch": 2.42, + "learning_rate": 1.203815465646701e-05, + "loss": 0.2587, + "step": 51815 + }, + { + "epoch": 2.42, + "learning_rate": 1.2037370871412225e-05, + "loss": 0.0502, + "step": 51820 + }, + { + "epoch": 2.42, + "learning_rate": 1.2036587086357439e-05, + "loss": 0.0239, + "step": 51825 + }, + { + "epoch": 2.42, + "learning_rate": 1.203580330130265e-05, + "loss": 0.0367, + "step": 51830 + }, + { + "epoch": 2.42, + "learning_rate": 1.2035019516247865e-05, + "loss": 0.0598, + "step": 51835 + }, + { + "epoch": 2.42, + "learning_rate": 1.2034235731193079e-05, + "loss": 0.1056, + "step": 51840 + }, + { + "epoch": 2.42, + "learning_rate": 1.2033451946138293e-05, + "loss": 0.1451, + "step": 51845 + }, + { + "epoch": 2.42, + "learning_rate": 1.2032668161083505e-05, + "loss": 0.174, + "step": 51850 + }, + { + "epoch": 2.42, + "learning_rate": 1.203188437602872e-05, + "loss": 0.199, + "step": 51855 + }, + { + "epoch": 2.42, + "learning_rate": 1.2031100590973933e-05, + "loss": 0.3578, + "step": 51860 + }, + { + "epoch": 2.42, + "learning_rate": 1.2030316805919146e-05, + "loss": 0.3888, + "step": 51865 + }, + { + "epoch": 2.42, + "learning_rate": 1.2029533020864359e-05, + "loss": 0.0432, + "step": 51870 + }, + { + "epoch": 2.42, + "learning_rate": 1.2028749235809574e-05, + "loss": 0.0301, + "step": 51875 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027965450754787e-05, + "loss": 0.0577, + "step": 51880 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027181665699999e-05, + "loss": 0.0879, + "step": 51885 + }, + { + "epoch": 2.42, + "learning_rate": 1.2026397880645213e-05, + "loss": 0.1147, + "step": 51890 + }, + { + "epoch": 2.42, + "learning_rate": 1.2025614095590425e-05, + "loss": 0.1604, + "step": 51895 + }, + { + "epoch": 2.42, + "learning_rate": 1.202483031053564e-05, + "loss": 0.0468, + "step": 51900 + }, + { + "epoch": 2.42, + "learning_rate": 1.2024046525480853e-05, + "loss": 0.1913, + "step": 51905 + }, + { + "epoch": 2.42, + "learning_rate": 1.2023262740426067e-05, + "loss": 0.2669, + "step": 51910 + }, + { + "epoch": 2.42, + "learning_rate": 1.2022478955371279e-05, + "loss": 0.17, + "step": 51915 + }, + { + "epoch": 2.42, + "learning_rate": 1.2021695170316494e-05, + "loss": 0.1219, + "step": 51920 + }, + { + "epoch": 2.42, + "learning_rate": 1.2020911385261707e-05, + "loss": 0.0375, + "step": 51925 + }, + { + "epoch": 2.42, + "learning_rate": 1.202012760020692e-05, + "loss": 0.0667, + "step": 51930 + }, + { + "epoch": 2.42, + "learning_rate": 1.2019343815152134e-05, + "loss": 0.0615, + "step": 51935 + }, + { + "epoch": 2.42, + "learning_rate": 1.2018560030097348e-05, + "loss": 0.0947, + "step": 51940 + }, + { + "epoch": 2.42, + "learning_rate": 1.201777624504256e-05, + "loss": 0.1589, + "step": 51945 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016992459987773e-05, + "loss": 0.0904, + "step": 51950 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016208674932988e-05, + "loss": 0.2414, + "step": 51955 + }, + { + "epoch": 2.42, + "learning_rate": 1.20154248898782e-05, + "loss": 0.3085, + "step": 51960 + }, + { + "epoch": 2.42, + "learning_rate": 1.2014641104823414e-05, + "loss": 0.3275, + "step": 51965 + }, + { + "epoch": 2.42, + "learning_rate": 1.2013857319768627e-05, + "loss": 0.0334, + "step": 51970 + }, + { + "epoch": 2.43, + "learning_rate": 1.2013073534713842e-05, + "loss": 0.0268, + "step": 51975 + }, + { + "epoch": 2.43, + "learning_rate": 1.2012289749659054e-05, + "loss": 0.1292, + "step": 51980 + }, + { + "epoch": 2.43, + "learning_rate": 1.2011505964604268e-05, + "loss": 0.0277, + "step": 51985 + }, + { + "epoch": 2.43, + "learning_rate": 1.201072217954948e-05, + "loss": 0.0855, + "step": 51990 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009938394494696e-05, + "loss": 0.0944, + "step": 51995 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009154609439908e-05, + "loss": 0.0794, + "step": 52000 + }, + { + "epoch": 2.43, + "learning_rate": 1.2008370824385122e-05, + "loss": 0.1485, + "step": 52005 + }, + { + "epoch": 2.43, + "learning_rate": 1.2007587039330335e-05, + "loss": 0.2271, + "step": 52010 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006803254275547e-05, + "loss": 0.2456, + "step": 52015 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006019469220762e-05, + "loss": 0.085, + "step": 52020 + }, + { + "epoch": 2.43, + "learning_rate": 1.2005235684165975e-05, + "loss": 0.0676, + "step": 52025 + }, + { + "epoch": 2.43, + "learning_rate": 1.2004451899111188e-05, + "loss": 0.0165, + "step": 52030 + }, + { + "epoch": 2.43, + "learning_rate": 1.2003668114056402e-05, + "loss": 0.0606, + "step": 52035 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002884329001616e-05, + "loss": 0.1106, + "step": 52040 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002100543946828e-05, + "loss": 0.1506, + "step": 52045 + }, + { + "epoch": 2.43, + "learning_rate": 1.2001316758892042e-05, + "loss": 0.0898, + "step": 52050 + }, + { + "epoch": 2.43, + "learning_rate": 1.2000532973837256e-05, + "loss": 0.1746, + "step": 52055 + }, + { + "epoch": 2.43, + "learning_rate": 1.199974918878247e-05, + "loss": 0.251, + "step": 52060 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998965403727682e-05, + "loss": 0.2887, + "step": 52065 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998181618672898e-05, + "loss": 0.0429, + "step": 52070 + }, + { + "epoch": 2.43, + "learning_rate": 1.199739783361811e-05, + "loss": 0.0352, + "step": 52075 + }, + { + "epoch": 2.43, + "learning_rate": 1.1996614048563322e-05, + "loss": 0.1168, + "step": 52080 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995830263508536e-05, + "loss": 0.0959, + "step": 52085 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995046478453749e-05, + "loss": 0.154, + "step": 52090 + }, + { + "epoch": 2.43, + "learning_rate": 1.1994262693398964e-05, + "loss": 0.0716, + "step": 52095 + }, + { + "epoch": 2.43, + "learning_rate": 1.1993478908344176e-05, + "loss": 0.1781, + "step": 52100 + }, + { + "epoch": 2.43, + "learning_rate": 1.199269512328939e-05, + "loss": 0.2264, + "step": 52105 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991911338234602e-05, + "loss": 0.2278, + "step": 52110 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991127553179818e-05, + "loss": 0.3195, + "step": 52115 + }, + { + "epoch": 2.43, + "learning_rate": 1.199034376812503e-05, + "loss": 0.0321, + "step": 52120 + }, + { + "epoch": 2.43, + "learning_rate": 1.1989559983070244e-05, + "loss": 0.0277, + "step": 52125 + }, + { + "epoch": 2.43, + "learning_rate": 1.1988776198015456e-05, + "loss": 0.0288, + "step": 52130 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987992412960672e-05, + "loss": 0.0316, + "step": 52135 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987208627905884e-05, + "loss": 0.0851, + "step": 52140 + }, + { + "epoch": 2.43, + "learning_rate": 1.1986424842851096e-05, + "loss": 0.1352, + "step": 52145 + }, + { + "epoch": 2.43, + "learning_rate": 1.198564105779631e-05, + "loss": 0.1331, + "step": 52150 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984857272741524e-05, + "loss": 0.1579, + "step": 52155 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984073487686738e-05, + "loss": 0.2474, + "step": 52160 + }, + { + "epoch": 2.43, + "learning_rate": 1.198328970263195e-05, + "loss": 0.2848, + "step": 52165 + }, + { + "epoch": 2.43, + "learning_rate": 1.1982505917577166e-05, + "loss": 0.0673, + "step": 52170 + }, + { + "epoch": 2.43, + "learning_rate": 1.1981722132522378e-05, + "loss": 0.0426, + "step": 52175 + }, + { + "epoch": 2.43, + "learning_rate": 1.1980938347467592e-05, + "loss": 0.0501, + "step": 52180 + }, + { + "epoch": 2.44, + "learning_rate": 1.1980154562412804e-05, + "loss": 0.0576, + "step": 52185 + }, + { + "epoch": 2.44, + "learning_rate": 1.197937077735802e-05, + "loss": 0.105, + "step": 52190 + }, + { + "epoch": 2.44, + "learning_rate": 1.1978586992303232e-05, + "loss": 0.1487, + "step": 52195 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977803207248446e-05, + "loss": 0.063, + "step": 52200 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977019422193658e-05, + "loss": 0.1358, + "step": 52205 + }, + { + "epoch": 2.44, + "learning_rate": 1.197623563713887e-05, + "loss": 0.4102, + "step": 52210 + }, + { + "epoch": 2.44, + "learning_rate": 1.1975451852084086e-05, + "loss": 0.2808, + "step": 52215 + }, + { + "epoch": 2.44, + "learning_rate": 1.1974668067029298e-05, + "loss": 0.1002, + "step": 52220 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973884281974512e-05, + "loss": 0.0695, + "step": 52225 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973100496919724e-05, + "loss": 0.0389, + "step": 52230 + }, + { + "epoch": 2.44, + "learning_rate": 1.197231671186494e-05, + "loss": 0.0591, + "step": 52235 + }, + { + "epoch": 2.44, + "learning_rate": 1.1971532926810152e-05, + "loss": 0.1046, + "step": 52240 + }, + { + "epoch": 2.44, + "learning_rate": 1.1970749141755366e-05, + "loss": 0.0821, + "step": 52245 + }, + { + "epoch": 2.44, + "learning_rate": 1.196996535670058e-05, + "loss": 0.1596, + "step": 52250 + }, + { + "epoch": 2.44, + "learning_rate": 1.1969181571645794e-05, + "loss": 0.0936, + "step": 52255 + }, + { + "epoch": 2.44, + "learning_rate": 1.1968397786591006e-05, + "loss": 0.1988, + "step": 52260 + }, + { + "epoch": 2.44, + "learning_rate": 1.196761400153622e-05, + "loss": 0.3196, + "step": 52265 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966830216481434e-05, + "loss": 0.0698, + "step": 52270 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966046431426646e-05, + "loss": 0.0393, + "step": 52275 + }, + { + "epoch": 2.44, + "learning_rate": 1.196526264637186e-05, + "loss": 0.0488, + "step": 52280 + }, + { + "epoch": 2.44, + "learning_rate": 1.1964478861317072e-05, + "loss": 0.096, + "step": 52285 + }, + { + "epoch": 2.44, + "learning_rate": 1.1963695076262288e-05, + "loss": 0.0349, + "step": 52290 + }, + { + "epoch": 2.44, + "learning_rate": 1.19629112912075e-05, + "loss": 0.2553, + "step": 52295 + }, + { + "epoch": 2.44, + "learning_rate": 1.1962127506152714e-05, + "loss": 0.1327, + "step": 52300 + }, + { + "epoch": 2.44, + "learning_rate": 1.1961343721097926e-05, + "loss": 0.1643, + "step": 52305 + }, + { + "epoch": 2.44, + "learning_rate": 1.1960559936043142e-05, + "loss": 0.2194, + "step": 52310 + }, + { + "epoch": 2.44, + "learning_rate": 1.1959776150988354e-05, + "loss": 0.3497, + "step": 52315 + }, + { + "epoch": 2.44, + "learning_rate": 1.1958992365933568e-05, + "loss": 0.0641, + "step": 52320 + }, + { + "epoch": 2.44, + "learning_rate": 1.195820858087878e-05, + "loss": 0.0347, + "step": 52325 + }, + { + "epoch": 2.44, + "learning_rate": 1.1957424795823996e-05, + "loss": 0.0945, + "step": 52330 + }, + { + "epoch": 2.44, + "learning_rate": 1.1956641010769208e-05, + "loss": 0.0739, + "step": 52335 + }, + { + "epoch": 2.44, + "learning_rate": 1.195585722571442e-05, + "loss": 0.1243, + "step": 52340 + }, + { + "epoch": 2.44, + "learning_rate": 1.1955073440659634e-05, + "loss": 0.1303, + "step": 52345 + }, + { + "epoch": 2.44, + "learning_rate": 1.1954289655604848e-05, + "loss": 0.1913, + "step": 52350 + }, + { + "epoch": 2.44, + "learning_rate": 1.1953505870550062e-05, + "loss": 0.1298, + "step": 52355 + }, + { + "epoch": 2.44, + "learning_rate": 1.1952722085495274e-05, + "loss": 0.2293, + "step": 52360 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951938300440488e-05, + "loss": 0.2134, + "step": 52365 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951154515385702e-05, + "loss": 0.0677, + "step": 52370 + }, + { + "epoch": 2.44, + "learning_rate": 1.1950370730330916e-05, + "loss": 0.0779, + "step": 52375 + }, + { + "epoch": 2.44, + "learning_rate": 1.1949586945276128e-05, + "loss": 0.0791, + "step": 52380 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948803160221344e-05, + "loss": 0.086, + "step": 52385 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948019375166556e-05, + "loss": 0.1275, + "step": 52390 + }, + { + "epoch": 2.44, + "learning_rate": 1.194723559011177e-05, + "loss": 0.0879, + "step": 52395 + }, + { + "epoch": 2.45, + "learning_rate": 1.1946451805056982e-05, + "loss": 0.133, + "step": 52400 + }, + { + "epoch": 2.45, + "learning_rate": 1.1945668020002194e-05, + "loss": 0.2451, + "step": 52405 + }, + { + "epoch": 2.45, + "learning_rate": 1.194488423494741e-05, + "loss": 0.2477, + "step": 52410 + }, + { + "epoch": 2.45, + "learning_rate": 1.1944100449892622e-05, + "loss": 0.4061, + "step": 52415 + }, + { + "epoch": 2.45, + "learning_rate": 1.1943316664837836e-05, + "loss": 0.0401, + "step": 52420 + }, + { + "epoch": 2.45, + "learning_rate": 1.1942532879783048e-05, + "loss": 0.0241, + "step": 52425 + }, + { + "epoch": 2.45, + "learning_rate": 1.1941749094728264e-05, + "loss": 0.0777, + "step": 52430 + }, + { + "epoch": 2.45, + "learning_rate": 1.1940965309673476e-05, + "loss": 0.065, + "step": 52435 + }, + { + "epoch": 2.45, + "learning_rate": 1.194018152461869e-05, + "loss": 0.1047, + "step": 52440 + }, + { + "epoch": 2.45, + "learning_rate": 1.1939397739563902e-05, + "loss": 0.0401, + "step": 52445 + }, + { + "epoch": 2.45, + "learning_rate": 1.1938613954509118e-05, + "loss": 0.0681, + "step": 52450 + }, + { + "epoch": 2.45, + "learning_rate": 1.193783016945433e-05, + "loss": 0.1758, + "step": 52455 + }, + { + "epoch": 2.45, + "learning_rate": 1.1937046384399544e-05, + "loss": 0.1733, + "step": 52460 + }, + { + "epoch": 2.45, + "learning_rate": 1.1936262599344756e-05, + "loss": 0.3287, + "step": 52465 + }, + { + "epoch": 2.45, + "learning_rate": 1.193547881428997e-05, + "loss": 0.049, + "step": 52470 + }, + { + "epoch": 2.45, + "learning_rate": 1.1934695029235184e-05, + "loss": 0.0322, + "step": 52475 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933911244180396e-05, + "loss": 0.069, + "step": 52480 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933127459125612e-05, + "loss": 0.0482, + "step": 52485 + }, + { + "epoch": 2.45, + "learning_rate": 1.1932343674070824e-05, + "loss": 0.1658, + "step": 52490 + }, + { + "epoch": 2.45, + "learning_rate": 1.1931559889016038e-05, + "loss": 0.1137, + "step": 52495 + }, + { + "epoch": 2.45, + "learning_rate": 1.193077610396125e-05, + "loss": 0.2187, + "step": 52500 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929992318906465e-05, + "loss": 0.1515, + "step": 52505 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929208533851678e-05, + "loss": 0.307, + "step": 52510 + }, + { + "epoch": 2.45, + "learning_rate": 1.1928424748796892e-05, + "loss": 0.3527, + "step": 52515 + }, + { + "epoch": 2.45, + "learning_rate": 1.1927640963742104e-05, + "loss": 0.0675, + "step": 52520 + }, + { + "epoch": 2.45, + "learning_rate": 1.192685717868732e-05, + "loss": 0.0585, + "step": 52525 + }, + { + "epoch": 2.45, + "learning_rate": 1.1926073393632532e-05, + "loss": 0.0977, + "step": 52530 + }, + { + "epoch": 2.45, + "learning_rate": 1.1925289608577744e-05, + "loss": 0.0608, + "step": 52535 + }, + { + "epoch": 2.45, + "learning_rate": 1.1924505823522958e-05, + "loss": 0.061, + "step": 52540 + }, + { + "epoch": 2.45, + "learning_rate": 1.192372203846817e-05, + "loss": 0.0563, + "step": 52545 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922938253413386e-05, + "loss": 0.1041, + "step": 52550 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922154468358598e-05, + "loss": 0.1776, + "step": 52555 + }, + { + "epoch": 2.45, + "learning_rate": 1.1921370683303812e-05, + "loss": 0.2721, + "step": 52560 + }, + { + "epoch": 2.45, + "learning_rate": 1.1920586898249026e-05, + "loss": 0.2447, + "step": 52565 + }, + { + "epoch": 2.45, + "learning_rate": 1.191980311319424e-05, + "loss": 0.0822, + "step": 52570 + }, + { + "epoch": 2.45, + "learning_rate": 1.1919019328139452e-05, + "loss": 0.0929, + "step": 52575 + }, + { + "epoch": 2.45, + "learning_rate": 1.1918235543084666e-05, + "loss": 0.0734, + "step": 52580 + }, + { + "epoch": 2.45, + "learning_rate": 1.191745175802988e-05, + "loss": 0.0762, + "step": 52585 + }, + { + "epoch": 2.45, + "learning_rate": 1.1916667972975093e-05, + "loss": 0.1376, + "step": 52590 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915884187920306e-05, + "loss": 0.1607, + "step": 52595 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915100402865518e-05, + "loss": 0.2222, + "step": 52600 + }, + { + "epoch": 2.45, + "learning_rate": 1.1914316617810733e-05, + "loss": 0.2216, + "step": 52605 + }, + { + "epoch": 2.45, + "learning_rate": 1.1913532832755946e-05, + "loss": 0.2515, + "step": 52610 + }, + { + "epoch": 2.46, + "learning_rate": 1.191274904770116e-05, + "loss": 0.3711, + "step": 52615 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911965262646372e-05, + "loss": 0.0333, + "step": 52620 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911181477591587e-05, + "loss": 0.0515, + "step": 52625 + }, + { + "epoch": 2.46, + "learning_rate": 1.19103976925368e-05, + "loss": 0.0941, + "step": 52630 + }, + { + "epoch": 2.46, + "learning_rate": 1.1909613907482013e-05, + "loss": 0.1234, + "step": 52635 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908830122427226e-05, + "loss": 0.0329, + "step": 52640 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908046337372441e-05, + "loss": 0.1261, + "step": 52645 + }, + { + "epoch": 2.46, + "learning_rate": 1.1907262552317653e-05, + "loss": 0.1531, + "step": 52650 + }, + { + "epoch": 2.46, + "learning_rate": 1.1906478767262867e-05, + "loss": 0.1486, + "step": 52655 + }, + { + "epoch": 2.46, + "learning_rate": 1.190569498220808e-05, + "loss": 0.2458, + "step": 52660 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904911197153293e-05, + "loss": 0.3166, + "step": 52665 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904127412098507e-05, + "loss": 0.0304, + "step": 52670 + }, + { + "epoch": 2.46, + "learning_rate": 1.190334362704372e-05, + "loss": 0.0736, + "step": 52675 + }, + { + "epoch": 2.46, + "learning_rate": 1.1902559841988934e-05, + "loss": 0.0754, + "step": 52680 + }, + { + "epoch": 2.46, + "learning_rate": 1.1901776056934147e-05, + "loss": 0.0731, + "step": 52685 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900992271879361e-05, + "loss": 0.0482, + "step": 52690 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900208486824574e-05, + "loss": 0.0744, + "step": 52695 + }, + { + "epoch": 2.46, + "learning_rate": 1.1899424701769789e-05, + "loss": 0.1076, + "step": 52700 + }, + { + "epoch": 2.46, + "learning_rate": 1.1898640916715001e-05, + "loss": 0.1539, + "step": 52705 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897857131660215e-05, + "loss": 0.2656, + "step": 52710 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897073346605427e-05, + "loss": 0.2979, + "step": 52715 + }, + { + "epoch": 2.46, + "learning_rate": 1.1896289561550643e-05, + "loss": 0.0254, + "step": 52720 + }, + { + "epoch": 2.46, + "learning_rate": 1.1895505776495855e-05, + "loss": 0.0355, + "step": 52725 + }, + { + "epoch": 2.46, + "learning_rate": 1.1894721991441067e-05, + "loss": 0.0842, + "step": 52730 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893938206386281e-05, + "loss": 0.075, + "step": 52735 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893154421331494e-05, + "loss": 0.122, + "step": 52740 + }, + { + "epoch": 2.46, + "learning_rate": 1.189237063627671e-05, + "loss": 0.1795, + "step": 52745 + }, + { + "epoch": 2.46, + "learning_rate": 1.1891586851221921e-05, + "loss": 0.1504, + "step": 52750 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890803066167135e-05, + "loss": 0.2475, + "step": 52755 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890019281112348e-05, + "loss": 0.1841, + "step": 52760 + }, + { + "epoch": 2.46, + "learning_rate": 1.1889235496057563e-05, + "loss": 0.3067, + "step": 52765 + }, + { + "epoch": 2.46, + "learning_rate": 1.1888451711002775e-05, + "loss": 0.0693, + "step": 52770 + }, + { + "epoch": 2.46, + "learning_rate": 1.188766792594799e-05, + "loss": 0.0174, + "step": 52775 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886884140893203e-05, + "loss": 0.0503, + "step": 52780 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886100355838417e-05, + "loss": 0.1001, + "step": 52785 + }, + { + "epoch": 2.46, + "learning_rate": 1.188531657078363e-05, + "loss": 0.1064, + "step": 52790 + }, + { + "epoch": 2.46, + "learning_rate": 1.1884532785728841e-05, + "loss": 0.0935, + "step": 52795 + }, + { + "epoch": 2.46, + "learning_rate": 1.1883749000674057e-05, + "loss": 0.1237, + "step": 52800 + }, + { + "epoch": 2.46, + "learning_rate": 1.188296521561927e-05, + "loss": 0.2529, + "step": 52805 + }, + { + "epoch": 2.46, + "learning_rate": 1.1882181430564483e-05, + "loss": 0.2904, + "step": 52810 + }, + { + "epoch": 2.46, + "learning_rate": 1.1881397645509695e-05, + "loss": 0.2634, + "step": 52815 + }, + { + "epoch": 2.46, + "learning_rate": 1.1880613860454911e-05, + "loss": 0.0563, + "step": 52820 + }, + { + "epoch": 2.46, + "learning_rate": 1.1879830075400123e-05, + "loss": 0.1206, + "step": 52825 + }, + { + "epoch": 2.47, + "learning_rate": 1.1879046290345337e-05, + "loss": 0.0276, + "step": 52830 + }, + { + "epoch": 2.47, + "learning_rate": 1.187826250529055e-05, + "loss": 0.0621, + "step": 52835 + }, + { + "epoch": 2.47, + "learning_rate": 1.1877478720235765e-05, + "loss": 0.1221, + "step": 52840 + }, + { + "epoch": 2.47, + "learning_rate": 1.1876694935180977e-05, + "loss": 0.1302, + "step": 52845 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875911150126191e-05, + "loss": 0.1104, + "step": 52850 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875127365071403e-05, + "loss": 0.2365, + "step": 52855 + }, + { + "epoch": 2.47, + "learning_rate": 1.1874343580016615e-05, + "loss": 0.2957, + "step": 52860 + }, + { + "epoch": 2.47, + "learning_rate": 1.1873559794961831e-05, + "loss": 0.1747, + "step": 52865 + }, + { + "epoch": 2.47, + "learning_rate": 1.1872776009907043e-05, + "loss": 0.0363, + "step": 52870 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871992224852257e-05, + "loss": 0.0558, + "step": 52875 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871208439797471e-05, + "loss": 0.1079, + "step": 52880 + }, + { + "epoch": 2.47, + "learning_rate": 1.1870424654742685e-05, + "loss": 0.0892, + "step": 52885 + }, + { + "epoch": 2.47, + "learning_rate": 1.1869640869687897e-05, + "loss": 0.0586, + "step": 52890 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868857084633111e-05, + "loss": 0.0733, + "step": 52895 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868073299578325e-05, + "loss": 0.1407, + "step": 52900 + }, + { + "epoch": 2.47, + "learning_rate": 1.1867289514523539e-05, + "loss": 0.1468, + "step": 52905 + }, + { + "epoch": 2.47, + "learning_rate": 1.1866505729468751e-05, + "loss": 0.312, + "step": 52910 + }, + { + "epoch": 2.47, + "learning_rate": 1.1865721944413967e-05, + "loss": 0.1312, + "step": 52915 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864938159359179e-05, + "loss": 0.074, + "step": 52920 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864154374304391e-05, + "loss": 0.0478, + "step": 52925 + }, + { + "epoch": 2.47, + "learning_rate": 1.1863370589249605e-05, + "loss": 0.0973, + "step": 52930 + }, + { + "epoch": 2.47, + "learning_rate": 1.1862586804194817e-05, + "loss": 0.0613, + "step": 52935 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861803019140033e-05, + "loss": 0.0627, + "step": 52940 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861019234085245e-05, + "loss": 0.0968, + "step": 52945 + }, + { + "epoch": 2.47, + "learning_rate": 1.1860235449030459e-05, + "loss": 0.0754, + "step": 52950 + }, + { + "epoch": 2.47, + "learning_rate": 1.1859451663975671e-05, + "loss": 0.1461, + "step": 52955 + }, + { + "epoch": 2.47, + "learning_rate": 1.1858667878920887e-05, + "loss": 0.2842, + "step": 52960 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857884093866099e-05, + "loss": 0.2588, + "step": 52965 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857100308811313e-05, + "loss": 0.044, + "step": 52970 + }, + { + "epoch": 2.47, + "learning_rate": 1.1856473280767485e-05, + "loss": 0.0901, + "step": 52975 + }, + { + "epoch": 2.47, + "learning_rate": 1.1855689495712697e-05, + "loss": 0.098, + "step": 52980 + }, + { + "epoch": 2.47, + "learning_rate": 1.185490571065791e-05, + "loss": 0.1027, + "step": 52985 + }, + { + "epoch": 2.47, + "learning_rate": 1.1854121925603123e-05, + "loss": 0.0727, + "step": 52990 + }, + { + "epoch": 2.47, + "learning_rate": 1.1853338140548336e-05, + "loss": 0.086, + "step": 52995 + }, + { + "epoch": 2.47, + "learning_rate": 1.1852554355493551e-05, + "loss": 0.1336, + "step": 53000 + }, + { + "epoch": 2.47, + "learning_rate": 1.1851770570438763e-05, + "loss": 0.2022, + "step": 53005 + }, + { + "epoch": 2.47, + "learning_rate": 1.1850986785383977e-05, + "loss": 0.2916, + "step": 53010 + }, + { + "epoch": 2.47, + "learning_rate": 1.185020300032919e-05, + "loss": 0.1668, + "step": 53015 + }, + { + "epoch": 2.47, + "learning_rate": 1.1849419215274405e-05, + "loss": 0.0279, + "step": 53020 + }, + { + "epoch": 2.47, + "learning_rate": 1.1848635430219617e-05, + "loss": 0.0569, + "step": 53025 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847851645164831e-05, + "loss": 0.0159, + "step": 53030 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847067860110043e-05, + "loss": 0.0532, + "step": 53035 + }, + { + "epoch": 2.47, + "learning_rate": 1.1846284075055259e-05, + "loss": 0.0931, + "step": 53040 + }, + { + "epoch": 2.48, + "learning_rate": 1.1845500290000471e-05, + "loss": 0.1704, + "step": 53045 + }, + { + "epoch": 2.48, + "learning_rate": 1.1844716504945683e-05, + "loss": 0.1275, + "step": 53050 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843932719890897e-05, + "loss": 0.204, + "step": 53055 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843148934836111e-05, + "loss": 0.2639, + "step": 53060 + }, + { + "epoch": 2.48, + "learning_rate": 1.1842365149781325e-05, + "loss": 0.3573, + "step": 53065 + }, + { + "epoch": 2.48, + "learning_rate": 1.1841581364726537e-05, + "loss": 0.0625, + "step": 53070 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840797579671753e-05, + "loss": 0.0354, + "step": 53075 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840013794616965e-05, + "loss": 0.0453, + "step": 53080 + }, + { + "epoch": 2.48, + "learning_rate": 1.1839230009562179e-05, + "loss": 0.0939, + "step": 53085 + }, + { + "epoch": 2.48, + "learning_rate": 1.1838446224507391e-05, + "loss": 0.0633, + "step": 53090 + }, + { + "epoch": 2.48, + "learning_rate": 1.1837662439452607e-05, + "loss": 0.1074, + "step": 53095 + }, + { + "epoch": 2.48, + "learning_rate": 1.183687865439782e-05, + "loss": 0.0843, + "step": 53100 + }, + { + "epoch": 2.48, + "learning_rate": 1.1836094869343033e-05, + "loss": 0.1816, + "step": 53105 + }, + { + "epoch": 2.48, + "learning_rate": 1.1835311084288245e-05, + "loss": 0.283, + "step": 53110 + }, + { + "epoch": 2.48, + "learning_rate": 1.1834527299233457e-05, + "loss": 0.4184, + "step": 53115 + }, + { + "epoch": 2.48, + "learning_rate": 1.1833743514178673e-05, + "loss": 0.0683, + "step": 53120 + }, + { + "epoch": 2.48, + "learning_rate": 1.1832959729123885e-05, + "loss": 0.0495, + "step": 53125 + }, + { + "epoch": 2.48, + "learning_rate": 1.18321759440691e-05, + "loss": 0.0605, + "step": 53130 + }, + { + "epoch": 2.48, + "learning_rate": 1.1831392159014311e-05, + "loss": 0.1192, + "step": 53135 + }, + { + "epoch": 2.48, + "learning_rate": 1.1830608373959527e-05, + "loss": 0.1119, + "step": 53140 + }, + { + "epoch": 2.48, + "learning_rate": 1.182982458890474e-05, + "loss": 0.1132, + "step": 53145 + }, + { + "epoch": 2.48, + "learning_rate": 1.1829040803849953e-05, + "loss": 0.1361, + "step": 53150 + }, + { + "epoch": 2.48, + "learning_rate": 1.1828257018795165e-05, + "loss": 0.2086, + "step": 53155 + }, + { + "epoch": 2.48, + "learning_rate": 1.1827473233740381e-05, + "loss": 0.192, + "step": 53160 + }, + { + "epoch": 2.48, + "learning_rate": 1.1826689448685593e-05, + "loss": 0.2593, + "step": 53165 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825905663630807e-05, + "loss": 0.0378, + "step": 53170 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825121878576021e-05, + "loss": 0.0689, + "step": 53175 + }, + { + "epoch": 2.48, + "learning_rate": 1.1824338093521233e-05, + "loss": 0.0995, + "step": 53180 + }, + { + "epoch": 2.48, + "learning_rate": 1.1823554308466447e-05, + "loss": 0.0625, + "step": 53185 + }, + { + "epoch": 2.48, + "learning_rate": 1.182277052341166e-05, + "loss": 0.0522, + "step": 53190 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821986738356875e-05, + "loss": 0.1119, + "step": 53195 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821202953302087e-05, + "loss": 0.2608, + "step": 53200 + }, + { + "epoch": 2.48, + "learning_rate": 1.1820419168247301e-05, + "loss": 0.1296, + "step": 53205 + }, + { + "epoch": 2.48, + "learning_rate": 1.1819635383192513e-05, + "loss": 0.2392, + "step": 53210 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818851598137729e-05, + "loss": 0.3229, + "step": 53215 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818067813082941e-05, + "loss": 0.0728, + "step": 53220 + }, + { + "epoch": 2.48, + "learning_rate": 1.1817284028028155e-05, + "loss": 0.0496, + "step": 53225 + }, + { + "epoch": 2.48, + "learning_rate": 1.1816500242973367e-05, + "loss": 0.1219, + "step": 53230 + }, + { + "epoch": 2.48, + "learning_rate": 1.1815716457918583e-05, + "loss": 0.0571, + "step": 53235 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814932672863795e-05, + "loss": 0.0712, + "step": 53240 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814148887809007e-05, + "loss": 0.1081, + "step": 53245 + }, + { + "epoch": 2.48, + "learning_rate": 1.1813365102754221e-05, + "loss": 0.1176, + "step": 53250 + }, + { + "epoch": 2.48, + "learning_rate": 1.1812581317699435e-05, + "loss": 0.1212, + "step": 53255 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811797532644649e-05, + "loss": 0.3415, + "step": 53260 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811013747589861e-05, + "loss": 0.2234, + "step": 53265 + }, + { + "epoch": 2.49, + "learning_rate": 1.1810229962535075e-05, + "loss": 0.0399, + "step": 53270 + }, + { + "epoch": 2.49, + "learning_rate": 1.1809446177480289e-05, + "loss": 0.0437, + "step": 53275 + }, + { + "epoch": 2.49, + "learning_rate": 1.1808662392425503e-05, + "loss": 0.0732, + "step": 53280 + }, + { + "epoch": 2.49, + "learning_rate": 1.1807878607370715e-05, + "loss": 0.068, + "step": 53285 + }, + { + "epoch": 2.49, + "learning_rate": 1.180709482231593e-05, + "loss": 0.1564, + "step": 53290 + }, + { + "epoch": 2.49, + "learning_rate": 1.1806311037261143e-05, + "loss": 0.1242, + "step": 53295 + }, + { + "epoch": 2.49, + "learning_rate": 1.1805527252206357e-05, + "loss": 0.0852, + "step": 53300 + }, + { + "epoch": 2.49, + "learning_rate": 1.1804743467151569e-05, + "loss": 0.1913, + "step": 53305 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803959682096781e-05, + "loss": 0.2867, + "step": 53310 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803175897041997e-05, + "loss": 0.2384, + "step": 53315 + }, + { + "epoch": 2.49, + "learning_rate": 1.1802392111987209e-05, + "loss": 0.0679, + "step": 53320 + }, + { + "epoch": 2.49, + "learning_rate": 1.1801608326932423e-05, + "loss": 0.015, + "step": 53325 + }, + { + "epoch": 2.49, + "learning_rate": 1.1800824541877635e-05, + "loss": 0.0466, + "step": 53330 + }, + { + "epoch": 2.49, + "learning_rate": 1.180004075682285e-05, + "loss": 0.0421, + "step": 53335 + }, + { + "epoch": 2.49, + "learning_rate": 1.1799256971768063e-05, + "loss": 0.0758, + "step": 53340 + }, + { + "epoch": 2.49, + "learning_rate": 1.1798473186713277e-05, + "loss": 0.062, + "step": 53345 + }, + { + "epoch": 2.49, + "learning_rate": 1.1797689401658489e-05, + "loss": 0.1198, + "step": 53350 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796905616603705e-05, + "loss": 0.2479, + "step": 53355 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796121831548917e-05, + "loss": 0.2566, + "step": 53360 + }, + { + "epoch": 2.49, + "learning_rate": 1.179533804649413e-05, + "loss": 0.5469, + "step": 53365 + }, + { + "epoch": 2.49, + "learning_rate": 1.1794554261439343e-05, + "loss": 0.0849, + "step": 53370 + }, + { + "epoch": 2.49, + "learning_rate": 1.1793770476384557e-05, + "loss": 0.0693, + "step": 53375 + }, + { + "epoch": 2.49, + "learning_rate": 1.179298669132977e-05, + "loss": 0.0148, + "step": 53380 + }, + { + "epoch": 2.49, + "learning_rate": 1.1792202906274983e-05, + "loss": 0.0683, + "step": 53385 + }, + { + "epoch": 2.49, + "learning_rate": 1.1791419121220199e-05, + "loss": 0.0983, + "step": 53390 + }, + { + "epoch": 2.49, + "learning_rate": 1.179063533616541e-05, + "loss": 0.1328, + "step": 53395 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789851551110625e-05, + "loss": 0.089, + "step": 53400 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789067766055837e-05, + "loss": 0.2294, + "step": 53405 + }, + { + "epoch": 2.49, + "learning_rate": 1.1788283981001053e-05, + "loss": 0.216, + "step": 53410 + }, + { + "epoch": 2.49, + "learning_rate": 1.1787500195946265e-05, + "loss": 0.3105, + "step": 53415 + }, + { + "epoch": 2.49, + "learning_rate": 1.1786716410891479e-05, + "loss": 0.0386, + "step": 53420 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785932625836691e-05, + "loss": 0.0221, + "step": 53425 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785148840781906e-05, + "loss": 0.0662, + "step": 53430 + }, + { + "epoch": 2.49, + "learning_rate": 1.1784365055727119e-05, + "loss": 0.1186, + "step": 53435 + }, + { + "epoch": 2.49, + "learning_rate": 1.1783581270672331e-05, + "loss": 0.1842, + "step": 53440 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782797485617545e-05, + "loss": 0.0644, + "step": 53445 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782013700562757e-05, + "loss": 0.1558, + "step": 53450 + }, + { + "epoch": 2.49, + "learning_rate": 1.1781229915507973e-05, + "loss": 0.1776, + "step": 53455 + }, + { + "epoch": 2.49, + "learning_rate": 1.1780446130453185e-05, + "loss": 0.1429, + "step": 53460 + }, + { + "epoch": 2.49, + "learning_rate": 1.1779662345398399e-05, + "loss": 0.2067, + "step": 53465 + }, + { + "epoch": 2.49, + "learning_rate": 1.1778878560343611e-05, + "loss": 0.0238, + "step": 53470 + }, + { + "epoch": 2.5, + "learning_rate": 1.1778094775288827e-05, + "loss": 0.0882, + "step": 53475 + }, + { + "epoch": 2.5, + "learning_rate": 1.1777310990234039e-05, + "loss": 0.0437, + "step": 53480 + }, + { + "epoch": 2.5, + "learning_rate": 1.1776527205179253e-05, + "loss": 0.1085, + "step": 53485 + }, + { + "epoch": 2.5, + "learning_rate": 1.1775743420124467e-05, + "loss": 0.082, + "step": 53490 + }, + { + "epoch": 2.5, + "learning_rate": 1.177495963506968e-05, + "loss": 0.0515, + "step": 53495 + }, + { + "epoch": 2.5, + "learning_rate": 1.1774175850014893e-05, + "loss": 0.1574, + "step": 53500 + }, + { + "epoch": 2.5, + "learning_rate": 1.1773392064960105e-05, + "loss": 0.176, + "step": 53505 + }, + { + "epoch": 2.5, + "learning_rate": 1.177260827990532e-05, + "loss": 0.3283, + "step": 53510 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771824494850533e-05, + "loss": 0.2003, + "step": 53515 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771040709795747e-05, + "loss": 0.0773, + "step": 53520 + }, + { + "epoch": 2.5, + "learning_rate": 1.1770256924740959e-05, + "loss": 0.0402, + "step": 53525 + }, + { + "epoch": 2.5, + "learning_rate": 1.1769473139686174e-05, + "loss": 0.0686, + "step": 53530 + }, + { + "epoch": 2.5, + "learning_rate": 1.1768689354631387e-05, + "loss": 0.1132, + "step": 53535 + }, + { + "epoch": 2.5, + "learning_rate": 1.17679055695766e-05, + "loss": 0.109, + "step": 53540 + }, + { + "epoch": 2.5, + "learning_rate": 1.1767121784521813e-05, + "loss": 0.1463, + "step": 53545 + }, + { + "epoch": 2.5, + "learning_rate": 1.1766337999467028e-05, + "loss": 0.1783, + "step": 53550 + }, + { + "epoch": 2.5, + "learning_rate": 1.176555421441224e-05, + "loss": 0.1049, + "step": 53555 + }, + { + "epoch": 2.5, + "learning_rate": 1.1764770429357454e-05, + "loss": 0.4038, + "step": 53560 + }, + { + "epoch": 2.5, + "learning_rate": 1.1763986644302667e-05, + "loss": 0.2925, + "step": 53565 + }, + { + "epoch": 2.5, + "learning_rate": 1.176320285924788e-05, + "loss": 0.0268, + "step": 53570 + }, + { + "epoch": 2.5, + "learning_rate": 1.1762419074193094e-05, + "loss": 0.0987, + "step": 53575 + }, + { + "epoch": 2.5, + "learning_rate": 1.1761635289138307e-05, + "loss": 0.0748, + "step": 53580 + }, + { + "epoch": 2.5, + "learning_rate": 1.176085150408352e-05, + "loss": 0.0521, + "step": 53585 + }, + { + "epoch": 2.5, + "learning_rate": 1.1760067719028734e-05, + "loss": 0.1107, + "step": 53590 + }, + { + "epoch": 2.5, + "learning_rate": 1.1759283933973948e-05, + "loss": 0.1081, + "step": 53595 + }, + { + "epoch": 2.5, + "learning_rate": 1.175850014891916e-05, + "loss": 0.0851, + "step": 53600 + }, + { + "epoch": 2.5, + "learning_rate": 1.1757716363864376e-05, + "loss": 0.1295, + "step": 53605 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756932578809588e-05, + "loss": 0.3218, + "step": 53610 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756148793754802e-05, + "loss": 0.3039, + "step": 53615 + }, + { + "epoch": 2.5, + "learning_rate": 1.1755365008700015e-05, + "loss": 0.0713, + "step": 53620 + }, + { + "epoch": 2.5, + "learning_rate": 1.175458122364523e-05, + "loss": 0.0448, + "step": 53625 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753797438590442e-05, + "loss": 0.0533, + "step": 53630 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753013653535655e-05, + "loss": 0.0662, + "step": 53635 + }, + { + "epoch": 2.5, + "learning_rate": 1.1752229868480868e-05, + "loss": 0.0825, + "step": 53640 + }, + { + "epoch": 2.5, + "learning_rate": 1.175144608342608e-05, + "loss": 0.1356, + "step": 53645 + }, + { + "epoch": 2.5, + "learning_rate": 1.1750662298371296e-05, + "loss": 0.119, + "step": 53650 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749878513316508e-05, + "loss": 0.177, + "step": 53655 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749094728261722e-05, + "loss": 0.2674, + "step": 53660 + }, + { + "epoch": 2.5, + "learning_rate": 1.1748310943206935e-05, + "loss": 0.2447, + "step": 53665 + }, + { + "epoch": 2.5, + "learning_rate": 1.174752715815215e-05, + "loss": 0.0238, + "step": 53670 + }, + { + "epoch": 2.5, + "learning_rate": 1.1746743373097362e-05, + "loss": 0.0325, + "step": 53675 + }, + { + "epoch": 2.5, + "learning_rate": 1.1745959588042576e-05, + "loss": 0.0444, + "step": 53680 + }, + { + "epoch": 2.51, + "learning_rate": 1.1745175802987789e-05, + "loss": 0.0893, + "step": 53685 + }, + { + "epoch": 2.51, + "learning_rate": 1.1744392017933004e-05, + "loss": 0.0845, + "step": 53690 + }, + { + "epoch": 2.51, + "learning_rate": 1.1743608232878216e-05, + "loss": 0.0906, + "step": 53695 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742824447823429e-05, + "loss": 0.1417, + "step": 53700 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742040662768644e-05, + "loss": 0.1703, + "step": 53705 + }, + { + "epoch": 2.51, + "learning_rate": 1.1741256877713856e-05, + "loss": 0.2382, + "step": 53710 + }, + { + "epoch": 2.51, + "learning_rate": 1.174047309265907e-05, + "loss": 0.3102, + "step": 53715 + }, + { + "epoch": 2.51, + "learning_rate": 1.1739689307604282e-05, + "loss": 0.0984, + "step": 53720 + }, + { + "epoch": 2.51, + "learning_rate": 1.1738905522549498e-05, + "loss": 0.0297, + "step": 53725 + }, + { + "epoch": 2.51, + "learning_rate": 1.173812173749471e-05, + "loss": 0.0989, + "step": 53730 + }, + { + "epoch": 2.51, + "learning_rate": 1.1737337952439924e-05, + "loss": 0.076, + "step": 53735 + }, + { + "epoch": 2.51, + "learning_rate": 1.1736554167385136e-05, + "loss": 0.1314, + "step": 53740 + }, + { + "epoch": 2.51, + "learning_rate": 1.1735770382330352e-05, + "loss": 0.1822, + "step": 53745 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734986597275564e-05, + "loss": 0.2006, + "step": 53750 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734202812220778e-05, + "loss": 0.1828, + "step": 53755 + }, + { + "epoch": 2.51, + "learning_rate": 1.173341902716599e-05, + "loss": 0.3078, + "step": 53760 + }, + { + "epoch": 2.51, + "learning_rate": 1.1732635242111203e-05, + "loss": 0.3112, + "step": 53765 + }, + { + "epoch": 2.51, + "learning_rate": 1.1731851457056418e-05, + "loss": 0.0143, + "step": 53770 + }, + { + "epoch": 2.51, + "learning_rate": 1.173106767200163e-05, + "loss": 0.0488, + "step": 53775 + }, + { + "epoch": 2.51, + "learning_rate": 1.1730283886946844e-05, + "loss": 0.0628, + "step": 53780 + }, + { + "epoch": 2.51, + "learning_rate": 1.1729500101892058e-05, + "loss": 0.162, + "step": 53785 + }, + { + "epoch": 2.51, + "learning_rate": 1.1728716316837272e-05, + "loss": 0.1037, + "step": 53790 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727932531782484e-05, + "loss": 0.0923, + "step": 53795 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727148746727698e-05, + "loss": 0.0755, + "step": 53800 + }, + { + "epoch": 2.51, + "learning_rate": 1.1726364961672912e-05, + "loss": 0.1833, + "step": 53805 + }, + { + "epoch": 2.51, + "learning_rate": 1.1725581176618126e-05, + "loss": 0.1893, + "step": 53810 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724797391563338e-05, + "loss": 0.3448, + "step": 53815 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724013606508554e-05, + "loss": 0.0322, + "step": 53820 + }, + { + "epoch": 2.51, + "learning_rate": 1.1723229821453766e-05, + "loss": 0.0322, + "step": 53825 + }, + { + "epoch": 2.51, + "learning_rate": 1.1722446036398978e-05, + "loss": 0.0736, + "step": 53830 + }, + { + "epoch": 2.51, + "learning_rate": 1.1721662251344192e-05, + "loss": 0.0983, + "step": 53835 + }, + { + "epoch": 2.51, + "learning_rate": 1.1720878466289404e-05, + "loss": 0.0753, + "step": 53840 + }, + { + "epoch": 2.51, + "learning_rate": 1.172009468123462e-05, + "loss": 0.0771, + "step": 53845 + }, + { + "epoch": 2.51, + "learning_rate": 1.1719310896179832e-05, + "loss": 0.1376, + "step": 53850 + }, + { + "epoch": 2.51, + "learning_rate": 1.1718527111125046e-05, + "loss": 0.1836, + "step": 53855 + }, + { + "epoch": 2.51, + "learning_rate": 1.1717743326070258e-05, + "loss": 0.2587, + "step": 53860 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716959541015474e-05, + "loss": 0.2983, + "step": 53865 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716175755960686e-05, + "loss": 0.0732, + "step": 53870 + }, + { + "epoch": 2.51, + "learning_rate": 1.17153919709059e-05, + "loss": 0.0571, + "step": 53875 + }, + { + "epoch": 2.51, + "learning_rate": 1.1714608185851112e-05, + "loss": 0.0792, + "step": 53880 + }, + { + "epoch": 2.51, + "learning_rate": 1.1713824400796328e-05, + "loss": 0.0731, + "step": 53885 + }, + { + "epoch": 2.51, + "learning_rate": 1.171304061574154e-05, + "loss": 0.0635, + "step": 53890 + }, + { + "epoch": 2.51, + "learning_rate": 1.1712256830686752e-05, + "loss": 0.1637, + "step": 53895 + }, + { + "epoch": 2.52, + "learning_rate": 1.1711473045631966e-05, + "loss": 0.2193, + "step": 53900 + }, + { + "epoch": 2.52, + "learning_rate": 1.171068926057718e-05, + "loss": 0.2715, + "step": 53905 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709905475522394e-05, + "loss": 0.3451, + "step": 53910 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709121690467606e-05, + "loss": 0.2351, + "step": 53915 + }, + { + "epoch": 2.52, + "learning_rate": 1.1708337905412822e-05, + "loss": 0.0587, + "step": 53920 + }, + { + "epoch": 2.52, + "learning_rate": 1.1707554120358034e-05, + "loss": 0.0571, + "step": 53925 + }, + { + "epoch": 2.52, + "learning_rate": 1.1706770335303248e-05, + "loss": 0.0329, + "step": 53930 + }, + { + "epoch": 2.52, + "learning_rate": 1.170598655024846e-05, + "loss": 0.0982, + "step": 53935 + }, + { + "epoch": 2.52, + "learning_rate": 1.1705202765193676e-05, + "loss": 0.056, + "step": 53940 + }, + { + "epoch": 2.52, + "learning_rate": 1.1704418980138888e-05, + "loss": 0.1438, + "step": 53945 + }, + { + "epoch": 2.52, + "learning_rate": 1.1703635195084102e-05, + "loss": 0.1759, + "step": 53950 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702851410029314e-05, + "loss": 0.2695, + "step": 53955 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702067624974526e-05, + "loss": 0.2936, + "step": 53960 + }, + { + "epoch": 2.52, + "learning_rate": 1.1701283839919742e-05, + "loss": 0.2662, + "step": 53965 + }, + { + "epoch": 2.52, + "learning_rate": 1.1700500054864954e-05, + "loss": 0.0413, + "step": 53970 + }, + { + "epoch": 2.52, + "learning_rate": 1.1699716269810168e-05, + "loss": 0.0348, + "step": 53975 + }, + { + "epoch": 2.52, + "learning_rate": 1.169893248475538e-05, + "loss": 0.0487, + "step": 53980 + }, + { + "epoch": 2.52, + "learning_rate": 1.1698148699700596e-05, + "loss": 0.0561, + "step": 53985 + }, + { + "epoch": 2.52, + "learning_rate": 1.1697364914645808e-05, + "loss": 0.1206, + "step": 53990 + }, + { + "epoch": 2.52, + "learning_rate": 1.1696581129591022e-05, + "loss": 0.1703, + "step": 53995 + }, + { + "epoch": 2.52, + "learning_rate": 1.1695797344536234e-05, + "loss": 0.1549, + "step": 54000 + }, + { + "epoch": 2.52, + "learning_rate": 1.169501355948145e-05, + "loss": 0.209, + "step": 54005 + }, + { + "epoch": 2.52, + "learning_rate": 1.1694229774426662e-05, + "loss": 0.4151, + "step": 54010 + }, + { + "epoch": 2.52, + "learning_rate": 1.1693445989371876e-05, + "loss": 0.1933, + "step": 54015 + }, + { + "epoch": 2.52, + "learning_rate": 1.169266220431709e-05, + "loss": 0.0544, + "step": 54020 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691878419262302e-05, + "loss": 0.0166, + "step": 54025 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691094634207516e-05, + "loss": 0.0557, + "step": 54030 + }, + { + "epoch": 2.52, + "learning_rate": 1.1690310849152728e-05, + "loss": 0.0669, + "step": 54035 + }, + { + "epoch": 2.52, + "learning_rate": 1.1689527064097944e-05, + "loss": 0.0985, + "step": 54040 + }, + { + "epoch": 2.52, + "learning_rate": 1.1688743279043156e-05, + "loss": 0.1685, + "step": 54045 + }, + { + "epoch": 2.52, + "learning_rate": 1.168795949398837e-05, + "loss": 0.1963, + "step": 54050 + }, + { + "epoch": 2.52, + "learning_rate": 1.1687175708933582e-05, + "loss": 0.192, + "step": 54055 + }, + { + "epoch": 2.52, + "learning_rate": 1.1686391923878798e-05, + "loss": 0.3119, + "step": 54060 + }, + { + "epoch": 2.52, + "learning_rate": 1.168560813882401e-05, + "loss": 0.3424, + "step": 54065 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684824353769224e-05, + "loss": 0.0345, + "step": 54070 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684040568714436e-05, + "loss": 0.0528, + "step": 54075 + }, + { + "epoch": 2.52, + "learning_rate": 1.1683256783659652e-05, + "loss": 0.053, + "step": 54080 + }, + { + "epoch": 2.52, + "learning_rate": 1.1682472998604864e-05, + "loss": 0.0567, + "step": 54085 + }, + { + "epoch": 2.52, + "learning_rate": 1.1681689213550076e-05, + "loss": 0.1253, + "step": 54090 + }, + { + "epoch": 2.52, + "learning_rate": 1.168090542849529e-05, + "loss": 0.1384, + "step": 54095 + }, + { + "epoch": 2.52, + "learning_rate": 1.1680121643440504e-05, + "loss": 0.1149, + "step": 54100 + }, + { + "epoch": 2.52, + "learning_rate": 1.1679337858385718e-05, + "loss": 0.1669, + "step": 54105 + }, + { + "epoch": 2.52, + "learning_rate": 1.1678710830341888e-05, + "loss": 0.1833, + "step": 54110 + }, + { + "epoch": 2.53, + "learning_rate": 1.16779270452871e-05, + "loss": 0.2386, + "step": 54115 + }, + { + "epoch": 2.53, + "learning_rate": 1.1677143260232316e-05, + "loss": 0.0729, + "step": 54120 + }, + { + "epoch": 2.53, + "learning_rate": 1.1676359475177528e-05, + "loss": 0.0523, + "step": 54125 + }, + { + "epoch": 2.53, + "learning_rate": 1.1675575690122742e-05, + "loss": 0.0632, + "step": 54130 + }, + { + "epoch": 2.53, + "learning_rate": 1.1674791905067954e-05, + "loss": 0.0702, + "step": 54135 + }, + { + "epoch": 2.53, + "learning_rate": 1.167400812001317e-05, + "loss": 0.0453, + "step": 54140 + }, + { + "epoch": 2.53, + "learning_rate": 1.1673224334958382e-05, + "loss": 0.1348, + "step": 54145 + }, + { + "epoch": 2.53, + "learning_rate": 1.1672440549903596e-05, + "loss": 0.156, + "step": 54150 + }, + { + "epoch": 2.53, + "learning_rate": 1.1671656764848808e-05, + "loss": 0.1055, + "step": 54155 + }, + { + "epoch": 2.53, + "learning_rate": 1.167087297979402e-05, + "loss": 0.1431, + "step": 54160 + }, + { + "epoch": 2.53, + "learning_rate": 1.1670089194739236e-05, + "loss": 0.3631, + "step": 54165 + }, + { + "epoch": 2.53, + "learning_rate": 1.1669305409684448e-05, + "loss": 0.0497, + "step": 54170 + }, + { + "epoch": 2.53, + "learning_rate": 1.1668521624629662e-05, + "loss": 0.0897, + "step": 54175 + }, + { + "epoch": 2.53, + "learning_rate": 1.1667737839574876e-05, + "loss": 0.0819, + "step": 54180 + }, + { + "epoch": 2.53, + "learning_rate": 1.166695405452009e-05, + "loss": 0.0764, + "step": 54185 + }, + { + "epoch": 2.53, + "learning_rate": 1.1666170269465302e-05, + "loss": 0.0894, + "step": 54190 + }, + { + "epoch": 2.53, + "learning_rate": 1.1665386484410518e-05, + "loss": 0.0866, + "step": 54195 + }, + { + "epoch": 2.53, + "learning_rate": 1.166460269935573e-05, + "loss": 0.1523, + "step": 54200 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663818914300944e-05, + "loss": 0.1929, + "step": 54205 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663035129246156e-05, + "loss": 0.2861, + "step": 54210 + }, + { + "epoch": 2.53, + "learning_rate": 1.1662251344191372e-05, + "loss": 0.2864, + "step": 54215 + }, + { + "epoch": 2.53, + "learning_rate": 1.1661467559136584e-05, + "loss": 0.0513, + "step": 54220 + }, + { + "epoch": 2.53, + "learning_rate": 1.1660683774081796e-05, + "loss": 0.0342, + "step": 54225 + }, + { + "epoch": 2.53, + "learning_rate": 1.165989998902701e-05, + "loss": 0.078, + "step": 54230 + }, + { + "epoch": 2.53, + "learning_rate": 1.1659116203972222e-05, + "loss": 0.0786, + "step": 54235 + }, + { + "epoch": 2.53, + "learning_rate": 1.1658332418917438e-05, + "loss": 0.1127, + "step": 54240 + }, + { + "epoch": 2.53, + "learning_rate": 1.165754863386265e-05, + "loss": 0.1296, + "step": 54245 + }, + { + "epoch": 2.53, + "learning_rate": 1.1656764848807864e-05, + "loss": 0.134, + "step": 54250 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655981063753076e-05, + "loss": 0.2205, + "step": 54255 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655197278698292e-05, + "loss": 0.1846, + "step": 54260 + }, + { + "epoch": 2.53, + "learning_rate": 1.1654413493643504e-05, + "loss": 0.3259, + "step": 54265 + }, + { + "epoch": 2.53, + "learning_rate": 1.1653629708588718e-05, + "loss": 0.0222, + "step": 54270 + }, + { + "epoch": 2.53, + "learning_rate": 1.165284592353393e-05, + "loss": 0.0677, + "step": 54275 + }, + { + "epoch": 2.53, + "learning_rate": 1.1652062138479146e-05, + "loss": 0.0542, + "step": 54280 + }, + { + "epoch": 2.53, + "learning_rate": 1.1651278353424358e-05, + "loss": 0.0699, + "step": 54285 + }, + { + "epoch": 2.53, + "learning_rate": 1.165049456836957e-05, + "loss": 0.0722, + "step": 54290 + }, + { + "epoch": 2.53, + "learning_rate": 1.1649710783314786e-05, + "loss": 0.1666, + "step": 54295 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648926998259998e-05, + "loss": 0.2113, + "step": 54300 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648143213205212e-05, + "loss": 0.1151, + "step": 54305 + }, + { + "epoch": 2.53, + "learning_rate": 1.1647359428150424e-05, + "loss": 0.2646, + "step": 54310 + }, + { + "epoch": 2.53, + "learning_rate": 1.164657564309564e-05, + "loss": 0.2441, + "step": 54315 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645791858040852e-05, + "loss": 0.0764, + "step": 54320 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645008072986066e-05, + "loss": 0.0424, + "step": 54325 + }, + { + "epoch": 2.54, + "learning_rate": 1.1644224287931278e-05, + "loss": 0.0538, + "step": 54330 + }, + { + "epoch": 2.54, + "learning_rate": 1.1643440502876494e-05, + "loss": 0.0319, + "step": 54335 + }, + { + "epoch": 2.54, + "learning_rate": 1.1642656717821706e-05, + "loss": 0.1016, + "step": 54340 + }, + { + "epoch": 2.54, + "learning_rate": 1.164187293276692e-05, + "loss": 0.0941, + "step": 54345 + }, + { + "epoch": 2.54, + "learning_rate": 1.1641089147712132e-05, + "loss": 0.0697, + "step": 54350 + }, + { + "epoch": 2.54, + "learning_rate": 1.1640305362657344e-05, + "loss": 0.1258, + "step": 54355 + }, + { + "epoch": 2.54, + "learning_rate": 1.163952157760256e-05, + "loss": 0.2775, + "step": 54360 + }, + { + "epoch": 2.54, + "learning_rate": 1.1638737792547772e-05, + "loss": 0.3991, + "step": 54365 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637954007492986e-05, + "loss": 0.098, + "step": 54370 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637170222438198e-05, + "loss": 0.0404, + "step": 54375 + }, + { + "epoch": 2.54, + "learning_rate": 1.1636386437383414e-05, + "loss": 0.0601, + "step": 54380 + }, + { + "epoch": 2.54, + "learning_rate": 1.1635602652328626e-05, + "loss": 0.0452, + "step": 54385 + }, + { + "epoch": 2.54, + "learning_rate": 1.163481886727384e-05, + "loss": 0.1061, + "step": 54390 + }, + { + "epoch": 2.54, + "learning_rate": 1.1634035082219054e-05, + "loss": 0.0819, + "step": 54395 + }, + { + "epoch": 2.54, + "learning_rate": 1.1633251297164268e-05, + "loss": 0.1277, + "step": 54400 + }, + { + "epoch": 2.54, + "learning_rate": 1.163246751210948e-05, + "loss": 0.2061, + "step": 54405 + }, + { + "epoch": 2.54, + "learning_rate": 1.1631683727054695e-05, + "loss": 0.3015, + "step": 54410 + }, + { + "epoch": 2.54, + "learning_rate": 1.1630899941999908e-05, + "loss": 0.4022, + "step": 54415 + }, + { + "epoch": 2.54, + "learning_rate": 1.163011615694512e-05, + "loss": 0.0536, + "step": 54420 + }, + { + "epoch": 2.54, + "learning_rate": 1.1629332371890334e-05, + "loss": 0.0426, + "step": 54425 + }, + { + "epoch": 2.54, + "learning_rate": 1.1628548586835546e-05, + "loss": 0.046, + "step": 54430 + }, + { + "epoch": 2.54, + "learning_rate": 1.1627764801780761e-05, + "loss": 0.0336, + "step": 54435 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626981016725974e-05, + "loss": 0.0846, + "step": 54440 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626197231671188e-05, + "loss": 0.0895, + "step": 54445 + }, + { + "epoch": 2.54, + "learning_rate": 1.16254134466164e-05, + "loss": 0.0969, + "step": 54450 + }, + { + "epoch": 2.54, + "learning_rate": 1.1624629661561615e-05, + "loss": 0.1187, + "step": 54455 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623845876506828e-05, + "loss": 0.2181, + "step": 54460 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623062091452042e-05, + "loss": 0.2948, + "step": 54465 + }, + { + "epoch": 2.54, + "learning_rate": 1.1622278306397254e-05, + "loss": 0.0419, + "step": 54470 + }, + { + "epoch": 2.54, + "learning_rate": 1.162149452134247e-05, + "loss": 0.0756, + "step": 54475 + }, + { + "epoch": 2.54, + "learning_rate": 1.1620710736287682e-05, + "loss": 0.0746, + "step": 54480 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619926951232894e-05, + "loss": 0.0578, + "step": 54485 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619143166178108e-05, + "loss": 0.047, + "step": 54490 + }, + { + "epoch": 2.54, + "learning_rate": 1.1618359381123322e-05, + "loss": 0.1197, + "step": 54495 + }, + { + "epoch": 2.54, + "learning_rate": 1.1617575596068535e-05, + "loss": 0.0707, + "step": 54500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616791811013748e-05, + "loss": 0.1846, + "step": 54505 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616008025958963e-05, + "loss": 0.3005, + "step": 54510 + }, + { + "epoch": 2.54, + "learning_rate": 1.1615224240904176e-05, + "loss": 0.3327, + "step": 54515 + }, + { + "epoch": 2.54, + "learning_rate": 1.161444045584939e-05, + "loss": 0.0754, + "step": 54520 + }, + { + "epoch": 2.54, + "learning_rate": 1.1613656670794602e-05, + "loss": 0.0423, + "step": 54525 + }, + { + "epoch": 2.54, + "learning_rate": 1.1612872885739817e-05, + "loss": 0.0361, + "step": 54530 + }, + { + "epoch": 2.54, + "learning_rate": 1.161208910068503e-05, + "loss": 0.0925, + "step": 54535 + }, + { + "epoch": 2.54, + "learning_rate": 1.1611305315630243e-05, + "loss": 0.0593, + "step": 54540 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610521530575456e-05, + "loss": 0.1092, + "step": 54545 + }, + { + "epoch": 2.55, + "learning_rate": 1.1609737745520668e-05, + "loss": 0.1291, + "step": 54550 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608953960465883e-05, + "loss": 0.2171, + "step": 54555 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608170175411096e-05, + "loss": 0.1797, + "step": 54560 + }, + { + "epoch": 2.55, + "learning_rate": 1.160738639035631e-05, + "loss": 0.3179, + "step": 54565 + }, + { + "epoch": 2.55, + "learning_rate": 1.1606602605301522e-05, + "loss": 0.0331, + "step": 54570 + }, + { + "epoch": 2.55, + "learning_rate": 1.1605818820246737e-05, + "loss": 0.0527, + "step": 54575 + }, + { + "epoch": 2.55, + "learning_rate": 1.160503503519195e-05, + "loss": 0.0389, + "step": 54580 + }, + { + "epoch": 2.55, + "learning_rate": 1.1604251250137163e-05, + "loss": 0.0399, + "step": 54585 + }, + { + "epoch": 2.55, + "learning_rate": 1.1603467465082376e-05, + "loss": 0.0983, + "step": 54590 + }, + { + "epoch": 2.55, + "learning_rate": 1.1602683680027591e-05, + "loss": 0.0581, + "step": 54595 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601899894972803e-05, + "loss": 0.1263, + "step": 54600 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601116109918017e-05, + "loss": 0.1091, + "step": 54605 + }, + { + "epoch": 2.55, + "learning_rate": 1.1600332324863231e-05, + "loss": 0.3447, + "step": 54610 + }, + { + "epoch": 2.55, + "learning_rate": 1.1599548539808443e-05, + "loss": 0.2702, + "step": 54615 + }, + { + "epoch": 2.55, + "learning_rate": 1.1598764754753657e-05, + "loss": 0.1182, + "step": 54620 + }, + { + "epoch": 2.55, + "learning_rate": 1.159798096969887e-05, + "loss": 0.0375, + "step": 54625 + }, + { + "epoch": 2.55, + "learning_rate": 1.1597197184644085e-05, + "loss": 0.0246, + "step": 54630 + }, + { + "epoch": 2.55, + "learning_rate": 1.1596413399589297e-05, + "loss": 0.0543, + "step": 54635 + }, + { + "epoch": 2.55, + "learning_rate": 1.1595629614534511e-05, + "loss": 0.0953, + "step": 54640 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594845829479724e-05, + "loss": 0.206, + "step": 54645 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594062044424939e-05, + "loss": 0.1144, + "step": 54650 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593278259370151e-05, + "loss": 0.1205, + "step": 54655 + }, + { + "epoch": 2.55, + "learning_rate": 1.1592494474315365e-05, + "loss": 0.2163, + "step": 54660 + }, + { + "epoch": 2.55, + "learning_rate": 1.1591710689260577e-05, + "loss": 0.34, + "step": 54665 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590926904205793e-05, + "loss": 0.0405, + "step": 54670 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590143119151005e-05, + "loss": 0.0285, + "step": 54675 + }, + { + "epoch": 2.55, + "learning_rate": 1.1589359334096217e-05, + "loss": 0.0436, + "step": 54680 + }, + { + "epoch": 2.55, + "learning_rate": 1.1588575549041431e-05, + "loss": 0.036, + "step": 54685 + }, + { + "epoch": 2.55, + "learning_rate": 1.1587791763986644e-05, + "loss": 0.0615, + "step": 54690 + }, + { + "epoch": 2.55, + "learning_rate": 1.158700797893186e-05, + "loss": 0.073, + "step": 54695 + }, + { + "epoch": 2.55, + "learning_rate": 1.1586224193877071e-05, + "loss": 0.0988, + "step": 54700 + }, + { + "epoch": 2.55, + "learning_rate": 1.1585440408822285e-05, + "loss": 0.1362, + "step": 54705 + }, + { + "epoch": 2.55, + "learning_rate": 1.15846566237675e-05, + "loss": 0.2456, + "step": 54710 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583872838712713e-05, + "loss": 0.2314, + "step": 54715 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583089053657925e-05, + "loss": 0.1112, + "step": 54720 + }, + { + "epoch": 2.55, + "learning_rate": 1.1582305268603141e-05, + "loss": 0.125, + "step": 54725 + }, + { + "epoch": 2.55, + "learning_rate": 1.1581521483548353e-05, + "loss": 0.0651, + "step": 54730 + }, + { + "epoch": 2.55, + "learning_rate": 1.1580737698493567e-05, + "loss": 0.1076, + "step": 54735 + }, + { + "epoch": 2.55, + "learning_rate": 1.157995391343878e-05, + "loss": 0.0838, + "step": 54740 + }, + { + "epoch": 2.55, + "learning_rate": 1.1579170128383991e-05, + "loss": 0.0998, + "step": 54745 + }, + { + "epoch": 2.55, + "learning_rate": 1.1578386343329207e-05, + "loss": 0.0908, + "step": 54750 + }, + { + "epoch": 2.55, + "learning_rate": 1.157760255827442e-05, + "loss": 0.1566, + "step": 54755 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576818773219633e-05, + "loss": 0.325, + "step": 54760 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576034988164845e-05, + "loss": 0.3982, + "step": 54765 + }, + { + "epoch": 2.56, + "learning_rate": 1.1575251203110061e-05, + "loss": 0.0304, + "step": 54770 + }, + { + "epoch": 2.56, + "learning_rate": 1.1574467418055273e-05, + "loss": 0.0603, + "step": 54775 + }, + { + "epoch": 2.56, + "learning_rate": 1.1573683633000487e-05, + "loss": 0.0578, + "step": 54780 + }, + { + "epoch": 2.56, + "learning_rate": 1.15728998479457e-05, + "loss": 0.0566, + "step": 54785 + }, + { + "epoch": 2.56, + "learning_rate": 1.1572116062890915e-05, + "loss": 0.0771, + "step": 54790 + }, + { + "epoch": 2.56, + "learning_rate": 1.1571332277836127e-05, + "loss": 0.1294, + "step": 54795 + }, + { + "epoch": 2.56, + "learning_rate": 1.1570548492781341e-05, + "loss": 0.1138, + "step": 54800 + }, + { + "epoch": 2.56, + "learning_rate": 1.1569764707726553e-05, + "loss": 0.1218, + "step": 54805 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568980922671767e-05, + "loss": 0.256, + "step": 54810 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568197137616981e-05, + "loss": 0.3657, + "step": 54815 + }, + { + "epoch": 2.56, + "learning_rate": 1.1567413352562193e-05, + "loss": 0.0775, + "step": 54820 + }, + { + "epoch": 2.56, + "learning_rate": 1.1566629567507409e-05, + "loss": 0.0329, + "step": 54825 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565845782452621e-05, + "loss": 0.02, + "step": 54830 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565061997397835e-05, + "loss": 0.0346, + "step": 54835 + }, + { + "epoch": 2.56, + "learning_rate": 1.1564278212343047e-05, + "loss": 0.0832, + "step": 54840 + }, + { + "epoch": 2.56, + "learning_rate": 1.1563494427288263e-05, + "loss": 0.0888, + "step": 54845 + }, + { + "epoch": 2.56, + "learning_rate": 1.1562710642233475e-05, + "loss": 0.1356, + "step": 54850 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561926857178689e-05, + "loss": 0.1574, + "step": 54855 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561143072123901e-05, + "loss": 0.2213, + "step": 54860 + }, + { + "epoch": 2.56, + "learning_rate": 1.1560359287069117e-05, + "loss": 0.2773, + "step": 54865 + }, + { + "epoch": 2.56, + "learning_rate": 1.1559575502014329e-05, + "loss": 0.073, + "step": 54870 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558791716959541e-05, + "loss": 0.0188, + "step": 54875 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558007931904755e-05, + "loss": 0.0895, + "step": 54880 + }, + { + "epoch": 2.56, + "learning_rate": 1.1557224146849967e-05, + "loss": 0.0839, + "step": 54885 + }, + { + "epoch": 2.56, + "learning_rate": 1.1556440361795183e-05, + "loss": 0.0635, + "step": 54890 + }, + { + "epoch": 2.56, + "learning_rate": 1.1555656576740395e-05, + "loss": 0.0701, + "step": 54895 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554872791685609e-05, + "loss": 0.1132, + "step": 54900 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554089006630821e-05, + "loss": 0.1373, + "step": 54905 + }, + { + "epoch": 2.56, + "learning_rate": 1.1553305221576037e-05, + "loss": 0.1604, + "step": 54910 + }, + { + "epoch": 2.56, + "learning_rate": 1.1552521436521249e-05, + "loss": 0.3499, + "step": 54915 + }, + { + "epoch": 2.56, + "learning_rate": 1.1551737651466463e-05, + "loss": 0.0524, + "step": 54920 + }, + { + "epoch": 2.56, + "learning_rate": 1.1550953866411677e-05, + "loss": 0.026, + "step": 54925 + }, + { + "epoch": 2.56, + "learning_rate": 1.155017008135689e-05, + "loss": 0.0591, + "step": 54930 + }, + { + "epoch": 2.56, + "learning_rate": 1.1549386296302103e-05, + "loss": 0.1202, + "step": 54935 + }, + { + "epoch": 2.56, + "learning_rate": 1.1548602511247315e-05, + "loss": 0.0974, + "step": 54940 + }, + { + "epoch": 2.56, + "learning_rate": 1.154781872619253e-05, + "loss": 0.1075, + "step": 54945 + }, + { + "epoch": 2.56, + "learning_rate": 1.1547034941137743e-05, + "loss": 0.588, + "step": 54950 + }, + { + "epoch": 2.56, + "learning_rate": 1.1546251156082957e-05, + "loss": 0.1737, + "step": 54955 + }, + { + "epoch": 2.56, + "learning_rate": 1.1545467371028169e-05, + "loss": 0.2966, + "step": 54960 + }, + { + "epoch": 2.56, + "learning_rate": 1.1544683585973385e-05, + "loss": 0.1925, + "step": 54965 + }, + { + "epoch": 2.56, + "learning_rate": 1.1543899800918597e-05, + "loss": 0.0461, + "step": 54970 + }, + { + "epoch": 2.57, + "learning_rate": 1.154311601586381e-05, + "loss": 0.0533, + "step": 54975 + }, + { + "epoch": 2.57, + "learning_rate": 1.1542332230809023e-05, + "loss": 0.0946, + "step": 54980 + }, + { + "epoch": 2.57, + "learning_rate": 1.1541548445754239e-05, + "loss": 0.0699, + "step": 54985 + }, + { + "epoch": 2.57, + "learning_rate": 1.154076466069945e-05, + "loss": 0.125, + "step": 54990 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539980875644665e-05, + "loss": 0.0972, + "step": 54995 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539197090589877e-05, + "loss": 0.2018, + "step": 55000 + }, + { + "epoch": 2.57, + "learning_rate": 1.153841330553509e-05, + "loss": 0.2578, + "step": 55005 + }, + { + "epoch": 2.57, + "learning_rate": 1.1537629520480305e-05, + "loss": 0.2174, + "step": 55010 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536845735425517e-05, + "loss": 0.2747, + "step": 55015 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536061950370731e-05, + "loss": 0.1615, + "step": 55020 + }, + { + "epoch": 2.57, + "learning_rate": 1.1535278165315945e-05, + "loss": 0.034, + "step": 55025 + }, + { + "epoch": 2.57, + "learning_rate": 1.1534494380261159e-05, + "loss": 0.101, + "step": 55030 + }, + { + "epoch": 2.57, + "learning_rate": 1.1533710595206371e-05, + "loss": 0.1131, + "step": 55035 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532926810151586e-05, + "loss": 0.142, + "step": 55040 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532143025096799e-05, + "loss": 0.0432, + "step": 55045 + }, + { + "epoch": 2.57, + "learning_rate": 1.1531359240042013e-05, + "loss": 0.1238, + "step": 55050 + }, + { + "epoch": 2.57, + "learning_rate": 1.1530575454987225e-05, + "loss": 0.1882, + "step": 55055 + }, + { + "epoch": 2.57, + "learning_rate": 1.152979166993244e-05, + "loss": 0.1863, + "step": 55060 + }, + { + "epoch": 2.57, + "learning_rate": 1.1529007884877653e-05, + "loss": 0.3653, + "step": 55065 + }, + { + "epoch": 2.57, + "learning_rate": 1.1528224099822865e-05, + "loss": 0.043, + "step": 55070 + }, + { + "epoch": 2.57, + "learning_rate": 1.1527440314768079e-05, + "loss": 0.0351, + "step": 55075 + }, + { + "epoch": 2.57, + "learning_rate": 1.1526656529713291e-05, + "loss": 0.0739, + "step": 55080 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525872744658507e-05, + "loss": 0.0644, + "step": 55085 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525088959603719e-05, + "loss": 0.1164, + "step": 55090 + }, + { + "epoch": 2.57, + "learning_rate": 1.1524305174548933e-05, + "loss": 0.1265, + "step": 55095 + }, + { + "epoch": 2.57, + "learning_rate": 1.1523521389494145e-05, + "loss": 0.0871, + "step": 55100 + }, + { + "epoch": 2.57, + "learning_rate": 1.152273760443936e-05, + "loss": 0.1264, + "step": 55105 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521953819384573e-05, + "loss": 0.3587, + "step": 55110 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521170034329787e-05, + "loss": 0.4438, + "step": 55115 + }, + { + "epoch": 2.57, + "learning_rate": 1.1520386249274999e-05, + "loss": 0.0607, + "step": 55120 + }, + { + "epoch": 2.57, + "learning_rate": 1.1519602464220214e-05, + "loss": 0.0342, + "step": 55125 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518818679165427e-05, + "loss": 0.022, + "step": 55130 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518034894110639e-05, + "loss": 0.0265, + "step": 55135 + }, + { + "epoch": 2.57, + "learning_rate": 1.1517251109055854e-05, + "loss": 0.0946, + "step": 55140 + }, + { + "epoch": 2.57, + "learning_rate": 1.1516467324001067e-05, + "loss": 0.1235, + "step": 55145 + }, + { + "epoch": 2.57, + "learning_rate": 1.151568353894628e-05, + "loss": 0.2051, + "step": 55150 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514899753891493e-05, + "loss": 0.2828, + "step": 55155 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514115968836708e-05, + "loss": 0.2089, + "step": 55160 + }, + { + "epoch": 2.57, + "learning_rate": 1.151333218378192e-05, + "loss": 0.2756, + "step": 55165 + }, + { + "epoch": 2.57, + "learning_rate": 1.1512548398727134e-05, + "loss": 0.0753, + "step": 55170 + }, + { + "epoch": 2.57, + "learning_rate": 1.1511764613672347e-05, + "loss": 0.0189, + "step": 55175 + }, + { + "epoch": 2.57, + "learning_rate": 1.1510980828617562e-05, + "loss": 0.0486, + "step": 55180 + }, + { + "epoch": 2.58, + "learning_rate": 1.1510197043562775e-05, + "loss": 0.0668, + "step": 55185 + }, + { + "epoch": 2.58, + "learning_rate": 1.1509413258507988e-05, + "loss": 0.0484, + "step": 55190 + }, + { + "epoch": 2.58, + "learning_rate": 1.15086294734532e-05, + "loss": 0.2344, + "step": 55195 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507845688398413e-05, + "loss": 0.1022, + "step": 55200 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507061903343628e-05, + "loss": 0.2039, + "step": 55205 + }, + { + "epoch": 2.58, + "learning_rate": 1.150627811828884e-05, + "loss": 0.3759, + "step": 55210 + }, + { + "epoch": 2.58, + "learning_rate": 1.1505494333234055e-05, + "loss": 0.3593, + "step": 55215 + }, + { + "epoch": 2.58, + "learning_rate": 1.1504710548179267e-05, + "loss": 0.0577, + "step": 55220 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503926763124482e-05, + "loss": 0.0329, + "step": 55225 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503142978069695e-05, + "loss": 0.0644, + "step": 55230 + }, + { + "epoch": 2.58, + "learning_rate": 1.1502359193014908e-05, + "loss": 0.0507, + "step": 55235 + }, + { + "epoch": 2.58, + "learning_rate": 1.1501575407960122e-05, + "loss": 0.0605, + "step": 55240 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500791622905336e-05, + "loss": 0.1677, + "step": 55245 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500007837850549e-05, + "loss": 0.1497, + "step": 55250 + }, + { + "epoch": 2.58, + "learning_rate": 1.1499224052795762e-05, + "loss": 0.1236, + "step": 55255 + }, + { + "epoch": 2.58, + "learning_rate": 1.1498440267740976e-05, + "loss": 0.2027, + "step": 55260 + }, + { + "epoch": 2.58, + "learning_rate": 1.1497656482686189e-05, + "loss": 0.3551, + "step": 55265 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496872697631402e-05, + "loss": 0.1194, + "step": 55270 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496088912576615e-05, + "loss": 0.0431, + "step": 55275 + }, + { + "epoch": 2.58, + "learning_rate": 1.149530512752183e-05, + "loss": 0.0716, + "step": 55280 + }, + { + "epoch": 2.58, + "learning_rate": 1.1494521342467042e-05, + "loss": 0.0789, + "step": 55285 + }, + { + "epoch": 2.58, + "learning_rate": 1.1493737557412256e-05, + "loss": 0.0781, + "step": 55290 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492953772357469e-05, + "loss": 0.0786, + "step": 55295 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492169987302684e-05, + "loss": 0.1363, + "step": 55300 + }, + { + "epoch": 2.58, + "learning_rate": 1.1491386202247896e-05, + "loss": 0.2028, + "step": 55305 + }, + { + "epoch": 2.58, + "learning_rate": 1.149060241719311e-05, + "loss": 0.1638, + "step": 55310 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489818632138323e-05, + "loss": 0.2609, + "step": 55315 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489034847083538e-05, + "loss": 0.0372, + "step": 55320 + }, + { + "epoch": 2.58, + "learning_rate": 1.148825106202875e-05, + "loss": 0.0139, + "step": 55325 + }, + { + "epoch": 2.58, + "learning_rate": 1.1487467276973963e-05, + "loss": 0.0547, + "step": 55330 + }, + { + "epoch": 2.58, + "learning_rate": 1.1486683491919176e-05, + "loss": 0.0491, + "step": 55335 + }, + { + "epoch": 2.58, + "learning_rate": 1.148589970686439e-05, + "loss": 0.0403, + "step": 55340 + }, + { + "epoch": 2.58, + "learning_rate": 1.1485115921809604e-05, + "loss": 0.1877, + "step": 55345 + }, + { + "epoch": 2.58, + "learning_rate": 1.1484332136754816e-05, + "loss": 0.1706, + "step": 55350 + }, + { + "epoch": 2.58, + "learning_rate": 1.1483548351700032e-05, + "loss": 0.1843, + "step": 55355 + }, + { + "epoch": 2.58, + "learning_rate": 1.1482764566645244e-05, + "loss": 0.2677, + "step": 55360 + }, + { + "epoch": 2.58, + "learning_rate": 1.1481980781590458e-05, + "loss": 0.1672, + "step": 55365 + }, + { + "epoch": 2.58, + "learning_rate": 1.148119699653567e-05, + "loss": 0.0489, + "step": 55370 + }, + { + "epoch": 2.58, + "learning_rate": 1.1480413211480886e-05, + "loss": 0.0291, + "step": 55375 + }, + { + "epoch": 2.58, + "learning_rate": 1.1479629426426098e-05, + "loss": 0.034, + "step": 55380 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478845641371312e-05, + "loss": 0.1155, + "step": 55385 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478061856316524e-05, + "loss": 0.1148, + "step": 55390 + }, + { + "epoch": 2.58, + "learning_rate": 1.1477278071261737e-05, + "loss": 0.1079, + "step": 55395 + }, + { + "epoch": 2.59, + "learning_rate": 1.1476494286206952e-05, + "loss": 0.1543, + "step": 55400 + }, + { + "epoch": 2.59, + "learning_rate": 1.1475710501152164e-05, + "loss": 0.1308, + "step": 55405 + }, + { + "epoch": 2.59, + "learning_rate": 1.1474926716097378e-05, + "loss": 0.3201, + "step": 55410 + }, + { + "epoch": 2.59, + "learning_rate": 1.147414293104259e-05, + "loss": 0.1922, + "step": 55415 + }, + { + "epoch": 2.59, + "learning_rate": 1.1473359145987806e-05, + "loss": 0.056, + "step": 55420 + }, + { + "epoch": 2.59, + "learning_rate": 1.1472575360933018e-05, + "loss": 0.0477, + "step": 55425 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471791575878232e-05, + "loss": 0.0765, + "step": 55430 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471007790823444e-05, + "loss": 0.0625, + "step": 55435 + }, + { + "epoch": 2.59, + "learning_rate": 1.147022400576866e-05, + "loss": 0.0535, + "step": 55440 + }, + { + "epoch": 2.59, + "learning_rate": 1.1469440220713872e-05, + "loss": 0.1265, + "step": 55445 + }, + { + "epoch": 2.59, + "learning_rate": 1.1468656435659086e-05, + "loss": 0.0814, + "step": 55450 + }, + { + "epoch": 2.59, + "learning_rate": 1.14678726506043e-05, + "loss": 0.1643, + "step": 55455 + }, + { + "epoch": 2.59, + "learning_rate": 1.1467088865549512e-05, + "loss": 0.3128, + "step": 55460 + }, + { + "epoch": 2.59, + "learning_rate": 1.1466305080494726e-05, + "loss": 0.2313, + "step": 55465 + }, + { + "epoch": 2.59, + "learning_rate": 1.1465521295439938e-05, + "loss": 0.0514, + "step": 55470 + }, + { + "epoch": 2.59, + "learning_rate": 1.1464737510385154e-05, + "loss": 0.0917, + "step": 55475 + }, + { + "epoch": 2.59, + "learning_rate": 1.1463953725330366e-05, + "loss": 0.0453, + "step": 55480 + }, + { + "epoch": 2.59, + "learning_rate": 1.146316994027558e-05, + "loss": 0.0833, + "step": 55485 + }, + { + "epoch": 2.59, + "learning_rate": 1.1462386155220792e-05, + "loss": 0.1032, + "step": 55490 + }, + { + "epoch": 2.59, + "learning_rate": 1.1461602370166008e-05, + "loss": 0.1346, + "step": 55495 + }, + { + "epoch": 2.59, + "learning_rate": 1.146081858511122e-05, + "loss": 0.1739, + "step": 55500 + }, + { + "epoch": 2.59, + "learning_rate": 1.1460034800056434e-05, + "loss": 0.2011, + "step": 55505 + }, + { + "epoch": 2.59, + "learning_rate": 1.1459251015001646e-05, + "loss": 0.1148, + "step": 55510 + }, + { + "epoch": 2.59, + "learning_rate": 1.1458467229946862e-05, + "loss": 0.2026, + "step": 55515 + }, + { + "epoch": 2.59, + "learning_rate": 1.1457683444892074e-05, + "loss": 0.0575, + "step": 55520 + }, + { + "epoch": 2.59, + "learning_rate": 1.1456899659837286e-05, + "loss": 0.0359, + "step": 55525 + }, + { + "epoch": 2.59, + "learning_rate": 1.14561158747825e-05, + "loss": 0.0556, + "step": 55530 + }, + { + "epoch": 2.59, + "learning_rate": 1.1455332089727712e-05, + "loss": 0.0612, + "step": 55535 + }, + { + "epoch": 2.59, + "learning_rate": 1.1454548304672928e-05, + "loss": 0.0526, + "step": 55540 + }, + { + "epoch": 2.59, + "learning_rate": 1.145376451961814e-05, + "loss": 0.0315, + "step": 55545 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452980734563354e-05, + "loss": 0.144, + "step": 55550 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452196949508568e-05, + "loss": 0.1281, + "step": 55555 + }, + { + "epoch": 2.59, + "learning_rate": 1.1451413164453782e-05, + "loss": 0.2331, + "step": 55560 + }, + { + "epoch": 2.59, + "learning_rate": 1.1450629379398994e-05, + "loss": 0.2845, + "step": 55565 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449845594344208e-05, + "loss": 0.0317, + "step": 55570 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449061809289422e-05, + "loss": 0.03, + "step": 55575 + }, + { + "epoch": 2.59, + "learning_rate": 1.1448278024234636e-05, + "loss": 0.0739, + "step": 55580 + }, + { + "epoch": 2.59, + "learning_rate": 1.1447494239179848e-05, + "loss": 0.1004, + "step": 55585 + }, + { + "epoch": 2.59, + "learning_rate": 1.144671045412506e-05, + "loss": 0.1453, + "step": 55590 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445926669070276e-05, + "loss": 0.2057, + "step": 55595 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445142884015488e-05, + "loss": 0.2203, + "step": 55600 + }, + { + "epoch": 2.59, + "learning_rate": 1.1444359098960702e-05, + "loss": 0.1848, + "step": 55605 + }, + { + "epoch": 2.59, + "learning_rate": 1.1443575313905914e-05, + "loss": 0.2693, + "step": 55610 + }, + { + "epoch": 2.6, + "learning_rate": 1.144279152885113e-05, + "loss": 0.2025, + "step": 55615 + }, + { + "epoch": 2.6, + "learning_rate": 1.1442007743796342e-05, + "loss": 0.0353, + "step": 55620 + }, + { + "epoch": 2.6, + "learning_rate": 1.1441223958741556e-05, + "loss": 0.0815, + "step": 55625 + }, + { + "epoch": 2.6, + "learning_rate": 1.1440440173686768e-05, + "loss": 0.0724, + "step": 55630 + }, + { + "epoch": 2.6, + "learning_rate": 1.1439656388631984e-05, + "loss": 0.0461, + "step": 55635 + }, + { + "epoch": 2.6, + "learning_rate": 1.1438872603577196e-05, + "loss": 0.1578, + "step": 55640 + }, + { + "epoch": 2.6, + "learning_rate": 1.143808881852241e-05, + "loss": 0.091, + "step": 55645 + }, + { + "epoch": 2.6, + "learning_rate": 1.1437305033467622e-05, + "loss": 0.1293, + "step": 55650 + }, + { + "epoch": 2.6, + "learning_rate": 1.1436521248412836e-05, + "loss": 0.2032, + "step": 55655 + }, + { + "epoch": 2.6, + "learning_rate": 1.143573746335805e-05, + "loss": 0.2329, + "step": 55660 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434953678303262e-05, + "loss": 0.246, + "step": 55665 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434169893248478e-05, + "loss": 0.0633, + "step": 55670 + }, + { + "epoch": 2.6, + "learning_rate": 1.143338610819369e-05, + "loss": 0.0702, + "step": 55675 + }, + { + "epoch": 2.6, + "learning_rate": 1.1432602323138904e-05, + "loss": 0.0746, + "step": 55680 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431818538084116e-05, + "loss": 0.0994, + "step": 55685 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431034753029332e-05, + "loss": 0.0673, + "step": 55690 + }, + { + "epoch": 2.6, + "learning_rate": 1.1430250967974544e-05, + "loss": 0.1014, + "step": 55695 + }, + { + "epoch": 2.6, + "learning_rate": 1.1429467182919758e-05, + "loss": 0.1598, + "step": 55700 + }, + { + "epoch": 2.6, + "learning_rate": 1.142868339786497e-05, + "loss": 0.1542, + "step": 55705 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427899612810185e-05, + "loss": 0.1683, + "step": 55710 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427115827755398e-05, + "loss": 0.3066, + "step": 55715 + }, + { + "epoch": 2.6, + "learning_rate": 1.142633204270061e-05, + "loss": 0.0337, + "step": 55720 + }, + { + "epoch": 2.6, + "learning_rate": 1.1425548257645824e-05, + "loss": 0.0113, + "step": 55725 + }, + { + "epoch": 2.6, + "learning_rate": 1.1424764472591036e-05, + "loss": 0.0901, + "step": 55730 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423980687536252e-05, + "loss": 0.0709, + "step": 55735 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423196902481464e-05, + "loss": 0.0917, + "step": 55740 + }, + { + "epoch": 2.6, + "learning_rate": 1.1422413117426678e-05, + "loss": 0.0923, + "step": 55745 + }, + { + "epoch": 2.6, + "learning_rate": 1.142162933237189e-05, + "loss": 0.1011, + "step": 55750 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420845547317106e-05, + "loss": 0.2065, + "step": 55755 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420061762262318e-05, + "loss": 0.2262, + "step": 55760 + }, + { + "epoch": 2.6, + "learning_rate": 1.1419277977207532e-05, + "loss": 0.3833, + "step": 55765 + }, + { + "epoch": 2.6, + "learning_rate": 1.1418494192152746e-05, + "loss": 0.0298, + "step": 55770 + }, + { + "epoch": 2.6, + "learning_rate": 1.141771040709796e-05, + "loss": 0.0199, + "step": 55775 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416926622043172e-05, + "loss": 0.0608, + "step": 55780 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416142836988384e-05, + "loss": 0.0674, + "step": 55785 + }, + { + "epoch": 2.6, + "learning_rate": 1.14153590519336e-05, + "loss": 0.05, + "step": 55790 + }, + { + "epoch": 2.6, + "learning_rate": 1.1414575266878812e-05, + "loss": 0.4276, + "step": 55795 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413791481824026e-05, + "loss": 0.1672, + "step": 55800 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413007696769238e-05, + "loss": 0.2332, + "step": 55805 + }, + { + "epoch": 2.6, + "learning_rate": 1.1412223911714453e-05, + "loss": 0.2378, + "step": 55810 + }, + { + "epoch": 2.6, + "learning_rate": 1.1411440126659666e-05, + "loss": 0.3594, + "step": 55815 + }, + { + "epoch": 2.6, + "learning_rate": 1.141065634160488e-05, + "loss": 0.0743, + "step": 55820 + }, + { + "epoch": 2.6, + "learning_rate": 1.1409872556550092e-05, + "loss": 0.0567, + "step": 55825 + }, + { + "epoch": 2.61, + "learning_rate": 1.1409088771495307e-05, + "loss": 0.0386, + "step": 55830 + }, + { + "epoch": 2.61, + "learning_rate": 1.140830498644052e-05, + "loss": 0.0708, + "step": 55835 + }, + { + "epoch": 2.61, + "learning_rate": 1.1407521201385733e-05, + "loss": 0.1147, + "step": 55840 + }, + { + "epoch": 2.61, + "learning_rate": 1.1406737416330946e-05, + "loss": 0.107, + "step": 55845 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405953631276158e-05, + "loss": 0.1496, + "step": 55850 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405169846221374e-05, + "loss": 0.1711, + "step": 55855 + }, + { + "epoch": 2.61, + "learning_rate": 1.1404386061166586e-05, + "loss": 0.1991, + "step": 55860 + }, + { + "epoch": 2.61, + "learning_rate": 1.14036022761118e-05, + "loss": 0.2534, + "step": 55865 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402818491057014e-05, + "loss": 0.0475, + "step": 55870 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402034706002227e-05, + "loss": 0.0586, + "step": 55875 + }, + { + "epoch": 2.61, + "learning_rate": 1.140125092094744e-05, + "loss": 0.0615, + "step": 55880 + }, + { + "epoch": 2.61, + "learning_rate": 1.1400467135892655e-05, + "loss": 0.0973, + "step": 55885 + }, + { + "epoch": 2.61, + "learning_rate": 1.1399683350837867e-05, + "loss": 0.0871, + "step": 55890 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398899565783081e-05, + "loss": 0.0739, + "step": 55895 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398115780728294e-05, + "loss": 0.1811, + "step": 55900 + }, + { + "epoch": 2.61, + "learning_rate": 1.139733199567351e-05, + "loss": 0.1679, + "step": 55905 + }, + { + "epoch": 2.61, + "learning_rate": 1.1396548210618721e-05, + "loss": 0.2377, + "step": 55910 + }, + { + "epoch": 2.61, + "learning_rate": 1.1395764425563934e-05, + "loss": 0.29, + "step": 55915 + }, + { + "epoch": 2.61, + "learning_rate": 1.1394980640509148e-05, + "loss": 0.0498, + "step": 55920 + }, + { + "epoch": 2.61, + "learning_rate": 1.139419685545436e-05, + "loss": 0.0492, + "step": 55925 + }, + { + "epoch": 2.61, + "learning_rate": 1.1393413070399575e-05, + "loss": 0.0697, + "step": 55930 + }, + { + "epoch": 2.61, + "learning_rate": 1.1392629285344788e-05, + "loss": 0.0466, + "step": 55935 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391845500290001e-05, + "loss": 0.0723, + "step": 55940 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391061715235214e-05, + "loss": 0.0772, + "step": 55945 + }, + { + "epoch": 2.61, + "learning_rate": 1.139027793018043e-05, + "loss": 0.1073, + "step": 55950 + }, + { + "epoch": 2.61, + "learning_rate": 1.1389494145125641e-05, + "loss": 0.1866, + "step": 55955 + }, + { + "epoch": 2.61, + "learning_rate": 1.1388710360070855e-05, + "loss": 0.3105, + "step": 55960 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387926575016068e-05, + "loss": 0.3204, + "step": 55965 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387142789961283e-05, + "loss": 0.0536, + "step": 55970 + }, + { + "epoch": 2.61, + "learning_rate": 1.1386359004906495e-05, + "loss": 0.0331, + "step": 55975 + }, + { + "epoch": 2.61, + "learning_rate": 1.1385575219851708e-05, + "loss": 0.0336, + "step": 55980 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384791434796923e-05, + "loss": 0.0781, + "step": 55985 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384007649742135e-05, + "loss": 0.0902, + "step": 55990 + }, + { + "epoch": 2.61, + "learning_rate": 1.138322386468735e-05, + "loss": 0.1057, + "step": 55995 + }, + { + "epoch": 2.61, + "learning_rate": 1.1382440079632562e-05, + "loss": 0.1152, + "step": 56000 + }, + { + "epoch": 2.61, + "learning_rate": 1.1381656294577777e-05, + "loss": 0.1095, + "step": 56005 + }, + { + "epoch": 2.61, + "learning_rate": 1.138087250952299e-05, + "loss": 0.2313, + "step": 56010 + }, + { + "epoch": 2.61, + "learning_rate": 1.1380088724468203e-05, + "loss": 0.3165, + "step": 56015 + }, + { + "epoch": 2.61, + "learning_rate": 1.1379304939413415e-05, + "loss": 0.0596, + "step": 56020 + }, + { + "epoch": 2.61, + "learning_rate": 1.1378521154358631e-05, + "loss": 0.0576, + "step": 56025 + }, + { + "epoch": 2.61, + "learning_rate": 1.1377737369303843e-05, + "loss": 0.0679, + "step": 56030 + }, + { + "epoch": 2.61, + "learning_rate": 1.1376953584249057e-05, + "loss": 0.0265, + "step": 56035 + }, + { + "epoch": 2.61, + "learning_rate": 1.137616979919427e-05, + "loss": 0.0645, + "step": 56040 + }, + { + "epoch": 2.62, + "learning_rate": 1.1375386014139482e-05, + "loss": 0.1304, + "step": 56045 + }, + { + "epoch": 2.62, + "learning_rate": 1.1374602229084697e-05, + "loss": 0.2927, + "step": 56050 + }, + { + "epoch": 2.62, + "learning_rate": 1.137381844402991e-05, + "loss": 0.1191, + "step": 56055 + }, + { + "epoch": 2.62, + "learning_rate": 1.1373034658975123e-05, + "loss": 0.2488, + "step": 56060 + }, + { + "epoch": 2.62, + "learning_rate": 1.1372250873920336e-05, + "loss": 0.1988, + "step": 56065 + }, + { + "epoch": 2.62, + "learning_rate": 1.1371467088865551e-05, + "loss": 0.0326, + "step": 56070 + }, + { + "epoch": 2.62, + "learning_rate": 1.1370683303810763e-05, + "loss": 0.0501, + "step": 56075 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369899518755977e-05, + "loss": 0.0934, + "step": 56080 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369115733701191e-05, + "loss": 0.0529, + "step": 56085 + }, + { + "epoch": 2.62, + "learning_rate": 1.1368331948646405e-05, + "loss": 0.1376, + "step": 56090 + }, + { + "epoch": 2.62, + "learning_rate": 1.1367548163591617e-05, + "loss": 0.0725, + "step": 56095 + }, + { + "epoch": 2.62, + "learning_rate": 1.1366764378536831e-05, + "loss": 0.1274, + "step": 56100 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365980593482045e-05, + "loss": 0.1481, + "step": 56105 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365196808427257e-05, + "loss": 0.1959, + "step": 56110 + }, + { + "epoch": 2.62, + "learning_rate": 1.1364413023372471e-05, + "loss": 0.1771, + "step": 56115 + }, + { + "epoch": 2.62, + "learning_rate": 1.1363629238317683e-05, + "loss": 0.0172, + "step": 56120 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362845453262899e-05, + "loss": 0.0531, + "step": 56125 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362061668208111e-05, + "loss": 0.0984, + "step": 56130 + }, + { + "epoch": 2.62, + "learning_rate": 1.1361277883153325e-05, + "loss": 0.074, + "step": 56135 + }, + { + "epoch": 2.62, + "learning_rate": 1.1360494098098537e-05, + "loss": 0.0676, + "step": 56140 + }, + { + "epoch": 2.62, + "learning_rate": 1.1359710313043753e-05, + "loss": 0.1362, + "step": 56145 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358926527988965e-05, + "loss": 0.1122, + "step": 56150 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358142742934179e-05, + "loss": 0.1515, + "step": 56155 + }, + { + "epoch": 2.62, + "learning_rate": 1.1357358957879391e-05, + "loss": 0.261, + "step": 56160 + }, + { + "epoch": 2.62, + "learning_rate": 1.1356575172824607e-05, + "loss": 0.293, + "step": 56165 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355791387769819e-05, + "loss": 0.0781, + "step": 56170 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355007602715031e-05, + "loss": 0.0546, + "step": 56175 + }, + { + "epoch": 2.62, + "learning_rate": 1.1354223817660245e-05, + "loss": 0.0495, + "step": 56180 + }, + { + "epoch": 2.62, + "learning_rate": 1.1353440032605459e-05, + "loss": 0.0399, + "step": 56185 + }, + { + "epoch": 2.62, + "learning_rate": 1.1352656247550673e-05, + "loss": 0.1285, + "step": 56190 + }, + { + "epoch": 2.62, + "learning_rate": 1.1351872462495885e-05, + "loss": 0.1656, + "step": 56195 + }, + { + "epoch": 2.62, + "learning_rate": 1.13510886774411e-05, + "loss": 0.1061, + "step": 56200 + }, + { + "epoch": 2.62, + "learning_rate": 1.1350304892386313e-05, + "loss": 0.1784, + "step": 56205 + }, + { + "epoch": 2.62, + "learning_rate": 1.1349521107331527e-05, + "loss": 0.254, + "step": 56210 + }, + { + "epoch": 2.62, + "learning_rate": 1.134873732227674e-05, + "loss": 0.3493, + "step": 56215 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347953537221955e-05, + "loss": 0.0536, + "step": 56220 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347169752167167e-05, + "loss": 0.0224, + "step": 56225 + }, + { + "epoch": 2.62, + "learning_rate": 1.1346385967112381e-05, + "loss": 0.06, + "step": 56230 + }, + { + "epoch": 2.62, + "learning_rate": 1.1345602182057593e-05, + "loss": 0.1365, + "step": 56235 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344818397002805e-05, + "loss": 0.0783, + "step": 56240 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344034611948021e-05, + "loss": 0.1582, + "step": 56245 + }, + { + "epoch": 2.62, + "learning_rate": 1.1343250826893233e-05, + "loss": 0.0864, + "step": 56250 + }, + { + "epoch": 2.62, + "learning_rate": 1.1342467041838447e-05, + "loss": 0.1631, + "step": 56255 + }, + { + "epoch": 2.63, + "learning_rate": 1.134168325678366e-05, + "loss": 0.2896, + "step": 56260 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340899471728875e-05, + "loss": 0.2259, + "step": 56265 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340115686674087e-05, + "loss": 0.083, + "step": 56270 + }, + { + "epoch": 2.63, + "learning_rate": 1.1339331901619301e-05, + "loss": 0.0676, + "step": 56275 + }, + { + "epoch": 2.63, + "learning_rate": 1.1338548116564513e-05, + "loss": 0.0453, + "step": 56280 + }, + { + "epoch": 2.63, + "learning_rate": 1.1337764331509729e-05, + "loss": 0.0732, + "step": 56285 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336980546454941e-05, + "loss": 0.0857, + "step": 56290 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336196761400155e-05, + "loss": 0.0953, + "step": 56295 + }, + { + "epoch": 2.63, + "learning_rate": 1.1335412976345369e-05, + "loss": 0.1977, + "step": 56300 + }, + { + "epoch": 2.63, + "learning_rate": 1.1334629191290581e-05, + "loss": 0.1663, + "step": 56305 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333845406235795e-05, + "loss": 0.2465, + "step": 56310 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333061621181007e-05, + "loss": 0.2748, + "step": 56315 + }, + { + "epoch": 2.63, + "learning_rate": 1.1332277836126223e-05, + "loss": 0.0505, + "step": 56320 + }, + { + "epoch": 2.63, + "learning_rate": 1.1331494051071435e-05, + "loss": 0.0276, + "step": 56325 + }, + { + "epoch": 2.63, + "learning_rate": 1.1330710266016649e-05, + "loss": 0.0824, + "step": 56330 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329926480961861e-05, + "loss": 0.1261, + "step": 56335 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329142695907077e-05, + "loss": 0.074, + "step": 56340 + }, + { + "epoch": 2.63, + "learning_rate": 1.1328358910852289e-05, + "loss": 0.206, + "step": 56345 + }, + { + "epoch": 2.63, + "learning_rate": 1.1327575125797503e-05, + "loss": 0.1365, + "step": 56350 + }, + { + "epoch": 2.63, + "learning_rate": 1.1326791340742715e-05, + "loss": 0.1942, + "step": 56355 + }, + { + "epoch": 2.63, + "learning_rate": 1.132600755568793e-05, + "loss": 0.2217, + "step": 56360 + }, + { + "epoch": 2.63, + "learning_rate": 1.1325223770633143e-05, + "loss": 0.3364, + "step": 56365 + }, + { + "epoch": 2.63, + "learning_rate": 1.1324439985578355e-05, + "loss": 0.0938, + "step": 56370 + }, + { + "epoch": 2.63, + "learning_rate": 1.1323656200523569e-05, + "loss": 0.0528, + "step": 56375 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322872415468781e-05, + "loss": 0.0529, + "step": 56380 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322088630413997e-05, + "loss": 0.0723, + "step": 56385 + }, + { + "epoch": 2.63, + "learning_rate": 1.1321304845359209e-05, + "loss": 0.0862, + "step": 56390 + }, + { + "epoch": 2.63, + "learning_rate": 1.1320521060304423e-05, + "loss": 0.1117, + "step": 56395 + }, + { + "epoch": 2.63, + "learning_rate": 1.1319737275249637e-05, + "loss": 0.1197, + "step": 56400 + }, + { + "epoch": 2.63, + "learning_rate": 1.131895349019485e-05, + "loss": 0.2187, + "step": 56405 + }, + { + "epoch": 2.63, + "learning_rate": 1.1318169705140063e-05, + "loss": 0.119, + "step": 56410 + }, + { + "epoch": 2.63, + "learning_rate": 1.1317385920085277e-05, + "loss": 0.4055, + "step": 56415 + }, + { + "epoch": 2.63, + "learning_rate": 1.131660213503049e-05, + "loss": 0.0653, + "step": 56420 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315818349975705e-05, + "loss": 0.0624, + "step": 56425 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315034564920917e-05, + "loss": 0.1205, + "step": 56430 + }, + { + "epoch": 2.63, + "learning_rate": 1.1314250779866129e-05, + "loss": 0.0533, + "step": 56435 + }, + { + "epoch": 2.63, + "learning_rate": 1.1313466994811345e-05, + "loss": 0.1112, + "step": 56440 + }, + { + "epoch": 2.63, + "learning_rate": 1.1312683209756557e-05, + "loss": 0.0813, + "step": 56445 + }, + { + "epoch": 2.63, + "learning_rate": 1.131189942470177e-05, + "loss": 0.1179, + "step": 56450 + }, + { + "epoch": 2.63, + "learning_rate": 1.1311115639646983e-05, + "loss": 0.108, + "step": 56455 + }, + { + "epoch": 2.63, + "learning_rate": 1.1310331854592199e-05, + "loss": 0.3539, + "step": 56460 + }, + { + "epoch": 2.63, + "learning_rate": 1.130954806953741e-05, + "loss": 0.3225, + "step": 56465 + }, + { + "epoch": 2.63, + "learning_rate": 1.1308764284482625e-05, + "loss": 0.1074, + "step": 56470 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307980499427837e-05, + "loss": 0.0635, + "step": 56475 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307196714373052e-05, + "loss": 0.0487, + "step": 56480 + }, + { + "epoch": 2.64, + "learning_rate": 1.1306412929318265e-05, + "loss": 0.0477, + "step": 56485 + }, + { + "epoch": 2.64, + "learning_rate": 1.1305629144263479e-05, + "loss": 0.0545, + "step": 56490 + }, + { + "epoch": 2.64, + "learning_rate": 1.130484535920869e-05, + "loss": 0.1771, + "step": 56495 + }, + { + "epoch": 2.64, + "learning_rate": 1.1304061574153905e-05, + "loss": 0.1594, + "step": 56500 + }, + { + "epoch": 2.64, + "learning_rate": 1.1303277789099119e-05, + "loss": 0.1938, + "step": 56505 + }, + { + "epoch": 2.64, + "learning_rate": 1.130249400404433e-05, + "loss": 0.2132, + "step": 56510 + }, + { + "epoch": 2.64, + "learning_rate": 1.1301710218989546e-05, + "loss": 0.2514, + "step": 56515 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300926433934759e-05, + "loss": 0.042, + "step": 56520 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300142648879973e-05, + "loss": 0.0904, + "step": 56525 + }, + { + "epoch": 2.64, + "learning_rate": 1.1299358863825185e-05, + "loss": 0.0356, + "step": 56530 + }, + { + "epoch": 2.64, + "learning_rate": 1.12985750787704e-05, + "loss": 0.1147, + "step": 56535 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297791293715613e-05, + "loss": 0.0365, + "step": 56540 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297007508660826e-05, + "loss": 0.1268, + "step": 56545 + }, + { + "epoch": 2.64, + "learning_rate": 1.1296223723606039e-05, + "loss": 0.1831, + "step": 56550 + }, + { + "epoch": 2.64, + "learning_rate": 1.1295439938551254e-05, + "loss": 0.1668, + "step": 56555 + }, + { + "epoch": 2.64, + "learning_rate": 1.1294656153496466e-05, + "loss": 0.3163, + "step": 56560 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293872368441679e-05, + "loss": 0.2783, + "step": 56565 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293088583386893e-05, + "loss": 0.0947, + "step": 56570 + }, + { + "epoch": 2.64, + "learning_rate": 1.1292304798332105e-05, + "loss": 0.0528, + "step": 56575 + }, + { + "epoch": 2.64, + "learning_rate": 1.129152101327732e-05, + "loss": 0.0567, + "step": 56580 + }, + { + "epoch": 2.64, + "learning_rate": 1.1290737228222533e-05, + "loss": 0.0607, + "step": 56585 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289953443167747e-05, + "loss": 0.0598, + "step": 56590 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289169658112959e-05, + "loss": 0.151, + "step": 56595 + }, + { + "epoch": 2.64, + "learning_rate": 1.1288385873058174e-05, + "loss": 0.2271, + "step": 56600 + }, + { + "epoch": 2.64, + "learning_rate": 1.1287602088003387e-05, + "loss": 0.1683, + "step": 56605 + }, + { + "epoch": 2.64, + "learning_rate": 1.12868183029486e-05, + "loss": 0.2022, + "step": 56610 + }, + { + "epoch": 2.64, + "learning_rate": 1.1286034517893814e-05, + "loss": 0.2549, + "step": 56615 + }, + { + "epoch": 2.64, + "learning_rate": 1.1285250732839028e-05, + "loss": 0.0524, + "step": 56620 + }, + { + "epoch": 2.64, + "learning_rate": 1.128446694778424e-05, + "loss": 0.0105, + "step": 56625 + }, + { + "epoch": 2.64, + "learning_rate": 1.1283683162729453e-05, + "loss": 0.0215, + "step": 56630 + }, + { + "epoch": 2.64, + "learning_rate": 1.1282899377674668e-05, + "loss": 0.0492, + "step": 56635 + }, + { + "epoch": 2.64, + "learning_rate": 1.128211559261988e-05, + "loss": 0.1595, + "step": 56640 + }, + { + "epoch": 2.64, + "learning_rate": 1.1281331807565094e-05, + "loss": 0.0684, + "step": 56645 + }, + { + "epoch": 2.64, + "learning_rate": 1.1280548022510307e-05, + "loss": 0.1465, + "step": 56650 + }, + { + "epoch": 2.64, + "learning_rate": 1.1279764237455522e-05, + "loss": 0.1494, + "step": 56655 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278980452400734e-05, + "loss": 0.2374, + "step": 56660 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278196667345948e-05, + "loss": 0.2635, + "step": 56665 + }, + { + "epoch": 2.64, + "learning_rate": 1.127741288229116e-05, + "loss": 0.0529, + "step": 56670 + }, + { + "epoch": 2.64, + "learning_rate": 1.1276629097236376e-05, + "loss": 0.0205, + "step": 56675 + }, + { + "epoch": 2.64, + "learning_rate": 1.1275845312181588e-05, + "loss": 0.1151, + "step": 56680 + }, + { + "epoch": 2.65, + "learning_rate": 1.1275061527126802e-05, + "loss": 0.0793, + "step": 56685 + }, + { + "epoch": 2.65, + "learning_rate": 1.1274277742072014e-05, + "loss": 0.058, + "step": 56690 + }, + { + "epoch": 2.65, + "learning_rate": 1.1273493957017227e-05, + "loss": 0.0678, + "step": 56695 + }, + { + "epoch": 2.65, + "learning_rate": 1.1272710171962442e-05, + "loss": 0.1245, + "step": 56700 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271926386907654e-05, + "loss": 0.3038, + "step": 56705 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271142601852868e-05, + "loss": 0.2822, + "step": 56710 + }, + { + "epoch": 2.65, + "learning_rate": 1.1270358816798082e-05, + "loss": 0.2808, + "step": 56715 + }, + { + "epoch": 2.65, + "learning_rate": 1.1269575031743296e-05, + "loss": 0.0484, + "step": 56720 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268791246688508e-05, + "loss": 0.0473, + "step": 56725 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268007461633722e-05, + "loss": 0.0651, + "step": 56730 + }, + { + "epoch": 2.65, + "learning_rate": 1.1267223676578936e-05, + "loss": 0.0608, + "step": 56735 + }, + { + "epoch": 2.65, + "learning_rate": 1.126643989152415e-05, + "loss": 0.0815, + "step": 56740 + }, + { + "epoch": 2.65, + "learning_rate": 1.1265656106469362e-05, + "loss": 0.0831, + "step": 56745 + }, + { + "epoch": 2.65, + "learning_rate": 1.1264872321414578e-05, + "loss": 0.1264, + "step": 56750 + }, + { + "epoch": 2.65, + "learning_rate": 1.126408853635979e-05, + "loss": 0.196, + "step": 56755 + }, + { + "epoch": 2.65, + "learning_rate": 1.1263304751305002e-05, + "loss": 0.1935, + "step": 56760 + }, + { + "epoch": 2.65, + "learning_rate": 1.1262520966250216e-05, + "loss": 0.2181, + "step": 56765 + }, + { + "epoch": 2.65, + "learning_rate": 1.1261737181195428e-05, + "loss": 0.0669, + "step": 56770 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260953396140644e-05, + "loss": 0.0351, + "step": 56775 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260169611085856e-05, + "loss": 0.031, + "step": 56780 + }, + { + "epoch": 2.65, + "learning_rate": 1.125938582603107e-05, + "loss": 0.1046, + "step": 56785 + }, + { + "epoch": 2.65, + "learning_rate": 1.1258602040976282e-05, + "loss": 0.0576, + "step": 56790 + }, + { + "epoch": 2.65, + "learning_rate": 1.1257818255921498e-05, + "loss": 0.1757, + "step": 56795 + }, + { + "epoch": 2.65, + "learning_rate": 1.125703447086671e-05, + "loss": 0.0646, + "step": 56800 + }, + { + "epoch": 2.65, + "learning_rate": 1.1256250685811924e-05, + "loss": 0.0942, + "step": 56805 + }, + { + "epoch": 2.65, + "learning_rate": 1.1255466900757136e-05, + "loss": 0.2081, + "step": 56810 + }, + { + "epoch": 2.65, + "learning_rate": 1.1254683115702352e-05, + "loss": 0.2665, + "step": 56815 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253899330647564e-05, + "loss": 0.1061, + "step": 56820 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253115545592776e-05, + "loss": 0.0643, + "step": 56825 + }, + { + "epoch": 2.65, + "learning_rate": 1.1252331760537992e-05, + "loss": 0.0954, + "step": 56830 + }, + { + "epoch": 2.65, + "learning_rate": 1.1251547975483204e-05, + "loss": 0.1031, + "step": 56835 + }, + { + "epoch": 2.65, + "learning_rate": 1.1250764190428418e-05, + "loss": 0.0669, + "step": 56840 + }, + { + "epoch": 2.65, + "learning_rate": 1.124998040537363e-05, + "loss": 0.102, + "step": 56845 + }, + { + "epoch": 2.65, + "learning_rate": 1.1249196620318846e-05, + "loss": 0.0921, + "step": 56850 + }, + { + "epoch": 2.65, + "learning_rate": 1.1248412835264058e-05, + "loss": 0.2177, + "step": 56855 + }, + { + "epoch": 2.65, + "learning_rate": 1.1247629050209272e-05, + "loss": 0.2624, + "step": 56860 + }, + { + "epoch": 2.65, + "learning_rate": 1.1246845265154484e-05, + "loss": 0.2272, + "step": 56865 + }, + { + "epoch": 2.65, + "learning_rate": 1.12460614800997e-05, + "loss": 0.0577, + "step": 56870 + }, + { + "epoch": 2.65, + "learning_rate": 1.1245277695044912e-05, + "loss": 0.0705, + "step": 56875 + }, + { + "epoch": 2.65, + "learning_rate": 1.1244493909990126e-05, + "loss": 0.0784, + "step": 56880 + }, + { + "epoch": 2.65, + "learning_rate": 1.1243710124935338e-05, + "loss": 0.0725, + "step": 56885 + }, + { + "epoch": 2.65, + "learning_rate": 1.124292633988055e-05, + "loss": 0.079, + "step": 56890 + }, + { + "epoch": 2.65, + "learning_rate": 1.1242142554825766e-05, + "loss": 0.0578, + "step": 56895 + }, + { + "epoch": 2.66, + "learning_rate": 1.1241358769770978e-05, + "loss": 0.1903, + "step": 56900 + }, + { + "epoch": 2.66, + "learning_rate": 1.1240574984716192e-05, + "loss": 0.1061, + "step": 56905 + }, + { + "epoch": 2.66, + "learning_rate": 1.1239791199661404e-05, + "loss": 0.3914, + "step": 56910 + }, + { + "epoch": 2.66, + "learning_rate": 1.123900741460662e-05, + "loss": 0.4026, + "step": 56915 + }, + { + "epoch": 2.66, + "learning_rate": 1.1238223629551832e-05, + "loss": 0.0493, + "step": 56920 + }, + { + "epoch": 2.66, + "learning_rate": 1.1237439844497046e-05, + "loss": 0.066, + "step": 56925 + }, + { + "epoch": 2.66, + "learning_rate": 1.123665605944226e-05, + "loss": 0.0681, + "step": 56930 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235872274387474e-05, + "loss": 0.0228, + "step": 56935 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235088489332686e-05, + "loss": 0.0556, + "step": 56940 + }, + { + "epoch": 2.66, + "learning_rate": 1.12343047042779e-05, + "loss": 0.1144, + "step": 56945 + }, + { + "epoch": 2.66, + "learning_rate": 1.1233520919223114e-05, + "loss": 0.0883, + "step": 56950 + }, + { + "epoch": 2.66, + "learning_rate": 1.1232737134168326e-05, + "loss": 0.156, + "step": 56955 + }, + { + "epoch": 2.66, + "learning_rate": 1.123195334911354e-05, + "loss": 0.2826, + "step": 56960 + }, + { + "epoch": 2.66, + "learning_rate": 1.1231169564058752e-05, + "loss": 0.2467, + "step": 56965 + }, + { + "epoch": 2.66, + "learning_rate": 1.1230385779003968e-05, + "loss": 0.0382, + "step": 56970 + }, + { + "epoch": 2.66, + "learning_rate": 1.122960199394918e-05, + "loss": 0.068, + "step": 56975 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228818208894394e-05, + "loss": 0.0677, + "step": 56980 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228034423839606e-05, + "loss": 0.1175, + "step": 56985 + }, + { + "epoch": 2.66, + "learning_rate": 1.1227250638784822e-05, + "loss": 0.0899, + "step": 56990 + }, + { + "epoch": 2.66, + "learning_rate": 1.1226466853730034e-05, + "loss": 0.1353, + "step": 56995 + }, + { + "epoch": 2.66, + "learning_rate": 1.1225683068675248e-05, + "loss": 0.1473, + "step": 57000 + }, + { + "epoch": 2.66, + "learning_rate": 1.122489928362046e-05, + "loss": 0.1321, + "step": 57005 + }, + { + "epoch": 2.66, + "learning_rate": 1.1224115498565676e-05, + "loss": 0.3038, + "step": 57010 + }, + { + "epoch": 2.66, + "learning_rate": 1.1223331713510888e-05, + "loss": 0.4092, + "step": 57015 + }, + { + "epoch": 2.66, + "learning_rate": 1.12225479284561e-05, + "loss": 0.0233, + "step": 57020 + }, + { + "epoch": 2.66, + "learning_rate": 1.1221764143401314e-05, + "loss": 0.0531, + "step": 57025 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220980358346528e-05, + "loss": 0.011, + "step": 57030 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220196573291742e-05, + "loss": 0.0896, + "step": 57035 + }, + { + "epoch": 2.66, + "learning_rate": 1.1219412788236954e-05, + "loss": 0.038, + "step": 57040 + }, + { + "epoch": 2.66, + "learning_rate": 1.121862900318217e-05, + "loss": 0.1052, + "step": 57045 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217845218127382e-05, + "loss": 0.1334, + "step": 57050 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217061433072596e-05, + "loss": 0.1431, + "step": 57055 + }, + { + "epoch": 2.66, + "learning_rate": 1.1216277648017808e-05, + "loss": 0.1915, + "step": 57060 + }, + { + "epoch": 2.66, + "learning_rate": 1.1215493862963024e-05, + "loss": 0.287, + "step": 57065 + }, + { + "epoch": 2.66, + "learning_rate": 1.1214710077908236e-05, + "loss": 0.1064, + "step": 57070 + }, + { + "epoch": 2.66, + "learning_rate": 1.121392629285345e-05, + "loss": 0.0665, + "step": 57075 + }, + { + "epoch": 2.66, + "learning_rate": 1.1213142507798662e-05, + "loss": 0.0537, + "step": 57080 + }, + { + "epoch": 2.66, + "learning_rate": 1.1212358722743874e-05, + "loss": 0.0913, + "step": 57085 + }, + { + "epoch": 2.66, + "learning_rate": 1.121157493768909e-05, + "loss": 0.0652, + "step": 57090 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210791152634302e-05, + "loss": 0.0595, + "step": 57095 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210007367579516e-05, + "loss": 0.0666, + "step": 57100 + }, + { + "epoch": 2.66, + "learning_rate": 1.1209223582524728e-05, + "loss": 0.2029, + "step": 57105 + }, + { + "epoch": 2.66, + "learning_rate": 1.1208439797469944e-05, + "loss": 0.2374, + "step": 57110 + }, + { + "epoch": 2.67, + "learning_rate": 1.1207656012415156e-05, + "loss": 0.2624, + "step": 57115 + }, + { + "epoch": 2.67, + "learning_rate": 1.120687222736037e-05, + "loss": 0.0789, + "step": 57120 + }, + { + "epoch": 2.67, + "learning_rate": 1.1206088442305582e-05, + "loss": 0.0434, + "step": 57125 + }, + { + "epoch": 2.67, + "learning_rate": 1.1205304657250798e-05, + "loss": 0.0571, + "step": 57130 + }, + { + "epoch": 2.67, + "learning_rate": 1.120452087219601e-05, + "loss": 0.0856, + "step": 57135 + }, + { + "epoch": 2.67, + "learning_rate": 1.1203737087141224e-05, + "loss": 0.1147, + "step": 57140 + }, + { + "epoch": 2.67, + "learning_rate": 1.1202953302086438e-05, + "loss": 0.1438, + "step": 57145 + }, + { + "epoch": 2.67, + "learning_rate": 1.120216951703165e-05, + "loss": 0.1312, + "step": 57150 + }, + { + "epoch": 2.67, + "learning_rate": 1.1201385731976864e-05, + "loss": 0.1851, + "step": 57155 + }, + { + "epoch": 2.67, + "learning_rate": 1.1200601946922076e-05, + "loss": 0.1552, + "step": 57160 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199818161867291e-05, + "loss": 0.2446, + "step": 57165 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199034376812504e-05, + "loss": 0.0227, + "step": 57170 + }, + { + "epoch": 2.67, + "learning_rate": 1.1198250591757718e-05, + "loss": 0.071, + "step": 57175 + }, + { + "epoch": 2.67, + "learning_rate": 1.119746680670293e-05, + "loss": 0.0333, + "step": 57180 + }, + { + "epoch": 2.67, + "learning_rate": 1.1196683021648145e-05, + "loss": 0.0713, + "step": 57185 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195899236593358e-05, + "loss": 0.0717, + "step": 57190 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195115451538572e-05, + "loss": 0.0648, + "step": 57195 + }, + { + "epoch": 2.67, + "learning_rate": 1.1194331666483784e-05, + "loss": 0.1173, + "step": 57200 + }, + { + "epoch": 2.67, + "learning_rate": 1.1193547881429e-05, + "loss": 0.09, + "step": 57205 + }, + { + "epoch": 2.67, + "learning_rate": 1.1192764096374212e-05, + "loss": 0.2192, + "step": 57210 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191980311319424e-05, + "loss": 0.3397, + "step": 57215 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191196526264638e-05, + "loss": 0.0862, + "step": 57220 + }, + { + "epoch": 2.67, + "learning_rate": 1.119041274120985e-05, + "loss": 0.0396, + "step": 57225 + }, + { + "epoch": 2.67, + "learning_rate": 1.1189628956155065e-05, + "loss": 0.0291, + "step": 57230 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188845171100278e-05, + "loss": 0.0333, + "step": 57235 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188061386045492e-05, + "loss": 0.0601, + "step": 57240 + }, + { + "epoch": 2.67, + "learning_rate": 1.1187277600990705e-05, + "loss": 0.0811, + "step": 57245 + }, + { + "epoch": 2.67, + "learning_rate": 1.118649381593592e-05, + "loss": 0.0969, + "step": 57250 + }, + { + "epoch": 2.67, + "learning_rate": 1.1185710030881132e-05, + "loss": 0.229, + "step": 57255 + }, + { + "epoch": 2.67, + "learning_rate": 1.1184926245826346e-05, + "loss": 0.294, + "step": 57260 + }, + { + "epoch": 2.67, + "learning_rate": 1.118414246077156e-05, + "loss": 0.3216, + "step": 57265 + }, + { + "epoch": 2.67, + "learning_rate": 1.1183358675716773e-05, + "loss": 0.0457, + "step": 57270 + }, + { + "epoch": 2.67, + "learning_rate": 1.1182574890661986e-05, + "loss": 0.0488, + "step": 57275 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181791105607198e-05, + "loss": 0.0694, + "step": 57280 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181007320552413e-05, + "loss": 0.0691, + "step": 57285 + }, + { + "epoch": 2.67, + "learning_rate": 1.1180223535497626e-05, + "loss": 0.1437, + "step": 57290 + }, + { + "epoch": 2.67, + "learning_rate": 1.117943975044284e-05, + "loss": 0.1237, + "step": 57295 + }, + { + "epoch": 2.67, + "learning_rate": 1.1178655965388052e-05, + "loss": 0.1094, + "step": 57300 + }, + { + "epoch": 2.67, + "learning_rate": 1.1177872180333267e-05, + "loss": 0.3414, + "step": 57305 + }, + { + "epoch": 2.67, + "learning_rate": 1.117708839527848e-05, + "loss": 0.2766, + "step": 57310 + }, + { + "epoch": 2.67, + "learning_rate": 1.1176304610223693e-05, + "loss": 0.2673, + "step": 57315 + }, + { + "epoch": 2.67, + "learning_rate": 1.1175520825168906e-05, + "loss": 0.0421, + "step": 57320 + }, + { + "epoch": 2.67, + "learning_rate": 1.1174737040114121e-05, + "loss": 0.0769, + "step": 57325 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173953255059333e-05, + "loss": 0.042, + "step": 57330 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173169470004547e-05, + "loss": 0.0679, + "step": 57335 + }, + { + "epoch": 2.68, + "learning_rate": 1.117238568494976e-05, + "loss": 0.0895, + "step": 57340 + }, + { + "epoch": 2.68, + "learning_rate": 1.1171601899894973e-05, + "loss": 0.1201, + "step": 57345 + }, + { + "epoch": 2.68, + "learning_rate": 1.1170818114840187e-05, + "loss": 0.1044, + "step": 57350 + }, + { + "epoch": 2.68, + "learning_rate": 1.11700343297854e-05, + "loss": 0.0649, + "step": 57355 + }, + { + "epoch": 2.68, + "learning_rate": 1.1169250544730615e-05, + "loss": 0.224, + "step": 57360 + }, + { + "epoch": 2.68, + "learning_rate": 1.1168466759675827e-05, + "loss": 0.3896, + "step": 57365 + }, + { + "epoch": 2.68, + "learning_rate": 1.1167682974621041e-05, + "loss": 0.0307, + "step": 57370 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166899189566253e-05, + "loss": 0.0542, + "step": 57375 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166115404511469e-05, + "loss": 0.0674, + "step": 57380 + }, + { + "epoch": 2.68, + "learning_rate": 1.1165331619456681e-05, + "loss": 0.0919, + "step": 57385 + }, + { + "epoch": 2.68, + "learning_rate": 1.1164547834401895e-05, + "loss": 0.0887, + "step": 57390 + }, + { + "epoch": 2.68, + "learning_rate": 1.1163764049347107e-05, + "loss": 0.1729, + "step": 57395 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162980264292323e-05, + "loss": 0.1892, + "step": 57400 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162196479237535e-05, + "loss": 0.1451, + "step": 57405 + }, + { + "epoch": 2.68, + "learning_rate": 1.1161412694182747e-05, + "loss": 0.3484, + "step": 57410 + }, + { + "epoch": 2.68, + "learning_rate": 1.1160628909127961e-05, + "loss": 0.3337, + "step": 57415 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159845124073174e-05, + "loss": 0.0925, + "step": 57420 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159061339018389e-05, + "loss": 0.0863, + "step": 57425 + }, + { + "epoch": 2.68, + "learning_rate": 1.1158277553963601e-05, + "loss": 0.0389, + "step": 57430 + }, + { + "epoch": 2.68, + "learning_rate": 1.1157493768908815e-05, + "loss": 0.0606, + "step": 57435 + }, + { + "epoch": 2.68, + "learning_rate": 1.1156709983854027e-05, + "loss": 0.1619, + "step": 57440 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155926198799243e-05, + "loss": 0.177, + "step": 57445 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155142413744455e-05, + "loss": 0.1052, + "step": 57450 + }, + { + "epoch": 2.68, + "learning_rate": 1.115435862868967e-05, + "loss": 0.2065, + "step": 57455 + }, + { + "epoch": 2.68, + "learning_rate": 1.1153574843634883e-05, + "loss": 0.1876, + "step": 57460 + }, + { + "epoch": 2.68, + "learning_rate": 1.1152791058580097e-05, + "loss": 0.1923, + "step": 57465 + }, + { + "epoch": 2.68, + "learning_rate": 1.115200727352531e-05, + "loss": 0.0589, + "step": 57470 + }, + { + "epoch": 2.68, + "learning_rate": 1.1151223488470521e-05, + "loss": 0.0346, + "step": 57475 + }, + { + "epoch": 2.68, + "learning_rate": 1.1150439703415737e-05, + "loss": 0.0486, + "step": 57480 + }, + { + "epoch": 2.68, + "learning_rate": 1.114965591836095e-05, + "loss": 0.0673, + "step": 57485 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148872133306163e-05, + "loss": 0.0864, + "step": 57490 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148088348251375e-05, + "loss": 0.103, + "step": 57495 + }, + { + "epoch": 2.68, + "learning_rate": 1.1147304563196591e-05, + "loss": 0.2384, + "step": 57500 + }, + { + "epoch": 2.68, + "learning_rate": 1.1146520778141803e-05, + "loss": 0.151, + "step": 57505 + }, + { + "epoch": 2.68, + "learning_rate": 1.1145736993087017e-05, + "loss": 0.1685, + "step": 57510 + }, + { + "epoch": 2.68, + "learning_rate": 1.114495320803223e-05, + "loss": 0.2351, + "step": 57515 + }, + { + "epoch": 2.68, + "learning_rate": 1.1144169422977445e-05, + "loss": 0.0842, + "step": 57520 + }, + { + "epoch": 2.68, + "learning_rate": 1.1143385637922657e-05, + "loss": 0.0256, + "step": 57525 + }, + { + "epoch": 2.68, + "learning_rate": 1.1142601852867871e-05, + "loss": 0.0696, + "step": 57530 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141818067813083e-05, + "loss": 0.0371, + "step": 57535 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141034282758295e-05, + "loss": 0.0474, + "step": 57540 + }, + { + "epoch": 2.69, + "learning_rate": 1.1140250497703511e-05, + "loss": 0.0927, + "step": 57545 + }, + { + "epoch": 2.69, + "learning_rate": 1.1139466712648723e-05, + "loss": 0.0978, + "step": 57550 + }, + { + "epoch": 2.69, + "learning_rate": 1.1138682927593937e-05, + "loss": 0.223, + "step": 57555 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137899142539151e-05, + "loss": 0.2131, + "step": 57560 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137115357484365e-05, + "loss": 0.2394, + "step": 57565 + }, + { + "epoch": 2.69, + "learning_rate": 1.1136331572429577e-05, + "loss": 0.0255, + "step": 57570 + }, + { + "epoch": 2.69, + "learning_rate": 1.1135547787374791e-05, + "loss": 0.0629, + "step": 57575 + }, + { + "epoch": 2.69, + "learning_rate": 1.1134764002320005e-05, + "loss": 0.0524, + "step": 57580 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133980217265219e-05, + "loss": 0.0692, + "step": 57585 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133196432210431e-05, + "loss": 0.0791, + "step": 57590 + }, + { + "epoch": 2.69, + "learning_rate": 1.1132412647155647e-05, + "loss": 0.0576, + "step": 57595 + }, + { + "epoch": 2.69, + "learning_rate": 1.1131628862100859e-05, + "loss": 0.1245, + "step": 57600 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130845077046071e-05, + "loss": 0.1853, + "step": 57605 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130061291991285e-05, + "loss": 0.4984, + "step": 57610 + }, + { + "epoch": 2.69, + "learning_rate": 1.1129277506936497e-05, + "loss": 0.306, + "step": 57615 + }, + { + "epoch": 2.69, + "learning_rate": 1.1128493721881713e-05, + "loss": 0.0546, + "step": 57620 + }, + { + "epoch": 2.69, + "learning_rate": 1.1127709936826925e-05, + "loss": 0.0659, + "step": 57625 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126926151772139e-05, + "loss": 0.043, + "step": 57630 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126142366717351e-05, + "loss": 0.0508, + "step": 57635 + }, + { + "epoch": 2.69, + "learning_rate": 1.1125358581662567e-05, + "loss": 0.0316, + "step": 57640 + }, + { + "epoch": 2.69, + "learning_rate": 1.1124574796607779e-05, + "loss": 0.0663, + "step": 57645 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123791011552993e-05, + "loss": 0.098, + "step": 57650 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123007226498205e-05, + "loss": 0.1989, + "step": 57655 + }, + { + "epoch": 2.69, + "learning_rate": 1.112222344144342e-05, + "loss": 0.25, + "step": 57660 + }, + { + "epoch": 2.69, + "learning_rate": 1.1121439656388633e-05, + "loss": 0.3534, + "step": 57665 + }, + { + "epoch": 2.69, + "learning_rate": 1.1120655871333845e-05, + "loss": 0.018, + "step": 57670 + }, + { + "epoch": 2.69, + "learning_rate": 1.111987208627906e-05, + "loss": 0.0352, + "step": 57675 + }, + { + "epoch": 2.69, + "learning_rate": 1.1119088301224273e-05, + "loss": 0.0879, + "step": 57680 + }, + { + "epoch": 2.69, + "learning_rate": 1.1118304516169487e-05, + "loss": 0.1559, + "step": 57685 + }, + { + "epoch": 2.69, + "learning_rate": 1.1117520731114699e-05, + "loss": 0.1489, + "step": 57690 + }, + { + "epoch": 2.69, + "learning_rate": 1.1116736946059915e-05, + "loss": 0.0735, + "step": 57695 + }, + { + "epoch": 2.69, + "learning_rate": 1.1115953161005127e-05, + "loss": 0.1344, + "step": 57700 + }, + { + "epoch": 2.69, + "learning_rate": 1.111516937595034e-05, + "loss": 0.1133, + "step": 57705 + }, + { + "epoch": 2.69, + "learning_rate": 1.1114385590895553e-05, + "loss": 0.1911, + "step": 57710 + }, + { + "epoch": 2.69, + "learning_rate": 1.1113601805840769e-05, + "loss": 0.3633, + "step": 57715 + }, + { + "epoch": 2.69, + "learning_rate": 1.111281802078598e-05, + "loss": 0.036, + "step": 57720 + }, + { + "epoch": 2.69, + "learning_rate": 1.1112034235731195e-05, + "loss": 0.0589, + "step": 57725 + }, + { + "epoch": 2.69, + "learning_rate": 1.1111250450676407e-05, + "loss": 0.0351, + "step": 57730 + }, + { + "epoch": 2.69, + "learning_rate": 1.1110466665621619e-05, + "loss": 0.0914, + "step": 57735 + }, + { + "epoch": 2.69, + "learning_rate": 1.1109682880566835e-05, + "loss": 0.1147, + "step": 57740 + }, + { + "epoch": 2.69, + "learning_rate": 1.1108899095512047e-05, + "loss": 0.0828, + "step": 57745 + }, + { + "epoch": 2.69, + "learning_rate": 1.110811531045726e-05, + "loss": 0.1332, + "step": 57750 + }, + { + "epoch": 2.69, + "learning_rate": 1.1107331525402473e-05, + "loss": 0.2566, + "step": 57755 + }, + { + "epoch": 2.7, + "learning_rate": 1.1106547740347689e-05, + "loss": 0.2272, + "step": 57760 + }, + { + "epoch": 2.7, + "learning_rate": 1.1105763955292901e-05, + "loss": 0.2749, + "step": 57765 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104980170238115e-05, + "loss": 0.0733, + "step": 57770 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104196385183329e-05, + "loss": 0.0124, + "step": 57775 + }, + { + "epoch": 2.7, + "learning_rate": 1.1103412600128543e-05, + "loss": 0.0851, + "step": 57780 + }, + { + "epoch": 2.7, + "learning_rate": 1.1102628815073755e-05, + "loss": 0.0767, + "step": 57785 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101845030018969e-05, + "loss": 0.0738, + "step": 57790 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101061244964183e-05, + "loss": 0.0922, + "step": 57795 + }, + { + "epoch": 2.7, + "learning_rate": 1.1100277459909395e-05, + "loss": 0.121, + "step": 57800 + }, + { + "epoch": 2.7, + "learning_rate": 1.1099493674854609e-05, + "loss": 0.2056, + "step": 57805 + }, + { + "epoch": 2.7, + "learning_rate": 1.1098709889799821e-05, + "loss": 0.1976, + "step": 57810 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097926104745037e-05, + "loss": 0.3544, + "step": 57815 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097142319690249e-05, + "loss": 0.0921, + "step": 57820 + }, + { + "epoch": 2.7, + "learning_rate": 1.1096358534635463e-05, + "loss": 0.0193, + "step": 57825 + }, + { + "epoch": 2.7, + "learning_rate": 1.1095574749580675e-05, + "loss": 0.0806, + "step": 57830 + }, + { + "epoch": 2.7, + "learning_rate": 1.109479096452589e-05, + "loss": 0.0968, + "step": 57835 + }, + { + "epoch": 2.7, + "learning_rate": 1.1094007179471103e-05, + "loss": 0.0627, + "step": 57840 + }, + { + "epoch": 2.7, + "learning_rate": 1.1093223394416317e-05, + "loss": 0.1318, + "step": 57845 + }, + { + "epoch": 2.7, + "learning_rate": 1.1092439609361529e-05, + "loss": 0.1039, + "step": 57850 + }, + { + "epoch": 2.7, + "learning_rate": 1.1091655824306744e-05, + "loss": 0.1956, + "step": 57855 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090872039251957e-05, + "loss": 0.1967, + "step": 57860 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090088254197169e-05, + "loss": 0.4312, + "step": 57865 + }, + { + "epoch": 2.7, + "learning_rate": 1.1089304469142383e-05, + "loss": 0.054, + "step": 57870 + }, + { + "epoch": 2.7, + "learning_rate": 1.1088520684087597e-05, + "loss": 0.0584, + "step": 57875 + }, + { + "epoch": 2.7, + "learning_rate": 1.108773689903281e-05, + "loss": 0.0745, + "step": 57880 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086953113978023e-05, + "loss": 0.0903, + "step": 57885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086169328923237e-05, + "loss": 0.0683, + "step": 57890 + }, + { + "epoch": 2.7, + "learning_rate": 1.108538554386845e-05, + "loss": 0.1025, + "step": 57895 + }, + { + "epoch": 2.7, + "learning_rate": 1.1084601758813664e-05, + "loss": 0.129, + "step": 57900 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083817973758877e-05, + "loss": 0.0918, + "step": 57905 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083034188704092e-05, + "loss": 0.1841, + "step": 57910 + }, + { + "epoch": 2.7, + "learning_rate": 1.1082250403649304e-05, + "loss": 0.237, + "step": 57915 + }, + { + "epoch": 2.7, + "learning_rate": 1.1081466618594518e-05, + "loss": 0.1046, + "step": 57920 + }, + { + "epoch": 2.7, + "learning_rate": 1.108068283353973e-05, + "loss": 0.0991, + "step": 57925 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079899048484943e-05, + "loss": 0.0322, + "step": 57930 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079115263430158e-05, + "loss": 0.0464, + "step": 57935 + }, + { + "epoch": 2.7, + "learning_rate": 1.107833147837537e-05, + "loss": 0.0611, + "step": 57940 + }, + { + "epoch": 2.7, + "learning_rate": 1.1077547693320585e-05, + "loss": 0.1024, + "step": 57945 + }, + { + "epoch": 2.7, + "learning_rate": 1.1076763908265797e-05, + "loss": 0.2626, + "step": 57950 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075980123211012e-05, + "loss": 0.1249, + "step": 57955 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075196338156225e-05, + "loss": 0.2525, + "step": 57960 + }, + { + "epoch": 2.7, + "learning_rate": 1.1074412553101438e-05, + "loss": 0.2988, + "step": 57965 + }, + { + "epoch": 2.7, + "learning_rate": 1.107362876804665e-05, + "loss": 0.0208, + "step": 57970 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072844982991866e-05, + "loss": 0.0614, + "step": 57975 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072061197937078e-05, + "loss": 0.056, + "step": 57980 + }, + { + "epoch": 2.71, + "learning_rate": 1.1071277412882292e-05, + "loss": 0.0867, + "step": 57985 + }, + { + "epoch": 2.71, + "learning_rate": 1.1070493627827506e-05, + "loss": 0.0771, + "step": 57990 + }, + { + "epoch": 2.71, + "learning_rate": 1.1069709842772719e-05, + "loss": 0.1148, + "step": 57995 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068926057717932e-05, + "loss": 0.1713, + "step": 58000 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068142272663145e-05, + "loss": 0.1785, + "step": 58005 + }, + { + "epoch": 2.71, + "learning_rate": 1.106735848760836e-05, + "loss": 0.1555, + "step": 58010 + }, + { + "epoch": 2.71, + "learning_rate": 1.1066574702553572e-05, + "loss": 0.3208, + "step": 58015 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065790917498786e-05, + "loss": 0.0433, + "step": 58020 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065007132443999e-05, + "loss": 0.0256, + "step": 58025 + }, + { + "epoch": 2.71, + "learning_rate": 1.1064223347389214e-05, + "loss": 0.09, + "step": 58030 + }, + { + "epoch": 2.71, + "learning_rate": 1.1063439562334426e-05, + "loss": 0.0716, + "step": 58035 + }, + { + "epoch": 2.71, + "learning_rate": 1.106265577727964e-05, + "loss": 0.0298, + "step": 58040 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061871992224852e-05, + "loss": 0.0745, + "step": 58045 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061088207170068e-05, + "loss": 0.1625, + "step": 58050 + }, + { + "epoch": 2.71, + "learning_rate": 1.106030442211528e-05, + "loss": 0.1818, + "step": 58055 + }, + { + "epoch": 2.71, + "learning_rate": 1.1059520637060493e-05, + "loss": 0.2606, + "step": 58060 + }, + { + "epoch": 2.71, + "learning_rate": 1.1058736852005706e-05, + "loss": 0.2315, + "step": 58065 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057953066950919e-05, + "loss": 0.0346, + "step": 58070 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057169281896134e-05, + "loss": 0.0731, + "step": 58075 + }, + { + "epoch": 2.71, + "learning_rate": 1.1056385496841346e-05, + "loss": 0.0567, + "step": 58080 + }, + { + "epoch": 2.71, + "learning_rate": 1.105560171178656e-05, + "loss": 0.0793, + "step": 58085 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054817926731774e-05, + "loss": 0.0618, + "step": 58090 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054034141676988e-05, + "loss": 0.1303, + "step": 58095 + }, + { + "epoch": 2.71, + "learning_rate": 1.10532503566222e-05, + "loss": 0.1785, + "step": 58100 + }, + { + "epoch": 2.71, + "learning_rate": 1.1052466571567414e-05, + "loss": 0.197, + "step": 58105 + }, + { + "epoch": 2.71, + "learning_rate": 1.1051682786512628e-05, + "loss": 0.3032, + "step": 58110 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050899001457842e-05, + "loss": 0.3066, + "step": 58115 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050115216403054e-05, + "loss": 0.0911, + "step": 58120 + }, + { + "epoch": 2.71, + "learning_rate": 1.1049331431348267e-05, + "loss": 0.0323, + "step": 58125 + }, + { + "epoch": 2.71, + "learning_rate": 1.1048547646293482e-05, + "loss": 0.0549, + "step": 58130 + }, + { + "epoch": 2.71, + "learning_rate": 1.1047763861238694e-05, + "loss": 0.1821, + "step": 58135 + }, + { + "epoch": 2.71, + "learning_rate": 1.1046980076183908e-05, + "loss": 0.0566, + "step": 58140 + }, + { + "epoch": 2.71, + "learning_rate": 1.104619629112912e-05, + "loss": 0.0821, + "step": 58145 + }, + { + "epoch": 2.71, + "learning_rate": 1.1045412506074336e-05, + "loss": 0.1769, + "step": 58150 + }, + { + "epoch": 2.71, + "learning_rate": 1.1044628721019548e-05, + "loss": 0.1196, + "step": 58155 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043844935964762e-05, + "loss": 0.1464, + "step": 58160 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043061150909974e-05, + "loss": 0.2286, + "step": 58165 + }, + { + "epoch": 2.71, + "learning_rate": 1.104227736585519e-05, + "loss": 0.0928, + "step": 58170 + }, + { + "epoch": 2.71, + "learning_rate": 1.1041493580800402e-05, + "loss": 0.0066, + "step": 58175 + }, + { + "epoch": 2.71, + "learning_rate": 1.1040709795745616e-05, + "loss": 0.0367, + "step": 58180 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039926010690828e-05, + "loss": 0.0453, + "step": 58185 + }, + { + "epoch": 2.72, + "learning_rate": 1.1039142225636042e-05, + "loss": 0.0471, + "step": 58190 + }, + { + "epoch": 2.72, + "learning_rate": 1.1038358440581256e-05, + "loss": 0.1814, + "step": 58195 + }, + { + "epoch": 2.72, + "learning_rate": 1.1037574655526468e-05, + "loss": 0.1526, + "step": 58200 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036790870471684e-05, + "loss": 0.2725, + "step": 58205 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036007085416896e-05, + "loss": 0.1844, + "step": 58210 + }, + { + "epoch": 2.72, + "learning_rate": 1.103522330036211e-05, + "loss": 0.1978, + "step": 58215 + }, + { + "epoch": 2.72, + "learning_rate": 1.1034439515307322e-05, + "loss": 0.1005, + "step": 58220 + }, + { + "epoch": 2.72, + "learning_rate": 1.1033655730252538e-05, + "loss": 0.0466, + "step": 58225 + }, + { + "epoch": 2.72, + "learning_rate": 1.103287194519775e-05, + "loss": 0.0683, + "step": 58230 + }, + { + "epoch": 2.72, + "learning_rate": 1.1032088160142964e-05, + "loss": 0.0518, + "step": 58235 + }, + { + "epoch": 2.72, + "learning_rate": 1.1031304375088176e-05, + "loss": 0.0813, + "step": 58240 + }, + { + "epoch": 2.72, + "learning_rate": 1.1030520590033392e-05, + "loss": 0.0502, + "step": 58245 + }, + { + "epoch": 2.72, + "learning_rate": 1.1029736804978604e-05, + "loss": 0.1395, + "step": 58250 + }, + { + "epoch": 2.72, + "learning_rate": 1.1028953019923816e-05, + "loss": 0.0872, + "step": 58255 + }, + { + "epoch": 2.72, + "learning_rate": 1.102816923486903e-05, + "loss": 0.3588, + "step": 58260 + }, + { + "epoch": 2.72, + "learning_rate": 1.1027385449814242e-05, + "loss": 0.2024, + "step": 58265 + }, + { + "epoch": 2.72, + "learning_rate": 1.1026601664759458e-05, + "loss": 0.1045, + "step": 58270 + }, + { + "epoch": 2.72, + "learning_rate": 1.102581787970467e-05, + "loss": 0.0733, + "step": 58275 + }, + { + "epoch": 2.72, + "learning_rate": 1.1025034094649884e-05, + "loss": 0.0158, + "step": 58280 + }, + { + "epoch": 2.72, + "learning_rate": 1.1024250309595096e-05, + "loss": 0.0791, + "step": 58285 + }, + { + "epoch": 2.72, + "learning_rate": 1.1023466524540312e-05, + "loss": 0.0812, + "step": 58290 + }, + { + "epoch": 2.72, + "learning_rate": 1.1022682739485524e-05, + "loss": 0.0696, + "step": 58295 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021898954430738e-05, + "loss": 0.1509, + "step": 58300 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021115169375952e-05, + "loss": 0.1556, + "step": 58305 + }, + { + "epoch": 2.72, + "learning_rate": 1.1020331384321166e-05, + "loss": 0.2344, + "step": 58310 + }, + { + "epoch": 2.72, + "learning_rate": 1.1019547599266378e-05, + "loss": 0.3865, + "step": 58315 + }, + { + "epoch": 2.72, + "learning_rate": 1.101876381421159e-05, + "loss": 0.0351, + "step": 58320 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017980029156806e-05, + "loss": 0.0247, + "step": 58325 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017196244102018e-05, + "loss": 0.034, + "step": 58330 + }, + { + "epoch": 2.72, + "learning_rate": 1.1016412459047232e-05, + "loss": 0.0443, + "step": 58335 + }, + { + "epoch": 2.72, + "learning_rate": 1.1015628673992444e-05, + "loss": 0.0533, + "step": 58340 + }, + { + "epoch": 2.72, + "learning_rate": 1.101484488893766e-05, + "loss": 0.113, + "step": 58345 + }, + { + "epoch": 2.72, + "learning_rate": 1.1014061103882872e-05, + "loss": 0.1294, + "step": 58350 + }, + { + "epoch": 2.72, + "learning_rate": 1.1013277318828086e-05, + "loss": 0.2372, + "step": 58355 + }, + { + "epoch": 2.72, + "learning_rate": 1.1012493533773298e-05, + "loss": 0.1815, + "step": 58360 + }, + { + "epoch": 2.72, + "learning_rate": 1.1011709748718514e-05, + "loss": 0.4188, + "step": 58365 + }, + { + "epoch": 2.72, + "learning_rate": 1.1010925963663726e-05, + "loss": 0.0725, + "step": 58370 + }, + { + "epoch": 2.72, + "learning_rate": 1.101014217860894e-05, + "loss": 0.0558, + "step": 58375 + }, + { + "epoch": 2.72, + "learning_rate": 1.1009358393554152e-05, + "loss": 0.0533, + "step": 58380 + }, + { + "epoch": 2.72, + "learning_rate": 1.1008574608499364e-05, + "loss": 0.0649, + "step": 58385 + }, + { + "epoch": 2.72, + "learning_rate": 1.100779082344458e-05, + "loss": 0.0457, + "step": 58390 + }, + { + "epoch": 2.72, + "learning_rate": 1.1007007038389792e-05, + "loss": 0.063, + "step": 58395 + }, + { + "epoch": 2.73, + "learning_rate": 1.1006223253335006e-05, + "loss": 0.2506, + "step": 58400 + }, + { + "epoch": 2.73, + "learning_rate": 1.100543946828022e-05, + "loss": 0.1326, + "step": 58405 + }, + { + "epoch": 2.73, + "learning_rate": 1.1004655683225434e-05, + "loss": 0.2099, + "step": 58410 + }, + { + "epoch": 2.73, + "learning_rate": 1.1003871898170646e-05, + "loss": 0.2704, + "step": 58415 + }, + { + "epoch": 2.73, + "learning_rate": 1.100308811311586e-05, + "loss": 0.0142, + "step": 58420 + }, + { + "epoch": 2.73, + "learning_rate": 1.1002304328061074e-05, + "loss": 0.0844, + "step": 58425 + }, + { + "epoch": 2.73, + "learning_rate": 1.1001520543006288e-05, + "loss": 0.0416, + "step": 58430 + }, + { + "epoch": 2.73, + "learning_rate": 1.10007367579515e-05, + "loss": 0.0186, + "step": 58435 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999952972896715e-05, + "loss": 0.0636, + "step": 58440 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999169187841928e-05, + "loss": 0.1073, + "step": 58445 + }, + { + "epoch": 2.73, + "learning_rate": 1.099838540278714e-05, + "loss": 0.1895, + "step": 58450 + }, + { + "epoch": 2.73, + "learning_rate": 1.0997601617732354e-05, + "loss": 0.157, + "step": 58455 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996817832677566e-05, + "loss": 0.2624, + "step": 58460 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996034047622782e-05, + "loss": 0.2066, + "step": 58465 + }, + { + "epoch": 2.73, + "learning_rate": 1.0995250262567994e-05, + "loss": 0.0556, + "step": 58470 + }, + { + "epoch": 2.73, + "learning_rate": 1.0994466477513208e-05, + "loss": 0.0346, + "step": 58475 + }, + { + "epoch": 2.73, + "learning_rate": 1.099368269245842e-05, + "loss": 0.0719, + "step": 58480 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992898907403636e-05, + "loss": 0.064, + "step": 58485 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992115122348848e-05, + "loss": 0.0563, + "step": 58490 + }, + { + "epoch": 2.73, + "learning_rate": 1.0991331337294062e-05, + "loss": 0.1556, + "step": 58495 + }, + { + "epoch": 2.73, + "learning_rate": 1.0990547552239274e-05, + "loss": 0.1548, + "step": 58500 + }, + { + "epoch": 2.73, + "learning_rate": 1.098976376718449e-05, + "loss": 0.1101, + "step": 58505 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988979982129702e-05, + "loss": 0.3026, + "step": 58510 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988196197074914e-05, + "loss": 0.3281, + "step": 58515 + }, + { + "epoch": 2.73, + "learning_rate": 1.098741241202013e-05, + "loss": 0.1004, + "step": 58520 + }, + { + "epoch": 2.73, + "learning_rate": 1.0986628626965342e-05, + "loss": 0.0659, + "step": 58525 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985844841910556e-05, + "loss": 0.0501, + "step": 58530 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985061056855768e-05, + "loss": 0.0952, + "step": 58535 + }, + { + "epoch": 2.73, + "learning_rate": 1.0984277271800983e-05, + "loss": 0.1421, + "step": 58540 + }, + { + "epoch": 2.73, + "learning_rate": 1.0983493486746196e-05, + "loss": 0.0672, + "step": 58545 + }, + { + "epoch": 2.73, + "learning_rate": 1.098270970169141e-05, + "loss": 0.1645, + "step": 58550 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981925916636622e-05, + "loss": 0.2002, + "step": 58555 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981142131581837e-05, + "loss": 0.2722, + "step": 58560 + }, + { + "epoch": 2.73, + "learning_rate": 1.098035834652705e-05, + "loss": 0.3582, + "step": 58565 + }, + { + "epoch": 2.73, + "learning_rate": 1.0979574561472263e-05, + "loss": 0.0799, + "step": 58570 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978790776417476e-05, + "loss": 0.0616, + "step": 58575 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978006991362688e-05, + "loss": 0.0691, + "step": 58580 + }, + { + "epoch": 2.73, + "learning_rate": 1.0977223206307903e-05, + "loss": 0.0437, + "step": 58585 + }, + { + "epoch": 2.73, + "learning_rate": 1.0976439421253116e-05, + "loss": 0.0749, + "step": 58590 + }, + { + "epoch": 2.73, + "learning_rate": 1.097565563619833e-05, + "loss": 0.0765, + "step": 58595 + }, + { + "epoch": 2.73, + "learning_rate": 1.0975028608154502e-05, + "loss": 0.0997, + "step": 58600 + }, + { + "epoch": 2.73, + "learning_rate": 1.0974244823099714e-05, + "loss": 0.1803, + "step": 58605 + }, + { + "epoch": 2.73, + "learning_rate": 1.0973461038044928e-05, + "loss": 0.0981, + "step": 58610 + }, + { + "epoch": 2.74, + "learning_rate": 1.097267725299014e-05, + "loss": 0.3792, + "step": 58615 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971893467935356e-05, + "loss": 0.0301, + "step": 58620 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971109682880568e-05, + "loss": 0.0599, + "step": 58625 + }, + { + "epoch": 2.74, + "learning_rate": 1.0970325897825782e-05, + "loss": 0.0407, + "step": 58630 + }, + { + "epoch": 2.74, + "learning_rate": 1.0969542112770994e-05, + "loss": 0.0282, + "step": 58635 + }, + { + "epoch": 2.74, + "learning_rate": 1.096875832771621e-05, + "loss": 0.1213, + "step": 58640 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967974542661422e-05, + "loss": 0.0466, + "step": 58645 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967190757606634e-05, + "loss": 0.1724, + "step": 58650 + }, + { + "epoch": 2.74, + "learning_rate": 1.0966406972551848e-05, + "loss": 0.127, + "step": 58655 + }, + { + "epoch": 2.74, + "learning_rate": 1.096562318749706e-05, + "loss": 0.1452, + "step": 58660 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964839402442276e-05, + "loss": 0.1929, + "step": 58665 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964055617387488e-05, + "loss": 0.0849, + "step": 58670 + }, + { + "epoch": 2.74, + "learning_rate": 1.0963271832332702e-05, + "loss": 0.0543, + "step": 58675 + }, + { + "epoch": 2.74, + "learning_rate": 1.0962488047277916e-05, + "loss": 0.0736, + "step": 58680 + }, + { + "epoch": 2.74, + "learning_rate": 1.096170426222313e-05, + "loss": 0.0518, + "step": 58685 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960920477168342e-05, + "loss": 0.0852, + "step": 58690 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960136692113556e-05, + "loss": 0.1975, + "step": 58695 + }, + { + "epoch": 2.74, + "learning_rate": 1.095935290705877e-05, + "loss": 0.1414, + "step": 58700 + }, + { + "epoch": 2.74, + "learning_rate": 1.0958569122003984e-05, + "loss": 0.2228, + "step": 58705 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957785336949196e-05, + "loss": 0.1912, + "step": 58710 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957001551894408e-05, + "loss": 0.2865, + "step": 58715 + }, + { + "epoch": 2.74, + "learning_rate": 1.0956217766839624e-05, + "loss": 0.0588, + "step": 58720 + }, + { + "epoch": 2.74, + "learning_rate": 1.0955433981784836e-05, + "loss": 0.0375, + "step": 58725 + }, + { + "epoch": 2.74, + "learning_rate": 1.095465019673005e-05, + "loss": 0.1023, + "step": 58730 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953866411675262e-05, + "loss": 0.0744, + "step": 58735 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953082626620478e-05, + "loss": 0.114, + "step": 58740 + }, + { + "epoch": 2.74, + "learning_rate": 1.095229884156569e-05, + "loss": 0.1044, + "step": 58745 + }, + { + "epoch": 2.74, + "learning_rate": 1.0951515056510904e-05, + "loss": 0.1161, + "step": 58750 + }, + { + "epoch": 2.74, + "learning_rate": 1.0950731271456116e-05, + "loss": 0.1975, + "step": 58755 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949947486401331e-05, + "loss": 0.2124, + "step": 58760 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949163701346544e-05, + "loss": 0.4417, + "step": 58765 + }, + { + "epoch": 2.74, + "learning_rate": 1.0948379916291758e-05, + "loss": 0.0748, + "step": 58770 + }, + { + "epoch": 2.74, + "learning_rate": 1.094759613123697e-05, + "loss": 0.0129, + "step": 58775 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946812346182184e-05, + "loss": 0.0311, + "step": 58780 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946028561127398e-05, + "loss": 0.1198, + "step": 58785 + }, + { + "epoch": 2.74, + "learning_rate": 1.094524477607261e-05, + "loss": 0.0663, + "step": 58790 + }, + { + "epoch": 2.74, + "learning_rate": 1.0944460991017824e-05, + "loss": 0.0381, + "step": 58795 + }, + { + "epoch": 2.74, + "learning_rate": 1.0943677205963038e-05, + "loss": 0.1238, + "step": 58800 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942893420908252e-05, + "loss": 0.213, + "step": 58805 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942109635853464e-05, + "loss": 0.1942, + "step": 58810 + }, + { + "epoch": 2.74, + "learning_rate": 1.094132585079868e-05, + "loss": 0.349, + "step": 58815 + }, + { + "epoch": 2.74, + "learning_rate": 1.0940542065743892e-05, + "loss": 0.0369, + "step": 58820 + }, + { + "epoch": 2.74, + "learning_rate": 1.0939758280689105e-05, + "loss": 0.0544, + "step": 58825 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938974495634318e-05, + "loss": 0.0355, + "step": 58830 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938190710579533e-05, + "loss": 0.0572, + "step": 58835 + }, + { + "epoch": 2.75, + "learning_rate": 1.0937406925524746e-05, + "loss": 0.1182, + "step": 58840 + }, + { + "epoch": 2.75, + "learning_rate": 1.0936623140469958e-05, + "loss": 0.1305, + "step": 58845 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935839355415172e-05, + "loss": 0.1772, + "step": 58850 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935055570360384e-05, + "loss": 0.1283, + "step": 58855 + }, + { + "epoch": 2.75, + "learning_rate": 1.09342717853056e-05, + "loss": 0.1314, + "step": 58860 + }, + { + "epoch": 2.75, + "learning_rate": 1.0933488000250812e-05, + "loss": 0.2568, + "step": 58865 + }, + { + "epoch": 2.75, + "learning_rate": 1.0932704215196026e-05, + "loss": 0.0676, + "step": 58870 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931920430141238e-05, + "loss": 0.0313, + "step": 58875 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931136645086453e-05, + "loss": 0.0603, + "step": 58880 + }, + { + "epoch": 2.75, + "learning_rate": 1.0930352860031666e-05, + "loss": 0.0634, + "step": 58885 + }, + { + "epoch": 2.75, + "learning_rate": 1.092956907497688e-05, + "loss": 0.0677, + "step": 58890 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928785289922092e-05, + "loss": 0.0863, + "step": 58895 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928001504867307e-05, + "loss": 0.063, + "step": 58900 + }, + { + "epoch": 2.75, + "learning_rate": 1.092721771981252e-05, + "loss": 0.17, + "step": 58905 + }, + { + "epoch": 2.75, + "learning_rate": 1.0926433934757732e-05, + "loss": 0.1749, + "step": 58910 + }, + { + "epoch": 2.75, + "learning_rate": 1.0925650149702947e-05, + "loss": 0.2382, + "step": 58915 + }, + { + "epoch": 2.75, + "learning_rate": 1.092486636464816e-05, + "loss": 0.0386, + "step": 58920 + }, + { + "epoch": 2.75, + "learning_rate": 1.0924082579593373e-05, + "loss": 0.0822, + "step": 58925 + }, + { + "epoch": 2.75, + "learning_rate": 1.0923298794538586e-05, + "loss": 0.0393, + "step": 58930 + }, + { + "epoch": 2.75, + "learning_rate": 1.0922515009483801e-05, + "loss": 0.0475, + "step": 58935 + }, + { + "epoch": 2.75, + "learning_rate": 1.0921731224429013e-05, + "loss": 0.0143, + "step": 58940 + }, + { + "epoch": 2.75, + "learning_rate": 1.0920947439374227e-05, + "loss": 0.1933, + "step": 58945 + }, + { + "epoch": 2.75, + "learning_rate": 1.092016365431944e-05, + "loss": 0.1644, + "step": 58950 + }, + { + "epoch": 2.75, + "learning_rate": 1.0919379869264655e-05, + "loss": 0.1502, + "step": 58955 + }, + { + "epoch": 2.75, + "learning_rate": 1.0918596084209867e-05, + "loss": 0.1974, + "step": 58960 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917812299155081e-05, + "loss": 0.2703, + "step": 58965 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917028514100293e-05, + "loss": 0.0824, + "step": 58970 + }, + { + "epoch": 2.75, + "learning_rate": 1.0916244729045506e-05, + "loss": 0.0104, + "step": 58975 + }, + { + "epoch": 2.75, + "learning_rate": 1.0915460943990721e-05, + "loss": 0.0975, + "step": 58980 + }, + { + "epoch": 2.75, + "learning_rate": 1.0914677158935934e-05, + "loss": 0.0575, + "step": 58985 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913893373881147e-05, + "loss": 0.059, + "step": 58990 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913109588826361e-05, + "loss": 0.1536, + "step": 58995 + }, + { + "epoch": 2.75, + "learning_rate": 1.0912325803771575e-05, + "loss": 0.1253, + "step": 59000 + }, + { + "epoch": 2.75, + "learning_rate": 1.0911542018716787e-05, + "loss": 0.198, + "step": 59005 + }, + { + "epoch": 2.75, + "learning_rate": 1.0910758233662001e-05, + "loss": 0.2212, + "step": 59010 + }, + { + "epoch": 2.75, + "learning_rate": 1.0909974448607215e-05, + "loss": 0.3376, + "step": 59015 + }, + { + "epoch": 2.75, + "learning_rate": 1.090919066355243e-05, + "loss": 0.0407, + "step": 59020 + }, + { + "epoch": 2.75, + "learning_rate": 1.0908406878497641e-05, + "loss": 0.0796, + "step": 59025 + }, + { + "epoch": 2.75, + "learning_rate": 1.0907623093442857e-05, + "loss": 0.0662, + "step": 59030 + }, + { + "epoch": 2.75, + "learning_rate": 1.090683930838807e-05, + "loss": 0.0887, + "step": 59035 + }, + { + "epoch": 2.75, + "learning_rate": 1.0906055523333281e-05, + "loss": 0.0557, + "step": 59040 + }, + { + "epoch": 2.76, + "learning_rate": 1.0905271738278495e-05, + "loss": 0.1039, + "step": 59045 + }, + { + "epoch": 2.76, + "learning_rate": 1.0904487953223708e-05, + "loss": 0.1408, + "step": 59050 + }, + { + "epoch": 2.76, + "learning_rate": 1.0903704168168923e-05, + "loss": 0.1745, + "step": 59055 + }, + { + "epoch": 2.76, + "learning_rate": 1.0902920383114135e-05, + "loss": 0.2041, + "step": 59060 + }, + { + "epoch": 2.76, + "learning_rate": 1.090213659805935e-05, + "loss": 0.3081, + "step": 59065 + }, + { + "epoch": 2.76, + "learning_rate": 1.0901352813004561e-05, + "loss": 0.0728, + "step": 59070 + }, + { + "epoch": 2.76, + "learning_rate": 1.0900569027949777e-05, + "loss": 0.0263, + "step": 59075 + }, + { + "epoch": 2.76, + "learning_rate": 1.089978524289499e-05, + "loss": 0.0564, + "step": 59080 + }, + { + "epoch": 2.76, + "learning_rate": 1.0899001457840203e-05, + "loss": 0.0237, + "step": 59085 + }, + { + "epoch": 2.76, + "learning_rate": 1.0898217672785415e-05, + "loss": 0.1018, + "step": 59090 + }, + { + "epoch": 2.76, + "learning_rate": 1.0897433887730631e-05, + "loss": 0.1151, + "step": 59095 + }, + { + "epoch": 2.76, + "learning_rate": 1.0896650102675843e-05, + "loss": 0.147, + "step": 59100 + }, + { + "epoch": 2.76, + "learning_rate": 1.0895866317621055e-05, + "loss": 0.1815, + "step": 59105 + }, + { + "epoch": 2.76, + "learning_rate": 1.089508253256627e-05, + "loss": 0.1446, + "step": 59110 + }, + { + "epoch": 2.76, + "learning_rate": 1.0894298747511483e-05, + "loss": 0.2181, + "step": 59115 + }, + { + "epoch": 2.76, + "learning_rate": 1.0893514962456697e-05, + "loss": 0.0203, + "step": 59120 + }, + { + "epoch": 2.76, + "learning_rate": 1.089273117740191e-05, + "loss": 0.0237, + "step": 59125 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891947392347125e-05, + "loss": 0.0479, + "step": 59130 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891163607292337e-05, + "loss": 0.0397, + "step": 59135 + }, + { + "epoch": 2.76, + "learning_rate": 1.0890379822237551e-05, + "loss": 0.0788, + "step": 59140 + }, + { + "epoch": 2.76, + "learning_rate": 1.0889596037182763e-05, + "loss": 0.1114, + "step": 59145 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888812252127979e-05, + "loss": 0.1694, + "step": 59150 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888028467073191e-05, + "loss": 0.2413, + "step": 59155 + }, + { + "epoch": 2.76, + "learning_rate": 1.0887244682018405e-05, + "loss": 0.2377, + "step": 59160 + }, + { + "epoch": 2.76, + "learning_rate": 1.0886460896963617e-05, + "loss": 0.2335, + "step": 59165 + }, + { + "epoch": 2.76, + "learning_rate": 1.088567711190883e-05, + "loss": 0.0697, + "step": 59170 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884893326854045e-05, + "loss": 0.0551, + "step": 59175 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884109541799257e-05, + "loss": 0.0589, + "step": 59180 + }, + { + "epoch": 2.76, + "learning_rate": 1.0883325756744471e-05, + "loss": 0.0893, + "step": 59185 + }, + { + "epoch": 2.76, + "learning_rate": 1.0882541971689683e-05, + "loss": 0.1181, + "step": 59190 + }, + { + "epoch": 2.76, + "learning_rate": 1.0881758186634899e-05, + "loss": 0.0627, + "step": 59195 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880974401580111e-05, + "loss": 0.0549, + "step": 59200 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880190616525325e-05, + "loss": 0.1895, + "step": 59205 + }, + { + "epoch": 2.76, + "learning_rate": 1.0879406831470539e-05, + "loss": 0.1951, + "step": 59210 + }, + { + "epoch": 2.76, + "learning_rate": 1.0878623046415753e-05, + "loss": 0.2903, + "step": 59215 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877839261360965e-05, + "loss": 0.0543, + "step": 59220 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877055476306179e-05, + "loss": 0.0562, + "step": 59225 + }, + { + "epoch": 2.76, + "learning_rate": 1.0876271691251393e-05, + "loss": 0.0212, + "step": 59230 + }, + { + "epoch": 2.76, + "learning_rate": 1.0875487906196605e-05, + "loss": 0.0372, + "step": 59235 + }, + { + "epoch": 2.76, + "learning_rate": 1.0874704121141819e-05, + "loss": 0.0557, + "step": 59240 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873920336087031e-05, + "loss": 0.0941, + "step": 59245 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873136551032247e-05, + "loss": 0.1335, + "step": 59250 + }, + { + "epoch": 2.76, + "learning_rate": 1.0872352765977459e-05, + "loss": 0.1388, + "step": 59255 + }, + { + "epoch": 2.77, + "learning_rate": 1.0871568980922673e-05, + "loss": 0.3518, + "step": 59260 + }, + { + "epoch": 2.77, + "learning_rate": 1.0870785195867885e-05, + "loss": 0.2424, + "step": 59265 + }, + { + "epoch": 2.77, + "learning_rate": 1.08700014108131e-05, + "loss": 0.0594, + "step": 59270 + }, + { + "epoch": 2.77, + "learning_rate": 1.0869217625758313e-05, + "loss": 0.0566, + "step": 59275 + }, + { + "epoch": 2.77, + "learning_rate": 1.0868433840703527e-05, + "loss": 0.0809, + "step": 59280 + }, + { + "epoch": 2.77, + "learning_rate": 1.0867650055648739e-05, + "loss": 0.0813, + "step": 59285 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866866270593955e-05, + "loss": 0.1093, + "step": 59290 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866082485539167e-05, + "loss": 0.0765, + "step": 59295 + }, + { + "epoch": 2.77, + "learning_rate": 1.0865298700484379e-05, + "loss": 0.1189, + "step": 59300 + }, + { + "epoch": 2.77, + "learning_rate": 1.0864514915429593e-05, + "loss": 0.2332, + "step": 59305 + }, + { + "epoch": 2.77, + "learning_rate": 1.0863731130374807e-05, + "loss": 0.2367, + "step": 59310 + }, + { + "epoch": 2.77, + "learning_rate": 1.086294734532002e-05, + "loss": 0.3552, + "step": 59315 + }, + { + "epoch": 2.77, + "learning_rate": 1.0862163560265233e-05, + "loss": 0.0216, + "step": 59320 + }, + { + "epoch": 2.77, + "learning_rate": 1.0861379775210447e-05, + "loss": 0.0542, + "step": 59325 + }, + { + "epoch": 2.77, + "learning_rate": 1.086059599015566e-05, + "loss": 0.0626, + "step": 59330 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859812205100875e-05, + "loss": 0.0793, + "step": 59335 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859028420046087e-05, + "loss": 0.0943, + "step": 59340 + }, + { + "epoch": 2.77, + "learning_rate": 1.0858244634991303e-05, + "loss": 0.1106, + "step": 59345 + }, + { + "epoch": 2.77, + "learning_rate": 1.0857460849936515e-05, + "loss": 0.1345, + "step": 59350 + }, + { + "epoch": 2.77, + "learning_rate": 1.0856677064881729e-05, + "loss": 0.2081, + "step": 59355 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855893279826941e-05, + "loss": 0.3029, + "step": 59360 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855109494772153e-05, + "loss": 0.2904, + "step": 59365 + }, + { + "epoch": 2.77, + "learning_rate": 1.0854325709717369e-05, + "loss": 0.0358, + "step": 59370 + }, + { + "epoch": 2.77, + "learning_rate": 1.0853541924662581e-05, + "loss": 0.0264, + "step": 59375 + }, + { + "epoch": 2.77, + "learning_rate": 1.0852758139607795e-05, + "loss": 0.0394, + "step": 59380 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851974354553007e-05, + "loss": 0.0479, + "step": 59385 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851190569498223e-05, + "loss": 0.0999, + "step": 59390 + }, + { + "epoch": 2.77, + "learning_rate": 1.0850406784443435e-05, + "loss": 0.0661, + "step": 59395 + }, + { + "epoch": 2.77, + "learning_rate": 1.0849622999388649e-05, + "loss": 0.0937, + "step": 59400 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848839214333861e-05, + "loss": 0.1815, + "step": 59405 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848055429279077e-05, + "loss": 0.3394, + "step": 59410 + }, + { + "epoch": 2.77, + "learning_rate": 1.0847271644224289e-05, + "loss": 0.3475, + "step": 59415 + }, + { + "epoch": 2.77, + "learning_rate": 1.0846487859169503e-05, + "loss": 0.0382, + "step": 59420 + }, + { + "epoch": 2.77, + "learning_rate": 1.0845704074114715e-05, + "loss": 0.0969, + "step": 59425 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844920289059929e-05, + "loss": 0.0548, + "step": 59430 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844136504005143e-05, + "loss": 0.0841, + "step": 59435 + }, + { + "epoch": 2.77, + "learning_rate": 1.0843352718950355e-05, + "loss": 0.0781, + "step": 59440 + }, + { + "epoch": 2.77, + "learning_rate": 1.084256893389557e-05, + "loss": 0.1071, + "step": 59445 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841785148840783e-05, + "loss": 0.1219, + "step": 59450 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841001363785997e-05, + "loss": 0.1605, + "step": 59455 + }, + { + "epoch": 2.77, + "learning_rate": 1.0840217578731209e-05, + "loss": 0.3221, + "step": 59460 + }, + { + "epoch": 2.77, + "learning_rate": 1.0839433793676424e-05, + "loss": 0.2631, + "step": 59465 + }, + { + "epoch": 2.77, + "learning_rate": 1.0838650008621637e-05, + "loss": 0.0423, + "step": 59470 + }, + { + "epoch": 2.78, + "learning_rate": 1.083786622356685e-05, + "loss": 0.0269, + "step": 59475 + }, + { + "epoch": 2.78, + "learning_rate": 1.0837082438512063e-05, + "loss": 0.0213, + "step": 59480 + }, + { + "epoch": 2.78, + "learning_rate": 1.0836298653457278e-05, + "loss": 0.0742, + "step": 59485 + }, + { + "epoch": 2.78, + "learning_rate": 1.083551486840249e-05, + "loss": 0.0317, + "step": 59490 + }, + { + "epoch": 2.78, + "learning_rate": 1.0834731083347703e-05, + "loss": 0.1428, + "step": 59495 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833947298292917e-05, + "loss": 0.1145, + "step": 59500 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833163513238129e-05, + "loss": 0.1002, + "step": 59505 + }, + { + "epoch": 2.78, + "learning_rate": 1.0832379728183344e-05, + "loss": 0.2298, + "step": 59510 + }, + { + "epoch": 2.78, + "learning_rate": 1.0831595943128557e-05, + "loss": 0.3871, + "step": 59515 + }, + { + "epoch": 2.78, + "learning_rate": 1.083081215807377e-05, + "loss": 0.03, + "step": 59520 + }, + { + "epoch": 2.78, + "learning_rate": 1.0830028373018985e-05, + "loss": 0.0571, + "step": 59525 + }, + { + "epoch": 2.78, + "learning_rate": 1.0829244587964198e-05, + "loss": 0.0582, + "step": 59530 + }, + { + "epoch": 2.78, + "learning_rate": 1.082846080290941e-05, + "loss": 0.0255, + "step": 59535 + }, + { + "epoch": 2.78, + "learning_rate": 1.0827677017854625e-05, + "loss": 0.0613, + "step": 59540 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826893232799838e-05, + "loss": 0.0576, + "step": 59545 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826109447745052e-05, + "loss": 0.172, + "step": 59550 + }, + { + "epoch": 2.78, + "learning_rate": 1.0825325662690265e-05, + "loss": 0.1161, + "step": 59555 + }, + { + "epoch": 2.78, + "learning_rate": 1.0824541877635477e-05, + "loss": 0.3, + "step": 59560 + }, + { + "epoch": 2.78, + "learning_rate": 1.0823758092580692e-05, + "loss": 0.3196, + "step": 59565 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822974307525905e-05, + "loss": 0.0401, + "step": 59570 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822190522471118e-05, + "loss": 0.0501, + "step": 59575 + }, + { + "epoch": 2.78, + "learning_rate": 1.082140673741633e-05, + "loss": 0.03, + "step": 59580 + }, + { + "epoch": 2.78, + "learning_rate": 1.0820622952361546e-05, + "loss": 0.1153, + "step": 59585 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819839167306759e-05, + "loss": 0.095, + "step": 59590 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819055382251972e-05, + "loss": 0.0778, + "step": 59595 + }, + { + "epoch": 2.78, + "learning_rate": 1.0818271597197185e-05, + "loss": 0.0841, + "step": 59600 + }, + { + "epoch": 2.78, + "learning_rate": 1.08174878121424e-05, + "loss": 0.1166, + "step": 59605 + }, + { + "epoch": 2.78, + "learning_rate": 1.0816704027087612e-05, + "loss": 0.202, + "step": 59610 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815920242032826e-05, + "loss": 0.2211, + "step": 59615 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815136456978039e-05, + "loss": 0.0165, + "step": 59620 + }, + { + "epoch": 2.78, + "learning_rate": 1.0814352671923252e-05, + "loss": 0.0745, + "step": 59625 + }, + { + "epoch": 2.78, + "learning_rate": 1.0813568886868466e-05, + "loss": 0.0653, + "step": 59630 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812785101813679e-05, + "loss": 0.0922, + "step": 59635 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812001316758892e-05, + "loss": 0.1051, + "step": 59640 + }, + { + "epoch": 2.78, + "learning_rate": 1.0811217531704106e-05, + "loss": 0.1219, + "step": 59645 + }, + { + "epoch": 2.78, + "learning_rate": 1.081043374664932e-05, + "loss": 0.2201, + "step": 59650 + }, + { + "epoch": 2.78, + "learning_rate": 1.0809649961594533e-05, + "loss": 0.1585, + "step": 59655 + }, + { + "epoch": 2.78, + "learning_rate": 1.0808866176539748e-05, + "loss": 0.2065, + "step": 59660 + }, + { + "epoch": 2.78, + "learning_rate": 1.080808239148496e-05, + "loss": 0.3347, + "step": 59665 + }, + { + "epoch": 2.78, + "learning_rate": 1.0807298606430174e-05, + "loss": 0.0825, + "step": 59670 + }, + { + "epoch": 2.78, + "learning_rate": 1.0806514821375386e-05, + "loss": 0.0217, + "step": 59675 + }, + { + "epoch": 2.78, + "learning_rate": 1.0805731036320602e-05, + "loss": 0.0387, + "step": 59680 + }, + { + "epoch": 2.78, + "learning_rate": 1.0804947251265814e-05, + "loss": 0.0708, + "step": 59685 + }, + { + "epoch": 2.79, + "learning_rate": 1.0804163466211026e-05, + "loss": 0.1803, + "step": 59690 + }, + { + "epoch": 2.79, + "learning_rate": 1.080337968115624e-05, + "loss": 0.0929, + "step": 59695 + }, + { + "epoch": 2.79, + "learning_rate": 1.0802595896101453e-05, + "loss": 0.0763, + "step": 59700 + }, + { + "epoch": 2.79, + "learning_rate": 1.0801812111046668e-05, + "loss": 0.1241, + "step": 59705 + }, + { + "epoch": 2.79, + "learning_rate": 1.080102832599188e-05, + "loss": 0.2267, + "step": 59710 + }, + { + "epoch": 2.79, + "learning_rate": 1.0800244540937094e-05, + "loss": 0.1755, + "step": 59715 + }, + { + "epoch": 2.79, + "learning_rate": 1.0799460755882307e-05, + "loss": 0.0372, + "step": 59720 + }, + { + "epoch": 2.79, + "learning_rate": 1.0798676970827522e-05, + "loss": 0.0253, + "step": 59725 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797893185772734e-05, + "loss": 0.1087, + "step": 59730 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797109400717948e-05, + "loss": 0.0811, + "step": 59735 + }, + { + "epoch": 2.79, + "learning_rate": 1.079632561566316e-05, + "loss": 0.0563, + "step": 59740 + }, + { + "epoch": 2.79, + "learning_rate": 1.0795541830608376e-05, + "loss": 0.1129, + "step": 59745 + }, + { + "epoch": 2.79, + "learning_rate": 1.0794758045553588e-05, + "loss": 0.1649, + "step": 59750 + }, + { + "epoch": 2.79, + "learning_rate": 1.07939742604988e-05, + "loss": 0.1641, + "step": 59755 + }, + { + "epoch": 2.79, + "learning_rate": 1.0793190475444016e-05, + "loss": 0.2485, + "step": 59760 + }, + { + "epoch": 2.79, + "learning_rate": 1.0792406690389228e-05, + "loss": 0.2066, + "step": 59765 + }, + { + "epoch": 2.79, + "learning_rate": 1.0791622905334442e-05, + "loss": 0.0445, + "step": 59770 + }, + { + "epoch": 2.79, + "learning_rate": 1.0790839120279654e-05, + "loss": 0.0544, + "step": 59775 + }, + { + "epoch": 2.79, + "learning_rate": 1.079005533522487e-05, + "loss": 0.0232, + "step": 59780 + }, + { + "epoch": 2.79, + "learning_rate": 1.0789271550170082e-05, + "loss": 0.0835, + "step": 59785 + }, + { + "epoch": 2.79, + "learning_rate": 1.0788487765115296e-05, + "loss": 0.1308, + "step": 59790 + }, + { + "epoch": 2.79, + "learning_rate": 1.0787703980060508e-05, + "loss": 0.0412, + "step": 59795 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786920195005724e-05, + "loss": 0.2067, + "step": 59800 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786136409950936e-05, + "loss": 0.1801, + "step": 59805 + }, + { + "epoch": 2.79, + "learning_rate": 1.078535262489615e-05, + "loss": 0.2654, + "step": 59810 + }, + { + "epoch": 2.79, + "learning_rate": 1.0784568839841362e-05, + "loss": 0.2404, + "step": 59815 + }, + { + "epoch": 2.79, + "learning_rate": 1.0783785054786574e-05, + "loss": 0.0479, + "step": 59820 + }, + { + "epoch": 2.79, + "learning_rate": 1.078300126973179e-05, + "loss": 0.0855, + "step": 59825 + }, + { + "epoch": 2.79, + "learning_rate": 1.0782217484677002e-05, + "loss": 0.0729, + "step": 59830 + }, + { + "epoch": 2.79, + "learning_rate": 1.0781433699622216e-05, + "loss": 0.0526, + "step": 59835 + }, + { + "epoch": 2.79, + "learning_rate": 1.078064991456743e-05, + "loss": 0.0608, + "step": 59840 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779866129512644e-05, + "loss": 0.0754, + "step": 59845 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779082344457856e-05, + "loss": 0.1885, + "step": 59850 + }, + { + "epoch": 2.79, + "learning_rate": 1.077829855940307e-05, + "loss": 0.1078, + "step": 59855 + }, + { + "epoch": 2.79, + "learning_rate": 1.0777514774348284e-05, + "loss": 0.2044, + "step": 59860 + }, + { + "epoch": 2.79, + "learning_rate": 1.0776730989293498e-05, + "loss": 0.3295, + "step": 59865 + }, + { + "epoch": 2.79, + "learning_rate": 1.077594720423871e-05, + "loss": 0.0507, + "step": 59870 + }, + { + "epoch": 2.79, + "learning_rate": 1.0775163419183926e-05, + "loss": 0.0369, + "step": 59875 + }, + { + "epoch": 2.79, + "learning_rate": 1.0774379634129138e-05, + "loss": 0.0298, + "step": 59880 + }, + { + "epoch": 2.79, + "learning_rate": 1.077359584907435e-05, + "loss": 0.0492, + "step": 59885 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772812064019564e-05, + "loss": 0.0656, + "step": 59890 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772028278964776e-05, + "loss": 0.0895, + "step": 59895 + }, + { + "epoch": 2.8, + "learning_rate": 1.0771244493909992e-05, + "loss": 0.1175, + "step": 59900 + }, + { + "epoch": 2.8, + "learning_rate": 1.0770460708855204e-05, + "loss": 0.1537, + "step": 59905 + }, + { + "epoch": 2.8, + "learning_rate": 1.0769676923800418e-05, + "loss": 0.3598, + "step": 59910 + }, + { + "epoch": 2.8, + "learning_rate": 1.076889313874563e-05, + "loss": 0.2923, + "step": 59915 + }, + { + "epoch": 2.8, + "learning_rate": 1.0768109353690846e-05, + "loss": 0.0608, + "step": 59920 + }, + { + "epoch": 2.8, + "learning_rate": 1.0767325568636058e-05, + "loss": 0.0519, + "step": 59925 + }, + { + "epoch": 2.8, + "learning_rate": 1.0766541783581272e-05, + "loss": 0.0307, + "step": 59930 + }, + { + "epoch": 2.8, + "learning_rate": 1.0765757998526484e-05, + "loss": 0.1519, + "step": 59935 + }, + { + "epoch": 2.8, + "learning_rate": 1.07649742134717e-05, + "loss": 0.0559, + "step": 59940 + }, + { + "epoch": 2.8, + "learning_rate": 1.0764190428416912e-05, + "loss": 0.1429, + "step": 59945 + }, + { + "epoch": 2.8, + "learning_rate": 1.0763406643362124e-05, + "loss": 0.085, + "step": 59950 + }, + { + "epoch": 2.8, + "learning_rate": 1.0762622858307338e-05, + "loss": 0.2346, + "step": 59955 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761839073252552e-05, + "loss": 0.2619, + "step": 59960 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761055288197766e-05, + "loss": 0.1942, + "step": 59965 + }, + { + "epoch": 2.8, + "learning_rate": 1.0760271503142978e-05, + "loss": 0.0377, + "step": 59970 + }, + { + "epoch": 2.8, + "learning_rate": 1.0759487718088194e-05, + "loss": 0.0325, + "step": 59975 + }, + { + "epoch": 2.8, + "learning_rate": 1.0758703933033406e-05, + "loss": 0.0534, + "step": 59980 + }, + { + "epoch": 2.8, + "learning_rate": 1.075792014797862e-05, + "loss": 0.0849, + "step": 59985 + }, + { + "epoch": 2.8, + "learning_rate": 1.0757136362923832e-05, + "loss": 0.0567, + "step": 59990 + }, + { + "epoch": 2.8, + "learning_rate": 1.0756352577869048e-05, + "loss": 0.0723, + "step": 59995 + }, + { + "epoch": 2.8, + "learning_rate": 1.075556879281426e-05, + "loss": 0.1649, + "step": 60000 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754785007759474e-05, + "loss": 0.1065, + "step": 60005 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754001222704686e-05, + "loss": 0.0981, + "step": 60010 + }, + { + "epoch": 2.8, + "learning_rate": 1.0753217437649898e-05, + "loss": 0.3571, + "step": 60015 + }, + { + "epoch": 2.8, + "learning_rate": 1.0752433652595114e-05, + "loss": 0.0691, + "step": 60020 + }, + { + "epoch": 2.8, + "learning_rate": 1.0751649867540326e-05, + "loss": 0.0567, + "step": 60025 + }, + { + "epoch": 2.8, + "learning_rate": 1.075086608248554e-05, + "loss": 0.0616, + "step": 60030 + }, + { + "epoch": 2.8, + "learning_rate": 1.0750082297430752e-05, + "loss": 0.0686, + "step": 60035 + }, + { + "epoch": 2.8, + "learning_rate": 1.0749298512375968e-05, + "loss": 0.0687, + "step": 60040 + }, + { + "epoch": 2.8, + "learning_rate": 1.074851472732118e-05, + "loss": 0.1136, + "step": 60045 + }, + { + "epoch": 2.8, + "learning_rate": 1.0747730942266394e-05, + "loss": 0.1254, + "step": 60050 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746947157211606e-05, + "loss": 0.2474, + "step": 60055 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746163372156822e-05, + "loss": 0.1883, + "step": 60060 + }, + { + "epoch": 2.8, + "learning_rate": 1.0745379587102034e-05, + "loss": 0.4343, + "step": 60065 + }, + { + "epoch": 2.8, + "learning_rate": 1.0744595802047248e-05, + "loss": 0.0974, + "step": 60070 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743812016992462e-05, + "loss": 0.0394, + "step": 60075 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743028231937674e-05, + "loss": 0.0907, + "step": 60080 + }, + { + "epoch": 2.8, + "learning_rate": 1.0742244446882888e-05, + "loss": 0.1173, + "step": 60085 + }, + { + "epoch": 2.8, + "learning_rate": 1.07414606618281e-05, + "loss": 0.096, + "step": 60090 + }, + { + "epoch": 2.8, + "learning_rate": 1.0740676876773316e-05, + "loss": 0.1801, + "step": 60095 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739893091718528e-05, + "loss": 0.2232, + "step": 60100 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739109306663742e-05, + "loss": 0.1869, + "step": 60105 + }, + { + "epoch": 2.8, + "learning_rate": 1.0738325521608954e-05, + "loss": 0.2526, + "step": 60110 + }, + { + "epoch": 2.81, + "learning_rate": 1.073754173655417e-05, + "loss": 0.1866, + "step": 60115 + }, + { + "epoch": 2.81, + "learning_rate": 1.0736757951499382e-05, + "loss": 0.088, + "step": 60120 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735974166444596e-05, + "loss": 0.0488, + "step": 60125 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735190381389808e-05, + "loss": 0.0262, + "step": 60130 + }, + { + "epoch": 2.81, + "learning_rate": 1.0734406596335023e-05, + "loss": 0.0383, + "step": 60135 + }, + { + "epoch": 2.81, + "learning_rate": 1.0733622811280236e-05, + "loss": 0.1139, + "step": 60140 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732839026225448e-05, + "loss": 0.0991, + "step": 60145 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732055241170662e-05, + "loss": 0.08, + "step": 60150 + }, + { + "epoch": 2.81, + "learning_rate": 1.0731271456115876e-05, + "loss": 0.1972, + "step": 60155 + }, + { + "epoch": 2.81, + "learning_rate": 1.073048767106109e-05, + "loss": 0.157, + "step": 60160 + }, + { + "epoch": 2.81, + "learning_rate": 1.0729703886006302e-05, + "loss": 0.3585, + "step": 60165 + }, + { + "epoch": 2.81, + "learning_rate": 1.0728920100951516e-05, + "loss": 0.0269, + "step": 60170 + }, + { + "epoch": 2.81, + "learning_rate": 1.072813631589673e-05, + "loss": 0.0937, + "step": 60175 + }, + { + "epoch": 2.81, + "learning_rate": 1.0727352530841943e-05, + "loss": 0.0532, + "step": 60180 + }, + { + "epoch": 2.81, + "learning_rate": 1.0726568745787156e-05, + "loss": 0.065, + "step": 60185 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725784960732371e-05, + "loss": 0.1078, + "step": 60190 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725001175677584e-05, + "loss": 0.0757, + "step": 60195 + }, + { + "epoch": 2.81, + "learning_rate": 1.0724217390622797e-05, + "loss": 0.1246, + "step": 60200 + }, + { + "epoch": 2.81, + "learning_rate": 1.072343360556801e-05, + "loss": 0.1834, + "step": 60205 + }, + { + "epoch": 2.81, + "learning_rate": 1.0722649820513222e-05, + "loss": 0.3636, + "step": 60210 + }, + { + "epoch": 2.81, + "learning_rate": 1.0721866035458437e-05, + "loss": 0.2649, + "step": 60215 + }, + { + "epoch": 2.81, + "learning_rate": 1.072108225040365e-05, + "loss": 0.0994, + "step": 60220 + }, + { + "epoch": 2.81, + "learning_rate": 1.0720298465348864e-05, + "loss": 0.0371, + "step": 60225 + }, + { + "epoch": 2.81, + "learning_rate": 1.0719514680294076e-05, + "loss": 0.0265, + "step": 60230 + }, + { + "epoch": 2.81, + "learning_rate": 1.0718730895239291e-05, + "loss": 0.0794, + "step": 60235 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717947110184504e-05, + "loss": 0.0882, + "step": 60240 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717163325129717e-05, + "loss": 0.0821, + "step": 60245 + }, + { + "epoch": 2.81, + "learning_rate": 1.071637954007493e-05, + "loss": 0.0938, + "step": 60250 + }, + { + "epoch": 2.81, + "learning_rate": 1.0715595755020145e-05, + "loss": 0.0977, + "step": 60255 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714811969965358e-05, + "loss": 0.165, + "step": 60260 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714028184910571e-05, + "loss": 0.3129, + "step": 60265 + }, + { + "epoch": 2.81, + "learning_rate": 1.0713244399855784e-05, + "loss": 0.0767, + "step": 60270 + }, + { + "epoch": 2.81, + "learning_rate": 1.0712460614800998e-05, + "loss": 0.0686, + "step": 60275 + }, + { + "epoch": 2.81, + "learning_rate": 1.0711676829746211e-05, + "loss": 0.0351, + "step": 60280 + }, + { + "epoch": 2.81, + "learning_rate": 1.0710893044691424e-05, + "loss": 0.0881, + "step": 60285 + }, + { + "epoch": 2.81, + "learning_rate": 1.071010925963664e-05, + "loss": 0.0737, + "step": 60290 + }, + { + "epoch": 2.81, + "learning_rate": 1.0709325474581851e-05, + "loss": 0.125, + "step": 60295 + }, + { + "epoch": 2.81, + "learning_rate": 1.0708541689527065e-05, + "loss": 0.1237, + "step": 60300 + }, + { + "epoch": 2.81, + "learning_rate": 1.0707757904472278e-05, + "loss": 0.1589, + "step": 60305 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706974119417493e-05, + "loss": 0.1822, + "step": 60310 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706190334362705e-05, + "loss": 0.2297, + "step": 60315 + }, + { + "epoch": 2.81, + "learning_rate": 1.070540654930792e-05, + "loss": 0.0529, + "step": 60320 + }, + { + "epoch": 2.81, + "learning_rate": 1.0704622764253132e-05, + "loss": 0.0159, + "step": 60325 + }, + { + "epoch": 2.82, + "learning_rate": 1.0703838979198347e-05, + "loss": 0.0392, + "step": 60330 + }, + { + "epoch": 2.82, + "learning_rate": 1.070305519414356e-05, + "loss": 0.0952, + "step": 60335 + }, + { + "epoch": 2.82, + "learning_rate": 1.0702271409088772e-05, + "loss": 0.1019, + "step": 60340 + }, + { + "epoch": 2.82, + "learning_rate": 1.0701487624033985e-05, + "loss": 0.1498, + "step": 60345 + }, + { + "epoch": 2.82, + "learning_rate": 1.0700703838979198e-05, + "loss": 0.1059, + "step": 60350 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699920053924413e-05, + "loss": 0.1739, + "step": 60355 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699136268869625e-05, + "loss": 0.2294, + "step": 60360 + }, + { + "epoch": 2.82, + "learning_rate": 1.069835248381484e-05, + "loss": 0.3432, + "step": 60365 + }, + { + "epoch": 2.82, + "learning_rate": 1.0697568698760053e-05, + "loss": 0.0413, + "step": 60370 + }, + { + "epoch": 2.82, + "learning_rate": 1.0696784913705267e-05, + "loss": 0.0329, + "step": 60375 + }, + { + "epoch": 2.82, + "learning_rate": 1.069600112865048e-05, + "loss": 0.0649, + "step": 60380 + }, + { + "epoch": 2.82, + "learning_rate": 1.0695217343595693e-05, + "loss": 0.0388, + "step": 60385 + }, + { + "epoch": 2.82, + "learning_rate": 1.0694433558540907e-05, + "loss": 0.1039, + "step": 60390 + }, + { + "epoch": 2.82, + "learning_rate": 1.0693649773486121e-05, + "loss": 0.0985, + "step": 60395 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692865988431333e-05, + "loss": 0.152, + "step": 60400 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692082203376546e-05, + "loss": 0.1796, + "step": 60405 + }, + { + "epoch": 2.82, + "learning_rate": 1.0691298418321761e-05, + "loss": 0.1797, + "step": 60410 + }, + { + "epoch": 2.82, + "learning_rate": 1.0690514633266973e-05, + "loss": 0.2262, + "step": 60415 + }, + { + "epoch": 2.82, + "learning_rate": 1.0689730848212187e-05, + "loss": 0.0712, + "step": 60420 + }, + { + "epoch": 2.82, + "learning_rate": 1.06889470631574e-05, + "loss": 0.0351, + "step": 60425 + }, + { + "epoch": 2.82, + "learning_rate": 1.0688163278102615e-05, + "loss": 0.0411, + "step": 60430 + }, + { + "epoch": 2.82, + "learning_rate": 1.0687379493047827e-05, + "loss": 0.0386, + "step": 60435 + }, + { + "epoch": 2.82, + "learning_rate": 1.0686595707993041e-05, + "loss": 0.0963, + "step": 60440 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685811922938253e-05, + "loss": 0.1084, + "step": 60445 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685028137883469e-05, + "loss": 0.1291, + "step": 60450 + }, + { + "epoch": 2.82, + "learning_rate": 1.0684244352828681e-05, + "loss": 0.2619, + "step": 60455 + }, + { + "epoch": 2.82, + "learning_rate": 1.0683460567773895e-05, + "loss": 0.2443, + "step": 60460 + }, + { + "epoch": 2.82, + "learning_rate": 1.0682676782719107e-05, + "loss": 0.2697, + "step": 60465 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681892997664321e-05, + "loss": 0.0326, + "step": 60470 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681109212609535e-05, + "loss": 0.0153, + "step": 60475 + }, + { + "epoch": 2.82, + "learning_rate": 1.0680325427554747e-05, + "loss": 0.0179, + "step": 60480 + }, + { + "epoch": 2.82, + "learning_rate": 1.0679541642499961e-05, + "loss": 0.036, + "step": 60485 + }, + { + "epoch": 2.82, + "learning_rate": 1.0678757857445175e-05, + "loss": 0.057, + "step": 60490 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677974072390389e-05, + "loss": 0.0842, + "step": 60495 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677190287335601e-05, + "loss": 0.1821, + "step": 60500 + }, + { + "epoch": 2.82, + "learning_rate": 1.0676406502280817e-05, + "loss": 0.1145, + "step": 60505 + }, + { + "epoch": 2.82, + "learning_rate": 1.0675622717226029e-05, + "loss": 0.1985, + "step": 60510 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674838932171243e-05, + "loss": 0.202, + "step": 60515 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674055147116455e-05, + "loss": 0.0571, + "step": 60520 + }, + { + "epoch": 2.82, + "learning_rate": 1.067327136206167e-05, + "loss": 0.046, + "step": 60525 + }, + { + "epoch": 2.82, + "learning_rate": 1.0672487577006883e-05, + "loss": 0.0769, + "step": 60530 + }, + { + "epoch": 2.82, + "learning_rate": 1.0671703791952095e-05, + "loss": 0.0421, + "step": 60535 + }, + { + "epoch": 2.82, + "learning_rate": 1.0670920006897309e-05, + "loss": 0.121, + "step": 60540 + }, + { + "epoch": 2.83, + "learning_rate": 1.0670136221842521e-05, + "loss": 0.099, + "step": 60545 + }, + { + "epoch": 2.83, + "learning_rate": 1.0669352436787737e-05, + "loss": 0.1578, + "step": 60550 + }, + { + "epoch": 2.83, + "learning_rate": 1.066856865173295e-05, + "loss": 0.1956, + "step": 60555 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667784866678163e-05, + "loss": 0.4068, + "step": 60560 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667001081623375e-05, + "loss": 0.3239, + "step": 60565 + }, + { + "epoch": 2.83, + "learning_rate": 1.0666217296568591e-05, + "loss": 0.0318, + "step": 60570 + }, + { + "epoch": 2.83, + "learning_rate": 1.0665433511513803e-05, + "loss": 0.0266, + "step": 60575 + }, + { + "epoch": 2.83, + "learning_rate": 1.0664649726459017e-05, + "loss": 0.0429, + "step": 60580 + }, + { + "epoch": 2.83, + "learning_rate": 1.066386594140423e-05, + "loss": 0.0474, + "step": 60585 + }, + { + "epoch": 2.83, + "learning_rate": 1.0663082156349445e-05, + "loss": 0.0555, + "step": 60590 + }, + { + "epoch": 2.83, + "learning_rate": 1.0662298371294657e-05, + "loss": 0.0826, + "step": 60595 + }, + { + "epoch": 2.83, + "learning_rate": 1.066151458623987e-05, + "loss": 0.1561, + "step": 60600 + }, + { + "epoch": 2.83, + "learning_rate": 1.0660730801185085e-05, + "loss": 0.1313, + "step": 60605 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659947016130297e-05, + "loss": 0.2536, + "step": 60610 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659163231075511e-05, + "loss": 0.2586, + "step": 60615 + }, + { + "epoch": 2.83, + "learning_rate": 1.0658379446020723e-05, + "loss": 0.0328, + "step": 60620 + }, + { + "epoch": 2.83, + "learning_rate": 1.0657595660965939e-05, + "loss": 0.0261, + "step": 60625 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656811875911151e-05, + "loss": 0.0413, + "step": 60630 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656028090856365e-05, + "loss": 0.0651, + "step": 60635 + }, + { + "epoch": 2.83, + "learning_rate": 1.0655244305801577e-05, + "loss": 0.1421, + "step": 60640 + }, + { + "epoch": 2.83, + "learning_rate": 1.0654460520746793e-05, + "loss": 0.1075, + "step": 60645 + }, + { + "epoch": 2.83, + "learning_rate": 1.0653676735692005e-05, + "loss": 0.1013, + "step": 60650 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652892950637219e-05, + "loss": 0.2221, + "step": 60655 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652109165582431e-05, + "loss": 0.1437, + "step": 60660 + }, + { + "epoch": 2.83, + "learning_rate": 1.0651325380527643e-05, + "loss": 0.2729, + "step": 60665 + }, + { + "epoch": 2.83, + "learning_rate": 1.0650541595472859e-05, + "loss": 0.0612, + "step": 60670 + }, + { + "epoch": 2.83, + "learning_rate": 1.0649757810418071e-05, + "loss": 0.0464, + "step": 60675 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648974025363285e-05, + "loss": 0.03, + "step": 60680 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648190240308499e-05, + "loss": 0.0097, + "step": 60685 + }, + { + "epoch": 2.83, + "learning_rate": 1.0647406455253713e-05, + "loss": 0.1497, + "step": 60690 + }, + { + "epoch": 2.83, + "learning_rate": 1.0646622670198925e-05, + "loss": 0.0659, + "step": 60695 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645838885144139e-05, + "loss": 0.1278, + "step": 60700 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645055100089353e-05, + "loss": 0.1808, + "step": 60705 + }, + { + "epoch": 2.83, + "learning_rate": 1.0644271315034567e-05, + "loss": 0.3674, + "step": 60710 + }, + { + "epoch": 2.83, + "learning_rate": 1.0643487529979779e-05, + "loss": 0.395, + "step": 60715 + }, + { + "epoch": 2.83, + "learning_rate": 1.0642703744924994e-05, + "loss": 0.0519, + "step": 60720 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641919959870207e-05, + "loss": 0.0121, + "step": 60725 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641136174815419e-05, + "loss": 0.0334, + "step": 60730 + }, + { + "epoch": 2.83, + "learning_rate": 1.0640352389760633e-05, + "loss": 0.0302, + "step": 60735 + }, + { + "epoch": 2.83, + "learning_rate": 1.0639568604705845e-05, + "loss": 0.2201, + "step": 60740 + }, + { + "epoch": 2.83, + "learning_rate": 1.063878481965106e-05, + "loss": 0.1408, + "step": 60745 + }, + { + "epoch": 2.83, + "learning_rate": 1.0638001034596273e-05, + "loss": 0.1301, + "step": 60750 + }, + { + "epoch": 2.83, + "learning_rate": 1.0637217249541487e-05, + "loss": 0.1961, + "step": 60755 + }, + { + "epoch": 2.84, + "learning_rate": 1.0636433464486699e-05, + "loss": 0.1496, + "step": 60760 + }, + { + "epoch": 2.84, + "learning_rate": 1.0635649679431915e-05, + "loss": 0.2515, + "step": 60765 + }, + { + "epoch": 2.84, + "learning_rate": 1.0634865894377127e-05, + "loss": 0.0856, + "step": 60770 + }, + { + "epoch": 2.84, + "learning_rate": 1.063408210932234e-05, + "loss": 0.0536, + "step": 60775 + }, + { + "epoch": 2.84, + "learning_rate": 1.0633298324267553e-05, + "loss": 0.0576, + "step": 60780 + }, + { + "epoch": 2.84, + "learning_rate": 1.0632514539212768e-05, + "loss": 0.0704, + "step": 60785 + }, + { + "epoch": 2.84, + "learning_rate": 1.063173075415798e-05, + "loss": 0.068, + "step": 60790 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630946969103193e-05, + "loss": 0.1093, + "step": 60795 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630163184048407e-05, + "loss": 0.1841, + "step": 60800 + }, + { + "epoch": 2.84, + "learning_rate": 1.062937939899362e-05, + "loss": 0.1482, + "step": 60805 + }, + { + "epoch": 2.84, + "learning_rate": 1.0628595613938835e-05, + "loss": 0.1913, + "step": 60810 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627811828884047e-05, + "loss": 0.2599, + "step": 60815 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627028043829262e-05, + "loss": 0.0773, + "step": 60820 + }, + { + "epoch": 2.84, + "learning_rate": 1.0626244258774475e-05, + "loss": 0.0442, + "step": 60825 + }, + { + "epoch": 2.84, + "learning_rate": 1.0625460473719689e-05, + "loss": 0.0988, + "step": 60830 + }, + { + "epoch": 2.84, + "learning_rate": 1.06246766886649e-05, + "loss": 0.1, + "step": 60835 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623892903610116e-05, + "loss": 0.0715, + "step": 60840 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623109118555329e-05, + "loss": 0.0782, + "step": 60845 + }, + { + "epoch": 2.84, + "learning_rate": 1.0622325333500542e-05, + "loss": 0.1205, + "step": 60850 + }, + { + "epoch": 2.84, + "learning_rate": 1.0621541548445755e-05, + "loss": 0.1423, + "step": 60855 + }, + { + "epoch": 2.84, + "learning_rate": 1.0620757763390967e-05, + "loss": 0.2899, + "step": 60860 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619973978336183e-05, + "loss": 0.2523, + "step": 60865 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619190193281395e-05, + "loss": 0.0619, + "step": 60870 + }, + { + "epoch": 2.84, + "learning_rate": 1.0618406408226609e-05, + "loss": 0.044, + "step": 60875 + }, + { + "epoch": 2.84, + "learning_rate": 1.0617622623171821e-05, + "loss": 0.0412, + "step": 60880 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616838838117036e-05, + "loss": 0.0697, + "step": 60885 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616055053062249e-05, + "loss": 0.0875, + "step": 60890 + }, + { + "epoch": 2.84, + "learning_rate": 1.0615271268007463e-05, + "loss": 0.1086, + "step": 60895 + }, + { + "epoch": 2.84, + "learning_rate": 1.0614487482952675e-05, + "loss": 0.1613, + "step": 60900 + }, + { + "epoch": 2.84, + "learning_rate": 1.061370369789789e-05, + "loss": 0.1262, + "step": 60905 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612919912843103e-05, + "loss": 0.181, + "step": 60910 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612136127788316e-05, + "loss": 0.2297, + "step": 60915 + }, + { + "epoch": 2.84, + "learning_rate": 1.061135234273353e-05, + "loss": 0.0381, + "step": 60920 + }, + { + "epoch": 2.84, + "learning_rate": 1.0610568557678743e-05, + "loss": 0.0273, + "step": 60925 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609784772623957e-05, + "loss": 0.0332, + "step": 60930 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609000987569169e-05, + "loss": 0.0649, + "step": 60935 + }, + { + "epoch": 2.84, + "learning_rate": 1.0608217202514384e-05, + "loss": 0.0916, + "step": 60940 + }, + { + "epoch": 2.84, + "learning_rate": 1.0607433417459597e-05, + "loss": 0.0944, + "step": 60945 + }, + { + "epoch": 2.84, + "learning_rate": 1.060664963240481e-05, + "loss": 0.1064, + "step": 60950 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605865847350023e-05, + "loss": 0.1578, + "step": 60955 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605082062295238e-05, + "loss": 0.1276, + "step": 60960 + }, + { + "epoch": 2.84, + "learning_rate": 1.060429827724045e-05, + "loss": 0.3312, + "step": 60965 + }, + { + "epoch": 2.84, + "learning_rate": 1.0603514492185664e-05, + "loss": 0.0179, + "step": 60970 + }, + { + "epoch": 2.85, + "learning_rate": 1.0602730707130877e-05, + "loss": 0.062, + "step": 60975 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601946922076092e-05, + "loss": 0.0659, + "step": 60980 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601163137021304e-05, + "loss": 0.1297, + "step": 60985 + }, + { + "epoch": 2.85, + "learning_rate": 1.0600379351966517e-05, + "loss": 0.0375, + "step": 60990 + }, + { + "epoch": 2.85, + "learning_rate": 1.059959556691173e-05, + "loss": 0.118, + "step": 60995 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598811781856944e-05, + "loss": 0.0791, + "step": 61000 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598027996802158e-05, + "loss": 0.1074, + "step": 61005 + }, + { + "epoch": 2.85, + "learning_rate": 1.059724421174737e-05, + "loss": 0.2143, + "step": 61010 + }, + { + "epoch": 2.85, + "learning_rate": 1.0596460426692584e-05, + "loss": 0.2168, + "step": 61015 + }, + { + "epoch": 2.85, + "learning_rate": 1.0595676641637798e-05, + "loss": 0.07, + "step": 61020 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594892856583012e-05, + "loss": 0.0378, + "step": 61025 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594109071528224e-05, + "loss": 0.0538, + "step": 61030 + }, + { + "epoch": 2.85, + "learning_rate": 1.059332528647344e-05, + "loss": 0.058, + "step": 61035 + }, + { + "epoch": 2.85, + "learning_rate": 1.0592541501418652e-05, + "loss": 0.1057, + "step": 61040 + }, + { + "epoch": 2.85, + "learning_rate": 1.0591757716363866e-05, + "loss": 0.114, + "step": 61045 + }, + { + "epoch": 2.85, + "learning_rate": 1.0590973931309078e-05, + "loss": 0.1695, + "step": 61050 + }, + { + "epoch": 2.85, + "learning_rate": 1.059019014625429e-05, + "loss": 0.171, + "step": 61055 + }, + { + "epoch": 2.85, + "learning_rate": 1.0589406361199506e-05, + "loss": 0.3068, + "step": 61060 + }, + { + "epoch": 2.85, + "learning_rate": 1.0588622576144718e-05, + "loss": 0.2925, + "step": 61065 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587838791089932e-05, + "loss": 0.0337, + "step": 61070 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587055006035145e-05, + "loss": 0.0352, + "step": 61075 + }, + { + "epoch": 2.85, + "learning_rate": 1.058627122098036e-05, + "loss": 0.0502, + "step": 61080 + }, + { + "epoch": 2.85, + "learning_rate": 1.0585487435925572e-05, + "loss": 0.0615, + "step": 61085 + }, + { + "epoch": 2.85, + "learning_rate": 1.0584703650870786e-05, + "loss": 0.1042, + "step": 61090 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583919865815998e-05, + "loss": 0.1052, + "step": 61095 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583136080761214e-05, + "loss": 0.1772, + "step": 61100 + }, + { + "epoch": 2.85, + "learning_rate": 1.0582352295706426e-05, + "loss": 0.208, + "step": 61105 + }, + { + "epoch": 2.85, + "learning_rate": 1.058156851065164e-05, + "loss": 0.2459, + "step": 61110 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580784725596852e-05, + "loss": 0.432, + "step": 61115 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580000940542066e-05, + "loss": 0.064, + "step": 61120 + }, + { + "epoch": 2.85, + "learning_rate": 1.057921715548728e-05, + "loss": 0.0289, + "step": 61125 + }, + { + "epoch": 2.85, + "learning_rate": 1.0578433370432492e-05, + "loss": 0.0404, + "step": 61130 + }, + { + "epoch": 2.85, + "learning_rate": 1.0577649585377708e-05, + "loss": 0.0537, + "step": 61135 + }, + { + "epoch": 2.85, + "learning_rate": 1.057686580032292e-05, + "loss": 0.0917, + "step": 61140 + }, + { + "epoch": 2.85, + "learning_rate": 1.0576082015268134e-05, + "loss": 0.0957, + "step": 61145 + }, + { + "epoch": 2.85, + "learning_rate": 1.0575298230213346e-05, + "loss": 0.0822, + "step": 61150 + }, + { + "epoch": 2.85, + "learning_rate": 1.0574514445158562e-05, + "loss": 0.1236, + "step": 61155 + }, + { + "epoch": 2.85, + "learning_rate": 1.0573730660103774e-05, + "loss": 0.1939, + "step": 61160 + }, + { + "epoch": 2.85, + "learning_rate": 1.0572946875048988e-05, + "loss": 0.1853, + "step": 61165 + }, + { + "epoch": 2.85, + "learning_rate": 1.05721630899942e-05, + "loss": 0.028, + "step": 61170 + }, + { + "epoch": 2.85, + "learning_rate": 1.0571379304939416e-05, + "loss": 0.0427, + "step": 61175 + }, + { + "epoch": 2.85, + "learning_rate": 1.0570595519884628e-05, + "loss": 0.0422, + "step": 61180 + }, + { + "epoch": 2.85, + "learning_rate": 1.056981173482984e-05, + "loss": 0.0487, + "step": 61185 + }, + { + "epoch": 2.86, + "learning_rate": 1.0569027949775054e-05, + "loss": 0.0449, + "step": 61190 + }, + { + "epoch": 2.86, + "learning_rate": 1.0568244164720266e-05, + "loss": 0.1407, + "step": 61195 + }, + { + "epoch": 2.86, + "learning_rate": 1.0567460379665482e-05, + "loss": 0.1246, + "step": 61200 + }, + { + "epoch": 2.86, + "learning_rate": 1.0566676594610694e-05, + "loss": 0.085, + "step": 61205 + }, + { + "epoch": 2.86, + "learning_rate": 1.0565892809555908e-05, + "loss": 0.2521, + "step": 61210 + }, + { + "epoch": 2.86, + "learning_rate": 1.056510902450112e-05, + "loss": 0.1783, + "step": 61215 + }, + { + "epoch": 2.86, + "learning_rate": 1.0564325239446336e-05, + "loss": 0.1145, + "step": 61220 + }, + { + "epoch": 2.86, + "learning_rate": 1.0563541454391548e-05, + "loss": 0.03, + "step": 61225 + }, + { + "epoch": 2.86, + "learning_rate": 1.0562757669336762e-05, + "loss": 0.0302, + "step": 61230 + }, + { + "epoch": 2.86, + "learning_rate": 1.0561973884281976e-05, + "loss": 0.0758, + "step": 61235 + }, + { + "epoch": 2.86, + "learning_rate": 1.056119009922719e-05, + "loss": 0.0925, + "step": 61240 + }, + { + "epoch": 2.86, + "learning_rate": 1.0560406314172402e-05, + "loss": 0.0622, + "step": 61245 + }, + { + "epoch": 2.86, + "learning_rate": 1.0559622529117614e-05, + "loss": 0.0614, + "step": 61250 + }, + { + "epoch": 2.86, + "learning_rate": 1.055883874406283e-05, + "loss": 0.1648, + "step": 61255 + }, + { + "epoch": 2.86, + "learning_rate": 1.0558054959008042e-05, + "loss": 0.2341, + "step": 61260 + }, + { + "epoch": 2.86, + "learning_rate": 1.0557271173953256e-05, + "loss": 0.269, + "step": 61265 + }, + { + "epoch": 2.86, + "learning_rate": 1.0556487388898468e-05, + "loss": 0.0684, + "step": 61270 + }, + { + "epoch": 2.86, + "learning_rate": 1.0555703603843684e-05, + "loss": 0.107, + "step": 61275 + }, + { + "epoch": 2.86, + "learning_rate": 1.0554919818788896e-05, + "loss": 0.0388, + "step": 61280 + }, + { + "epoch": 2.86, + "learning_rate": 1.055413603373411e-05, + "loss": 0.0633, + "step": 61285 + }, + { + "epoch": 2.86, + "learning_rate": 1.0553352248679322e-05, + "loss": 0.1063, + "step": 61290 + }, + { + "epoch": 2.86, + "learning_rate": 1.0552568463624538e-05, + "loss": 0.1456, + "step": 61295 + }, + { + "epoch": 2.86, + "learning_rate": 1.055178467856975e-05, + "loss": 0.1461, + "step": 61300 + }, + { + "epoch": 2.86, + "learning_rate": 1.0551000893514964e-05, + "loss": 0.1618, + "step": 61305 + }, + { + "epoch": 2.86, + "learning_rate": 1.0550217108460176e-05, + "loss": 0.2129, + "step": 61310 + }, + { + "epoch": 2.86, + "learning_rate": 1.054943332340539e-05, + "loss": 0.2407, + "step": 61315 + }, + { + "epoch": 2.86, + "learning_rate": 1.0548649538350604e-05, + "loss": 0.0771, + "step": 61320 + }, + { + "epoch": 2.86, + "learning_rate": 1.0547865753295816e-05, + "loss": 0.0186, + "step": 61325 + }, + { + "epoch": 2.86, + "learning_rate": 1.054708196824103e-05, + "loss": 0.0309, + "step": 61330 + }, + { + "epoch": 2.86, + "learning_rate": 1.0546298183186244e-05, + "loss": 0.1447, + "step": 61335 + }, + { + "epoch": 2.86, + "learning_rate": 1.0545514398131458e-05, + "loss": 0.0258, + "step": 61340 + }, + { + "epoch": 2.86, + "learning_rate": 1.054473061307667e-05, + "loss": 0.1747, + "step": 61345 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543946828021886e-05, + "loss": 0.1391, + "step": 61350 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543163042967098e-05, + "loss": 0.217, + "step": 61355 + }, + { + "epoch": 2.86, + "learning_rate": 1.0542379257912312e-05, + "loss": 0.2874, + "step": 61360 + }, + { + "epoch": 2.86, + "learning_rate": 1.0541595472857524e-05, + "loss": 0.2921, + "step": 61365 + }, + { + "epoch": 2.86, + "learning_rate": 1.054081168780274e-05, + "loss": 0.0604, + "step": 61370 + }, + { + "epoch": 2.86, + "learning_rate": 1.0540027902747952e-05, + "loss": 0.0352, + "step": 61375 + }, + { + "epoch": 2.86, + "learning_rate": 1.0539244117693164e-05, + "loss": 0.0296, + "step": 61380 + }, + { + "epoch": 2.86, + "learning_rate": 1.0538460332638378e-05, + "loss": 0.0616, + "step": 61385 + }, + { + "epoch": 2.86, + "learning_rate": 1.053767654758359e-05, + "loss": 0.094, + "step": 61390 + }, + { + "epoch": 2.86, + "learning_rate": 1.0536892762528806e-05, + "loss": 0.1211, + "step": 61395 + }, + { + "epoch": 2.87, + "learning_rate": 1.0536108977474018e-05, + "loss": 0.104, + "step": 61400 + }, + { + "epoch": 2.87, + "learning_rate": 1.0535325192419232e-05, + "loss": 0.154, + "step": 61405 + }, + { + "epoch": 2.87, + "learning_rate": 1.0534541407364444e-05, + "loss": 0.1931, + "step": 61410 + }, + { + "epoch": 2.87, + "learning_rate": 1.053375762230966e-05, + "loss": 0.1805, + "step": 61415 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532973837254872e-05, + "loss": 0.0324, + "step": 61420 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532190052200086e-05, + "loss": 0.0607, + "step": 61425 + }, + { + "epoch": 2.87, + "learning_rate": 1.0531406267145298e-05, + "loss": 0.0146, + "step": 61430 + }, + { + "epoch": 2.87, + "learning_rate": 1.0530622482090514e-05, + "loss": 0.0607, + "step": 61435 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529838697035726e-05, + "loss": 0.0477, + "step": 61440 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529054911980938e-05, + "loss": 0.0923, + "step": 61445 + }, + { + "epoch": 2.87, + "learning_rate": 1.0528271126926154e-05, + "loss": 0.0744, + "step": 61450 + }, + { + "epoch": 2.87, + "learning_rate": 1.0527487341871366e-05, + "loss": 0.1322, + "step": 61455 + }, + { + "epoch": 2.87, + "learning_rate": 1.052670355681658e-05, + "loss": 0.2138, + "step": 61460 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525919771761792e-05, + "loss": 0.2974, + "step": 61465 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525135986707008e-05, + "loss": 0.0482, + "step": 61470 + }, + { + "epoch": 2.87, + "learning_rate": 1.052435220165222e-05, + "loss": 0.0448, + "step": 61475 + }, + { + "epoch": 2.87, + "learning_rate": 1.0523568416597434e-05, + "loss": 0.05, + "step": 61480 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522784631542646e-05, + "loss": 0.0529, + "step": 61485 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522000846487861e-05, + "loss": 0.1533, + "step": 61490 + }, + { + "epoch": 2.87, + "learning_rate": 1.0521217061433074e-05, + "loss": 0.1484, + "step": 61495 + }, + { + "epoch": 2.87, + "learning_rate": 1.0520433276378288e-05, + "loss": 0.1626, + "step": 61500 + }, + { + "epoch": 2.87, + "learning_rate": 1.05196494913235e-05, + "loss": 0.1731, + "step": 61505 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518865706268712e-05, + "loss": 0.33, + "step": 61510 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518081921213928e-05, + "loss": 0.3039, + "step": 61515 + }, + { + "epoch": 2.87, + "learning_rate": 1.051729813615914e-05, + "loss": 0.01, + "step": 61520 + }, + { + "epoch": 2.87, + "learning_rate": 1.0516514351104354e-05, + "loss": 0.0286, + "step": 61525 + }, + { + "epoch": 2.87, + "learning_rate": 1.0515730566049568e-05, + "loss": 0.0298, + "step": 61530 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514946780994782e-05, + "loss": 0.077, + "step": 61535 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514162995939994e-05, + "loss": 0.1129, + "step": 61540 + }, + { + "epoch": 2.87, + "learning_rate": 1.0513379210885208e-05, + "loss": 0.0934, + "step": 61545 + }, + { + "epoch": 2.87, + "learning_rate": 1.0512595425830422e-05, + "loss": 0.1048, + "step": 61550 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511811640775635e-05, + "loss": 0.1929, + "step": 61555 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511027855720848e-05, + "loss": 0.281, + "step": 61560 + }, + { + "epoch": 2.87, + "learning_rate": 1.0510244070666063e-05, + "loss": 0.345, + "step": 61565 + }, + { + "epoch": 2.87, + "learning_rate": 1.0509460285611275e-05, + "loss": 0.0306, + "step": 61570 + }, + { + "epoch": 2.87, + "learning_rate": 1.0508676500556488e-05, + "loss": 0.0636, + "step": 61575 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507892715501702e-05, + "loss": 0.0506, + "step": 61580 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507108930446914e-05, + "loss": 0.112, + "step": 61585 + }, + { + "epoch": 2.87, + "learning_rate": 1.050632514539213e-05, + "loss": 0.0215, + "step": 61590 + }, + { + "epoch": 2.87, + "learning_rate": 1.0505541360337342e-05, + "loss": 0.0534, + "step": 61595 + }, + { + "epoch": 2.87, + "learning_rate": 1.0504757575282556e-05, + "loss": 0.0811, + "step": 61600 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503973790227768e-05, + "loss": 0.1887, + "step": 61605 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503190005172983e-05, + "loss": 0.2217, + "step": 61610 + }, + { + "epoch": 2.88, + "learning_rate": 1.0502406220118196e-05, + "loss": 0.3228, + "step": 61615 + }, + { + "epoch": 2.88, + "learning_rate": 1.050162243506341e-05, + "loss": 0.052, + "step": 61620 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500838650008622e-05, + "loss": 0.0327, + "step": 61625 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500054864953837e-05, + "loss": 0.0629, + "step": 61630 + }, + { + "epoch": 2.88, + "learning_rate": 1.049927107989905e-05, + "loss": 0.0539, + "step": 61635 + }, + { + "epoch": 2.88, + "learning_rate": 1.0498487294844262e-05, + "loss": 0.0616, + "step": 61640 + }, + { + "epoch": 2.88, + "learning_rate": 1.0497703509789476e-05, + "loss": 0.0863, + "step": 61645 + }, + { + "epoch": 2.88, + "learning_rate": 1.049691972473469e-05, + "loss": 0.1395, + "step": 61650 + }, + { + "epoch": 2.88, + "learning_rate": 1.0496135939679903e-05, + "loss": 0.0981, + "step": 61655 + }, + { + "epoch": 2.88, + "learning_rate": 1.0495352154625116e-05, + "loss": 0.2278, + "step": 61660 + }, + { + "epoch": 2.88, + "learning_rate": 1.0494568369570331e-05, + "loss": 0.2109, + "step": 61665 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493784584515543e-05, + "loss": 0.0107, + "step": 61670 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493000799460757e-05, + "loss": 0.085, + "step": 61675 + }, + { + "epoch": 2.88, + "learning_rate": 1.049221701440597e-05, + "loss": 0.0593, + "step": 61680 + }, + { + "epoch": 2.88, + "learning_rate": 1.0491433229351185e-05, + "loss": 0.1071, + "step": 61685 + }, + { + "epoch": 2.88, + "learning_rate": 1.0490649444296397e-05, + "loss": 0.0502, + "step": 61690 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489865659241611e-05, + "loss": 0.0725, + "step": 61695 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489081874186823e-05, + "loss": 0.1771, + "step": 61700 + }, + { + "epoch": 2.88, + "learning_rate": 1.0488298089132036e-05, + "loss": 0.1795, + "step": 61705 + }, + { + "epoch": 2.88, + "learning_rate": 1.0487514304077251e-05, + "loss": 0.2024, + "step": 61710 + }, + { + "epoch": 2.88, + "learning_rate": 1.0486730519022463e-05, + "loss": 0.3146, + "step": 61715 + }, + { + "epoch": 2.88, + "learning_rate": 1.0485946733967677e-05, + "loss": 0.0512, + "step": 61720 + }, + { + "epoch": 2.88, + "learning_rate": 1.048516294891289e-05, + "loss": 0.0375, + "step": 61725 + }, + { + "epoch": 2.88, + "learning_rate": 1.0484379163858105e-05, + "loss": 0.0448, + "step": 61730 + }, + { + "epoch": 2.88, + "learning_rate": 1.0483595378803317e-05, + "loss": 0.062, + "step": 61735 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482811593748531e-05, + "loss": 0.1169, + "step": 61740 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482027808693744e-05, + "loss": 0.0705, + "step": 61745 + }, + { + "epoch": 2.88, + "learning_rate": 1.0481244023638959e-05, + "loss": 0.0729, + "step": 61750 + }, + { + "epoch": 2.88, + "learning_rate": 1.0480460238584171e-05, + "loss": 0.1679, + "step": 61755 + }, + { + "epoch": 2.88, + "learning_rate": 1.0479676453529385e-05, + "loss": 0.188, + "step": 61760 + }, + { + "epoch": 2.88, + "learning_rate": 1.04788926684746e-05, + "loss": 0.2839, + "step": 61765 + }, + { + "epoch": 2.88, + "learning_rate": 1.0478108883419811e-05, + "loss": 0.0484, + "step": 61770 + }, + { + "epoch": 2.88, + "learning_rate": 1.0477325098365025e-05, + "loss": 0.0209, + "step": 61775 + }, + { + "epoch": 2.88, + "learning_rate": 1.0476541313310237e-05, + "loss": 0.072, + "step": 61780 + }, + { + "epoch": 2.88, + "learning_rate": 1.0475757528255453e-05, + "loss": 0.043, + "step": 61785 + }, + { + "epoch": 2.88, + "learning_rate": 1.0474973743200665e-05, + "loss": 0.1533, + "step": 61790 + }, + { + "epoch": 2.88, + "learning_rate": 1.047418995814588e-05, + "loss": 0.062, + "step": 61795 + }, + { + "epoch": 2.88, + "learning_rate": 1.0473406173091091e-05, + "loss": 0.0718, + "step": 61800 + }, + { + "epoch": 2.88, + "learning_rate": 1.0472622388036307e-05, + "loss": 0.1603, + "step": 61805 + }, + { + "epoch": 2.88, + "learning_rate": 1.047183860298152e-05, + "loss": 0.2909, + "step": 61810 + }, + { + "epoch": 2.88, + "learning_rate": 1.0471054817926733e-05, + "loss": 0.182, + "step": 61815 + }, + { + "epoch": 2.88, + "learning_rate": 1.0470271032871945e-05, + "loss": 0.0543, + "step": 61820 + }, + { + "epoch": 2.88, + "learning_rate": 1.0469487247817161e-05, + "loss": 0.039, + "step": 61825 + }, + { + "epoch": 2.89, + "learning_rate": 1.0468703462762373e-05, + "loss": 0.0647, + "step": 61830 + }, + { + "epoch": 2.89, + "learning_rate": 1.0467919677707585e-05, + "loss": 0.0652, + "step": 61835 + }, + { + "epoch": 2.89, + "learning_rate": 1.04671358926528e-05, + "loss": 0.0565, + "step": 61840 + }, + { + "epoch": 2.89, + "learning_rate": 1.0466352107598013e-05, + "loss": 0.1336, + "step": 61845 + }, + { + "epoch": 2.89, + "learning_rate": 1.0465568322543227e-05, + "loss": 0.1115, + "step": 61850 + }, + { + "epoch": 2.89, + "learning_rate": 1.046478453748844e-05, + "loss": 0.1073, + "step": 61855 + }, + { + "epoch": 2.89, + "learning_rate": 1.046415750944461e-05, + "loss": 0.2446, + "step": 61860 + }, + { + "epoch": 2.89, + "learning_rate": 1.046353048140078e-05, + "loss": 0.3299, + "step": 61865 + }, + { + "epoch": 2.89, + "learning_rate": 1.0462746696345996e-05, + "loss": 0.0518, + "step": 61870 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461962911291208e-05, + "loss": 0.0517, + "step": 61875 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461179126236422e-05, + "loss": 0.0442, + "step": 61880 + }, + { + "epoch": 2.89, + "learning_rate": 1.0460395341181636e-05, + "loss": 0.0964, + "step": 61885 + }, + { + "epoch": 2.89, + "learning_rate": 1.045961155612685e-05, + "loss": 0.0607, + "step": 61890 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458827771072062e-05, + "loss": 0.1274, + "step": 61895 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458043986017274e-05, + "loss": 0.2033, + "step": 61900 + }, + { + "epoch": 2.89, + "learning_rate": 1.045726020096249e-05, + "loss": 0.2348, + "step": 61905 + }, + { + "epoch": 2.89, + "learning_rate": 1.0456476415907702e-05, + "loss": 0.1433, + "step": 61910 + }, + { + "epoch": 2.89, + "learning_rate": 1.0455692630852916e-05, + "loss": 0.2228, + "step": 61915 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454908845798128e-05, + "loss": 0.0411, + "step": 61920 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454125060743344e-05, + "loss": 0.0441, + "step": 61925 + }, + { + "epoch": 2.89, + "learning_rate": 1.0453341275688556e-05, + "loss": 0.0757, + "step": 61930 + }, + { + "epoch": 2.89, + "learning_rate": 1.045255749063377e-05, + "loss": 0.0323, + "step": 61935 + }, + { + "epoch": 2.89, + "learning_rate": 1.0451773705578982e-05, + "loss": 0.0426, + "step": 61940 + }, + { + "epoch": 2.89, + "learning_rate": 1.0450989920524198e-05, + "loss": 0.0863, + "step": 61945 + }, + { + "epoch": 2.89, + "learning_rate": 1.045020613546941e-05, + "loss": 0.184, + "step": 61950 + }, + { + "epoch": 2.89, + "learning_rate": 1.0449422350414624e-05, + "loss": 0.162, + "step": 61955 + }, + { + "epoch": 2.89, + "learning_rate": 1.0448638565359836e-05, + "loss": 0.1879, + "step": 61960 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447854780305048e-05, + "loss": 0.2687, + "step": 61965 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447070995250264e-05, + "loss": 0.0468, + "step": 61970 + }, + { + "epoch": 2.89, + "learning_rate": 1.0446287210195476e-05, + "loss": 0.0404, + "step": 61975 + }, + { + "epoch": 2.89, + "learning_rate": 1.044550342514069e-05, + "loss": 0.0753, + "step": 61980 + }, + { + "epoch": 2.89, + "learning_rate": 1.0444719640085904e-05, + "loss": 0.0453, + "step": 61985 + }, + { + "epoch": 2.89, + "learning_rate": 1.0443935855031118e-05, + "loss": 0.1476, + "step": 61990 + }, + { + "epoch": 2.89, + "learning_rate": 1.044315206997633e-05, + "loss": 0.0593, + "step": 61995 + }, + { + "epoch": 2.89, + "learning_rate": 1.0442368284921545e-05, + "loss": 0.1595, + "step": 62000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0441584499866758e-05, + "loss": 0.1356, + "step": 62005 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440800714811972e-05, + "loss": 0.2253, + "step": 62010 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440016929757184e-05, + "loss": 0.2739, + "step": 62015 + }, + { + "epoch": 2.89, + "learning_rate": 1.04392331447024e-05, + "loss": 0.0676, + "step": 62020 + }, + { + "epoch": 2.89, + "learning_rate": 1.0438449359647612e-05, + "loss": 0.076, + "step": 62025 + }, + { + "epoch": 2.89, + "learning_rate": 1.0437665574592824e-05, + "loss": 0.0512, + "step": 62030 + }, + { + "epoch": 2.89, + "learning_rate": 1.0436881789538038e-05, + "loss": 0.0411, + "step": 62035 + }, + { + "epoch": 2.89, + "learning_rate": 1.043609800448325e-05, + "loss": 0.2422, + "step": 62040 + }, + { + "epoch": 2.9, + "learning_rate": 1.0435314219428466e-05, + "loss": 0.1052, + "step": 62045 + }, + { + "epoch": 2.9, + "learning_rate": 1.0434530434373678e-05, + "loss": 0.1557, + "step": 62050 + }, + { + "epoch": 2.9, + "learning_rate": 1.0433746649318892e-05, + "loss": 0.217, + "step": 62055 + }, + { + "epoch": 2.9, + "learning_rate": 1.0432962864264104e-05, + "loss": 0.1938, + "step": 62060 + }, + { + "epoch": 2.9, + "learning_rate": 1.043217907920932e-05, + "loss": 0.1747, + "step": 62065 + }, + { + "epoch": 2.9, + "learning_rate": 1.0431395294154532e-05, + "loss": 0.0406, + "step": 62070 + }, + { + "epoch": 2.9, + "learning_rate": 1.0430611509099746e-05, + "loss": 0.0092, + "step": 62075 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429827724044958e-05, + "loss": 0.0618, + "step": 62080 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429043938990173e-05, + "loss": 0.0918, + "step": 62085 + }, + { + "epoch": 2.9, + "learning_rate": 1.0428260153935386e-05, + "loss": 0.1263, + "step": 62090 + }, + { + "epoch": 2.9, + "learning_rate": 1.0427476368880598e-05, + "loss": 0.1332, + "step": 62095 + }, + { + "epoch": 2.9, + "learning_rate": 1.0426692583825813e-05, + "loss": 0.1137, + "step": 62100 + }, + { + "epoch": 2.9, + "learning_rate": 1.0425908798771026e-05, + "loss": 0.0945, + "step": 62105 + }, + { + "epoch": 2.9, + "learning_rate": 1.042512501371624e-05, + "loss": 0.1955, + "step": 62110 + }, + { + "epoch": 2.9, + "learning_rate": 1.0424341228661452e-05, + "loss": 0.226, + "step": 62115 + }, + { + "epoch": 2.9, + "learning_rate": 1.0423557443606667e-05, + "loss": 0.0538, + "step": 62120 + }, + { + "epoch": 2.9, + "learning_rate": 1.042277365855188e-05, + "loss": 0.0342, + "step": 62125 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421989873497093e-05, + "loss": 0.0623, + "step": 62130 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421206088442306e-05, + "loss": 0.0489, + "step": 62135 + }, + { + "epoch": 2.9, + "learning_rate": 1.0420422303387521e-05, + "loss": 0.108, + "step": 62140 + }, + { + "epoch": 2.9, + "learning_rate": 1.0419638518332734e-05, + "loss": 0.0872, + "step": 62145 + }, + { + "epoch": 2.9, + "learning_rate": 1.0418854733277947e-05, + "loss": 0.0748, + "step": 62150 + }, + { + "epoch": 2.9, + "learning_rate": 1.041807094822316e-05, + "loss": 0.1508, + "step": 62155 + }, + { + "epoch": 2.9, + "learning_rate": 1.0417287163168372e-05, + "loss": 0.2324, + "step": 62160 + }, + { + "epoch": 2.9, + "learning_rate": 1.0416503378113587e-05, + "loss": 0.3346, + "step": 62165 + }, + { + "epoch": 2.9, + "learning_rate": 1.04157195930588e-05, + "loss": 0.0784, + "step": 62170 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414935808004014e-05, + "loss": 0.0446, + "step": 62175 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414152022949226e-05, + "loss": 0.0434, + "step": 62180 + }, + { + "epoch": 2.9, + "learning_rate": 1.0413368237894441e-05, + "loss": 0.0521, + "step": 62185 + }, + { + "epoch": 2.9, + "learning_rate": 1.0412584452839654e-05, + "loss": 0.0605, + "step": 62190 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411800667784867e-05, + "loss": 0.1037, + "step": 62195 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411016882730081e-05, + "loss": 0.0748, + "step": 62200 + }, + { + "epoch": 2.9, + "learning_rate": 1.0410233097675295e-05, + "loss": 0.2214, + "step": 62205 + }, + { + "epoch": 2.9, + "learning_rate": 1.0409449312620508e-05, + "loss": 0.1377, + "step": 62210 + }, + { + "epoch": 2.9, + "learning_rate": 1.0408665527565721e-05, + "loss": 0.2861, + "step": 62215 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407881742510935e-05, + "loss": 0.0281, + "step": 62220 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407097957456148e-05, + "loss": 0.0211, + "step": 62225 + }, + { + "epoch": 2.9, + "learning_rate": 1.0406314172401361e-05, + "loss": 0.0767, + "step": 62230 + }, + { + "epoch": 2.9, + "learning_rate": 1.0405530387346574e-05, + "loss": 0.0752, + "step": 62235 + }, + { + "epoch": 2.9, + "learning_rate": 1.040474660229179e-05, + "loss": 0.0622, + "step": 62240 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403962817237001e-05, + "loss": 0.0847, + "step": 62245 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403179032182215e-05, + "loss": 0.0918, + "step": 62250 + }, + { + "epoch": 2.9, + "learning_rate": 1.0402395247127428e-05, + "loss": 0.0615, + "step": 62255 + }, + { + "epoch": 2.91, + "learning_rate": 1.0401611462072643e-05, + "loss": 0.1679, + "step": 62260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0400827677017855e-05, + "loss": 0.3511, + "step": 62265 + }, + { + "epoch": 2.91, + "learning_rate": 1.040004389196307e-05, + "loss": 0.0629, + "step": 62270 + }, + { + "epoch": 2.91, + "learning_rate": 1.0399260106908282e-05, + "loss": 0.0183, + "step": 62275 + }, + { + "epoch": 2.91, + "learning_rate": 1.0398476321853497e-05, + "loss": 0.0467, + "step": 62280 + }, + { + "epoch": 2.91, + "learning_rate": 1.039769253679871e-05, + "loss": 0.0157, + "step": 62285 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396908751743922e-05, + "loss": 0.0978, + "step": 62290 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396124966689135e-05, + "loss": 0.0457, + "step": 62295 + }, + { + "epoch": 2.91, + "learning_rate": 1.039534118163435e-05, + "loss": 0.1463, + "step": 62300 + }, + { + "epoch": 2.91, + "learning_rate": 1.0394557396579563e-05, + "loss": 0.1993, + "step": 62305 + }, + { + "epoch": 2.91, + "learning_rate": 1.0393773611524775e-05, + "loss": 0.1728, + "step": 62310 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392989826469991e-05, + "loss": 0.163, + "step": 62315 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392206041415203e-05, + "loss": 0.0391, + "step": 62320 + }, + { + "epoch": 2.91, + "learning_rate": 1.0391422256360417e-05, + "loss": 0.054, + "step": 62325 + }, + { + "epoch": 2.91, + "learning_rate": 1.039063847130563e-05, + "loss": 0.0313, + "step": 62330 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389854686250845e-05, + "loss": 0.0868, + "step": 62335 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389070901196057e-05, + "loss": 0.0782, + "step": 62340 + }, + { + "epoch": 2.91, + "learning_rate": 1.0388287116141271e-05, + "loss": 0.0603, + "step": 62345 + }, + { + "epoch": 2.91, + "learning_rate": 1.0387503331086483e-05, + "loss": 0.1835, + "step": 62350 + }, + { + "epoch": 2.91, + "learning_rate": 1.0386719546031696e-05, + "loss": 0.1227, + "step": 62355 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385935760976911e-05, + "loss": 0.4369, + "step": 62360 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385151975922123e-05, + "loss": 0.3164, + "step": 62365 + }, + { + "epoch": 2.91, + "learning_rate": 1.0384368190867337e-05, + "loss": 0.0913, + "step": 62370 + }, + { + "epoch": 2.91, + "learning_rate": 1.038358440581255e-05, + "loss": 0.0212, + "step": 62375 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382800620757765e-05, + "loss": 0.0354, + "step": 62380 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382016835702977e-05, + "loss": 0.0694, + "step": 62385 + }, + { + "epoch": 2.91, + "learning_rate": 1.0381233050648191e-05, + "loss": 0.1128, + "step": 62390 + }, + { + "epoch": 2.91, + "learning_rate": 1.0380449265593403e-05, + "loss": 0.1718, + "step": 62395 + }, + { + "epoch": 2.91, + "learning_rate": 1.0379665480538619e-05, + "loss": 0.1168, + "step": 62400 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378881695483831e-05, + "loss": 0.2419, + "step": 62405 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378097910429045e-05, + "loss": 0.2319, + "step": 62410 + }, + { + "epoch": 2.91, + "learning_rate": 1.0377314125374259e-05, + "loss": 0.261, + "step": 62415 + }, + { + "epoch": 2.91, + "learning_rate": 1.0376530340319471e-05, + "loss": 0.0797, + "step": 62420 + }, + { + "epoch": 2.91, + "learning_rate": 1.0375746555264685e-05, + "loss": 0.0227, + "step": 62425 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374962770209897e-05, + "loss": 0.0566, + "step": 62430 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374178985155113e-05, + "loss": 0.1145, + "step": 62435 + }, + { + "epoch": 2.91, + "learning_rate": 1.0373395200100325e-05, + "loss": 0.0866, + "step": 62440 + }, + { + "epoch": 2.91, + "learning_rate": 1.0372611415045539e-05, + "loss": 0.0935, + "step": 62445 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371827629990751e-05, + "loss": 0.1222, + "step": 62450 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371043844935967e-05, + "loss": 0.1236, + "step": 62455 + }, + { + "epoch": 2.91, + "learning_rate": 1.0370260059881179e-05, + "loss": 0.2584, + "step": 62460 + }, + { + "epoch": 2.91, + "learning_rate": 1.0369476274826393e-05, + "loss": 0.3033, + "step": 62465 + }, + { + "epoch": 2.91, + "learning_rate": 1.0368692489771605e-05, + "loss": 0.0242, + "step": 62470 + }, + { + "epoch": 2.92, + "learning_rate": 1.036790870471682e-05, + "loss": 0.036, + "step": 62475 + }, + { + "epoch": 2.92, + "learning_rate": 1.0367124919662033e-05, + "loss": 0.0251, + "step": 62480 + }, + { + "epoch": 2.92, + "learning_rate": 1.0366341134607245e-05, + "loss": 0.0294, + "step": 62485 + }, + { + "epoch": 2.92, + "learning_rate": 1.0365557349552459e-05, + "loss": 0.0555, + "step": 62490 + }, + { + "epoch": 2.92, + "learning_rate": 1.0364773564497671e-05, + "loss": 0.0958, + "step": 62495 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363989779442887e-05, + "loss": 0.1204, + "step": 62500 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363205994388099e-05, + "loss": 0.0712, + "step": 62505 + }, + { + "epoch": 2.92, + "learning_rate": 1.0362422209333313e-05, + "loss": 0.1334, + "step": 62510 + }, + { + "epoch": 2.92, + "learning_rate": 1.0361638424278527e-05, + "loss": 0.2207, + "step": 62515 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360854639223741e-05, + "loss": 0.0133, + "step": 62520 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360070854168953e-05, + "loss": 0.0209, + "step": 62525 + }, + { + "epoch": 2.92, + "learning_rate": 1.0359287069114167e-05, + "loss": 0.0494, + "step": 62530 + }, + { + "epoch": 2.92, + "learning_rate": 1.0358503284059381e-05, + "loss": 0.0147, + "step": 62535 + }, + { + "epoch": 2.92, + "learning_rate": 1.0357719499004595e-05, + "loss": 0.1232, + "step": 62540 + }, + { + "epoch": 2.92, + "learning_rate": 1.0356935713949807e-05, + "loss": 0.0512, + "step": 62545 + }, + { + "epoch": 2.92, + "learning_rate": 1.035615192889502e-05, + "loss": 0.2343, + "step": 62550 + }, + { + "epoch": 2.92, + "learning_rate": 1.0355368143840235e-05, + "loss": 0.1384, + "step": 62555 + }, + { + "epoch": 2.92, + "learning_rate": 1.0354584358785447e-05, + "loss": 0.2172, + "step": 62560 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353800573730661e-05, + "loss": 0.2982, + "step": 62565 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353016788675873e-05, + "loss": 0.0251, + "step": 62570 + }, + { + "epoch": 2.92, + "learning_rate": 1.0352233003621089e-05, + "loss": 0.1112, + "step": 62575 + }, + { + "epoch": 2.92, + "learning_rate": 1.0351449218566301e-05, + "loss": 0.0654, + "step": 62580 + }, + { + "epoch": 2.92, + "learning_rate": 1.0350665433511515e-05, + "loss": 0.0838, + "step": 62585 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349881648456727e-05, + "loss": 0.1541, + "step": 62590 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349097863401943e-05, + "loss": 0.0852, + "step": 62595 + }, + { + "epoch": 2.92, + "learning_rate": 1.0348314078347155e-05, + "loss": 0.2073, + "step": 62600 + }, + { + "epoch": 2.92, + "learning_rate": 1.0347530293292369e-05, + "loss": 0.1479, + "step": 62605 + }, + { + "epoch": 2.92, + "learning_rate": 1.0346746508237581e-05, + "loss": 0.2269, + "step": 62610 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345962723182795e-05, + "loss": 0.1799, + "step": 62615 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345178938128009e-05, + "loss": 0.0507, + "step": 62620 + }, + { + "epoch": 2.92, + "learning_rate": 1.0344395153073221e-05, + "loss": 0.0694, + "step": 62625 + }, + { + "epoch": 2.92, + "learning_rate": 1.0343611368018437e-05, + "loss": 0.0565, + "step": 62630 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342827582963649e-05, + "loss": 0.0774, + "step": 62635 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342043797908863e-05, + "loss": 0.1522, + "step": 62640 + }, + { + "epoch": 2.92, + "learning_rate": 1.0341260012854075e-05, + "loss": 0.1715, + "step": 62645 + }, + { + "epoch": 2.92, + "learning_rate": 1.034047622779929e-05, + "loss": 0.1394, + "step": 62650 + }, + { + "epoch": 2.92, + "learning_rate": 1.0339692442744503e-05, + "loss": 0.2182, + "step": 62655 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338908657689717e-05, + "loss": 0.3029, + "step": 62660 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338124872634929e-05, + "loss": 0.2364, + "step": 62665 + }, + { + "epoch": 2.92, + "learning_rate": 1.0337341087580144e-05, + "loss": 0.036, + "step": 62670 + }, + { + "epoch": 2.92, + "learning_rate": 1.0336557302525357e-05, + "loss": 0.0416, + "step": 62675 + }, + { + "epoch": 2.92, + "learning_rate": 1.0335773517470569e-05, + "loss": 0.0485, + "step": 62680 + }, + { + "epoch": 2.92, + "learning_rate": 1.0334989732415783e-05, + "loss": 0.0772, + "step": 62685 + }, + { + "epoch": 2.93, + "learning_rate": 1.0334205947360995e-05, + "loss": 0.0545, + "step": 62690 + }, + { + "epoch": 2.93, + "learning_rate": 1.033342216230621e-05, + "loss": 0.0758, + "step": 62695 + }, + { + "epoch": 2.93, + "learning_rate": 1.0332638377251423e-05, + "loss": 0.1142, + "step": 62700 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331854592196637e-05, + "loss": 0.1031, + "step": 62705 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331070807141849e-05, + "loss": 0.3682, + "step": 62710 + }, + { + "epoch": 2.93, + "learning_rate": 1.0330287022087065e-05, + "loss": 0.3997, + "step": 62715 + }, + { + "epoch": 2.93, + "learning_rate": 1.0329503237032277e-05, + "loss": 0.052, + "step": 62720 + }, + { + "epoch": 2.93, + "learning_rate": 1.032871945197749e-05, + "loss": 0.0213, + "step": 62725 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327935666922705e-05, + "loss": 0.0501, + "step": 62730 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327151881867918e-05, + "loss": 0.0711, + "step": 62735 + }, + { + "epoch": 2.93, + "learning_rate": 1.032636809681313e-05, + "loss": 0.1498, + "step": 62740 + }, + { + "epoch": 2.93, + "learning_rate": 1.0325584311758343e-05, + "loss": 0.1179, + "step": 62745 + }, + { + "epoch": 2.93, + "learning_rate": 1.0324800526703559e-05, + "loss": 0.1803, + "step": 62750 + }, + { + "epoch": 2.93, + "learning_rate": 1.032401674164877e-05, + "loss": 0.1619, + "step": 62755 + }, + { + "epoch": 2.93, + "learning_rate": 1.0323232956593985e-05, + "loss": 0.2089, + "step": 62760 + }, + { + "epoch": 2.93, + "learning_rate": 1.0322449171539197e-05, + "loss": 0.4382, + "step": 62765 + }, + { + "epoch": 2.93, + "learning_rate": 1.0321665386484412e-05, + "loss": 0.0535, + "step": 62770 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320881601429625e-05, + "loss": 0.029, + "step": 62775 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320097816374839e-05, + "loss": 0.0429, + "step": 62780 + }, + { + "epoch": 2.93, + "learning_rate": 1.031931403132005e-05, + "loss": 0.0829, + "step": 62785 + }, + { + "epoch": 2.93, + "learning_rate": 1.0318530246265266e-05, + "loss": 0.0953, + "step": 62790 + }, + { + "epoch": 2.93, + "learning_rate": 1.0317746461210479e-05, + "loss": 0.1233, + "step": 62795 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316962676155692e-05, + "loss": 0.0843, + "step": 62800 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316178891100905e-05, + "loss": 0.1054, + "step": 62805 + }, + { + "epoch": 2.93, + "learning_rate": 1.0315395106046117e-05, + "loss": 0.277, + "step": 62810 + }, + { + "epoch": 2.93, + "learning_rate": 1.0314611320991333e-05, + "loss": 0.4017, + "step": 62815 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313827535936545e-05, + "loss": 0.031, + "step": 62820 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313043750881759e-05, + "loss": 0.0184, + "step": 62825 + }, + { + "epoch": 2.93, + "learning_rate": 1.0312259965826973e-05, + "loss": 0.1057, + "step": 62830 + }, + { + "epoch": 2.93, + "learning_rate": 1.0311476180772186e-05, + "loss": 0.1594, + "step": 62835 + }, + { + "epoch": 2.93, + "learning_rate": 1.0310692395717399e-05, + "loss": 0.1388, + "step": 62840 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309908610662613e-05, + "loss": 0.0698, + "step": 62845 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309124825607826e-05, + "loss": 0.2027, + "step": 62850 + }, + { + "epoch": 2.93, + "learning_rate": 1.030834104055304e-05, + "loss": 0.0898, + "step": 62855 + }, + { + "epoch": 2.93, + "learning_rate": 1.0307557255498253e-05, + "loss": 0.2479, + "step": 62860 + }, + { + "epoch": 2.93, + "learning_rate": 1.0306773470443468e-05, + "loss": 0.2501, + "step": 62865 + }, + { + "epoch": 2.93, + "learning_rate": 1.030598968538868e-05, + "loss": 0.0262, + "step": 62870 + }, + { + "epoch": 2.93, + "learning_rate": 1.0305205900333893e-05, + "loss": 0.0397, + "step": 62875 + }, + { + "epoch": 2.93, + "learning_rate": 1.0304422115279107e-05, + "loss": 0.074, + "step": 62880 + }, + { + "epoch": 2.93, + "learning_rate": 1.0303638330224319e-05, + "loss": 0.0701, + "step": 62885 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302854545169534e-05, + "loss": 0.0451, + "step": 62890 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302070760114747e-05, + "loss": 0.1437, + "step": 62895 + }, + { + "epoch": 2.94, + "learning_rate": 1.030128697505996e-05, + "loss": 0.0555, + "step": 62900 + }, + { + "epoch": 2.94, + "learning_rate": 1.0300503190005173e-05, + "loss": 0.1305, + "step": 62905 + }, + { + "epoch": 2.94, + "learning_rate": 1.0299719404950388e-05, + "loss": 0.2862, + "step": 62910 + }, + { + "epoch": 2.94, + "learning_rate": 1.02989356198956e-05, + "loss": 0.2047, + "step": 62915 + }, + { + "epoch": 2.94, + "learning_rate": 1.0298151834840814e-05, + "loss": 0.0395, + "step": 62920 + }, + { + "epoch": 2.94, + "learning_rate": 1.0297368049786027e-05, + "loss": 0.0727, + "step": 62925 + }, + { + "epoch": 2.94, + "learning_rate": 1.0296584264731242e-05, + "loss": 0.0484, + "step": 62930 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295800479676454e-05, + "loss": 0.0634, + "step": 62935 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295016694621667e-05, + "loss": 0.1216, + "step": 62940 + }, + { + "epoch": 2.94, + "learning_rate": 1.0294232909566882e-05, + "loss": 0.0631, + "step": 62945 + }, + { + "epoch": 2.94, + "learning_rate": 1.0293449124512094e-05, + "loss": 0.0701, + "step": 62950 + }, + { + "epoch": 2.94, + "learning_rate": 1.0292665339457308e-05, + "loss": 0.1508, + "step": 62955 + }, + { + "epoch": 2.94, + "learning_rate": 1.029188155440252e-05, + "loss": 0.1583, + "step": 62960 + }, + { + "epoch": 2.94, + "learning_rate": 1.0291097769347736e-05, + "loss": 0.2747, + "step": 62965 + }, + { + "epoch": 2.94, + "learning_rate": 1.0290313984292948e-05, + "loss": 0.027, + "step": 62970 + }, + { + "epoch": 2.94, + "learning_rate": 1.0289530199238162e-05, + "loss": 0.0262, + "step": 62975 + }, + { + "epoch": 2.94, + "learning_rate": 1.0288746414183374e-05, + "loss": 0.0405, + "step": 62980 + }, + { + "epoch": 2.94, + "learning_rate": 1.028796262912859e-05, + "loss": 0.0914, + "step": 62985 + }, + { + "epoch": 2.94, + "learning_rate": 1.0287178844073802e-05, + "loss": 0.0421, + "step": 62990 + }, + { + "epoch": 2.94, + "learning_rate": 1.0286395059019016e-05, + "loss": 0.1202, + "step": 62995 + }, + { + "epoch": 2.94, + "learning_rate": 1.0285611273964228e-05, + "loss": 0.1285, + "step": 63000 + }, + { + "epoch": 2.94, + "learning_rate": 1.028482748890944e-05, + "loss": 0.169, + "step": 63005 + }, + { + "epoch": 2.94, + "learning_rate": 1.0284043703854656e-05, + "loss": 0.1355, + "step": 63010 + }, + { + "epoch": 2.94, + "learning_rate": 1.0283259918799868e-05, + "loss": 0.2761, + "step": 63015 + }, + { + "epoch": 2.94, + "learning_rate": 1.0282476133745082e-05, + "loss": 0.089, + "step": 63020 + }, + { + "epoch": 2.94, + "learning_rate": 1.0281692348690295e-05, + "loss": 0.0571, + "step": 63025 + }, + { + "epoch": 2.94, + "learning_rate": 1.028090856363551e-05, + "loss": 0.0232, + "step": 63030 + }, + { + "epoch": 2.94, + "learning_rate": 1.0280124778580722e-05, + "loss": 0.0644, + "step": 63035 + }, + { + "epoch": 2.94, + "learning_rate": 1.0279340993525936e-05, + "loss": 0.0646, + "step": 63040 + }, + { + "epoch": 2.94, + "learning_rate": 1.027855720847115e-05, + "loss": 0.1479, + "step": 63045 + }, + { + "epoch": 2.94, + "learning_rate": 1.0277773423416364e-05, + "loss": 0.1911, + "step": 63050 + }, + { + "epoch": 2.94, + "learning_rate": 1.0276989638361576e-05, + "loss": 0.1927, + "step": 63055 + }, + { + "epoch": 2.94, + "learning_rate": 1.027620585330679e-05, + "loss": 0.1748, + "step": 63060 + }, + { + "epoch": 2.94, + "learning_rate": 1.0275422068252004e-05, + "loss": 0.2667, + "step": 63065 + }, + { + "epoch": 2.94, + "learning_rate": 1.0274638283197216e-05, + "loss": 0.0506, + "step": 63070 + }, + { + "epoch": 2.94, + "learning_rate": 1.027385449814243e-05, + "loss": 0.0323, + "step": 63075 + }, + { + "epoch": 2.94, + "learning_rate": 1.0273070713087642e-05, + "loss": 0.0294, + "step": 63080 + }, + { + "epoch": 2.94, + "learning_rate": 1.0272286928032858e-05, + "loss": 0.0357, + "step": 63085 + }, + { + "epoch": 2.94, + "learning_rate": 1.027150314297807e-05, + "loss": 0.0781, + "step": 63090 + }, + { + "epoch": 2.94, + "learning_rate": 1.0270719357923284e-05, + "loss": 0.1175, + "step": 63095 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269935572868496e-05, + "loss": 0.1468, + "step": 63100 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269151787813712e-05, + "loss": 0.1011, + "step": 63105 + }, + { + "epoch": 2.94, + "learning_rate": 1.0268368002758924e-05, + "loss": 0.2504, + "step": 63110 + }, + { + "epoch": 2.95, + "learning_rate": 1.0267584217704138e-05, + "loss": 0.3333, + "step": 63115 + }, + { + "epoch": 2.95, + "learning_rate": 1.026680043264935e-05, + "loss": 0.03, + "step": 63120 + }, + { + "epoch": 2.95, + "learning_rate": 1.0266016647594566e-05, + "loss": 0.0314, + "step": 63125 + }, + { + "epoch": 2.95, + "learning_rate": 1.0265232862539778e-05, + "loss": 0.0549, + "step": 63130 + }, + { + "epoch": 2.95, + "learning_rate": 1.026444907748499e-05, + "loss": 0.0711, + "step": 63135 + }, + { + "epoch": 2.95, + "learning_rate": 1.0263665292430204e-05, + "loss": 0.0903, + "step": 63140 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262881507375418e-05, + "loss": 0.1095, + "step": 63145 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262097722320632e-05, + "loss": 0.1305, + "step": 63150 + }, + { + "epoch": 2.95, + "learning_rate": 1.0261313937265844e-05, + "loss": 0.1326, + "step": 63155 + }, + { + "epoch": 2.95, + "learning_rate": 1.026053015221106e-05, + "loss": 0.2379, + "step": 63160 + }, + { + "epoch": 2.95, + "learning_rate": 1.0259746367156272e-05, + "loss": 0.3484, + "step": 63165 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258962582101486e-05, + "loss": 0.0702, + "step": 63170 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258178797046698e-05, + "loss": 0.0214, + "step": 63175 + }, + { + "epoch": 2.95, + "learning_rate": 1.0257395011991914e-05, + "loss": 0.0748, + "step": 63180 + }, + { + "epoch": 2.95, + "learning_rate": 1.0256611226937126e-05, + "loss": 0.046, + "step": 63185 + }, + { + "epoch": 2.95, + "learning_rate": 1.025582744188234e-05, + "loss": 0.0891, + "step": 63190 + }, + { + "epoch": 2.95, + "learning_rate": 1.0255043656827552e-05, + "loss": 0.0839, + "step": 63195 + }, + { + "epoch": 2.95, + "learning_rate": 1.0254259871772764e-05, + "loss": 0.1694, + "step": 63200 + }, + { + "epoch": 2.95, + "learning_rate": 1.025347608671798e-05, + "loss": 0.1068, + "step": 63205 + }, + { + "epoch": 2.95, + "learning_rate": 1.0252692301663192e-05, + "loss": 0.3257, + "step": 63210 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251908516608406e-05, + "loss": 0.21, + "step": 63215 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251124731553618e-05, + "loss": 0.0523, + "step": 63220 + }, + { + "epoch": 2.95, + "learning_rate": 1.0250340946498834e-05, + "loss": 0.0437, + "step": 63225 + }, + { + "epoch": 2.95, + "learning_rate": 1.0249557161444046e-05, + "loss": 0.0957, + "step": 63230 + }, + { + "epoch": 2.95, + "learning_rate": 1.024877337638926e-05, + "loss": 0.0554, + "step": 63235 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247989591334472e-05, + "loss": 0.0969, + "step": 63240 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247205806279688e-05, + "loss": 0.1589, + "step": 63245 + }, + { + "epoch": 2.95, + "learning_rate": 1.02464220212249e-05, + "loss": 0.1167, + "step": 63250 + }, + { + "epoch": 2.95, + "learning_rate": 1.0245638236170114e-05, + "loss": 0.1879, + "step": 63255 + }, + { + "epoch": 2.95, + "learning_rate": 1.0244854451115328e-05, + "loss": 0.1869, + "step": 63260 + }, + { + "epoch": 2.95, + "learning_rate": 1.024407066606054e-05, + "loss": 0.1766, + "step": 63265 + }, + { + "epoch": 2.95, + "learning_rate": 1.0243286881005754e-05, + "loss": 0.0105, + "step": 63270 + }, + { + "epoch": 2.95, + "learning_rate": 1.0242503095950966e-05, + "loss": 0.0834, + "step": 63275 + }, + { + "epoch": 2.95, + "learning_rate": 1.0241719310896182e-05, + "loss": 0.0791, + "step": 63280 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240935525841394e-05, + "loss": 0.0993, + "step": 63285 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240151740786608e-05, + "loss": 0.0776, + "step": 63290 + }, + { + "epoch": 2.95, + "learning_rate": 1.023936795573182e-05, + "loss": 0.1271, + "step": 63295 + }, + { + "epoch": 2.95, + "learning_rate": 1.0238584170677036e-05, + "loss": 0.0777, + "step": 63300 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237800385622248e-05, + "loss": 0.2114, + "step": 63305 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237016600567462e-05, + "loss": 0.2023, + "step": 63310 + }, + { + "epoch": 2.95, + "learning_rate": 1.0236232815512674e-05, + "loss": 0.2993, + "step": 63315 + }, + { + "epoch": 2.95, + "learning_rate": 1.023544903045789e-05, + "loss": 0.0464, + "step": 63320 + }, + { + "epoch": 2.95, + "learning_rate": 1.0234665245403102e-05, + "loss": 0.0378, + "step": 63325 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233881460348314e-05, + "loss": 0.0215, + "step": 63330 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233097675293528e-05, + "loss": 0.0999, + "step": 63335 + }, + { + "epoch": 2.96, + "learning_rate": 1.023231389023874e-05, + "loss": 0.0813, + "step": 63340 + }, + { + "epoch": 2.96, + "learning_rate": 1.0231530105183956e-05, + "loss": 0.0367, + "step": 63345 + }, + { + "epoch": 2.96, + "learning_rate": 1.0230746320129168e-05, + "loss": 0.1402, + "step": 63350 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229962535074382e-05, + "loss": 0.2003, + "step": 63355 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229178750019596e-05, + "loss": 0.1149, + "step": 63360 + }, + { + "epoch": 2.96, + "learning_rate": 1.022839496496481e-05, + "loss": 0.3584, + "step": 63365 + }, + { + "epoch": 2.96, + "learning_rate": 1.0227611179910022e-05, + "loss": 0.1109, + "step": 63370 + }, + { + "epoch": 2.96, + "learning_rate": 1.0226827394855236e-05, + "loss": 0.0422, + "step": 63375 + }, + { + "epoch": 2.96, + "learning_rate": 1.022604360980045e-05, + "loss": 0.0409, + "step": 63380 + }, + { + "epoch": 2.96, + "learning_rate": 1.0225259824745664e-05, + "loss": 0.0294, + "step": 63385 + }, + { + "epoch": 2.96, + "learning_rate": 1.0224476039690876e-05, + "loss": 0.0393, + "step": 63390 + }, + { + "epoch": 2.96, + "learning_rate": 1.0223692254636088e-05, + "loss": 0.0275, + "step": 63395 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222908469581304e-05, + "loss": 0.1272, + "step": 63400 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222124684526516e-05, + "loss": 0.1611, + "step": 63405 + }, + { + "epoch": 2.96, + "learning_rate": 1.022134089947173e-05, + "loss": 0.3513, + "step": 63410 + }, + { + "epoch": 2.96, + "learning_rate": 1.0220557114416942e-05, + "loss": 0.2599, + "step": 63415 + }, + { + "epoch": 2.96, + "learning_rate": 1.0219773329362158e-05, + "loss": 0.0898, + "step": 63420 + }, + { + "epoch": 2.96, + "learning_rate": 1.021898954430737e-05, + "loss": 0.0381, + "step": 63425 + }, + { + "epoch": 2.96, + "learning_rate": 1.0218205759252584e-05, + "loss": 0.0469, + "step": 63430 + }, + { + "epoch": 2.96, + "learning_rate": 1.0217421974197796e-05, + "loss": 0.0635, + "step": 63435 + }, + { + "epoch": 2.96, + "learning_rate": 1.0216638189143011e-05, + "loss": 0.0756, + "step": 63440 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215854404088224e-05, + "loss": 0.0884, + "step": 63445 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215070619033438e-05, + "loss": 0.1282, + "step": 63450 + }, + { + "epoch": 2.96, + "learning_rate": 1.021428683397865e-05, + "loss": 0.1419, + "step": 63455 + }, + { + "epoch": 2.96, + "learning_rate": 1.0213503048923864e-05, + "loss": 0.2619, + "step": 63460 + }, + { + "epoch": 2.96, + "learning_rate": 1.0212719263869078e-05, + "loss": 0.2593, + "step": 63465 + }, + { + "epoch": 2.96, + "learning_rate": 1.021193547881429e-05, + "loss": 0.0369, + "step": 63470 + }, + { + "epoch": 2.96, + "learning_rate": 1.0211151693759505e-05, + "loss": 0.0356, + "step": 63475 + }, + { + "epoch": 2.96, + "learning_rate": 1.0210367908704718e-05, + "loss": 0.0794, + "step": 63480 + }, + { + "epoch": 2.96, + "learning_rate": 1.0209584123649932e-05, + "loss": 0.0407, + "step": 63485 + }, + { + "epoch": 2.96, + "learning_rate": 1.0208800338595144e-05, + "loss": 0.0817, + "step": 63490 + }, + { + "epoch": 2.96, + "learning_rate": 1.020801655354036e-05, + "loss": 0.0535, + "step": 63495 + }, + { + "epoch": 2.96, + "learning_rate": 1.0207232768485572e-05, + "loss": 0.1159, + "step": 63500 + }, + { + "epoch": 2.96, + "learning_rate": 1.0206448983430785e-05, + "loss": 0.2946, + "step": 63505 + }, + { + "epoch": 2.96, + "learning_rate": 1.0205665198375998e-05, + "loss": 0.2417, + "step": 63510 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204881413321213e-05, + "loss": 0.226, + "step": 63515 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204097628266425e-05, + "loss": 0.0084, + "step": 63520 + }, + { + "epoch": 2.96, + "learning_rate": 1.0203313843211638e-05, + "loss": 0.041, + "step": 63525 + }, + { + "epoch": 2.96, + "learning_rate": 1.0202530058156852e-05, + "loss": 0.0369, + "step": 63530 + }, + { + "epoch": 2.96, + "learning_rate": 1.0201746273102064e-05, + "loss": 0.0958, + "step": 63535 + }, + { + "epoch": 2.96, + "learning_rate": 1.020096248804728e-05, + "loss": 0.1259, + "step": 63540 + }, + { + "epoch": 2.97, + "learning_rate": 1.0200178702992492e-05, + "loss": 0.1039, + "step": 63545 + }, + { + "epoch": 2.97, + "learning_rate": 1.0199394917937706e-05, + "loss": 0.1852, + "step": 63550 + }, + { + "epoch": 2.97, + "learning_rate": 1.0198611132882918e-05, + "loss": 0.1922, + "step": 63555 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197827347828133e-05, + "loss": 0.2911, + "step": 63560 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197043562773346e-05, + "loss": 0.2036, + "step": 63565 + }, + { + "epoch": 2.97, + "learning_rate": 1.019625977771856e-05, + "loss": 0.0676, + "step": 63570 + }, + { + "epoch": 2.97, + "learning_rate": 1.0195475992663773e-05, + "loss": 0.0323, + "step": 63575 + }, + { + "epoch": 2.97, + "learning_rate": 1.0194692207608987e-05, + "loss": 0.0438, + "step": 63580 + }, + { + "epoch": 2.97, + "learning_rate": 1.01939084225542e-05, + "loss": 0.0627, + "step": 63585 + }, + { + "epoch": 2.97, + "learning_rate": 1.0193124637499412e-05, + "loss": 0.1465, + "step": 63590 + }, + { + "epoch": 2.97, + "learning_rate": 1.0192340852444627e-05, + "loss": 0.0959, + "step": 63595 + }, + { + "epoch": 2.97, + "learning_rate": 1.019155706738984e-05, + "loss": 0.0919, + "step": 63600 + }, + { + "epoch": 2.97, + "learning_rate": 1.0190773282335053e-05, + "loss": 0.194, + "step": 63605 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189989497280266e-05, + "loss": 0.1797, + "step": 63610 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189205712225481e-05, + "loss": 0.2537, + "step": 63615 + }, + { + "epoch": 2.97, + "learning_rate": 1.0188421927170693e-05, + "loss": 0.0516, + "step": 63620 + }, + { + "epoch": 2.97, + "learning_rate": 1.0187638142115907e-05, + "loss": 0.0706, + "step": 63625 + }, + { + "epoch": 2.97, + "learning_rate": 1.018685435706112e-05, + "loss": 0.0674, + "step": 63630 + }, + { + "epoch": 2.97, + "learning_rate": 1.0186070572006335e-05, + "loss": 0.0511, + "step": 63635 + }, + { + "epoch": 2.97, + "learning_rate": 1.0185286786951547e-05, + "loss": 0.0479, + "step": 63640 + }, + { + "epoch": 2.97, + "learning_rate": 1.0184503001896761e-05, + "loss": 0.1024, + "step": 63645 + }, + { + "epoch": 2.97, + "learning_rate": 1.0183719216841973e-05, + "loss": 0.0979, + "step": 63650 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182935431787186e-05, + "loss": 0.1912, + "step": 63655 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182151646732401e-05, + "loss": 0.23, + "step": 63660 + }, + { + "epoch": 2.97, + "learning_rate": 1.0181367861677613e-05, + "loss": 0.3174, + "step": 63665 + }, + { + "epoch": 2.97, + "learning_rate": 1.0180584076622827e-05, + "loss": 0.0348, + "step": 63670 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179800291568041e-05, + "loss": 0.0672, + "step": 63675 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179016506513255e-05, + "loss": 0.0229, + "step": 63680 + }, + { + "epoch": 2.97, + "learning_rate": 1.0178232721458467e-05, + "loss": 0.0771, + "step": 63685 + }, + { + "epoch": 2.97, + "learning_rate": 1.0177448936403681e-05, + "loss": 0.114, + "step": 63690 + }, + { + "epoch": 2.97, + "learning_rate": 1.0176665151348895e-05, + "loss": 0.0955, + "step": 63695 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175881366294109e-05, + "loss": 0.0727, + "step": 63700 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175097581239321e-05, + "loss": 0.0753, + "step": 63705 + }, + { + "epoch": 2.97, + "learning_rate": 1.0174313796184537e-05, + "loss": 0.1925, + "step": 63710 + }, + { + "epoch": 2.97, + "learning_rate": 1.0173530011129749e-05, + "loss": 0.2355, + "step": 63715 + }, + { + "epoch": 2.97, + "learning_rate": 1.0172746226074961e-05, + "loss": 0.0492, + "step": 63720 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171962441020175e-05, + "loss": 0.033, + "step": 63725 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171178655965387e-05, + "loss": 0.0612, + "step": 63730 + }, + { + "epoch": 2.97, + "learning_rate": 1.0170394870910603e-05, + "loss": 0.0285, + "step": 63735 + }, + { + "epoch": 2.97, + "learning_rate": 1.0169611085855815e-05, + "loss": 0.0306, + "step": 63740 + }, + { + "epoch": 2.97, + "learning_rate": 1.016882730080103e-05, + "loss": 0.0759, + "step": 63745 + }, + { + "epoch": 2.97, + "learning_rate": 1.0168043515746241e-05, + "loss": 0.1336, + "step": 63750 + }, + { + "epoch": 2.97, + "learning_rate": 1.0167259730691457e-05, + "loss": 0.2051, + "step": 63755 + }, + { + "epoch": 2.98, + "learning_rate": 1.016647594563667e-05, + "loss": 0.1794, + "step": 63760 + }, + { + "epoch": 2.98, + "learning_rate": 1.0165692160581883e-05, + "loss": 0.2222, + "step": 63765 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164908375527095e-05, + "loss": 0.0448, + "step": 63770 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164124590472311e-05, + "loss": 0.1489, + "step": 63775 + }, + { + "epoch": 2.98, + "learning_rate": 1.0163340805417523e-05, + "loss": 0.0714, + "step": 63780 + }, + { + "epoch": 2.98, + "learning_rate": 1.0162557020362735e-05, + "loss": 0.1082, + "step": 63785 + }, + { + "epoch": 2.98, + "learning_rate": 1.0161773235307951e-05, + "loss": 0.0898, + "step": 63790 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160989450253163e-05, + "loss": 0.0668, + "step": 63795 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160205665198377e-05, + "loss": 0.0864, + "step": 63800 + }, + { + "epoch": 2.98, + "learning_rate": 1.015942188014359e-05, + "loss": 0.1504, + "step": 63805 + }, + { + "epoch": 2.98, + "learning_rate": 1.0158638095088805e-05, + "loss": 0.1931, + "step": 63810 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157854310034017e-05, + "loss": 0.1701, + "step": 63815 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157070524979231e-05, + "loss": 0.0198, + "step": 63820 + }, + { + "epoch": 2.98, + "learning_rate": 1.0156286739924443e-05, + "loss": 0.064, + "step": 63825 + }, + { + "epoch": 2.98, + "learning_rate": 1.0155502954869659e-05, + "loss": 0.0866, + "step": 63830 + }, + { + "epoch": 2.98, + "learning_rate": 1.0154719169814871e-05, + "loss": 0.0517, + "step": 63835 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153935384760085e-05, + "loss": 0.0199, + "step": 63840 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153151599705297e-05, + "loss": 0.1171, + "step": 63845 + }, + { + "epoch": 2.98, + "learning_rate": 1.015236781465051e-05, + "loss": 0.1281, + "step": 63850 + }, + { + "epoch": 2.98, + "learning_rate": 1.0151584029595725e-05, + "loss": 0.1711, + "step": 63855 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150800244540937e-05, + "loss": 0.1402, + "step": 63860 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150016459486151e-05, + "loss": 0.2753, + "step": 63865 + }, + { + "epoch": 2.98, + "learning_rate": 1.0149232674431363e-05, + "loss": 0.0264, + "step": 63870 + }, + { + "epoch": 2.98, + "learning_rate": 1.0148448889376579e-05, + "loss": 0.0742, + "step": 63875 + }, + { + "epoch": 2.98, + "learning_rate": 1.0147665104321791e-05, + "loss": 0.0883, + "step": 63880 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146881319267005e-05, + "loss": 0.059, + "step": 63885 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146097534212219e-05, + "loss": 0.0566, + "step": 63890 + }, + { + "epoch": 2.98, + "learning_rate": 1.0145313749157433e-05, + "loss": 0.2492, + "step": 63895 + }, + { + "epoch": 2.98, + "learning_rate": 1.0144529964102645e-05, + "loss": 0.1694, + "step": 63900 + }, + { + "epoch": 2.98, + "learning_rate": 1.0143746179047859e-05, + "loss": 0.1478, + "step": 63905 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142962393993073e-05, + "loss": 0.2933, + "step": 63910 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142178608938285e-05, + "loss": 0.3151, + "step": 63915 + }, + { + "epoch": 2.98, + "learning_rate": 1.0141394823883499e-05, + "loss": 0.0621, + "step": 63920 + }, + { + "epoch": 2.98, + "learning_rate": 1.0140611038828711e-05, + "loss": 0.0457, + "step": 63925 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139827253773927e-05, + "loss": 0.0642, + "step": 63930 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139043468719139e-05, + "loss": 0.091, + "step": 63935 + }, + { + "epoch": 2.98, + "learning_rate": 1.0138259683664353e-05, + "loss": 0.078, + "step": 63940 + }, + { + "epoch": 2.98, + "learning_rate": 1.0137475898609565e-05, + "loss": 0.0961, + "step": 63945 + }, + { + "epoch": 2.98, + "learning_rate": 1.013669211355478e-05, + "loss": 0.212, + "step": 63950 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135908328499993e-05, + "loss": 0.1122, + "step": 63955 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135124543445207e-05, + "loss": 0.1807, + "step": 63960 + }, + { + "epoch": 2.98, + "learning_rate": 1.0134340758390419e-05, + "loss": 0.216, + "step": 63965 + }, + { + "epoch": 2.98, + "learning_rate": 1.0133556973335635e-05, + "loss": 0.0358, + "step": 63970 + }, + { + "epoch": 2.99, + "learning_rate": 1.0132773188280847e-05, + "loss": 0.291, + "step": 63975 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131989403226059e-05, + "loss": 0.0242, + "step": 63980 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131205618171273e-05, + "loss": 0.1145, + "step": 63985 + }, + { + "epoch": 2.99, + "learning_rate": 1.0130421833116487e-05, + "loss": 0.0473, + "step": 63990 + }, + { + "epoch": 2.99, + "learning_rate": 1.01296380480617e-05, + "loss": 0.1068, + "step": 63995 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128854263006913e-05, + "loss": 0.0515, + "step": 64000 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128070477952127e-05, + "loss": 0.1869, + "step": 64005 + }, + { + "epoch": 2.99, + "learning_rate": 1.012728669289734e-05, + "loss": 0.2932, + "step": 64010 + }, + { + "epoch": 2.99, + "learning_rate": 1.0126502907842555e-05, + "loss": 0.3333, + "step": 64015 + }, + { + "epoch": 2.99, + "learning_rate": 1.0125719122787767e-05, + "loss": 0.0225, + "step": 64020 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124935337732983e-05, + "loss": 0.0358, + "step": 64025 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124151552678195e-05, + "loss": 0.0832, + "step": 64030 + }, + { + "epoch": 2.99, + "learning_rate": 1.0123367767623409e-05, + "loss": 0.0553, + "step": 64035 + }, + { + "epoch": 2.99, + "learning_rate": 1.012258398256862e-05, + "loss": 0.0834, + "step": 64040 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121800197513833e-05, + "loss": 0.1044, + "step": 64045 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121016412459049e-05, + "loss": 0.1887, + "step": 64050 + }, + { + "epoch": 2.99, + "learning_rate": 1.0120232627404261e-05, + "loss": 0.2406, + "step": 64055 + }, + { + "epoch": 2.99, + "learning_rate": 1.0119448842349475e-05, + "loss": 0.1824, + "step": 64060 + }, + { + "epoch": 2.99, + "learning_rate": 1.0118665057294687e-05, + "loss": 0.3448, + "step": 64065 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117881272239903e-05, + "loss": 0.0214, + "step": 64070 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117097487185115e-05, + "loss": 0.0121, + "step": 64075 + }, + { + "epoch": 2.99, + "learning_rate": 1.0116313702130329e-05, + "loss": 0.0553, + "step": 64080 + }, + { + "epoch": 2.99, + "learning_rate": 1.0115529917075541e-05, + "loss": 0.0827, + "step": 64085 + }, + { + "epoch": 2.99, + "learning_rate": 1.0114746132020757e-05, + "loss": 0.0886, + "step": 64090 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113962346965969e-05, + "loss": 0.0533, + "step": 64095 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113178561911183e-05, + "loss": 0.143, + "step": 64100 + }, + { + "epoch": 2.99, + "learning_rate": 1.0112394776856397e-05, + "loss": 0.1127, + "step": 64105 + }, + { + "epoch": 2.99, + "learning_rate": 1.0111610991801609e-05, + "loss": 0.2126, + "step": 64110 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110827206746823e-05, + "loss": 0.2579, + "step": 64115 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110043421692035e-05, + "loss": 0.0479, + "step": 64120 + }, + { + "epoch": 2.99, + "learning_rate": 1.010925963663725e-05, + "loss": 0.0203, + "step": 64125 + }, + { + "epoch": 2.99, + "learning_rate": 1.0108475851582463e-05, + "loss": 0.031, + "step": 64130 + }, + { + "epoch": 2.99, + "learning_rate": 1.0107692066527677e-05, + "loss": 0.1182, + "step": 64135 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106908281472889e-05, + "loss": 0.059, + "step": 64140 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106124496418104e-05, + "loss": 0.1079, + "step": 64145 + }, + { + "epoch": 2.99, + "learning_rate": 1.0105340711363317e-05, + "loss": 0.0826, + "step": 64150 + }, + { + "epoch": 2.99, + "learning_rate": 1.010455692630853e-05, + "loss": 0.1238, + "step": 64155 + }, + { + "epoch": 2.99, + "learning_rate": 1.0103773141253743e-05, + "loss": 0.1699, + "step": 64160 + }, + { + "epoch": 2.99, + "learning_rate": 1.0102989356198958e-05, + "loss": 0.224, + "step": 64165 + }, + { + "epoch": 2.99, + "learning_rate": 1.010220557114417e-05, + "loss": 0.0326, + "step": 64170 + }, + { + "epoch": 2.99, + "learning_rate": 1.0101421786089383e-05, + "loss": 0.061, + "step": 64175 + }, + { + "epoch": 2.99, + "learning_rate": 1.0100638001034597e-05, + "loss": 0.0567, + "step": 64180 + }, + { + "epoch": 2.99, + "learning_rate": 1.0099854215979809e-05, + "loss": 0.0547, + "step": 64185 + }, + { + "epoch": 3.0, + "learning_rate": 1.0099070430925024e-05, + "loss": 0.0667, + "step": 64190 + }, + { + "epoch": 3.0, + "learning_rate": 1.0098286645870237e-05, + "loss": 0.1063, + "step": 64195 + }, + { + "epoch": 3.0, + "learning_rate": 1.009750286081545e-05, + "loss": 0.1263, + "step": 64200 + }, + { + "epoch": 3.0, + "learning_rate": 1.0096719075760664e-05, + "loss": 0.1766, + "step": 64205 + }, + { + "epoch": 3.0, + "learning_rate": 1.0095935290705878e-05, + "loss": 0.2074, + "step": 64210 + }, + { + "epoch": 3.0, + "learning_rate": 1.009515150565109e-05, + "loss": 0.2975, + "step": 64215 + }, + { + "epoch": 3.0, + "learning_rate": 1.0094367720596305e-05, + "loss": 0.0328, + "step": 64220 + }, + { + "epoch": 3.0, + "learning_rate": 1.0093583935541518e-05, + "loss": 0.0301, + "step": 64225 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092800150486732e-05, + "loss": 0.051, + "step": 64230 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092016365431945e-05, + "loss": 0.0794, + "step": 64235 + }, + { + "epoch": 3.0, + "learning_rate": 1.0091232580377157e-05, + "loss": 0.2081, + "step": 64240 + }, + { + "epoch": 3.0, + "learning_rate": 1.0090448795322372e-05, + "loss": 0.0887, + "step": 64245 + }, + { + "epoch": 3.0, + "learning_rate": 1.0089665010267585e-05, + "loss": 0.0939, + "step": 64250 + }, + { + "epoch": 3.0, + "learning_rate": 1.0088881225212798e-05, + "loss": 0.152, + "step": 64255 + }, + { + "epoch": 3.0, + "learning_rate": 1.008809744015801e-05, + "loss": 0.1754, + "step": 64260 + }, + { + "epoch": 3.0, + "learning_rate": 1.0087313655103226e-05, + "loss": 0.2495, + "step": 64265 + }, + { + "epoch": 3.0, + "learning_rate": 1.0086529870048438e-05, + "loss": 0.0218, + "step": 64270 + }, + { + "epoch": 3.0, + "learning_rate": 1.0085746084993652e-05, + "loss": 0.0405, + "step": 64275 + }, + { + "epoch": 3.0, + "learning_rate": 1.0084962299938865e-05, + "loss": 0.0995, + "step": 64280 + }, + { + "epoch": 3.0, + "learning_rate": 1.008417851488408e-05, + "loss": 0.1217, + "step": 64285 + }, + { + "epoch": 3.0, + "learning_rate": 1.0083394729829292e-05, + "loss": 0.1712, + "step": 64290 + }, + { + "epoch": 3.0, + "eval_cer": 0.013233947127380689, + "eval_loss": 0.03575053811073303, + "eval_runtime": 477.3319, + "eval_samples_per_second": 39.909, + "eval_steps_per_second": 4.99, + "eval_wer": 0.11354552183567727, + "step": 64293 + }, + { + "epoch": 3.0, + "learning_rate": 1.0082610944774506e-05, + "loss": 0.1887, + "step": 64295 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081827159719719e-05, + "loss": 0.0727, + "step": 64300 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081043374664932e-05, + "loss": 0.0216, + "step": 64305 + }, + { + "epoch": 3.0, + "learning_rate": 1.0080259589610146e-05, + "loss": 0.0405, + "step": 64310 + }, + { + "epoch": 3.0, + "learning_rate": 1.0079475804555359e-05, + "loss": 0.083, + "step": 64315 + }, + { + "epoch": 3.0, + "learning_rate": 1.0078692019500574e-05, + "loss": 0.0631, + "step": 64320 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077908234445786e-05, + "loss": 0.0888, + "step": 64325 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077124449391e-05, + "loss": 0.2566, + "step": 64330 + }, + { + "epoch": 3.0, + "learning_rate": 1.0076340664336212e-05, + "loss": 0.1636, + "step": 64335 + }, + { + "epoch": 3.0, + "learning_rate": 1.0075556879281428e-05, + "loss": 0.1579, + "step": 64340 + }, + { + "epoch": 3.0, + "learning_rate": 1.007477309422664e-05, + "loss": 0.2837, + "step": 64345 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073989309171854e-05, + "loss": 0.1294, + "step": 64350 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073205524117066e-05, + "loss": 0.038, + "step": 64355 + }, + { + "epoch": 3.0, + "learning_rate": 1.0072421739062282e-05, + "loss": 0.0945, + "step": 64360 + }, + { + "epoch": 3.0, + "learning_rate": 1.0071637954007494e-05, + "loss": 0.0394, + "step": 64365 + }, + { + "epoch": 3.0, + "learning_rate": 1.0070854168952706e-05, + "loss": 0.0792, + "step": 64370 + }, + { + "epoch": 3.0, + "learning_rate": 1.007007038389792e-05, + "loss": 0.114, + "step": 64375 + }, + { + "epoch": 3.0, + "learning_rate": 1.0069286598843133e-05, + "loss": 0.0888, + "step": 64380 + }, + { + "epoch": 3.0, + "learning_rate": 1.0068502813788348e-05, + "loss": 0.1336, + "step": 64385 + }, + { + "epoch": 3.0, + "learning_rate": 1.006771902873356e-05, + "loss": 0.2821, + "step": 64390 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066935243678774e-05, + "loss": 0.2377, + "step": 64395 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066151458623986e-05, + "loss": 0.1294, + "step": 64400 + }, + { + "epoch": 3.01, + "learning_rate": 1.0065367673569202e-05, + "loss": 0.0552, + "step": 64405 + }, + { + "epoch": 3.01, + "learning_rate": 1.0064583888514414e-05, + "loss": 0.0234, + "step": 64410 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063800103459628e-05, + "loss": 0.0932, + "step": 64415 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063016318404842e-05, + "loss": 0.1105, + "step": 64420 + }, + { + "epoch": 3.01, + "learning_rate": 1.0062232533350056e-05, + "loss": 0.0516, + "step": 64425 + }, + { + "epoch": 3.01, + "learning_rate": 1.0061448748295268e-05, + "loss": 0.2342, + "step": 64430 + }, + { + "epoch": 3.01, + "learning_rate": 1.006066496324048e-05, + "loss": 0.1758, + "step": 64435 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059881178185696e-05, + "loss": 0.1461, + "step": 64440 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059097393130908e-05, + "loss": 0.3334, + "step": 64445 + }, + { + "epoch": 3.01, + "learning_rate": 1.0058313608076122e-05, + "loss": 0.0562, + "step": 64450 + }, + { + "epoch": 3.01, + "learning_rate": 1.0057529823021334e-05, + "loss": 0.0572, + "step": 64455 + }, + { + "epoch": 3.01, + "learning_rate": 1.005674603796655e-05, + "loss": 0.0752, + "step": 64460 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055962252911762e-05, + "loss": 0.0533, + "step": 64465 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055178467856976e-05, + "loss": 0.0731, + "step": 64470 + }, + { + "epoch": 3.01, + "learning_rate": 1.0054394682802188e-05, + "loss": 0.0838, + "step": 64475 + }, + { + "epoch": 3.01, + "learning_rate": 1.0053610897747404e-05, + "loss": 0.0864, + "step": 64480 + }, + { + "epoch": 3.01, + "learning_rate": 1.0052827112692616e-05, + "loss": 0.2098, + "step": 64485 + }, + { + "epoch": 3.01, + "learning_rate": 1.005204332763783e-05, + "loss": 0.1538, + "step": 64490 + }, + { + "epoch": 3.01, + "learning_rate": 1.0051259542583042e-05, + "loss": 0.2549, + "step": 64495 + }, + { + "epoch": 3.01, + "learning_rate": 1.0050475757528254e-05, + "loss": 0.1237, + "step": 64500 + }, + { + "epoch": 3.01, + "learning_rate": 1.004969197247347e-05, + "loss": 0.0369, + "step": 64505 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048908187418682e-05, + "loss": 0.0278, + "step": 64510 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048124402363896e-05, + "loss": 0.074, + "step": 64515 + }, + { + "epoch": 3.01, + "learning_rate": 1.004734061730911e-05, + "loss": 0.0517, + "step": 64520 + }, + { + "epoch": 3.01, + "learning_rate": 1.0046556832254324e-05, + "loss": 0.0991, + "step": 64525 + }, + { + "epoch": 3.01, + "learning_rate": 1.0045773047199536e-05, + "loss": 0.1158, + "step": 64530 + }, + { + "epoch": 3.01, + "learning_rate": 1.004498926214475e-05, + "loss": 0.1029, + "step": 64535 + }, + { + "epoch": 3.01, + "learning_rate": 1.0044205477089964e-05, + "loss": 0.1561, + "step": 64540 + }, + { + "epoch": 3.01, + "learning_rate": 1.0043421692035178e-05, + "loss": 0.2435, + "step": 64545 + }, + { + "epoch": 3.01, + "learning_rate": 1.004263790698039e-05, + "loss": 0.0767, + "step": 64550 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041854121925606e-05, + "loss": 0.0818, + "step": 64555 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041070336870818e-05, + "loss": 0.0948, + "step": 64560 + }, + { + "epoch": 3.01, + "learning_rate": 1.004028655181603e-05, + "loss": 0.0388, + "step": 64565 + }, + { + "epoch": 3.01, + "learning_rate": 1.0039502766761244e-05, + "loss": 0.0989, + "step": 64570 + }, + { + "epoch": 3.01, + "learning_rate": 1.0038718981706456e-05, + "loss": 0.1033, + "step": 64575 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037935196651672e-05, + "loss": 0.0571, + "step": 64580 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037151411596884e-05, + "loss": 0.1396, + "step": 64585 + }, + { + "epoch": 3.01, + "learning_rate": 1.0036367626542098e-05, + "loss": 0.1573, + "step": 64590 + }, + { + "epoch": 3.01, + "learning_rate": 1.003558384148731e-05, + "loss": 0.2187, + "step": 64595 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034800056432526e-05, + "loss": 0.0839, + "step": 64600 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034016271377738e-05, + "loss": 0.0647, + "step": 64605 + }, + { + "epoch": 3.01, + "learning_rate": 1.0033232486322952e-05, + "loss": 0.0414, + "step": 64610 + }, + { + "epoch": 3.02, + "learning_rate": 1.0032448701268164e-05, + "loss": 0.0753, + "step": 64615 + }, + { + "epoch": 3.02, + "learning_rate": 1.003166491621338e-05, + "loss": 0.0433, + "step": 64620 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030881131158592e-05, + "loss": 0.0816, + "step": 64625 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030097346103804e-05, + "loss": 0.0984, + "step": 64630 + }, + { + "epoch": 3.02, + "learning_rate": 1.002931356104902e-05, + "loss": 0.1363, + "step": 64635 + }, + { + "epoch": 3.02, + "learning_rate": 1.002868653300519e-05, + "loss": 0.1344, + "step": 64640 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027902747950402e-05, + "loss": 0.2421, + "step": 64645 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027118962895616e-05, + "loss": 0.1472, + "step": 64650 + }, + { + "epoch": 3.02, + "learning_rate": 1.0026335177840828e-05, + "loss": 0.0602, + "step": 64655 + }, + { + "epoch": 3.02, + "learning_rate": 1.0025551392786044e-05, + "loss": 0.0546, + "step": 64660 + }, + { + "epoch": 3.02, + "learning_rate": 1.0024767607731256e-05, + "loss": 0.04, + "step": 64665 + }, + { + "epoch": 3.02, + "learning_rate": 1.002398382267647e-05, + "loss": 0.0398, + "step": 64670 + }, + { + "epoch": 3.02, + "learning_rate": 1.0023200037621682e-05, + "loss": 0.0285, + "step": 64675 + }, + { + "epoch": 3.02, + "learning_rate": 1.0022416252566898e-05, + "loss": 0.134, + "step": 64680 + }, + { + "epoch": 3.02, + "learning_rate": 1.002163246751211e-05, + "loss": 0.0968, + "step": 64685 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020848682457324e-05, + "loss": 0.2418, + "step": 64690 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020064897402536e-05, + "loss": 0.3335, + "step": 64695 + }, + { + "epoch": 3.02, + "learning_rate": 1.001928111234775e-05, + "loss": 0.065, + "step": 64700 + }, + { + "epoch": 3.02, + "learning_rate": 1.0018497327292964e-05, + "loss": 0.0142, + "step": 64705 + }, + { + "epoch": 3.02, + "learning_rate": 1.0017713542238176e-05, + "loss": 0.0253, + "step": 64710 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016929757183392e-05, + "loss": 0.0308, + "step": 64715 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016145972128604e-05, + "loss": 0.0628, + "step": 64720 + }, + { + "epoch": 3.02, + "learning_rate": 1.0015362187073818e-05, + "loss": 0.1409, + "step": 64725 + }, + { + "epoch": 3.02, + "learning_rate": 1.001457840201903e-05, + "loss": 0.0527, + "step": 64730 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013794616964246e-05, + "loss": 0.1638, + "step": 64735 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013010831909458e-05, + "loss": 0.1495, + "step": 64740 + }, + { + "epoch": 3.02, + "learning_rate": 1.0012227046854672e-05, + "loss": 0.3142, + "step": 64745 + }, + { + "epoch": 3.02, + "learning_rate": 1.0011443261799884e-05, + "loss": 0.0679, + "step": 64750 + }, + { + "epoch": 3.02, + "learning_rate": 1.00106594767451e-05, + "loss": 0.0159, + "step": 64755 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009875691690312e-05, + "loss": 0.0669, + "step": 64760 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009091906635524e-05, + "loss": 0.0254, + "step": 64765 + }, + { + "epoch": 3.02, + "learning_rate": 1.0008308121580738e-05, + "loss": 0.0483, + "step": 64770 + }, + { + "epoch": 3.02, + "learning_rate": 1.000752433652595e-05, + "loss": 0.0889, + "step": 64775 + }, + { + "epoch": 3.02, + "learning_rate": 1.0006740551471166e-05, + "loss": 0.0646, + "step": 64780 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005956766416378e-05, + "loss": 0.1802, + "step": 64785 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005172981361592e-05, + "loss": 0.2382, + "step": 64790 + }, + { + "epoch": 3.02, + "learning_rate": 1.0004389196306806e-05, + "loss": 0.3438, + "step": 64795 + }, + { + "epoch": 3.02, + "learning_rate": 1.000360541125202e-05, + "loss": 0.072, + "step": 64800 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002821626197232e-05, + "loss": 0.0219, + "step": 64805 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002037841142446e-05, + "loss": 0.0681, + "step": 64810 + }, + { + "epoch": 3.02, + "learning_rate": 1.000125405608766e-05, + "loss": 0.0652, + "step": 64815 + }, + { + "epoch": 3.02, + "learning_rate": 1.0000470271032874e-05, + "loss": 0.1291, + "step": 64820 + }, + { + "epoch": 3.02, + "learning_rate": 9.999686485978086e-06, + "loss": 0.1223, + "step": 64825 + }, + { + "epoch": 3.03, + "learning_rate": 9.9989027009233e-06, + "loss": 0.1653, + "step": 64830 + }, + { + "epoch": 3.03, + "learning_rate": 9.998118915868514e-06, + "loss": 0.1461, + "step": 64835 + }, + { + "epoch": 3.03, + "learning_rate": 9.997335130813726e-06, + "loss": 0.2772, + "step": 64840 + }, + { + "epoch": 3.03, + "learning_rate": 9.99655134575894e-06, + "loss": 0.2285, + "step": 64845 + }, + { + "epoch": 3.03, + "learning_rate": 9.995767560704154e-06, + "loss": 0.1091, + "step": 64850 + }, + { + "epoch": 3.03, + "learning_rate": 9.994983775649368e-06, + "loss": 0.0319, + "step": 64855 + }, + { + "epoch": 3.03, + "learning_rate": 9.99419999059458e-06, + "loss": 0.044, + "step": 64860 + }, + { + "epoch": 3.03, + "learning_rate": 9.993416205539794e-06, + "loss": 0.08, + "step": 64865 + }, + { + "epoch": 3.03, + "learning_rate": 9.992632420485006e-06, + "loss": 0.1067, + "step": 64870 + }, + { + "epoch": 3.03, + "learning_rate": 9.99184863543022e-06, + "loss": 0.1008, + "step": 64875 + }, + { + "epoch": 3.03, + "learning_rate": 9.991064850375434e-06, + "loss": 0.1137, + "step": 64880 + }, + { + "epoch": 3.03, + "learning_rate": 9.990281065320648e-06, + "loss": 0.0964, + "step": 64885 + }, + { + "epoch": 3.03, + "learning_rate": 9.98949728026586e-06, + "loss": 0.2726, + "step": 64890 + }, + { + "epoch": 3.03, + "learning_rate": 9.988713495211074e-06, + "loss": 0.3439, + "step": 64895 + }, + { + "epoch": 3.03, + "learning_rate": 9.987929710156288e-06, + "loss": 0.0521, + "step": 64900 + }, + { + "epoch": 3.03, + "learning_rate": 9.987145925101502e-06, + "loss": 0.0303, + "step": 64905 + }, + { + "epoch": 3.03, + "learning_rate": 9.986362140046714e-06, + "loss": 0.0604, + "step": 64910 + }, + { + "epoch": 3.03, + "learning_rate": 9.985578354991928e-06, + "loss": 0.0909, + "step": 64915 + }, + { + "epoch": 3.03, + "learning_rate": 9.984794569937142e-06, + "loss": 0.0717, + "step": 64920 + }, + { + "epoch": 3.03, + "learning_rate": 9.984010784882354e-06, + "loss": 0.0903, + "step": 64925 + }, + { + "epoch": 3.03, + "learning_rate": 9.983226999827568e-06, + "loss": 0.0838, + "step": 64930 + }, + { + "epoch": 3.03, + "learning_rate": 9.982443214772782e-06, + "loss": 0.1498, + "step": 64935 + }, + { + "epoch": 3.03, + "learning_rate": 9.981659429717994e-06, + "loss": 0.1015, + "step": 64940 + }, + { + "epoch": 3.03, + "learning_rate": 9.980875644663208e-06, + "loss": 0.2865, + "step": 64945 + }, + { + "epoch": 3.03, + "learning_rate": 9.980091859608422e-06, + "loss": 0.0189, + "step": 64950 + }, + { + "epoch": 3.03, + "learning_rate": 9.979308074553636e-06, + "loss": 0.0376, + "step": 64955 + }, + { + "epoch": 3.03, + "learning_rate": 9.978524289498848e-06, + "loss": 0.0662, + "step": 64960 + }, + { + "epoch": 3.03, + "learning_rate": 9.977740504444062e-06, + "loss": 0.0318, + "step": 64965 + }, + { + "epoch": 3.03, + "learning_rate": 9.976956719389276e-06, + "loss": 0.0951, + "step": 64970 + }, + { + "epoch": 3.03, + "learning_rate": 9.97617293433449e-06, + "loss": 0.0406, + "step": 64975 + }, + { + "epoch": 3.03, + "learning_rate": 9.975389149279704e-06, + "loss": 0.0863, + "step": 64980 + }, + { + "epoch": 3.03, + "learning_rate": 9.974605364224916e-06, + "loss": 0.1428, + "step": 64985 + }, + { + "epoch": 3.03, + "learning_rate": 9.973821579170128e-06, + "loss": 0.2493, + "step": 64990 + }, + { + "epoch": 3.03, + "learning_rate": 9.973037794115342e-06, + "loss": 0.2759, + "step": 64995 + }, + { + "epoch": 3.03, + "learning_rate": 9.972254009060556e-06, + "loss": 0.0916, + "step": 65000 + }, + { + "epoch": 3.03, + "learning_rate": 9.97147022400577e-06, + "loss": 0.029, + "step": 65005 + }, + { + "epoch": 3.03, + "learning_rate": 9.970686438950982e-06, + "loss": 0.0638, + "step": 65010 + }, + { + "epoch": 3.03, + "learning_rate": 9.969902653896196e-06, + "loss": 0.0404, + "step": 65015 + }, + { + "epoch": 3.03, + "learning_rate": 9.96911886884141e-06, + "loss": 0.0599, + "step": 65020 + }, + { + "epoch": 3.03, + "learning_rate": 9.968335083786624e-06, + "loss": 0.1324, + "step": 65025 + }, + { + "epoch": 3.03, + "learning_rate": 9.967551298731838e-06, + "loss": 0.1053, + "step": 65030 + }, + { + "epoch": 3.03, + "learning_rate": 9.96676751367705e-06, + "loss": 0.0871, + "step": 65035 + }, + { + "epoch": 3.03, + "learning_rate": 9.965983728622264e-06, + "loss": 0.3495, + "step": 65040 + }, + { + "epoch": 3.04, + "learning_rate": 9.965199943567478e-06, + "loss": 0.2196, + "step": 65045 + }, + { + "epoch": 3.04, + "learning_rate": 9.964416158512691e-06, + "loss": 0.0536, + "step": 65050 + }, + { + "epoch": 3.04, + "learning_rate": 9.963632373457904e-06, + "loss": 0.0329, + "step": 65055 + }, + { + "epoch": 3.04, + "learning_rate": 9.962848588403118e-06, + "loss": 0.0575, + "step": 65060 + }, + { + "epoch": 3.04, + "learning_rate": 9.96206480334833e-06, + "loss": 0.0783, + "step": 65065 + }, + { + "epoch": 3.04, + "learning_rate": 9.961281018293544e-06, + "loss": 0.0488, + "step": 65070 + }, + { + "epoch": 3.04, + "learning_rate": 9.960497233238758e-06, + "loss": 0.1067, + "step": 65075 + }, + { + "epoch": 3.04, + "learning_rate": 9.959713448183972e-06, + "loss": 0.1122, + "step": 65080 + }, + { + "epoch": 3.04, + "learning_rate": 9.958929663129184e-06, + "loss": 0.1402, + "step": 65085 + }, + { + "epoch": 3.04, + "learning_rate": 9.958145878074398e-06, + "loss": 0.1527, + "step": 65090 + }, + { + "epoch": 3.04, + "learning_rate": 9.957362093019612e-06, + "loss": 0.173, + "step": 65095 + }, + { + "epoch": 3.04, + "learning_rate": 9.956578307964825e-06, + "loss": 0.0723, + "step": 65100 + }, + { + "epoch": 3.04, + "learning_rate": 9.955794522910038e-06, + "loss": 0.0479, + "step": 65105 + }, + { + "epoch": 3.04, + "learning_rate": 9.955010737855252e-06, + "loss": 0.087, + "step": 65110 + }, + { + "epoch": 3.04, + "learning_rate": 9.954226952800465e-06, + "loss": 0.0572, + "step": 65115 + }, + { + "epoch": 3.04, + "learning_rate": 9.953443167745678e-06, + "loss": 0.0548, + "step": 65120 + }, + { + "epoch": 3.04, + "learning_rate": 9.952659382690892e-06, + "loss": 0.0851, + "step": 65125 + }, + { + "epoch": 3.04, + "learning_rate": 9.951875597636105e-06, + "loss": 0.1876, + "step": 65130 + }, + { + "epoch": 3.04, + "learning_rate": 9.951091812581318e-06, + "loss": 0.1328, + "step": 65135 + }, + { + "epoch": 3.04, + "learning_rate": 9.950308027526532e-06, + "loss": 0.1966, + "step": 65140 + }, + { + "epoch": 3.04, + "learning_rate": 9.949524242471746e-06, + "loss": 0.222, + "step": 65145 + }, + { + "epoch": 3.04, + "learning_rate": 9.94874045741696e-06, + "loss": 0.0636, + "step": 65150 + }, + { + "epoch": 3.04, + "learning_rate": 9.947956672362172e-06, + "loss": 0.055, + "step": 65155 + }, + { + "epoch": 3.04, + "learning_rate": 9.947172887307386e-06, + "loss": 0.0413, + "step": 65160 + }, + { + "epoch": 3.04, + "learning_rate": 9.9463891022526e-06, + "loss": 0.0801, + "step": 65165 + }, + { + "epoch": 3.04, + "learning_rate": 9.945605317197813e-06, + "loss": 0.081, + "step": 65170 + }, + { + "epoch": 3.04, + "learning_rate": 9.944821532143026e-06, + "loss": 0.0356, + "step": 65175 + }, + { + "epoch": 3.04, + "learning_rate": 9.94403774708824e-06, + "loss": 0.1325, + "step": 65180 + }, + { + "epoch": 3.04, + "learning_rate": 9.943253962033452e-06, + "loss": 0.1551, + "step": 65185 + }, + { + "epoch": 3.04, + "learning_rate": 9.942470176978666e-06, + "loss": 0.2802, + "step": 65190 + }, + { + "epoch": 3.04, + "learning_rate": 9.94168639192388e-06, + "loss": 0.4242, + "step": 65195 + }, + { + "epoch": 3.04, + "learning_rate": 9.940902606869093e-06, + "loss": 0.0853, + "step": 65200 + }, + { + "epoch": 3.04, + "learning_rate": 9.940118821814306e-06, + "loss": 0.0255, + "step": 65205 + }, + { + "epoch": 3.04, + "learning_rate": 9.93933503675952e-06, + "loss": 0.0622, + "step": 65210 + }, + { + "epoch": 3.04, + "learning_rate": 9.938551251704733e-06, + "loss": 0.0154, + "step": 65215 + }, + { + "epoch": 3.04, + "learning_rate": 9.937767466649947e-06, + "loss": 0.093, + "step": 65220 + }, + { + "epoch": 3.04, + "learning_rate": 9.93698368159516e-06, + "loss": 0.2092, + "step": 65225 + }, + { + "epoch": 3.04, + "learning_rate": 9.936199896540373e-06, + "loss": 0.0847, + "step": 65230 + }, + { + "epoch": 3.04, + "learning_rate": 9.935416111485587e-06, + "loss": 0.1413, + "step": 65235 + }, + { + "epoch": 3.04, + "learning_rate": 9.934632326430801e-06, + "loss": 0.2631, + "step": 65240 + }, + { + "epoch": 3.04, + "learning_rate": 9.933848541376015e-06, + "loss": 0.3179, + "step": 65245 + }, + { + "epoch": 3.04, + "learning_rate": 9.933064756321227e-06, + "loss": 0.0971, + "step": 65250 + }, + { + "epoch": 3.04, + "learning_rate": 9.93228097126644e-06, + "loss": 0.055, + "step": 65255 + }, + { + "epoch": 3.05, + "learning_rate": 9.931497186211653e-06, + "loss": 0.0182, + "step": 65260 + }, + { + "epoch": 3.05, + "learning_rate": 9.930713401156867e-06, + "loss": 0.1053, + "step": 65265 + }, + { + "epoch": 3.05, + "learning_rate": 9.929929616102081e-06, + "loss": 0.0475, + "step": 65270 + }, + { + "epoch": 3.05, + "learning_rate": 9.929145831047294e-06, + "loss": 0.0945, + "step": 65275 + }, + { + "epoch": 3.05, + "learning_rate": 9.928362045992507e-06, + "loss": 0.0834, + "step": 65280 + }, + { + "epoch": 3.05, + "learning_rate": 9.927578260937721e-06, + "loss": 0.1769, + "step": 65285 + }, + { + "epoch": 3.05, + "learning_rate": 9.926794475882935e-06, + "loss": 0.2825, + "step": 65290 + }, + { + "epoch": 3.05, + "learning_rate": 9.926010690828149e-06, + "loss": 0.2869, + "step": 65295 + }, + { + "epoch": 3.05, + "learning_rate": 9.925226905773361e-06, + "loss": 0.1088, + "step": 65300 + }, + { + "epoch": 3.05, + "learning_rate": 9.924443120718575e-06, + "loss": 0.035, + "step": 65305 + }, + { + "epoch": 3.05, + "learning_rate": 9.92365933566379e-06, + "loss": 0.0313, + "step": 65310 + }, + { + "epoch": 3.05, + "learning_rate": 9.922875550609001e-06, + "loss": 0.0684, + "step": 65315 + }, + { + "epoch": 3.05, + "learning_rate": 9.922091765554215e-06, + "loss": 0.0795, + "step": 65320 + }, + { + "epoch": 3.05, + "learning_rate": 9.921307980499427e-06, + "loss": 0.0704, + "step": 65325 + }, + { + "epoch": 3.05, + "learning_rate": 9.920524195444641e-06, + "loss": 0.125, + "step": 65330 + }, + { + "epoch": 3.05, + "learning_rate": 9.919740410389855e-06, + "loss": 0.1401, + "step": 65335 + }, + { + "epoch": 3.05, + "learning_rate": 9.91895662533507e-06, + "loss": 0.2818, + "step": 65340 + }, + { + "epoch": 3.05, + "learning_rate": 9.918172840280283e-06, + "loss": 0.2141, + "step": 65345 + }, + { + "epoch": 3.05, + "learning_rate": 9.917389055225495e-06, + "loss": 0.1145, + "step": 65350 + }, + { + "epoch": 3.05, + "learning_rate": 9.91660527017071e-06, + "loss": 0.0228, + "step": 65355 + }, + { + "epoch": 3.05, + "learning_rate": 9.915821485115923e-06, + "loss": 0.0151, + "step": 65360 + }, + { + "epoch": 3.05, + "learning_rate": 9.915037700061137e-06, + "loss": 0.0571, + "step": 65365 + }, + { + "epoch": 3.05, + "learning_rate": 9.91425391500635e-06, + "loss": 0.0924, + "step": 65370 + }, + { + "epoch": 3.05, + "learning_rate": 9.913470129951563e-06, + "loss": 0.0435, + "step": 65375 + }, + { + "epoch": 3.05, + "learning_rate": 9.912686344896775e-06, + "loss": 0.1203, + "step": 65380 + }, + { + "epoch": 3.05, + "learning_rate": 9.91190255984199e-06, + "loss": 0.1247, + "step": 65385 + }, + { + "epoch": 3.05, + "learning_rate": 9.911118774787203e-06, + "loss": 0.1618, + "step": 65390 + }, + { + "epoch": 3.05, + "learning_rate": 9.910334989732417e-06, + "loss": 0.2363, + "step": 65395 + }, + { + "epoch": 3.05, + "learning_rate": 9.90955120467763e-06, + "loss": 0.0811, + "step": 65400 + }, + { + "epoch": 3.05, + "learning_rate": 9.908767419622843e-06, + "loss": 0.0359, + "step": 65405 + }, + { + "epoch": 3.05, + "learning_rate": 9.907983634568057e-06, + "loss": 0.0697, + "step": 65410 + }, + { + "epoch": 3.05, + "learning_rate": 9.907199849513271e-06, + "loss": 0.0408, + "step": 65415 + }, + { + "epoch": 3.05, + "learning_rate": 9.906416064458483e-06, + "loss": 0.0404, + "step": 65420 + }, + { + "epoch": 3.05, + "learning_rate": 9.905632279403697e-06, + "loss": 0.1271, + "step": 65425 + }, + { + "epoch": 3.05, + "learning_rate": 9.904848494348911e-06, + "loss": 0.149, + "step": 65430 + }, + { + "epoch": 3.05, + "learning_rate": 9.904064709294125e-06, + "loss": 0.1755, + "step": 65435 + }, + { + "epoch": 3.05, + "learning_rate": 9.903280924239337e-06, + "loss": 0.2316, + "step": 65440 + }, + { + "epoch": 3.05, + "learning_rate": 9.902497139184551e-06, + "loss": 0.2651, + "step": 65445 + }, + { + "epoch": 3.05, + "learning_rate": 9.901713354129763e-06, + "loss": 0.0487, + "step": 65450 + }, + { + "epoch": 3.05, + "learning_rate": 9.900929569074977e-06, + "loss": 0.0474, + "step": 65455 + }, + { + "epoch": 3.05, + "learning_rate": 9.900145784020191e-06, + "loss": 0.0345, + "step": 65460 + }, + { + "epoch": 3.05, + "learning_rate": 9.899361998965405e-06, + "loss": 0.0674, + "step": 65465 + }, + { + "epoch": 3.05, + "learning_rate": 9.898578213910617e-06, + "loss": 0.1199, + "step": 65470 + }, + { + "epoch": 3.06, + "learning_rate": 9.897794428855831e-06, + "loss": 0.075, + "step": 65475 + }, + { + "epoch": 3.06, + "learning_rate": 9.897010643801045e-06, + "loss": 0.0844, + "step": 65480 + }, + { + "epoch": 3.06, + "learning_rate": 9.896226858746259e-06, + "loss": 0.1378, + "step": 65485 + }, + { + "epoch": 3.06, + "learning_rate": 9.895443073691471e-06, + "loss": 0.2872, + "step": 65490 + }, + { + "epoch": 3.06, + "learning_rate": 9.894659288636685e-06, + "loss": 0.276, + "step": 65495 + }, + { + "epoch": 3.06, + "learning_rate": 9.893875503581899e-06, + "loss": 0.1098, + "step": 65500 + }, + { + "epoch": 3.06, + "learning_rate": 9.893091718527113e-06, + "loss": 0.0231, + "step": 65505 + }, + { + "epoch": 3.06, + "learning_rate": 9.892307933472325e-06, + "loss": 0.0576, + "step": 65510 + }, + { + "epoch": 3.06, + "learning_rate": 9.891524148417539e-06, + "loss": 0.1021, + "step": 65515 + }, + { + "epoch": 3.06, + "learning_rate": 9.890740363362751e-06, + "loss": 0.1098, + "step": 65520 + }, + { + "epoch": 3.06, + "learning_rate": 9.889956578307965e-06, + "loss": 0.0809, + "step": 65525 + }, + { + "epoch": 3.06, + "learning_rate": 9.889172793253179e-06, + "loss": 0.0706, + "step": 65530 + }, + { + "epoch": 3.06, + "learning_rate": 9.888389008198393e-06, + "loss": 0.1328, + "step": 65535 + }, + { + "epoch": 3.06, + "learning_rate": 9.887605223143605e-06, + "loss": 0.1188, + "step": 65540 + }, + { + "epoch": 3.06, + "learning_rate": 9.886821438088819e-06, + "loss": 0.2656, + "step": 65545 + }, + { + "epoch": 3.06, + "learning_rate": 9.886037653034033e-06, + "loss": 0.1282, + "step": 65550 + }, + { + "epoch": 3.06, + "learning_rate": 9.885253867979247e-06, + "loss": 0.0373, + "step": 65555 + }, + { + "epoch": 3.06, + "learning_rate": 9.88447008292446e-06, + "loss": 0.0479, + "step": 65560 + }, + { + "epoch": 3.06, + "learning_rate": 9.883686297869673e-06, + "loss": 0.0388, + "step": 65565 + }, + { + "epoch": 3.06, + "learning_rate": 9.882902512814887e-06, + "loss": 0.0458, + "step": 65570 + }, + { + "epoch": 3.06, + "learning_rate": 9.882118727760099e-06, + "loss": 0.06, + "step": 65575 + }, + { + "epoch": 3.06, + "learning_rate": 9.881334942705313e-06, + "loss": 0.1196, + "step": 65580 + }, + { + "epoch": 3.06, + "learning_rate": 9.880551157650527e-06, + "loss": 0.2084, + "step": 65585 + }, + { + "epoch": 3.06, + "learning_rate": 9.879767372595739e-06, + "loss": 0.247, + "step": 65590 + }, + { + "epoch": 3.06, + "learning_rate": 9.878983587540953e-06, + "loss": 0.3117, + "step": 65595 + }, + { + "epoch": 3.06, + "learning_rate": 9.878199802486167e-06, + "loss": 0.1258, + "step": 65600 + }, + { + "epoch": 3.06, + "learning_rate": 9.87741601743138e-06, + "loss": 0.0392, + "step": 65605 + }, + { + "epoch": 3.06, + "learning_rate": 9.876632232376595e-06, + "loss": 0.0395, + "step": 65610 + }, + { + "epoch": 3.06, + "learning_rate": 9.875848447321807e-06, + "loss": 0.0822, + "step": 65615 + }, + { + "epoch": 3.06, + "learning_rate": 9.87506466226702e-06, + "loss": 0.1316, + "step": 65620 + }, + { + "epoch": 3.06, + "learning_rate": 9.874280877212235e-06, + "loss": 0.0738, + "step": 65625 + }, + { + "epoch": 3.06, + "learning_rate": 9.873497092157449e-06, + "loss": 0.1253, + "step": 65630 + }, + { + "epoch": 3.06, + "learning_rate": 9.872713307102661e-06, + "loss": 0.1578, + "step": 65635 + }, + { + "epoch": 3.06, + "learning_rate": 9.871929522047875e-06, + "loss": 0.2201, + "step": 65640 + }, + { + "epoch": 3.06, + "learning_rate": 9.871145736993087e-06, + "loss": 0.2527, + "step": 65645 + }, + { + "epoch": 3.06, + "learning_rate": 9.870361951938301e-06, + "loss": 0.0481, + "step": 65650 + }, + { + "epoch": 3.06, + "learning_rate": 9.869578166883515e-06, + "loss": 0.0439, + "step": 65655 + }, + { + "epoch": 3.06, + "learning_rate": 9.868794381828729e-06, + "loss": 0.0479, + "step": 65660 + }, + { + "epoch": 3.06, + "learning_rate": 9.868010596773941e-06, + "loss": 0.0606, + "step": 65665 + }, + { + "epoch": 3.06, + "learning_rate": 9.867226811719155e-06, + "loss": 0.0551, + "step": 65670 + }, + { + "epoch": 3.06, + "learning_rate": 9.866443026664369e-06, + "loss": 0.1018, + "step": 65675 + }, + { + "epoch": 3.06, + "learning_rate": 9.865659241609583e-06, + "loss": 0.0722, + "step": 65680 + }, + { + "epoch": 3.06, + "learning_rate": 9.864875456554795e-06, + "loss": 0.1421, + "step": 65685 + }, + { + "epoch": 3.07, + "learning_rate": 9.864091671500009e-06, + "loss": 0.2154, + "step": 65690 + }, + { + "epoch": 3.07, + "learning_rate": 9.863307886445223e-06, + "loss": 0.3339, + "step": 65695 + }, + { + "epoch": 3.07, + "learning_rate": 9.862524101390437e-06, + "loss": 0.0759, + "step": 65700 + }, + { + "epoch": 3.07, + "learning_rate": 9.861740316335649e-06, + "loss": 0.0287, + "step": 65705 + }, + { + "epoch": 3.07, + "learning_rate": 9.860956531280863e-06, + "loss": 0.096, + "step": 65710 + }, + { + "epoch": 3.07, + "learning_rate": 9.860172746226075e-06, + "loss": 0.0382, + "step": 65715 + }, + { + "epoch": 3.07, + "learning_rate": 9.859388961171289e-06, + "loss": 0.0794, + "step": 65720 + }, + { + "epoch": 3.07, + "learning_rate": 9.858605176116503e-06, + "loss": 0.1148, + "step": 65725 + }, + { + "epoch": 3.07, + "learning_rate": 9.857821391061717e-06, + "loss": 0.1004, + "step": 65730 + }, + { + "epoch": 3.07, + "learning_rate": 9.857037606006929e-06, + "loss": 0.1558, + "step": 65735 + }, + { + "epoch": 3.07, + "learning_rate": 9.856253820952143e-06, + "loss": 0.1553, + "step": 65740 + }, + { + "epoch": 3.07, + "learning_rate": 9.855470035897357e-06, + "loss": 0.3653, + "step": 65745 + }, + { + "epoch": 3.07, + "learning_rate": 9.85468625084257e-06, + "loss": 0.0707, + "step": 65750 + }, + { + "epoch": 3.07, + "learning_rate": 9.853902465787783e-06, + "loss": 0.0303, + "step": 65755 + }, + { + "epoch": 3.07, + "learning_rate": 9.853118680732997e-06, + "loss": 0.0592, + "step": 65760 + }, + { + "epoch": 3.07, + "learning_rate": 9.85233489567821e-06, + "loss": 0.0684, + "step": 65765 + }, + { + "epoch": 3.07, + "learning_rate": 9.851551110623423e-06, + "loss": 0.073, + "step": 65770 + }, + { + "epoch": 3.07, + "learning_rate": 9.850767325568637e-06, + "loss": 0.085, + "step": 65775 + }, + { + "epoch": 3.07, + "learning_rate": 9.84998354051385e-06, + "loss": 0.1083, + "step": 65780 + }, + { + "epoch": 3.07, + "learning_rate": 9.849199755459063e-06, + "loss": 0.1539, + "step": 65785 + }, + { + "epoch": 3.07, + "learning_rate": 9.848415970404277e-06, + "loss": 0.1221, + "step": 65790 + }, + { + "epoch": 3.07, + "learning_rate": 9.84763218534949e-06, + "loss": 0.3577, + "step": 65795 + }, + { + "epoch": 3.07, + "learning_rate": 9.846848400294704e-06, + "loss": 0.12, + "step": 65800 + }, + { + "epoch": 3.07, + "learning_rate": 9.846064615239917e-06, + "loss": 0.022, + "step": 65805 + }, + { + "epoch": 3.07, + "learning_rate": 9.84528083018513e-06, + "loss": 0.082, + "step": 65810 + }, + { + "epoch": 3.07, + "learning_rate": 9.844497045130345e-06, + "loss": 0.0643, + "step": 65815 + }, + { + "epoch": 3.07, + "learning_rate": 9.843713260075558e-06, + "loss": 0.0713, + "step": 65820 + }, + { + "epoch": 3.07, + "learning_rate": 9.842929475020772e-06, + "loss": 0.0629, + "step": 65825 + }, + { + "epoch": 3.07, + "learning_rate": 9.842145689965985e-06, + "loss": 0.1637, + "step": 65830 + }, + { + "epoch": 3.07, + "learning_rate": 9.841361904911197e-06, + "loss": 0.1241, + "step": 65835 + }, + { + "epoch": 3.07, + "learning_rate": 9.84057811985641e-06, + "loss": 0.2507, + "step": 65840 + }, + { + "epoch": 3.07, + "learning_rate": 9.839794334801625e-06, + "loss": 0.4136, + "step": 65845 + }, + { + "epoch": 3.07, + "learning_rate": 9.839010549746838e-06, + "loss": 0.0744, + "step": 65850 + }, + { + "epoch": 3.07, + "learning_rate": 9.83822676469205e-06, + "loss": 0.0148, + "step": 65855 + }, + { + "epoch": 3.07, + "learning_rate": 9.837442979637265e-06, + "loss": 0.0641, + "step": 65860 + }, + { + "epoch": 3.07, + "learning_rate": 9.836659194582478e-06, + "loss": 0.0579, + "step": 65865 + }, + { + "epoch": 3.07, + "learning_rate": 9.835875409527692e-06, + "loss": 0.0924, + "step": 65870 + }, + { + "epoch": 3.07, + "learning_rate": 9.835091624472906e-06, + "loss": 0.0568, + "step": 65875 + }, + { + "epoch": 3.07, + "learning_rate": 9.834307839418119e-06, + "loss": 0.1328, + "step": 65880 + }, + { + "epoch": 3.07, + "learning_rate": 9.833524054363332e-06, + "loss": 0.2094, + "step": 65885 + }, + { + "epoch": 3.07, + "learning_rate": 9.832740269308546e-06, + "loss": 0.1356, + "step": 65890 + }, + { + "epoch": 3.07, + "learning_rate": 9.83195648425376e-06, + "loss": 0.3647, + "step": 65895 + }, + { + "epoch": 3.07, + "learning_rate": 9.831172699198972e-06, + "loss": 0.0792, + "step": 65900 + }, + { + "epoch": 3.08, + "learning_rate": 9.830388914144185e-06, + "loss": 0.0556, + "step": 65905 + }, + { + "epoch": 3.08, + "learning_rate": 9.829605129089399e-06, + "loss": 0.0496, + "step": 65910 + }, + { + "epoch": 3.08, + "learning_rate": 9.828821344034612e-06, + "loss": 0.0758, + "step": 65915 + }, + { + "epoch": 3.08, + "learning_rate": 9.828037558979826e-06, + "loss": 0.0409, + "step": 65920 + }, + { + "epoch": 3.08, + "learning_rate": 9.82725377392504e-06, + "loss": 0.053, + "step": 65925 + }, + { + "epoch": 3.08, + "learning_rate": 9.826469988870252e-06, + "loss": 0.0676, + "step": 65930 + }, + { + "epoch": 3.08, + "learning_rate": 9.825686203815466e-06, + "loss": 0.1414, + "step": 65935 + }, + { + "epoch": 3.08, + "learning_rate": 9.82490241876068e-06, + "loss": 0.233, + "step": 65940 + }, + { + "epoch": 3.08, + "learning_rate": 9.824118633705894e-06, + "loss": 0.2629, + "step": 65945 + }, + { + "epoch": 3.08, + "learning_rate": 9.823334848651106e-06, + "loss": 0.0594, + "step": 65950 + }, + { + "epoch": 3.08, + "learning_rate": 9.82255106359632e-06, + "loss": 0.0329, + "step": 65955 + }, + { + "epoch": 3.08, + "learning_rate": 9.821767278541534e-06, + "loss": 0.0585, + "step": 65960 + }, + { + "epoch": 3.08, + "learning_rate": 9.820983493486746e-06, + "loss": 0.0716, + "step": 65965 + }, + { + "epoch": 3.08, + "learning_rate": 9.82019970843196e-06, + "loss": 0.0227, + "step": 65970 + }, + { + "epoch": 3.08, + "learning_rate": 9.819415923377174e-06, + "loss": 0.1145, + "step": 65975 + }, + { + "epoch": 3.08, + "learning_rate": 9.818632138322386e-06, + "loss": 0.0928, + "step": 65980 + }, + { + "epoch": 3.08, + "learning_rate": 9.8178483532676e-06, + "loss": 0.1671, + "step": 65985 + }, + { + "epoch": 3.08, + "learning_rate": 9.817064568212814e-06, + "loss": 0.3238, + "step": 65990 + }, + { + "epoch": 3.08, + "learning_rate": 9.816280783158028e-06, + "loss": 0.2865, + "step": 65995 + }, + { + "epoch": 3.08, + "learning_rate": 9.81549699810324e-06, + "loss": 0.0648, + "step": 66000 + }, + { + "epoch": 3.08, + "learning_rate": 9.814713213048454e-06, + "loss": 0.0084, + "step": 66005 + }, + { + "epoch": 3.08, + "learning_rate": 9.813929427993668e-06, + "loss": 0.0627, + "step": 66010 + }, + { + "epoch": 3.08, + "learning_rate": 9.813145642938882e-06, + "loss": 0.0375, + "step": 66015 + }, + { + "epoch": 3.08, + "learning_rate": 9.812361857884094e-06, + "loss": 0.0965, + "step": 66020 + }, + { + "epoch": 3.08, + "learning_rate": 9.811578072829308e-06, + "loss": 0.0915, + "step": 66025 + }, + { + "epoch": 3.08, + "learning_rate": 9.81079428777452e-06, + "loss": 0.1729, + "step": 66030 + }, + { + "epoch": 3.08, + "learning_rate": 9.810010502719734e-06, + "loss": 0.0954, + "step": 66035 + }, + { + "epoch": 3.08, + "learning_rate": 9.809226717664948e-06, + "loss": 0.1511, + "step": 66040 + }, + { + "epoch": 3.08, + "learning_rate": 9.808442932610162e-06, + "loss": 0.2462, + "step": 66045 + }, + { + "epoch": 3.08, + "learning_rate": 9.807659147555374e-06, + "loss": 0.0609, + "step": 66050 + }, + { + "epoch": 3.08, + "learning_rate": 9.806875362500588e-06, + "loss": 0.063, + "step": 66055 + }, + { + "epoch": 3.08, + "learning_rate": 9.806091577445802e-06, + "loss": 0.042, + "step": 66060 + }, + { + "epoch": 3.08, + "learning_rate": 9.805307792391016e-06, + "loss": 0.0742, + "step": 66065 + }, + { + "epoch": 3.08, + "learning_rate": 9.804524007336228e-06, + "loss": 0.0461, + "step": 66070 + }, + { + "epoch": 3.08, + "learning_rate": 9.803740222281442e-06, + "loss": 0.1103, + "step": 66075 + }, + { + "epoch": 3.08, + "learning_rate": 9.802956437226656e-06, + "loss": 0.1392, + "step": 66080 + }, + { + "epoch": 3.08, + "learning_rate": 9.80217265217187e-06, + "loss": 0.112, + "step": 66085 + }, + { + "epoch": 3.08, + "learning_rate": 9.801388867117084e-06, + "loss": 0.1597, + "step": 66090 + }, + { + "epoch": 3.08, + "learning_rate": 9.800605082062296e-06, + "loss": 0.3931, + "step": 66095 + }, + { + "epoch": 3.08, + "learning_rate": 9.799821297007508e-06, + "loss": 0.038, + "step": 66100 + }, + { + "epoch": 3.08, + "learning_rate": 9.799037511952722e-06, + "loss": 0.0841, + "step": 66105 + }, + { + "epoch": 3.08, + "learning_rate": 9.798253726897936e-06, + "loss": 0.0283, + "step": 66110 + }, + { + "epoch": 3.09, + "learning_rate": 9.79746994184315e-06, + "loss": 0.0556, + "step": 66115 + }, + { + "epoch": 3.09, + "learning_rate": 9.796686156788362e-06, + "loss": 0.0488, + "step": 66120 + }, + { + "epoch": 3.09, + "learning_rate": 9.795902371733576e-06, + "loss": 0.0714, + "step": 66125 + }, + { + "epoch": 3.09, + "learning_rate": 9.79511858667879e-06, + "loss": 0.0883, + "step": 66130 + }, + { + "epoch": 3.09, + "learning_rate": 9.794334801624004e-06, + "loss": 0.0689, + "step": 66135 + }, + { + "epoch": 3.09, + "learning_rate": 9.793551016569218e-06, + "loss": 0.2007, + "step": 66140 + }, + { + "epoch": 3.09, + "learning_rate": 9.79276723151443e-06, + "loss": 0.3548, + "step": 66145 + }, + { + "epoch": 3.09, + "learning_rate": 9.791983446459644e-06, + "loss": 0.0955, + "step": 66150 + }, + { + "epoch": 3.09, + "learning_rate": 9.791199661404858e-06, + "loss": 0.0395, + "step": 66155 + }, + { + "epoch": 3.09, + "learning_rate": 9.79041587635007e-06, + "loss": 0.0361, + "step": 66160 + }, + { + "epoch": 3.09, + "learning_rate": 9.789632091295284e-06, + "loss": 0.0398, + "step": 66165 + }, + { + "epoch": 3.09, + "learning_rate": 9.788848306240496e-06, + "loss": 0.0547, + "step": 66170 + }, + { + "epoch": 3.09, + "learning_rate": 9.78806452118571e-06, + "loss": 0.0618, + "step": 66175 + }, + { + "epoch": 3.09, + "learning_rate": 9.787280736130924e-06, + "loss": 0.1462, + "step": 66180 + }, + { + "epoch": 3.09, + "learning_rate": 9.786496951076138e-06, + "loss": 0.1491, + "step": 66185 + }, + { + "epoch": 3.09, + "learning_rate": 9.785713166021352e-06, + "loss": 0.177, + "step": 66190 + }, + { + "epoch": 3.09, + "learning_rate": 9.784929380966564e-06, + "loss": 0.2986, + "step": 66195 + }, + { + "epoch": 3.09, + "learning_rate": 9.784145595911778e-06, + "loss": 0.0846, + "step": 66200 + }, + { + "epoch": 3.09, + "learning_rate": 9.783361810856992e-06, + "loss": 0.0278, + "step": 66205 + }, + { + "epoch": 3.09, + "learning_rate": 9.782578025802206e-06, + "loss": 0.0166, + "step": 66210 + }, + { + "epoch": 3.09, + "learning_rate": 9.781794240747418e-06, + "loss": 0.051, + "step": 66215 + }, + { + "epoch": 3.09, + "learning_rate": 9.781010455692632e-06, + "loss": 0.0734, + "step": 66220 + }, + { + "epoch": 3.09, + "learning_rate": 9.780226670637844e-06, + "loss": 0.0638, + "step": 66225 + }, + { + "epoch": 3.09, + "learning_rate": 9.779442885583058e-06, + "loss": 0.0957, + "step": 66230 + }, + { + "epoch": 3.09, + "learning_rate": 9.778659100528272e-06, + "loss": 0.1582, + "step": 66235 + }, + { + "epoch": 3.09, + "learning_rate": 9.777875315473486e-06, + "loss": 0.2031, + "step": 66240 + }, + { + "epoch": 3.09, + "learning_rate": 9.777091530418698e-06, + "loss": 0.3371, + "step": 66245 + }, + { + "epoch": 3.09, + "learning_rate": 9.776307745363912e-06, + "loss": 0.0844, + "step": 66250 + }, + { + "epoch": 3.09, + "learning_rate": 9.775523960309126e-06, + "loss": 0.0135, + "step": 66255 + }, + { + "epoch": 3.09, + "learning_rate": 9.77474017525434e-06, + "loss": 0.0855, + "step": 66260 + }, + { + "epoch": 3.09, + "learning_rate": 9.773956390199552e-06, + "loss": 0.0722, + "step": 66265 + }, + { + "epoch": 3.09, + "learning_rate": 9.773172605144766e-06, + "loss": 0.1067, + "step": 66270 + }, + { + "epoch": 3.09, + "learning_rate": 9.77238882008998e-06, + "loss": 0.118, + "step": 66275 + }, + { + "epoch": 3.09, + "learning_rate": 9.771605035035194e-06, + "loss": 0.193, + "step": 66280 + }, + { + "epoch": 3.09, + "learning_rate": 9.770821249980406e-06, + "loss": 0.172, + "step": 66285 + }, + { + "epoch": 3.09, + "learning_rate": 9.77003746492562e-06, + "loss": 0.2177, + "step": 66290 + }, + { + "epoch": 3.09, + "learning_rate": 9.769253679870832e-06, + "loss": 0.46, + "step": 66295 + }, + { + "epoch": 3.09, + "learning_rate": 9.768469894816046e-06, + "loss": 0.085, + "step": 66300 + }, + { + "epoch": 3.09, + "learning_rate": 9.76768610976126e-06, + "loss": 0.027, + "step": 66305 + }, + { + "epoch": 3.09, + "learning_rate": 9.766902324706474e-06, + "loss": 0.0324, + "step": 66310 + }, + { + "epoch": 3.09, + "learning_rate": 9.766118539651686e-06, + "loss": 0.0424, + "step": 66315 + }, + { + "epoch": 3.09, + "learning_rate": 9.7653347545969e-06, + "loss": 0.1014, + "step": 66320 + }, + { + "epoch": 3.09, + "learning_rate": 9.764550969542114e-06, + "loss": 0.1147, + "step": 66325 + }, + { + "epoch": 3.1, + "learning_rate": 9.763767184487328e-06, + "loss": 0.1188, + "step": 66330 + }, + { + "epoch": 3.1, + "learning_rate": 9.76298339943254e-06, + "loss": 0.2082, + "step": 66335 + }, + { + "epoch": 3.1, + "learning_rate": 9.762199614377754e-06, + "loss": 0.2368, + "step": 66340 + }, + { + "epoch": 3.1, + "learning_rate": 9.761415829322968e-06, + "loss": 0.2593, + "step": 66345 + }, + { + "epoch": 3.1, + "learning_rate": 9.760632044268182e-06, + "loss": 0.0695, + "step": 66350 + }, + { + "epoch": 3.1, + "learning_rate": 9.759848259213394e-06, + "loss": 0.0248, + "step": 66355 + }, + { + "epoch": 3.1, + "learning_rate": 9.759064474158608e-06, + "loss": 0.048, + "step": 66360 + }, + { + "epoch": 3.1, + "learning_rate": 9.75828068910382e-06, + "loss": 0.0906, + "step": 66365 + }, + { + "epoch": 3.1, + "learning_rate": 9.757496904049034e-06, + "loss": 0.1016, + "step": 66370 + }, + { + "epoch": 3.1, + "learning_rate": 9.756713118994248e-06, + "loss": 0.0903, + "step": 66375 + }, + { + "epoch": 3.1, + "learning_rate": 9.755929333939462e-06, + "loss": 0.1557, + "step": 66380 + }, + { + "epoch": 3.1, + "learning_rate": 9.755145548884674e-06, + "loss": 0.2118, + "step": 66385 + }, + { + "epoch": 3.1, + "learning_rate": 9.754361763829888e-06, + "loss": 0.3594, + "step": 66390 + }, + { + "epoch": 3.1, + "learning_rate": 9.753577978775102e-06, + "loss": 0.3155, + "step": 66395 + }, + { + "epoch": 3.1, + "learning_rate": 9.752794193720316e-06, + "loss": 0.0644, + "step": 66400 + }, + { + "epoch": 3.1, + "learning_rate": 9.75201040866553e-06, + "loss": 0.0568, + "step": 66405 + }, + { + "epoch": 3.1, + "learning_rate": 9.751226623610742e-06, + "loss": 0.0713, + "step": 66410 + }, + { + "epoch": 3.1, + "learning_rate": 9.750442838555956e-06, + "loss": 0.0646, + "step": 66415 + }, + { + "epoch": 3.1, + "learning_rate": 9.749659053501168e-06, + "loss": 0.0589, + "step": 66420 + }, + { + "epoch": 3.1, + "learning_rate": 9.748875268446382e-06, + "loss": 0.1199, + "step": 66425 + }, + { + "epoch": 3.1, + "learning_rate": 9.748091483391596e-06, + "loss": 0.0658, + "step": 66430 + }, + { + "epoch": 3.1, + "learning_rate": 9.747307698336808e-06, + "loss": 0.1286, + "step": 66435 + }, + { + "epoch": 3.1, + "learning_rate": 9.746523913282022e-06, + "loss": 0.2039, + "step": 66440 + }, + { + "epoch": 3.1, + "learning_rate": 9.745740128227236e-06, + "loss": 0.3109, + "step": 66445 + }, + { + "epoch": 3.1, + "learning_rate": 9.74495634317245e-06, + "loss": 0.0421, + "step": 66450 + }, + { + "epoch": 3.1, + "learning_rate": 9.744172558117663e-06, + "loss": 0.0413, + "step": 66455 + }, + { + "epoch": 3.1, + "learning_rate": 9.743388773062876e-06, + "loss": 0.0249, + "step": 66460 + }, + { + "epoch": 3.1, + "learning_rate": 9.74260498800809e-06, + "loss": 0.0545, + "step": 66465 + }, + { + "epoch": 3.1, + "learning_rate": 9.741821202953303e-06, + "loss": 0.099, + "step": 66470 + }, + { + "epoch": 3.1, + "learning_rate": 9.741037417898517e-06, + "loss": 0.0948, + "step": 66475 + }, + { + "epoch": 3.1, + "learning_rate": 9.74025363284373e-06, + "loss": 0.0998, + "step": 66480 + }, + { + "epoch": 3.1, + "learning_rate": 9.739469847788942e-06, + "loss": 0.1262, + "step": 66485 + }, + { + "epoch": 3.1, + "learning_rate": 9.738686062734156e-06, + "loss": 0.1912, + "step": 66490 + }, + { + "epoch": 3.1, + "learning_rate": 9.73790227767937e-06, + "loss": 0.2891, + "step": 66495 + }, + { + "epoch": 3.1, + "learning_rate": 9.737118492624584e-06, + "loss": 0.0749, + "step": 66500 + }, + { + "epoch": 3.1, + "learning_rate": 9.736334707569797e-06, + "loss": 0.1176, + "step": 66505 + }, + { + "epoch": 3.1, + "learning_rate": 9.73555092251501e-06, + "loss": 0.0257, + "step": 66510 + }, + { + "epoch": 3.1, + "learning_rate": 9.734767137460224e-06, + "loss": 0.0613, + "step": 66515 + }, + { + "epoch": 3.1, + "learning_rate": 9.733983352405437e-06, + "loss": 0.1069, + "step": 66520 + }, + { + "epoch": 3.1, + "learning_rate": 9.733199567350651e-06, + "loss": 0.0754, + "step": 66525 + }, + { + "epoch": 3.1, + "learning_rate": 9.732415782295864e-06, + "loss": 0.0548, + "step": 66530 + }, + { + "epoch": 3.1, + "learning_rate": 9.731631997241077e-06, + "loss": 0.1278, + "step": 66535 + }, + { + "epoch": 3.1, + "learning_rate": 9.730848212186291e-06, + "loss": 0.1902, + "step": 66540 + }, + { + "epoch": 3.11, + "learning_rate": 9.730064427131505e-06, + "loss": 0.3116, + "step": 66545 + }, + { + "epoch": 3.11, + "learning_rate": 9.729280642076718e-06, + "loss": 0.0847, + "step": 66550 + }, + { + "epoch": 3.11, + "learning_rate": 9.728496857021931e-06, + "loss": 0.0388, + "step": 66555 + }, + { + "epoch": 3.11, + "learning_rate": 9.727713071967144e-06, + "loss": 0.0235, + "step": 66560 + }, + { + "epoch": 3.11, + "learning_rate": 9.726929286912358e-06, + "loss": 0.0432, + "step": 66565 + }, + { + "epoch": 3.11, + "learning_rate": 9.726145501857571e-06, + "loss": 0.0687, + "step": 66570 + }, + { + "epoch": 3.11, + "learning_rate": 9.725361716802785e-06, + "loss": 0.1861, + "step": 66575 + }, + { + "epoch": 3.11, + "learning_rate": 9.724577931747998e-06, + "loss": 0.1056, + "step": 66580 + }, + { + "epoch": 3.11, + "learning_rate": 9.723794146693211e-06, + "loss": 0.1404, + "step": 66585 + }, + { + "epoch": 3.11, + "learning_rate": 9.723010361638425e-06, + "loss": 0.2881, + "step": 66590 + }, + { + "epoch": 3.11, + "learning_rate": 9.72222657658364e-06, + "loss": 0.2303, + "step": 66595 + }, + { + "epoch": 3.11, + "learning_rate": 9.721442791528851e-06, + "loss": 0.0623, + "step": 66600 + }, + { + "epoch": 3.11, + "learning_rate": 9.720659006474065e-06, + "loss": 0.016, + "step": 66605 + }, + { + "epoch": 3.11, + "learning_rate": 9.71987522141928e-06, + "loss": 0.0418, + "step": 66610 + }, + { + "epoch": 3.11, + "learning_rate": 9.719091436364492e-06, + "loss": 0.0622, + "step": 66615 + }, + { + "epoch": 3.11, + "learning_rate": 9.718307651309705e-06, + "loss": 0.163, + "step": 66620 + }, + { + "epoch": 3.11, + "learning_rate": 9.71752386625492e-06, + "loss": 0.1038, + "step": 66625 + }, + { + "epoch": 3.11, + "learning_rate": 9.716740081200132e-06, + "loss": 0.1165, + "step": 66630 + }, + { + "epoch": 3.11, + "learning_rate": 9.715956296145345e-06, + "loss": 0.0962, + "step": 66635 + }, + { + "epoch": 3.11, + "learning_rate": 9.71517251109056e-06, + "loss": 0.164, + "step": 66640 + }, + { + "epoch": 3.11, + "learning_rate": 9.714388726035773e-06, + "loss": 0.4056, + "step": 66645 + }, + { + "epoch": 3.11, + "learning_rate": 9.713604940980985e-06, + "loss": 0.0518, + "step": 66650 + }, + { + "epoch": 3.11, + "learning_rate": 9.7128211559262e-06, + "loss": 0.069, + "step": 66655 + }, + { + "epoch": 3.11, + "learning_rate": 9.712037370871413e-06, + "loss": 0.0428, + "step": 66660 + }, + { + "epoch": 3.11, + "learning_rate": 9.711253585816627e-06, + "loss": 0.1114, + "step": 66665 + }, + { + "epoch": 3.11, + "learning_rate": 9.710469800761841e-06, + "loss": 0.0601, + "step": 66670 + }, + { + "epoch": 3.11, + "learning_rate": 9.709686015707053e-06, + "loss": 0.0506, + "step": 66675 + }, + { + "epoch": 3.11, + "learning_rate": 9.708902230652266e-06, + "loss": 0.1785, + "step": 66680 + }, + { + "epoch": 3.11, + "learning_rate": 9.70811844559748e-06, + "loss": 0.198, + "step": 66685 + }, + { + "epoch": 3.11, + "learning_rate": 9.707334660542693e-06, + "loss": 0.2683, + "step": 66690 + }, + { + "epoch": 3.11, + "learning_rate": 9.706550875487907e-06, + "loss": 0.3954, + "step": 66695 + }, + { + "epoch": 3.11, + "learning_rate": 9.70576709043312e-06, + "loss": 0.081, + "step": 66700 + }, + { + "epoch": 3.11, + "learning_rate": 9.704983305378333e-06, + "loss": 0.0303, + "step": 66705 + }, + { + "epoch": 3.11, + "learning_rate": 9.704199520323547e-06, + "loss": 0.0428, + "step": 66710 + }, + { + "epoch": 3.11, + "learning_rate": 9.703415735268761e-06, + "loss": 0.0387, + "step": 66715 + }, + { + "epoch": 3.11, + "learning_rate": 9.702631950213975e-06, + "loss": 0.0866, + "step": 66720 + }, + { + "epoch": 3.11, + "learning_rate": 9.701848165159187e-06, + "loss": 0.1363, + "step": 66725 + }, + { + "epoch": 3.11, + "learning_rate": 9.701064380104401e-06, + "loss": 0.208, + "step": 66730 + }, + { + "epoch": 3.11, + "learning_rate": 9.700280595049615e-06, + "loss": 0.1501, + "step": 66735 + }, + { + "epoch": 3.11, + "learning_rate": 9.699496809994829e-06, + "loss": 0.1701, + "step": 66740 + }, + { + "epoch": 3.11, + "learning_rate": 9.698713024940041e-06, + "loss": 0.23, + "step": 66745 + }, + { + "epoch": 3.11, + "learning_rate": 9.697929239885253e-06, + "loss": 0.0608, + "step": 66750 + }, + { + "epoch": 3.11, + "learning_rate": 9.697145454830467e-06, + "loss": 0.0249, + "step": 66755 + }, + { + "epoch": 3.12, + "learning_rate": 9.696361669775681e-06, + "loss": 0.0245, + "step": 66760 + }, + { + "epoch": 3.12, + "learning_rate": 9.695577884720895e-06, + "loss": 0.0743, + "step": 66765 + }, + { + "epoch": 3.12, + "learning_rate": 9.694794099666109e-06, + "loss": 0.0734, + "step": 66770 + }, + { + "epoch": 3.12, + "learning_rate": 9.694010314611321e-06, + "loss": 0.0864, + "step": 66775 + }, + { + "epoch": 3.12, + "learning_rate": 9.693226529556535e-06, + "loss": 0.1432, + "step": 66780 + }, + { + "epoch": 3.12, + "learning_rate": 9.692442744501749e-06, + "loss": 0.1472, + "step": 66785 + }, + { + "epoch": 3.12, + "learning_rate": 9.691658959446963e-06, + "loss": 0.0936, + "step": 66790 + }, + { + "epoch": 3.12, + "learning_rate": 9.690875174392175e-06, + "loss": 0.3271, + "step": 66795 + }, + { + "epoch": 3.12, + "learning_rate": 9.690091389337389e-06, + "loss": 0.0738, + "step": 66800 + }, + { + "epoch": 3.12, + "learning_rate": 9.689307604282603e-06, + "loss": 0.0401, + "step": 66805 + }, + { + "epoch": 3.12, + "learning_rate": 9.688523819227815e-06, + "loss": 0.0529, + "step": 66810 + }, + { + "epoch": 3.12, + "learning_rate": 9.687740034173029e-06, + "loss": 0.0409, + "step": 66815 + }, + { + "epoch": 3.12, + "learning_rate": 9.686956249118243e-06, + "loss": 0.0777, + "step": 66820 + }, + { + "epoch": 3.12, + "learning_rate": 9.686172464063455e-06, + "loss": 0.1198, + "step": 66825 + }, + { + "epoch": 3.12, + "learning_rate": 9.685388679008669e-06, + "loss": 0.1342, + "step": 66830 + }, + { + "epoch": 3.12, + "learning_rate": 9.684604893953883e-06, + "loss": 0.0932, + "step": 66835 + }, + { + "epoch": 3.12, + "learning_rate": 9.683821108899097e-06, + "loss": 0.2755, + "step": 66840 + }, + { + "epoch": 3.12, + "learning_rate": 9.68303732384431e-06, + "loss": 0.2539, + "step": 66845 + }, + { + "epoch": 3.12, + "learning_rate": 9.682253538789523e-06, + "loss": 0.059, + "step": 66850 + }, + { + "epoch": 3.12, + "learning_rate": 9.681469753734737e-06, + "loss": 0.0206, + "step": 66855 + }, + { + "epoch": 3.12, + "learning_rate": 9.680685968679951e-06, + "loss": 0.0485, + "step": 66860 + }, + { + "epoch": 3.12, + "learning_rate": 9.679902183625163e-06, + "loss": 0.0562, + "step": 66865 + }, + { + "epoch": 3.12, + "learning_rate": 9.679118398570377e-06, + "loss": 0.0851, + "step": 66870 + }, + { + "epoch": 3.12, + "learning_rate": 9.67833461351559e-06, + "loss": 0.1055, + "step": 66875 + }, + { + "epoch": 3.12, + "learning_rate": 9.677550828460803e-06, + "loss": 0.0656, + "step": 66880 + }, + { + "epoch": 3.12, + "learning_rate": 9.676767043406017e-06, + "loss": 0.1683, + "step": 66885 + }, + { + "epoch": 3.12, + "learning_rate": 9.675983258351231e-06, + "loss": 0.1515, + "step": 66890 + }, + { + "epoch": 3.12, + "learning_rate": 9.675199473296443e-06, + "loss": 0.3496, + "step": 66895 + }, + { + "epoch": 3.12, + "learning_rate": 9.674415688241657e-06, + "loss": 0.0908, + "step": 66900 + }, + { + "epoch": 3.12, + "learning_rate": 9.673631903186871e-06, + "loss": 0.0156, + "step": 66905 + }, + { + "epoch": 3.12, + "learning_rate": 9.672848118132085e-06, + "loss": 0.0492, + "step": 66910 + }, + { + "epoch": 3.12, + "learning_rate": 9.672064333077297e-06, + "loss": 0.0519, + "step": 66915 + }, + { + "epoch": 3.12, + "learning_rate": 9.671280548022511e-06, + "loss": 0.1146, + "step": 66920 + }, + { + "epoch": 3.12, + "learning_rate": 9.670496762967725e-06, + "loss": 0.1169, + "step": 66925 + }, + { + "epoch": 3.12, + "learning_rate": 9.669712977912939e-06, + "loss": 0.0662, + "step": 66930 + }, + { + "epoch": 3.12, + "learning_rate": 9.668929192858153e-06, + "loss": 0.1658, + "step": 66935 + }, + { + "epoch": 3.12, + "learning_rate": 9.668145407803365e-06, + "loss": 0.1625, + "step": 66940 + }, + { + "epoch": 3.12, + "learning_rate": 9.667361622748577e-06, + "loss": 0.1685, + "step": 66945 + }, + { + "epoch": 3.12, + "learning_rate": 9.666577837693791e-06, + "loss": 0.0706, + "step": 66950 + }, + { + "epoch": 3.12, + "learning_rate": 9.665794052639005e-06, + "loss": 0.0915, + "step": 66955 + }, + { + "epoch": 3.12, + "learning_rate": 9.665010267584219e-06, + "loss": 0.0695, + "step": 66960 + }, + { + "epoch": 3.12, + "learning_rate": 9.664226482529431e-06, + "loss": 0.0764, + "step": 66965 + }, + { + "epoch": 3.12, + "learning_rate": 9.663442697474645e-06, + "loss": 0.0657, + "step": 66970 + }, + { + "epoch": 3.13, + "learning_rate": 9.662658912419859e-06, + "loss": 0.0836, + "step": 66975 + }, + { + "epoch": 3.13, + "learning_rate": 9.661875127365073e-06, + "loss": 0.0888, + "step": 66980 + }, + { + "epoch": 3.13, + "learning_rate": 9.661091342310287e-06, + "loss": 0.1487, + "step": 66985 + }, + { + "epoch": 3.13, + "learning_rate": 9.660307557255499e-06, + "loss": 0.3276, + "step": 66990 + }, + { + "epoch": 3.13, + "learning_rate": 9.659523772200713e-06, + "loss": 0.3517, + "step": 66995 + }, + { + "epoch": 3.13, + "learning_rate": 9.658739987145927e-06, + "loss": 0.0516, + "step": 67000 + }, + { + "epoch": 3.13, + "learning_rate": 9.657956202091139e-06, + "loss": 0.0154, + "step": 67005 + }, + { + "epoch": 3.13, + "learning_rate": 9.657172417036353e-06, + "loss": 0.0361, + "step": 67010 + }, + { + "epoch": 3.13, + "learning_rate": 9.656388631981565e-06, + "loss": 0.0564, + "step": 67015 + }, + { + "epoch": 3.13, + "learning_rate": 9.655604846926779e-06, + "loss": 0.0617, + "step": 67020 + }, + { + "epoch": 3.13, + "learning_rate": 9.654821061871993e-06, + "loss": 0.0484, + "step": 67025 + }, + { + "epoch": 3.13, + "learning_rate": 9.654037276817207e-06, + "loss": 0.0938, + "step": 67030 + }, + { + "epoch": 3.13, + "learning_rate": 9.65325349176242e-06, + "loss": 0.104, + "step": 67035 + }, + { + "epoch": 3.13, + "learning_rate": 9.652469706707633e-06, + "loss": 0.2527, + "step": 67040 + }, + { + "epoch": 3.13, + "learning_rate": 9.651685921652847e-06, + "loss": 0.2725, + "step": 67045 + }, + { + "epoch": 3.13, + "learning_rate": 9.65090213659806e-06, + "loss": 0.1013, + "step": 67050 + }, + { + "epoch": 3.13, + "learning_rate": 9.650118351543275e-06, + "loss": 0.0622, + "step": 67055 + }, + { + "epoch": 3.13, + "learning_rate": 9.649334566488487e-06, + "loss": 0.0481, + "step": 67060 + }, + { + "epoch": 3.13, + "learning_rate": 9.6485507814337e-06, + "loss": 0.0547, + "step": 67065 + }, + { + "epoch": 3.13, + "learning_rate": 9.647766996378913e-06, + "loss": 0.1045, + "step": 67070 + }, + { + "epoch": 3.13, + "learning_rate": 9.646983211324127e-06, + "loss": 0.0833, + "step": 67075 + }, + { + "epoch": 3.13, + "learning_rate": 9.64619942626934e-06, + "loss": 0.1074, + "step": 67080 + }, + { + "epoch": 3.13, + "learning_rate": 9.645415641214555e-06, + "loss": 0.1298, + "step": 67085 + }, + { + "epoch": 3.13, + "learning_rate": 9.644631856159767e-06, + "loss": 0.1969, + "step": 67090 + }, + { + "epoch": 3.13, + "learning_rate": 9.64384807110498e-06, + "loss": 0.2585, + "step": 67095 + }, + { + "epoch": 3.13, + "learning_rate": 9.643064286050195e-06, + "loss": 0.0726, + "step": 67100 + }, + { + "epoch": 3.13, + "learning_rate": 9.642280500995409e-06, + "loss": 0.0397, + "step": 67105 + }, + { + "epoch": 3.13, + "learning_rate": 9.64149671594062e-06, + "loss": 0.0865, + "step": 67110 + }, + { + "epoch": 3.13, + "learning_rate": 9.640712930885835e-06, + "loss": 0.0552, + "step": 67115 + }, + { + "epoch": 3.13, + "learning_rate": 9.639929145831049e-06, + "loss": 0.0323, + "step": 67120 + }, + { + "epoch": 3.13, + "learning_rate": 9.639145360776262e-06, + "loss": 0.0693, + "step": 67125 + }, + { + "epoch": 3.13, + "learning_rate": 9.638361575721475e-06, + "loss": 0.0847, + "step": 67130 + }, + { + "epoch": 3.13, + "learning_rate": 9.637577790666689e-06, + "loss": 0.1027, + "step": 67135 + }, + { + "epoch": 3.13, + "learning_rate": 9.6367940056119e-06, + "loss": 0.2035, + "step": 67140 + }, + { + "epoch": 3.13, + "learning_rate": 9.636010220557115e-06, + "loss": 0.225, + "step": 67145 + }, + { + "epoch": 3.13, + "learning_rate": 9.635226435502329e-06, + "loss": 0.1034, + "step": 67150 + }, + { + "epoch": 3.13, + "learning_rate": 9.634442650447543e-06, + "loss": 0.0304, + "step": 67155 + }, + { + "epoch": 3.13, + "learning_rate": 9.633658865392755e-06, + "loss": 0.0912, + "step": 67160 + }, + { + "epoch": 3.13, + "learning_rate": 9.632875080337969e-06, + "loss": 0.0627, + "step": 67165 + }, + { + "epoch": 3.13, + "learning_rate": 9.632091295283183e-06, + "loss": 0.0269, + "step": 67170 + }, + { + "epoch": 3.13, + "learning_rate": 9.631307510228396e-06, + "loss": 0.0673, + "step": 67175 + }, + { + "epoch": 3.13, + "learning_rate": 9.630523725173609e-06, + "loss": 0.1514, + "step": 67180 + }, + { + "epoch": 3.13, + "learning_rate": 9.629739940118823e-06, + "loss": 0.146, + "step": 67185 + }, + { + "epoch": 3.14, + "learning_rate": 9.628956155064036e-06, + "loss": 0.185, + "step": 67190 + }, + { + "epoch": 3.14, + "learning_rate": 9.62817237000925e-06, + "loss": 0.3927, + "step": 67195 + }, + { + "epoch": 3.14, + "learning_rate": 9.627388584954463e-06, + "loss": 0.0939, + "step": 67200 + }, + { + "epoch": 3.14, + "learning_rate": 9.626604799899676e-06, + "loss": 0.034, + "step": 67205 + }, + { + "epoch": 3.14, + "learning_rate": 9.625821014844889e-06, + "loss": 0.0339, + "step": 67210 + }, + { + "epoch": 3.14, + "learning_rate": 9.625037229790103e-06, + "loss": 0.1123, + "step": 67215 + }, + { + "epoch": 3.14, + "learning_rate": 9.624253444735317e-06, + "loss": 0.0462, + "step": 67220 + }, + { + "epoch": 3.14, + "learning_rate": 9.62346965968053e-06, + "loss": 0.1015, + "step": 67225 + }, + { + "epoch": 3.14, + "learning_rate": 9.622685874625743e-06, + "loss": 0.1491, + "step": 67230 + }, + { + "epoch": 3.14, + "learning_rate": 9.621902089570957e-06, + "loss": 0.1363, + "step": 67235 + }, + { + "epoch": 3.14, + "learning_rate": 9.62111830451617e-06, + "loss": 0.2497, + "step": 67240 + }, + { + "epoch": 3.14, + "learning_rate": 9.620334519461384e-06, + "loss": 0.2666, + "step": 67245 + }, + { + "epoch": 3.14, + "learning_rate": 9.619550734406598e-06, + "loss": 0.0863, + "step": 67250 + }, + { + "epoch": 3.14, + "learning_rate": 9.61876694935181e-06, + "loss": 0.0429, + "step": 67255 + }, + { + "epoch": 3.14, + "learning_rate": 9.617983164297024e-06, + "loss": 0.0542, + "step": 67260 + }, + { + "epoch": 3.14, + "learning_rate": 9.617199379242237e-06, + "loss": 0.0543, + "step": 67265 + }, + { + "epoch": 3.14, + "learning_rate": 9.61641559418745e-06, + "loss": 0.0545, + "step": 67270 + }, + { + "epoch": 3.14, + "learning_rate": 9.615631809132664e-06, + "loss": 0.0679, + "step": 67275 + }, + { + "epoch": 3.14, + "learning_rate": 9.614848024077877e-06, + "loss": 0.1376, + "step": 67280 + }, + { + "epoch": 3.14, + "learning_rate": 9.61406423902309e-06, + "loss": 0.1443, + "step": 67285 + }, + { + "epoch": 3.14, + "learning_rate": 9.613280453968304e-06, + "loss": 0.1009, + "step": 67290 + }, + { + "epoch": 3.14, + "learning_rate": 9.612496668913518e-06, + "loss": 0.2522, + "step": 67295 + }, + { + "epoch": 3.14, + "learning_rate": 9.611712883858732e-06, + "loss": 0.0529, + "step": 67300 + }, + { + "epoch": 3.14, + "learning_rate": 9.610929098803944e-06, + "loss": 0.0183, + "step": 67305 + }, + { + "epoch": 3.14, + "learning_rate": 9.610145313749158e-06, + "loss": 0.0619, + "step": 67310 + }, + { + "epoch": 3.14, + "learning_rate": 9.609361528694372e-06, + "loss": 0.0607, + "step": 67315 + }, + { + "epoch": 3.14, + "learning_rate": 9.608577743639586e-06, + "loss": 0.0888, + "step": 67320 + }, + { + "epoch": 3.14, + "learning_rate": 9.607793958584798e-06, + "loss": 0.1036, + "step": 67325 + }, + { + "epoch": 3.14, + "learning_rate": 9.60701017353001e-06, + "loss": 0.117, + "step": 67330 + }, + { + "epoch": 3.14, + "learning_rate": 9.606226388475224e-06, + "loss": 0.1202, + "step": 67335 + }, + { + "epoch": 3.14, + "learning_rate": 9.605442603420438e-06, + "loss": 0.2976, + "step": 67340 + }, + { + "epoch": 3.14, + "learning_rate": 9.604658818365652e-06, + "loss": 0.3352, + "step": 67345 + }, + { + "epoch": 3.14, + "learning_rate": 9.603875033310866e-06, + "loss": 0.0541, + "step": 67350 + }, + { + "epoch": 3.14, + "learning_rate": 9.603091248256078e-06, + "loss": 0.017, + "step": 67355 + }, + { + "epoch": 3.14, + "learning_rate": 9.602307463201292e-06, + "loss": 0.0692, + "step": 67360 + }, + { + "epoch": 3.14, + "learning_rate": 9.601523678146506e-06, + "loss": 0.0356, + "step": 67365 + }, + { + "epoch": 3.14, + "learning_rate": 9.60073989309172e-06, + "loss": 0.0566, + "step": 67370 + }, + { + "epoch": 3.14, + "learning_rate": 9.599956108036932e-06, + "loss": 0.0287, + "step": 67375 + }, + { + "epoch": 3.14, + "learning_rate": 9.599172322982146e-06, + "loss": 0.1013, + "step": 67380 + }, + { + "epoch": 3.14, + "learning_rate": 9.59838853792736e-06, + "loss": 0.1021, + "step": 67385 + }, + { + "epoch": 3.14, + "learning_rate": 9.597604752872574e-06, + "loss": 0.1245, + "step": 67390 + }, + { + "epoch": 3.14, + "learning_rate": 9.596820967817786e-06, + "loss": 0.2174, + "step": 67395 + }, + { + "epoch": 3.14, + "learning_rate": 9.596037182763e-06, + "loss": 0.0651, + "step": 67400 + }, + { + "epoch": 3.15, + "learning_rate": 9.595253397708212e-06, + "loss": 0.0342, + "step": 67405 + }, + { + "epoch": 3.15, + "learning_rate": 9.594469612653426e-06, + "loss": 0.0149, + "step": 67410 + }, + { + "epoch": 3.15, + "learning_rate": 9.59368582759864e-06, + "loss": 0.0436, + "step": 67415 + }, + { + "epoch": 3.15, + "learning_rate": 9.592902042543854e-06, + "loss": 0.0795, + "step": 67420 + }, + { + "epoch": 3.15, + "learning_rate": 9.592118257489066e-06, + "loss": 0.1034, + "step": 67425 + }, + { + "epoch": 3.15, + "learning_rate": 9.59133447243428e-06, + "loss": 0.1312, + "step": 67430 + }, + { + "epoch": 3.15, + "learning_rate": 9.590550687379494e-06, + "loss": 0.1473, + "step": 67435 + }, + { + "epoch": 3.15, + "learning_rate": 9.589766902324708e-06, + "loss": 0.1771, + "step": 67440 + }, + { + "epoch": 3.15, + "learning_rate": 9.58898311726992e-06, + "loss": 0.2701, + "step": 67445 + }, + { + "epoch": 3.15, + "learning_rate": 9.588199332215134e-06, + "loss": 0.0963, + "step": 67450 + }, + { + "epoch": 3.15, + "learning_rate": 9.587415547160348e-06, + "loss": 0.0433, + "step": 67455 + }, + { + "epoch": 3.15, + "learning_rate": 9.58663176210556e-06, + "loss": 0.0833, + "step": 67460 + }, + { + "epoch": 3.15, + "learning_rate": 9.585847977050774e-06, + "loss": 0.0404, + "step": 67465 + }, + { + "epoch": 3.15, + "learning_rate": 9.585064191995988e-06, + "loss": 0.1147, + "step": 67470 + }, + { + "epoch": 3.15, + "learning_rate": 9.5842804069412e-06, + "loss": 0.1279, + "step": 67475 + }, + { + "epoch": 3.15, + "learning_rate": 9.583496621886414e-06, + "loss": 0.0957, + "step": 67480 + }, + { + "epoch": 3.15, + "learning_rate": 9.582712836831628e-06, + "loss": 0.1441, + "step": 67485 + }, + { + "epoch": 3.15, + "learning_rate": 9.581929051776842e-06, + "loss": 0.1781, + "step": 67490 + }, + { + "epoch": 3.15, + "learning_rate": 9.581145266722054e-06, + "loss": 0.3565, + "step": 67495 + }, + { + "epoch": 3.15, + "learning_rate": 9.580361481667268e-06, + "loss": 0.0838, + "step": 67500 + }, + { + "epoch": 3.15, + "learning_rate": 9.579577696612482e-06, + "loss": 0.025, + "step": 67505 + }, + { + "epoch": 3.15, + "learning_rate": 9.578793911557696e-06, + "loss": 0.0205, + "step": 67510 + }, + { + "epoch": 3.15, + "learning_rate": 9.57801012650291e-06, + "loss": 0.0888, + "step": 67515 + }, + { + "epoch": 3.15, + "learning_rate": 9.577226341448122e-06, + "loss": 0.0704, + "step": 67520 + }, + { + "epoch": 3.15, + "learning_rate": 9.576442556393334e-06, + "loss": 0.097, + "step": 67525 + }, + { + "epoch": 3.15, + "learning_rate": 9.575658771338548e-06, + "loss": 0.1136, + "step": 67530 + }, + { + "epoch": 3.15, + "learning_rate": 9.574874986283762e-06, + "loss": 0.1827, + "step": 67535 + }, + { + "epoch": 3.15, + "learning_rate": 9.574091201228976e-06, + "loss": 0.3217, + "step": 67540 + }, + { + "epoch": 3.15, + "learning_rate": 9.573307416174188e-06, + "loss": 0.3586, + "step": 67545 + }, + { + "epoch": 3.15, + "learning_rate": 9.572523631119402e-06, + "loss": 0.094, + "step": 67550 + }, + { + "epoch": 3.15, + "learning_rate": 9.571739846064616e-06, + "loss": 0.0299, + "step": 67555 + }, + { + "epoch": 3.15, + "learning_rate": 9.57095606100983e-06, + "loss": 0.0378, + "step": 67560 + }, + { + "epoch": 3.15, + "learning_rate": 9.570172275955044e-06, + "loss": 0.0593, + "step": 67565 + }, + { + "epoch": 3.15, + "learning_rate": 9.569388490900256e-06, + "loss": 0.0426, + "step": 67570 + }, + { + "epoch": 3.15, + "learning_rate": 9.56860470584547e-06, + "loss": 0.1359, + "step": 67575 + }, + { + "epoch": 3.15, + "learning_rate": 9.567820920790684e-06, + "loss": 0.0562, + "step": 67580 + }, + { + "epoch": 3.15, + "learning_rate": 9.567037135735898e-06, + "loss": 0.1838, + "step": 67585 + }, + { + "epoch": 3.15, + "learning_rate": 9.56625335068111e-06, + "loss": 0.2696, + "step": 67590 + }, + { + "epoch": 3.15, + "learning_rate": 9.565469565626322e-06, + "loss": 0.4682, + "step": 67595 + }, + { + "epoch": 3.15, + "learning_rate": 9.564685780571536e-06, + "loss": 0.0999, + "step": 67600 + }, + { + "epoch": 3.15, + "learning_rate": 9.56390199551675e-06, + "loss": 0.0261, + "step": 67605 + }, + { + "epoch": 3.15, + "learning_rate": 9.563118210461964e-06, + "loss": 0.1327, + "step": 67610 + }, + { + "epoch": 3.16, + "learning_rate": 9.562334425407178e-06, + "loss": 0.0242, + "step": 67615 + }, + { + "epoch": 3.16, + "learning_rate": 9.56155064035239e-06, + "loss": 0.0565, + "step": 67620 + }, + { + "epoch": 3.16, + "learning_rate": 9.560766855297604e-06, + "loss": 0.1068, + "step": 67625 + }, + { + "epoch": 3.16, + "learning_rate": 9.559983070242818e-06, + "loss": 0.0941, + "step": 67630 + }, + { + "epoch": 3.16, + "learning_rate": 9.559199285188032e-06, + "loss": 0.2012, + "step": 67635 + }, + { + "epoch": 3.16, + "learning_rate": 9.558415500133244e-06, + "loss": 0.2336, + "step": 67640 + }, + { + "epoch": 3.16, + "learning_rate": 9.557631715078458e-06, + "loss": 0.2486, + "step": 67645 + }, + { + "epoch": 3.16, + "learning_rate": 9.556847930023672e-06, + "loss": 0.1049, + "step": 67650 + }, + { + "epoch": 3.16, + "learning_rate": 9.556064144968884e-06, + "loss": 0.0125, + "step": 67655 + }, + { + "epoch": 3.16, + "learning_rate": 9.555280359914098e-06, + "loss": 0.0499, + "step": 67660 + }, + { + "epoch": 3.16, + "learning_rate": 9.554496574859312e-06, + "loss": 0.1079, + "step": 67665 + }, + { + "epoch": 3.16, + "learning_rate": 9.553712789804524e-06, + "loss": 0.0323, + "step": 67670 + }, + { + "epoch": 3.16, + "learning_rate": 9.552929004749738e-06, + "loss": 0.0886, + "step": 67675 + }, + { + "epoch": 3.16, + "learning_rate": 9.552145219694952e-06, + "loss": 0.1056, + "step": 67680 + }, + { + "epoch": 3.16, + "learning_rate": 9.551361434640166e-06, + "loss": 0.1175, + "step": 67685 + }, + { + "epoch": 3.16, + "learning_rate": 9.550577649585378e-06, + "loss": 0.3264, + "step": 67690 + }, + { + "epoch": 3.16, + "learning_rate": 9.549793864530592e-06, + "loss": 0.5046, + "step": 67695 + }, + { + "epoch": 3.16, + "learning_rate": 9.549010079475806e-06, + "loss": 0.0509, + "step": 67700 + }, + { + "epoch": 3.16, + "learning_rate": 9.54822629442102e-06, + "loss": 0.0376, + "step": 67705 + }, + { + "epoch": 3.16, + "learning_rate": 9.547442509366232e-06, + "loss": 0.0338, + "step": 67710 + }, + { + "epoch": 3.16, + "learning_rate": 9.546658724311446e-06, + "loss": 0.033, + "step": 67715 + }, + { + "epoch": 3.16, + "learning_rate": 9.545874939256658e-06, + "loss": 0.0897, + "step": 67720 + }, + { + "epoch": 3.16, + "learning_rate": 9.545091154201872e-06, + "loss": 0.1058, + "step": 67725 + }, + { + "epoch": 3.16, + "learning_rate": 9.544307369147086e-06, + "loss": 0.0865, + "step": 67730 + }, + { + "epoch": 3.16, + "learning_rate": 9.5435235840923e-06, + "loss": 0.1294, + "step": 67735 + }, + { + "epoch": 3.16, + "learning_rate": 9.542739799037512e-06, + "loss": 0.1835, + "step": 67740 + }, + { + "epoch": 3.16, + "learning_rate": 9.541956013982726e-06, + "loss": 0.3487, + "step": 67745 + }, + { + "epoch": 3.16, + "learning_rate": 9.54117222892794e-06, + "loss": 0.0457, + "step": 67750 + }, + { + "epoch": 3.16, + "learning_rate": 9.540388443873154e-06, + "loss": 0.0352, + "step": 67755 + }, + { + "epoch": 3.16, + "learning_rate": 9.539604658818366e-06, + "loss": 0.026, + "step": 67760 + }, + { + "epoch": 3.16, + "learning_rate": 9.53882087376358e-06, + "loss": 0.0794, + "step": 67765 + }, + { + "epoch": 3.16, + "learning_rate": 9.538037088708794e-06, + "loss": 0.0649, + "step": 67770 + }, + { + "epoch": 3.16, + "learning_rate": 9.537253303654008e-06, + "loss": 0.0954, + "step": 67775 + }, + { + "epoch": 3.16, + "learning_rate": 9.536469518599221e-06, + "loss": 0.0569, + "step": 67780 + }, + { + "epoch": 3.16, + "learning_rate": 9.535685733544434e-06, + "loss": 0.1698, + "step": 67785 + }, + { + "epoch": 3.16, + "learning_rate": 9.534901948489646e-06, + "loss": 0.2006, + "step": 67790 + }, + { + "epoch": 3.16, + "learning_rate": 9.53411816343486e-06, + "loss": 0.1633, + "step": 67795 + }, + { + "epoch": 3.16, + "learning_rate": 9.533334378380074e-06, + "loss": 0.078, + "step": 67800 + }, + { + "epoch": 3.16, + "learning_rate": 9.532550593325288e-06, + "loss": 0.0221, + "step": 67805 + }, + { + "epoch": 3.16, + "learning_rate": 9.5317668082705e-06, + "loss": 0.1055, + "step": 67810 + }, + { + "epoch": 3.16, + "learning_rate": 9.530983023215714e-06, + "loss": 0.0439, + "step": 67815 + }, + { + "epoch": 3.16, + "learning_rate": 9.530199238160928e-06, + "loss": 0.0632, + "step": 67820 + }, + { + "epoch": 3.16, + "learning_rate": 9.529415453106142e-06, + "loss": 0.1022, + "step": 67825 + }, + { + "epoch": 3.17, + "learning_rate": 9.528631668051355e-06, + "loss": 0.1556, + "step": 67830 + }, + { + "epoch": 3.17, + "learning_rate": 9.527847882996568e-06, + "loss": 0.118, + "step": 67835 + }, + { + "epoch": 3.17, + "learning_rate": 9.527064097941782e-06, + "loss": 0.1185, + "step": 67840 + }, + { + "epoch": 3.17, + "learning_rate": 9.526280312886995e-06, + "loss": 0.2748, + "step": 67845 + }, + { + "epoch": 3.17, + "learning_rate": 9.525496527832208e-06, + "loss": 0.0664, + "step": 67850 + }, + { + "epoch": 3.17, + "learning_rate": 9.524712742777422e-06, + "loss": 0.0549, + "step": 67855 + }, + { + "epoch": 3.17, + "learning_rate": 9.523928957722634e-06, + "loss": 0.0159, + "step": 67860 + }, + { + "epoch": 3.17, + "learning_rate": 9.523145172667848e-06, + "loss": 0.029, + "step": 67865 + }, + { + "epoch": 3.17, + "learning_rate": 9.522361387613062e-06, + "loss": 0.0671, + "step": 67870 + }, + { + "epoch": 3.17, + "learning_rate": 9.521577602558275e-06, + "loss": 0.0725, + "step": 67875 + }, + { + "epoch": 3.17, + "learning_rate": 9.52079381750349e-06, + "loss": 0.2095, + "step": 67880 + }, + { + "epoch": 3.17, + "learning_rate": 9.520010032448702e-06, + "loss": 0.2265, + "step": 67885 + }, + { + "epoch": 3.17, + "learning_rate": 9.519226247393916e-06, + "loss": 0.3112, + "step": 67890 + }, + { + "epoch": 3.17, + "learning_rate": 9.51844246233913e-06, + "loss": 0.2284, + "step": 67895 + }, + { + "epoch": 3.17, + "learning_rate": 9.517658677284343e-06, + "loss": 0.0592, + "step": 67900 + }, + { + "epoch": 3.17, + "learning_rate": 9.516874892229556e-06, + "loss": 0.0495, + "step": 67905 + }, + { + "epoch": 3.17, + "learning_rate": 9.51609110717477e-06, + "loss": 0.02, + "step": 67910 + }, + { + "epoch": 3.17, + "learning_rate": 9.515307322119982e-06, + "loss": 0.0283, + "step": 67915 + }, + { + "epoch": 3.17, + "learning_rate": 9.514523537065196e-06, + "loss": 0.0647, + "step": 67920 + }, + { + "epoch": 3.17, + "learning_rate": 9.51373975201041e-06, + "loss": 0.1077, + "step": 67925 + }, + { + "epoch": 3.17, + "learning_rate": 9.512955966955623e-06, + "loss": 0.1124, + "step": 67930 + }, + { + "epoch": 3.17, + "learning_rate": 9.512172181900836e-06, + "loss": 0.1757, + "step": 67935 + }, + { + "epoch": 3.17, + "learning_rate": 9.51138839684605e-06, + "loss": 0.217, + "step": 67940 + }, + { + "epoch": 3.17, + "learning_rate": 9.510604611791263e-06, + "loss": 0.3992, + "step": 67945 + }, + { + "epoch": 3.17, + "learning_rate": 9.509820826736477e-06, + "loss": 0.0693, + "step": 67950 + }, + { + "epoch": 3.17, + "learning_rate": 9.50903704168169e-06, + "loss": 0.067, + "step": 67955 + }, + { + "epoch": 3.17, + "learning_rate": 9.508253256626903e-06, + "loss": 0.0275, + "step": 67960 + }, + { + "epoch": 3.17, + "learning_rate": 9.507469471572117e-06, + "loss": 0.0541, + "step": 67965 + }, + { + "epoch": 3.17, + "learning_rate": 9.506685686517331e-06, + "loss": 0.055, + "step": 67970 + }, + { + "epoch": 3.17, + "learning_rate": 9.505901901462543e-06, + "loss": 0.0966, + "step": 67975 + }, + { + "epoch": 3.17, + "learning_rate": 9.505118116407757e-06, + "loss": 0.1219, + "step": 67980 + }, + { + "epoch": 3.17, + "learning_rate": 9.50433433135297e-06, + "loss": 0.2099, + "step": 67985 + }, + { + "epoch": 3.17, + "learning_rate": 9.503550546298183e-06, + "loss": 0.1988, + "step": 67990 + }, + { + "epoch": 3.17, + "learning_rate": 9.502766761243397e-06, + "loss": 0.3276, + "step": 67995 + }, + { + "epoch": 3.17, + "learning_rate": 9.501982976188611e-06, + "loss": 0.1253, + "step": 68000 + }, + { + "epoch": 3.17, + "learning_rate": 9.501199191133823e-06, + "loss": 0.0215, + "step": 68005 + }, + { + "epoch": 3.17, + "learning_rate": 9.500415406079037e-06, + "loss": 0.0237, + "step": 68010 + }, + { + "epoch": 3.17, + "learning_rate": 9.499631621024251e-06, + "loss": 0.0624, + "step": 68015 + }, + { + "epoch": 3.17, + "learning_rate": 9.498847835969465e-06, + "loss": 0.0849, + "step": 68020 + }, + { + "epoch": 3.17, + "learning_rate": 9.498064050914677e-06, + "loss": 0.083, + "step": 68025 + }, + { + "epoch": 3.17, + "learning_rate": 9.497280265859891e-06, + "loss": 0.1419, + "step": 68030 + }, + { + "epoch": 3.17, + "learning_rate": 9.496496480805105e-06, + "loss": 0.1342, + "step": 68035 + }, + { + "epoch": 3.17, + "learning_rate": 9.495712695750319e-06, + "loss": 0.1337, + "step": 68040 + }, + { + "epoch": 3.18, + "learning_rate": 9.494928910695531e-06, + "loss": 0.2854, + "step": 68045 + }, + { + "epoch": 3.18, + "learning_rate": 9.494145125640745e-06, + "loss": 0.146, + "step": 68050 + }, + { + "epoch": 3.18, + "learning_rate": 9.493361340585957e-06, + "loss": 0.0851, + "step": 68055 + }, + { + "epoch": 3.18, + "learning_rate": 9.492577555531171e-06, + "loss": 0.0148, + "step": 68060 + }, + { + "epoch": 3.18, + "learning_rate": 9.491793770476385e-06, + "loss": 0.0833, + "step": 68065 + }, + { + "epoch": 3.18, + "learning_rate": 9.4910099854216e-06, + "loss": 0.0505, + "step": 68070 + }, + { + "epoch": 3.18, + "learning_rate": 9.490226200366811e-06, + "loss": 0.103, + "step": 68075 + }, + { + "epoch": 3.18, + "learning_rate": 9.489442415312025e-06, + "loss": 0.1506, + "step": 68080 + }, + { + "epoch": 3.18, + "learning_rate": 9.48865863025724e-06, + "loss": 0.1196, + "step": 68085 + }, + { + "epoch": 3.18, + "learning_rate": 9.487874845202453e-06, + "loss": 0.1659, + "step": 68090 + }, + { + "epoch": 3.18, + "learning_rate": 9.487091060147667e-06, + "loss": 0.3757, + "step": 68095 + }, + { + "epoch": 3.18, + "learning_rate": 9.48630727509288e-06, + "loss": 0.0993, + "step": 68100 + }, + { + "epoch": 3.18, + "learning_rate": 9.485523490038093e-06, + "loss": 0.0322, + "step": 68105 + }, + { + "epoch": 3.18, + "learning_rate": 9.484739704983305e-06, + "loss": 0.047, + "step": 68110 + }, + { + "epoch": 3.18, + "learning_rate": 9.48395591992852e-06, + "loss": 0.081, + "step": 68115 + }, + { + "epoch": 3.18, + "learning_rate": 9.483172134873733e-06, + "loss": 0.0764, + "step": 68120 + }, + { + "epoch": 3.18, + "learning_rate": 9.482388349818945e-06, + "loss": 0.0573, + "step": 68125 + }, + { + "epoch": 3.18, + "learning_rate": 9.48160456476416e-06, + "loss": 0.1731, + "step": 68130 + }, + { + "epoch": 3.18, + "learning_rate": 9.480820779709373e-06, + "loss": 0.1322, + "step": 68135 + }, + { + "epoch": 3.18, + "learning_rate": 9.480036994654587e-06, + "loss": 0.1864, + "step": 68140 + }, + { + "epoch": 3.18, + "learning_rate": 9.479253209599801e-06, + "loss": 0.2936, + "step": 68145 + }, + { + "epoch": 3.18, + "learning_rate": 9.478469424545013e-06, + "loss": 0.0338, + "step": 68150 + }, + { + "epoch": 3.18, + "learning_rate": 9.477685639490227e-06, + "loss": 0.0361, + "step": 68155 + }, + { + "epoch": 3.18, + "learning_rate": 9.476901854435441e-06, + "loss": 0.0377, + "step": 68160 + }, + { + "epoch": 3.18, + "learning_rate": 9.476118069380655e-06, + "loss": 0.018, + "step": 68165 + }, + { + "epoch": 3.18, + "learning_rate": 9.475334284325867e-06, + "loss": 0.0892, + "step": 68170 + }, + { + "epoch": 3.18, + "learning_rate": 9.47455049927108e-06, + "loss": 0.0872, + "step": 68175 + }, + { + "epoch": 3.18, + "learning_rate": 9.473766714216293e-06, + "loss": 0.1519, + "step": 68180 + }, + { + "epoch": 3.18, + "learning_rate": 9.472982929161507e-06, + "loss": 0.2009, + "step": 68185 + }, + { + "epoch": 3.18, + "learning_rate": 9.472199144106721e-06, + "loss": 0.122, + "step": 68190 + }, + { + "epoch": 3.18, + "learning_rate": 9.471415359051935e-06, + "loss": 0.2358, + "step": 68195 + }, + { + "epoch": 3.18, + "learning_rate": 9.470631573997147e-06, + "loss": 0.1171, + "step": 68200 + }, + { + "epoch": 3.18, + "learning_rate": 9.469847788942361e-06, + "loss": 0.0262, + "step": 68205 + }, + { + "epoch": 3.18, + "learning_rate": 9.469064003887575e-06, + "loss": 0.0397, + "step": 68210 + }, + { + "epoch": 3.18, + "learning_rate": 9.468280218832789e-06, + "loss": 0.0614, + "step": 68215 + }, + { + "epoch": 3.18, + "learning_rate": 9.467496433778001e-06, + "loss": 0.0558, + "step": 68220 + }, + { + "epoch": 3.18, + "learning_rate": 9.466712648723215e-06, + "loss": 0.106, + "step": 68225 + }, + { + "epoch": 3.18, + "learning_rate": 9.465928863668429e-06, + "loss": 0.0806, + "step": 68230 + }, + { + "epoch": 3.18, + "learning_rate": 9.465145078613643e-06, + "loss": 0.1555, + "step": 68235 + }, + { + "epoch": 3.18, + "learning_rate": 9.464361293558855e-06, + "loss": 0.1864, + "step": 68240 + }, + { + "epoch": 3.18, + "learning_rate": 9.463577508504069e-06, + "loss": 0.2363, + "step": 68245 + }, + { + "epoch": 3.18, + "learning_rate": 9.462793723449281e-06, + "loss": 0.0648, + "step": 68250 + }, + { + "epoch": 3.18, + "learning_rate": 9.462009938394495e-06, + "loss": 0.0486, + "step": 68255 + }, + { + "epoch": 3.19, + "learning_rate": 9.461226153339709e-06, + "loss": 0.0306, + "step": 68260 + }, + { + "epoch": 3.19, + "learning_rate": 9.460442368284923e-06, + "loss": 0.0714, + "step": 68265 + }, + { + "epoch": 3.19, + "learning_rate": 9.459658583230135e-06, + "loss": 0.05, + "step": 68270 + }, + { + "epoch": 3.19, + "learning_rate": 9.458874798175349e-06, + "loss": 0.0767, + "step": 68275 + }, + { + "epoch": 3.19, + "learning_rate": 9.458091013120563e-06, + "loss": 0.062, + "step": 68280 + }, + { + "epoch": 3.19, + "learning_rate": 9.457307228065777e-06, + "loss": 0.1273, + "step": 68285 + }, + { + "epoch": 3.19, + "learning_rate": 9.456523443010989e-06, + "loss": 0.1011, + "step": 68290 + }, + { + "epoch": 3.19, + "learning_rate": 9.455739657956203e-06, + "loss": 0.2082, + "step": 68295 + }, + { + "epoch": 3.19, + "learning_rate": 9.454955872901417e-06, + "loss": 0.0501, + "step": 68300 + }, + { + "epoch": 3.19, + "learning_rate": 9.454172087846629e-06, + "loss": 0.0578, + "step": 68305 + }, + { + "epoch": 3.19, + "learning_rate": 9.453388302791843e-06, + "loss": 0.0514, + "step": 68310 + }, + { + "epoch": 3.19, + "learning_rate": 9.452604517737057e-06, + "loss": 0.0431, + "step": 68315 + }, + { + "epoch": 3.19, + "learning_rate": 9.451820732682269e-06, + "loss": 0.0945, + "step": 68320 + }, + { + "epoch": 3.19, + "learning_rate": 9.451036947627483e-06, + "loss": 0.1068, + "step": 68325 + }, + { + "epoch": 3.19, + "learning_rate": 9.450253162572697e-06, + "loss": 0.1171, + "step": 68330 + }, + { + "epoch": 3.19, + "learning_rate": 9.44946937751791e-06, + "loss": 0.0724, + "step": 68335 + }, + { + "epoch": 3.19, + "learning_rate": 9.448685592463123e-06, + "loss": 0.2423, + "step": 68340 + }, + { + "epoch": 3.19, + "learning_rate": 9.447901807408337e-06, + "loss": 0.2527, + "step": 68345 + }, + { + "epoch": 3.19, + "learning_rate": 9.44711802235355e-06, + "loss": 0.0918, + "step": 68350 + }, + { + "epoch": 3.19, + "learning_rate": 9.446334237298765e-06, + "loss": 0.0222, + "step": 68355 + }, + { + "epoch": 3.19, + "learning_rate": 9.445550452243979e-06, + "loss": 0.0152, + "step": 68360 + }, + { + "epoch": 3.19, + "learning_rate": 9.44476666718919e-06, + "loss": 0.0738, + "step": 68365 + }, + { + "epoch": 3.19, + "learning_rate": 9.443982882134403e-06, + "loss": 0.0907, + "step": 68370 + }, + { + "epoch": 3.19, + "learning_rate": 9.443199097079617e-06, + "loss": 0.0834, + "step": 68375 + }, + { + "epoch": 3.19, + "learning_rate": 9.44241531202483e-06, + "loss": 0.107, + "step": 68380 + }, + { + "epoch": 3.19, + "learning_rate": 9.441631526970045e-06, + "loss": 0.2131, + "step": 68385 + }, + { + "epoch": 3.19, + "learning_rate": 9.440847741915257e-06, + "loss": 0.1436, + "step": 68390 + }, + { + "epoch": 3.19, + "learning_rate": 9.440063956860471e-06, + "loss": 0.3737, + "step": 68395 + }, + { + "epoch": 3.19, + "learning_rate": 9.439280171805685e-06, + "loss": 0.1027, + "step": 68400 + }, + { + "epoch": 3.19, + "learning_rate": 9.438496386750899e-06, + "loss": 0.0675, + "step": 68405 + }, + { + "epoch": 3.19, + "learning_rate": 9.437712601696113e-06, + "loss": 0.0592, + "step": 68410 + }, + { + "epoch": 3.19, + "learning_rate": 9.436928816641325e-06, + "loss": 0.0521, + "step": 68415 + }, + { + "epoch": 3.19, + "learning_rate": 9.436145031586539e-06, + "loss": 0.0423, + "step": 68420 + }, + { + "epoch": 3.19, + "learning_rate": 9.435361246531753e-06, + "loss": 0.0177, + "step": 68425 + }, + { + "epoch": 3.19, + "learning_rate": 9.434577461476967e-06, + "loss": 0.0523, + "step": 68430 + }, + { + "epoch": 3.19, + "learning_rate": 9.433793676422179e-06, + "loss": 0.1323, + "step": 68435 + }, + { + "epoch": 3.19, + "learning_rate": 9.433009891367391e-06, + "loss": 0.1668, + "step": 68440 + }, + { + "epoch": 3.19, + "learning_rate": 9.432226106312605e-06, + "loss": 0.25, + "step": 68445 + }, + { + "epoch": 3.19, + "learning_rate": 9.431442321257819e-06, + "loss": 0.0571, + "step": 68450 + }, + { + "epoch": 3.19, + "learning_rate": 9.430658536203033e-06, + "loss": 0.0383, + "step": 68455 + }, + { + "epoch": 3.19, + "learning_rate": 9.429874751148247e-06, + "loss": 0.0444, + "step": 68460 + }, + { + "epoch": 3.19, + "learning_rate": 9.429090966093459e-06, + "loss": 0.0427, + "step": 68465 + }, + { + "epoch": 3.19, + "learning_rate": 9.428307181038673e-06, + "loss": 0.0621, + "step": 68470 + }, + { + "epoch": 3.2, + "learning_rate": 9.427523395983887e-06, + "loss": 0.0821, + "step": 68475 + }, + { + "epoch": 3.2, + "learning_rate": 9.4267396109291e-06, + "loss": 0.1072, + "step": 68480 + }, + { + "epoch": 3.2, + "learning_rate": 9.425955825874313e-06, + "loss": 0.1496, + "step": 68485 + }, + { + "epoch": 3.2, + "learning_rate": 9.425172040819527e-06, + "loss": 0.327, + "step": 68490 + }, + { + "epoch": 3.2, + "learning_rate": 9.42438825576474e-06, + "loss": 0.3904, + "step": 68495 + }, + { + "epoch": 3.2, + "learning_rate": 9.423604470709953e-06, + "loss": 0.0764, + "step": 68500 + }, + { + "epoch": 3.2, + "learning_rate": 9.422820685655167e-06, + "loss": 0.0141, + "step": 68505 + }, + { + "epoch": 3.2, + "learning_rate": 9.42203690060038e-06, + "loss": 0.0912, + "step": 68510 + }, + { + "epoch": 3.2, + "learning_rate": 9.421253115545593e-06, + "loss": 0.0373, + "step": 68515 + }, + { + "epoch": 3.2, + "learning_rate": 9.420469330490807e-06, + "loss": 0.1329, + "step": 68520 + }, + { + "epoch": 3.2, + "learning_rate": 9.41968554543602e-06, + "loss": 0.0989, + "step": 68525 + }, + { + "epoch": 3.2, + "learning_rate": 9.418901760381234e-06, + "loss": 0.103, + "step": 68530 + }, + { + "epoch": 3.2, + "learning_rate": 9.418117975326447e-06, + "loss": 0.1543, + "step": 68535 + }, + { + "epoch": 3.2, + "learning_rate": 9.41733419027166e-06, + "loss": 0.1886, + "step": 68540 + }, + { + "epoch": 3.2, + "learning_rate": 9.416550405216874e-06, + "loss": 0.3793, + "step": 68545 + }, + { + "epoch": 3.2, + "learning_rate": 9.415766620162088e-06, + "loss": 0.0654, + "step": 68550 + }, + { + "epoch": 3.2, + "learning_rate": 9.4149828351073e-06, + "loss": 0.0313, + "step": 68555 + }, + { + "epoch": 3.2, + "learning_rate": 9.414199050052515e-06, + "loss": 0.0346, + "step": 68560 + }, + { + "epoch": 3.2, + "learning_rate": 9.413415264997727e-06, + "loss": 0.0393, + "step": 68565 + }, + { + "epoch": 3.2, + "learning_rate": 9.41263147994294e-06, + "loss": 0.0822, + "step": 68570 + }, + { + "epoch": 3.2, + "learning_rate": 9.411847694888155e-06, + "loss": 0.1126, + "step": 68575 + }, + { + "epoch": 3.2, + "learning_rate": 9.411063909833368e-06, + "loss": 0.0963, + "step": 68580 + }, + { + "epoch": 3.2, + "learning_rate": 9.41028012477858e-06, + "loss": 0.1581, + "step": 68585 + }, + { + "epoch": 3.2, + "learning_rate": 9.409496339723795e-06, + "loss": 0.2612, + "step": 68590 + }, + { + "epoch": 3.2, + "learning_rate": 9.408712554669008e-06, + "loss": 0.323, + "step": 68595 + }, + { + "epoch": 3.2, + "learning_rate": 9.407928769614222e-06, + "loss": 0.0175, + "step": 68600 + }, + { + "epoch": 3.2, + "learning_rate": 9.407144984559435e-06, + "loss": 0.0262, + "step": 68605 + }, + { + "epoch": 3.2, + "learning_rate": 9.406361199504648e-06, + "loss": 0.0282, + "step": 68610 + }, + { + "epoch": 3.2, + "learning_rate": 9.405577414449862e-06, + "loss": 0.0527, + "step": 68615 + }, + { + "epoch": 3.2, + "learning_rate": 9.404793629395076e-06, + "loss": 0.0796, + "step": 68620 + }, + { + "epoch": 3.2, + "learning_rate": 9.40400984434029e-06, + "loss": 0.0827, + "step": 68625 + }, + { + "epoch": 3.2, + "learning_rate": 9.403226059285502e-06, + "loss": 0.0572, + "step": 68630 + }, + { + "epoch": 3.2, + "learning_rate": 9.402442274230715e-06, + "loss": 0.1104, + "step": 68635 + }, + { + "epoch": 3.2, + "learning_rate": 9.401658489175929e-06, + "loss": 0.1353, + "step": 68640 + }, + { + "epoch": 3.2, + "learning_rate": 9.400874704121142e-06, + "loss": 0.3382, + "step": 68645 + }, + { + "epoch": 3.2, + "learning_rate": 9.400090919066356e-06, + "loss": 0.0508, + "step": 68650 + }, + { + "epoch": 3.2, + "learning_rate": 9.399307134011569e-06, + "loss": 0.0686, + "step": 68655 + }, + { + "epoch": 3.2, + "learning_rate": 9.398523348956782e-06, + "loss": 0.0725, + "step": 68660 + }, + { + "epoch": 3.2, + "learning_rate": 9.397739563901996e-06, + "loss": 0.1, + "step": 68665 + }, + { + "epoch": 3.2, + "learning_rate": 9.39695577884721e-06, + "loss": 0.0732, + "step": 68670 + }, + { + "epoch": 3.2, + "learning_rate": 9.396171993792424e-06, + "loss": 0.0743, + "step": 68675 + }, + { + "epoch": 3.2, + "learning_rate": 9.395388208737636e-06, + "loss": 0.0961, + "step": 68680 + }, + { + "epoch": 3.2, + "learning_rate": 9.39460442368285e-06, + "loss": 0.1475, + "step": 68685 + }, + { + "epoch": 3.21, + "learning_rate": 9.393820638628064e-06, + "loss": 0.197, + "step": 68690 + }, + { + "epoch": 3.21, + "learning_rate": 9.393036853573276e-06, + "loss": 0.2542, + "step": 68695 + }, + { + "epoch": 3.21, + "learning_rate": 9.39225306851849e-06, + "loss": 0.0426, + "step": 68700 + }, + { + "epoch": 3.21, + "learning_rate": 9.391469283463703e-06, + "loss": 0.0372, + "step": 68705 + }, + { + "epoch": 3.21, + "learning_rate": 9.390685498408916e-06, + "loss": 0.0381, + "step": 68710 + }, + { + "epoch": 3.21, + "learning_rate": 9.38990171335413e-06, + "loss": 0.07, + "step": 68715 + }, + { + "epoch": 3.21, + "learning_rate": 9.389117928299344e-06, + "loss": 0.0335, + "step": 68720 + }, + { + "epoch": 3.21, + "learning_rate": 9.388334143244558e-06, + "loss": 0.0311, + "step": 68725 + }, + { + "epoch": 3.21, + "learning_rate": 9.38755035818977e-06, + "loss": 0.0415, + "step": 68730 + }, + { + "epoch": 3.21, + "learning_rate": 9.386766573134984e-06, + "loss": 0.1207, + "step": 68735 + }, + { + "epoch": 3.21, + "learning_rate": 9.385982788080198e-06, + "loss": 0.2369, + "step": 68740 + }, + { + "epoch": 3.21, + "learning_rate": 9.385199003025412e-06, + "loss": 0.2357, + "step": 68745 + }, + { + "epoch": 3.21, + "learning_rate": 9.384415217970624e-06, + "loss": 0.1444, + "step": 68750 + }, + { + "epoch": 3.21, + "learning_rate": 9.383631432915838e-06, + "loss": 0.0314, + "step": 68755 + }, + { + "epoch": 3.21, + "learning_rate": 9.38284764786105e-06, + "loss": 0.0433, + "step": 68760 + }, + { + "epoch": 3.21, + "learning_rate": 9.382063862806264e-06, + "loss": 0.0297, + "step": 68765 + }, + { + "epoch": 3.21, + "learning_rate": 9.381280077751478e-06, + "loss": 0.0899, + "step": 68770 + }, + { + "epoch": 3.21, + "learning_rate": 9.380496292696692e-06, + "loss": 0.06, + "step": 68775 + }, + { + "epoch": 3.21, + "learning_rate": 9.379712507641904e-06, + "loss": 0.1671, + "step": 68780 + }, + { + "epoch": 3.21, + "learning_rate": 9.378928722587118e-06, + "loss": 0.1742, + "step": 68785 + }, + { + "epoch": 3.21, + "learning_rate": 9.378144937532332e-06, + "loss": 0.243, + "step": 68790 + }, + { + "epoch": 3.21, + "learning_rate": 9.377361152477546e-06, + "loss": 0.2625, + "step": 68795 + }, + { + "epoch": 3.21, + "learning_rate": 9.376577367422758e-06, + "loss": 0.0928, + "step": 68800 + }, + { + "epoch": 3.21, + "learning_rate": 9.375793582367972e-06, + "loss": 0.0624, + "step": 68805 + }, + { + "epoch": 3.21, + "learning_rate": 9.375009797313186e-06, + "loss": 0.0441, + "step": 68810 + }, + { + "epoch": 3.21, + "learning_rate": 9.3742260122584e-06, + "loss": 0.0707, + "step": 68815 + }, + { + "epoch": 3.21, + "learning_rate": 9.373442227203612e-06, + "loss": 0.0557, + "step": 68820 + }, + { + "epoch": 3.21, + "learning_rate": 9.372658442148826e-06, + "loss": 0.0632, + "step": 68825 + }, + { + "epoch": 3.21, + "learning_rate": 9.371874657094038e-06, + "loss": 0.0644, + "step": 68830 + }, + { + "epoch": 3.21, + "learning_rate": 9.371090872039252e-06, + "loss": 0.1814, + "step": 68835 + }, + { + "epoch": 3.21, + "learning_rate": 9.370307086984466e-06, + "loss": 0.1505, + "step": 68840 + }, + { + "epoch": 3.21, + "learning_rate": 9.36952330192968e-06, + "loss": 0.2694, + "step": 68845 + }, + { + "epoch": 3.21, + "learning_rate": 9.368739516874892e-06, + "loss": 0.0535, + "step": 68850 + }, + { + "epoch": 3.21, + "learning_rate": 9.367955731820106e-06, + "loss": 0.0589, + "step": 68855 + }, + { + "epoch": 3.21, + "learning_rate": 9.36717194676532e-06, + "loss": 0.0595, + "step": 68860 + }, + { + "epoch": 3.21, + "learning_rate": 9.366388161710534e-06, + "loss": 0.0274, + "step": 68865 + }, + { + "epoch": 3.21, + "learning_rate": 9.365604376655746e-06, + "loss": 0.1054, + "step": 68870 + }, + { + "epoch": 3.21, + "learning_rate": 9.36482059160096e-06, + "loss": 0.0694, + "step": 68875 + }, + { + "epoch": 3.21, + "learning_rate": 9.364036806546174e-06, + "loss": 0.0681, + "step": 68880 + }, + { + "epoch": 3.21, + "learning_rate": 9.363253021491388e-06, + "loss": 0.0854, + "step": 68885 + }, + { + "epoch": 3.21, + "learning_rate": 9.3624692364366e-06, + "loss": 0.306, + "step": 68890 + }, + { + "epoch": 3.21, + "learning_rate": 9.361685451381814e-06, + "loss": 0.1706, + "step": 68895 + }, + { + "epoch": 3.21, + "learning_rate": 9.360901666327026e-06, + "loss": 0.037, + "step": 68900 + }, + { + "epoch": 3.22, + "learning_rate": 9.36011788127224e-06, + "loss": 0.0279, + "step": 68905 + }, + { + "epoch": 3.22, + "learning_rate": 9.359334096217454e-06, + "loss": 0.0225, + "step": 68910 + }, + { + "epoch": 3.22, + "learning_rate": 9.358550311162668e-06, + "loss": 0.0484, + "step": 68915 + }, + { + "epoch": 3.22, + "learning_rate": 9.35776652610788e-06, + "loss": 0.101, + "step": 68920 + }, + { + "epoch": 3.22, + "learning_rate": 9.356982741053094e-06, + "loss": 0.0671, + "step": 68925 + }, + { + "epoch": 3.22, + "learning_rate": 9.356198955998308e-06, + "loss": 0.1678, + "step": 68930 + }, + { + "epoch": 3.22, + "learning_rate": 9.355415170943522e-06, + "loss": 0.1417, + "step": 68935 + }, + { + "epoch": 3.22, + "learning_rate": 9.354631385888736e-06, + "loss": 0.212, + "step": 68940 + }, + { + "epoch": 3.22, + "learning_rate": 9.353847600833948e-06, + "loss": 0.3602, + "step": 68945 + }, + { + "epoch": 3.22, + "learning_rate": 9.353063815779162e-06, + "loss": 0.0381, + "step": 68950 + }, + { + "epoch": 3.22, + "learning_rate": 9.352280030724374e-06, + "loss": 0.0262, + "step": 68955 + }, + { + "epoch": 3.22, + "learning_rate": 9.351496245669588e-06, + "loss": 0.0538, + "step": 68960 + }, + { + "epoch": 3.22, + "learning_rate": 9.350712460614802e-06, + "loss": 0.0429, + "step": 68965 + }, + { + "epoch": 3.22, + "learning_rate": 9.349928675560014e-06, + "loss": 0.0416, + "step": 68970 + }, + { + "epoch": 3.22, + "learning_rate": 9.349144890505228e-06, + "loss": 0.0407, + "step": 68975 + }, + { + "epoch": 3.22, + "learning_rate": 9.348361105450442e-06, + "loss": 0.129, + "step": 68980 + }, + { + "epoch": 3.22, + "learning_rate": 9.347577320395656e-06, + "loss": 0.0655, + "step": 68985 + }, + { + "epoch": 3.22, + "learning_rate": 9.34679353534087e-06, + "loss": 0.2001, + "step": 68990 + }, + { + "epoch": 3.22, + "learning_rate": 9.346009750286082e-06, + "loss": 0.2085, + "step": 68995 + }, + { + "epoch": 3.22, + "learning_rate": 9.345225965231296e-06, + "loss": 0.0888, + "step": 69000 + }, + { + "epoch": 3.22, + "learning_rate": 9.34444218017651e-06, + "loss": 0.0161, + "step": 69005 + }, + { + "epoch": 3.22, + "learning_rate": 9.343658395121724e-06, + "loss": 0.0768, + "step": 69010 + }, + { + "epoch": 3.22, + "learning_rate": 9.342874610066936e-06, + "loss": 0.0276, + "step": 69015 + }, + { + "epoch": 3.22, + "learning_rate": 9.342090825012148e-06, + "loss": 0.0582, + "step": 69020 + }, + { + "epoch": 3.22, + "learning_rate": 9.341307039957362e-06, + "loss": 0.0803, + "step": 69025 + }, + { + "epoch": 3.22, + "learning_rate": 9.340523254902576e-06, + "loss": 0.0411, + "step": 69030 + }, + { + "epoch": 3.22, + "learning_rate": 9.33973946984779e-06, + "loss": 0.1711, + "step": 69035 + }, + { + "epoch": 3.22, + "learning_rate": 9.338955684793004e-06, + "loss": 0.1804, + "step": 69040 + }, + { + "epoch": 3.22, + "learning_rate": 9.338171899738216e-06, + "loss": 0.231, + "step": 69045 + }, + { + "epoch": 3.22, + "learning_rate": 9.33738811468343e-06, + "loss": 0.0687, + "step": 69050 + }, + { + "epoch": 3.22, + "learning_rate": 9.336604329628644e-06, + "loss": 0.0435, + "step": 69055 + }, + { + "epoch": 3.22, + "learning_rate": 9.335820544573858e-06, + "loss": 0.0353, + "step": 69060 + }, + { + "epoch": 3.22, + "learning_rate": 9.33503675951907e-06, + "loss": 0.0694, + "step": 69065 + }, + { + "epoch": 3.22, + "learning_rate": 9.334252974464284e-06, + "loss": 0.0859, + "step": 69070 + }, + { + "epoch": 3.22, + "learning_rate": 9.333469189409498e-06, + "loss": 0.0612, + "step": 69075 + }, + { + "epoch": 3.22, + "learning_rate": 9.332685404354712e-06, + "loss": 0.1262, + "step": 69080 + }, + { + "epoch": 3.22, + "learning_rate": 9.331901619299924e-06, + "loss": 0.1553, + "step": 69085 + }, + { + "epoch": 3.22, + "learning_rate": 9.331117834245138e-06, + "loss": 0.2087, + "step": 69090 + }, + { + "epoch": 3.22, + "learning_rate": 9.33033404919035e-06, + "loss": 0.1873, + "step": 69095 + }, + { + "epoch": 3.22, + "learning_rate": 9.329550264135564e-06, + "loss": 0.0867, + "step": 69100 + }, + { + "epoch": 3.22, + "learning_rate": 9.328766479080778e-06, + "loss": 0.0105, + "step": 69105 + }, + { + "epoch": 3.22, + "learning_rate": 9.327982694025992e-06, + "loss": 0.0393, + "step": 69110 + }, + { + "epoch": 3.23, + "learning_rate": 9.327198908971204e-06, + "loss": 0.0326, + "step": 69115 + }, + { + "epoch": 3.23, + "learning_rate": 9.326415123916418e-06, + "loss": 0.0376, + "step": 69120 + }, + { + "epoch": 3.23, + "learning_rate": 9.325631338861632e-06, + "loss": 0.0788, + "step": 69125 + }, + { + "epoch": 3.23, + "learning_rate": 9.324847553806846e-06, + "loss": 0.0968, + "step": 69130 + }, + { + "epoch": 3.23, + "learning_rate": 9.324063768752058e-06, + "loss": 0.215, + "step": 69135 + }, + { + "epoch": 3.23, + "learning_rate": 9.323279983697272e-06, + "loss": 0.2422, + "step": 69140 + }, + { + "epoch": 3.23, + "learning_rate": 9.322496198642486e-06, + "loss": 0.4225, + "step": 69145 + }, + { + "epoch": 3.23, + "learning_rate": 9.321712413587698e-06, + "loss": 0.0695, + "step": 69150 + }, + { + "epoch": 3.23, + "learning_rate": 9.320928628532912e-06, + "loss": 0.0703, + "step": 69155 + }, + { + "epoch": 3.23, + "learning_rate": 9.320144843478126e-06, + "loss": 0.1488, + "step": 69160 + }, + { + "epoch": 3.23, + "learning_rate": 9.319361058423338e-06, + "loss": 0.0382, + "step": 69165 + }, + { + "epoch": 3.23, + "learning_rate": 9.318577273368552e-06, + "loss": 0.1603, + "step": 69170 + }, + { + "epoch": 3.23, + "learning_rate": 9.317793488313766e-06, + "loss": 0.1347, + "step": 69175 + }, + { + "epoch": 3.23, + "learning_rate": 9.31700970325898e-06, + "loss": 0.0366, + "step": 69180 + }, + { + "epoch": 3.23, + "learning_rate": 9.316225918204192e-06, + "loss": 0.1063, + "step": 69185 + }, + { + "epoch": 3.23, + "learning_rate": 9.315442133149406e-06, + "loss": 0.2112, + "step": 69190 + }, + { + "epoch": 3.23, + "learning_rate": 9.31465834809462e-06, + "loss": 0.3252, + "step": 69195 + }, + { + "epoch": 3.23, + "learning_rate": 9.313874563039833e-06, + "loss": 0.0515, + "step": 69200 + }, + { + "epoch": 3.23, + "learning_rate": 9.313090777985047e-06, + "loss": 0.0263, + "step": 69205 + }, + { + "epoch": 3.23, + "learning_rate": 9.31230699293026e-06, + "loss": 0.0196, + "step": 69210 + }, + { + "epoch": 3.23, + "learning_rate": 9.311523207875472e-06, + "loss": 0.0484, + "step": 69215 + }, + { + "epoch": 3.23, + "learning_rate": 9.310739422820686e-06, + "loss": 0.0974, + "step": 69220 + }, + { + "epoch": 3.23, + "learning_rate": 9.3099556377659e-06, + "loss": 0.1009, + "step": 69225 + }, + { + "epoch": 3.23, + "learning_rate": 9.309171852711114e-06, + "loss": 0.1064, + "step": 69230 + }, + { + "epoch": 3.23, + "learning_rate": 9.308388067656326e-06, + "loss": 0.1218, + "step": 69235 + }, + { + "epoch": 3.23, + "learning_rate": 9.30760428260154e-06, + "loss": 0.1984, + "step": 69240 + }, + { + "epoch": 3.23, + "learning_rate": 9.306820497546754e-06, + "loss": 0.3309, + "step": 69245 + }, + { + "epoch": 3.23, + "learning_rate": 9.306036712491967e-06, + "loss": 0.0605, + "step": 69250 + }, + { + "epoch": 3.23, + "learning_rate": 9.305252927437181e-06, + "loss": 0.0544, + "step": 69255 + }, + { + "epoch": 3.23, + "learning_rate": 9.304469142382394e-06, + "loss": 0.044, + "step": 69260 + }, + { + "epoch": 3.23, + "learning_rate": 9.303685357327607e-06, + "loss": 0.046, + "step": 69265 + }, + { + "epoch": 3.23, + "learning_rate": 9.302901572272821e-06, + "loss": 0.0805, + "step": 69270 + }, + { + "epoch": 3.23, + "learning_rate": 9.302117787218035e-06, + "loss": 0.1386, + "step": 69275 + }, + { + "epoch": 3.23, + "learning_rate": 9.301334002163247e-06, + "loss": 0.1237, + "step": 69280 + }, + { + "epoch": 3.23, + "learning_rate": 9.30055021710846e-06, + "loss": 0.1503, + "step": 69285 + }, + { + "epoch": 3.23, + "learning_rate": 9.299766432053674e-06, + "loss": 0.1735, + "step": 69290 + }, + { + "epoch": 3.23, + "learning_rate": 9.298982646998888e-06, + "loss": 0.2164, + "step": 69295 + }, + { + "epoch": 3.23, + "learning_rate": 9.298198861944101e-06, + "loss": 0.0882, + "step": 69300 + }, + { + "epoch": 3.23, + "learning_rate": 9.297415076889315e-06, + "loss": 0.0838, + "step": 69305 + }, + { + "epoch": 3.23, + "learning_rate": 9.296631291834528e-06, + "loss": 0.0821, + "step": 69310 + }, + { + "epoch": 3.23, + "learning_rate": 9.295847506779741e-06, + "loss": 0.0388, + "step": 69315 + }, + { + "epoch": 3.23, + "learning_rate": 9.295063721724955e-06, + "loss": 0.1242, + "step": 69320 + }, + { + "epoch": 3.23, + "learning_rate": 9.29427993667017e-06, + "loss": 0.1089, + "step": 69325 + }, + { + "epoch": 3.24, + "learning_rate": 9.293496151615381e-06, + "loss": 0.0842, + "step": 69330 + }, + { + "epoch": 3.24, + "learning_rate": 9.292712366560595e-06, + "loss": 0.1545, + "step": 69335 + }, + { + "epoch": 3.24, + "learning_rate": 9.29192858150581e-06, + "loss": 0.2175, + "step": 69340 + }, + { + "epoch": 3.24, + "learning_rate": 9.291144796451021e-06, + "loss": 0.3745, + "step": 69345 + }, + { + "epoch": 3.24, + "learning_rate": 9.290361011396235e-06, + "loss": 0.093, + "step": 69350 + }, + { + "epoch": 3.24, + "learning_rate": 9.28957722634145e-06, + "loss": 0.0221, + "step": 69355 + }, + { + "epoch": 3.24, + "learning_rate": 9.288793441286662e-06, + "loss": 0.0494, + "step": 69360 + }, + { + "epoch": 3.24, + "learning_rate": 9.288009656231875e-06, + "loss": 0.06, + "step": 69365 + }, + { + "epoch": 3.24, + "learning_rate": 9.28722587117709e-06, + "loss": 0.0468, + "step": 69370 + }, + { + "epoch": 3.24, + "learning_rate": 9.286442086122303e-06, + "loss": 0.0985, + "step": 69375 + }, + { + "epoch": 3.24, + "learning_rate": 9.285658301067515e-06, + "loss": 0.2375, + "step": 69380 + }, + { + "epoch": 3.24, + "learning_rate": 9.28487451601273e-06, + "loss": 0.0803, + "step": 69385 + }, + { + "epoch": 3.24, + "learning_rate": 9.284090730957943e-06, + "loss": 0.3352, + "step": 69390 + }, + { + "epoch": 3.24, + "learning_rate": 9.283306945903157e-06, + "loss": 0.2779, + "step": 69395 + }, + { + "epoch": 3.24, + "learning_rate": 9.28252316084837e-06, + "loss": 0.0764, + "step": 69400 + }, + { + "epoch": 3.24, + "learning_rate": 9.281739375793583e-06, + "loss": 0.0254, + "step": 69405 + }, + { + "epoch": 3.24, + "learning_rate": 9.280955590738795e-06, + "loss": 0.0204, + "step": 69410 + }, + { + "epoch": 3.24, + "learning_rate": 9.28017180568401e-06, + "loss": 0.0758, + "step": 69415 + }, + { + "epoch": 3.24, + "learning_rate": 9.279388020629223e-06, + "loss": 0.0516, + "step": 69420 + }, + { + "epoch": 3.24, + "learning_rate": 9.278604235574437e-06, + "loss": 0.1475, + "step": 69425 + }, + { + "epoch": 3.24, + "learning_rate": 9.27782045051965e-06, + "loss": 0.1257, + "step": 69430 + }, + { + "epoch": 3.24, + "learning_rate": 9.277036665464863e-06, + "loss": 0.1269, + "step": 69435 + }, + { + "epoch": 3.24, + "learning_rate": 9.276252880410077e-06, + "loss": 0.2353, + "step": 69440 + }, + { + "epoch": 3.24, + "learning_rate": 9.275469095355291e-06, + "loss": 0.3649, + "step": 69445 + }, + { + "epoch": 3.24, + "learning_rate": 9.274685310300503e-06, + "loss": 0.0354, + "step": 69450 + }, + { + "epoch": 3.24, + "learning_rate": 9.273901525245717e-06, + "loss": 0.0686, + "step": 69455 + }, + { + "epoch": 3.24, + "learning_rate": 9.273117740190931e-06, + "loss": 0.0279, + "step": 69460 + }, + { + "epoch": 3.24, + "learning_rate": 9.272333955136145e-06, + "loss": 0.0607, + "step": 69465 + }, + { + "epoch": 3.24, + "learning_rate": 9.271550170081359e-06, + "loss": 0.0724, + "step": 69470 + }, + { + "epoch": 3.24, + "learning_rate": 9.270766385026571e-06, + "loss": 0.1336, + "step": 69475 + }, + { + "epoch": 3.24, + "learning_rate": 9.269982599971783e-06, + "loss": 0.1661, + "step": 69480 + }, + { + "epoch": 3.24, + "learning_rate": 9.269198814916997e-06, + "loss": 0.1611, + "step": 69485 + }, + { + "epoch": 3.24, + "learning_rate": 9.268415029862211e-06, + "loss": 0.3469, + "step": 69490 + }, + { + "epoch": 3.24, + "learning_rate": 9.267631244807425e-06, + "loss": 0.3338, + "step": 69495 + }, + { + "epoch": 3.24, + "learning_rate": 9.266847459752637e-06, + "loss": 0.0401, + "step": 69500 + }, + { + "epoch": 3.24, + "learning_rate": 9.266063674697851e-06, + "loss": 0.072, + "step": 69505 + }, + { + "epoch": 3.24, + "learning_rate": 9.265279889643065e-06, + "loss": 0.0282, + "step": 69510 + }, + { + "epoch": 3.24, + "learning_rate": 9.264496104588279e-06, + "loss": 0.0697, + "step": 69515 + }, + { + "epoch": 3.24, + "learning_rate": 9.263712319533493e-06, + "loss": 0.1149, + "step": 69520 + }, + { + "epoch": 3.24, + "learning_rate": 9.262928534478705e-06, + "loss": 0.0651, + "step": 69525 + }, + { + "epoch": 3.24, + "learning_rate": 9.262144749423919e-06, + "loss": 0.0796, + "step": 69530 + }, + { + "epoch": 3.24, + "learning_rate": 9.261360964369133e-06, + "loss": 0.1888, + "step": 69535 + }, + { + "epoch": 3.24, + "learning_rate": 9.260577179314345e-06, + "loss": 0.1374, + "step": 69540 + }, + { + "epoch": 3.25, + "learning_rate": 9.259793394259559e-06, + "loss": 0.2406, + "step": 69545 + }, + { + "epoch": 3.25, + "learning_rate": 9.259009609204771e-06, + "loss": 0.088, + "step": 69550 + }, + { + "epoch": 3.25, + "learning_rate": 9.258225824149985e-06, + "loss": 0.1102, + "step": 69555 + }, + { + "epoch": 3.25, + "learning_rate": 9.257442039095199e-06, + "loss": 0.014, + "step": 69560 + }, + { + "epoch": 3.25, + "learning_rate": 9.256658254040413e-06, + "loss": 0.0231, + "step": 69565 + }, + { + "epoch": 3.25, + "learning_rate": 9.255874468985627e-06, + "loss": 0.0708, + "step": 69570 + }, + { + "epoch": 3.25, + "learning_rate": 9.255090683930839e-06, + "loss": 0.0572, + "step": 69575 + }, + { + "epoch": 3.25, + "learning_rate": 9.254306898876053e-06, + "loss": 0.0775, + "step": 69580 + }, + { + "epoch": 3.25, + "learning_rate": 9.253523113821267e-06, + "loss": 0.1815, + "step": 69585 + }, + { + "epoch": 3.25, + "learning_rate": 9.25273932876648e-06, + "loss": 0.2664, + "step": 69590 + }, + { + "epoch": 3.25, + "learning_rate": 9.251955543711693e-06, + "loss": 0.308, + "step": 69595 + }, + { + "epoch": 3.25, + "learning_rate": 9.251171758656907e-06, + "loss": 0.1139, + "step": 69600 + }, + { + "epoch": 3.25, + "learning_rate": 9.25038797360212e-06, + "loss": 0.0147, + "step": 69605 + }, + { + "epoch": 3.25, + "learning_rate": 9.249604188547333e-06, + "loss": 0.0446, + "step": 69610 + }, + { + "epoch": 3.25, + "learning_rate": 9.248820403492547e-06, + "loss": 0.0582, + "step": 69615 + }, + { + "epoch": 3.25, + "learning_rate": 9.248036618437761e-06, + "loss": 0.0645, + "step": 69620 + }, + { + "epoch": 3.25, + "learning_rate": 9.247252833382973e-06, + "loss": 0.103, + "step": 69625 + }, + { + "epoch": 3.25, + "learning_rate": 9.246469048328187e-06, + "loss": 0.0929, + "step": 69630 + }, + { + "epoch": 3.25, + "learning_rate": 9.245685263273401e-06, + "loss": 0.104, + "step": 69635 + }, + { + "epoch": 3.25, + "learning_rate": 9.244901478218615e-06, + "loss": 0.2659, + "step": 69640 + }, + { + "epoch": 3.25, + "learning_rate": 9.244117693163827e-06, + "loss": 0.2342, + "step": 69645 + }, + { + "epoch": 3.25, + "learning_rate": 9.243333908109041e-06, + "loss": 0.6174, + "step": 69650 + }, + { + "epoch": 3.25, + "learning_rate": 9.242550123054255e-06, + "loss": 0.0114, + "step": 69655 + }, + { + "epoch": 3.25, + "learning_rate": 9.241766337999469e-06, + "loss": 0.0766, + "step": 69660 + }, + { + "epoch": 3.25, + "learning_rate": 9.240982552944681e-06, + "loss": 0.0467, + "step": 69665 + }, + { + "epoch": 3.25, + "learning_rate": 9.240198767889895e-06, + "loss": 0.0568, + "step": 69670 + }, + { + "epoch": 3.25, + "learning_rate": 9.239414982835107e-06, + "loss": 0.1458, + "step": 69675 + }, + { + "epoch": 3.25, + "learning_rate": 9.238631197780321e-06, + "loss": 0.2081, + "step": 69680 + }, + { + "epoch": 3.25, + "learning_rate": 9.237847412725535e-06, + "loss": 0.2514, + "step": 69685 + }, + { + "epoch": 3.25, + "learning_rate": 9.237063627670749e-06, + "loss": 0.1977, + "step": 69690 + }, + { + "epoch": 3.25, + "learning_rate": 9.236279842615961e-06, + "loss": 0.2282, + "step": 69695 + }, + { + "epoch": 3.25, + "learning_rate": 9.235496057561175e-06, + "loss": 0.0591, + "step": 69700 + }, + { + "epoch": 3.25, + "learning_rate": 9.234712272506389e-06, + "loss": 0.0384, + "step": 69705 + }, + { + "epoch": 3.25, + "learning_rate": 9.233928487451603e-06, + "loss": 0.0815, + "step": 69710 + }, + { + "epoch": 3.25, + "learning_rate": 9.233144702396815e-06, + "loss": 0.0774, + "step": 69715 + }, + { + "epoch": 3.25, + "learning_rate": 9.232360917342029e-06, + "loss": 0.0509, + "step": 69720 + }, + { + "epoch": 3.25, + "learning_rate": 9.231577132287243e-06, + "loss": 0.0877, + "step": 69725 + }, + { + "epoch": 3.25, + "learning_rate": 9.230793347232457e-06, + "loss": 0.1008, + "step": 69730 + }, + { + "epoch": 3.25, + "learning_rate": 9.230009562177669e-06, + "loss": 0.126, + "step": 69735 + }, + { + "epoch": 3.25, + "learning_rate": 9.229225777122883e-06, + "loss": 0.1484, + "step": 69740 + }, + { + "epoch": 3.25, + "learning_rate": 9.228441992068095e-06, + "loss": 0.2838, + "step": 69745 + }, + { + "epoch": 3.25, + "learning_rate": 9.227658207013309e-06, + "loss": 0.1356, + "step": 69750 + }, + { + "epoch": 3.25, + "learning_rate": 9.226874421958523e-06, + "loss": 0.0381, + "step": 69755 + }, + { + "epoch": 3.26, + "learning_rate": 9.226090636903737e-06, + "loss": 0.0354, + "step": 69760 + }, + { + "epoch": 3.26, + "learning_rate": 9.225306851848949e-06, + "loss": 0.0649, + "step": 69765 + }, + { + "epoch": 3.26, + "learning_rate": 9.224523066794163e-06, + "loss": 0.0909, + "step": 69770 + }, + { + "epoch": 3.26, + "learning_rate": 9.223739281739377e-06, + "loss": 0.0969, + "step": 69775 + }, + { + "epoch": 3.26, + "learning_rate": 9.22295549668459e-06, + "loss": 0.1227, + "step": 69780 + }, + { + "epoch": 3.26, + "learning_rate": 9.222171711629805e-06, + "loss": 0.1145, + "step": 69785 + }, + { + "epoch": 3.26, + "learning_rate": 9.221387926575017e-06, + "loss": 0.2365, + "step": 69790 + }, + { + "epoch": 3.26, + "learning_rate": 9.22060414152023e-06, + "loss": 0.2366, + "step": 69795 + }, + { + "epoch": 3.26, + "learning_rate": 9.219820356465443e-06, + "loss": 0.1243, + "step": 69800 + }, + { + "epoch": 3.26, + "learning_rate": 9.219036571410657e-06, + "loss": 0.0516, + "step": 69805 + }, + { + "epoch": 3.26, + "learning_rate": 9.21825278635587e-06, + "loss": 0.0249, + "step": 69810 + }, + { + "epoch": 3.26, + "learning_rate": 9.217469001301083e-06, + "loss": 0.0937, + "step": 69815 + }, + { + "epoch": 3.26, + "learning_rate": 9.216685216246297e-06, + "loss": 0.1407, + "step": 69820 + }, + { + "epoch": 3.26, + "learning_rate": 9.21590143119151e-06, + "loss": 0.0835, + "step": 69825 + }, + { + "epoch": 3.26, + "learning_rate": 9.215117646136725e-06, + "loss": 0.104, + "step": 69830 + }, + { + "epoch": 3.26, + "learning_rate": 9.214333861081939e-06, + "loss": 0.2058, + "step": 69835 + }, + { + "epoch": 3.26, + "learning_rate": 9.21355007602715e-06, + "loss": 0.2023, + "step": 69840 + }, + { + "epoch": 3.26, + "learning_rate": 9.212766290972365e-06, + "loss": 0.3072, + "step": 69845 + }, + { + "epoch": 3.26, + "learning_rate": 9.211982505917579e-06, + "loss": 0.0897, + "step": 69850 + }, + { + "epoch": 3.26, + "learning_rate": 9.211198720862792e-06, + "loss": 0.036, + "step": 69855 + }, + { + "epoch": 3.26, + "learning_rate": 9.210414935808005e-06, + "loss": 0.0343, + "step": 69860 + }, + { + "epoch": 3.26, + "learning_rate": 9.209631150753217e-06, + "loss": 0.0693, + "step": 69865 + }, + { + "epoch": 3.26, + "learning_rate": 9.20884736569843e-06, + "loss": 0.0274, + "step": 69870 + }, + { + "epoch": 3.26, + "learning_rate": 9.208063580643645e-06, + "loss": 0.0446, + "step": 69875 + }, + { + "epoch": 3.26, + "learning_rate": 9.207279795588859e-06, + "loss": 0.1388, + "step": 69880 + }, + { + "epoch": 3.26, + "learning_rate": 9.206496010534072e-06, + "loss": 0.1066, + "step": 69885 + }, + { + "epoch": 3.26, + "learning_rate": 9.205712225479285e-06, + "loss": 0.1121, + "step": 69890 + }, + { + "epoch": 3.26, + "learning_rate": 9.204928440424499e-06, + "loss": 0.283, + "step": 69895 + }, + { + "epoch": 3.26, + "learning_rate": 9.204144655369713e-06, + "loss": 0.0839, + "step": 69900 + }, + { + "epoch": 3.26, + "learning_rate": 9.203360870314926e-06, + "loss": 0.053, + "step": 69905 + }, + { + "epoch": 3.26, + "learning_rate": 9.202577085260139e-06, + "loss": 0.0771, + "step": 69910 + }, + { + "epoch": 3.26, + "learning_rate": 9.201793300205353e-06, + "loss": 0.1367, + "step": 69915 + }, + { + "epoch": 3.26, + "learning_rate": 9.201009515150566e-06, + "loss": 0.0701, + "step": 69920 + }, + { + "epoch": 3.26, + "learning_rate": 9.20022573009578e-06, + "loss": 0.0521, + "step": 69925 + }, + { + "epoch": 3.26, + "learning_rate": 9.199441945040993e-06, + "loss": 0.1232, + "step": 69930 + }, + { + "epoch": 3.26, + "learning_rate": 9.198658159986206e-06, + "loss": 0.0843, + "step": 69935 + }, + { + "epoch": 3.26, + "learning_rate": 9.197874374931419e-06, + "loss": 0.2965, + "step": 69940 + }, + { + "epoch": 3.26, + "learning_rate": 9.197090589876633e-06, + "loss": 0.277, + "step": 69945 + }, + { + "epoch": 3.26, + "learning_rate": 9.196306804821846e-06, + "loss": 0.0945, + "step": 69950 + }, + { + "epoch": 3.26, + "learning_rate": 9.19552301976706e-06, + "loss": 0.0235, + "step": 69955 + }, + { + "epoch": 3.26, + "learning_rate": 9.194739234712273e-06, + "loss": 0.0263, + "step": 69960 + }, + { + "epoch": 3.26, + "learning_rate": 9.193955449657487e-06, + "loss": 0.0488, + "step": 69965 + }, + { + "epoch": 3.26, + "learning_rate": 9.1931716646027e-06, + "loss": 0.0679, + "step": 69970 + }, + { + "epoch": 3.27, + "learning_rate": 9.192387879547914e-06, + "loss": 0.1051, + "step": 69975 + }, + { + "epoch": 3.27, + "learning_rate": 9.191604094493127e-06, + "loss": 0.1029, + "step": 69980 + }, + { + "epoch": 3.27, + "learning_rate": 9.19082030943834e-06, + "loss": 0.1088, + "step": 69985 + }, + { + "epoch": 3.27, + "learning_rate": 9.190036524383554e-06, + "loss": 0.1562, + "step": 69990 + }, + { + "epoch": 3.27, + "learning_rate": 9.189252739328767e-06, + "loss": 0.2521, + "step": 69995 + }, + { + "epoch": 3.27, + "learning_rate": 9.18846895427398e-06, + "loss": 0.0542, + "step": 70000 + }, + { + "epoch": 3.27, + "learning_rate": 9.187685169219194e-06, + "loss": 0.0405, + "step": 70005 + }, + { + "epoch": 3.27, + "learning_rate": 9.186901384164407e-06, + "loss": 0.0403, + "step": 70010 + }, + { + "epoch": 3.27, + "learning_rate": 9.18611759910962e-06, + "loss": 0.0638, + "step": 70015 + }, + { + "epoch": 3.27, + "learning_rate": 9.185333814054834e-06, + "loss": 0.1057, + "step": 70020 + }, + { + "epoch": 3.27, + "learning_rate": 9.184550029000048e-06, + "loss": 0.0915, + "step": 70025 + }, + { + "epoch": 3.27, + "learning_rate": 9.18376624394526e-06, + "loss": 0.0756, + "step": 70030 + }, + { + "epoch": 3.27, + "learning_rate": 9.182982458890474e-06, + "loss": 0.1864, + "step": 70035 + }, + { + "epoch": 3.27, + "learning_rate": 9.182198673835688e-06, + "loss": 0.1794, + "step": 70040 + }, + { + "epoch": 3.27, + "learning_rate": 9.181414888780902e-06, + "loss": 0.3143, + "step": 70045 + }, + { + "epoch": 3.27, + "learning_rate": 9.180631103726116e-06, + "loss": 0.0461, + "step": 70050 + }, + { + "epoch": 3.27, + "learning_rate": 9.179847318671328e-06, + "loss": 0.0468, + "step": 70055 + }, + { + "epoch": 3.27, + "learning_rate": 9.17906353361654e-06, + "loss": 0.058, + "step": 70060 + }, + { + "epoch": 3.27, + "learning_rate": 9.178279748561754e-06, + "loss": 0.0569, + "step": 70065 + }, + { + "epoch": 3.27, + "learning_rate": 9.177495963506968e-06, + "loss": 0.0887, + "step": 70070 + }, + { + "epoch": 3.27, + "learning_rate": 9.176712178452182e-06, + "loss": 0.0843, + "step": 70075 + }, + { + "epoch": 3.27, + "learning_rate": 9.175928393397394e-06, + "loss": 0.1165, + "step": 70080 + }, + { + "epoch": 3.27, + "learning_rate": 9.175144608342608e-06, + "loss": 0.1447, + "step": 70085 + }, + { + "epoch": 3.27, + "learning_rate": 9.174360823287822e-06, + "loss": 0.1615, + "step": 70090 + }, + { + "epoch": 3.27, + "learning_rate": 9.173577038233036e-06, + "loss": 0.2179, + "step": 70095 + }, + { + "epoch": 3.27, + "learning_rate": 9.17279325317825e-06, + "loss": 0.0924, + "step": 70100 + }, + { + "epoch": 3.27, + "learning_rate": 9.172009468123462e-06, + "loss": 0.0574, + "step": 70105 + }, + { + "epoch": 3.27, + "learning_rate": 9.171225683068676e-06, + "loss": 0.0505, + "step": 70110 + }, + { + "epoch": 3.27, + "learning_rate": 9.17044189801389e-06, + "loss": 0.0461, + "step": 70115 + }, + { + "epoch": 3.27, + "learning_rate": 9.169658112959104e-06, + "loss": 0.1141, + "step": 70120 + }, + { + "epoch": 3.27, + "learning_rate": 9.168874327904316e-06, + "loss": 0.1524, + "step": 70125 + }, + { + "epoch": 3.27, + "learning_rate": 9.168090542849528e-06, + "loss": 0.0598, + "step": 70130 + }, + { + "epoch": 3.27, + "learning_rate": 9.167306757794742e-06, + "loss": 0.1248, + "step": 70135 + }, + { + "epoch": 3.27, + "learning_rate": 9.166522972739956e-06, + "loss": 0.1644, + "step": 70140 + }, + { + "epoch": 3.27, + "learning_rate": 9.16573918768517e-06, + "loss": 0.1953, + "step": 70145 + }, + { + "epoch": 3.27, + "learning_rate": 9.164955402630384e-06, + "loss": 0.0394, + "step": 70150 + }, + { + "epoch": 3.27, + "learning_rate": 9.164171617575596e-06, + "loss": 0.0543, + "step": 70155 + }, + { + "epoch": 3.27, + "learning_rate": 9.16338783252081e-06, + "loss": 0.0456, + "step": 70160 + }, + { + "epoch": 3.27, + "learning_rate": 9.162604047466024e-06, + "loss": 0.0717, + "step": 70165 + }, + { + "epoch": 3.27, + "learning_rate": 9.161820262411238e-06, + "loss": 0.1553, + "step": 70170 + }, + { + "epoch": 3.27, + "learning_rate": 9.16103647735645e-06, + "loss": 0.0369, + "step": 70175 + }, + { + "epoch": 3.27, + "learning_rate": 9.160252692301664e-06, + "loss": 0.11, + "step": 70180 + }, + { + "epoch": 3.27, + "learning_rate": 9.159468907246878e-06, + "loss": 0.1583, + "step": 70185 + }, + { + "epoch": 3.28, + "learning_rate": 9.15868512219209e-06, + "loss": 0.2984, + "step": 70190 + }, + { + "epoch": 3.28, + "learning_rate": 9.157901337137304e-06, + "loss": 0.2892, + "step": 70195 + }, + { + "epoch": 3.28, + "learning_rate": 9.157117552082518e-06, + "loss": 0.0816, + "step": 70200 + }, + { + "epoch": 3.28, + "learning_rate": 9.15633376702773e-06, + "loss": 0.0477, + "step": 70205 + }, + { + "epoch": 3.28, + "learning_rate": 9.155549981972944e-06, + "loss": 0.0191, + "step": 70210 + }, + { + "epoch": 3.28, + "learning_rate": 9.154766196918158e-06, + "loss": 0.058, + "step": 70215 + }, + { + "epoch": 3.28, + "learning_rate": 9.153982411863372e-06, + "loss": 0.0377, + "step": 70220 + }, + { + "epoch": 3.28, + "learning_rate": 9.153198626808584e-06, + "loss": 0.1391, + "step": 70225 + }, + { + "epoch": 3.28, + "learning_rate": 9.152414841753798e-06, + "loss": 0.1003, + "step": 70230 + }, + { + "epoch": 3.28, + "learning_rate": 9.151631056699012e-06, + "loss": 0.1134, + "step": 70235 + }, + { + "epoch": 3.28, + "learning_rate": 9.150847271644226e-06, + "loss": 0.2391, + "step": 70240 + }, + { + "epoch": 3.28, + "learning_rate": 9.150063486589438e-06, + "loss": 0.1599, + "step": 70245 + }, + { + "epoch": 3.28, + "learning_rate": 9.149279701534652e-06, + "loss": 0.0952, + "step": 70250 + }, + { + "epoch": 3.28, + "learning_rate": 9.148495916479864e-06, + "loss": 0.0636, + "step": 70255 + }, + { + "epoch": 3.28, + "learning_rate": 9.147712131425078e-06, + "loss": 0.0402, + "step": 70260 + }, + { + "epoch": 3.28, + "learning_rate": 9.146928346370292e-06, + "loss": 0.0885, + "step": 70265 + }, + { + "epoch": 3.28, + "learning_rate": 9.146144561315506e-06, + "loss": 0.1678, + "step": 70270 + }, + { + "epoch": 3.28, + "learning_rate": 9.145360776260718e-06, + "loss": 0.0763, + "step": 70275 + }, + { + "epoch": 3.28, + "learning_rate": 9.144576991205932e-06, + "loss": 0.0798, + "step": 70280 + }, + { + "epoch": 3.28, + "learning_rate": 9.143793206151146e-06, + "loss": 0.1155, + "step": 70285 + }, + { + "epoch": 3.28, + "learning_rate": 9.14300942109636e-06, + "loss": 0.2216, + "step": 70290 + }, + { + "epoch": 3.28, + "learning_rate": 9.142225636041572e-06, + "loss": 0.288, + "step": 70295 + }, + { + "epoch": 3.28, + "learning_rate": 9.141441850986786e-06, + "loss": 0.0696, + "step": 70300 + }, + { + "epoch": 3.28, + "learning_rate": 9.140658065932e-06, + "loss": 0.0834, + "step": 70305 + }, + { + "epoch": 3.28, + "learning_rate": 9.139874280877214e-06, + "loss": 0.0411, + "step": 70310 + }, + { + "epoch": 3.28, + "learning_rate": 9.139090495822428e-06, + "loss": 0.0357, + "step": 70315 + }, + { + "epoch": 3.28, + "learning_rate": 9.13830671076764e-06, + "loss": 0.1072, + "step": 70320 + }, + { + "epoch": 3.28, + "learning_rate": 9.137522925712852e-06, + "loss": 0.0559, + "step": 70325 + }, + { + "epoch": 3.28, + "learning_rate": 9.136739140658066e-06, + "loss": 0.1139, + "step": 70330 + }, + { + "epoch": 3.28, + "learning_rate": 9.13595535560328e-06, + "loss": 0.1853, + "step": 70335 + }, + { + "epoch": 3.28, + "learning_rate": 9.135171570548494e-06, + "loss": 0.1746, + "step": 70340 + }, + { + "epoch": 3.28, + "learning_rate": 9.134387785493706e-06, + "loss": 0.2631, + "step": 70345 + }, + { + "epoch": 3.28, + "learning_rate": 9.13360400043892e-06, + "loss": 0.0738, + "step": 70350 + }, + { + "epoch": 3.28, + "learning_rate": 9.132820215384134e-06, + "loss": 0.018, + "step": 70355 + }, + { + "epoch": 3.28, + "learning_rate": 9.132036430329348e-06, + "loss": 0.0448, + "step": 70360 + }, + { + "epoch": 3.28, + "learning_rate": 9.131252645274562e-06, + "loss": 0.063, + "step": 70365 + }, + { + "epoch": 3.28, + "learning_rate": 9.130468860219774e-06, + "loss": 0.0829, + "step": 70370 + }, + { + "epoch": 3.28, + "learning_rate": 9.129685075164988e-06, + "loss": 0.0777, + "step": 70375 + }, + { + "epoch": 3.28, + "learning_rate": 9.128901290110202e-06, + "loss": 0.0711, + "step": 70380 + }, + { + "epoch": 3.28, + "learning_rate": 9.128117505055414e-06, + "loss": 0.1215, + "step": 70385 + }, + { + "epoch": 3.28, + "learning_rate": 9.127333720000628e-06, + "loss": 0.171, + "step": 70390 + }, + { + "epoch": 3.28, + "learning_rate": 9.12654993494584e-06, + "loss": 0.3134, + "step": 70395 + }, + { + "epoch": 3.28, + "learning_rate": 9.125766149891054e-06, + "loss": 0.0421, + "step": 70400 + }, + { + "epoch": 3.29, + "learning_rate": 9.124982364836268e-06, + "loss": 0.0682, + "step": 70405 + }, + { + "epoch": 3.29, + "learning_rate": 9.124198579781482e-06, + "loss": 0.0628, + "step": 70410 + }, + { + "epoch": 3.29, + "learning_rate": 9.123571551737652e-06, + "loss": 0.0632, + "step": 70415 + }, + { + "epoch": 3.29, + "learning_rate": 9.122787766682866e-06, + "loss": 0.0502, + "step": 70420 + }, + { + "epoch": 3.29, + "learning_rate": 9.12200398162808e-06, + "loss": 0.0768, + "step": 70425 + }, + { + "epoch": 3.29, + "learning_rate": 9.121220196573292e-06, + "loss": 0.114, + "step": 70430 + }, + { + "epoch": 3.29, + "learning_rate": 9.120436411518506e-06, + "loss": 0.2299, + "step": 70435 + }, + { + "epoch": 3.29, + "learning_rate": 9.11965262646372e-06, + "loss": 0.1957, + "step": 70440 + }, + { + "epoch": 3.29, + "learning_rate": 9.118868841408934e-06, + "loss": 0.3038, + "step": 70445 + }, + { + "epoch": 3.29, + "learning_rate": 9.118085056354146e-06, + "loss": 0.0454, + "step": 70450 + }, + { + "epoch": 3.29, + "learning_rate": 9.117301271299358e-06, + "loss": 0.0615, + "step": 70455 + }, + { + "epoch": 3.29, + "learning_rate": 9.116517486244572e-06, + "loss": 0.0582, + "step": 70460 + }, + { + "epoch": 3.29, + "learning_rate": 9.115733701189786e-06, + "loss": 0.113, + "step": 70465 + }, + { + "epoch": 3.29, + "learning_rate": 9.114949916135e-06, + "loss": 0.0629, + "step": 70470 + }, + { + "epoch": 3.29, + "learning_rate": 9.114166131080214e-06, + "loss": 0.0821, + "step": 70475 + }, + { + "epoch": 3.29, + "learning_rate": 9.113382346025426e-06, + "loss": 0.1246, + "step": 70480 + }, + { + "epoch": 3.29, + "learning_rate": 9.11259856097064e-06, + "loss": 0.199, + "step": 70485 + }, + { + "epoch": 3.29, + "learning_rate": 9.111814775915854e-06, + "loss": 0.2589, + "step": 70490 + }, + { + "epoch": 3.29, + "learning_rate": 9.111030990861068e-06, + "loss": 0.2966, + "step": 70495 + }, + { + "epoch": 3.29, + "learning_rate": 9.11024720580628e-06, + "loss": 0.0677, + "step": 70500 + }, + { + "epoch": 3.29, + "learning_rate": 9.109463420751494e-06, + "loss": 0.0092, + "step": 70505 + }, + { + "epoch": 3.29, + "learning_rate": 9.108679635696708e-06, + "loss": 0.0471, + "step": 70510 + }, + { + "epoch": 3.29, + "learning_rate": 9.10789585064192e-06, + "loss": 0.0891, + "step": 70515 + }, + { + "epoch": 3.29, + "learning_rate": 9.107112065587134e-06, + "loss": 0.1139, + "step": 70520 + }, + { + "epoch": 3.29, + "learning_rate": 9.106328280532348e-06, + "loss": 0.0479, + "step": 70525 + }, + { + "epoch": 3.29, + "learning_rate": 9.10554449547756e-06, + "loss": 0.0951, + "step": 70530 + }, + { + "epoch": 3.29, + "learning_rate": 9.104760710422774e-06, + "loss": 0.1251, + "step": 70535 + }, + { + "epoch": 3.29, + "learning_rate": 9.103976925367988e-06, + "loss": 0.1632, + "step": 70540 + }, + { + "epoch": 3.29, + "learning_rate": 9.103193140313202e-06, + "loss": 0.3112, + "step": 70545 + }, + { + "epoch": 3.29, + "learning_rate": 9.102409355258414e-06, + "loss": 0.0694, + "step": 70550 + }, + { + "epoch": 3.29, + "learning_rate": 9.101625570203628e-06, + "loss": 0.0419, + "step": 70555 + }, + { + "epoch": 3.29, + "learning_rate": 9.100841785148842e-06, + "loss": 0.0534, + "step": 70560 + }, + { + "epoch": 3.29, + "learning_rate": 9.100058000094056e-06, + "loss": 0.0536, + "step": 70565 + }, + { + "epoch": 3.29, + "learning_rate": 9.099274215039268e-06, + "loss": 0.0873, + "step": 70570 + }, + { + "epoch": 3.29, + "learning_rate": 9.098490429984482e-06, + "loss": 0.1011, + "step": 70575 + }, + { + "epoch": 3.29, + "learning_rate": 9.097706644929694e-06, + "loss": 0.1586, + "step": 70580 + }, + { + "epoch": 3.29, + "learning_rate": 9.096922859874908e-06, + "loss": 0.1024, + "step": 70585 + }, + { + "epoch": 3.29, + "learning_rate": 9.096139074820122e-06, + "loss": 0.1611, + "step": 70590 + }, + { + "epoch": 3.29, + "learning_rate": 9.095355289765336e-06, + "loss": 0.3784, + "step": 70595 + }, + { + "epoch": 3.29, + "learning_rate": 9.094571504710548e-06, + "loss": 0.1113, + "step": 70600 + }, + { + "epoch": 3.29, + "learning_rate": 9.093787719655762e-06, + "loss": 0.0555, + "step": 70605 + }, + { + "epoch": 3.29, + "learning_rate": 9.093003934600976e-06, + "loss": 0.0418, + "step": 70610 + }, + { + "epoch": 3.29, + "learning_rate": 9.09222014954619e-06, + "loss": 0.0386, + "step": 70615 + }, + { + "epoch": 3.3, + "learning_rate": 9.091436364491402e-06, + "loss": 0.076, + "step": 70620 + }, + { + "epoch": 3.3, + "learning_rate": 9.090652579436616e-06, + "loss": 0.0295, + "step": 70625 + }, + { + "epoch": 3.3, + "learning_rate": 9.08986879438183e-06, + "loss": 0.1095, + "step": 70630 + }, + { + "epoch": 3.3, + "learning_rate": 9.089085009327044e-06, + "loss": 0.1026, + "step": 70635 + }, + { + "epoch": 3.3, + "learning_rate": 9.088301224272258e-06, + "loss": 0.2221, + "step": 70640 + }, + { + "epoch": 3.3, + "learning_rate": 9.08751743921747e-06, + "loss": 0.2452, + "step": 70645 + }, + { + "epoch": 3.3, + "learning_rate": 9.086733654162682e-06, + "loss": 0.0779, + "step": 70650 + }, + { + "epoch": 3.3, + "learning_rate": 9.085949869107896e-06, + "loss": 0.0415, + "step": 70655 + }, + { + "epoch": 3.3, + "learning_rate": 9.08516608405311e-06, + "loss": 0.065, + "step": 70660 + }, + { + "epoch": 3.3, + "learning_rate": 9.084382298998324e-06, + "loss": 0.1131, + "step": 70665 + }, + { + "epoch": 3.3, + "learning_rate": 9.083598513943536e-06, + "loss": 0.0668, + "step": 70670 + }, + { + "epoch": 3.3, + "learning_rate": 9.08281472888875e-06, + "loss": 0.0669, + "step": 70675 + }, + { + "epoch": 3.3, + "learning_rate": 9.082030943833964e-06, + "loss": 0.1014, + "step": 70680 + }, + { + "epoch": 3.3, + "learning_rate": 9.081247158779178e-06, + "loss": 0.0679, + "step": 70685 + }, + { + "epoch": 3.3, + "learning_rate": 9.080463373724392e-06, + "loss": 0.1369, + "step": 70690 + }, + { + "epoch": 3.3, + "learning_rate": 9.079679588669604e-06, + "loss": 0.3386, + "step": 70695 + }, + { + "epoch": 3.3, + "learning_rate": 9.078895803614818e-06, + "loss": 0.097, + "step": 70700 + }, + { + "epoch": 3.3, + "learning_rate": 9.078112018560032e-06, + "loss": 0.0637, + "step": 70705 + }, + { + "epoch": 3.3, + "learning_rate": 9.077328233505244e-06, + "loss": 0.0306, + "step": 70710 + }, + { + "epoch": 3.3, + "learning_rate": 9.076544448450458e-06, + "loss": 0.0302, + "step": 70715 + }, + { + "epoch": 3.3, + "learning_rate": 9.07576066339567e-06, + "loss": 0.0902, + "step": 70720 + }, + { + "epoch": 3.3, + "learning_rate": 9.074976878340884e-06, + "loss": 0.1711, + "step": 70725 + }, + { + "epoch": 3.3, + "learning_rate": 9.074193093286098e-06, + "loss": 0.0799, + "step": 70730 + }, + { + "epoch": 3.3, + "learning_rate": 9.073409308231312e-06, + "loss": 0.0986, + "step": 70735 + }, + { + "epoch": 3.3, + "learning_rate": 9.072625523176526e-06, + "loss": 0.2437, + "step": 70740 + }, + { + "epoch": 3.3, + "learning_rate": 9.071841738121738e-06, + "loss": 0.3259, + "step": 70745 + }, + { + "epoch": 3.3, + "learning_rate": 9.071057953066952e-06, + "loss": 0.0754, + "step": 70750 + }, + { + "epoch": 3.3, + "learning_rate": 9.070274168012166e-06, + "loss": 0.0394, + "step": 70755 + }, + { + "epoch": 3.3, + "learning_rate": 9.06949038295738e-06, + "loss": 0.0298, + "step": 70760 + }, + { + "epoch": 3.3, + "learning_rate": 9.068706597902592e-06, + "loss": 0.0491, + "step": 70765 + }, + { + "epoch": 3.3, + "learning_rate": 9.067922812847806e-06, + "loss": 0.0504, + "step": 70770 + }, + { + "epoch": 3.3, + "learning_rate": 9.067139027793018e-06, + "loss": 0.0863, + "step": 70775 + }, + { + "epoch": 3.3, + "learning_rate": 9.066355242738232e-06, + "loss": 0.133, + "step": 70780 + }, + { + "epoch": 3.3, + "learning_rate": 9.065571457683446e-06, + "loss": 0.1856, + "step": 70785 + }, + { + "epoch": 3.3, + "learning_rate": 9.06478767262866e-06, + "loss": 0.33, + "step": 70790 + }, + { + "epoch": 3.3, + "learning_rate": 9.064003887573872e-06, + "loss": 0.293, + "step": 70795 + }, + { + "epoch": 3.3, + "learning_rate": 9.063220102519086e-06, + "loss": 0.0919, + "step": 70800 + }, + { + "epoch": 3.3, + "learning_rate": 9.0624363174643e-06, + "loss": 0.0686, + "step": 70805 + }, + { + "epoch": 3.3, + "learning_rate": 9.061652532409513e-06, + "loss": 0.0482, + "step": 70810 + }, + { + "epoch": 3.3, + "learning_rate": 9.060868747354726e-06, + "loss": 0.0436, + "step": 70815 + }, + { + "epoch": 3.3, + "learning_rate": 9.06008496229994e-06, + "loss": 0.1321, + "step": 70820 + }, + { + "epoch": 3.3, + "learning_rate": 9.059301177245154e-06, + "loss": 0.0897, + "step": 70825 + }, + { + "epoch": 3.31, + "learning_rate": 9.058517392190367e-06, + "loss": 0.1366, + "step": 70830 + }, + { + "epoch": 3.31, + "learning_rate": 9.05773360713558e-06, + "loss": 0.0788, + "step": 70835 + }, + { + "epoch": 3.31, + "learning_rate": 9.056949822080794e-06, + "loss": 0.1021, + "step": 70840 + }, + { + "epoch": 3.31, + "learning_rate": 9.056166037026006e-06, + "loss": 0.4001, + "step": 70845 + }, + { + "epoch": 3.31, + "learning_rate": 9.05538225197122e-06, + "loss": 0.0607, + "step": 70850 + }, + { + "epoch": 3.31, + "learning_rate": 9.054598466916434e-06, + "loss": 0.0377, + "step": 70855 + }, + { + "epoch": 3.31, + "learning_rate": 9.053814681861647e-06, + "loss": 0.0581, + "step": 70860 + }, + { + "epoch": 3.31, + "learning_rate": 9.05303089680686e-06, + "loss": 0.0553, + "step": 70865 + }, + { + "epoch": 3.31, + "learning_rate": 9.052247111752074e-06, + "loss": 0.0653, + "step": 70870 + }, + { + "epoch": 3.31, + "learning_rate": 9.051463326697287e-06, + "loss": 0.17, + "step": 70875 + }, + { + "epoch": 3.31, + "learning_rate": 9.050679541642501e-06, + "loss": 0.1759, + "step": 70880 + }, + { + "epoch": 3.31, + "learning_rate": 9.049895756587714e-06, + "loss": 0.0962, + "step": 70885 + }, + { + "epoch": 3.31, + "learning_rate": 9.049111971532928e-06, + "loss": 0.1679, + "step": 70890 + }, + { + "epoch": 3.31, + "learning_rate": 9.048328186478141e-06, + "loss": 0.3484, + "step": 70895 + }, + { + "epoch": 3.31, + "learning_rate": 9.047544401423355e-06, + "loss": 0.0895, + "step": 70900 + }, + { + "epoch": 3.31, + "learning_rate": 9.046760616368568e-06, + "loss": 0.0192, + "step": 70905 + }, + { + "epoch": 3.31, + "learning_rate": 9.045976831313781e-06, + "loss": 0.0598, + "step": 70910 + }, + { + "epoch": 3.31, + "learning_rate": 9.045193046258994e-06, + "loss": 0.0646, + "step": 70915 + }, + { + "epoch": 3.31, + "learning_rate": 9.044409261204208e-06, + "loss": 0.125, + "step": 70920 + }, + { + "epoch": 3.31, + "learning_rate": 9.043625476149421e-06, + "loss": 0.0955, + "step": 70925 + }, + { + "epoch": 3.31, + "learning_rate": 9.042841691094635e-06, + "loss": 0.0751, + "step": 70930 + }, + { + "epoch": 3.31, + "learning_rate": 9.042057906039848e-06, + "loss": 0.0934, + "step": 70935 + }, + { + "epoch": 3.31, + "learning_rate": 9.041274120985061e-06, + "loss": 0.3588, + "step": 70940 + }, + { + "epoch": 3.31, + "learning_rate": 9.040490335930275e-06, + "loss": 0.4088, + "step": 70945 + }, + { + "epoch": 3.31, + "learning_rate": 9.03970655087549e-06, + "loss": 0.096, + "step": 70950 + }, + { + "epoch": 3.31, + "learning_rate": 9.038922765820703e-06, + "loss": 0.039, + "step": 70955 + }, + { + "epoch": 3.31, + "learning_rate": 9.038138980765915e-06, + "loss": 0.0466, + "step": 70960 + }, + { + "epoch": 3.31, + "learning_rate": 9.03735519571113e-06, + "loss": 0.056, + "step": 70965 + }, + { + "epoch": 3.31, + "learning_rate": 9.036571410656342e-06, + "loss": 0.0959, + "step": 70970 + }, + { + "epoch": 3.31, + "learning_rate": 9.035787625601555e-06, + "loss": 0.0983, + "step": 70975 + }, + { + "epoch": 3.31, + "learning_rate": 9.03500384054677e-06, + "loss": 0.0763, + "step": 70980 + }, + { + "epoch": 3.31, + "learning_rate": 9.034220055491982e-06, + "loss": 0.1379, + "step": 70985 + }, + { + "epoch": 3.31, + "learning_rate": 9.033436270437195e-06, + "loss": 0.1648, + "step": 70990 + }, + { + "epoch": 3.31, + "learning_rate": 9.03265248538241e-06, + "loss": 0.3926, + "step": 70995 + }, + { + "epoch": 3.31, + "learning_rate": 9.031868700327623e-06, + "loss": 0.0747, + "step": 71000 + }, + { + "epoch": 3.31, + "learning_rate": 9.031084915272837e-06, + "loss": 0.0096, + "step": 71005 + }, + { + "epoch": 3.31, + "learning_rate": 9.03030113021805e-06, + "loss": 0.0439, + "step": 71010 + }, + { + "epoch": 3.31, + "learning_rate": 9.029517345163263e-06, + "loss": 0.0545, + "step": 71015 + }, + { + "epoch": 3.31, + "learning_rate": 9.028733560108477e-06, + "loss": 0.0585, + "step": 71020 + }, + { + "epoch": 3.31, + "learning_rate": 9.027949775053691e-06, + "loss": 0.0719, + "step": 71025 + }, + { + "epoch": 3.31, + "learning_rate": 9.027165989998903e-06, + "loss": 0.0609, + "step": 71030 + }, + { + "epoch": 3.31, + "learning_rate": 9.026382204944116e-06, + "loss": 0.1347, + "step": 71035 + }, + { + "epoch": 3.31, + "learning_rate": 9.02559841988933e-06, + "loss": 0.2076, + "step": 71040 + }, + { + "epoch": 3.32, + "learning_rate": 9.024814634834543e-06, + "loss": 0.2844, + "step": 71045 + }, + { + "epoch": 3.32, + "learning_rate": 9.024030849779757e-06, + "loss": 0.0975, + "step": 71050 + }, + { + "epoch": 3.32, + "learning_rate": 9.023247064724971e-06, + "loss": 0.0456, + "step": 71055 + }, + { + "epoch": 3.32, + "learning_rate": 9.022463279670183e-06, + "loss": 0.0962, + "step": 71060 + }, + { + "epoch": 3.32, + "learning_rate": 9.021679494615397e-06, + "loss": 0.1066, + "step": 71065 + }, + { + "epoch": 3.32, + "learning_rate": 9.020895709560611e-06, + "loss": 0.0887, + "step": 71070 + }, + { + "epoch": 3.32, + "learning_rate": 9.020111924505825e-06, + "loss": 0.1813, + "step": 71075 + }, + { + "epoch": 3.32, + "learning_rate": 9.019328139451037e-06, + "loss": 0.1152, + "step": 71080 + }, + { + "epoch": 3.32, + "learning_rate": 9.018544354396251e-06, + "loss": 0.1858, + "step": 71085 + }, + { + "epoch": 3.32, + "learning_rate": 9.017760569341465e-06, + "loss": 0.2239, + "step": 71090 + }, + { + "epoch": 3.32, + "learning_rate": 9.016976784286679e-06, + "loss": 0.3627, + "step": 71095 + }, + { + "epoch": 3.32, + "learning_rate": 9.016192999231891e-06, + "loss": 0.0499, + "step": 71100 + }, + { + "epoch": 3.32, + "learning_rate": 9.015409214177105e-06, + "loss": 0.033, + "step": 71105 + }, + { + "epoch": 3.32, + "learning_rate": 9.014625429122317e-06, + "loss": 0.0734, + "step": 71110 + }, + { + "epoch": 3.32, + "learning_rate": 9.013841644067531e-06, + "loss": 0.0203, + "step": 71115 + }, + { + "epoch": 3.32, + "learning_rate": 9.013057859012745e-06, + "loss": 0.0896, + "step": 71120 + }, + { + "epoch": 3.32, + "learning_rate": 9.012274073957959e-06, + "loss": 0.0496, + "step": 71125 + }, + { + "epoch": 3.32, + "learning_rate": 9.011490288903171e-06, + "loss": 0.1073, + "step": 71130 + }, + { + "epoch": 3.32, + "learning_rate": 9.010706503848385e-06, + "loss": 0.2097, + "step": 71135 + }, + { + "epoch": 3.32, + "learning_rate": 9.009922718793599e-06, + "loss": 0.2172, + "step": 71140 + }, + { + "epoch": 3.32, + "learning_rate": 9.009138933738813e-06, + "loss": 0.3922, + "step": 71145 + }, + { + "epoch": 3.32, + "learning_rate": 9.008355148684025e-06, + "loss": 0.0941, + "step": 71150 + }, + { + "epoch": 3.32, + "learning_rate": 9.007571363629239e-06, + "loss": 0.0167, + "step": 71155 + }, + { + "epoch": 3.32, + "learning_rate": 9.006787578574453e-06, + "loss": 0.042, + "step": 71160 + }, + { + "epoch": 3.32, + "learning_rate": 9.006003793519665e-06, + "loss": 0.0617, + "step": 71165 + }, + { + "epoch": 3.32, + "learning_rate": 9.005220008464879e-06, + "loss": 0.0855, + "step": 71170 + }, + { + "epoch": 3.32, + "learning_rate": 9.004436223410093e-06, + "loss": 0.1239, + "step": 71175 + }, + { + "epoch": 3.32, + "learning_rate": 9.003652438355305e-06, + "loss": 0.0801, + "step": 71180 + }, + { + "epoch": 3.32, + "learning_rate": 9.00286865330052e-06, + "loss": 0.1824, + "step": 71185 + }, + { + "epoch": 3.32, + "learning_rate": 9.002084868245733e-06, + "loss": 0.1555, + "step": 71190 + }, + { + "epoch": 3.32, + "learning_rate": 9.001301083190947e-06, + "loss": 0.3987, + "step": 71195 + }, + { + "epoch": 3.32, + "learning_rate": 9.00051729813616e-06, + "loss": 0.0915, + "step": 71200 + }, + { + "epoch": 3.32, + "learning_rate": 8.999733513081373e-06, + "loss": 0.0749, + "step": 71205 + }, + { + "epoch": 3.32, + "learning_rate": 8.998949728026587e-06, + "loss": 0.0269, + "step": 71210 + }, + { + "epoch": 3.32, + "learning_rate": 8.998165942971801e-06, + "loss": 0.0165, + "step": 71215 + }, + { + "epoch": 3.32, + "learning_rate": 8.997382157917015e-06, + "loss": 0.0827, + "step": 71220 + }, + { + "epoch": 3.32, + "learning_rate": 8.996598372862227e-06, + "loss": 0.0354, + "step": 71225 + }, + { + "epoch": 3.32, + "learning_rate": 8.99581458780744e-06, + "loss": 0.1169, + "step": 71230 + }, + { + "epoch": 3.32, + "learning_rate": 8.995030802752653e-06, + "loss": 0.1483, + "step": 71235 + }, + { + "epoch": 3.32, + "learning_rate": 8.994247017697867e-06, + "loss": 0.1475, + "step": 71240 + }, + { + "epoch": 3.32, + "learning_rate": 8.993463232643081e-06, + "loss": 0.199, + "step": 71245 + }, + { + "epoch": 3.32, + "learning_rate": 8.992679447588293e-06, + "loss": 0.1115, + "step": 71250 + }, + { + "epoch": 3.32, + "learning_rate": 8.991895662533507e-06, + "loss": 0.0245, + "step": 71255 + }, + { + "epoch": 3.33, + "learning_rate": 8.991111877478721e-06, + "loss": 0.04, + "step": 71260 + }, + { + "epoch": 3.33, + "learning_rate": 8.990328092423935e-06, + "loss": 0.0279, + "step": 71265 + }, + { + "epoch": 3.33, + "learning_rate": 8.989544307369149e-06, + "loss": 0.0768, + "step": 71270 + }, + { + "epoch": 3.33, + "learning_rate": 8.988760522314361e-06, + "loss": 0.079, + "step": 71275 + }, + { + "epoch": 3.33, + "learning_rate": 8.987976737259575e-06, + "loss": 0.1345, + "step": 71280 + }, + { + "epoch": 3.33, + "learning_rate": 8.987192952204789e-06, + "loss": 0.1671, + "step": 71285 + }, + { + "epoch": 3.33, + "learning_rate": 8.986409167150003e-06, + "loss": 0.1551, + "step": 71290 + }, + { + "epoch": 3.33, + "learning_rate": 8.985625382095215e-06, + "loss": 0.2852, + "step": 71295 + }, + { + "epoch": 3.33, + "learning_rate": 8.984841597040427e-06, + "loss": 0.0474, + "step": 71300 + }, + { + "epoch": 3.33, + "learning_rate": 8.984057811985641e-06, + "loss": 0.0449, + "step": 71305 + }, + { + "epoch": 3.33, + "learning_rate": 8.983274026930855e-06, + "loss": 0.0172, + "step": 71310 + }, + { + "epoch": 3.33, + "learning_rate": 8.982490241876069e-06, + "loss": 0.0817, + "step": 71315 + }, + { + "epoch": 3.33, + "learning_rate": 8.981706456821283e-06, + "loss": 0.0804, + "step": 71320 + }, + { + "epoch": 3.33, + "learning_rate": 8.980922671766495e-06, + "loss": 0.0379, + "step": 71325 + }, + { + "epoch": 3.33, + "learning_rate": 8.980138886711709e-06, + "loss": 0.0653, + "step": 71330 + }, + { + "epoch": 3.33, + "learning_rate": 8.979355101656923e-06, + "loss": 0.099, + "step": 71335 + }, + { + "epoch": 3.33, + "learning_rate": 8.978571316602137e-06, + "loss": 0.149, + "step": 71340 + }, + { + "epoch": 3.33, + "learning_rate": 8.977787531547349e-06, + "loss": 0.2112, + "step": 71345 + }, + { + "epoch": 3.33, + "learning_rate": 8.977003746492563e-06, + "loss": 0.077, + "step": 71350 + }, + { + "epoch": 3.33, + "learning_rate": 8.976219961437777e-06, + "loss": 0.0558, + "step": 71355 + }, + { + "epoch": 3.33, + "learning_rate": 8.975436176382989e-06, + "loss": 0.0582, + "step": 71360 + }, + { + "epoch": 3.33, + "learning_rate": 8.974652391328203e-06, + "loss": 0.0369, + "step": 71365 + }, + { + "epoch": 3.33, + "learning_rate": 8.973868606273417e-06, + "loss": 0.0759, + "step": 71370 + }, + { + "epoch": 3.33, + "learning_rate": 8.973084821218629e-06, + "loss": 0.0728, + "step": 71375 + }, + { + "epoch": 3.33, + "learning_rate": 8.972301036163843e-06, + "loss": 0.0995, + "step": 71380 + }, + { + "epoch": 3.33, + "learning_rate": 8.971517251109057e-06, + "loss": 0.4829, + "step": 71385 + }, + { + "epoch": 3.33, + "learning_rate": 8.97073346605427e-06, + "loss": 0.1905, + "step": 71390 + }, + { + "epoch": 3.33, + "learning_rate": 8.969949680999483e-06, + "loss": 0.355, + "step": 71395 + }, + { + "epoch": 3.33, + "learning_rate": 8.969165895944697e-06, + "loss": 0.0518, + "step": 71400 + }, + { + "epoch": 3.33, + "learning_rate": 8.96838211088991e-06, + "loss": 0.0564, + "step": 71405 + }, + { + "epoch": 3.33, + "learning_rate": 8.967598325835125e-06, + "loss": 0.0721, + "step": 71410 + }, + { + "epoch": 3.33, + "learning_rate": 8.966814540780337e-06, + "loss": 0.0613, + "step": 71415 + }, + { + "epoch": 3.33, + "learning_rate": 8.96603075572555e-06, + "loss": 0.1354, + "step": 71420 + }, + { + "epoch": 3.33, + "learning_rate": 8.965246970670763e-06, + "loss": 0.1019, + "step": 71425 + }, + { + "epoch": 3.33, + "learning_rate": 8.964463185615977e-06, + "loss": 0.071, + "step": 71430 + }, + { + "epoch": 3.33, + "learning_rate": 8.96367940056119e-06, + "loss": 0.0923, + "step": 71435 + }, + { + "epoch": 3.33, + "learning_rate": 8.962895615506405e-06, + "loss": 0.3224, + "step": 71440 + }, + { + "epoch": 3.33, + "learning_rate": 8.962111830451617e-06, + "loss": 0.3049, + "step": 71445 + }, + { + "epoch": 3.33, + "learning_rate": 8.96132804539683e-06, + "loss": 0.0961, + "step": 71450 + }, + { + "epoch": 3.33, + "learning_rate": 8.960544260342045e-06, + "loss": 0.0378, + "step": 71455 + }, + { + "epoch": 3.33, + "learning_rate": 8.959760475287259e-06, + "loss": 0.0332, + "step": 71460 + }, + { + "epoch": 3.33, + "learning_rate": 8.95897669023247e-06, + "loss": 0.0462, + "step": 71465 + }, + { + "epoch": 3.33, + "learning_rate": 8.958192905177685e-06, + "loss": 0.0546, + "step": 71470 + }, + { + "epoch": 3.34, + "learning_rate": 8.957409120122899e-06, + "loss": 0.0442, + "step": 71475 + }, + { + "epoch": 3.34, + "learning_rate": 8.956625335068112e-06, + "loss": 0.1452, + "step": 71480 + }, + { + "epoch": 3.34, + "learning_rate": 8.955841550013326e-06, + "loss": 0.1316, + "step": 71485 + }, + { + "epoch": 3.34, + "learning_rate": 8.955057764958539e-06, + "loss": 0.1885, + "step": 71490 + }, + { + "epoch": 3.34, + "learning_rate": 8.95427397990375e-06, + "loss": 0.1965, + "step": 71495 + }, + { + "epoch": 3.34, + "learning_rate": 8.953490194848965e-06, + "loss": 0.0727, + "step": 71500 + }, + { + "epoch": 3.34, + "learning_rate": 8.952706409794179e-06, + "loss": 0.0363, + "step": 71505 + }, + { + "epoch": 3.34, + "learning_rate": 8.951922624739393e-06, + "loss": 0.0834, + "step": 71510 + }, + { + "epoch": 3.34, + "learning_rate": 8.951138839684605e-06, + "loss": 0.053, + "step": 71515 + }, + { + "epoch": 3.34, + "learning_rate": 8.950355054629819e-06, + "loss": 0.0988, + "step": 71520 + }, + { + "epoch": 3.34, + "learning_rate": 8.949571269575033e-06, + "loss": 0.1021, + "step": 71525 + }, + { + "epoch": 3.34, + "learning_rate": 8.948787484520246e-06, + "loss": 0.1247, + "step": 71530 + }, + { + "epoch": 3.34, + "learning_rate": 8.94800369946546e-06, + "loss": 0.1089, + "step": 71535 + }, + { + "epoch": 3.34, + "learning_rate": 8.947219914410673e-06, + "loss": 0.213, + "step": 71540 + }, + { + "epoch": 3.34, + "learning_rate": 8.946436129355886e-06, + "loss": 0.3372, + "step": 71545 + }, + { + "epoch": 3.34, + "learning_rate": 8.9456523443011e-06, + "loss": 0.0676, + "step": 71550 + }, + { + "epoch": 3.34, + "learning_rate": 8.944868559246313e-06, + "loss": 0.0195, + "step": 71555 + }, + { + "epoch": 3.34, + "learning_rate": 8.944084774191527e-06, + "loss": 0.0476, + "step": 71560 + }, + { + "epoch": 3.34, + "learning_rate": 8.943300989136739e-06, + "loss": 0.0935, + "step": 71565 + }, + { + "epoch": 3.34, + "learning_rate": 8.942517204081953e-06, + "loss": 0.0661, + "step": 71570 + }, + { + "epoch": 3.34, + "learning_rate": 8.941733419027167e-06, + "loss": 0.1716, + "step": 71575 + }, + { + "epoch": 3.34, + "learning_rate": 8.94094963397238e-06, + "loss": 0.1298, + "step": 71580 + }, + { + "epoch": 3.34, + "learning_rate": 8.940165848917594e-06, + "loss": 0.1413, + "step": 71585 + }, + { + "epoch": 3.34, + "learning_rate": 8.939382063862807e-06, + "loss": 0.2108, + "step": 71590 + }, + { + "epoch": 3.34, + "learning_rate": 8.93859827880802e-06, + "loss": 0.2585, + "step": 71595 + }, + { + "epoch": 3.34, + "learning_rate": 8.937814493753234e-06, + "loss": 0.0759, + "step": 71600 + }, + { + "epoch": 3.34, + "learning_rate": 8.937030708698448e-06, + "loss": 0.0262, + "step": 71605 + }, + { + "epoch": 3.34, + "learning_rate": 8.93624692364366e-06, + "loss": 0.0391, + "step": 71610 + }, + { + "epoch": 3.34, + "learning_rate": 8.935463138588874e-06, + "loss": 0.0442, + "step": 71615 + }, + { + "epoch": 3.34, + "learning_rate": 8.934679353534087e-06, + "loss": 0.1691, + "step": 71620 + }, + { + "epoch": 3.34, + "learning_rate": 8.9338955684793e-06, + "loss": 0.1244, + "step": 71625 + }, + { + "epoch": 3.34, + "learning_rate": 8.933111783424514e-06, + "loss": 0.1147, + "step": 71630 + }, + { + "epoch": 3.34, + "learning_rate": 8.932327998369728e-06, + "loss": 0.1015, + "step": 71635 + }, + { + "epoch": 3.34, + "learning_rate": 8.93154421331494e-06, + "loss": 0.2465, + "step": 71640 + }, + { + "epoch": 3.34, + "learning_rate": 8.930760428260154e-06, + "loss": 0.2367, + "step": 71645 + }, + { + "epoch": 3.34, + "learning_rate": 8.929976643205368e-06, + "loss": 0.0882, + "step": 71650 + }, + { + "epoch": 3.34, + "learning_rate": 8.929192858150582e-06, + "loss": 0.0552, + "step": 71655 + }, + { + "epoch": 3.34, + "learning_rate": 8.928409073095794e-06, + "loss": 0.0628, + "step": 71660 + }, + { + "epoch": 3.34, + "learning_rate": 8.927625288041008e-06, + "loss": 0.0651, + "step": 71665 + }, + { + "epoch": 3.34, + "learning_rate": 8.926841502986222e-06, + "loss": 0.0243, + "step": 71670 + }, + { + "epoch": 3.34, + "learning_rate": 8.926057717931436e-06, + "loss": 0.113, + "step": 71675 + }, + { + "epoch": 3.34, + "learning_rate": 8.925273932876648e-06, + "loss": 0.1623, + "step": 71680 + }, + { + "epoch": 3.34, + "learning_rate": 8.924490147821862e-06, + "loss": 0.1306, + "step": 71685 + }, + { + "epoch": 3.35, + "learning_rate": 8.923706362767075e-06, + "loss": 0.2359, + "step": 71690 + }, + { + "epoch": 3.35, + "learning_rate": 8.922922577712288e-06, + "loss": 0.2171, + "step": 71695 + }, + { + "epoch": 3.35, + "learning_rate": 8.922138792657502e-06, + "loss": 0.1319, + "step": 71700 + }, + { + "epoch": 3.35, + "learning_rate": 8.921355007602716e-06, + "loss": 0.0114, + "step": 71705 + }, + { + "epoch": 3.35, + "learning_rate": 8.920571222547928e-06, + "loss": 0.0387, + "step": 71710 + }, + { + "epoch": 3.35, + "learning_rate": 8.919787437493142e-06, + "loss": 0.0901, + "step": 71715 + }, + { + "epoch": 3.35, + "learning_rate": 8.919003652438356e-06, + "loss": 0.0646, + "step": 71720 + }, + { + "epoch": 3.35, + "learning_rate": 8.91821986738357e-06, + "loss": 0.0286, + "step": 71725 + }, + { + "epoch": 3.35, + "learning_rate": 8.917436082328782e-06, + "loss": 0.1043, + "step": 71730 + }, + { + "epoch": 3.35, + "learning_rate": 8.916652297273996e-06, + "loss": 0.1484, + "step": 71735 + }, + { + "epoch": 3.35, + "learning_rate": 8.91586851221921e-06, + "loss": 0.1106, + "step": 71740 + }, + { + "epoch": 3.35, + "learning_rate": 8.915084727164424e-06, + "loss": 0.2136, + "step": 71745 + }, + { + "epoch": 3.35, + "learning_rate": 8.914300942109636e-06, + "loss": 0.0145, + "step": 71750 + }, + { + "epoch": 3.35, + "learning_rate": 8.91351715705485e-06, + "loss": 0.0284, + "step": 71755 + }, + { + "epoch": 3.35, + "learning_rate": 8.912733372000062e-06, + "loss": 0.0206, + "step": 71760 + }, + { + "epoch": 3.35, + "learning_rate": 8.911949586945276e-06, + "loss": 0.0796, + "step": 71765 + }, + { + "epoch": 3.35, + "learning_rate": 8.91116580189049e-06, + "loss": 0.0947, + "step": 71770 + }, + { + "epoch": 3.35, + "learning_rate": 8.910382016835704e-06, + "loss": 0.0731, + "step": 71775 + }, + { + "epoch": 3.35, + "learning_rate": 8.909598231780916e-06, + "loss": 0.1135, + "step": 71780 + }, + { + "epoch": 3.35, + "learning_rate": 8.90881444672613e-06, + "loss": 0.1274, + "step": 71785 + }, + { + "epoch": 3.35, + "learning_rate": 8.908030661671344e-06, + "loss": 0.2114, + "step": 71790 + }, + { + "epoch": 3.35, + "learning_rate": 8.907246876616558e-06, + "loss": 0.29, + "step": 71795 + }, + { + "epoch": 3.35, + "learning_rate": 8.906463091561772e-06, + "loss": 0.1027, + "step": 71800 + }, + { + "epoch": 3.35, + "learning_rate": 8.905679306506984e-06, + "loss": 0.0313, + "step": 71805 + }, + { + "epoch": 3.35, + "learning_rate": 8.904895521452198e-06, + "loss": 0.057, + "step": 71810 + }, + { + "epoch": 3.35, + "learning_rate": 8.90411173639741e-06, + "loss": 0.039, + "step": 71815 + }, + { + "epoch": 3.35, + "learning_rate": 8.903327951342624e-06, + "loss": 0.0554, + "step": 71820 + }, + { + "epoch": 3.35, + "learning_rate": 8.902544166287838e-06, + "loss": 0.0639, + "step": 71825 + }, + { + "epoch": 3.35, + "learning_rate": 8.90176038123305e-06, + "loss": 0.1176, + "step": 71830 + }, + { + "epoch": 3.35, + "learning_rate": 8.900976596178264e-06, + "loss": 0.0734, + "step": 71835 + }, + { + "epoch": 3.35, + "learning_rate": 8.900192811123478e-06, + "loss": 0.2307, + "step": 71840 + }, + { + "epoch": 3.35, + "learning_rate": 8.899409026068692e-06, + "loss": 0.3026, + "step": 71845 + }, + { + "epoch": 3.35, + "learning_rate": 8.898625241013906e-06, + "loss": 0.0709, + "step": 71850 + }, + { + "epoch": 3.35, + "learning_rate": 8.897841455959118e-06, + "loss": 0.0339, + "step": 71855 + }, + { + "epoch": 3.35, + "learning_rate": 8.897057670904332e-06, + "loss": 0.0831, + "step": 71860 + }, + { + "epoch": 3.35, + "learning_rate": 8.896273885849546e-06, + "loss": 0.0316, + "step": 71865 + }, + { + "epoch": 3.35, + "learning_rate": 8.89549010079476e-06, + "loss": 0.103, + "step": 71870 + }, + { + "epoch": 3.35, + "learning_rate": 8.894706315739972e-06, + "loss": 0.0875, + "step": 71875 + }, + { + "epoch": 3.35, + "learning_rate": 8.893922530685184e-06, + "loss": 0.0532, + "step": 71880 + }, + { + "epoch": 3.35, + "learning_rate": 8.893138745630398e-06, + "loss": 0.1521, + "step": 71885 + }, + { + "epoch": 3.35, + "learning_rate": 8.892354960575612e-06, + "loss": 0.1833, + "step": 71890 + }, + { + "epoch": 3.35, + "learning_rate": 8.891571175520826e-06, + "loss": 0.4676, + "step": 71895 + }, + { + "epoch": 3.35, + "learning_rate": 8.89078739046604e-06, + "loss": 0.0905, + "step": 71900 + }, + { + "epoch": 3.36, + "learning_rate": 8.890003605411252e-06, + "loss": 0.0352, + "step": 71905 + }, + { + "epoch": 3.36, + "learning_rate": 8.889219820356466e-06, + "loss": 0.0343, + "step": 71910 + }, + { + "epoch": 3.36, + "learning_rate": 8.88843603530168e-06, + "loss": 0.0217, + "step": 71915 + }, + { + "epoch": 3.36, + "learning_rate": 8.887652250246894e-06, + "loss": 0.0845, + "step": 71920 + }, + { + "epoch": 3.36, + "learning_rate": 8.886868465192106e-06, + "loss": 0.0914, + "step": 71925 + }, + { + "epoch": 3.36, + "learning_rate": 8.88608468013732e-06, + "loss": 0.0889, + "step": 71930 + }, + { + "epoch": 3.36, + "learning_rate": 8.885300895082534e-06, + "loss": 0.0855, + "step": 71935 + }, + { + "epoch": 3.36, + "learning_rate": 8.884517110027748e-06, + "loss": 0.2037, + "step": 71940 + }, + { + "epoch": 3.36, + "learning_rate": 8.88373332497296e-06, + "loss": 0.3553, + "step": 71945 + }, + { + "epoch": 3.36, + "learning_rate": 8.882949539918174e-06, + "loss": 0.0663, + "step": 71950 + }, + { + "epoch": 3.36, + "learning_rate": 8.882165754863386e-06, + "loss": 0.0432, + "step": 71955 + }, + { + "epoch": 3.36, + "learning_rate": 8.8813819698086e-06, + "loss": 0.0513, + "step": 71960 + }, + { + "epoch": 3.36, + "learning_rate": 8.880598184753814e-06, + "loss": 0.027, + "step": 71965 + }, + { + "epoch": 3.36, + "learning_rate": 8.879814399699028e-06, + "loss": 0.1287, + "step": 71970 + }, + { + "epoch": 3.36, + "learning_rate": 8.87903061464424e-06, + "loss": 0.0541, + "step": 71975 + }, + { + "epoch": 3.36, + "learning_rate": 8.878246829589454e-06, + "loss": 0.0934, + "step": 71980 + }, + { + "epoch": 3.36, + "learning_rate": 8.877463044534668e-06, + "loss": 0.0739, + "step": 71985 + }, + { + "epoch": 3.36, + "learning_rate": 8.876679259479882e-06, + "loss": 0.1659, + "step": 71990 + }, + { + "epoch": 3.36, + "learning_rate": 8.875895474425094e-06, + "loss": 0.2925, + "step": 71995 + }, + { + "epoch": 3.36, + "learning_rate": 8.875111689370308e-06, + "loss": 0.0761, + "step": 72000 + }, + { + "epoch": 3.36, + "learning_rate": 8.874327904315522e-06, + "loss": 0.0252, + "step": 72005 + }, + { + "epoch": 3.36, + "learning_rate": 8.873544119260734e-06, + "loss": 0.0426, + "step": 72010 + }, + { + "epoch": 3.36, + "learning_rate": 8.872760334205948e-06, + "loss": 0.0238, + "step": 72015 + }, + { + "epoch": 3.36, + "learning_rate": 8.871976549151162e-06, + "loss": 0.0345, + "step": 72020 + }, + { + "epoch": 3.36, + "learning_rate": 8.871192764096374e-06, + "loss": 0.0779, + "step": 72025 + }, + { + "epoch": 3.36, + "learning_rate": 8.870408979041588e-06, + "loss": 0.0438, + "step": 72030 + }, + { + "epoch": 3.36, + "learning_rate": 8.869625193986802e-06, + "loss": 0.1802, + "step": 72035 + }, + { + "epoch": 3.36, + "learning_rate": 8.868841408932016e-06, + "loss": 0.1752, + "step": 72040 + }, + { + "epoch": 3.36, + "learning_rate": 8.868057623877228e-06, + "loss": 0.2804, + "step": 72045 + }, + { + "epoch": 3.36, + "learning_rate": 8.867273838822442e-06, + "loss": 0.0499, + "step": 72050 + }, + { + "epoch": 3.36, + "learning_rate": 8.866490053767656e-06, + "loss": 0.0281, + "step": 72055 + }, + { + "epoch": 3.36, + "learning_rate": 8.86570626871287e-06, + "loss": 0.0557, + "step": 72060 + }, + { + "epoch": 3.36, + "learning_rate": 8.864922483658084e-06, + "loss": 0.0986, + "step": 72065 + }, + { + "epoch": 3.36, + "learning_rate": 8.864138698603296e-06, + "loss": 0.0801, + "step": 72070 + }, + { + "epoch": 3.36, + "learning_rate": 8.863354913548508e-06, + "loss": 0.1067, + "step": 72075 + }, + { + "epoch": 3.36, + "learning_rate": 8.862571128493722e-06, + "loss": 0.069, + "step": 72080 + }, + { + "epoch": 3.36, + "learning_rate": 8.861787343438936e-06, + "loss": 0.1912, + "step": 72085 + }, + { + "epoch": 3.36, + "learning_rate": 8.86100355838415e-06, + "loss": 0.2374, + "step": 72090 + }, + { + "epoch": 3.36, + "learning_rate": 8.860219773329362e-06, + "loss": 0.2909, + "step": 72095 + }, + { + "epoch": 3.36, + "learning_rate": 8.859435988274576e-06, + "loss": 0.0656, + "step": 72100 + }, + { + "epoch": 3.36, + "learning_rate": 8.85865220321979e-06, + "loss": 0.0112, + "step": 72105 + }, + { + "epoch": 3.36, + "learning_rate": 8.857868418165004e-06, + "loss": 0.0553, + "step": 72110 + }, + { + "epoch": 3.36, + "learning_rate": 8.857084633110218e-06, + "loss": 0.045, + "step": 72115 + }, + { + "epoch": 3.37, + "learning_rate": 8.85630084805543e-06, + "loss": 0.1336, + "step": 72120 + }, + { + "epoch": 3.37, + "learning_rate": 8.855517063000644e-06, + "loss": 0.1027, + "step": 72125 + }, + { + "epoch": 3.37, + "learning_rate": 8.854733277945858e-06, + "loss": 0.1755, + "step": 72130 + }, + { + "epoch": 3.37, + "learning_rate": 8.853949492891071e-06, + "loss": 0.0975, + "step": 72135 + }, + { + "epoch": 3.37, + "learning_rate": 8.853165707836284e-06, + "loss": 0.219, + "step": 72140 + }, + { + "epoch": 3.37, + "learning_rate": 8.852381922781496e-06, + "loss": 0.2413, + "step": 72145 + }, + { + "epoch": 3.37, + "learning_rate": 8.85159813772671e-06, + "loss": 0.1244, + "step": 72150 + }, + { + "epoch": 3.37, + "learning_rate": 8.850814352671924e-06, + "loss": 0.0359, + "step": 72155 + }, + { + "epoch": 3.37, + "learning_rate": 8.850030567617138e-06, + "loss": 0.0713, + "step": 72160 + }, + { + "epoch": 3.37, + "learning_rate": 8.849246782562352e-06, + "loss": 0.0794, + "step": 72165 + }, + { + "epoch": 3.37, + "learning_rate": 8.848462997507564e-06, + "loss": 0.0946, + "step": 72170 + }, + { + "epoch": 3.37, + "learning_rate": 8.847679212452778e-06, + "loss": 0.0688, + "step": 72175 + }, + { + "epoch": 3.37, + "learning_rate": 8.846895427397992e-06, + "loss": 0.1037, + "step": 72180 + }, + { + "epoch": 3.37, + "learning_rate": 8.846111642343205e-06, + "loss": 0.1237, + "step": 72185 + }, + { + "epoch": 3.37, + "learning_rate": 8.845327857288418e-06, + "loss": 0.1358, + "step": 72190 + }, + { + "epoch": 3.37, + "learning_rate": 8.844544072233632e-06, + "loss": 0.5546, + "step": 72195 + }, + { + "epoch": 3.37, + "learning_rate": 8.843760287178845e-06, + "loss": 0.0636, + "step": 72200 + }, + { + "epoch": 3.37, + "learning_rate": 8.842976502124058e-06, + "loss": 0.0212, + "step": 72205 + }, + { + "epoch": 3.37, + "learning_rate": 8.842192717069272e-06, + "loss": 0.0511, + "step": 72210 + }, + { + "epoch": 3.37, + "learning_rate": 8.841408932014485e-06, + "loss": 0.0671, + "step": 72215 + }, + { + "epoch": 3.37, + "learning_rate": 8.840625146959698e-06, + "loss": 0.1021, + "step": 72220 + }, + { + "epoch": 3.37, + "learning_rate": 8.839841361904912e-06, + "loss": 0.1161, + "step": 72225 + }, + { + "epoch": 3.37, + "learning_rate": 8.839057576850126e-06, + "loss": 0.1356, + "step": 72230 + }, + { + "epoch": 3.37, + "learning_rate": 8.83827379179534e-06, + "loss": 0.2081, + "step": 72235 + }, + { + "epoch": 3.37, + "learning_rate": 8.837490006740552e-06, + "loss": 0.1651, + "step": 72240 + }, + { + "epoch": 3.37, + "learning_rate": 8.836706221685766e-06, + "loss": 0.1851, + "step": 72245 + }, + { + "epoch": 3.37, + "learning_rate": 8.83592243663098e-06, + "loss": 0.0708, + "step": 72250 + }, + { + "epoch": 3.37, + "learning_rate": 8.835138651576193e-06, + "loss": 0.007, + "step": 72255 + }, + { + "epoch": 3.37, + "learning_rate": 8.834354866521406e-06, + "loss": 0.0373, + "step": 72260 + }, + { + "epoch": 3.37, + "learning_rate": 8.83357108146662e-06, + "loss": 0.0591, + "step": 72265 + }, + { + "epoch": 3.37, + "learning_rate": 8.832787296411832e-06, + "loss": 0.0865, + "step": 72270 + }, + { + "epoch": 3.37, + "learning_rate": 8.832003511357046e-06, + "loss": 0.0524, + "step": 72275 + }, + { + "epoch": 3.37, + "learning_rate": 8.83121972630226e-06, + "loss": 0.1104, + "step": 72280 + }, + { + "epoch": 3.37, + "learning_rate": 8.830435941247473e-06, + "loss": 0.1238, + "step": 72285 + }, + { + "epoch": 3.37, + "learning_rate": 8.829652156192686e-06, + "loss": 0.1583, + "step": 72290 + }, + { + "epoch": 3.37, + "learning_rate": 8.8288683711379e-06, + "loss": 0.1681, + "step": 72295 + }, + { + "epoch": 3.37, + "learning_rate": 8.828084586083113e-06, + "loss": 0.1097, + "step": 72300 + }, + { + "epoch": 3.37, + "learning_rate": 8.827300801028327e-06, + "loss": 0.0426, + "step": 72305 + }, + { + "epoch": 3.37, + "learning_rate": 8.82651701597354e-06, + "loss": 0.0368, + "step": 72310 + }, + { + "epoch": 3.37, + "learning_rate": 8.825733230918753e-06, + "loss": 0.0631, + "step": 72315 + }, + { + "epoch": 3.37, + "learning_rate": 8.824949445863967e-06, + "loss": 0.0818, + "step": 72320 + }, + { + "epoch": 3.37, + "learning_rate": 8.824165660809181e-06, + "loss": 0.0878, + "step": 72325 + }, + { + "epoch": 3.38, + "learning_rate": 8.823381875754393e-06, + "loss": 0.1477, + "step": 72330 + }, + { + "epoch": 3.38, + "learning_rate": 8.822598090699607e-06, + "loss": 0.2534, + "step": 72335 + }, + { + "epoch": 3.38, + "learning_rate": 8.82181430564482e-06, + "loss": 0.2975, + "step": 72340 + }, + { + "epoch": 3.38, + "learning_rate": 8.821030520590033e-06, + "loss": 0.2963, + "step": 72345 + }, + { + "epoch": 3.38, + "learning_rate": 8.820246735535247e-06, + "loss": 0.0708, + "step": 72350 + }, + { + "epoch": 3.38, + "learning_rate": 8.819462950480461e-06, + "loss": 0.0599, + "step": 72355 + }, + { + "epoch": 3.38, + "learning_rate": 8.818679165425674e-06, + "loss": 0.0268, + "step": 72360 + }, + { + "epoch": 3.38, + "learning_rate": 8.817895380370887e-06, + "loss": 0.0357, + "step": 72365 + }, + { + "epoch": 3.38, + "learning_rate": 8.817111595316101e-06, + "loss": 0.1084, + "step": 72370 + }, + { + "epoch": 3.38, + "learning_rate": 8.816327810261315e-06, + "loss": 0.1211, + "step": 72375 + }, + { + "epoch": 3.38, + "learning_rate": 8.815544025206529e-06, + "loss": 0.1424, + "step": 72380 + }, + { + "epoch": 3.38, + "learning_rate": 8.814760240151741e-06, + "loss": 0.1438, + "step": 72385 + }, + { + "epoch": 3.38, + "learning_rate": 8.813976455096955e-06, + "loss": 0.2297, + "step": 72390 + }, + { + "epoch": 3.38, + "learning_rate": 8.81319267004217e-06, + "loss": 0.2569, + "step": 72395 + }, + { + "epoch": 3.38, + "learning_rate": 8.812408884987381e-06, + "loss": 0.0635, + "step": 72400 + }, + { + "epoch": 3.38, + "learning_rate": 8.811625099932595e-06, + "loss": 0.0358, + "step": 72405 + }, + { + "epoch": 3.38, + "learning_rate": 8.810841314877807e-06, + "loss": 0.033, + "step": 72410 + }, + { + "epoch": 3.38, + "learning_rate": 8.810057529823021e-06, + "loss": 0.0559, + "step": 72415 + }, + { + "epoch": 3.38, + "learning_rate": 8.809273744768235e-06, + "loss": 0.0961, + "step": 72420 + }, + { + "epoch": 3.38, + "learning_rate": 8.80848995971345e-06, + "loss": 0.133, + "step": 72425 + }, + { + "epoch": 3.38, + "learning_rate": 8.807706174658663e-06, + "loss": 0.1051, + "step": 72430 + }, + { + "epoch": 3.38, + "learning_rate": 8.806922389603875e-06, + "loss": 0.1934, + "step": 72435 + }, + { + "epoch": 3.38, + "learning_rate": 8.80613860454909e-06, + "loss": 0.1838, + "step": 72440 + }, + { + "epoch": 3.38, + "learning_rate": 8.805354819494303e-06, + "loss": 0.2467, + "step": 72445 + }, + { + "epoch": 3.38, + "learning_rate": 8.804571034439517e-06, + "loss": 0.064, + "step": 72450 + }, + { + "epoch": 3.38, + "learning_rate": 8.80378724938473e-06, + "loss": 0.0198, + "step": 72455 + }, + { + "epoch": 3.38, + "learning_rate": 8.803003464329943e-06, + "loss": 0.0427, + "step": 72460 + }, + { + "epoch": 3.38, + "learning_rate": 8.802219679275155e-06, + "loss": 0.05, + "step": 72465 + }, + { + "epoch": 3.38, + "learning_rate": 8.80143589422037e-06, + "loss": 0.1142, + "step": 72470 + }, + { + "epoch": 3.38, + "learning_rate": 8.800652109165583e-06, + "loss": 0.0693, + "step": 72475 + }, + { + "epoch": 3.38, + "learning_rate": 8.799868324110797e-06, + "loss": 0.0562, + "step": 72480 + }, + { + "epoch": 3.38, + "learning_rate": 8.79908453905601e-06, + "loss": 0.1657, + "step": 72485 + }, + { + "epoch": 3.38, + "learning_rate": 8.798300754001223e-06, + "loss": 0.2237, + "step": 72490 + }, + { + "epoch": 3.38, + "learning_rate": 8.797516968946437e-06, + "loss": 0.2782, + "step": 72495 + }, + { + "epoch": 3.38, + "learning_rate": 8.796733183891651e-06, + "loss": 0.037, + "step": 72500 + }, + { + "epoch": 3.38, + "learning_rate": 8.795949398836863e-06, + "loss": 0.0315, + "step": 72505 + }, + { + "epoch": 3.38, + "learning_rate": 8.795165613782077e-06, + "loss": 0.0494, + "step": 72510 + }, + { + "epoch": 3.38, + "learning_rate": 8.794381828727291e-06, + "loss": 0.0724, + "step": 72515 + }, + { + "epoch": 3.38, + "learning_rate": 8.793598043672505e-06, + "loss": 0.1049, + "step": 72520 + }, + { + "epoch": 3.38, + "learning_rate": 8.792814258617717e-06, + "loss": 0.0456, + "step": 72525 + }, + { + "epoch": 3.38, + "learning_rate": 8.792030473562931e-06, + "loss": 0.0621, + "step": 72530 + }, + { + "epoch": 3.38, + "learning_rate": 8.791246688508143e-06, + "loss": 0.0755, + "step": 72535 + }, + { + "epoch": 3.38, + "learning_rate": 8.790462903453357e-06, + "loss": 0.2267, + "step": 72540 + }, + { + "epoch": 3.39, + "learning_rate": 8.789679118398571e-06, + "loss": 0.4156, + "step": 72545 + }, + { + "epoch": 3.39, + "learning_rate": 8.788895333343785e-06, + "loss": 0.0726, + "step": 72550 + }, + { + "epoch": 3.39, + "learning_rate": 8.788111548288997e-06, + "loss": 0.0252, + "step": 72555 + }, + { + "epoch": 3.39, + "learning_rate": 8.787327763234211e-06, + "loss": 0.0365, + "step": 72560 + }, + { + "epoch": 3.39, + "learning_rate": 8.786543978179425e-06, + "loss": 0.0294, + "step": 72565 + }, + { + "epoch": 3.39, + "learning_rate": 8.785760193124639e-06, + "loss": 0.0894, + "step": 72570 + }, + { + "epoch": 3.39, + "learning_rate": 8.784976408069851e-06, + "loss": 0.1237, + "step": 72575 + }, + { + "epoch": 3.39, + "learning_rate": 8.784192623015065e-06, + "loss": 0.1305, + "step": 72580 + }, + { + "epoch": 3.39, + "learning_rate": 8.783408837960279e-06, + "loss": 0.2606, + "step": 72585 + }, + { + "epoch": 3.39, + "learning_rate": 8.782625052905493e-06, + "loss": 0.1614, + "step": 72590 + }, + { + "epoch": 3.39, + "learning_rate": 8.781841267850705e-06, + "loss": 0.4077, + "step": 72595 + }, + { + "epoch": 3.39, + "learning_rate": 8.781057482795919e-06, + "loss": 0.0702, + "step": 72600 + }, + { + "epoch": 3.39, + "learning_rate": 8.780273697741131e-06, + "loss": 0.1083, + "step": 72605 + }, + { + "epoch": 3.39, + "learning_rate": 8.779489912686345e-06, + "loss": 0.0806, + "step": 72610 + }, + { + "epoch": 3.39, + "learning_rate": 8.778706127631559e-06, + "loss": 0.07, + "step": 72615 + }, + { + "epoch": 3.39, + "learning_rate": 8.777922342576773e-06, + "loss": 0.0412, + "step": 72620 + }, + { + "epoch": 3.39, + "learning_rate": 8.777138557521985e-06, + "loss": 0.1552, + "step": 72625 + }, + { + "epoch": 3.39, + "learning_rate": 8.776354772467199e-06, + "loss": 0.1425, + "step": 72630 + }, + { + "epoch": 3.39, + "learning_rate": 8.775570987412413e-06, + "loss": 0.2181, + "step": 72635 + }, + { + "epoch": 3.39, + "learning_rate": 8.774787202357627e-06, + "loss": 0.1684, + "step": 72640 + }, + { + "epoch": 3.39, + "learning_rate": 8.77400341730284e-06, + "loss": 0.2213, + "step": 72645 + }, + { + "epoch": 3.39, + "learning_rate": 8.773219632248053e-06, + "loss": 0.1042, + "step": 72650 + }, + { + "epoch": 3.39, + "learning_rate": 8.772435847193267e-06, + "loss": 0.0489, + "step": 72655 + }, + { + "epoch": 3.39, + "learning_rate": 8.771652062138479e-06, + "loss": 0.0216, + "step": 72660 + }, + { + "epoch": 3.39, + "learning_rate": 8.770868277083693e-06, + "loss": 0.0553, + "step": 72665 + }, + { + "epoch": 3.39, + "learning_rate": 8.770084492028907e-06, + "loss": 0.1275, + "step": 72670 + }, + { + "epoch": 3.39, + "learning_rate": 8.769300706974119e-06, + "loss": 0.1648, + "step": 72675 + }, + { + "epoch": 3.39, + "learning_rate": 8.768516921919333e-06, + "loss": 0.0575, + "step": 72680 + }, + { + "epoch": 3.39, + "learning_rate": 8.767733136864547e-06, + "loss": 0.1371, + "step": 72685 + }, + { + "epoch": 3.39, + "learning_rate": 8.76694935180976e-06, + "loss": 0.21, + "step": 72690 + }, + { + "epoch": 3.39, + "learning_rate": 8.766165566754975e-06, + "loss": 0.3111, + "step": 72695 + }, + { + "epoch": 3.39, + "learning_rate": 8.765381781700187e-06, + "loss": 0.0959, + "step": 72700 + }, + { + "epoch": 3.39, + "learning_rate": 8.7645979966454e-06, + "loss": 0.0205, + "step": 72705 + }, + { + "epoch": 3.39, + "learning_rate": 8.763814211590615e-06, + "loss": 0.0431, + "step": 72710 + }, + { + "epoch": 3.39, + "learning_rate": 8.763030426535829e-06, + "loss": 0.0457, + "step": 72715 + }, + { + "epoch": 3.39, + "learning_rate": 8.762246641481041e-06, + "loss": 0.0813, + "step": 72720 + }, + { + "epoch": 3.39, + "learning_rate": 8.761462856426253e-06, + "loss": 0.057, + "step": 72725 + }, + { + "epoch": 3.39, + "learning_rate": 8.760679071371467e-06, + "loss": 0.1586, + "step": 72730 + }, + { + "epoch": 3.39, + "learning_rate": 8.759895286316681e-06, + "loss": 0.1567, + "step": 72735 + }, + { + "epoch": 3.39, + "learning_rate": 8.759111501261895e-06, + "loss": 0.1828, + "step": 72740 + }, + { + "epoch": 3.39, + "learning_rate": 8.758327716207109e-06, + "loss": 0.3974, + "step": 72745 + }, + { + "epoch": 3.39, + "learning_rate": 8.757543931152321e-06, + "loss": 0.0386, + "step": 72750 + }, + { + "epoch": 3.39, + "learning_rate": 8.756760146097535e-06, + "loss": 0.0502, + "step": 72755 + }, + { + "epoch": 3.4, + "learning_rate": 8.755976361042749e-06, + "loss": 0.0306, + "step": 72760 + }, + { + "epoch": 3.4, + "learning_rate": 8.755192575987963e-06, + "loss": 0.0475, + "step": 72765 + }, + { + "epoch": 3.4, + "learning_rate": 8.754408790933175e-06, + "loss": 0.0952, + "step": 72770 + }, + { + "epoch": 3.4, + "learning_rate": 8.753625005878389e-06, + "loss": 0.1277, + "step": 72775 + }, + { + "epoch": 3.4, + "learning_rate": 8.752841220823603e-06, + "loss": 0.0964, + "step": 72780 + }, + { + "epoch": 3.4, + "learning_rate": 8.752057435768817e-06, + "loss": 0.1072, + "step": 72785 + }, + { + "epoch": 3.4, + "learning_rate": 8.751273650714029e-06, + "loss": 0.2131, + "step": 72790 + }, + { + "epoch": 3.4, + "learning_rate": 8.750489865659243e-06, + "loss": 0.2558, + "step": 72795 + }, + { + "epoch": 3.4, + "learning_rate": 8.749706080604455e-06, + "loss": 0.0444, + "step": 72800 + }, + { + "epoch": 3.4, + "learning_rate": 8.748922295549669e-06, + "loss": 0.057, + "step": 72805 + }, + { + "epoch": 3.4, + "learning_rate": 8.748138510494883e-06, + "loss": 0.0217, + "step": 72810 + }, + { + "epoch": 3.4, + "learning_rate": 8.747354725440097e-06, + "loss": 0.0364, + "step": 72815 + }, + { + "epoch": 3.4, + "learning_rate": 8.746570940385309e-06, + "loss": 0.0689, + "step": 72820 + }, + { + "epoch": 3.4, + "learning_rate": 8.745787155330523e-06, + "loss": 0.1745, + "step": 72825 + }, + { + "epoch": 3.4, + "learning_rate": 8.745003370275737e-06, + "loss": 0.1426, + "step": 72830 + }, + { + "epoch": 3.4, + "learning_rate": 8.74421958522095e-06, + "loss": 0.2692, + "step": 72835 + }, + { + "epoch": 3.4, + "learning_rate": 8.743435800166163e-06, + "loss": 0.2706, + "step": 72840 + }, + { + "epoch": 3.4, + "learning_rate": 8.742652015111377e-06, + "loss": 0.1699, + "step": 72845 + }, + { + "epoch": 3.4, + "learning_rate": 8.74186823005659e-06, + "loss": 0.0315, + "step": 72850 + }, + { + "epoch": 3.4, + "learning_rate": 8.741084445001804e-06, + "loss": 0.0269, + "step": 72855 + }, + { + "epoch": 3.4, + "learning_rate": 8.740300659947017e-06, + "loss": 0.0178, + "step": 72860 + }, + { + "epoch": 3.4, + "learning_rate": 8.73951687489223e-06, + "loss": 0.0391, + "step": 72865 + }, + { + "epoch": 3.4, + "learning_rate": 8.738733089837443e-06, + "loss": 0.0862, + "step": 72870 + }, + { + "epoch": 3.4, + "learning_rate": 8.737949304782657e-06, + "loss": 0.0993, + "step": 72875 + }, + { + "epoch": 3.4, + "learning_rate": 8.73716551972787e-06, + "loss": 0.1759, + "step": 72880 + }, + { + "epoch": 3.4, + "learning_rate": 8.736381734673084e-06, + "loss": 0.1792, + "step": 72885 + }, + { + "epoch": 3.4, + "learning_rate": 8.735597949618297e-06, + "loss": 0.2136, + "step": 72890 + }, + { + "epoch": 3.4, + "learning_rate": 8.73481416456351e-06, + "loss": 0.1858, + "step": 72895 + }, + { + "epoch": 3.4, + "learning_rate": 8.734030379508725e-06, + "loss": 0.0937, + "step": 72900 + }, + { + "epoch": 3.4, + "learning_rate": 8.733246594453938e-06, + "loss": 0.0136, + "step": 72905 + }, + { + "epoch": 3.4, + "learning_rate": 8.73246280939915e-06, + "loss": 0.0673, + "step": 72910 + }, + { + "epoch": 3.4, + "learning_rate": 8.731679024344365e-06, + "loss": 0.0488, + "step": 72915 + }, + { + "epoch": 3.4, + "learning_rate": 8.730895239289578e-06, + "loss": 0.0564, + "step": 72920 + }, + { + "epoch": 3.4, + "learning_rate": 8.73011145423479e-06, + "loss": 0.158, + "step": 72925 + }, + { + "epoch": 3.4, + "learning_rate": 8.729327669180005e-06, + "loss": 0.1729, + "step": 72930 + }, + { + "epoch": 3.4, + "learning_rate": 8.728543884125218e-06, + "loss": 0.1008, + "step": 72935 + }, + { + "epoch": 3.4, + "learning_rate": 8.72776009907043e-06, + "loss": 0.1283, + "step": 72940 + }, + { + "epoch": 3.4, + "learning_rate": 8.726976314015645e-06, + "loss": 0.4005, + "step": 72945 + }, + { + "epoch": 3.4, + "learning_rate": 8.726192528960858e-06, + "loss": 0.0513, + "step": 72950 + }, + { + "epoch": 3.4, + "learning_rate": 8.725408743906072e-06, + "loss": 0.0246, + "step": 72955 + }, + { + "epoch": 3.4, + "learning_rate": 8.724624958851286e-06, + "loss": 0.0681, + "step": 72960 + }, + { + "epoch": 3.4, + "learning_rate": 8.723841173796499e-06, + "loss": 0.0764, + "step": 72965 + }, + { + "epoch": 3.4, + "learning_rate": 8.723057388741712e-06, + "loss": 0.0384, + "step": 72970 + }, + { + "epoch": 3.41, + "learning_rate": 8.722273603686926e-06, + "loss": 0.0427, + "step": 72975 + }, + { + "epoch": 3.41, + "learning_rate": 8.72148981863214e-06, + "loss": 0.1383, + "step": 72980 + }, + { + "epoch": 3.41, + "learning_rate": 8.720706033577352e-06, + "loss": 0.1134, + "step": 72985 + }, + { + "epoch": 3.41, + "learning_rate": 8.719922248522565e-06, + "loss": 0.1788, + "step": 72990 + }, + { + "epoch": 3.41, + "learning_rate": 8.719138463467779e-06, + "loss": 0.1881, + "step": 72995 + }, + { + "epoch": 3.41, + "learning_rate": 8.718354678412992e-06, + "loss": 0.0662, + "step": 73000 + }, + { + "epoch": 3.41, + "learning_rate": 8.717570893358206e-06, + "loss": 0.0656, + "step": 73005 + }, + { + "epoch": 3.41, + "learning_rate": 8.71678710830342e-06, + "loss": 0.091, + "step": 73010 + }, + { + "epoch": 3.41, + "learning_rate": 8.716003323248632e-06, + "loss": 0.0671, + "step": 73015 + }, + { + "epoch": 3.41, + "learning_rate": 8.715219538193846e-06, + "loss": 0.0417, + "step": 73020 + }, + { + "epoch": 3.41, + "learning_rate": 8.71443575313906e-06, + "loss": 0.1163, + "step": 73025 + }, + { + "epoch": 3.41, + "learning_rate": 8.713651968084274e-06, + "loss": 0.0666, + "step": 73030 + }, + { + "epoch": 3.41, + "learning_rate": 8.712868183029486e-06, + "loss": 0.128, + "step": 73035 + }, + { + "epoch": 3.41, + "learning_rate": 8.7120843979747e-06, + "loss": 0.1942, + "step": 73040 + }, + { + "epoch": 3.41, + "learning_rate": 8.711300612919914e-06, + "loss": 0.2439, + "step": 73045 + }, + { + "epoch": 3.41, + "learning_rate": 8.710516827865128e-06, + "loss": 0.067, + "step": 73050 + }, + { + "epoch": 3.41, + "learning_rate": 8.70973304281034e-06, + "loss": 0.0163, + "step": 73055 + }, + { + "epoch": 3.41, + "learning_rate": 8.708949257755554e-06, + "loss": 0.0617, + "step": 73060 + }, + { + "epoch": 3.41, + "learning_rate": 8.708165472700766e-06, + "loss": 0.0927, + "step": 73065 + }, + { + "epoch": 3.41, + "learning_rate": 8.70738168764598e-06, + "loss": 0.0931, + "step": 73070 + }, + { + "epoch": 3.41, + "learning_rate": 8.706597902591194e-06, + "loss": 0.0455, + "step": 73075 + }, + { + "epoch": 3.41, + "learning_rate": 8.705814117536408e-06, + "loss": 0.1762, + "step": 73080 + }, + { + "epoch": 3.41, + "learning_rate": 8.70503033248162e-06, + "loss": 0.1451, + "step": 73085 + }, + { + "epoch": 3.41, + "learning_rate": 8.704246547426834e-06, + "loss": 0.1864, + "step": 73090 + }, + { + "epoch": 3.41, + "learning_rate": 8.703462762372048e-06, + "loss": 0.2506, + "step": 73095 + }, + { + "epoch": 3.41, + "learning_rate": 8.702678977317262e-06, + "loss": 0.136, + "step": 73100 + }, + { + "epoch": 3.41, + "learning_rate": 8.701895192262474e-06, + "loss": 0.0409, + "step": 73105 + }, + { + "epoch": 3.41, + "learning_rate": 8.701111407207688e-06, + "loss": 0.0228, + "step": 73110 + }, + { + "epoch": 3.41, + "learning_rate": 8.700327622152902e-06, + "loss": 0.0804, + "step": 73115 + }, + { + "epoch": 3.41, + "learning_rate": 8.699543837098114e-06, + "loss": 0.0498, + "step": 73120 + }, + { + "epoch": 3.41, + "learning_rate": 8.698916809054285e-06, + "loss": 0.0897, + "step": 73125 + }, + { + "epoch": 3.41, + "learning_rate": 8.698133023999499e-06, + "loss": 0.0444, + "step": 73130 + }, + { + "epoch": 3.41, + "learning_rate": 8.697349238944713e-06, + "loss": 0.1355, + "step": 73135 + }, + { + "epoch": 3.41, + "learning_rate": 8.696565453889927e-06, + "loss": 0.1694, + "step": 73140 + }, + { + "epoch": 3.41, + "learning_rate": 8.695781668835139e-06, + "loss": 0.2544, + "step": 73145 + }, + { + "epoch": 3.41, + "learning_rate": 8.694997883780353e-06, + "loss": 0.0608, + "step": 73150 + }, + { + "epoch": 3.41, + "learning_rate": 8.694214098725567e-06, + "loss": 0.0813, + "step": 73155 + }, + { + "epoch": 3.41, + "learning_rate": 8.69343031367078e-06, + "loss": 0.049, + "step": 73160 + }, + { + "epoch": 3.41, + "learning_rate": 8.692646528615993e-06, + "loss": 0.0249, + "step": 73165 + }, + { + "epoch": 3.41, + "learning_rate": 8.691862743561207e-06, + "loss": 0.0771, + "step": 73170 + }, + { + "epoch": 3.41, + "learning_rate": 8.69107895850642e-06, + "loss": 0.1589, + "step": 73175 + }, + { + "epoch": 3.41, + "learning_rate": 8.690295173451634e-06, + "loss": 0.0805, + "step": 73180 + }, + { + "epoch": 3.41, + "learning_rate": 8.689511388396847e-06, + "loss": 0.1952, + "step": 73185 + }, + { + "epoch": 3.42, + "learning_rate": 8.68872760334206e-06, + "loss": 0.2552, + "step": 73190 + }, + { + "epoch": 3.42, + "learning_rate": 8.687943818287273e-06, + "loss": 0.2266, + "step": 73195 + }, + { + "epoch": 3.42, + "learning_rate": 8.687160033232487e-06, + "loss": 0.0267, + "step": 73200 + }, + { + "epoch": 3.42, + "learning_rate": 8.6863762481777e-06, + "loss": 0.0572, + "step": 73205 + }, + { + "epoch": 3.42, + "learning_rate": 8.685592463122914e-06, + "loss": 0.0195, + "step": 73210 + }, + { + "epoch": 3.42, + "learning_rate": 8.684808678068127e-06, + "loss": 0.1335, + "step": 73215 + }, + { + "epoch": 3.42, + "learning_rate": 8.68402489301334e-06, + "loss": 0.0398, + "step": 73220 + }, + { + "epoch": 3.42, + "learning_rate": 8.683241107958554e-06, + "loss": 0.2702, + "step": 73225 + }, + { + "epoch": 3.42, + "learning_rate": 8.682457322903768e-06, + "loss": 0.1329, + "step": 73230 + }, + { + "epoch": 3.42, + "learning_rate": 8.68167353784898e-06, + "loss": 0.1341, + "step": 73235 + }, + { + "epoch": 3.42, + "learning_rate": 8.680889752794194e-06, + "loss": 0.1694, + "step": 73240 + }, + { + "epoch": 3.42, + "learning_rate": 8.680105967739408e-06, + "loss": 0.3827, + "step": 73245 + }, + { + "epoch": 3.42, + "learning_rate": 8.67932218268462e-06, + "loss": 0.0979, + "step": 73250 + }, + { + "epoch": 3.42, + "learning_rate": 8.678538397629834e-06, + "loss": 0.0562, + "step": 73255 + }, + { + "epoch": 3.42, + "learning_rate": 8.677754612575048e-06, + "loss": 0.0399, + "step": 73260 + }, + { + "epoch": 3.42, + "learning_rate": 8.67697082752026e-06, + "loss": 0.0307, + "step": 73265 + }, + { + "epoch": 3.42, + "learning_rate": 8.676187042465475e-06, + "loss": 0.0571, + "step": 73270 + }, + { + "epoch": 3.42, + "learning_rate": 8.675403257410688e-06, + "loss": 0.0912, + "step": 73275 + }, + { + "epoch": 3.42, + "learning_rate": 8.674619472355902e-06, + "loss": 0.0733, + "step": 73280 + }, + { + "epoch": 3.42, + "learning_rate": 8.673835687301115e-06, + "loss": 0.0705, + "step": 73285 + }, + { + "epoch": 3.42, + "learning_rate": 8.673051902246328e-06, + "loss": 0.1932, + "step": 73290 + }, + { + "epoch": 3.42, + "learning_rate": 8.672268117191542e-06, + "loss": 0.2412, + "step": 73295 + }, + { + "epoch": 3.42, + "learning_rate": 8.671484332136756e-06, + "loss": 0.1248, + "step": 73300 + }, + { + "epoch": 3.42, + "learning_rate": 8.67070054708197e-06, + "loss": 0.0374, + "step": 73305 + }, + { + "epoch": 3.42, + "learning_rate": 8.669916762027182e-06, + "loss": 0.0487, + "step": 73310 + }, + { + "epoch": 3.42, + "learning_rate": 8.669132976972395e-06, + "loss": 0.087, + "step": 73315 + }, + { + "epoch": 3.42, + "learning_rate": 8.668349191917608e-06, + "loss": 0.0897, + "step": 73320 + }, + { + "epoch": 3.42, + "learning_rate": 8.667565406862822e-06, + "loss": 0.092, + "step": 73325 + }, + { + "epoch": 3.42, + "learning_rate": 8.666781621808036e-06, + "loss": 0.1125, + "step": 73330 + }, + { + "epoch": 3.42, + "learning_rate": 8.665997836753249e-06, + "loss": 0.1317, + "step": 73335 + }, + { + "epoch": 3.42, + "learning_rate": 8.665214051698462e-06, + "loss": 0.2335, + "step": 73340 + }, + { + "epoch": 3.42, + "learning_rate": 8.664430266643676e-06, + "loss": 0.2869, + "step": 73345 + }, + { + "epoch": 3.42, + "learning_rate": 8.66364648158889e-06, + "loss": 0.0941, + "step": 73350 + }, + { + "epoch": 3.42, + "learning_rate": 8.662862696534104e-06, + "loss": 0.0136, + "step": 73355 + }, + { + "epoch": 3.42, + "learning_rate": 8.662078911479316e-06, + "loss": 0.065, + "step": 73360 + }, + { + "epoch": 3.42, + "learning_rate": 8.66129512642453e-06, + "loss": 0.0897, + "step": 73365 + }, + { + "epoch": 3.42, + "learning_rate": 8.660511341369744e-06, + "loss": 0.0441, + "step": 73370 + }, + { + "epoch": 3.42, + "learning_rate": 8.659727556314958e-06, + "loss": 0.0534, + "step": 73375 + }, + { + "epoch": 3.42, + "learning_rate": 8.65894377126017e-06, + "loss": 0.147, + "step": 73380 + }, + { + "epoch": 3.42, + "learning_rate": 8.658159986205384e-06, + "loss": 0.0851, + "step": 73385 + }, + { + "epoch": 3.42, + "learning_rate": 8.657376201150596e-06, + "loss": 0.1515, + "step": 73390 + }, + { + "epoch": 3.42, + "learning_rate": 8.65659241609581e-06, + "loss": 0.272, + "step": 73395 + }, + { + "epoch": 3.42, + "learning_rate": 8.655808631041024e-06, + "loss": 0.0938, + "step": 73400 + }, + { + "epoch": 3.43, + "learning_rate": 8.655024845986238e-06, + "loss": 0.0485, + "step": 73405 + }, + { + "epoch": 3.43, + "learning_rate": 8.65424106093145e-06, + "loss": 0.0487, + "step": 73410 + }, + { + "epoch": 3.43, + "learning_rate": 8.653457275876664e-06, + "loss": 0.024, + "step": 73415 + }, + { + "epoch": 3.43, + "learning_rate": 8.652673490821878e-06, + "loss": 0.0974, + "step": 73420 + }, + { + "epoch": 3.43, + "learning_rate": 8.651889705767092e-06, + "loss": 0.111, + "step": 73425 + }, + { + "epoch": 3.43, + "learning_rate": 8.651105920712304e-06, + "loss": 0.088, + "step": 73430 + }, + { + "epoch": 3.43, + "learning_rate": 8.650322135657518e-06, + "loss": 0.1132, + "step": 73435 + }, + { + "epoch": 3.43, + "learning_rate": 8.649538350602732e-06, + "loss": 0.1547, + "step": 73440 + }, + { + "epoch": 3.43, + "learning_rate": 8.648754565547944e-06, + "loss": 0.1941, + "step": 73445 + }, + { + "epoch": 3.43, + "learning_rate": 8.647970780493158e-06, + "loss": 0.0833, + "step": 73450 + }, + { + "epoch": 3.43, + "learning_rate": 8.647186995438372e-06, + "loss": 0.028, + "step": 73455 + }, + { + "epoch": 3.43, + "learning_rate": 8.646403210383584e-06, + "loss": 0.0611, + "step": 73460 + }, + { + "epoch": 3.43, + "learning_rate": 8.645619425328798e-06, + "loss": 0.0603, + "step": 73465 + }, + { + "epoch": 3.43, + "learning_rate": 8.644835640274012e-06, + "loss": 0.0568, + "step": 73470 + }, + { + "epoch": 3.43, + "learning_rate": 8.644051855219226e-06, + "loss": 0.0533, + "step": 73475 + }, + { + "epoch": 3.43, + "learning_rate": 8.643268070164438e-06, + "loss": 0.0709, + "step": 73480 + }, + { + "epoch": 3.43, + "learning_rate": 8.642484285109652e-06, + "loss": 0.0942, + "step": 73485 + }, + { + "epoch": 3.43, + "learning_rate": 8.641700500054866e-06, + "loss": 0.1067, + "step": 73490 + }, + { + "epoch": 3.43, + "learning_rate": 8.64091671500008e-06, + "loss": 0.2943, + "step": 73495 + }, + { + "epoch": 3.43, + "learning_rate": 8.640132929945292e-06, + "loss": 0.0612, + "step": 73500 + }, + { + "epoch": 3.43, + "learning_rate": 8.639349144890506e-06, + "loss": 0.041, + "step": 73505 + }, + { + "epoch": 3.43, + "learning_rate": 8.638565359835718e-06, + "loss": 0.0527, + "step": 73510 + }, + { + "epoch": 3.43, + "learning_rate": 8.637781574780932e-06, + "loss": 0.0718, + "step": 73515 + }, + { + "epoch": 3.43, + "learning_rate": 8.636997789726146e-06, + "loss": 0.1534, + "step": 73520 + }, + { + "epoch": 3.43, + "learning_rate": 8.63621400467136e-06, + "loss": 0.0413, + "step": 73525 + }, + { + "epoch": 3.43, + "learning_rate": 8.635430219616572e-06, + "loss": 0.1293, + "step": 73530 + }, + { + "epoch": 3.43, + "learning_rate": 8.634646434561786e-06, + "loss": 0.0855, + "step": 73535 + }, + { + "epoch": 3.43, + "learning_rate": 8.633862649507e-06, + "loss": 0.2193, + "step": 73540 + }, + { + "epoch": 3.43, + "learning_rate": 8.633078864452214e-06, + "loss": 0.3822, + "step": 73545 + }, + { + "epoch": 3.43, + "learning_rate": 8.632295079397426e-06, + "loss": 0.0456, + "step": 73550 + }, + { + "epoch": 3.43, + "learning_rate": 8.63151129434264e-06, + "loss": 0.0161, + "step": 73555 + }, + { + "epoch": 3.43, + "learning_rate": 8.630727509287854e-06, + "loss": 0.054, + "step": 73560 + }, + { + "epoch": 3.43, + "learning_rate": 8.629943724233068e-06, + "loss": 0.1103, + "step": 73565 + }, + { + "epoch": 3.43, + "learning_rate": 8.629159939178282e-06, + "loss": 0.0462, + "step": 73570 + }, + { + "epoch": 3.43, + "learning_rate": 8.628376154123494e-06, + "loss": 0.1141, + "step": 73575 + }, + { + "epoch": 3.43, + "learning_rate": 8.627592369068706e-06, + "loss": 0.1126, + "step": 73580 + }, + { + "epoch": 3.43, + "learning_rate": 8.62680858401392e-06, + "loss": 0.0803, + "step": 73585 + }, + { + "epoch": 3.43, + "learning_rate": 8.626024798959134e-06, + "loss": 0.1266, + "step": 73590 + }, + { + "epoch": 3.43, + "learning_rate": 8.625241013904348e-06, + "loss": 0.2645, + "step": 73595 + }, + { + "epoch": 3.43, + "learning_rate": 8.62445722884956e-06, + "loss": 0.09, + "step": 73600 + }, + { + "epoch": 3.43, + "learning_rate": 8.623673443794774e-06, + "loss": 0.0246, + "step": 73605 + }, + { + "epoch": 3.43, + "learning_rate": 8.622889658739988e-06, + "loss": 0.0233, + "step": 73610 + }, + { + "epoch": 3.43, + "learning_rate": 8.622105873685202e-06, + "loss": 0.037, + "step": 73615 + }, + { + "epoch": 3.44, + "learning_rate": 8.621322088630416e-06, + "loss": 0.0779, + "step": 73620 + }, + { + "epoch": 3.44, + "learning_rate": 8.620538303575628e-06, + "loss": 0.0583, + "step": 73625 + }, + { + "epoch": 3.44, + "learning_rate": 8.619754518520842e-06, + "loss": 0.1562, + "step": 73630 + }, + { + "epoch": 3.44, + "learning_rate": 8.618970733466056e-06, + "loss": 0.084, + "step": 73635 + }, + { + "epoch": 3.44, + "learning_rate": 8.618186948411268e-06, + "loss": 0.2324, + "step": 73640 + }, + { + "epoch": 3.44, + "learning_rate": 8.617403163356482e-06, + "loss": 0.3043, + "step": 73645 + }, + { + "epoch": 3.44, + "learning_rate": 8.616619378301696e-06, + "loss": 0.1044, + "step": 73650 + }, + { + "epoch": 3.44, + "learning_rate": 8.615835593246908e-06, + "loss": 0.0041, + "step": 73655 + }, + { + "epoch": 3.44, + "learning_rate": 8.615051808192122e-06, + "loss": 0.0606, + "step": 73660 + }, + { + "epoch": 3.44, + "learning_rate": 8.614268023137336e-06, + "loss": 0.0305, + "step": 73665 + }, + { + "epoch": 3.44, + "learning_rate": 8.61348423808255e-06, + "loss": 0.1222, + "step": 73670 + }, + { + "epoch": 3.44, + "learning_rate": 8.612700453027762e-06, + "loss": 0.0978, + "step": 73675 + }, + { + "epoch": 3.44, + "learning_rate": 8.611916667972976e-06, + "loss": 0.105, + "step": 73680 + }, + { + "epoch": 3.44, + "learning_rate": 8.61113288291819e-06, + "loss": 0.0781, + "step": 73685 + }, + { + "epoch": 3.44, + "learning_rate": 8.610349097863404e-06, + "loss": 0.196, + "step": 73690 + }, + { + "epoch": 3.44, + "learning_rate": 8.609565312808616e-06, + "loss": 0.2442, + "step": 73695 + }, + { + "epoch": 3.44, + "learning_rate": 8.60878152775383e-06, + "loss": 0.105, + "step": 73700 + }, + { + "epoch": 3.44, + "learning_rate": 8.607997742699042e-06, + "loss": 0.0184, + "step": 73705 + }, + { + "epoch": 3.44, + "learning_rate": 8.607213957644256e-06, + "loss": 0.0643, + "step": 73710 + }, + { + "epoch": 3.44, + "learning_rate": 8.60643017258947e-06, + "loss": 0.0301, + "step": 73715 + }, + { + "epoch": 3.44, + "learning_rate": 8.605646387534684e-06, + "loss": 0.0908, + "step": 73720 + }, + { + "epoch": 3.44, + "learning_rate": 8.604862602479896e-06, + "loss": 0.057, + "step": 73725 + }, + { + "epoch": 3.44, + "learning_rate": 8.60407881742511e-06, + "loss": 0.0879, + "step": 73730 + }, + { + "epoch": 3.44, + "learning_rate": 8.603295032370324e-06, + "loss": 0.1638, + "step": 73735 + }, + { + "epoch": 3.44, + "learning_rate": 8.602511247315538e-06, + "loss": 0.1459, + "step": 73740 + }, + { + "epoch": 3.44, + "learning_rate": 8.60172746226075e-06, + "loss": 0.3662, + "step": 73745 + }, + { + "epoch": 3.44, + "learning_rate": 8.600943677205964e-06, + "loss": 0.1232, + "step": 73750 + }, + { + "epoch": 3.44, + "learning_rate": 8.600159892151178e-06, + "loss": 0.0439, + "step": 73755 + }, + { + "epoch": 3.44, + "learning_rate": 8.599376107096392e-06, + "loss": 0.0612, + "step": 73760 + }, + { + "epoch": 3.44, + "learning_rate": 8.598592322041604e-06, + "loss": 0.0793, + "step": 73765 + }, + { + "epoch": 3.44, + "learning_rate": 8.597808536986818e-06, + "loss": 0.1031, + "step": 73770 + }, + { + "epoch": 3.44, + "learning_rate": 8.59702475193203e-06, + "loss": 0.1145, + "step": 73775 + }, + { + "epoch": 3.44, + "learning_rate": 8.596240966877244e-06, + "loss": 0.0756, + "step": 73780 + }, + { + "epoch": 3.44, + "learning_rate": 8.595457181822458e-06, + "loss": 0.1199, + "step": 73785 + }, + { + "epoch": 3.44, + "learning_rate": 8.594673396767672e-06, + "loss": 0.1422, + "step": 73790 + }, + { + "epoch": 3.44, + "learning_rate": 8.593889611712884e-06, + "loss": 0.2619, + "step": 73795 + }, + { + "epoch": 3.44, + "learning_rate": 8.593105826658098e-06, + "loss": 0.0832, + "step": 73800 + }, + { + "epoch": 3.44, + "learning_rate": 8.592322041603312e-06, + "loss": 0.0297, + "step": 73805 + }, + { + "epoch": 3.44, + "learning_rate": 8.591538256548526e-06, + "loss": 0.0311, + "step": 73810 + }, + { + "epoch": 3.44, + "learning_rate": 8.590754471493738e-06, + "loss": 0.0356, + "step": 73815 + }, + { + "epoch": 3.44, + "learning_rate": 8.589970686438952e-06, + "loss": 0.1395, + "step": 73820 + }, + { + "epoch": 3.44, + "learning_rate": 8.589186901384166e-06, + "loss": 0.321, + "step": 73825 + }, + { + "epoch": 3.45, + "learning_rate": 8.58840311632938e-06, + "loss": 0.1941, + "step": 73830 + }, + { + "epoch": 3.45, + "learning_rate": 8.587619331274592e-06, + "loss": 0.1362, + "step": 73835 + }, + { + "epoch": 3.45, + "learning_rate": 8.586835546219806e-06, + "loss": 0.1958, + "step": 73840 + }, + { + "epoch": 3.45, + "learning_rate": 8.586051761165018e-06, + "loss": 0.3383, + "step": 73845 + }, + { + "epoch": 3.45, + "learning_rate": 8.585267976110232e-06, + "loss": 0.0636, + "step": 73850 + }, + { + "epoch": 3.45, + "learning_rate": 8.584484191055446e-06, + "loss": 0.041, + "step": 73855 + }, + { + "epoch": 3.45, + "learning_rate": 8.58370040600066e-06, + "loss": 0.0478, + "step": 73860 + }, + { + "epoch": 3.45, + "learning_rate": 8.582916620945872e-06, + "loss": 0.0538, + "step": 73865 + }, + { + "epoch": 3.45, + "learning_rate": 8.582132835891086e-06, + "loss": 0.1034, + "step": 73870 + }, + { + "epoch": 3.45, + "learning_rate": 8.5813490508363e-06, + "loss": 0.1014, + "step": 73875 + }, + { + "epoch": 3.45, + "learning_rate": 8.580565265781513e-06, + "loss": 0.039, + "step": 73880 + }, + { + "epoch": 3.45, + "learning_rate": 8.579781480726727e-06, + "loss": 0.2059, + "step": 73885 + }, + { + "epoch": 3.45, + "learning_rate": 8.57899769567194e-06, + "loss": 0.2326, + "step": 73890 + }, + { + "epoch": 3.45, + "learning_rate": 8.578213910617153e-06, + "loss": 0.2216, + "step": 73895 + }, + { + "epoch": 3.45, + "learning_rate": 8.577430125562366e-06, + "loss": 0.0655, + "step": 73900 + }, + { + "epoch": 3.45, + "learning_rate": 8.57664634050758e-06, + "loss": 0.0306, + "step": 73905 + }, + { + "epoch": 3.45, + "learning_rate": 8.575862555452793e-06, + "loss": 0.041, + "step": 73910 + }, + { + "epoch": 3.45, + "learning_rate": 8.575078770398006e-06, + "loss": 0.0813, + "step": 73915 + }, + { + "epoch": 3.45, + "learning_rate": 8.57429498534322e-06, + "loss": 0.0851, + "step": 73920 + }, + { + "epoch": 3.45, + "learning_rate": 8.573511200288433e-06, + "loss": 0.0716, + "step": 73925 + }, + { + "epoch": 3.45, + "learning_rate": 8.572727415233647e-06, + "loss": 0.1528, + "step": 73930 + }, + { + "epoch": 3.45, + "learning_rate": 8.571943630178861e-06, + "loss": 0.0754, + "step": 73935 + }, + { + "epoch": 3.45, + "learning_rate": 8.571159845124074e-06, + "loss": 0.3233, + "step": 73940 + }, + { + "epoch": 3.45, + "learning_rate": 8.570376060069287e-06, + "loss": 0.2341, + "step": 73945 + }, + { + "epoch": 3.45, + "learning_rate": 8.569592275014501e-06, + "loss": 0.0771, + "step": 73950 + }, + { + "epoch": 3.45, + "learning_rate": 8.568808489959715e-06, + "loss": 0.0268, + "step": 73955 + }, + { + "epoch": 3.45, + "learning_rate": 8.568024704904927e-06, + "loss": 0.0653, + "step": 73960 + }, + { + "epoch": 3.45, + "learning_rate": 8.567240919850141e-06, + "loss": 0.0982, + "step": 73965 + }, + { + "epoch": 3.45, + "learning_rate": 8.566457134795354e-06, + "loss": 0.0533, + "step": 73970 + }, + { + "epoch": 3.45, + "learning_rate": 8.565673349740567e-06, + "loss": 0.0812, + "step": 73975 + }, + { + "epoch": 3.45, + "learning_rate": 8.564889564685781e-06, + "loss": 0.1737, + "step": 73980 + }, + { + "epoch": 3.45, + "learning_rate": 8.564105779630995e-06, + "loss": 0.1171, + "step": 73985 + }, + { + "epoch": 3.45, + "learning_rate": 8.563321994576207e-06, + "loss": 0.2456, + "step": 73990 + }, + { + "epoch": 3.45, + "learning_rate": 8.562538209521421e-06, + "loss": 0.2324, + "step": 73995 + }, + { + "epoch": 3.45, + "learning_rate": 8.561754424466635e-06, + "loss": 0.0594, + "step": 74000 + }, + { + "epoch": 3.45, + "learning_rate": 8.56097063941185e-06, + "loss": 0.0404, + "step": 74005 + }, + { + "epoch": 3.45, + "learning_rate": 8.560186854357061e-06, + "loss": 0.0326, + "step": 74010 + }, + { + "epoch": 3.45, + "learning_rate": 8.559403069302275e-06, + "loss": 0.0492, + "step": 74015 + }, + { + "epoch": 3.45, + "learning_rate": 8.55861928424749e-06, + "loss": 0.0784, + "step": 74020 + }, + { + "epoch": 3.45, + "learning_rate": 8.557835499192703e-06, + "loss": 0.1076, + "step": 74025 + }, + { + "epoch": 3.45, + "learning_rate": 8.557051714137915e-06, + "loss": 0.122, + "step": 74030 + }, + { + "epoch": 3.45, + "learning_rate": 8.55626792908313e-06, + "loss": 0.1381, + "step": 74035 + }, + { + "epoch": 3.45, + "learning_rate": 8.555484144028341e-06, + "loss": 0.1295, + "step": 74040 + }, + { + "epoch": 3.46, + "learning_rate": 8.554700358973555e-06, + "loss": 0.3292, + "step": 74045 + }, + { + "epoch": 3.46, + "learning_rate": 8.55391657391877e-06, + "loss": 0.0633, + "step": 74050 + }, + { + "epoch": 3.46, + "learning_rate": 8.553132788863983e-06, + "loss": 0.039, + "step": 74055 + }, + { + "epoch": 3.46, + "learning_rate": 8.552349003809195e-06, + "loss": 0.031, + "step": 74060 + }, + { + "epoch": 3.46, + "learning_rate": 8.55156521875441e-06, + "loss": 0.1121, + "step": 74065 + }, + { + "epoch": 3.46, + "learning_rate": 8.550781433699623e-06, + "loss": 0.0995, + "step": 74070 + }, + { + "epoch": 3.46, + "learning_rate": 8.549997648644837e-06, + "loss": 0.0764, + "step": 74075 + }, + { + "epoch": 3.46, + "learning_rate": 8.54921386359005e-06, + "loss": 0.1044, + "step": 74080 + }, + { + "epoch": 3.46, + "learning_rate": 8.548430078535263e-06, + "loss": 0.072, + "step": 74085 + }, + { + "epoch": 3.46, + "learning_rate": 8.547646293480477e-06, + "loss": 0.2137, + "step": 74090 + }, + { + "epoch": 3.46, + "learning_rate": 8.54686250842569e-06, + "loss": 0.2968, + "step": 74095 + }, + { + "epoch": 3.46, + "learning_rate": 8.546078723370903e-06, + "loss": 0.0581, + "step": 74100 + }, + { + "epoch": 3.46, + "learning_rate": 8.545294938316117e-06, + "loss": 0.0139, + "step": 74105 + }, + { + "epoch": 3.46, + "learning_rate": 8.54451115326133e-06, + "loss": 0.032, + "step": 74110 + }, + { + "epoch": 3.46, + "learning_rate": 8.543727368206543e-06, + "loss": 0.1237, + "step": 74115 + }, + { + "epoch": 3.46, + "learning_rate": 8.542943583151757e-06, + "loss": 0.0868, + "step": 74120 + }, + { + "epoch": 3.46, + "learning_rate": 8.542159798096971e-06, + "loss": 0.0434, + "step": 74125 + }, + { + "epoch": 3.46, + "learning_rate": 8.541376013042183e-06, + "loss": 0.0577, + "step": 74130 + }, + { + "epoch": 3.46, + "learning_rate": 8.540592227987397e-06, + "loss": 0.1186, + "step": 74135 + }, + { + "epoch": 3.46, + "learning_rate": 8.539808442932611e-06, + "loss": 0.2465, + "step": 74140 + }, + { + "epoch": 3.46, + "learning_rate": 8.539024657877825e-06, + "loss": 0.4115, + "step": 74145 + }, + { + "epoch": 3.46, + "learning_rate": 8.538240872823039e-06, + "loss": 0.0344, + "step": 74150 + }, + { + "epoch": 3.46, + "learning_rate": 8.537457087768251e-06, + "loss": 0.0311, + "step": 74155 + }, + { + "epoch": 3.46, + "learning_rate": 8.536673302713463e-06, + "loss": 0.0506, + "step": 74160 + }, + { + "epoch": 3.46, + "learning_rate": 8.535889517658677e-06, + "loss": 0.1081, + "step": 74165 + }, + { + "epoch": 3.46, + "learning_rate": 8.535105732603891e-06, + "loss": 0.0612, + "step": 74170 + }, + { + "epoch": 3.46, + "learning_rate": 8.534321947549105e-06, + "loss": 0.0644, + "step": 74175 + }, + { + "epoch": 3.46, + "learning_rate": 8.533538162494317e-06, + "loss": 0.0716, + "step": 74180 + }, + { + "epoch": 3.46, + "learning_rate": 8.532754377439531e-06, + "loss": 0.219, + "step": 74185 + }, + { + "epoch": 3.46, + "learning_rate": 8.531970592384745e-06, + "loss": 0.1413, + "step": 74190 + }, + { + "epoch": 3.46, + "learning_rate": 8.531186807329959e-06, + "loss": 0.2697, + "step": 74195 + }, + { + "epoch": 3.46, + "learning_rate": 8.530403022275173e-06, + "loss": 0.1026, + "step": 74200 + }, + { + "epoch": 3.46, + "learning_rate": 8.529619237220385e-06, + "loss": 0.1033, + "step": 74205 + }, + { + "epoch": 3.46, + "learning_rate": 8.528835452165599e-06, + "loss": 0.0327, + "step": 74210 + }, + { + "epoch": 3.46, + "learning_rate": 8.528051667110813e-06, + "loss": 0.0793, + "step": 74215 + }, + { + "epoch": 3.46, + "learning_rate": 8.527267882056027e-06, + "loss": 0.0608, + "step": 74220 + }, + { + "epoch": 3.46, + "learning_rate": 8.526484097001239e-06, + "loss": 0.1283, + "step": 74225 + }, + { + "epoch": 3.46, + "learning_rate": 8.525700311946453e-06, + "loss": 0.1363, + "step": 74230 + }, + { + "epoch": 3.46, + "learning_rate": 8.524916526891665e-06, + "loss": 0.1228, + "step": 74235 + }, + { + "epoch": 3.46, + "learning_rate": 8.524132741836879e-06, + "loss": 0.1047, + "step": 74240 + }, + { + "epoch": 3.46, + "learning_rate": 8.523348956782093e-06, + "loss": 0.3583, + "step": 74245 + }, + { + "epoch": 3.46, + "learning_rate": 8.522565171727307e-06, + "loss": 0.0644, + "step": 74250 + }, + { + "epoch": 3.46, + "learning_rate": 8.521781386672519e-06, + "loss": 0.0303, + "step": 74255 + }, + { + "epoch": 3.47, + "learning_rate": 8.520997601617733e-06, + "loss": 0.0464, + "step": 74260 + }, + { + "epoch": 3.47, + "learning_rate": 8.520213816562947e-06, + "loss": 0.0405, + "step": 74265 + }, + { + "epoch": 3.47, + "learning_rate": 8.51943003150816e-06, + "loss": 0.0766, + "step": 74270 + }, + { + "epoch": 3.47, + "learning_rate": 8.518646246453373e-06, + "loss": 0.0909, + "step": 74275 + }, + { + "epoch": 3.47, + "learning_rate": 8.517862461398587e-06, + "loss": 0.0987, + "step": 74280 + }, + { + "epoch": 3.47, + "learning_rate": 8.5170786763438e-06, + "loss": 0.0941, + "step": 74285 + }, + { + "epoch": 3.47, + "learning_rate": 8.516294891289013e-06, + "loss": 0.1221, + "step": 74290 + }, + { + "epoch": 3.47, + "learning_rate": 8.515511106234227e-06, + "loss": 0.2891, + "step": 74295 + }, + { + "epoch": 3.47, + "learning_rate": 8.51472732117944e-06, + "loss": 0.104, + "step": 74300 + }, + { + "epoch": 3.47, + "learning_rate": 8.513943536124653e-06, + "loss": 0.0496, + "step": 74305 + }, + { + "epoch": 3.47, + "learning_rate": 8.513159751069867e-06, + "loss": 0.0794, + "step": 74310 + }, + { + "epoch": 3.47, + "learning_rate": 8.512375966015081e-06, + "loss": 0.1103, + "step": 74315 + }, + { + "epoch": 3.47, + "learning_rate": 8.511592180960295e-06, + "loss": 0.0626, + "step": 74320 + }, + { + "epoch": 3.47, + "learning_rate": 8.510808395905507e-06, + "loss": 0.1357, + "step": 74325 + }, + { + "epoch": 3.47, + "learning_rate": 8.510024610850721e-06, + "loss": 0.1465, + "step": 74330 + }, + { + "epoch": 3.47, + "learning_rate": 8.509240825795935e-06, + "loss": 0.1323, + "step": 74335 + }, + { + "epoch": 3.47, + "learning_rate": 8.508457040741149e-06, + "loss": 0.2311, + "step": 74340 + }, + { + "epoch": 3.47, + "learning_rate": 8.507673255686361e-06, + "loss": 0.3059, + "step": 74345 + }, + { + "epoch": 3.47, + "learning_rate": 8.506889470631575e-06, + "loss": 0.0809, + "step": 74350 + }, + { + "epoch": 3.47, + "learning_rate": 8.506105685576787e-06, + "loss": 0.0233, + "step": 74355 + }, + { + "epoch": 3.47, + "learning_rate": 8.505321900522001e-06, + "loss": 0.0304, + "step": 74360 + }, + { + "epoch": 3.47, + "learning_rate": 8.504538115467215e-06, + "loss": 0.0697, + "step": 74365 + }, + { + "epoch": 3.47, + "learning_rate": 8.503754330412429e-06, + "loss": 0.1096, + "step": 74370 + }, + { + "epoch": 3.47, + "learning_rate": 8.502970545357641e-06, + "loss": 0.1051, + "step": 74375 + }, + { + "epoch": 3.47, + "learning_rate": 8.502186760302855e-06, + "loss": 0.0537, + "step": 74380 + }, + { + "epoch": 3.47, + "learning_rate": 8.501402975248069e-06, + "loss": 0.1299, + "step": 74385 + }, + { + "epoch": 3.47, + "learning_rate": 8.500619190193283e-06, + "loss": 0.2017, + "step": 74390 + }, + { + "epoch": 3.47, + "learning_rate": 8.499835405138495e-06, + "loss": 0.2505, + "step": 74395 + }, + { + "epoch": 3.47, + "learning_rate": 8.499051620083709e-06, + "loss": 0.0293, + "step": 74400 + }, + { + "epoch": 3.47, + "learning_rate": 8.498267835028923e-06, + "loss": 0.0581, + "step": 74405 + }, + { + "epoch": 3.47, + "learning_rate": 8.497484049974137e-06, + "loss": 0.0503, + "step": 74410 + }, + { + "epoch": 3.47, + "learning_rate": 8.49670026491935e-06, + "loss": 0.0547, + "step": 74415 + }, + { + "epoch": 3.47, + "learning_rate": 8.495916479864563e-06, + "loss": 0.0854, + "step": 74420 + }, + { + "epoch": 3.47, + "learning_rate": 8.495132694809775e-06, + "loss": 0.0962, + "step": 74425 + }, + { + "epoch": 3.47, + "learning_rate": 8.494348909754989e-06, + "loss": 0.0931, + "step": 74430 + }, + { + "epoch": 3.47, + "learning_rate": 8.493565124700203e-06, + "loss": 0.1318, + "step": 74435 + }, + { + "epoch": 3.47, + "learning_rate": 8.492781339645417e-06, + "loss": 0.223, + "step": 74440 + }, + { + "epoch": 3.47, + "learning_rate": 8.491997554590629e-06, + "loss": 0.2705, + "step": 74445 + }, + { + "epoch": 3.47, + "learning_rate": 8.491213769535843e-06, + "loss": 0.0941, + "step": 74450 + }, + { + "epoch": 3.47, + "learning_rate": 8.490429984481057e-06, + "loss": 0.0466, + "step": 74455 + }, + { + "epoch": 3.47, + "learning_rate": 8.48964619942627e-06, + "loss": 0.0315, + "step": 74460 + }, + { + "epoch": 3.47, + "learning_rate": 8.488862414371484e-06, + "loss": 0.044, + "step": 74465 + }, + { + "epoch": 3.47, + "learning_rate": 8.488078629316697e-06, + "loss": 0.1042, + "step": 74470 + }, + { + "epoch": 3.48, + "learning_rate": 8.48729484426191e-06, + "loss": 0.071, + "step": 74475 + }, + { + "epoch": 3.48, + "learning_rate": 8.486511059207125e-06, + "loss": 0.1504, + "step": 74480 + }, + { + "epoch": 3.48, + "learning_rate": 8.485727274152337e-06, + "loss": 0.1746, + "step": 74485 + }, + { + "epoch": 3.48, + "learning_rate": 8.48494348909755e-06, + "loss": 0.295, + "step": 74490 + }, + { + "epoch": 3.48, + "learning_rate": 8.484159704042763e-06, + "loss": 0.2217, + "step": 74495 + }, + { + "epoch": 3.48, + "learning_rate": 8.483375918987977e-06, + "loss": 0.0303, + "step": 74500 + }, + { + "epoch": 3.48, + "learning_rate": 8.48259213393319e-06, + "loss": 0.0239, + "step": 74505 + }, + { + "epoch": 3.48, + "learning_rate": 8.481808348878405e-06, + "loss": 0.0651, + "step": 74510 + }, + { + "epoch": 3.48, + "learning_rate": 8.481024563823618e-06, + "loss": 0.0711, + "step": 74515 + }, + { + "epoch": 3.48, + "learning_rate": 8.48024077876883e-06, + "loss": 0.162, + "step": 74520 + }, + { + "epoch": 3.48, + "learning_rate": 8.479456993714045e-06, + "loss": 0.0623, + "step": 74525 + }, + { + "epoch": 3.48, + "learning_rate": 8.478673208659258e-06, + "loss": 0.0458, + "step": 74530 + }, + { + "epoch": 3.48, + "learning_rate": 8.477889423604472e-06, + "loss": 0.0875, + "step": 74535 + }, + { + "epoch": 3.48, + "learning_rate": 8.477105638549685e-06, + "loss": 0.2352, + "step": 74540 + }, + { + "epoch": 3.48, + "learning_rate": 8.476321853494899e-06, + "loss": 0.2607, + "step": 74545 + }, + { + "epoch": 3.48, + "learning_rate": 8.47553806844011e-06, + "loss": 0.0826, + "step": 74550 + }, + { + "epoch": 3.48, + "learning_rate": 8.474754283385325e-06, + "loss": 0.0826, + "step": 74555 + }, + { + "epoch": 3.48, + "learning_rate": 8.473970498330539e-06, + "loss": 0.0637, + "step": 74560 + }, + { + "epoch": 3.48, + "learning_rate": 8.473186713275752e-06, + "loss": 0.053, + "step": 74565 + }, + { + "epoch": 3.48, + "learning_rate": 8.472402928220965e-06, + "loss": 0.064, + "step": 74570 + }, + { + "epoch": 3.48, + "learning_rate": 8.471619143166179e-06, + "loss": 0.0851, + "step": 74575 + }, + { + "epoch": 3.48, + "learning_rate": 8.470835358111392e-06, + "loss": 0.1457, + "step": 74580 + }, + { + "epoch": 3.48, + "learning_rate": 8.470051573056606e-06, + "loss": 0.1017, + "step": 74585 + }, + { + "epoch": 3.48, + "learning_rate": 8.469267788001819e-06, + "loss": 0.1879, + "step": 74590 + }, + { + "epoch": 3.48, + "learning_rate": 8.468484002947032e-06, + "loss": 0.3116, + "step": 74595 + }, + { + "epoch": 3.48, + "learning_rate": 8.467700217892246e-06, + "loss": 0.1218, + "step": 74600 + }, + { + "epoch": 3.48, + "learning_rate": 8.46691643283746e-06, + "loss": 0.0432, + "step": 74605 + }, + { + "epoch": 3.48, + "learning_rate": 8.466132647782673e-06, + "loss": 0.0217, + "step": 74610 + }, + { + "epoch": 3.48, + "learning_rate": 8.465348862727886e-06, + "loss": 0.0888, + "step": 74615 + }, + { + "epoch": 3.48, + "learning_rate": 8.464565077673099e-06, + "loss": 0.0544, + "step": 74620 + }, + { + "epoch": 3.48, + "learning_rate": 8.463781292618313e-06, + "loss": 0.0501, + "step": 74625 + }, + { + "epoch": 3.48, + "learning_rate": 8.462997507563526e-06, + "loss": 0.0863, + "step": 74630 + }, + { + "epoch": 3.48, + "learning_rate": 8.46221372250874e-06, + "loss": 0.2043, + "step": 74635 + }, + { + "epoch": 3.48, + "learning_rate": 8.461429937453953e-06, + "loss": 0.288, + "step": 74640 + }, + { + "epoch": 3.48, + "learning_rate": 8.460646152399166e-06, + "loss": 0.3324, + "step": 74645 + }, + { + "epoch": 3.48, + "learning_rate": 8.45986236734438e-06, + "loss": 0.0586, + "step": 74650 + }, + { + "epoch": 3.48, + "learning_rate": 8.459078582289594e-06, + "loss": 0.0707, + "step": 74655 + }, + { + "epoch": 3.48, + "learning_rate": 8.458294797234806e-06, + "loss": 0.0335, + "step": 74660 + }, + { + "epoch": 3.48, + "learning_rate": 8.45751101218002e-06, + "loss": 0.0967, + "step": 74665 + }, + { + "epoch": 3.48, + "learning_rate": 8.456727227125234e-06, + "loss": 0.0504, + "step": 74670 + }, + { + "epoch": 3.48, + "learning_rate": 8.455943442070448e-06, + "loss": 0.0912, + "step": 74675 + }, + { + "epoch": 3.48, + "learning_rate": 8.45515965701566e-06, + "loss": 0.1916, + "step": 74680 + }, + { + "epoch": 3.48, + "learning_rate": 8.454375871960874e-06, + "loss": 0.195, + "step": 74685 + }, + { + "epoch": 3.49, + "learning_rate": 8.453592086906087e-06, + "loss": 0.1557, + "step": 74690 + }, + { + "epoch": 3.49, + "learning_rate": 8.4528083018513e-06, + "loss": 0.3294, + "step": 74695 + }, + { + "epoch": 3.49, + "learning_rate": 8.452024516796514e-06, + "loss": 0.1187, + "step": 74700 + }, + { + "epoch": 3.49, + "learning_rate": 8.451240731741728e-06, + "loss": 0.0509, + "step": 74705 + }, + { + "epoch": 3.49, + "learning_rate": 8.45045694668694e-06, + "loss": 0.0239, + "step": 74710 + }, + { + "epoch": 3.49, + "learning_rate": 8.449673161632154e-06, + "loss": 0.0198, + "step": 74715 + }, + { + "epoch": 3.49, + "learning_rate": 8.448889376577368e-06, + "loss": 0.0729, + "step": 74720 + }, + { + "epoch": 3.49, + "learning_rate": 8.448105591522582e-06, + "loss": 0.0967, + "step": 74725 + }, + { + "epoch": 3.49, + "learning_rate": 8.447321806467796e-06, + "loss": 0.1026, + "step": 74730 + }, + { + "epoch": 3.49, + "learning_rate": 8.446538021413008e-06, + "loss": 0.1301, + "step": 74735 + }, + { + "epoch": 3.49, + "learning_rate": 8.445754236358222e-06, + "loss": 0.2965, + "step": 74740 + }, + { + "epoch": 3.49, + "learning_rate": 8.444970451303434e-06, + "loss": 0.3699, + "step": 74745 + }, + { + "epoch": 3.49, + "learning_rate": 8.444186666248648e-06, + "loss": 0.0774, + "step": 74750 + }, + { + "epoch": 3.49, + "learning_rate": 8.443402881193862e-06, + "loss": 0.036, + "step": 74755 + }, + { + "epoch": 3.49, + "learning_rate": 8.442619096139074e-06, + "loss": 0.0738, + "step": 74760 + }, + { + "epoch": 3.49, + "learning_rate": 8.441835311084288e-06, + "loss": 0.146, + "step": 74765 + }, + { + "epoch": 3.49, + "learning_rate": 8.441051526029502e-06, + "loss": 0.0819, + "step": 74770 + }, + { + "epoch": 3.49, + "learning_rate": 8.440267740974716e-06, + "loss": 0.0764, + "step": 74775 + }, + { + "epoch": 3.49, + "learning_rate": 8.43948395591993e-06, + "loss": 0.1826, + "step": 74780 + }, + { + "epoch": 3.49, + "learning_rate": 8.438700170865142e-06, + "loss": 0.1201, + "step": 74785 + }, + { + "epoch": 3.49, + "learning_rate": 8.437916385810356e-06, + "loss": 0.2108, + "step": 74790 + }, + { + "epoch": 3.49, + "learning_rate": 8.43713260075557e-06, + "loss": 0.3365, + "step": 74795 + }, + { + "epoch": 3.49, + "learning_rate": 8.436348815700784e-06, + "loss": 0.0217, + "step": 74800 + }, + { + "epoch": 3.49, + "learning_rate": 8.435565030645996e-06, + "loss": 0.0271, + "step": 74805 + }, + { + "epoch": 3.49, + "learning_rate": 8.434781245591208e-06, + "loss": 0.1088, + "step": 74810 + }, + { + "epoch": 3.49, + "learning_rate": 8.433997460536422e-06, + "loss": 0.0373, + "step": 74815 + }, + { + "epoch": 3.49, + "learning_rate": 8.433213675481636e-06, + "loss": 0.0418, + "step": 74820 + }, + { + "epoch": 3.49, + "learning_rate": 8.43242989042685e-06, + "loss": 0.0449, + "step": 74825 + }, + { + "epoch": 3.49, + "learning_rate": 8.431646105372064e-06, + "loss": 0.1189, + "step": 74830 + }, + { + "epoch": 3.49, + "learning_rate": 8.430862320317276e-06, + "loss": 0.2114, + "step": 74835 + }, + { + "epoch": 3.49, + "learning_rate": 8.43007853526249e-06, + "loss": 0.2039, + "step": 74840 + }, + { + "epoch": 3.49, + "learning_rate": 8.429294750207704e-06, + "loss": 0.2547, + "step": 74845 + }, + { + "epoch": 3.49, + "learning_rate": 8.428510965152918e-06, + "loss": 0.109, + "step": 74850 + }, + { + "epoch": 3.49, + "learning_rate": 8.42772718009813e-06, + "loss": 0.0629, + "step": 74855 + }, + { + "epoch": 3.49, + "learning_rate": 8.426943395043344e-06, + "loss": 0.0208, + "step": 74860 + }, + { + "epoch": 3.49, + "learning_rate": 8.426159609988558e-06, + "loss": 0.058, + "step": 74865 + }, + { + "epoch": 3.49, + "learning_rate": 8.425375824933772e-06, + "loss": 0.0206, + "step": 74870 + }, + { + "epoch": 3.49, + "learning_rate": 8.424592039878984e-06, + "loss": 0.0756, + "step": 74875 + }, + { + "epoch": 3.49, + "learning_rate": 8.423808254824198e-06, + "loss": 0.0402, + "step": 74880 + }, + { + "epoch": 3.49, + "learning_rate": 8.42302446976941e-06, + "loss": 0.1043, + "step": 74885 + }, + { + "epoch": 3.49, + "learning_rate": 8.422240684714624e-06, + "loss": 0.2587, + "step": 74890 + }, + { + "epoch": 3.49, + "learning_rate": 8.421456899659838e-06, + "loss": 0.283, + "step": 74895 + }, + { + "epoch": 3.49, + "learning_rate": 8.420673114605052e-06, + "loss": 0.0697, + "step": 74900 + }, + { + "epoch": 3.5, + "learning_rate": 8.419889329550264e-06, + "loss": 0.0106, + "step": 74905 + }, + { + "epoch": 3.5, + "learning_rate": 8.419105544495478e-06, + "loss": 0.0514, + "step": 74910 + }, + { + "epoch": 3.5, + "learning_rate": 8.418321759440692e-06, + "loss": 0.0274, + "step": 74915 + }, + { + "epoch": 3.5, + "learning_rate": 8.417537974385906e-06, + "loss": 0.1058, + "step": 74920 + }, + { + "epoch": 3.5, + "learning_rate": 8.416754189331118e-06, + "loss": 0.0667, + "step": 74925 + }, + { + "epoch": 3.5, + "learning_rate": 8.415970404276332e-06, + "loss": 0.0528, + "step": 74930 + }, + { + "epoch": 3.5, + "learning_rate": 8.415186619221546e-06, + "loss": 0.1184, + "step": 74935 + }, + { + "epoch": 3.5, + "learning_rate": 8.414402834166758e-06, + "loss": 0.2604, + "step": 74940 + }, + { + "epoch": 3.5, + "learning_rate": 8.413619049111972e-06, + "loss": 0.3226, + "step": 74945 + }, + { + "epoch": 3.5, + "learning_rate": 8.412835264057186e-06, + "loss": 0.0827, + "step": 74950 + }, + { + "epoch": 3.5, + "learning_rate": 8.412051479002398e-06, + "loss": 0.0426, + "step": 74955 + }, + { + "epoch": 3.5, + "learning_rate": 8.411267693947612e-06, + "loss": 0.0343, + "step": 74960 + }, + { + "epoch": 3.5, + "learning_rate": 8.410483908892826e-06, + "loss": 0.1049, + "step": 74965 + }, + { + "epoch": 3.5, + "learning_rate": 8.40970012383804e-06, + "loss": 0.056, + "step": 74970 + }, + { + "epoch": 3.5, + "learning_rate": 8.408916338783252e-06, + "loss": 0.1389, + "step": 74975 + }, + { + "epoch": 3.5, + "learning_rate": 8.408132553728466e-06, + "loss": 0.0572, + "step": 74980 + }, + { + "epoch": 3.5, + "learning_rate": 8.40734876867368e-06, + "loss": 0.115, + "step": 74985 + }, + { + "epoch": 3.5, + "learning_rate": 8.406564983618894e-06, + "loss": 0.2138, + "step": 74990 + }, + { + "epoch": 3.5, + "learning_rate": 8.405781198564108e-06, + "loss": 0.3076, + "step": 74995 + }, + { + "epoch": 3.5, + "learning_rate": 8.40499741350932e-06, + "loss": 0.0509, + "step": 75000 + }, + { + "epoch": 3.5, + "learning_rate": 8.404213628454532e-06, + "loss": 0.0308, + "step": 75005 + }, + { + "epoch": 3.5, + "learning_rate": 8.403429843399746e-06, + "loss": 0.0843, + "step": 75010 + }, + { + "epoch": 3.5, + "learning_rate": 8.40264605834496e-06, + "loss": 0.1278, + "step": 75015 + }, + { + "epoch": 3.5, + "learning_rate": 8.401862273290174e-06, + "loss": 0.0298, + "step": 75020 + }, + { + "epoch": 3.5, + "learning_rate": 8.401078488235386e-06, + "loss": 0.0982, + "step": 75025 + }, + { + "epoch": 3.5, + "learning_rate": 8.4002947031806e-06, + "loss": 0.1339, + "step": 75030 + }, + { + "epoch": 3.5, + "learning_rate": 8.399510918125814e-06, + "loss": 0.1617, + "step": 75035 + }, + { + "epoch": 3.5, + "learning_rate": 8.398727133071028e-06, + "loss": 0.2432, + "step": 75040 + }, + { + "epoch": 3.5, + "learning_rate": 8.397943348016242e-06, + "loss": 0.2742, + "step": 75045 + }, + { + "epoch": 3.5, + "learning_rate": 8.397159562961454e-06, + "loss": 0.0797, + "step": 75050 + }, + { + "epoch": 3.5, + "learning_rate": 8.396375777906668e-06, + "loss": 0.048, + "step": 75055 + }, + { + "epoch": 3.5, + "learning_rate": 8.395591992851882e-06, + "loss": 0.035, + "step": 75060 + }, + { + "epoch": 3.5, + "learning_rate": 8.394808207797096e-06, + "loss": 0.0585, + "step": 75065 + }, + { + "epoch": 3.5, + "learning_rate": 8.394024422742308e-06, + "loss": 0.1197, + "step": 75070 + }, + { + "epoch": 3.5, + "learning_rate": 8.39324063768752e-06, + "loss": 0.0651, + "step": 75075 + }, + { + "epoch": 3.5, + "learning_rate": 8.392456852632734e-06, + "loss": 0.1699, + "step": 75080 + }, + { + "epoch": 3.5, + "learning_rate": 8.391673067577948e-06, + "loss": 0.1379, + "step": 75085 + }, + { + "epoch": 3.5, + "learning_rate": 8.390889282523162e-06, + "loss": 0.2202, + "step": 75090 + }, + { + "epoch": 3.5, + "learning_rate": 8.390105497468376e-06, + "loss": 0.3336, + "step": 75095 + }, + { + "epoch": 3.5, + "learning_rate": 8.389321712413588e-06, + "loss": 0.0507, + "step": 75100 + }, + { + "epoch": 3.5, + "learning_rate": 8.388537927358802e-06, + "loss": 0.0492, + "step": 75105 + }, + { + "epoch": 3.5, + "learning_rate": 8.387754142304016e-06, + "loss": 0.0252, + "step": 75110 + }, + { + "epoch": 3.5, + "learning_rate": 8.38697035724923e-06, + "loss": 0.1295, + "step": 75115 + }, + { + "epoch": 3.51, + "learning_rate": 8.386186572194442e-06, + "loss": 0.1012, + "step": 75120 + }, + { + "epoch": 3.51, + "learning_rate": 8.385402787139656e-06, + "loss": 0.1108, + "step": 75125 + }, + { + "epoch": 3.51, + "learning_rate": 8.38461900208487e-06, + "loss": 0.0637, + "step": 75130 + }, + { + "epoch": 3.51, + "learning_rate": 8.383835217030082e-06, + "loss": 0.1363, + "step": 75135 + }, + { + "epoch": 3.51, + "learning_rate": 8.383051431975296e-06, + "loss": 0.2128, + "step": 75140 + }, + { + "epoch": 3.51, + "learning_rate": 8.38226764692051e-06, + "loss": 0.2319, + "step": 75145 + }, + { + "epoch": 3.51, + "learning_rate": 8.381483861865722e-06, + "loss": 0.0976, + "step": 75150 + }, + { + "epoch": 3.51, + "learning_rate": 8.380700076810936e-06, + "loss": 0.0321, + "step": 75155 + }, + { + "epoch": 3.51, + "learning_rate": 8.37991629175615e-06, + "loss": 0.0293, + "step": 75160 + }, + { + "epoch": 3.51, + "learning_rate": 8.379132506701364e-06, + "loss": 0.0679, + "step": 75165 + }, + { + "epoch": 3.51, + "learning_rate": 8.378348721646576e-06, + "loss": 0.1282, + "step": 75170 + }, + { + "epoch": 3.51, + "learning_rate": 8.37756493659179e-06, + "loss": 0.0671, + "step": 75175 + }, + { + "epoch": 3.51, + "learning_rate": 8.376781151537004e-06, + "loss": 0.0583, + "step": 75180 + }, + { + "epoch": 3.51, + "learning_rate": 8.375997366482217e-06, + "loss": 0.1728, + "step": 75185 + }, + { + "epoch": 3.51, + "learning_rate": 8.37521358142743e-06, + "loss": 0.1375, + "step": 75190 + }, + { + "epoch": 3.51, + "learning_rate": 8.374429796372644e-06, + "loss": 0.1859, + "step": 75195 + }, + { + "epoch": 3.51, + "learning_rate": 8.373646011317856e-06, + "loss": 0.0792, + "step": 75200 + }, + { + "epoch": 3.51, + "learning_rate": 8.37286222626307e-06, + "loss": 0.0548, + "step": 75205 + }, + { + "epoch": 3.51, + "learning_rate": 8.372078441208284e-06, + "loss": 0.0164, + "step": 75210 + }, + { + "epoch": 3.51, + "learning_rate": 8.371294656153498e-06, + "loss": 0.0193, + "step": 75215 + }, + { + "epoch": 3.51, + "learning_rate": 8.37051087109871e-06, + "loss": 0.0914, + "step": 75220 + }, + { + "epoch": 3.51, + "learning_rate": 8.369727086043924e-06, + "loss": 0.1217, + "step": 75225 + }, + { + "epoch": 3.51, + "learning_rate": 8.368943300989138e-06, + "loss": 0.1028, + "step": 75230 + }, + { + "epoch": 3.51, + "learning_rate": 8.368159515934351e-06, + "loss": 0.1744, + "step": 75235 + }, + { + "epoch": 3.51, + "learning_rate": 8.367375730879564e-06, + "loss": 0.2296, + "step": 75240 + }, + { + "epoch": 3.51, + "learning_rate": 8.366591945824778e-06, + "loss": 0.2154, + "step": 75245 + }, + { + "epoch": 3.51, + "learning_rate": 8.365808160769991e-06, + "loss": 0.0799, + "step": 75250 + }, + { + "epoch": 3.51, + "learning_rate": 8.365024375715205e-06, + "loss": 0.0403, + "step": 75255 + }, + { + "epoch": 3.51, + "learning_rate": 8.36424059066042e-06, + "loss": 0.0285, + "step": 75260 + }, + { + "epoch": 3.51, + "learning_rate": 8.363456805605631e-06, + "loss": 0.0687, + "step": 75265 + }, + { + "epoch": 3.51, + "learning_rate": 8.362673020550844e-06, + "loss": 0.1119, + "step": 75270 + }, + { + "epoch": 3.51, + "learning_rate": 8.361889235496058e-06, + "loss": 0.0707, + "step": 75275 + }, + { + "epoch": 3.51, + "learning_rate": 8.361105450441272e-06, + "loss": 0.0419, + "step": 75280 + }, + { + "epoch": 3.51, + "learning_rate": 8.360321665386485e-06, + "loss": 0.1551, + "step": 75285 + }, + { + "epoch": 3.51, + "learning_rate": 8.359537880331698e-06, + "loss": 0.209, + "step": 75290 + }, + { + "epoch": 3.51, + "learning_rate": 8.358754095276912e-06, + "loss": 0.3307, + "step": 75295 + }, + { + "epoch": 3.51, + "learning_rate": 8.357970310222125e-06, + "loss": 0.0807, + "step": 75300 + }, + { + "epoch": 3.51, + "learning_rate": 8.35718652516734e-06, + "loss": 0.0273, + "step": 75305 + }, + { + "epoch": 3.51, + "learning_rate": 8.356402740112553e-06, + "loss": 0.0654, + "step": 75310 + }, + { + "epoch": 3.51, + "learning_rate": 8.355618955057765e-06, + "loss": 0.0535, + "step": 75315 + }, + { + "epoch": 3.51, + "learning_rate": 8.35483517000298e-06, + "loss": 0.0402, + "step": 75320 + }, + { + "epoch": 3.51, + "learning_rate": 8.354051384948193e-06, + "loss": 0.0842, + "step": 75325 + }, + { + "epoch": 3.52, + "learning_rate": 8.353267599893405e-06, + "loss": 0.1353, + "step": 75330 + }, + { + "epoch": 3.52, + "learning_rate": 8.35248381483862e-06, + "loss": 0.1386, + "step": 75335 + }, + { + "epoch": 3.52, + "learning_rate": 8.351700029783832e-06, + "loss": 0.1482, + "step": 75340 + }, + { + "epoch": 3.52, + "learning_rate": 8.350916244729046e-06, + "loss": 0.4411, + "step": 75345 + }, + { + "epoch": 3.52, + "learning_rate": 8.35013245967426e-06, + "loss": 0.0583, + "step": 75350 + }, + { + "epoch": 3.52, + "learning_rate": 8.349348674619473e-06, + "loss": 0.0396, + "step": 75355 + }, + { + "epoch": 3.52, + "learning_rate": 8.348564889564687e-06, + "loss": 0.02, + "step": 75360 + }, + { + "epoch": 3.52, + "learning_rate": 8.3477811045099e-06, + "loss": 0.0512, + "step": 75365 + }, + { + "epoch": 3.52, + "learning_rate": 8.346997319455113e-06, + "loss": 0.0338, + "step": 75370 + }, + { + "epoch": 3.52, + "learning_rate": 8.346213534400327e-06, + "loss": 0.0807, + "step": 75375 + }, + { + "epoch": 3.52, + "learning_rate": 8.345429749345541e-06, + "loss": 0.1244, + "step": 75380 + }, + { + "epoch": 3.52, + "learning_rate": 8.344645964290753e-06, + "loss": 0.1919, + "step": 75385 + }, + { + "epoch": 3.52, + "learning_rate": 8.343862179235967e-06, + "loss": 0.2456, + "step": 75390 + }, + { + "epoch": 3.52, + "learning_rate": 8.34307839418118e-06, + "loss": 0.3295, + "step": 75395 + }, + { + "epoch": 3.52, + "learning_rate": 8.342294609126393e-06, + "loss": 0.0937, + "step": 75400 + }, + { + "epoch": 3.52, + "learning_rate": 8.341510824071607e-06, + "loss": 0.0356, + "step": 75405 + }, + { + "epoch": 3.52, + "learning_rate": 8.340727039016821e-06, + "loss": 0.0717, + "step": 75410 + }, + { + "epoch": 3.52, + "learning_rate": 8.339943253962033e-06, + "loss": 0.0336, + "step": 75415 + }, + { + "epoch": 3.52, + "learning_rate": 8.339159468907247e-06, + "loss": 0.0833, + "step": 75420 + }, + { + "epoch": 3.52, + "learning_rate": 8.338375683852461e-06, + "loss": 0.1159, + "step": 75425 + }, + { + "epoch": 3.52, + "learning_rate": 8.337591898797675e-06, + "loss": 0.0786, + "step": 75430 + }, + { + "epoch": 3.52, + "learning_rate": 8.336808113742887e-06, + "loss": 0.1555, + "step": 75435 + }, + { + "epoch": 3.52, + "learning_rate": 8.336024328688101e-06, + "loss": 0.2435, + "step": 75440 + }, + { + "epoch": 3.52, + "learning_rate": 8.335240543633315e-06, + "loss": 0.2153, + "step": 75445 + }, + { + "epoch": 3.52, + "learning_rate": 8.334456758578529e-06, + "loss": 0.0385, + "step": 75450 + }, + { + "epoch": 3.52, + "learning_rate": 8.333672973523741e-06, + "loss": 0.0182, + "step": 75455 + }, + { + "epoch": 3.52, + "learning_rate": 8.332889188468955e-06, + "loss": 0.0516, + "step": 75460 + }, + { + "epoch": 3.52, + "learning_rate": 8.332105403414167e-06, + "loss": 0.0697, + "step": 75465 + }, + { + "epoch": 3.52, + "learning_rate": 8.331321618359381e-06, + "loss": 0.1061, + "step": 75470 + }, + { + "epoch": 3.52, + "learning_rate": 8.330537833304595e-06, + "loss": 0.1071, + "step": 75475 + }, + { + "epoch": 3.52, + "learning_rate": 8.329754048249809e-06, + "loss": 0.0414, + "step": 75480 + }, + { + "epoch": 3.52, + "learning_rate": 8.328970263195021e-06, + "loss": 0.1574, + "step": 75485 + }, + { + "epoch": 3.52, + "learning_rate": 8.328186478140235e-06, + "loss": 0.191, + "step": 75490 + }, + { + "epoch": 3.52, + "learning_rate": 8.327402693085449e-06, + "loss": 0.1444, + "step": 75495 + }, + { + "epoch": 3.52, + "learning_rate": 8.326618908030663e-06, + "loss": 0.0571, + "step": 75500 + }, + { + "epoch": 3.52, + "learning_rate": 8.325835122975875e-06, + "loss": 0.0229, + "step": 75505 + }, + { + "epoch": 3.52, + "learning_rate": 8.325051337921089e-06, + "loss": 0.063, + "step": 75510 + }, + { + "epoch": 3.52, + "learning_rate": 8.324267552866303e-06, + "loss": 0.0546, + "step": 75515 + }, + { + "epoch": 3.52, + "learning_rate": 8.323483767811517e-06, + "loss": 0.0901, + "step": 75520 + }, + { + "epoch": 3.52, + "learning_rate": 8.32269998275673e-06, + "loss": 0.0649, + "step": 75525 + }, + { + "epoch": 3.52, + "learning_rate": 8.321916197701943e-06, + "loss": 0.0851, + "step": 75530 + }, + { + "epoch": 3.52, + "learning_rate": 8.321132412647155e-06, + "loss": 0.1331, + "step": 75535 + }, + { + "epoch": 3.52, + "learning_rate": 8.32034862759237e-06, + "loss": 0.2535, + "step": 75540 + }, + { + "epoch": 3.53, + "learning_rate": 8.319564842537583e-06, + "loss": 0.2529, + "step": 75545 + }, + { + "epoch": 3.53, + "learning_rate": 8.318781057482797e-06, + "loss": 0.0799, + "step": 75550 + }, + { + "epoch": 3.53, + "learning_rate": 8.31799727242801e-06, + "loss": 0.0059, + "step": 75555 + }, + { + "epoch": 3.53, + "learning_rate": 8.317213487373223e-06, + "loss": 0.0637, + "step": 75560 + }, + { + "epoch": 3.53, + "learning_rate": 8.316429702318437e-06, + "loss": 0.03, + "step": 75565 + }, + { + "epoch": 3.53, + "learning_rate": 8.315645917263651e-06, + "loss": 0.081, + "step": 75570 + }, + { + "epoch": 3.53, + "learning_rate": 8.314862132208865e-06, + "loss": 0.1242, + "step": 75575 + }, + { + "epoch": 3.53, + "learning_rate": 8.314078347154077e-06, + "loss": 0.0443, + "step": 75580 + }, + { + "epoch": 3.53, + "learning_rate": 8.313294562099291e-06, + "loss": 0.1409, + "step": 75585 + }, + { + "epoch": 3.53, + "learning_rate": 8.312510777044503e-06, + "loss": 0.0827, + "step": 75590 + }, + { + "epoch": 3.53, + "learning_rate": 8.311726991989717e-06, + "loss": 0.2518, + "step": 75595 + }, + { + "epoch": 3.53, + "learning_rate": 8.310943206934931e-06, + "loss": 0.0828, + "step": 75600 + }, + { + "epoch": 3.53, + "learning_rate": 8.310159421880143e-06, + "loss": 0.015, + "step": 75605 + }, + { + "epoch": 3.53, + "learning_rate": 8.309375636825357e-06, + "loss": 0.0221, + "step": 75610 + }, + { + "epoch": 3.53, + "learning_rate": 8.308591851770571e-06, + "loss": 0.0275, + "step": 75615 + }, + { + "epoch": 3.53, + "learning_rate": 8.307808066715785e-06, + "loss": 0.0394, + "step": 75620 + }, + { + "epoch": 3.53, + "learning_rate": 8.307024281660999e-06, + "loss": 0.0226, + "step": 75625 + }, + { + "epoch": 3.53, + "learning_rate": 8.306240496606211e-06, + "loss": 0.0955, + "step": 75630 + }, + { + "epoch": 3.53, + "learning_rate": 8.305456711551425e-06, + "loss": 0.1029, + "step": 75635 + }, + { + "epoch": 3.53, + "learning_rate": 8.304672926496639e-06, + "loss": 0.2034, + "step": 75640 + }, + { + "epoch": 3.53, + "learning_rate": 8.303889141441853e-06, + "loss": 0.3854, + "step": 75645 + }, + { + "epoch": 3.53, + "learning_rate": 8.303105356387065e-06, + "loss": 0.0793, + "step": 75650 + }, + { + "epoch": 3.53, + "learning_rate": 8.302321571332277e-06, + "loss": 0.0189, + "step": 75655 + }, + { + "epoch": 3.53, + "learning_rate": 8.301537786277491e-06, + "loss": 0.0376, + "step": 75660 + }, + { + "epoch": 3.53, + "learning_rate": 8.300754001222705e-06, + "loss": 0.0251, + "step": 75665 + }, + { + "epoch": 3.53, + "learning_rate": 8.299970216167919e-06, + "loss": 0.068, + "step": 75670 + }, + { + "epoch": 3.53, + "learning_rate": 8.299186431113133e-06, + "loss": 0.1966, + "step": 75675 + }, + { + "epoch": 3.53, + "learning_rate": 8.298402646058345e-06, + "loss": 0.0765, + "step": 75680 + }, + { + "epoch": 3.53, + "learning_rate": 8.297618861003559e-06, + "loss": 0.0766, + "step": 75685 + }, + { + "epoch": 3.53, + "learning_rate": 8.296835075948773e-06, + "loss": 0.2139, + "step": 75690 + }, + { + "epoch": 3.53, + "learning_rate": 8.296051290893987e-06, + "loss": 0.2011, + "step": 75695 + }, + { + "epoch": 3.53, + "learning_rate": 8.295267505839199e-06, + "loss": 0.0308, + "step": 75700 + }, + { + "epoch": 3.53, + "learning_rate": 8.294483720784413e-06, + "loss": 0.0239, + "step": 75705 + }, + { + "epoch": 3.53, + "learning_rate": 8.293699935729627e-06, + "loss": 0.0442, + "step": 75710 + }, + { + "epoch": 3.53, + "learning_rate": 8.29291615067484e-06, + "loss": 0.1048, + "step": 75715 + }, + { + "epoch": 3.53, + "learning_rate": 8.292132365620053e-06, + "loss": 0.1053, + "step": 75720 + }, + { + "epoch": 3.53, + "learning_rate": 8.291348580565267e-06, + "loss": 0.0838, + "step": 75725 + }, + { + "epoch": 3.53, + "learning_rate": 8.290564795510479e-06, + "loss": 0.1122, + "step": 75730 + }, + { + "epoch": 3.53, + "learning_rate": 8.289781010455693e-06, + "loss": 0.0573, + "step": 75735 + }, + { + "epoch": 3.53, + "learning_rate": 8.288997225400907e-06, + "loss": 0.286, + "step": 75740 + }, + { + "epoch": 3.53, + "learning_rate": 8.28821344034612e-06, + "loss": 0.3212, + "step": 75745 + }, + { + "epoch": 3.53, + "learning_rate": 8.287429655291333e-06, + "loss": 0.0789, + "step": 75750 + }, + { + "epoch": 3.53, + "learning_rate": 8.286645870236547e-06, + "loss": 0.0362, + "step": 75755 + }, + { + "epoch": 3.54, + "learning_rate": 8.28586208518176e-06, + "loss": 0.0528, + "step": 75760 + }, + { + "epoch": 3.54, + "learning_rate": 8.285078300126975e-06, + "loss": 0.0774, + "step": 75765 + }, + { + "epoch": 3.54, + "learning_rate": 8.284294515072187e-06, + "loss": 0.037, + "step": 75770 + }, + { + "epoch": 3.54, + "learning_rate": 8.2835107300174e-06, + "loss": 0.0995, + "step": 75775 + }, + { + "epoch": 3.54, + "learning_rate": 8.282726944962615e-06, + "loss": 0.0897, + "step": 75780 + }, + { + "epoch": 3.54, + "learning_rate": 8.281943159907827e-06, + "loss": 0.0795, + "step": 75785 + }, + { + "epoch": 3.54, + "learning_rate": 8.28115937485304e-06, + "loss": 0.1816, + "step": 75790 + }, + { + "epoch": 3.54, + "learning_rate": 8.280375589798255e-06, + "loss": 0.2481, + "step": 75795 + }, + { + "epoch": 3.54, + "learning_rate": 8.279591804743467e-06, + "loss": 0.0503, + "step": 75800 + }, + { + "epoch": 3.54, + "learning_rate": 8.27880801968868e-06, + "loss": 0.0122, + "step": 75805 + }, + { + "epoch": 3.54, + "learning_rate": 8.278024234633895e-06, + "loss": 0.0234, + "step": 75810 + }, + { + "epoch": 3.54, + "learning_rate": 8.277240449579109e-06, + "loss": 0.0445, + "step": 75815 + }, + { + "epoch": 3.54, + "learning_rate": 8.27645666452432e-06, + "loss": 0.059, + "step": 75820 + }, + { + "epoch": 3.54, + "learning_rate": 8.275672879469535e-06, + "loss": 0.0987, + "step": 75825 + }, + { + "epoch": 3.54, + "learning_rate": 8.274889094414749e-06, + "loss": 0.1198, + "step": 75830 + }, + { + "epoch": 3.54, + "learning_rate": 8.274105309359963e-06, + "loss": 0.1624, + "step": 75835 + }, + { + "epoch": 3.54, + "learning_rate": 8.273321524305176e-06, + "loss": 0.1389, + "step": 75840 + }, + { + "epoch": 3.54, + "learning_rate": 8.272537739250389e-06, + "loss": 0.2726, + "step": 75845 + }, + { + "epoch": 3.54, + "learning_rate": 8.271753954195601e-06, + "loss": 0.0476, + "step": 75850 + }, + { + "epoch": 3.54, + "learning_rate": 8.270970169140815e-06, + "loss": 0.0124, + "step": 75855 + }, + { + "epoch": 3.54, + "learning_rate": 8.270186384086029e-06, + "loss": 0.0568, + "step": 75860 + }, + { + "epoch": 3.54, + "learning_rate": 8.269402599031243e-06, + "loss": 0.0667, + "step": 75865 + }, + { + "epoch": 3.54, + "learning_rate": 8.268618813976455e-06, + "loss": 0.1202, + "step": 75870 + }, + { + "epoch": 3.54, + "learning_rate": 8.267835028921669e-06, + "loss": 0.0241, + "step": 75875 + }, + { + "epoch": 3.54, + "learning_rate": 8.267051243866883e-06, + "loss": 0.1229, + "step": 75880 + }, + { + "epoch": 3.54, + "learning_rate": 8.266267458812097e-06, + "loss": 0.0868, + "step": 75885 + }, + { + "epoch": 3.54, + "learning_rate": 8.26548367375731e-06, + "loss": 0.201, + "step": 75890 + }, + { + "epoch": 3.54, + "learning_rate": 8.264699888702523e-06, + "loss": 0.2908, + "step": 75895 + }, + { + "epoch": 3.54, + "learning_rate": 8.263916103647737e-06, + "loss": 0.0415, + "step": 75900 + }, + { + "epoch": 3.54, + "learning_rate": 8.26313231859295e-06, + "loss": 0.0291, + "step": 75905 + }, + { + "epoch": 3.54, + "learning_rate": 8.262348533538164e-06, + "loss": 0.012, + "step": 75910 + }, + { + "epoch": 3.54, + "learning_rate": 8.261564748483377e-06, + "loss": 0.0537, + "step": 75915 + }, + { + "epoch": 3.54, + "learning_rate": 8.260780963428589e-06, + "loss": 0.078, + "step": 75920 + }, + { + "epoch": 3.54, + "learning_rate": 8.259997178373803e-06, + "loss": 0.162, + "step": 75925 + }, + { + "epoch": 3.54, + "learning_rate": 8.259213393319017e-06, + "loss": 0.1439, + "step": 75930 + }, + { + "epoch": 3.54, + "learning_rate": 8.25842960826423e-06, + "loss": 0.09, + "step": 75935 + }, + { + "epoch": 3.54, + "learning_rate": 8.257645823209444e-06, + "loss": 0.1553, + "step": 75940 + }, + { + "epoch": 3.54, + "learning_rate": 8.256862038154657e-06, + "loss": 0.2545, + "step": 75945 + }, + { + "epoch": 3.54, + "learning_rate": 8.25607825309987e-06, + "loss": 0.0797, + "step": 75950 + }, + { + "epoch": 3.54, + "learning_rate": 8.255294468045084e-06, + "loss": 0.0807, + "step": 75955 + }, + { + "epoch": 3.54, + "learning_rate": 8.254510682990298e-06, + "loss": 0.0415, + "step": 75960 + }, + { + "epoch": 3.54, + "learning_rate": 8.25372689793551e-06, + "loss": 0.0477, + "step": 75965 + }, + { + "epoch": 3.54, + "learning_rate": 8.252943112880724e-06, + "loss": 0.1314, + "step": 75970 + }, + { + "epoch": 3.55, + "learning_rate": 8.252159327825938e-06, + "loss": 0.0979, + "step": 75975 + }, + { + "epoch": 3.55, + "learning_rate": 8.25137554277115e-06, + "loss": 0.0582, + "step": 75980 + }, + { + "epoch": 3.55, + "learning_rate": 8.250591757716364e-06, + "loss": 0.0899, + "step": 75985 + }, + { + "epoch": 3.55, + "learning_rate": 8.249807972661578e-06, + "loss": 0.2765, + "step": 75990 + }, + { + "epoch": 3.55, + "learning_rate": 8.24902418760679e-06, + "loss": 0.2902, + "step": 75995 + }, + { + "epoch": 3.55, + "learning_rate": 8.248240402552004e-06, + "loss": 0.1004, + "step": 76000 + }, + { + "epoch": 3.55, + "learning_rate": 8.247456617497218e-06, + "loss": 0.0277, + "step": 76005 + }, + { + "epoch": 3.55, + "learning_rate": 8.246672832442432e-06, + "loss": 0.0526, + "step": 76010 + }, + { + "epoch": 3.55, + "learning_rate": 8.245889047387645e-06, + "loss": 0.0395, + "step": 76015 + }, + { + "epoch": 3.55, + "learning_rate": 8.245105262332858e-06, + "loss": 0.0749, + "step": 76020 + }, + { + "epoch": 3.55, + "learning_rate": 8.244321477278072e-06, + "loss": 0.0923, + "step": 76025 + }, + { + "epoch": 3.55, + "learning_rate": 8.243537692223286e-06, + "loss": 0.0788, + "step": 76030 + }, + { + "epoch": 3.55, + "learning_rate": 8.242753907168498e-06, + "loss": 0.1192, + "step": 76035 + }, + { + "epoch": 3.55, + "learning_rate": 8.241970122113712e-06, + "loss": 0.1008, + "step": 76040 + }, + { + "epoch": 3.55, + "learning_rate": 8.241186337058925e-06, + "loss": 0.4365, + "step": 76045 + }, + { + "epoch": 3.55, + "learning_rate": 8.240402552004138e-06, + "loss": 0.0791, + "step": 76050 + }, + { + "epoch": 3.55, + "learning_rate": 8.239618766949352e-06, + "loss": 0.0438, + "step": 76055 + }, + { + "epoch": 3.55, + "learning_rate": 8.238834981894566e-06, + "loss": 0.0265, + "step": 76060 + }, + { + "epoch": 3.55, + "learning_rate": 8.238051196839778e-06, + "loss": 0.0122, + "step": 76065 + }, + { + "epoch": 3.55, + "learning_rate": 8.237267411784992e-06, + "loss": 0.0748, + "step": 76070 + }, + { + "epoch": 3.55, + "learning_rate": 8.236483626730206e-06, + "loss": 0.1352, + "step": 76075 + }, + { + "epoch": 3.55, + "learning_rate": 8.23569984167542e-06, + "loss": 0.1491, + "step": 76080 + }, + { + "epoch": 3.55, + "learning_rate": 8.234916056620632e-06, + "loss": 0.1648, + "step": 76085 + }, + { + "epoch": 3.55, + "learning_rate": 8.234132271565846e-06, + "loss": 0.1639, + "step": 76090 + }, + { + "epoch": 3.55, + "learning_rate": 8.23334848651106e-06, + "loss": 0.1581, + "step": 76095 + }, + { + "epoch": 3.55, + "learning_rate": 8.232564701456274e-06, + "loss": 0.1422, + "step": 76100 + }, + { + "epoch": 3.55, + "learning_rate": 8.231780916401488e-06, + "loss": 0.0529, + "step": 76105 + }, + { + "epoch": 3.55, + "learning_rate": 8.2309971313467e-06, + "loss": 0.0227, + "step": 76110 + }, + { + "epoch": 3.55, + "learning_rate": 8.230213346291912e-06, + "loss": 0.0177, + "step": 76115 + }, + { + "epoch": 3.55, + "learning_rate": 8.229429561237126e-06, + "loss": 0.0846, + "step": 76120 + }, + { + "epoch": 3.55, + "learning_rate": 8.22864577618234e-06, + "loss": 0.0638, + "step": 76125 + }, + { + "epoch": 3.55, + "learning_rate": 8.227861991127554e-06, + "loss": 0.1441, + "step": 76130 + }, + { + "epoch": 3.55, + "learning_rate": 8.227078206072766e-06, + "loss": 0.1508, + "step": 76135 + }, + { + "epoch": 3.55, + "learning_rate": 8.22629442101798e-06, + "loss": 0.2952, + "step": 76140 + }, + { + "epoch": 3.55, + "learning_rate": 8.225510635963194e-06, + "loss": 0.268, + "step": 76145 + }, + { + "epoch": 3.55, + "learning_rate": 8.224726850908408e-06, + "loss": 0.0742, + "step": 76150 + }, + { + "epoch": 3.55, + "learning_rate": 8.223943065853622e-06, + "loss": 0.034, + "step": 76155 + }, + { + "epoch": 3.55, + "learning_rate": 8.223159280798834e-06, + "loss": 0.0313, + "step": 76160 + }, + { + "epoch": 3.55, + "learning_rate": 8.222375495744048e-06, + "loss": 0.052, + "step": 76165 + }, + { + "epoch": 3.55, + "learning_rate": 8.221591710689262e-06, + "loss": 0.0843, + "step": 76170 + }, + { + "epoch": 3.55, + "learning_rate": 8.220807925634474e-06, + "loss": 0.0998, + "step": 76175 + }, + { + "epoch": 3.55, + "learning_rate": 8.220024140579688e-06, + "loss": 0.1013, + "step": 76180 + }, + { + "epoch": 3.55, + "learning_rate": 8.2192403555249e-06, + "loss": 0.1085, + "step": 76185 + }, + { + "epoch": 3.56, + "learning_rate": 8.218456570470114e-06, + "loss": 0.1796, + "step": 76190 + }, + { + "epoch": 3.56, + "learning_rate": 8.217672785415328e-06, + "loss": 0.293, + "step": 76195 + }, + { + "epoch": 3.56, + "learning_rate": 8.216889000360542e-06, + "loss": 0.0851, + "step": 76200 + }, + { + "epoch": 3.56, + "learning_rate": 8.216105215305756e-06, + "loss": 0.0458, + "step": 76205 + }, + { + "epoch": 3.56, + "learning_rate": 8.215321430250968e-06, + "loss": 0.043, + "step": 76210 + }, + { + "epoch": 3.56, + "learning_rate": 8.214537645196182e-06, + "loss": 0.0795, + "step": 76215 + }, + { + "epoch": 3.56, + "learning_rate": 8.213753860141396e-06, + "loss": 0.0633, + "step": 76220 + }, + { + "epoch": 3.56, + "learning_rate": 8.21297007508661e-06, + "loss": 0.0637, + "step": 76225 + }, + { + "epoch": 3.56, + "learning_rate": 8.212186290031822e-06, + "loss": 0.1009, + "step": 76230 + }, + { + "epoch": 3.56, + "learning_rate": 8.211402504977036e-06, + "loss": 0.1289, + "step": 76235 + }, + { + "epoch": 3.56, + "learning_rate": 8.210618719922248e-06, + "loss": 0.1908, + "step": 76240 + }, + { + "epoch": 3.56, + "learning_rate": 8.209834934867462e-06, + "loss": 0.2687, + "step": 76245 + }, + { + "epoch": 3.56, + "learning_rate": 8.209051149812676e-06, + "loss": 0.092, + "step": 76250 + }, + { + "epoch": 3.56, + "learning_rate": 8.20826736475789e-06, + "loss": 0.057, + "step": 76255 + }, + { + "epoch": 3.56, + "learning_rate": 8.207483579703102e-06, + "loss": 0.0251, + "step": 76260 + }, + { + "epoch": 3.56, + "learning_rate": 8.206699794648316e-06, + "loss": 0.0866, + "step": 76265 + }, + { + "epoch": 3.56, + "learning_rate": 8.20591600959353e-06, + "loss": 0.1353, + "step": 76270 + }, + { + "epoch": 3.56, + "learning_rate": 8.205132224538744e-06, + "loss": 0.0878, + "step": 76275 + }, + { + "epoch": 3.56, + "learning_rate": 8.204348439483956e-06, + "loss": 0.0853, + "step": 76280 + }, + { + "epoch": 3.56, + "learning_rate": 8.20356465442917e-06, + "loss": 0.0852, + "step": 76285 + }, + { + "epoch": 3.56, + "learning_rate": 8.202780869374384e-06, + "loss": 0.1853, + "step": 76290 + }, + { + "epoch": 3.56, + "learning_rate": 8.201997084319598e-06, + "loss": 0.2395, + "step": 76295 + }, + { + "epoch": 3.56, + "learning_rate": 8.20121329926481e-06, + "loss": 0.0643, + "step": 76300 + }, + { + "epoch": 3.56, + "learning_rate": 8.200429514210024e-06, + "loss": 0.044, + "step": 76305 + }, + { + "epoch": 3.56, + "learning_rate": 8.199645729155236e-06, + "loss": 0.0486, + "step": 76310 + }, + { + "epoch": 3.56, + "learning_rate": 8.19886194410045e-06, + "loss": 0.0564, + "step": 76315 + }, + { + "epoch": 3.56, + "learning_rate": 8.198078159045664e-06, + "loss": 0.0806, + "step": 76320 + }, + { + "epoch": 3.56, + "learning_rate": 8.197294373990878e-06, + "loss": 0.0805, + "step": 76325 + }, + { + "epoch": 3.56, + "learning_rate": 8.19651058893609e-06, + "loss": 0.1667, + "step": 76330 + }, + { + "epoch": 3.56, + "learning_rate": 8.195726803881304e-06, + "loss": 0.2686, + "step": 76335 + }, + { + "epoch": 3.56, + "learning_rate": 8.194943018826518e-06, + "loss": 0.2024, + "step": 76340 + }, + { + "epoch": 3.56, + "learning_rate": 8.194159233771732e-06, + "loss": 0.2692, + "step": 76345 + }, + { + "epoch": 3.56, + "learning_rate": 8.193375448716944e-06, + "loss": 0.0956, + "step": 76350 + }, + { + "epoch": 3.56, + "learning_rate": 8.192591663662158e-06, + "loss": 0.0181, + "step": 76355 + }, + { + "epoch": 3.56, + "learning_rate": 8.191807878607372e-06, + "loss": 0.0212, + "step": 76360 + }, + { + "epoch": 3.56, + "learning_rate": 8.191024093552586e-06, + "loss": 0.1716, + "step": 76365 + }, + { + "epoch": 3.56, + "learning_rate": 8.190240308497798e-06, + "loss": 0.1032, + "step": 76370 + }, + { + "epoch": 3.56, + "learning_rate": 8.189456523443012e-06, + "loss": 0.0507, + "step": 76375 + }, + { + "epoch": 3.56, + "learning_rate": 8.188672738388224e-06, + "loss": 0.1214, + "step": 76380 + }, + { + "epoch": 3.56, + "learning_rate": 8.187888953333438e-06, + "loss": 0.1668, + "step": 76385 + }, + { + "epoch": 3.56, + "learning_rate": 8.187105168278652e-06, + "loss": 0.2541, + "step": 76390 + }, + { + "epoch": 3.56, + "learning_rate": 8.186321383223866e-06, + "loss": 0.2692, + "step": 76395 + }, + { + "epoch": 3.56, + "learning_rate": 8.185537598169078e-06, + "loss": 0.0889, + "step": 76400 + }, + { + "epoch": 3.57, + "learning_rate": 8.184753813114292e-06, + "loss": 0.0115, + "step": 76405 + }, + { + "epoch": 3.57, + "learning_rate": 8.183970028059506e-06, + "loss": 0.0236, + "step": 76410 + }, + { + "epoch": 3.57, + "learning_rate": 8.18318624300472e-06, + "loss": 0.022, + "step": 76415 + }, + { + "epoch": 3.57, + "learning_rate": 8.182402457949934e-06, + "loss": 0.1446, + "step": 76420 + }, + { + "epoch": 3.57, + "learning_rate": 8.181618672895146e-06, + "loss": 0.0501, + "step": 76425 + }, + { + "epoch": 3.57, + "learning_rate": 8.18083488784036e-06, + "loss": 0.1078, + "step": 76430 + }, + { + "epoch": 3.57, + "learning_rate": 8.180051102785572e-06, + "loss": 0.1363, + "step": 76435 + }, + { + "epoch": 3.57, + "learning_rate": 8.179267317730786e-06, + "loss": 0.1027, + "step": 76440 + }, + { + "epoch": 3.57, + "learning_rate": 8.178483532676e-06, + "loss": 0.3394, + "step": 76445 + }, + { + "epoch": 3.57, + "learning_rate": 8.177699747621212e-06, + "loss": 0.0626, + "step": 76450 + }, + { + "epoch": 3.57, + "learning_rate": 8.176915962566426e-06, + "loss": 0.0337, + "step": 76455 + }, + { + "epoch": 3.57, + "learning_rate": 8.17613217751164e-06, + "loss": 0.0466, + "step": 76460 + }, + { + "epoch": 3.57, + "learning_rate": 8.175348392456854e-06, + "loss": 0.0782, + "step": 76465 + }, + { + "epoch": 3.57, + "learning_rate": 8.174564607402068e-06, + "loss": 0.0538, + "step": 76470 + }, + { + "epoch": 3.57, + "learning_rate": 8.17378082234728e-06, + "loss": 0.2283, + "step": 76475 + }, + { + "epoch": 3.57, + "learning_rate": 8.172997037292494e-06, + "loss": 0.1222, + "step": 76480 + }, + { + "epoch": 3.57, + "learning_rate": 8.172213252237708e-06, + "loss": 0.1388, + "step": 76485 + }, + { + "epoch": 3.57, + "learning_rate": 8.171429467182922e-06, + "loss": 0.2528, + "step": 76490 + }, + { + "epoch": 3.57, + "learning_rate": 8.170645682128134e-06, + "loss": 0.2953, + "step": 76495 + }, + { + "epoch": 3.57, + "learning_rate": 8.169861897073346e-06, + "loss": 0.1195, + "step": 76500 + }, + { + "epoch": 3.57, + "learning_rate": 8.16907811201856e-06, + "loss": 0.029, + "step": 76505 + }, + { + "epoch": 3.57, + "learning_rate": 8.168294326963774e-06, + "loss": 0.0202, + "step": 76510 + }, + { + "epoch": 3.57, + "learning_rate": 8.167510541908988e-06, + "loss": 0.0609, + "step": 76515 + }, + { + "epoch": 3.57, + "learning_rate": 8.166726756854202e-06, + "loss": 0.0825, + "step": 76520 + }, + { + "epoch": 3.57, + "learning_rate": 8.165942971799414e-06, + "loss": 0.0671, + "step": 76525 + }, + { + "epoch": 3.57, + "learning_rate": 8.165159186744628e-06, + "loss": 0.1079, + "step": 76530 + }, + { + "epoch": 3.57, + "learning_rate": 8.164375401689842e-06, + "loss": 0.1552, + "step": 76535 + }, + { + "epoch": 3.57, + "learning_rate": 8.163591616635055e-06, + "loss": 0.2753, + "step": 76540 + }, + { + "epoch": 3.57, + "learning_rate": 8.162807831580268e-06, + "loss": 0.2377, + "step": 76545 + }, + { + "epoch": 3.57, + "learning_rate": 8.162024046525482e-06, + "loss": 0.0422, + "step": 76550 + }, + { + "epoch": 3.57, + "learning_rate": 8.161240261470696e-06, + "loss": 0.0695, + "step": 76555 + }, + { + "epoch": 3.57, + "learning_rate": 8.16045647641591e-06, + "loss": 0.0457, + "step": 76560 + }, + { + "epoch": 3.57, + "learning_rate": 8.159672691361122e-06, + "loss": 0.0343, + "step": 76565 + }, + { + "epoch": 3.57, + "learning_rate": 8.158888906306336e-06, + "loss": 0.0607, + "step": 76570 + }, + { + "epoch": 3.57, + "learning_rate": 8.158105121251548e-06, + "loss": 0.0968, + "step": 76575 + }, + { + "epoch": 3.57, + "learning_rate": 8.157321336196762e-06, + "loss": 0.0807, + "step": 76580 + }, + { + "epoch": 3.57, + "learning_rate": 8.156537551141976e-06, + "loss": 0.1224, + "step": 76585 + }, + { + "epoch": 3.57, + "learning_rate": 8.15575376608719e-06, + "loss": 0.0768, + "step": 76590 + }, + { + "epoch": 3.57, + "learning_rate": 8.154969981032402e-06, + "loss": 0.3868, + "step": 76595 + }, + { + "epoch": 3.57, + "learning_rate": 8.154186195977616e-06, + "loss": 0.0574, + "step": 76600 + }, + { + "epoch": 3.57, + "learning_rate": 8.15340241092283e-06, + "loss": 0.0198, + "step": 76605 + }, + { + "epoch": 3.57, + "learning_rate": 8.152618625868043e-06, + "loss": 0.0272, + "step": 76610 + }, + { + "epoch": 3.57, + "learning_rate": 8.151834840813256e-06, + "loss": 0.1063, + "step": 76615 + }, + { + "epoch": 3.58, + "learning_rate": 8.15105105575847e-06, + "loss": 0.0647, + "step": 76620 + }, + { + "epoch": 3.58, + "learning_rate": 8.150267270703683e-06, + "loss": 0.0933, + "step": 76625 + }, + { + "epoch": 3.58, + "learning_rate": 8.149483485648896e-06, + "loss": 0.1446, + "step": 76630 + }, + { + "epoch": 3.58, + "learning_rate": 8.14869970059411e-06, + "loss": 0.1358, + "step": 76635 + }, + { + "epoch": 3.58, + "learning_rate": 8.147915915539323e-06, + "loss": 0.2393, + "step": 76640 + }, + { + "epoch": 3.58, + "learning_rate": 8.147132130484536e-06, + "loss": 0.3171, + "step": 76645 + }, + { + "epoch": 3.58, + "learning_rate": 8.14634834542975e-06, + "loss": 0.1299, + "step": 76650 + }, + { + "epoch": 3.58, + "learning_rate": 8.145564560374963e-06, + "loss": 0.0429, + "step": 76655 + }, + { + "epoch": 3.58, + "learning_rate": 8.144780775320177e-06, + "loss": 0.0264, + "step": 76660 + }, + { + "epoch": 3.58, + "learning_rate": 8.14399699026539e-06, + "loss": 0.0446, + "step": 76665 + }, + { + "epoch": 3.58, + "learning_rate": 8.143213205210603e-06, + "loss": 0.1154, + "step": 76670 + }, + { + "epoch": 3.58, + "learning_rate": 8.142429420155817e-06, + "loss": 0.105, + "step": 76675 + }, + { + "epoch": 3.58, + "learning_rate": 8.141645635101031e-06, + "loss": 0.0613, + "step": 76680 + }, + { + "epoch": 3.58, + "learning_rate": 8.140861850046245e-06, + "loss": 0.2053, + "step": 76685 + }, + { + "epoch": 3.58, + "learning_rate": 8.140078064991457e-06, + "loss": 0.1957, + "step": 76690 + }, + { + "epoch": 3.58, + "learning_rate": 8.13929427993667e-06, + "loss": 0.2316, + "step": 76695 + }, + { + "epoch": 3.58, + "learning_rate": 8.138510494881884e-06, + "loss": 0.0859, + "step": 76700 + }, + { + "epoch": 3.58, + "learning_rate": 8.137726709827097e-06, + "loss": 0.0217, + "step": 76705 + }, + { + "epoch": 3.58, + "learning_rate": 8.136942924772311e-06, + "loss": 0.0319, + "step": 76710 + }, + { + "epoch": 3.58, + "learning_rate": 8.136159139717524e-06, + "loss": 0.0411, + "step": 76715 + }, + { + "epoch": 3.58, + "learning_rate": 8.135375354662737e-06, + "loss": 0.0563, + "step": 76720 + }, + { + "epoch": 3.58, + "learning_rate": 8.134591569607951e-06, + "loss": 0.1346, + "step": 76725 + }, + { + "epoch": 3.58, + "learning_rate": 8.133807784553165e-06, + "loss": 0.1397, + "step": 76730 + }, + { + "epoch": 3.58, + "learning_rate": 8.13302399949838e-06, + "loss": 0.0917, + "step": 76735 + }, + { + "epoch": 3.58, + "learning_rate": 8.13239697145455e-06, + "loss": 0.2303, + "step": 76740 + }, + { + "epoch": 3.58, + "learning_rate": 8.131613186399764e-06, + "loss": 0.6665, + "step": 76745 + }, + { + "epoch": 3.58, + "learning_rate": 8.130829401344976e-06, + "loss": 0.0425, + "step": 76750 + }, + { + "epoch": 3.58, + "learning_rate": 8.13004561629019e-06, + "loss": 0.0298, + "step": 76755 + }, + { + "epoch": 3.58, + "learning_rate": 8.129261831235402e-06, + "loss": 0.0255, + "step": 76760 + }, + { + "epoch": 3.58, + "learning_rate": 8.128478046180616e-06, + "loss": 0.0327, + "step": 76765 + }, + { + "epoch": 3.58, + "learning_rate": 8.12769426112583e-06, + "loss": 0.075, + "step": 76770 + }, + { + "epoch": 3.58, + "learning_rate": 8.126910476071042e-06, + "loss": 0.0938, + "step": 76775 + }, + { + "epoch": 3.58, + "learning_rate": 8.126126691016256e-06, + "loss": 0.1333, + "step": 76780 + }, + { + "epoch": 3.58, + "learning_rate": 8.12534290596147e-06, + "loss": 0.1166, + "step": 76785 + }, + { + "epoch": 3.58, + "learning_rate": 8.124559120906684e-06, + "loss": 0.1987, + "step": 76790 + }, + { + "epoch": 3.58, + "learning_rate": 8.123775335851897e-06, + "loss": 0.3264, + "step": 76795 + }, + { + "epoch": 3.58, + "learning_rate": 8.12299155079711e-06, + "loss": 0.096, + "step": 76800 + }, + { + "epoch": 3.58, + "learning_rate": 8.122207765742324e-06, + "loss": 0.034, + "step": 76805 + }, + { + "epoch": 3.58, + "learning_rate": 8.121423980687538e-06, + "loss": 0.0516, + "step": 76810 + }, + { + "epoch": 3.58, + "learning_rate": 8.120640195632751e-06, + "loss": 0.0528, + "step": 76815 + }, + { + "epoch": 3.58, + "learning_rate": 8.119856410577964e-06, + "loss": 0.0884, + "step": 76820 + }, + { + "epoch": 3.58, + "learning_rate": 8.119072625523176e-06, + "loss": 0.0855, + "step": 76825 + }, + { + "epoch": 3.58, + "learning_rate": 8.11828884046839e-06, + "loss": 0.0948, + "step": 76830 + }, + { + "epoch": 3.59, + "learning_rate": 8.117505055413604e-06, + "loss": 0.0889, + "step": 76835 + }, + { + "epoch": 3.59, + "learning_rate": 8.116721270358818e-06, + "loss": 0.2022, + "step": 76840 + }, + { + "epoch": 3.59, + "learning_rate": 8.115937485304031e-06, + "loss": 0.2287, + "step": 76845 + }, + { + "epoch": 3.59, + "learning_rate": 8.115153700249244e-06, + "loss": 0.0573, + "step": 76850 + }, + { + "epoch": 3.59, + "learning_rate": 8.114369915194458e-06, + "loss": 0.0238, + "step": 76855 + }, + { + "epoch": 3.59, + "learning_rate": 8.113586130139671e-06, + "loss": 0.0125, + "step": 76860 + }, + { + "epoch": 3.59, + "learning_rate": 8.112802345084885e-06, + "loss": 0.1019, + "step": 76865 + }, + { + "epoch": 3.59, + "learning_rate": 8.112018560030098e-06, + "loss": 0.1157, + "step": 76870 + }, + { + "epoch": 3.59, + "learning_rate": 8.111234774975312e-06, + "loss": 0.0912, + "step": 76875 + }, + { + "epoch": 3.59, + "learning_rate": 8.110450989920525e-06, + "loss": 0.1185, + "step": 76880 + }, + { + "epoch": 3.59, + "learning_rate": 8.10966720486574e-06, + "loss": 0.1433, + "step": 76885 + }, + { + "epoch": 3.59, + "learning_rate": 8.108883419810952e-06, + "loss": 0.2892, + "step": 76890 + }, + { + "epoch": 3.59, + "learning_rate": 8.108099634756165e-06, + "loss": 0.375, + "step": 76895 + }, + { + "epoch": 3.59, + "learning_rate": 8.107315849701378e-06, + "loss": 0.0706, + "step": 76900 + }, + { + "epoch": 3.59, + "learning_rate": 8.106532064646592e-06, + "loss": 0.022, + "step": 76905 + }, + { + "epoch": 3.59, + "learning_rate": 8.105748279591805e-06, + "loss": 0.0182, + "step": 76910 + }, + { + "epoch": 3.59, + "learning_rate": 8.10496449453702e-06, + "loss": 0.0763, + "step": 76915 + }, + { + "epoch": 3.59, + "learning_rate": 8.104180709482232e-06, + "loss": 0.0429, + "step": 76920 + }, + { + "epoch": 3.59, + "learning_rate": 8.103396924427445e-06, + "loss": 0.0621, + "step": 76925 + }, + { + "epoch": 3.59, + "learning_rate": 8.10261313937266e-06, + "loss": 0.0587, + "step": 76930 + }, + { + "epoch": 3.59, + "learning_rate": 8.101829354317873e-06, + "loss": 0.1526, + "step": 76935 + }, + { + "epoch": 3.59, + "learning_rate": 8.101045569263086e-06, + "loss": 0.101, + "step": 76940 + }, + { + "epoch": 3.59, + "learning_rate": 8.1002617842083e-06, + "loss": 0.213, + "step": 76945 + }, + { + "epoch": 3.59, + "learning_rate": 8.099477999153513e-06, + "loss": 0.0705, + "step": 76950 + }, + { + "epoch": 3.59, + "learning_rate": 8.098694214098726e-06, + "loss": 0.0205, + "step": 76955 + }, + { + "epoch": 3.59, + "learning_rate": 8.09791042904394e-06, + "loss": 0.0226, + "step": 76960 + }, + { + "epoch": 3.59, + "learning_rate": 8.097126643989153e-06, + "loss": 0.0564, + "step": 76965 + }, + { + "epoch": 3.59, + "learning_rate": 8.096342858934366e-06, + "loss": 0.0664, + "step": 76970 + }, + { + "epoch": 3.59, + "learning_rate": 8.09555907387958e-06, + "loss": 0.0594, + "step": 76975 + }, + { + "epoch": 3.59, + "learning_rate": 8.094775288824793e-06, + "loss": 0.1133, + "step": 76980 + }, + { + "epoch": 3.59, + "learning_rate": 8.093991503770007e-06, + "loss": 0.1443, + "step": 76985 + }, + { + "epoch": 3.59, + "learning_rate": 8.09320771871522e-06, + "loss": 0.2564, + "step": 76990 + }, + { + "epoch": 3.59, + "learning_rate": 8.092423933660433e-06, + "loss": 0.3879, + "step": 76995 + }, + { + "epoch": 3.59, + "learning_rate": 8.091640148605647e-06, + "loss": 0.0718, + "step": 77000 + }, + { + "epoch": 3.59, + "learning_rate": 8.090856363550861e-06, + "loss": 0.0181, + "step": 77005 + }, + { + "epoch": 3.59, + "learning_rate": 8.090072578496075e-06, + "loss": 0.0427, + "step": 77010 + }, + { + "epoch": 3.59, + "learning_rate": 8.089288793441287e-06, + "loss": 0.0605, + "step": 77015 + }, + { + "epoch": 3.59, + "learning_rate": 8.0885050083865e-06, + "loss": 0.0279, + "step": 77020 + }, + { + "epoch": 3.59, + "learning_rate": 8.087721223331713e-06, + "loss": 0.2084, + "step": 77025 + }, + { + "epoch": 3.59, + "learning_rate": 8.086937438276927e-06, + "loss": 0.097, + "step": 77030 + }, + { + "epoch": 3.59, + "learning_rate": 8.086153653222141e-06, + "loss": 0.1676, + "step": 77035 + }, + { + "epoch": 3.59, + "learning_rate": 8.085369868167353e-06, + "loss": 0.1195, + "step": 77040 + }, + { + "epoch": 3.6, + "learning_rate": 8.084586083112567e-06, + "loss": 0.1522, + "step": 77045 + }, + { + "epoch": 3.6, + "learning_rate": 8.083802298057781e-06, + "loss": 0.0322, + "step": 77050 + }, + { + "epoch": 3.6, + "learning_rate": 8.083018513002995e-06, + "loss": 0.035, + "step": 77055 + }, + { + "epoch": 3.6, + "learning_rate": 8.082234727948209e-06, + "loss": 0.047, + "step": 77060 + }, + { + "epoch": 3.6, + "learning_rate": 8.081450942893421e-06, + "loss": 0.0983, + "step": 77065 + }, + { + "epoch": 3.6, + "learning_rate": 8.080667157838635e-06, + "loss": 0.1496, + "step": 77070 + }, + { + "epoch": 3.6, + "learning_rate": 8.079883372783849e-06, + "loss": 0.1533, + "step": 77075 + }, + { + "epoch": 3.6, + "learning_rate": 8.079099587729063e-06, + "loss": 0.1387, + "step": 77080 + }, + { + "epoch": 3.6, + "learning_rate": 8.078315802674275e-06, + "loss": 0.0755, + "step": 77085 + }, + { + "epoch": 3.6, + "learning_rate": 8.077532017619487e-06, + "loss": 0.215, + "step": 77090 + }, + { + "epoch": 3.6, + "learning_rate": 8.076748232564701e-06, + "loss": 0.172, + "step": 77095 + }, + { + "epoch": 3.6, + "learning_rate": 8.075964447509915e-06, + "loss": 0.0512, + "step": 77100 + }, + { + "epoch": 3.6, + "learning_rate": 8.07518066245513e-06, + "loss": 0.0308, + "step": 77105 + }, + { + "epoch": 3.6, + "learning_rate": 8.074396877400343e-06, + "loss": 0.0165, + "step": 77110 + }, + { + "epoch": 3.6, + "learning_rate": 8.073613092345555e-06, + "loss": 0.0949, + "step": 77115 + }, + { + "epoch": 3.6, + "learning_rate": 8.07282930729077e-06, + "loss": 0.0572, + "step": 77120 + }, + { + "epoch": 3.6, + "learning_rate": 8.072045522235983e-06, + "loss": 0.0503, + "step": 77125 + }, + { + "epoch": 3.6, + "learning_rate": 8.071261737181197e-06, + "loss": 0.0687, + "step": 77130 + }, + { + "epoch": 3.6, + "learning_rate": 8.07047795212641e-06, + "loss": 0.1899, + "step": 77135 + }, + { + "epoch": 3.6, + "learning_rate": 8.069694167071623e-06, + "loss": 0.1388, + "step": 77140 + }, + { + "epoch": 3.6, + "learning_rate": 8.068910382016837e-06, + "loss": 0.2585, + "step": 77145 + }, + { + "epoch": 3.6, + "learning_rate": 8.06812659696205e-06, + "loss": 0.0928, + "step": 77150 + }, + { + "epoch": 3.6, + "learning_rate": 8.067342811907263e-06, + "loss": 0.082, + "step": 77155 + }, + { + "epoch": 3.6, + "learning_rate": 8.066559026852477e-06, + "loss": 0.0616, + "step": 77160 + }, + { + "epoch": 3.6, + "learning_rate": 8.06577524179769e-06, + "loss": 0.1023, + "step": 77165 + }, + { + "epoch": 3.6, + "learning_rate": 8.064991456742903e-06, + "loss": 0.0539, + "step": 77170 + }, + { + "epoch": 3.6, + "learning_rate": 8.064207671688117e-06, + "loss": 0.0532, + "step": 77175 + }, + { + "epoch": 3.6, + "learning_rate": 8.063423886633331e-06, + "loss": 0.1331, + "step": 77180 + }, + { + "epoch": 3.6, + "learning_rate": 8.062640101578543e-06, + "loss": 0.1599, + "step": 77185 + }, + { + "epoch": 3.6, + "learning_rate": 8.061856316523757e-06, + "loss": 0.2474, + "step": 77190 + }, + { + "epoch": 3.6, + "learning_rate": 8.061072531468971e-06, + "loss": 0.2996, + "step": 77195 + }, + { + "epoch": 3.6, + "learning_rate": 8.060288746414185e-06, + "loss": 0.0673, + "step": 77200 + }, + { + "epoch": 3.6, + "learning_rate": 8.059504961359397e-06, + "loss": 0.0557, + "step": 77205 + }, + { + "epoch": 3.6, + "learning_rate": 8.058721176304611e-06, + "loss": 0.0642, + "step": 77210 + }, + { + "epoch": 3.6, + "learning_rate": 8.057937391249823e-06, + "loss": 0.0275, + "step": 77215 + }, + { + "epoch": 3.6, + "learning_rate": 8.057153606195037e-06, + "loss": 0.0654, + "step": 77220 + }, + { + "epoch": 3.6, + "learning_rate": 8.056369821140251e-06, + "loss": 0.0903, + "step": 77225 + }, + { + "epoch": 3.6, + "learning_rate": 8.055586036085465e-06, + "loss": 0.0431, + "step": 77230 + }, + { + "epoch": 3.6, + "learning_rate": 8.054802251030677e-06, + "loss": 0.0962, + "step": 77235 + }, + { + "epoch": 3.6, + "learning_rate": 8.054018465975891e-06, + "loss": 0.237, + "step": 77240 + }, + { + "epoch": 3.6, + "learning_rate": 8.053234680921105e-06, + "loss": 0.3863, + "step": 77245 + }, + { + "epoch": 3.6, + "learning_rate": 8.052450895866319e-06, + "loss": 0.0608, + "step": 77250 + }, + { + "epoch": 3.6, + "learning_rate": 8.051667110811531e-06, + "loss": 0.0215, + "step": 77255 + }, + { + "epoch": 3.61, + "learning_rate": 8.050883325756745e-06, + "loss": 0.088, + "step": 77260 + }, + { + "epoch": 3.61, + "learning_rate": 8.050099540701959e-06, + "loss": 0.0515, + "step": 77265 + }, + { + "epoch": 3.61, + "learning_rate": 8.049315755647173e-06, + "loss": 0.0528, + "step": 77270 + }, + { + "epoch": 3.61, + "learning_rate": 8.048531970592387e-06, + "loss": 0.0872, + "step": 77275 + }, + { + "epoch": 3.61, + "learning_rate": 8.047748185537599e-06, + "loss": 0.1265, + "step": 77280 + }, + { + "epoch": 3.61, + "learning_rate": 8.046964400482811e-06, + "loss": 0.1019, + "step": 77285 + }, + { + "epoch": 3.61, + "learning_rate": 8.046180615428025e-06, + "loss": 0.1935, + "step": 77290 + }, + { + "epoch": 3.61, + "learning_rate": 8.045396830373239e-06, + "loss": 0.331, + "step": 77295 + }, + { + "epoch": 3.61, + "learning_rate": 8.044613045318453e-06, + "loss": 0.0256, + "step": 77300 + }, + { + "epoch": 3.61, + "learning_rate": 8.043829260263665e-06, + "loss": 0.06, + "step": 77305 + }, + { + "epoch": 3.61, + "learning_rate": 8.043045475208879e-06, + "loss": 0.0504, + "step": 77310 + }, + { + "epoch": 3.61, + "learning_rate": 8.042261690154093e-06, + "loss": 0.0804, + "step": 77315 + }, + { + "epoch": 3.61, + "learning_rate": 8.041477905099307e-06, + "loss": 0.0677, + "step": 77320 + }, + { + "epoch": 3.61, + "learning_rate": 8.04069412004452e-06, + "loss": 0.088, + "step": 77325 + }, + { + "epoch": 3.61, + "learning_rate": 8.039910334989733e-06, + "loss": 0.095, + "step": 77330 + }, + { + "epoch": 3.61, + "learning_rate": 8.039126549934947e-06, + "loss": 0.0983, + "step": 77335 + }, + { + "epoch": 3.61, + "learning_rate": 8.03834276488016e-06, + "loss": 0.204, + "step": 77340 + }, + { + "epoch": 3.61, + "learning_rate": 8.037558979825373e-06, + "loss": 0.3072, + "step": 77345 + }, + { + "epoch": 3.61, + "learning_rate": 8.036775194770587e-06, + "loss": 0.0787, + "step": 77350 + }, + { + "epoch": 3.61, + "learning_rate": 8.035991409715799e-06, + "loss": 0.0133, + "step": 77355 + }, + { + "epoch": 3.61, + "learning_rate": 8.035207624661013e-06, + "loss": 0.0537, + "step": 77360 + }, + { + "epoch": 3.61, + "learning_rate": 8.034423839606227e-06, + "loss": 0.0439, + "step": 77365 + }, + { + "epoch": 3.61, + "learning_rate": 8.03364005455144e-06, + "loss": 0.098, + "step": 77370 + }, + { + "epoch": 3.61, + "learning_rate": 8.032856269496655e-06, + "loss": 0.0948, + "step": 77375 + }, + { + "epoch": 3.61, + "learning_rate": 8.032072484441867e-06, + "loss": 0.08, + "step": 77380 + }, + { + "epoch": 3.61, + "learning_rate": 8.03128869938708e-06, + "loss": 0.0979, + "step": 77385 + }, + { + "epoch": 3.61, + "learning_rate": 8.030504914332295e-06, + "loss": 0.2572, + "step": 77390 + }, + { + "epoch": 3.61, + "learning_rate": 8.029721129277509e-06, + "loss": 0.3724, + "step": 77395 + }, + { + "epoch": 3.61, + "learning_rate": 8.02893734422272e-06, + "loss": 0.0675, + "step": 77400 + }, + { + "epoch": 3.61, + "learning_rate": 8.028153559167935e-06, + "loss": 0.0416, + "step": 77405 + }, + { + "epoch": 3.61, + "learning_rate": 8.027369774113147e-06, + "loss": 0.046, + "step": 77410 + }, + { + "epoch": 3.61, + "learning_rate": 8.02658598905836e-06, + "loss": 0.1032, + "step": 77415 + }, + { + "epoch": 3.61, + "learning_rate": 8.025802204003575e-06, + "loss": 0.0493, + "step": 77420 + }, + { + "epoch": 3.61, + "learning_rate": 8.025018418948789e-06, + "loss": 0.0605, + "step": 77425 + }, + { + "epoch": 3.61, + "learning_rate": 8.024234633894001e-06, + "loss": 0.1291, + "step": 77430 + }, + { + "epoch": 3.61, + "learning_rate": 8.023450848839215e-06, + "loss": 0.1378, + "step": 77435 + }, + { + "epoch": 3.61, + "learning_rate": 8.022667063784429e-06, + "loss": 0.1254, + "step": 77440 + }, + { + "epoch": 3.61, + "learning_rate": 8.021883278729643e-06, + "loss": 0.3442, + "step": 77445 + }, + { + "epoch": 3.61, + "learning_rate": 8.021099493674855e-06, + "loss": 0.0681, + "step": 77450 + }, + { + "epoch": 3.61, + "learning_rate": 8.020315708620069e-06, + "loss": 0.0381, + "step": 77455 + }, + { + "epoch": 3.61, + "learning_rate": 8.019531923565283e-06, + "loss": 0.0804, + "step": 77460 + }, + { + "epoch": 3.61, + "learning_rate": 8.018748138510496e-06, + "loss": 0.0281, + "step": 77465 + }, + { + "epoch": 3.61, + "learning_rate": 8.017964353455709e-06, + "loss": 0.0935, + "step": 77470 + }, + { + "epoch": 3.62, + "learning_rate": 8.017180568400923e-06, + "loss": 0.1158, + "step": 77475 + }, + { + "epoch": 3.62, + "learning_rate": 8.016396783346135e-06, + "loss": 0.1317, + "step": 77480 + }, + { + "epoch": 3.62, + "learning_rate": 8.015612998291349e-06, + "loss": 0.1924, + "step": 77485 + }, + { + "epoch": 3.62, + "learning_rate": 8.014829213236563e-06, + "loss": 0.2521, + "step": 77490 + }, + { + "epoch": 3.62, + "learning_rate": 8.014045428181777e-06, + "loss": 0.2315, + "step": 77495 + }, + { + "epoch": 3.62, + "learning_rate": 8.013261643126989e-06, + "loss": 0.0565, + "step": 77500 + }, + { + "epoch": 3.62, + "learning_rate": 8.012477858072203e-06, + "loss": 0.0091, + "step": 77505 + }, + { + "epoch": 3.62, + "learning_rate": 8.011694073017417e-06, + "loss": 0.0347, + "step": 77510 + }, + { + "epoch": 3.62, + "learning_rate": 8.01091028796263e-06, + "loss": 0.0646, + "step": 77515 + }, + { + "epoch": 3.62, + "learning_rate": 8.010126502907843e-06, + "loss": 0.0254, + "step": 77520 + }, + { + "epoch": 3.62, + "learning_rate": 8.009342717853057e-06, + "loss": 0.0536, + "step": 77525 + }, + { + "epoch": 3.62, + "learning_rate": 8.00855893279827e-06, + "loss": 0.1028, + "step": 77530 + }, + { + "epoch": 3.62, + "learning_rate": 8.007775147743484e-06, + "loss": 0.0755, + "step": 77535 + }, + { + "epoch": 3.62, + "learning_rate": 8.006991362688697e-06, + "loss": 0.19, + "step": 77540 + }, + { + "epoch": 3.62, + "learning_rate": 8.00620757763391e-06, + "loss": 0.4053, + "step": 77545 + }, + { + "epoch": 3.62, + "learning_rate": 8.005423792579123e-06, + "loss": 0.0813, + "step": 77550 + }, + { + "epoch": 3.62, + "learning_rate": 8.004640007524337e-06, + "loss": 0.0142, + "step": 77555 + }, + { + "epoch": 3.62, + "learning_rate": 8.00385622246955e-06, + "loss": 0.0355, + "step": 77560 + }, + { + "epoch": 3.62, + "learning_rate": 8.003072437414764e-06, + "loss": 0.0556, + "step": 77565 + }, + { + "epoch": 3.62, + "learning_rate": 8.002288652359977e-06, + "loss": 0.0372, + "step": 77570 + }, + { + "epoch": 3.62, + "learning_rate": 8.00150486730519e-06, + "loss": 0.054, + "step": 77575 + }, + { + "epoch": 3.62, + "learning_rate": 8.000721082250404e-06, + "loss": 0.0955, + "step": 77580 + }, + { + "epoch": 3.62, + "learning_rate": 7.999937297195618e-06, + "loss": 0.1179, + "step": 77585 + }, + { + "epoch": 3.62, + "learning_rate": 7.999153512140832e-06, + "loss": 0.1637, + "step": 77590 + }, + { + "epoch": 3.62, + "learning_rate": 7.998369727086044e-06, + "loss": 0.2502, + "step": 77595 + }, + { + "epoch": 3.62, + "learning_rate": 7.997585942031258e-06, + "loss": 0.0943, + "step": 77600 + }, + { + "epoch": 3.62, + "learning_rate": 7.99680215697647e-06, + "loss": 0.0234, + "step": 77605 + }, + { + "epoch": 3.62, + "learning_rate": 7.996018371921685e-06, + "loss": 0.0662, + "step": 77610 + }, + { + "epoch": 3.62, + "learning_rate": 7.995234586866898e-06, + "loss": 0.0476, + "step": 77615 + }, + { + "epoch": 3.62, + "learning_rate": 7.99445080181211e-06, + "loss": 0.0984, + "step": 77620 + }, + { + "epoch": 3.62, + "learning_rate": 7.993667016757325e-06, + "loss": 0.0983, + "step": 77625 + }, + { + "epoch": 3.62, + "learning_rate": 7.992883231702538e-06, + "loss": 0.0697, + "step": 77630 + }, + { + "epoch": 3.62, + "learning_rate": 7.992099446647752e-06, + "loss": 0.0726, + "step": 77635 + }, + { + "epoch": 3.62, + "learning_rate": 7.991315661592966e-06, + "loss": 0.1864, + "step": 77640 + }, + { + "epoch": 3.62, + "learning_rate": 7.990531876538178e-06, + "loss": 0.2873, + "step": 77645 + }, + { + "epoch": 3.62, + "learning_rate": 7.989748091483392e-06, + "loss": 0.0721, + "step": 77650 + }, + { + "epoch": 3.62, + "learning_rate": 7.988964306428606e-06, + "loss": 0.0206, + "step": 77655 + }, + { + "epoch": 3.62, + "learning_rate": 7.98818052137382e-06, + "loss": 0.0385, + "step": 77660 + }, + { + "epoch": 3.62, + "learning_rate": 7.987396736319032e-06, + "loss": 0.0535, + "step": 77665 + }, + { + "epoch": 3.62, + "learning_rate": 7.986612951264245e-06, + "loss": 0.0577, + "step": 77670 + }, + { + "epoch": 3.62, + "learning_rate": 7.985829166209459e-06, + "loss": 0.0842, + "step": 77675 + }, + { + "epoch": 3.62, + "learning_rate": 7.985045381154672e-06, + "loss": 0.0724, + "step": 77680 + }, + { + "epoch": 3.62, + "learning_rate": 7.984261596099886e-06, + "loss": 0.0749, + "step": 77685 + }, + { + "epoch": 3.63, + "learning_rate": 7.9834778110451e-06, + "loss": 0.1389, + "step": 77690 + }, + { + "epoch": 3.63, + "learning_rate": 7.982694025990312e-06, + "loss": 0.2554, + "step": 77695 + }, + { + "epoch": 3.63, + "learning_rate": 7.981910240935526e-06, + "loss": 0.0241, + "step": 77700 + }, + { + "epoch": 3.63, + "learning_rate": 7.98112645588074e-06, + "loss": 0.0397, + "step": 77705 + }, + { + "epoch": 3.63, + "learning_rate": 7.980342670825954e-06, + "loss": 0.0711, + "step": 77710 + }, + { + "epoch": 3.63, + "learning_rate": 7.979558885771166e-06, + "loss": 0.0933, + "step": 77715 + }, + { + "epoch": 3.63, + "learning_rate": 7.97877510071638e-06, + "loss": 0.0517, + "step": 77720 + }, + { + "epoch": 3.63, + "learning_rate": 7.977991315661594e-06, + "loss": 0.1226, + "step": 77725 + }, + { + "epoch": 3.63, + "learning_rate": 7.977207530606808e-06, + "loss": 0.1274, + "step": 77730 + }, + { + "epoch": 3.63, + "learning_rate": 7.97642374555202e-06, + "loss": 0.1621, + "step": 77735 + }, + { + "epoch": 3.63, + "learning_rate": 7.975639960497234e-06, + "loss": 0.279, + "step": 77740 + }, + { + "epoch": 3.63, + "learning_rate": 7.974856175442446e-06, + "loss": 0.2862, + "step": 77745 + }, + { + "epoch": 3.63, + "learning_rate": 7.97407239038766e-06, + "loss": 0.0266, + "step": 77750 + }, + { + "epoch": 3.63, + "learning_rate": 7.973288605332874e-06, + "loss": 0.0083, + "step": 77755 + }, + { + "epoch": 3.63, + "learning_rate": 7.972504820278088e-06, + "loss": 0.0477, + "step": 77760 + }, + { + "epoch": 3.63, + "learning_rate": 7.9717210352233e-06, + "loss": 0.0244, + "step": 77765 + }, + { + "epoch": 3.63, + "learning_rate": 7.970937250168514e-06, + "loss": 0.0371, + "step": 77770 + }, + { + "epoch": 3.63, + "learning_rate": 7.970153465113728e-06, + "loss": 0.1224, + "step": 77775 + }, + { + "epoch": 3.63, + "learning_rate": 7.969369680058942e-06, + "loss": 0.1255, + "step": 77780 + }, + { + "epoch": 3.63, + "learning_rate": 7.968585895004154e-06, + "loss": 0.1138, + "step": 77785 + }, + { + "epoch": 3.63, + "learning_rate": 7.967802109949368e-06, + "loss": 0.174, + "step": 77790 + }, + { + "epoch": 3.63, + "learning_rate": 7.967018324894582e-06, + "loss": 0.2551, + "step": 77795 + }, + { + "epoch": 3.63, + "learning_rate": 7.966234539839794e-06, + "loss": 0.0698, + "step": 77800 + }, + { + "epoch": 3.63, + "learning_rate": 7.965450754785008e-06, + "loss": 0.0357, + "step": 77805 + }, + { + "epoch": 3.63, + "learning_rate": 7.964666969730222e-06, + "loss": 0.0263, + "step": 77810 + }, + { + "epoch": 3.63, + "learning_rate": 7.963883184675434e-06, + "loss": 0.0402, + "step": 77815 + }, + { + "epoch": 3.63, + "learning_rate": 7.963099399620648e-06, + "loss": 0.0769, + "step": 77820 + }, + { + "epoch": 3.63, + "learning_rate": 7.962315614565862e-06, + "loss": 0.0841, + "step": 77825 + }, + { + "epoch": 3.63, + "learning_rate": 7.961531829511076e-06, + "loss": 0.1091, + "step": 77830 + }, + { + "epoch": 3.63, + "learning_rate": 7.960748044456288e-06, + "loss": 0.0706, + "step": 77835 + }, + { + "epoch": 3.63, + "learning_rate": 7.959964259401502e-06, + "loss": 0.1679, + "step": 77840 + }, + { + "epoch": 3.63, + "learning_rate": 7.959180474346716e-06, + "loss": 0.2807, + "step": 77845 + }, + { + "epoch": 3.63, + "learning_rate": 7.95839668929193e-06, + "loss": 0.0608, + "step": 77850 + }, + { + "epoch": 3.63, + "learning_rate": 7.957612904237144e-06, + "loss": 0.0379, + "step": 77855 + }, + { + "epoch": 3.63, + "learning_rate": 7.956829119182356e-06, + "loss": 0.012, + "step": 77860 + }, + { + "epoch": 3.63, + "learning_rate": 7.956045334127568e-06, + "loss": 0.1044, + "step": 77865 + }, + { + "epoch": 3.63, + "learning_rate": 7.955261549072782e-06, + "loss": 0.0755, + "step": 77870 + }, + { + "epoch": 3.63, + "learning_rate": 7.954477764017996e-06, + "loss": 0.1034, + "step": 77875 + }, + { + "epoch": 3.63, + "learning_rate": 7.95369397896321e-06, + "loss": 0.078, + "step": 77880 + }, + { + "epoch": 3.63, + "learning_rate": 7.952910193908422e-06, + "loss": 0.1178, + "step": 77885 + }, + { + "epoch": 3.63, + "learning_rate": 7.952126408853636e-06, + "loss": 0.2689, + "step": 77890 + }, + { + "epoch": 3.63, + "learning_rate": 7.95134262379885e-06, + "loss": 0.297, + "step": 77895 + }, + { + "epoch": 3.63, + "learning_rate": 7.950558838744064e-06, + "loss": 0.1267, + "step": 77900 + }, + { + "epoch": 3.64, + "learning_rate": 7.949775053689278e-06, + "loss": 0.0355, + "step": 77905 + }, + { + "epoch": 3.64, + "learning_rate": 7.94899126863449e-06, + "loss": 0.0595, + "step": 77910 + }, + { + "epoch": 3.64, + "learning_rate": 7.948207483579704e-06, + "loss": 0.0467, + "step": 77915 + }, + { + "epoch": 3.64, + "learning_rate": 7.947423698524918e-06, + "loss": 0.0638, + "step": 77920 + }, + { + "epoch": 3.64, + "learning_rate": 7.946639913470132e-06, + "loss": 0.0661, + "step": 77925 + }, + { + "epoch": 3.64, + "learning_rate": 7.945856128415344e-06, + "loss": 0.0676, + "step": 77930 + }, + { + "epoch": 3.64, + "learning_rate": 7.945072343360556e-06, + "loss": 0.123, + "step": 77935 + }, + { + "epoch": 3.64, + "learning_rate": 7.94428855830577e-06, + "loss": 0.2575, + "step": 77940 + }, + { + "epoch": 3.64, + "learning_rate": 7.943504773250984e-06, + "loss": 0.2391, + "step": 77945 + }, + { + "epoch": 3.64, + "learning_rate": 7.942720988196198e-06, + "loss": 0.0485, + "step": 77950 + }, + { + "epoch": 3.64, + "learning_rate": 7.941937203141412e-06, + "loss": 0.0493, + "step": 77955 + }, + { + "epoch": 3.64, + "learning_rate": 7.941153418086624e-06, + "loss": 0.0492, + "step": 77960 + }, + { + "epoch": 3.64, + "learning_rate": 7.940369633031838e-06, + "loss": 0.0334, + "step": 77965 + }, + { + "epoch": 3.64, + "learning_rate": 7.939585847977052e-06, + "loss": 0.0832, + "step": 77970 + }, + { + "epoch": 3.64, + "learning_rate": 7.938802062922266e-06, + "loss": 0.0248, + "step": 77975 + }, + { + "epoch": 3.64, + "learning_rate": 7.938018277867478e-06, + "loss": 0.0332, + "step": 77980 + }, + { + "epoch": 3.64, + "learning_rate": 7.937234492812692e-06, + "loss": 0.1669, + "step": 77985 + }, + { + "epoch": 3.64, + "learning_rate": 7.936450707757906e-06, + "loss": 0.2448, + "step": 77990 + }, + { + "epoch": 3.64, + "learning_rate": 7.935666922703118e-06, + "loss": 0.3489, + "step": 77995 + }, + { + "epoch": 3.64, + "learning_rate": 7.934883137648332e-06, + "loss": 0.0499, + "step": 78000 + }, + { + "epoch": 3.64, + "learning_rate": 7.934099352593546e-06, + "loss": 0.0775, + "step": 78005 + }, + { + "epoch": 3.64, + "learning_rate": 7.933315567538758e-06, + "loss": 0.0262, + "step": 78010 + }, + { + "epoch": 3.64, + "learning_rate": 7.932531782483972e-06, + "loss": 0.0921, + "step": 78015 + }, + { + "epoch": 3.64, + "learning_rate": 7.931747997429186e-06, + "loss": 0.0541, + "step": 78020 + }, + { + "epoch": 3.64, + "learning_rate": 7.9309642123744e-06, + "loss": 0.1125, + "step": 78025 + }, + { + "epoch": 3.64, + "learning_rate": 7.930180427319612e-06, + "loss": 0.083, + "step": 78030 + }, + { + "epoch": 3.64, + "learning_rate": 7.929396642264826e-06, + "loss": 0.0961, + "step": 78035 + }, + { + "epoch": 3.64, + "learning_rate": 7.92861285721004e-06, + "loss": 0.1987, + "step": 78040 + }, + { + "epoch": 3.64, + "learning_rate": 7.927829072155254e-06, + "loss": 0.3407, + "step": 78045 + }, + { + "epoch": 3.64, + "learning_rate": 7.927045287100466e-06, + "loss": 0.0674, + "step": 78050 + }, + { + "epoch": 3.64, + "learning_rate": 7.92626150204568e-06, + "loss": 0.0582, + "step": 78055 + }, + { + "epoch": 3.64, + "learning_rate": 7.925477716990892e-06, + "loss": 0.0877, + "step": 78060 + }, + { + "epoch": 3.64, + "learning_rate": 7.924693931936106e-06, + "loss": 0.6231, + "step": 78065 + }, + { + "epoch": 3.64, + "learning_rate": 7.92391014688132e-06, + "loss": 0.0963, + "step": 78070 + }, + { + "epoch": 3.64, + "learning_rate": 7.923126361826534e-06, + "loss": 0.0848, + "step": 78075 + }, + { + "epoch": 3.64, + "learning_rate": 7.922342576771746e-06, + "loss": 0.119, + "step": 78080 + }, + { + "epoch": 3.64, + "learning_rate": 7.92155879171696e-06, + "loss": 0.1368, + "step": 78085 + }, + { + "epoch": 3.64, + "learning_rate": 7.920775006662174e-06, + "loss": 0.299, + "step": 78090 + }, + { + "epoch": 3.64, + "learning_rate": 7.919991221607388e-06, + "loss": 0.2911, + "step": 78095 + }, + { + "epoch": 3.64, + "learning_rate": 7.9192074365526e-06, + "loss": 0.0824, + "step": 78100 + }, + { + "epoch": 3.64, + "learning_rate": 7.918423651497814e-06, + "loss": 0.0767, + "step": 78105 + }, + { + "epoch": 3.64, + "learning_rate": 7.917639866443028e-06, + "loss": 0.0962, + "step": 78110 + }, + { + "epoch": 3.64, + "learning_rate": 7.916856081388242e-06, + "loss": 0.0316, + "step": 78115 + }, + { + "epoch": 3.65, + "learning_rate": 7.916072296333455e-06, + "loss": 0.1537, + "step": 78120 + }, + { + "epoch": 3.65, + "learning_rate": 7.915288511278668e-06, + "loss": 0.1014, + "step": 78125 + }, + { + "epoch": 3.65, + "learning_rate": 7.91450472622388e-06, + "loss": 0.0757, + "step": 78130 + }, + { + "epoch": 3.65, + "learning_rate": 7.913720941169094e-06, + "loss": 0.1169, + "step": 78135 + }, + { + "epoch": 3.65, + "learning_rate": 7.912937156114308e-06, + "loss": 0.1633, + "step": 78140 + }, + { + "epoch": 3.65, + "learning_rate": 7.912153371059522e-06, + "loss": 0.301, + "step": 78145 + }, + { + "epoch": 3.65, + "learning_rate": 7.911369586004734e-06, + "loss": 0.07, + "step": 78150 + }, + { + "epoch": 3.65, + "learning_rate": 7.910585800949948e-06, + "loss": 0.0381, + "step": 78155 + }, + { + "epoch": 3.65, + "learning_rate": 7.909802015895162e-06, + "loss": 0.0312, + "step": 78160 + }, + { + "epoch": 3.65, + "learning_rate": 7.909018230840376e-06, + "loss": 0.0264, + "step": 78165 + }, + { + "epoch": 3.65, + "learning_rate": 7.90823444578559e-06, + "loss": 0.0543, + "step": 78170 + }, + { + "epoch": 3.65, + "learning_rate": 7.907450660730802e-06, + "loss": 0.0919, + "step": 78175 + }, + { + "epoch": 3.65, + "learning_rate": 7.906666875676016e-06, + "loss": 0.1097, + "step": 78180 + }, + { + "epoch": 3.65, + "learning_rate": 7.90588309062123e-06, + "loss": 0.0907, + "step": 78185 + }, + { + "epoch": 3.65, + "learning_rate": 7.905099305566442e-06, + "loss": 0.1474, + "step": 78190 + }, + { + "epoch": 3.65, + "learning_rate": 7.904315520511656e-06, + "loss": 0.2071, + "step": 78195 + }, + { + "epoch": 3.65, + "learning_rate": 7.903531735456868e-06, + "loss": 0.0569, + "step": 78200 + }, + { + "epoch": 3.65, + "learning_rate": 7.902747950402082e-06, + "loss": 0.0304, + "step": 78205 + }, + { + "epoch": 3.65, + "learning_rate": 7.901964165347296e-06, + "loss": 0.0876, + "step": 78210 + }, + { + "epoch": 3.65, + "learning_rate": 7.90118038029251e-06, + "loss": 0.0544, + "step": 78215 + }, + { + "epoch": 3.65, + "learning_rate": 7.900396595237723e-06, + "loss": 0.0574, + "step": 78220 + }, + { + "epoch": 3.65, + "learning_rate": 7.899612810182936e-06, + "loss": 0.0921, + "step": 78225 + }, + { + "epoch": 3.65, + "learning_rate": 7.89882902512815e-06, + "loss": 0.1187, + "step": 78230 + }, + { + "epoch": 3.65, + "learning_rate": 7.898045240073363e-06, + "loss": 0.1302, + "step": 78235 + }, + { + "epoch": 3.65, + "learning_rate": 7.897261455018577e-06, + "loss": 0.1524, + "step": 78240 + }, + { + "epoch": 3.65, + "learning_rate": 7.89647766996379e-06, + "loss": 0.2349, + "step": 78245 + }, + { + "epoch": 3.65, + "learning_rate": 7.895693884909003e-06, + "loss": 0.0869, + "step": 78250 + }, + { + "epoch": 3.65, + "learning_rate": 7.894910099854216e-06, + "loss": 0.0222, + "step": 78255 + }, + { + "epoch": 3.65, + "learning_rate": 7.89412631479943e-06, + "loss": 0.0833, + "step": 78260 + }, + { + "epoch": 3.65, + "learning_rate": 7.893342529744643e-06, + "loss": 0.0476, + "step": 78265 + }, + { + "epoch": 3.65, + "learning_rate": 7.892558744689857e-06, + "loss": 0.063, + "step": 78270 + }, + { + "epoch": 3.65, + "learning_rate": 7.89177495963507e-06, + "loss": 0.085, + "step": 78275 + }, + { + "epoch": 3.65, + "learning_rate": 7.890991174580284e-06, + "loss": 0.1516, + "step": 78280 + }, + { + "epoch": 3.65, + "learning_rate": 7.890207389525497e-06, + "loss": 0.0996, + "step": 78285 + }, + { + "epoch": 3.65, + "learning_rate": 7.889423604470711e-06, + "loss": 0.178, + "step": 78290 + }, + { + "epoch": 3.65, + "learning_rate": 7.888639819415924e-06, + "loss": 0.2889, + "step": 78295 + }, + { + "epoch": 3.65, + "learning_rate": 7.887856034361137e-06, + "loss": 0.0732, + "step": 78300 + }, + { + "epoch": 3.65, + "learning_rate": 7.887072249306351e-06, + "loss": 0.0353, + "step": 78305 + }, + { + "epoch": 3.65, + "learning_rate": 7.886288464251565e-06, + "loss": 0.0454, + "step": 78310 + }, + { + "epoch": 3.65, + "learning_rate": 7.885504679196777e-06, + "loss": 0.0422, + "step": 78315 + }, + { + "epoch": 3.65, + "learning_rate": 7.884720894141991e-06, + "loss": 0.073, + "step": 78320 + }, + { + "epoch": 3.65, + "learning_rate": 7.883937109087204e-06, + "loss": 0.0793, + "step": 78325 + }, + { + "epoch": 3.65, + "learning_rate": 7.883153324032417e-06, + "loss": 0.0979, + "step": 78330 + }, + { + "epoch": 3.66, + "learning_rate": 7.882369538977631e-06, + "loss": 0.1539, + "step": 78335 + }, + { + "epoch": 3.66, + "learning_rate": 7.881585753922845e-06, + "loss": 0.2239, + "step": 78340 + }, + { + "epoch": 3.66, + "learning_rate": 7.880801968868058e-06, + "loss": 0.2689, + "step": 78345 + }, + { + "epoch": 3.66, + "learning_rate": 7.880018183813271e-06, + "loss": 0.0597, + "step": 78350 + }, + { + "epoch": 3.66, + "learning_rate": 7.879234398758485e-06, + "loss": 0.0656, + "step": 78355 + }, + { + "epoch": 3.66, + "learning_rate": 7.8784506137037e-06, + "loss": 0.0343, + "step": 78360 + }, + { + "epoch": 3.66, + "learning_rate": 7.877666828648911e-06, + "loss": 0.0812, + "step": 78365 + }, + { + "epoch": 3.66, + "learning_rate": 7.876883043594125e-06, + "loss": 0.0899, + "step": 78370 + }, + { + "epoch": 3.66, + "learning_rate": 7.87609925853934e-06, + "loss": 0.0821, + "step": 78375 + }, + { + "epoch": 3.66, + "learning_rate": 7.875315473484553e-06, + "loss": 0.0868, + "step": 78380 + }, + { + "epoch": 3.66, + "learning_rate": 7.874531688429765e-06, + "loss": 0.0812, + "step": 78385 + }, + { + "epoch": 3.66, + "learning_rate": 7.87374790337498e-06, + "loss": 0.1704, + "step": 78390 + }, + { + "epoch": 3.66, + "learning_rate": 7.872964118320191e-06, + "loss": 0.4426, + "step": 78395 + }, + { + "epoch": 3.66, + "learning_rate": 7.872180333265405e-06, + "loss": 0.0825, + "step": 78400 + }, + { + "epoch": 3.66, + "learning_rate": 7.87139654821062e-06, + "loss": 0.0304, + "step": 78405 + }, + { + "epoch": 3.66, + "learning_rate": 7.870612763155833e-06, + "loss": 0.0108, + "step": 78410 + }, + { + "epoch": 3.66, + "learning_rate": 7.869828978101045e-06, + "loss": 0.061, + "step": 78415 + }, + { + "epoch": 3.66, + "learning_rate": 7.86904519304626e-06, + "loss": 0.044, + "step": 78420 + }, + { + "epoch": 3.66, + "learning_rate": 7.868261407991473e-06, + "loss": 0.1055, + "step": 78425 + }, + { + "epoch": 3.66, + "learning_rate": 7.867477622936687e-06, + "loss": 0.0752, + "step": 78430 + }, + { + "epoch": 3.66, + "learning_rate": 7.866693837881901e-06, + "loss": 0.1773, + "step": 78435 + }, + { + "epoch": 3.66, + "learning_rate": 7.865910052827113e-06, + "loss": 0.1699, + "step": 78440 + }, + { + "epoch": 3.66, + "learning_rate": 7.865126267772327e-06, + "loss": 0.1484, + "step": 78445 + }, + { + "epoch": 3.66, + "learning_rate": 7.86434248271754e-06, + "loss": 0.0886, + "step": 78450 + }, + { + "epoch": 3.66, + "learning_rate": 7.863558697662753e-06, + "loss": 0.028, + "step": 78455 + }, + { + "epoch": 3.66, + "learning_rate": 7.862774912607967e-06, + "loss": 0.0327, + "step": 78460 + }, + { + "epoch": 3.66, + "learning_rate": 7.86199112755318e-06, + "loss": 0.0557, + "step": 78465 + }, + { + "epoch": 3.66, + "learning_rate": 7.861207342498393e-06, + "loss": 0.068, + "step": 78470 + }, + { + "epoch": 3.66, + "learning_rate": 7.860423557443607e-06, + "loss": 0.0406, + "step": 78475 + }, + { + "epoch": 3.66, + "learning_rate": 7.859639772388821e-06, + "loss": 0.1129, + "step": 78480 + }, + { + "epoch": 3.66, + "learning_rate": 7.858855987334035e-06, + "loss": 0.0918, + "step": 78485 + }, + { + "epoch": 3.66, + "learning_rate": 7.858072202279247e-06, + "loss": 0.1735, + "step": 78490 + }, + { + "epoch": 3.66, + "learning_rate": 7.857288417224461e-06, + "loss": 0.2278, + "step": 78495 + }, + { + "epoch": 3.66, + "learning_rate": 7.856504632169675e-06, + "loss": 0.063, + "step": 78500 + }, + { + "epoch": 3.66, + "learning_rate": 7.855720847114889e-06, + "loss": 0.0642, + "step": 78505 + }, + { + "epoch": 3.66, + "learning_rate": 7.854937062060101e-06, + "loss": 0.029, + "step": 78510 + }, + { + "epoch": 3.66, + "learning_rate": 7.854153277005313e-06, + "loss": 0.0382, + "step": 78515 + }, + { + "epoch": 3.66, + "learning_rate": 7.853369491950527e-06, + "loss": 0.0576, + "step": 78520 + }, + { + "epoch": 3.66, + "learning_rate": 7.852585706895741e-06, + "loss": 0.096, + "step": 78525 + }, + { + "epoch": 3.66, + "learning_rate": 7.851801921840955e-06, + "loss": 0.0674, + "step": 78530 + }, + { + "epoch": 3.66, + "learning_rate": 7.851018136786169e-06, + "loss": 0.2253, + "step": 78535 + }, + { + "epoch": 3.66, + "learning_rate": 7.850234351731381e-06, + "loss": 0.1518, + "step": 78540 + }, + { + "epoch": 3.67, + "learning_rate": 7.849450566676595e-06, + "loss": 0.386, + "step": 78545 + }, + { + "epoch": 3.67, + "learning_rate": 7.848666781621809e-06, + "loss": 0.0507, + "step": 78550 + }, + { + "epoch": 3.67, + "learning_rate": 7.847882996567023e-06, + "loss": 0.0119, + "step": 78555 + }, + { + "epoch": 3.67, + "learning_rate": 7.847099211512235e-06, + "loss": 0.0605, + "step": 78560 + }, + { + "epoch": 3.67, + "learning_rate": 7.846315426457449e-06, + "loss": 0.0267, + "step": 78565 + }, + { + "epoch": 3.67, + "learning_rate": 7.845531641402663e-06, + "loss": 0.054, + "step": 78570 + }, + { + "epoch": 3.67, + "learning_rate": 7.844747856347877e-06, + "loss": 0.1297, + "step": 78575 + }, + { + "epoch": 3.67, + "learning_rate": 7.843964071293089e-06, + "loss": 0.1203, + "step": 78580 + }, + { + "epoch": 3.67, + "learning_rate": 7.843180286238303e-06, + "loss": 0.0647, + "step": 78585 + }, + { + "epoch": 3.67, + "learning_rate": 7.842396501183515e-06, + "loss": 0.1628, + "step": 78590 + }, + { + "epoch": 3.67, + "learning_rate": 7.841612716128729e-06, + "loss": 0.3329, + "step": 78595 + }, + { + "epoch": 3.67, + "learning_rate": 7.840828931073943e-06, + "loss": 0.05, + "step": 78600 + }, + { + "epoch": 3.67, + "learning_rate": 7.840045146019157e-06, + "loss": 0.0131, + "step": 78605 + }, + { + "epoch": 3.67, + "learning_rate": 7.839261360964369e-06, + "loss": 0.0388, + "step": 78610 + }, + { + "epoch": 3.67, + "learning_rate": 7.838477575909583e-06, + "loss": 0.0757, + "step": 78615 + }, + { + "epoch": 3.67, + "learning_rate": 7.837693790854797e-06, + "loss": 0.0546, + "step": 78620 + }, + { + "epoch": 3.67, + "learning_rate": 7.83691000580001e-06, + "loss": 0.1291, + "step": 78625 + }, + { + "epoch": 3.67, + "learning_rate": 7.836126220745223e-06, + "loss": 0.0914, + "step": 78630 + }, + { + "epoch": 3.67, + "learning_rate": 7.835342435690437e-06, + "loss": 0.2011, + "step": 78635 + }, + { + "epoch": 3.67, + "learning_rate": 7.834558650635651e-06, + "loss": 0.1913, + "step": 78640 + }, + { + "epoch": 3.67, + "learning_rate": 7.833774865580863e-06, + "loss": 0.3214, + "step": 78645 + }, + { + "epoch": 3.67, + "learning_rate": 7.832991080526077e-06, + "loss": 0.0318, + "step": 78650 + }, + { + "epoch": 3.67, + "learning_rate": 7.832207295471291e-06, + "loss": 0.0228, + "step": 78655 + }, + { + "epoch": 3.67, + "learning_rate": 7.831423510416503e-06, + "loss": 0.0552, + "step": 78660 + }, + { + "epoch": 3.67, + "learning_rate": 7.830639725361717e-06, + "loss": 0.0665, + "step": 78665 + }, + { + "epoch": 3.67, + "learning_rate": 7.829855940306931e-06, + "loss": 0.0686, + "step": 78670 + }, + { + "epoch": 3.67, + "learning_rate": 7.829072155252145e-06, + "loss": 0.0723, + "step": 78675 + }, + { + "epoch": 3.67, + "learning_rate": 7.828288370197357e-06, + "loss": 0.1752, + "step": 78680 + }, + { + "epoch": 3.67, + "learning_rate": 7.827504585142571e-06, + "loss": 0.1152, + "step": 78685 + }, + { + "epoch": 3.67, + "learning_rate": 7.826720800087785e-06, + "loss": 0.1724, + "step": 78690 + }, + { + "epoch": 3.67, + "learning_rate": 7.825937015032999e-06, + "loss": 0.1554, + "step": 78695 + }, + { + "epoch": 3.67, + "learning_rate": 7.825153229978213e-06, + "loss": 0.054, + "step": 78700 + }, + { + "epoch": 3.67, + "learning_rate": 7.824369444923425e-06, + "loss": 0.0047, + "step": 78705 + }, + { + "epoch": 3.67, + "learning_rate": 7.823585659868637e-06, + "loss": 0.0628, + "step": 78710 + }, + { + "epoch": 3.67, + "learning_rate": 7.822801874813851e-06, + "loss": 0.1317, + "step": 78715 + }, + { + "epoch": 3.67, + "learning_rate": 7.822018089759065e-06, + "loss": 0.0651, + "step": 78720 + }, + { + "epoch": 3.67, + "learning_rate": 7.821234304704279e-06, + "loss": 0.0526, + "step": 78725 + }, + { + "epoch": 3.67, + "learning_rate": 7.820450519649491e-06, + "loss": 0.0481, + "step": 78730 + }, + { + "epoch": 3.67, + "learning_rate": 7.819666734594705e-06, + "loss": 0.1255, + "step": 78735 + }, + { + "epoch": 3.67, + "learning_rate": 7.818882949539919e-06, + "loss": 0.2002, + "step": 78740 + }, + { + "epoch": 3.67, + "learning_rate": 7.818099164485133e-06, + "loss": 0.2313, + "step": 78745 + }, + { + "epoch": 3.67, + "learning_rate": 7.817315379430347e-06, + "loss": 0.1013, + "step": 78750 + }, + { + "epoch": 3.67, + "learning_rate": 7.816531594375559e-06, + "loss": 0.0301, + "step": 78755 + }, + { + "epoch": 3.68, + "learning_rate": 7.815747809320773e-06, + "loss": 0.0135, + "step": 78760 + }, + { + "epoch": 3.68, + "learning_rate": 7.814964024265987e-06, + "loss": 0.0525, + "step": 78765 + }, + { + "epoch": 3.68, + "learning_rate": 7.8141802392112e-06, + "loss": 0.1247, + "step": 78770 + }, + { + "epoch": 3.68, + "learning_rate": 7.813396454156413e-06, + "loss": 0.0658, + "step": 78775 + }, + { + "epoch": 3.68, + "learning_rate": 7.812612669101625e-06, + "loss": 0.1077, + "step": 78780 + }, + { + "epoch": 3.68, + "learning_rate": 7.811828884046839e-06, + "loss": 0.1567, + "step": 78785 + }, + { + "epoch": 3.68, + "learning_rate": 7.811045098992053e-06, + "loss": 0.1781, + "step": 78790 + }, + { + "epoch": 3.68, + "learning_rate": 7.810261313937267e-06, + "loss": 0.2068, + "step": 78795 + }, + { + "epoch": 3.68, + "learning_rate": 7.80947752888248e-06, + "loss": 0.1577, + "step": 78800 + }, + { + "epoch": 3.68, + "learning_rate": 7.808693743827693e-06, + "loss": 0.0462, + "step": 78805 + }, + { + "epoch": 3.68, + "learning_rate": 7.807909958772907e-06, + "loss": 0.0606, + "step": 78810 + }, + { + "epoch": 3.68, + "learning_rate": 7.80712617371812e-06, + "loss": 0.0329, + "step": 78815 + }, + { + "epoch": 3.68, + "learning_rate": 7.806342388663335e-06, + "loss": 0.0622, + "step": 78820 + }, + { + "epoch": 3.68, + "learning_rate": 7.805558603608547e-06, + "loss": 0.2249, + "step": 78825 + }, + { + "epoch": 3.68, + "learning_rate": 7.80477481855376e-06, + "loss": 0.1718, + "step": 78830 + }, + { + "epoch": 3.68, + "learning_rate": 7.803991033498975e-06, + "loss": 0.1659, + "step": 78835 + }, + { + "epoch": 3.68, + "learning_rate": 7.803207248444187e-06, + "loss": 0.1572, + "step": 78840 + }, + { + "epoch": 3.68, + "learning_rate": 7.8024234633894e-06, + "loss": 0.3135, + "step": 78845 + }, + { + "epoch": 3.68, + "learning_rate": 7.801639678334615e-06, + "loss": 0.0707, + "step": 78850 + }, + { + "epoch": 3.68, + "learning_rate": 7.800855893279827e-06, + "loss": 0.0092, + "step": 78855 + }, + { + "epoch": 3.68, + "learning_rate": 7.80007210822504e-06, + "loss": 0.061, + "step": 78860 + }, + { + "epoch": 3.68, + "learning_rate": 7.799288323170255e-06, + "loss": 0.0245, + "step": 78865 + }, + { + "epoch": 3.68, + "learning_rate": 7.798504538115468e-06, + "loss": 0.0909, + "step": 78870 + }, + { + "epoch": 3.68, + "learning_rate": 7.79772075306068e-06, + "loss": 0.1077, + "step": 78875 + }, + { + "epoch": 3.68, + "learning_rate": 7.796936968005895e-06, + "loss": 0.0799, + "step": 78880 + }, + { + "epoch": 3.68, + "learning_rate": 7.796153182951109e-06, + "loss": 0.1157, + "step": 78885 + }, + { + "epoch": 3.68, + "learning_rate": 7.795369397896322e-06, + "loss": 0.2334, + "step": 78890 + }, + { + "epoch": 3.68, + "learning_rate": 7.794585612841535e-06, + "loss": 0.4019, + "step": 78895 + }, + { + "epoch": 3.68, + "learning_rate": 7.793801827786749e-06, + "loss": 0.0946, + "step": 78900 + }, + { + "epoch": 3.68, + "learning_rate": 7.79301804273196e-06, + "loss": 0.0261, + "step": 78905 + }, + { + "epoch": 3.68, + "learning_rate": 7.792234257677175e-06, + "loss": 0.0345, + "step": 78910 + }, + { + "epoch": 3.68, + "learning_rate": 7.791450472622389e-06, + "loss": 0.0157, + "step": 78915 + }, + { + "epoch": 3.68, + "learning_rate": 7.790666687567602e-06, + "loss": 0.0677, + "step": 78920 + }, + { + "epoch": 3.68, + "learning_rate": 7.789882902512815e-06, + "loss": 0.1063, + "step": 78925 + }, + { + "epoch": 3.68, + "learning_rate": 7.789099117458029e-06, + "loss": 0.0972, + "step": 78930 + }, + { + "epoch": 3.68, + "learning_rate": 7.788315332403242e-06, + "loss": 0.1355, + "step": 78935 + }, + { + "epoch": 3.68, + "learning_rate": 7.787531547348456e-06, + "loss": 0.2036, + "step": 78940 + }, + { + "epoch": 3.68, + "learning_rate": 7.786747762293669e-06, + "loss": 0.296, + "step": 78945 + }, + { + "epoch": 3.68, + "learning_rate": 7.785963977238883e-06, + "loss": 0.0542, + "step": 78950 + }, + { + "epoch": 3.68, + "learning_rate": 7.785180192184096e-06, + "loss": 0.0348, + "step": 78955 + }, + { + "epoch": 3.68, + "learning_rate": 7.78439640712931e-06, + "loss": 0.0292, + "step": 78960 + }, + { + "epoch": 3.68, + "learning_rate": 7.783612622074524e-06, + "loss": 0.115, + "step": 78965 + }, + { + "epoch": 3.68, + "learning_rate": 7.782828837019736e-06, + "loss": 0.1179, + "step": 78970 + }, + { + "epoch": 3.69, + "learning_rate": 7.782045051964949e-06, + "loss": 0.0641, + "step": 78975 + }, + { + "epoch": 3.69, + "learning_rate": 7.781261266910163e-06, + "loss": 0.0712, + "step": 78980 + }, + { + "epoch": 3.69, + "learning_rate": 7.780477481855376e-06, + "loss": 0.0613, + "step": 78985 + }, + { + "epoch": 3.69, + "learning_rate": 7.77969369680059e-06, + "loss": 0.2245, + "step": 78990 + }, + { + "epoch": 3.69, + "learning_rate": 7.778909911745803e-06, + "loss": 0.1215, + "step": 78995 + }, + { + "epoch": 3.69, + "learning_rate": 7.778126126691016e-06, + "loss": 0.0407, + "step": 79000 + }, + { + "epoch": 3.69, + "learning_rate": 7.77734234163623e-06, + "loss": 0.0457, + "step": 79005 + }, + { + "epoch": 3.69, + "learning_rate": 7.776558556581444e-06, + "loss": 0.0425, + "step": 79010 + }, + { + "epoch": 3.69, + "learning_rate": 7.775774771526658e-06, + "loss": 0.0623, + "step": 79015 + }, + { + "epoch": 3.69, + "learning_rate": 7.77499098647187e-06, + "loss": 0.0741, + "step": 79020 + }, + { + "epoch": 3.69, + "learning_rate": 7.774207201417084e-06, + "loss": 0.0935, + "step": 79025 + }, + { + "epoch": 3.69, + "learning_rate": 7.773423416362298e-06, + "loss": 0.0852, + "step": 79030 + }, + { + "epoch": 3.69, + "learning_rate": 7.77263963130751e-06, + "loss": 0.1269, + "step": 79035 + }, + { + "epoch": 3.69, + "learning_rate": 7.771855846252724e-06, + "loss": 0.2653, + "step": 79040 + }, + { + "epoch": 3.69, + "learning_rate": 7.771072061197937e-06, + "loss": 0.3134, + "step": 79045 + }, + { + "epoch": 3.69, + "learning_rate": 7.77028827614315e-06, + "loss": 0.0349, + "step": 79050 + }, + { + "epoch": 3.69, + "learning_rate": 7.769504491088364e-06, + "loss": 0.0252, + "step": 79055 + }, + { + "epoch": 3.69, + "learning_rate": 7.768720706033578e-06, + "loss": 0.0486, + "step": 79060 + }, + { + "epoch": 3.69, + "learning_rate": 7.767936920978792e-06, + "loss": 0.0675, + "step": 79065 + }, + { + "epoch": 3.69, + "learning_rate": 7.767153135924004e-06, + "loss": 0.0798, + "step": 79070 + }, + { + "epoch": 3.69, + "learning_rate": 7.766369350869218e-06, + "loss": 0.1307, + "step": 79075 + }, + { + "epoch": 3.69, + "learning_rate": 7.765585565814432e-06, + "loss": 0.0834, + "step": 79080 + }, + { + "epoch": 3.69, + "learning_rate": 7.764801780759646e-06, + "loss": 0.1156, + "step": 79085 + }, + { + "epoch": 3.69, + "learning_rate": 7.764017995704858e-06, + "loss": 0.3002, + "step": 79090 + }, + { + "epoch": 3.69, + "learning_rate": 7.763234210650072e-06, + "loss": 0.1833, + "step": 79095 + }, + { + "epoch": 3.69, + "learning_rate": 7.762450425595284e-06, + "loss": 0.0513, + "step": 79100 + }, + { + "epoch": 3.69, + "learning_rate": 7.761666640540498e-06, + "loss": 0.0273, + "step": 79105 + }, + { + "epoch": 3.69, + "learning_rate": 7.760882855485712e-06, + "loss": 0.0704, + "step": 79110 + }, + { + "epoch": 3.69, + "learning_rate": 7.760099070430926e-06, + "loss": 0.0579, + "step": 79115 + }, + { + "epoch": 3.69, + "learning_rate": 7.759315285376138e-06, + "loss": 0.1112, + "step": 79120 + }, + { + "epoch": 3.69, + "learning_rate": 7.758531500321352e-06, + "loss": 0.067, + "step": 79125 + }, + { + "epoch": 3.69, + "learning_rate": 7.757747715266566e-06, + "loss": 0.1091, + "step": 79130 + }, + { + "epoch": 3.69, + "learning_rate": 7.75696393021178e-06, + "loss": 0.1161, + "step": 79135 + }, + { + "epoch": 3.69, + "learning_rate": 7.756180145156992e-06, + "loss": 0.2371, + "step": 79140 + }, + { + "epoch": 3.69, + "learning_rate": 7.755396360102206e-06, + "loss": 0.277, + "step": 79145 + }, + { + "epoch": 3.69, + "learning_rate": 7.75461257504742e-06, + "loss": 0.0751, + "step": 79150 + }, + { + "epoch": 3.69, + "learning_rate": 7.75398554700359e-06, + "loss": 0.0851, + "step": 79155 + }, + { + "epoch": 3.69, + "learning_rate": 7.753201761948804e-06, + "loss": 0.0308, + "step": 79160 + }, + { + "epoch": 3.69, + "learning_rate": 7.752417976894018e-06, + "loss": 0.0851, + "step": 79165 + }, + { + "epoch": 3.69, + "learning_rate": 7.75163419183923e-06, + "loss": 0.0434, + "step": 79170 + }, + { + "epoch": 3.69, + "learning_rate": 7.750850406784444e-06, + "loss": 0.0541, + "step": 79175 + }, + { + "epoch": 3.69, + "learning_rate": 7.750066621729657e-06, + "loss": 0.1718, + "step": 79180 + }, + { + "epoch": 3.69, + "learning_rate": 7.74928283667487e-06, + "loss": 0.0957, + "step": 79185 + }, + { + "epoch": 3.7, + "learning_rate": 7.748499051620085e-06, + "loss": 0.2069, + "step": 79190 + }, + { + "epoch": 3.7, + "learning_rate": 7.747715266565298e-06, + "loss": 0.3096, + "step": 79195 + }, + { + "epoch": 3.7, + "learning_rate": 7.74693148151051e-06, + "loss": 0.0314, + "step": 79200 + }, + { + "epoch": 3.7, + "learning_rate": 7.746147696455725e-06, + "loss": 0.026, + "step": 79205 + }, + { + "epoch": 3.7, + "learning_rate": 7.745363911400938e-06, + "loss": 0.0116, + "step": 79210 + }, + { + "epoch": 3.7, + "learning_rate": 7.744580126346152e-06, + "loss": 0.0652, + "step": 79215 + }, + { + "epoch": 3.7, + "learning_rate": 7.743796341291365e-06, + "loss": 0.0941, + "step": 79220 + }, + { + "epoch": 3.7, + "learning_rate": 7.743012556236578e-06, + "loss": 0.054, + "step": 79225 + }, + { + "epoch": 3.7, + "learning_rate": 7.742228771181792e-06, + "loss": 0.0919, + "step": 79230 + }, + { + "epoch": 3.7, + "learning_rate": 7.741444986127005e-06, + "loss": 0.0863, + "step": 79235 + }, + { + "epoch": 3.7, + "learning_rate": 7.740661201072218e-06, + "loss": 0.2058, + "step": 79240 + }, + { + "epoch": 3.7, + "learning_rate": 7.739877416017432e-06, + "loss": 0.3746, + "step": 79245 + }, + { + "epoch": 3.7, + "learning_rate": 7.739093630962645e-06, + "loss": 0.0631, + "step": 79250 + }, + { + "epoch": 3.7, + "learning_rate": 7.738309845907859e-06, + "loss": 0.0152, + "step": 79255 + }, + { + "epoch": 3.7, + "learning_rate": 7.737526060853072e-06, + "loss": 0.0425, + "step": 79260 + }, + { + "epoch": 3.7, + "learning_rate": 7.736742275798286e-06, + "loss": 0.104, + "step": 79265 + }, + { + "epoch": 3.7, + "learning_rate": 7.735958490743499e-06, + "loss": 0.0209, + "step": 79270 + }, + { + "epoch": 3.7, + "learning_rate": 7.735174705688712e-06, + "loss": 0.0863, + "step": 79275 + }, + { + "epoch": 3.7, + "learning_rate": 7.734390920633926e-06, + "loss": 0.1091, + "step": 79280 + }, + { + "epoch": 3.7, + "learning_rate": 7.73360713557914e-06, + "loss": 0.1356, + "step": 79285 + }, + { + "epoch": 3.7, + "learning_rate": 7.732823350524352e-06, + "loss": 0.1522, + "step": 79290 + }, + { + "epoch": 3.7, + "learning_rate": 7.732039565469566e-06, + "loss": 0.1584, + "step": 79295 + }, + { + "epoch": 3.7, + "learning_rate": 7.731255780414779e-06, + "loss": 0.0795, + "step": 79300 + }, + { + "epoch": 3.7, + "learning_rate": 7.730471995359992e-06, + "loss": 0.0596, + "step": 79305 + }, + { + "epoch": 3.7, + "learning_rate": 7.729688210305206e-06, + "loss": 0.0502, + "step": 79310 + }, + { + "epoch": 3.7, + "learning_rate": 7.72890442525042e-06, + "loss": 0.0671, + "step": 79315 + }, + { + "epoch": 3.7, + "learning_rate": 7.728120640195633e-06, + "loss": 0.0851, + "step": 79320 + }, + { + "epoch": 3.7, + "learning_rate": 7.727336855140846e-06, + "loss": 0.0644, + "step": 79325 + }, + { + "epoch": 3.7, + "learning_rate": 7.72655307008606e-06, + "loss": 0.104, + "step": 79330 + }, + { + "epoch": 3.7, + "learning_rate": 7.725769285031274e-06, + "loss": 0.1543, + "step": 79335 + }, + { + "epoch": 3.7, + "learning_rate": 7.724985499976486e-06, + "loss": 0.1573, + "step": 79340 + }, + { + "epoch": 3.7, + "learning_rate": 7.7242017149217e-06, + "loss": 0.2184, + "step": 79345 + }, + { + "epoch": 3.7, + "learning_rate": 7.723417929866914e-06, + "loss": 0.0369, + "step": 79350 + }, + { + "epoch": 3.7, + "learning_rate": 7.722634144812128e-06, + "loss": 0.0109, + "step": 79355 + }, + { + "epoch": 3.7, + "learning_rate": 7.721850359757342e-06, + "loss": 0.0269, + "step": 79360 + }, + { + "epoch": 3.7, + "learning_rate": 7.721066574702554e-06, + "loss": 0.0179, + "step": 79365 + }, + { + "epoch": 3.7, + "learning_rate": 7.720282789647766e-06, + "loss": 0.0539, + "step": 79370 + }, + { + "epoch": 3.7, + "learning_rate": 7.71949900459298e-06, + "loss": 0.0521, + "step": 79375 + }, + { + "epoch": 3.7, + "learning_rate": 7.718715219538194e-06, + "loss": 0.0838, + "step": 79380 + }, + { + "epoch": 3.7, + "learning_rate": 7.717931434483408e-06, + "loss": 0.0885, + "step": 79385 + }, + { + "epoch": 3.7, + "learning_rate": 7.717147649428622e-06, + "loss": 0.2194, + "step": 79390 + }, + { + "epoch": 3.7, + "learning_rate": 7.716363864373834e-06, + "loss": 0.2308, + "step": 79395 + }, + { + "epoch": 3.7, + "learning_rate": 7.715580079319048e-06, + "loss": 0.0599, + "step": 79400 + }, + { + "epoch": 3.71, + "learning_rate": 7.714796294264262e-06, + "loss": 0.0291, + "step": 79405 + }, + { + "epoch": 3.71, + "learning_rate": 7.714012509209476e-06, + "loss": 0.0124, + "step": 79410 + }, + { + "epoch": 3.71, + "learning_rate": 7.713228724154688e-06, + "loss": 0.1041, + "step": 79415 + }, + { + "epoch": 3.71, + "learning_rate": 7.712444939099902e-06, + "loss": 0.0448, + "step": 79420 + }, + { + "epoch": 3.71, + "learning_rate": 7.711661154045116e-06, + "loss": 0.0642, + "step": 79425 + }, + { + "epoch": 3.71, + "learning_rate": 7.710877368990328e-06, + "loss": 0.0951, + "step": 79430 + }, + { + "epoch": 3.71, + "learning_rate": 7.710093583935542e-06, + "loss": 0.1728, + "step": 79435 + }, + { + "epoch": 3.71, + "learning_rate": 7.709309798880756e-06, + "loss": 0.1487, + "step": 79440 + }, + { + "epoch": 3.71, + "learning_rate": 7.708526013825968e-06, + "loss": 0.237, + "step": 79445 + }, + { + "epoch": 3.71, + "learning_rate": 7.707742228771182e-06, + "loss": 0.0672, + "step": 79450 + }, + { + "epoch": 3.71, + "learning_rate": 7.706958443716396e-06, + "loss": 0.0275, + "step": 79455 + }, + { + "epoch": 3.71, + "learning_rate": 7.70617465866161e-06, + "loss": 0.0369, + "step": 79460 + }, + { + "epoch": 3.71, + "learning_rate": 7.705390873606822e-06, + "loss": 0.0482, + "step": 79465 + }, + { + "epoch": 3.71, + "learning_rate": 7.704607088552036e-06, + "loss": 0.0857, + "step": 79470 + }, + { + "epoch": 3.71, + "learning_rate": 7.70382330349725e-06, + "loss": 0.0598, + "step": 79475 + }, + { + "epoch": 3.71, + "learning_rate": 7.703039518442464e-06, + "loss": 0.1201, + "step": 79480 + }, + { + "epoch": 3.71, + "learning_rate": 7.702255733387676e-06, + "loss": 0.1677, + "step": 79485 + }, + { + "epoch": 3.71, + "learning_rate": 7.70147194833289e-06, + "loss": 0.2025, + "step": 79490 + }, + { + "epoch": 3.71, + "learning_rate": 7.700688163278102e-06, + "loss": 0.3051, + "step": 79495 + }, + { + "epoch": 3.71, + "learning_rate": 7.699904378223316e-06, + "loss": 0.1057, + "step": 79500 + }, + { + "epoch": 3.71, + "learning_rate": 7.69912059316853e-06, + "loss": 0.0513, + "step": 79505 + }, + { + "epoch": 3.71, + "learning_rate": 7.698336808113744e-06, + "loss": 0.0324, + "step": 79510 + }, + { + "epoch": 3.71, + "learning_rate": 7.697553023058956e-06, + "loss": 0.031, + "step": 79515 + }, + { + "epoch": 3.71, + "learning_rate": 7.69676923800417e-06, + "loss": 0.0548, + "step": 79520 + }, + { + "epoch": 3.71, + "learning_rate": 7.695985452949384e-06, + "loss": 0.0992, + "step": 79525 + }, + { + "epoch": 3.71, + "learning_rate": 7.695201667894598e-06, + "loss": 0.1201, + "step": 79530 + }, + { + "epoch": 3.71, + "learning_rate": 7.69441788283981e-06, + "loss": 0.0911, + "step": 79535 + }, + { + "epoch": 3.71, + "learning_rate": 7.693634097785024e-06, + "loss": 0.3037, + "step": 79540 + }, + { + "epoch": 3.71, + "learning_rate": 7.692850312730238e-06, + "loss": 0.4432, + "step": 79545 + }, + { + "epoch": 3.71, + "learning_rate": 7.692066527675452e-06, + "loss": 0.0432, + "step": 79550 + }, + { + "epoch": 3.71, + "learning_rate": 7.691282742620664e-06, + "loss": 0.0619, + "step": 79555 + }, + { + "epoch": 3.71, + "learning_rate": 7.690498957565878e-06, + "loss": 0.0823, + "step": 79560 + }, + { + "epoch": 3.71, + "learning_rate": 7.68971517251109e-06, + "loss": 0.0433, + "step": 79565 + }, + { + "epoch": 3.71, + "learning_rate": 7.688931387456304e-06, + "loss": 0.0585, + "step": 79570 + }, + { + "epoch": 3.71, + "learning_rate": 7.688147602401518e-06, + "loss": 0.1646, + "step": 79575 + }, + { + "epoch": 3.71, + "learning_rate": 7.687363817346732e-06, + "loss": 0.1671, + "step": 79580 + }, + { + "epoch": 3.71, + "learning_rate": 7.686580032291944e-06, + "loss": 0.0532, + "step": 79585 + }, + { + "epoch": 3.71, + "learning_rate": 7.685796247237158e-06, + "loss": 0.1626, + "step": 79590 + }, + { + "epoch": 3.71, + "learning_rate": 7.685012462182372e-06, + "loss": 0.3362, + "step": 79595 + }, + { + "epoch": 3.71, + "learning_rate": 7.684228677127586e-06, + "loss": 0.0745, + "step": 79600 + }, + { + "epoch": 3.71, + "learning_rate": 7.683444892072798e-06, + "loss": 0.0608, + "step": 79605 + }, + { + "epoch": 3.71, + "learning_rate": 7.682661107018012e-06, + "loss": 0.0552, + "step": 79610 + }, + { + "epoch": 3.71, + "learning_rate": 7.681877321963226e-06, + "loss": 0.0213, + "step": 79615 + }, + { + "epoch": 3.72, + "learning_rate": 7.68109353690844e-06, + "loss": 0.0431, + "step": 79620 + }, + { + "epoch": 3.72, + "learning_rate": 7.680309751853652e-06, + "loss": 0.084, + "step": 79625 + }, + { + "epoch": 3.72, + "learning_rate": 7.679525966798866e-06, + "loss": 0.0523, + "step": 79630 + }, + { + "epoch": 3.72, + "learning_rate": 7.678898938755036e-06, + "loss": 0.1399, + "step": 79635 + }, + { + "epoch": 3.72, + "learning_rate": 7.67811515370025e-06, + "loss": 0.1198, + "step": 79640 + }, + { + "epoch": 3.72, + "learning_rate": 7.677331368645462e-06, + "loss": 0.1457, + "step": 79645 + }, + { + "epoch": 3.72, + "learning_rate": 7.676547583590676e-06, + "loss": 0.0347, + "step": 79650 + }, + { + "epoch": 3.72, + "learning_rate": 7.67576379853589e-06, + "loss": 0.0446, + "step": 79655 + }, + { + "epoch": 3.72, + "learning_rate": 7.674980013481104e-06, + "loss": 0.0179, + "step": 79660 + }, + { + "epoch": 3.72, + "learning_rate": 7.674196228426316e-06, + "loss": 0.058, + "step": 79665 + }, + { + "epoch": 3.72, + "learning_rate": 7.67341244337153e-06, + "loss": 0.041, + "step": 79670 + }, + { + "epoch": 3.72, + "learning_rate": 7.672628658316744e-06, + "loss": 0.112, + "step": 79675 + }, + { + "epoch": 3.72, + "learning_rate": 7.671844873261958e-06, + "loss": 0.1677, + "step": 79680 + }, + { + "epoch": 3.72, + "learning_rate": 7.671061088207172e-06, + "loss": 0.148, + "step": 79685 + }, + { + "epoch": 3.72, + "learning_rate": 7.670277303152384e-06, + "loss": 0.2323, + "step": 79690 + }, + { + "epoch": 3.72, + "learning_rate": 7.669493518097596e-06, + "loss": 0.1864, + "step": 79695 + }, + { + "epoch": 3.72, + "learning_rate": 7.66870973304281e-06, + "loss": 0.046, + "step": 79700 + }, + { + "epoch": 3.72, + "learning_rate": 7.667925947988024e-06, + "loss": 0.0314, + "step": 79705 + }, + { + "epoch": 3.72, + "learning_rate": 7.667142162933238e-06, + "loss": 0.0366, + "step": 79710 + }, + { + "epoch": 3.72, + "learning_rate": 7.66635837787845e-06, + "loss": 0.0915, + "step": 79715 + }, + { + "epoch": 3.72, + "learning_rate": 7.665574592823664e-06, + "loss": 0.074, + "step": 79720 + }, + { + "epoch": 3.72, + "learning_rate": 7.664790807768878e-06, + "loss": 0.1124, + "step": 79725 + }, + { + "epoch": 3.72, + "learning_rate": 7.664007022714092e-06, + "loss": 0.0639, + "step": 79730 + }, + { + "epoch": 3.72, + "learning_rate": 7.663223237659306e-06, + "loss": 0.134, + "step": 79735 + }, + { + "epoch": 3.72, + "learning_rate": 7.662439452604518e-06, + "loss": 0.0796, + "step": 79740 + }, + { + "epoch": 3.72, + "learning_rate": 7.661655667549732e-06, + "loss": 0.2107, + "step": 79745 + }, + { + "epoch": 3.72, + "learning_rate": 7.660871882494946e-06, + "loss": 0.035, + "step": 79750 + }, + { + "epoch": 3.72, + "learning_rate": 7.660088097440158e-06, + "loss": 0.0502, + "step": 79755 + }, + { + "epoch": 3.72, + "learning_rate": 7.659304312385372e-06, + "loss": 0.0185, + "step": 79760 + }, + { + "epoch": 3.72, + "learning_rate": 7.658520527330584e-06, + "loss": 0.0351, + "step": 79765 + }, + { + "epoch": 3.72, + "learning_rate": 7.657736742275798e-06, + "loss": 0.1205, + "step": 79770 + }, + { + "epoch": 3.72, + "learning_rate": 7.656952957221012e-06, + "loss": 0.0339, + "step": 79775 + }, + { + "epoch": 3.72, + "learning_rate": 7.656169172166226e-06, + "loss": 0.1625, + "step": 79780 + }, + { + "epoch": 3.72, + "learning_rate": 7.65538538711144e-06, + "loss": 0.2515, + "step": 79785 + }, + { + "epoch": 3.72, + "learning_rate": 7.654601602056652e-06, + "loss": 0.1826, + "step": 79790 + }, + { + "epoch": 3.72, + "learning_rate": 7.653817817001866e-06, + "loss": 0.2473, + "step": 79795 + }, + { + "epoch": 3.72, + "learning_rate": 7.65303403194708e-06, + "loss": 0.108, + "step": 79800 + }, + { + "epoch": 3.72, + "learning_rate": 7.652250246892294e-06, + "loss": 0.029, + "step": 79805 + }, + { + "epoch": 3.72, + "learning_rate": 7.651466461837506e-06, + "loss": 0.0212, + "step": 79810 + }, + { + "epoch": 3.72, + "learning_rate": 7.65068267678272e-06, + "loss": 0.0281, + "step": 79815 + }, + { + "epoch": 3.72, + "learning_rate": 7.649898891727932e-06, + "loss": 0.1148, + "step": 79820 + }, + { + "epoch": 3.72, + "learning_rate": 7.649115106673146e-06, + "loss": 0.1028, + "step": 79825 + }, + { + "epoch": 3.72, + "learning_rate": 7.64833132161836e-06, + "loss": 0.1096, + "step": 79830 + }, + { + "epoch": 3.73, + "learning_rate": 7.647547536563574e-06, + "loss": 0.1648, + "step": 79835 + }, + { + "epoch": 3.73, + "learning_rate": 7.646763751508786e-06, + "loss": 0.2439, + "step": 79840 + }, + { + "epoch": 3.73, + "learning_rate": 7.645979966454e-06, + "loss": 0.3196, + "step": 79845 + }, + { + "epoch": 3.73, + "learning_rate": 7.645196181399214e-06, + "loss": 0.0447, + "step": 79850 + }, + { + "epoch": 3.73, + "learning_rate": 7.644412396344428e-06, + "loss": 0.0287, + "step": 79855 + }, + { + "epoch": 3.73, + "learning_rate": 7.64362861128964e-06, + "loss": 0.0343, + "step": 79860 + }, + { + "epoch": 3.73, + "learning_rate": 7.642844826234854e-06, + "loss": 0.0918, + "step": 79865 + }, + { + "epoch": 3.73, + "learning_rate": 7.642061041180068e-06, + "loss": 0.0603, + "step": 79870 + }, + { + "epoch": 3.73, + "learning_rate": 7.641277256125282e-06, + "loss": 0.0978, + "step": 79875 + }, + { + "epoch": 3.73, + "learning_rate": 7.640493471070494e-06, + "loss": 0.1099, + "step": 79880 + }, + { + "epoch": 3.73, + "learning_rate": 7.639709686015708e-06, + "loss": 0.0707, + "step": 79885 + }, + { + "epoch": 3.73, + "learning_rate": 7.63892590096092e-06, + "loss": 0.1441, + "step": 79890 + }, + { + "epoch": 3.73, + "learning_rate": 7.638142115906134e-06, + "loss": 0.2386, + "step": 79895 + }, + { + "epoch": 3.73, + "learning_rate": 7.637358330851348e-06, + "loss": 0.0736, + "step": 79900 + }, + { + "epoch": 3.73, + "learning_rate": 7.636574545796562e-06, + "loss": 0.0412, + "step": 79905 + }, + { + "epoch": 3.73, + "learning_rate": 7.635790760741774e-06, + "loss": 0.0549, + "step": 79910 + }, + { + "epoch": 3.73, + "learning_rate": 7.635006975686988e-06, + "loss": 0.0487, + "step": 79915 + }, + { + "epoch": 3.73, + "learning_rate": 7.634223190632202e-06, + "loss": 0.071, + "step": 79920 + }, + { + "epoch": 3.73, + "learning_rate": 7.633439405577416e-06, + "loss": 0.0594, + "step": 79925 + }, + { + "epoch": 3.73, + "learning_rate": 7.632655620522628e-06, + "loss": 0.0753, + "step": 79930 + }, + { + "epoch": 3.73, + "learning_rate": 7.631871835467842e-06, + "loss": 0.1202, + "step": 79935 + }, + { + "epoch": 3.73, + "learning_rate": 7.631088050413056e-06, + "loss": 0.1864, + "step": 79940 + }, + { + "epoch": 3.73, + "learning_rate": 7.63030426535827e-06, + "loss": 0.2801, + "step": 79945 + }, + { + "epoch": 3.73, + "learning_rate": 7.629520480303482e-06, + "loss": 0.0676, + "step": 79950 + }, + { + "epoch": 3.73, + "learning_rate": 7.628736695248695e-06, + "loss": 0.0707, + "step": 79955 + }, + { + "epoch": 3.73, + "learning_rate": 7.627952910193909e-06, + "loss": 0.0497, + "step": 79960 + }, + { + "epoch": 3.73, + "learning_rate": 7.627169125139122e-06, + "loss": 0.0591, + "step": 79965 + }, + { + "epoch": 3.73, + "learning_rate": 7.626385340084336e-06, + "loss": 0.0237, + "step": 79970 + }, + { + "epoch": 3.73, + "learning_rate": 7.625601555029549e-06, + "loss": 0.0949, + "step": 79975 + }, + { + "epoch": 3.73, + "learning_rate": 7.624817769974763e-06, + "loss": 0.0499, + "step": 79980 + }, + { + "epoch": 3.73, + "learning_rate": 7.624033984919977e-06, + "loss": 0.1492, + "step": 79985 + }, + { + "epoch": 3.73, + "learning_rate": 7.62325019986519e-06, + "loss": 0.1709, + "step": 79990 + }, + { + "epoch": 3.73, + "learning_rate": 7.622466414810404e-06, + "loss": 0.3034, + "step": 79995 + }, + { + "epoch": 3.73, + "learning_rate": 7.621682629755617e-06, + "loss": 0.1068, + "step": 80000 + }, + { + "epoch": 3.73, + "learning_rate": 7.620898844700831e-06, + "loss": 0.0279, + "step": 80005 + }, + { + "epoch": 3.73, + "learning_rate": 7.620115059646044e-06, + "loss": 0.08, + "step": 80010 + }, + { + "epoch": 3.73, + "learning_rate": 7.619331274591256e-06, + "loss": 0.0887, + "step": 80015 + }, + { + "epoch": 3.73, + "learning_rate": 7.61854748953647e-06, + "loss": 0.0659, + "step": 80020 + }, + { + "epoch": 3.73, + "learning_rate": 7.617763704481683e-06, + "loss": 0.1461, + "step": 80025 + }, + { + "epoch": 3.73, + "learning_rate": 7.616979919426897e-06, + "loss": 0.1, + "step": 80030 + }, + { + "epoch": 3.73, + "learning_rate": 7.616196134372111e-06, + "loss": 0.1256, + "step": 80035 + }, + { + "epoch": 3.73, + "learning_rate": 7.615412349317324e-06, + "loss": 0.1121, + "step": 80040 + }, + { + "epoch": 3.74, + "learning_rate": 7.614628564262538e-06, + "loss": 0.3639, + "step": 80045 + }, + { + "epoch": 3.74, + "learning_rate": 7.613844779207751e-06, + "loss": 0.0781, + "step": 80050 + }, + { + "epoch": 3.74, + "learning_rate": 7.613060994152965e-06, + "loss": 0.0196, + "step": 80055 + }, + { + "epoch": 3.74, + "learning_rate": 7.612277209098178e-06, + "loss": 0.1044, + "step": 80060 + }, + { + "epoch": 3.74, + "learning_rate": 7.6114934240433916e-06, + "loss": 0.0287, + "step": 80065 + }, + { + "epoch": 3.74, + "learning_rate": 7.610709638988605e-06, + "loss": 0.0434, + "step": 80070 + }, + { + "epoch": 3.74, + "learning_rate": 7.6099258539338185e-06, + "loss": 0.0981, + "step": 80075 + }, + { + "epoch": 3.74, + "learning_rate": 7.609142068879031e-06, + "loss": 0.0543, + "step": 80080 + }, + { + "epoch": 3.74, + "learning_rate": 7.608358283824245e-06, + "loss": 0.0924, + "step": 80085 + }, + { + "epoch": 3.74, + "learning_rate": 7.607574498769458e-06, + "loss": 0.2712, + "step": 80090 + }, + { + "epoch": 3.74, + "learning_rate": 7.606790713714672e-06, + "loss": 0.2002, + "step": 80095 + }, + { + "epoch": 3.74, + "learning_rate": 7.606006928659885e-06, + "loss": 0.0818, + "step": 80100 + }, + { + "epoch": 3.74, + "learning_rate": 7.6052231436050986e-06, + "loss": 0.0633, + "step": 80105 + }, + { + "epoch": 3.74, + "learning_rate": 7.604439358550312e-06, + "loss": 0.0325, + "step": 80110 + }, + { + "epoch": 3.74, + "learning_rate": 7.6036555734955255e-06, + "loss": 0.1279, + "step": 80115 + }, + { + "epoch": 3.74, + "learning_rate": 7.602871788440739e-06, + "loss": 0.0537, + "step": 80120 + }, + { + "epoch": 3.74, + "learning_rate": 7.6020880033859525e-06, + "loss": 0.193, + "step": 80125 + }, + { + "epoch": 3.74, + "learning_rate": 7.6013042183311655e-06, + "loss": 0.0717, + "step": 80130 + }, + { + "epoch": 3.74, + "learning_rate": 7.6005204332763795e-06, + "loss": 0.1594, + "step": 80135 + }, + { + "epoch": 3.74, + "learning_rate": 7.5997366482215925e-06, + "loss": 0.1187, + "step": 80140 + }, + { + "epoch": 3.74, + "learning_rate": 7.5989528631668056e-06, + "loss": 0.3229, + "step": 80145 + }, + { + "epoch": 3.74, + "learning_rate": 7.598169078112019e-06, + "loss": 0.0789, + "step": 80150 + }, + { + "epoch": 3.74, + "learning_rate": 7.5973852930572325e-06, + "loss": 0.0952, + "step": 80155 + }, + { + "epoch": 3.74, + "learning_rate": 7.596601508002446e-06, + "loss": 0.0218, + "step": 80160 + }, + { + "epoch": 3.74, + "learning_rate": 7.5958177229476595e-06, + "loss": 0.0401, + "step": 80165 + }, + { + "epoch": 3.74, + "learning_rate": 7.5950339378928726e-06, + "loss": 0.0259, + "step": 80170 + }, + { + "epoch": 3.74, + "learning_rate": 7.5942501528380865e-06, + "loss": 0.1339, + "step": 80175 + }, + { + "epoch": 3.74, + "learning_rate": 7.5934663677832995e-06, + "loss": 0.0555, + "step": 80180 + }, + { + "epoch": 3.74, + "learning_rate": 7.5926825827285134e-06, + "loss": 0.1528, + "step": 80185 + }, + { + "epoch": 3.74, + "learning_rate": 7.5918987976737265e-06, + "loss": 0.2276, + "step": 80190 + }, + { + "epoch": 3.74, + "learning_rate": 7.59111501261894e-06, + "loss": 0.5037, + "step": 80195 + }, + { + "epoch": 3.74, + "learning_rate": 7.5903312275641535e-06, + "loss": 0.0713, + "step": 80200 + }, + { + "epoch": 3.74, + "learning_rate": 7.589547442509367e-06, + "loss": 0.0174, + "step": 80205 + }, + { + "epoch": 3.74, + "learning_rate": 7.5887636574545796e-06, + "loss": 0.016, + "step": 80210 + }, + { + "epoch": 3.74, + "learning_rate": 7.5879798723997935e-06, + "loss": 0.0537, + "step": 80215 + }, + { + "epoch": 3.74, + "learning_rate": 7.5871960873450065e-06, + "loss": 0.1113, + "step": 80220 + }, + { + "epoch": 3.74, + "learning_rate": 7.5864123022902204e-06, + "loss": 0.1015, + "step": 80225 + }, + { + "epoch": 3.74, + "learning_rate": 7.5856285172354335e-06, + "loss": 0.1157, + "step": 80230 + }, + { + "epoch": 3.74, + "learning_rate": 7.584844732180647e-06, + "loss": 0.097, + "step": 80235 + }, + { + "epoch": 3.74, + "learning_rate": 7.5840609471258605e-06, + "loss": 0.1815, + "step": 80240 + }, + { + "epoch": 3.74, + "learning_rate": 7.583277162071074e-06, + "loss": 0.2547, + "step": 80245 + }, + { + "epoch": 3.74, + "learning_rate": 7.5824933770162874e-06, + "loss": 0.064, + "step": 80250 + }, + { + "epoch": 3.74, + "learning_rate": 7.581709591961501e-06, + "loss": 0.0617, + "step": 80255 + }, + { + "epoch": 3.75, + "learning_rate": 7.580925806906715e-06, + "loss": 0.0732, + "step": 80260 + }, + { + "epoch": 3.75, + "learning_rate": 7.580142021851928e-06, + "loss": 0.0817, + "step": 80265 + }, + { + "epoch": 3.75, + "learning_rate": 7.579358236797142e-06, + "loss": 0.0828, + "step": 80270 + }, + { + "epoch": 3.75, + "learning_rate": 7.578574451742354e-06, + "loss": 0.1249, + "step": 80275 + }, + { + "epoch": 3.75, + "learning_rate": 7.5777906666875675e-06, + "loss": 0.085, + "step": 80280 + }, + { + "epoch": 3.75, + "learning_rate": 7.577006881632781e-06, + "loss": 0.2085, + "step": 80285 + }, + { + "epoch": 3.75, + "learning_rate": 7.5762230965779944e-06, + "loss": 0.1444, + "step": 80290 + }, + { + "epoch": 3.75, + "learning_rate": 7.575439311523208e-06, + "loss": 0.2968, + "step": 80295 + }, + { + "epoch": 3.75, + "learning_rate": 7.574655526468422e-06, + "loss": 0.0624, + "step": 80300 + }, + { + "epoch": 3.75, + "learning_rate": 7.573871741413635e-06, + "loss": 0.0325, + "step": 80305 + }, + { + "epoch": 3.75, + "learning_rate": 7.573087956358849e-06, + "loss": 0.0343, + "step": 80310 + }, + { + "epoch": 3.75, + "learning_rate": 7.572304171304062e-06, + "loss": 0.0628, + "step": 80315 + }, + { + "epoch": 3.75, + "learning_rate": 7.571520386249276e-06, + "loss": 0.0368, + "step": 80320 + }, + { + "epoch": 3.75, + "learning_rate": 7.570736601194489e-06, + "loss": 0.1064, + "step": 80325 + }, + { + "epoch": 3.75, + "learning_rate": 7.569952816139703e-06, + "loss": 0.1008, + "step": 80330 + }, + { + "epoch": 3.75, + "learning_rate": 7.569169031084916e-06, + "loss": 0.0986, + "step": 80335 + }, + { + "epoch": 3.75, + "learning_rate": 7.568385246030128e-06, + "loss": 0.1832, + "step": 80340 + }, + { + "epoch": 3.75, + "learning_rate": 7.567601460975342e-06, + "loss": 0.2612, + "step": 80345 + }, + { + "epoch": 3.75, + "learning_rate": 7.566817675920556e-06, + "loss": 0.0729, + "step": 80350 + }, + { + "epoch": 3.75, + "learning_rate": 7.566033890865769e-06, + "loss": 0.0367, + "step": 80355 + }, + { + "epoch": 3.75, + "learning_rate": 7.565250105810983e-06, + "loss": 0.0083, + "step": 80360 + }, + { + "epoch": 3.75, + "learning_rate": 7.564466320756196e-06, + "loss": 0.1255, + "step": 80365 + }, + { + "epoch": 3.75, + "learning_rate": 7.56368253570141e-06, + "loss": 0.0915, + "step": 80370 + }, + { + "epoch": 3.75, + "learning_rate": 7.562898750646623e-06, + "loss": 0.0961, + "step": 80375 + }, + { + "epoch": 3.75, + "learning_rate": 7.562114965591837e-06, + "loss": 0.0742, + "step": 80380 + }, + { + "epoch": 3.75, + "learning_rate": 7.56133118053705e-06, + "loss": 0.0985, + "step": 80385 + }, + { + "epoch": 3.75, + "learning_rate": 7.560547395482264e-06, + "loss": 0.2192, + "step": 80390 + }, + { + "epoch": 3.75, + "learning_rate": 7.559763610427477e-06, + "loss": 0.1856, + "step": 80395 + }, + { + "epoch": 3.75, + "learning_rate": 7.558979825372691e-06, + "loss": 0.0524, + "step": 80400 + }, + { + "epoch": 3.75, + "learning_rate": 7.558196040317903e-06, + "loss": 0.068, + "step": 80405 + }, + { + "epoch": 3.75, + "learning_rate": 7.557412255263117e-06, + "loss": 0.0281, + "step": 80410 + }, + { + "epoch": 3.75, + "learning_rate": 7.55662847020833e-06, + "loss": 0.0676, + "step": 80415 + }, + { + "epoch": 3.75, + "learning_rate": 7.555844685153544e-06, + "loss": 0.0235, + "step": 80420 + }, + { + "epoch": 3.75, + "learning_rate": 7.555060900098757e-06, + "loss": 0.0761, + "step": 80425 + }, + { + "epoch": 3.75, + "learning_rate": 7.554277115043971e-06, + "loss": 0.145, + "step": 80430 + }, + { + "epoch": 3.75, + "learning_rate": 7.553493329989184e-06, + "loss": 0.1257, + "step": 80435 + }, + { + "epoch": 3.75, + "learning_rate": 7.552709544934398e-06, + "loss": 0.2336, + "step": 80440 + }, + { + "epoch": 3.75, + "learning_rate": 7.551925759879611e-06, + "loss": 0.146, + "step": 80445 + }, + { + "epoch": 3.75, + "learning_rate": 7.551141974824825e-06, + "loss": 0.071, + "step": 80450 + }, + { + "epoch": 3.75, + "learning_rate": 7.550358189770038e-06, + "loss": 0.0779, + "step": 80455 + }, + { + "epoch": 3.75, + "learning_rate": 7.549574404715252e-06, + "loss": 0.0214, + "step": 80460 + }, + { + "epoch": 3.75, + "learning_rate": 7.548790619660465e-06, + "loss": 0.0261, + "step": 80465 + }, + { + "epoch": 3.75, + "learning_rate": 7.548006834605678e-06, + "loss": 0.0636, + "step": 80470 + }, + { + "epoch": 3.76, + "learning_rate": 7.547223049550891e-06, + "loss": 0.1001, + "step": 80475 + }, + { + "epoch": 3.76, + "learning_rate": 7.546439264496105e-06, + "loss": 0.0961, + "step": 80480 + }, + { + "epoch": 3.76, + "learning_rate": 7.545655479441318e-06, + "loss": 0.1074, + "step": 80485 + }, + { + "epoch": 3.76, + "learning_rate": 7.544871694386532e-06, + "loss": 0.1157, + "step": 80490 + }, + { + "epoch": 3.76, + "learning_rate": 7.544087909331745e-06, + "loss": 0.4315, + "step": 80495 + }, + { + "epoch": 3.76, + "learning_rate": 7.543304124276959e-06, + "loss": 0.0414, + "step": 80500 + }, + { + "epoch": 3.76, + "learning_rate": 7.542520339222172e-06, + "loss": 0.0545, + "step": 80505 + }, + { + "epoch": 3.76, + "learning_rate": 7.541736554167386e-06, + "loss": 0.0484, + "step": 80510 + }, + { + "epoch": 3.76, + "learning_rate": 7.540952769112599e-06, + "loss": 0.0749, + "step": 80515 + }, + { + "epoch": 3.76, + "learning_rate": 7.540168984057813e-06, + "loss": 0.0789, + "step": 80520 + }, + { + "epoch": 3.76, + "learning_rate": 7.539385199003027e-06, + "loss": 0.0878, + "step": 80525 + }, + { + "epoch": 3.76, + "learning_rate": 7.53860141394824e-06, + "loss": 0.0794, + "step": 80530 + }, + { + "epoch": 3.76, + "learning_rate": 7.537817628893452e-06, + "loss": 0.1222, + "step": 80535 + }, + { + "epoch": 3.76, + "learning_rate": 7.537033843838666e-06, + "loss": 0.1389, + "step": 80540 + }, + { + "epoch": 3.76, + "learning_rate": 7.536250058783879e-06, + "loss": 0.1715, + "step": 80545 + }, + { + "epoch": 3.76, + "learning_rate": 7.535466273729093e-06, + "loss": 0.0799, + "step": 80550 + }, + { + "epoch": 3.76, + "learning_rate": 7.534682488674306e-06, + "loss": 0.0222, + "step": 80555 + }, + { + "epoch": 3.76, + "learning_rate": 7.53389870361952e-06, + "loss": 0.0624, + "step": 80560 + }, + { + "epoch": 3.76, + "learning_rate": 7.533114918564734e-06, + "loss": 0.1226, + "step": 80565 + }, + { + "epoch": 3.76, + "learning_rate": 7.532331133509947e-06, + "loss": 0.1108, + "step": 80570 + }, + { + "epoch": 3.76, + "learning_rate": 7.531547348455161e-06, + "loss": 0.101, + "step": 80575 + }, + { + "epoch": 3.76, + "learning_rate": 7.530763563400374e-06, + "loss": 0.0392, + "step": 80580 + }, + { + "epoch": 3.76, + "learning_rate": 7.529979778345588e-06, + "loss": 0.1091, + "step": 80585 + }, + { + "epoch": 3.76, + "learning_rate": 7.529195993290801e-06, + "loss": 0.1041, + "step": 80590 + }, + { + "epoch": 3.76, + "learning_rate": 7.528412208236015e-06, + "loss": 0.2562, + "step": 80595 + }, + { + "epoch": 3.76, + "learning_rate": 7.527628423181227e-06, + "loss": 0.033, + "step": 80600 + }, + { + "epoch": 3.76, + "learning_rate": 7.52684463812644e-06, + "loss": 0.0161, + "step": 80605 + }, + { + "epoch": 3.76, + "learning_rate": 7.526060853071654e-06, + "loss": 0.0184, + "step": 80610 + }, + { + "epoch": 3.76, + "learning_rate": 7.525277068016868e-06, + "loss": 0.034, + "step": 80615 + }, + { + "epoch": 3.76, + "learning_rate": 7.524493282962081e-06, + "loss": 0.0559, + "step": 80620 + }, + { + "epoch": 3.76, + "learning_rate": 7.523709497907295e-06, + "loss": 0.0886, + "step": 80625 + }, + { + "epoch": 3.76, + "learning_rate": 7.522925712852508e-06, + "loss": 0.1356, + "step": 80630 + }, + { + "epoch": 3.76, + "learning_rate": 7.522141927797722e-06, + "loss": 0.2386, + "step": 80635 + }, + { + "epoch": 3.76, + "learning_rate": 7.521358142742935e-06, + "loss": 0.1457, + "step": 80640 + }, + { + "epoch": 3.76, + "learning_rate": 7.520574357688149e-06, + "loss": 0.2578, + "step": 80645 + }, + { + "epoch": 3.76, + "learning_rate": 7.519790572633362e-06, + "loss": 0.0684, + "step": 80650 + }, + { + "epoch": 3.76, + "learning_rate": 7.519006787578576e-06, + "loss": 0.0222, + "step": 80655 + }, + { + "epoch": 3.76, + "learning_rate": 7.518223002523789e-06, + "loss": 0.0702, + "step": 80660 + }, + { + "epoch": 3.76, + "learning_rate": 7.517439217469002e-06, + "loss": 0.0117, + "step": 80665 + }, + { + "epoch": 3.76, + "learning_rate": 7.516655432414215e-06, + "loss": 0.0342, + "step": 80670 + }, + { + "epoch": 3.76, + "learning_rate": 7.515871647359429e-06, + "loss": 0.1044, + "step": 80675 + }, + { + "epoch": 3.76, + "learning_rate": 7.515087862304642e-06, + "loss": 0.1218, + "step": 80680 + }, + { + "epoch": 3.76, + "learning_rate": 7.514304077249856e-06, + "loss": 0.1304, + "step": 80685 + }, + { + "epoch": 3.77, + "learning_rate": 7.513520292195069e-06, + "loss": 0.2197, + "step": 80690 + }, + { + "epoch": 3.77, + "learning_rate": 7.512736507140283e-06, + "loss": 0.3307, + "step": 80695 + }, + { + "epoch": 3.77, + "learning_rate": 7.511952722085496e-06, + "loss": 0.07, + "step": 80700 + }, + { + "epoch": 3.77, + "learning_rate": 7.51116893703071e-06, + "loss": 0.0504, + "step": 80705 + }, + { + "epoch": 3.77, + "learning_rate": 7.510385151975923e-06, + "loss": 0.0316, + "step": 80710 + }, + { + "epoch": 3.77, + "learning_rate": 7.509601366921137e-06, + "loss": 0.0594, + "step": 80715 + }, + { + "epoch": 3.77, + "learning_rate": 7.50881758186635e-06, + "loss": 0.1011, + "step": 80720 + }, + { + "epoch": 3.77, + "learning_rate": 7.508033796811564e-06, + "loss": 0.0862, + "step": 80725 + }, + { + "epoch": 3.77, + "learning_rate": 7.507250011756776e-06, + "loss": 0.1308, + "step": 80730 + }, + { + "epoch": 3.77, + "learning_rate": 7.50646622670199e-06, + "loss": 0.1348, + "step": 80735 + }, + { + "epoch": 3.77, + "learning_rate": 7.505682441647203e-06, + "loss": 0.2836, + "step": 80740 + }, + { + "epoch": 3.77, + "learning_rate": 7.504898656592417e-06, + "loss": 0.3387, + "step": 80745 + }, + { + "epoch": 3.77, + "learning_rate": 7.50411487153763e-06, + "loss": 0.1073, + "step": 80750 + }, + { + "epoch": 3.77, + "learning_rate": 7.503331086482844e-06, + "loss": 0.016, + "step": 80755 + }, + { + "epoch": 3.77, + "learning_rate": 7.502547301428057e-06, + "loss": 0.0336, + "step": 80760 + }, + { + "epoch": 3.77, + "learning_rate": 7.501763516373271e-06, + "loss": 0.0689, + "step": 80765 + }, + { + "epoch": 3.77, + "learning_rate": 7.500979731318484e-06, + "loss": 0.0737, + "step": 80770 + }, + { + "epoch": 3.77, + "learning_rate": 7.5001959462636976e-06, + "loss": 0.0906, + "step": 80775 + }, + { + "epoch": 3.77, + "learning_rate": 7.499412161208911e-06, + "loss": 0.1477, + "step": 80780 + }, + { + "epoch": 3.77, + "learning_rate": 7.4986283761541245e-06, + "loss": 0.1053, + "step": 80785 + }, + { + "epoch": 3.77, + "learning_rate": 7.4978445910993384e-06, + "loss": 0.1451, + "step": 80790 + }, + { + "epoch": 3.77, + "learning_rate": 7.497060806044551e-06, + "loss": 0.2624, + "step": 80795 + }, + { + "epoch": 3.77, + "learning_rate": 7.496277020989764e-06, + "loss": 0.0496, + "step": 80800 + }, + { + "epoch": 3.77, + "learning_rate": 7.495493235934978e-06, + "loss": 0.0683, + "step": 80805 + }, + { + "epoch": 3.77, + "learning_rate": 7.494709450880191e-06, + "loss": 0.0146, + "step": 80810 + }, + { + "epoch": 3.77, + "learning_rate": 7.4939256658254046e-06, + "loss": 0.0139, + "step": 80815 + }, + { + "epoch": 3.77, + "learning_rate": 7.493141880770618e-06, + "loss": 0.0735, + "step": 80820 + }, + { + "epoch": 3.77, + "learning_rate": 7.4923580957158315e-06, + "loss": 0.0898, + "step": 80825 + }, + { + "epoch": 3.77, + "learning_rate": 7.491574310661045e-06, + "loss": 0.0639, + "step": 80830 + }, + { + "epoch": 3.77, + "learning_rate": 7.4907905256062585e-06, + "loss": 0.1063, + "step": 80835 + }, + { + "epoch": 3.77, + "learning_rate": 7.490006740551472e-06, + "loss": 0.1522, + "step": 80840 + }, + { + "epoch": 3.77, + "learning_rate": 7.4892229554966855e-06, + "loss": 0.1718, + "step": 80845 + }, + { + "epoch": 3.77, + "learning_rate": 7.488439170441899e-06, + "loss": 0.1038, + "step": 80850 + }, + { + "epoch": 3.77, + "learning_rate": 7.4876553853871124e-06, + "loss": 0.0347, + "step": 80855 + }, + { + "epoch": 3.77, + "learning_rate": 7.486871600332325e-06, + "loss": 0.0486, + "step": 80860 + }, + { + "epoch": 3.77, + "learning_rate": 7.4860878152775385e-06, + "loss": 0.0466, + "step": 80865 + }, + { + "epoch": 3.77, + "learning_rate": 7.485304030222752e-06, + "loss": 0.1096, + "step": 80870 + }, + { + "epoch": 3.77, + "learning_rate": 7.4845202451679655e-06, + "loss": 0.1127, + "step": 80875 + }, + { + "epoch": 3.77, + "learning_rate": 7.483736460113179e-06, + "loss": 0.108, + "step": 80880 + }, + { + "epoch": 3.77, + "learning_rate": 7.4829526750583925e-06, + "loss": 0.1396, + "step": 80885 + }, + { + "epoch": 3.77, + "learning_rate": 7.482168890003606e-06, + "loss": 0.1624, + "step": 80890 + }, + { + "epoch": 3.77, + "learning_rate": 7.4813851049488194e-06, + "loss": 0.2717, + "step": 80895 + }, + { + "epoch": 3.77, + "learning_rate": 7.480601319894033e-06, + "loss": 0.0359, + "step": 80900 + }, + { + "epoch": 3.78, + "learning_rate": 7.479817534839246e-06, + "loss": 0.0532, + "step": 80905 + }, + { + "epoch": 3.78, + "learning_rate": 7.47903374978446e-06, + "loss": 0.0593, + "step": 80910 + }, + { + "epoch": 3.78, + "learning_rate": 7.478249964729673e-06, + "loss": 0.0459, + "step": 80915 + }, + { + "epoch": 3.78, + "learning_rate": 7.477466179674887e-06, + "loss": 0.0694, + "step": 80920 + }, + { + "epoch": 3.78, + "learning_rate": 7.4766823946200995e-06, + "loss": 0.0415, + "step": 80925 + }, + { + "epoch": 3.78, + "learning_rate": 7.475898609565313e-06, + "loss": 0.106, + "step": 80930 + }, + { + "epoch": 3.78, + "learning_rate": 7.4751148245105265e-06, + "loss": 0.1636, + "step": 80935 + }, + { + "epoch": 3.78, + "learning_rate": 7.47433103945574e-06, + "loss": 0.1601, + "step": 80940 + }, + { + "epoch": 3.78, + "learning_rate": 7.473547254400953e-06, + "loss": 0.277, + "step": 80945 + }, + { + "epoch": 3.78, + "learning_rate": 7.472763469346167e-06, + "loss": 0.065, + "step": 80950 + }, + { + "epoch": 3.78, + "learning_rate": 7.47197968429138e-06, + "loss": 0.0045, + "step": 80955 + }, + { + "epoch": 3.78, + "learning_rate": 7.471195899236594e-06, + "loss": 0.0261, + "step": 80960 + }, + { + "epoch": 3.78, + "learning_rate": 7.470412114181807e-06, + "loss": 0.0298, + "step": 80965 + }, + { + "epoch": 3.78, + "learning_rate": 7.469628329127021e-06, + "loss": 0.0836, + "step": 80970 + }, + { + "epoch": 3.78, + "learning_rate": 7.468844544072234e-06, + "loss": 0.0585, + "step": 80975 + }, + { + "epoch": 3.78, + "learning_rate": 7.468060759017448e-06, + "loss": 0.0885, + "step": 80980 + }, + { + "epoch": 3.78, + "learning_rate": 7.467276973962661e-06, + "loss": 0.1012, + "step": 80985 + }, + { + "epoch": 3.78, + "learning_rate": 7.466493188907874e-06, + "loss": 0.1778, + "step": 80990 + }, + { + "epoch": 3.78, + "learning_rate": 7.465709403853087e-06, + "loss": 0.1979, + "step": 80995 + }, + { + "epoch": 3.78, + "learning_rate": 7.464925618798301e-06, + "loss": 0.0336, + "step": 81000 + }, + { + "epoch": 3.78, + "learning_rate": 7.464141833743514e-06, + "loss": 0.0493, + "step": 81005 + }, + { + "epoch": 3.78, + "learning_rate": 7.463358048688728e-06, + "loss": 0.0297, + "step": 81010 + }, + { + "epoch": 3.78, + "learning_rate": 7.462574263633941e-06, + "loss": 0.017, + "step": 81015 + }, + { + "epoch": 3.78, + "learning_rate": 7.461790478579155e-06, + "loss": 0.0602, + "step": 81020 + }, + { + "epoch": 3.78, + "learning_rate": 7.461006693524368e-06, + "loss": 0.0804, + "step": 81025 + }, + { + "epoch": 3.78, + "learning_rate": 7.460222908469582e-06, + "loss": 0.0775, + "step": 81030 + }, + { + "epoch": 3.78, + "learning_rate": 7.459439123414795e-06, + "loss": 0.2335, + "step": 81035 + }, + { + "epoch": 3.78, + "learning_rate": 7.458655338360009e-06, + "loss": 0.1755, + "step": 81040 + }, + { + "epoch": 3.78, + "learning_rate": 7.457871553305222e-06, + "loss": 0.2894, + "step": 81045 + }, + { + "epoch": 3.78, + "learning_rate": 7.457087768250436e-06, + "loss": 0.0208, + "step": 81050 + }, + { + "epoch": 3.78, + "learning_rate": 7.456303983195648e-06, + "loss": 0.0958, + "step": 81055 + }, + { + "epoch": 3.78, + "learning_rate": 7.455520198140862e-06, + "loss": 0.0727, + "step": 81060 + }, + { + "epoch": 3.78, + "learning_rate": 7.454736413086075e-06, + "loss": 0.0566, + "step": 81065 + }, + { + "epoch": 3.78, + "learning_rate": 7.453952628031289e-06, + "loss": 0.0311, + "step": 81070 + }, + { + "epoch": 3.78, + "learning_rate": 7.453168842976502e-06, + "loss": 0.1151, + "step": 81075 + }, + { + "epoch": 3.78, + "learning_rate": 7.452385057921716e-06, + "loss": 0.1571, + "step": 81080 + }, + { + "epoch": 3.78, + "learning_rate": 7.451601272866929e-06, + "loss": 0.1032, + "step": 81085 + }, + { + "epoch": 3.78, + "learning_rate": 7.450817487812143e-06, + "loss": 0.1239, + "step": 81090 + }, + { + "epoch": 3.78, + "learning_rate": 7.450033702757356e-06, + "loss": 0.3272, + "step": 81095 + }, + { + "epoch": 3.78, + "learning_rate": 7.44924991770257e-06, + "loss": 0.088, + "step": 81100 + }, + { + "epoch": 3.78, + "learning_rate": 7.448466132647784e-06, + "loss": 0.0075, + "step": 81105 + }, + { + "epoch": 3.78, + "learning_rate": 7.447682347592997e-06, + "loss": 0.0353, + "step": 81110 + }, + { + "epoch": 3.78, + "learning_rate": 7.446898562538211e-06, + "loss": 0.0448, + "step": 81115 + }, + { + "epoch": 3.79, + "learning_rate": 7.446114777483423e-06, + "loss": 0.0518, + "step": 81120 + }, + { + "epoch": 3.79, + "learning_rate": 7.445330992428636e-06, + "loss": 0.1159, + "step": 81125 + }, + { + "epoch": 3.79, + "learning_rate": 7.44454720737385e-06, + "loss": 0.0917, + "step": 81130 + }, + { + "epoch": 3.79, + "learning_rate": 7.443763422319063e-06, + "loss": 0.1232, + "step": 81135 + }, + { + "epoch": 3.79, + "learning_rate": 7.442979637264277e-06, + "loss": 0.2608, + "step": 81140 + }, + { + "epoch": 3.79, + "learning_rate": 7.442195852209491e-06, + "loss": 0.2487, + "step": 81145 + }, + { + "epoch": 3.79, + "learning_rate": 7.441412067154704e-06, + "loss": 0.0297, + "step": 81150 + }, + { + "epoch": 3.79, + "learning_rate": 7.440628282099918e-06, + "loss": 0.0077, + "step": 81155 + }, + { + "epoch": 3.79, + "learning_rate": 7.439844497045131e-06, + "loss": 0.0524, + "step": 81160 + }, + { + "epoch": 3.79, + "learning_rate": 7.439060711990345e-06, + "loss": 0.0355, + "step": 81165 + }, + { + "epoch": 3.79, + "learning_rate": 7.438276926935558e-06, + "loss": 0.0954, + "step": 81170 + }, + { + "epoch": 3.79, + "learning_rate": 7.437493141880772e-06, + "loss": 0.1215, + "step": 81175 + }, + { + "epoch": 3.79, + "learning_rate": 7.436709356825985e-06, + "loss": 0.1101, + "step": 81180 + }, + { + "epoch": 3.79, + "learning_rate": 7.435925571771197e-06, + "loss": 0.1134, + "step": 81185 + }, + { + "epoch": 3.79, + "learning_rate": 7.435141786716411e-06, + "loss": 0.1016, + "step": 81190 + }, + { + "epoch": 3.79, + "learning_rate": 7.434358001661625e-06, + "loss": 0.3215, + "step": 81195 + }, + { + "epoch": 3.79, + "learning_rate": 7.433574216606838e-06, + "loss": 0.0755, + "step": 81200 + }, + { + "epoch": 3.79, + "learning_rate": 7.432790431552052e-06, + "loss": 0.0231, + "step": 81205 + }, + { + "epoch": 3.79, + "learning_rate": 7.432006646497265e-06, + "loss": 0.0457, + "step": 81210 + }, + { + "epoch": 3.79, + "learning_rate": 7.431222861442479e-06, + "loss": 0.0341, + "step": 81215 + }, + { + "epoch": 3.79, + "learning_rate": 7.430439076387692e-06, + "loss": 0.1064, + "step": 81220 + }, + { + "epoch": 3.79, + "learning_rate": 7.429655291332906e-06, + "loss": 0.105, + "step": 81225 + }, + { + "epoch": 3.79, + "learning_rate": 7.428871506278119e-06, + "loss": 0.0941, + "step": 81230 + }, + { + "epoch": 3.79, + "learning_rate": 7.428087721223333e-06, + "loss": 0.0624, + "step": 81235 + }, + { + "epoch": 3.79, + "learning_rate": 7.427303936168546e-06, + "loss": 0.2318, + "step": 81240 + }, + { + "epoch": 3.79, + "learning_rate": 7.42652015111376e-06, + "loss": 0.2825, + "step": 81245 + }, + { + "epoch": 3.79, + "learning_rate": 7.425736366058972e-06, + "loss": 0.0709, + "step": 81250 + }, + { + "epoch": 3.79, + "learning_rate": 7.424952581004186e-06, + "loss": 0.0251, + "step": 81255 + }, + { + "epoch": 3.79, + "learning_rate": 7.424168795949399e-06, + "loss": 0.0932, + "step": 81260 + }, + { + "epoch": 3.79, + "learning_rate": 7.423385010894613e-06, + "loss": 0.0567, + "step": 81265 + }, + { + "epoch": 3.79, + "learning_rate": 7.422601225839826e-06, + "loss": 0.0956, + "step": 81270 + }, + { + "epoch": 3.79, + "learning_rate": 7.42181744078504e-06, + "loss": 0.0827, + "step": 81275 + }, + { + "epoch": 3.79, + "learning_rate": 7.421033655730253e-06, + "loss": 0.1244, + "step": 81280 + }, + { + "epoch": 3.79, + "learning_rate": 7.420249870675467e-06, + "loss": 0.049, + "step": 81285 + }, + { + "epoch": 3.79, + "learning_rate": 7.41946608562068e-06, + "loss": 0.1748, + "step": 81290 + }, + { + "epoch": 3.79, + "learning_rate": 7.418682300565894e-06, + "loss": 0.2296, + "step": 81295 + }, + { + "epoch": 3.79, + "learning_rate": 7.417898515511107e-06, + "loss": 0.0165, + "step": 81300 + }, + { + "epoch": 3.79, + "learning_rate": 7.417114730456321e-06, + "loss": 0.0275, + "step": 81305 + }, + { + "epoch": 3.79, + "learning_rate": 7.416330945401534e-06, + "loss": 0.0393, + "step": 81310 + }, + { + "epoch": 3.79, + "learning_rate": 7.415547160346747e-06, + "loss": 0.0505, + "step": 81315 + }, + { + "epoch": 3.79, + "learning_rate": 7.41476337529196e-06, + "loss": 0.148, + "step": 81320 + }, + { + "epoch": 3.79, + "learning_rate": 7.413979590237174e-06, + "loss": 0.0627, + "step": 81325 + }, + { + "epoch": 3.79, + "learning_rate": 7.413195805182387e-06, + "loss": 0.111, + "step": 81330 + }, + { + "epoch": 3.8, + "learning_rate": 7.412412020127601e-06, + "loss": 0.1179, + "step": 81335 + }, + { + "epoch": 3.8, + "learning_rate": 7.411628235072814e-06, + "loss": 0.2385, + "step": 81340 + }, + { + "epoch": 3.8, + "learning_rate": 7.410844450018028e-06, + "loss": 0.2367, + "step": 81345 + }, + { + "epoch": 3.8, + "learning_rate": 7.410060664963241e-06, + "loss": 0.0705, + "step": 81350 + }, + { + "epoch": 3.8, + "learning_rate": 7.409276879908455e-06, + "loss": 0.0118, + "step": 81355 + }, + { + "epoch": 3.8, + "learning_rate": 7.408493094853668e-06, + "loss": 0.0494, + "step": 81360 + }, + { + "epoch": 3.8, + "learning_rate": 7.407709309798882e-06, + "loss": 0.0659, + "step": 81365 + }, + { + "epoch": 3.8, + "learning_rate": 7.406925524744096e-06, + "loss": 0.0815, + "step": 81370 + }, + { + "epoch": 3.8, + "learning_rate": 7.406141739689309e-06, + "loss": 0.1722, + "step": 81375 + }, + { + "epoch": 3.8, + "learning_rate": 7.405357954634521e-06, + "loss": 0.0848, + "step": 81380 + }, + { + "epoch": 3.8, + "learning_rate": 7.404574169579735e-06, + "loss": 0.1096, + "step": 81385 + }, + { + "epoch": 3.8, + "learning_rate": 7.403790384524948e-06, + "loss": 0.2585, + "step": 81390 + }, + { + "epoch": 3.8, + "learning_rate": 7.403006599470162e-06, + "loss": 0.3918, + "step": 81395 + }, + { + "epoch": 3.8, + "learning_rate": 7.402222814415375e-06, + "loss": 0.0702, + "step": 81400 + }, + { + "epoch": 3.8, + "learning_rate": 7.401439029360589e-06, + "loss": 0.016, + "step": 81405 + }, + { + "epoch": 3.8, + "learning_rate": 7.400655244305802e-06, + "loss": 0.0741, + "step": 81410 + }, + { + "epoch": 3.8, + "learning_rate": 7.399871459251016e-06, + "loss": 0.0389, + "step": 81415 + }, + { + "epoch": 3.8, + "learning_rate": 7.3990876741962296e-06, + "loss": 0.0683, + "step": 81420 + }, + { + "epoch": 3.8, + "learning_rate": 7.398303889141443e-06, + "loss": 0.1226, + "step": 81425 + }, + { + "epoch": 3.8, + "learning_rate": 7.3975201040866565e-06, + "loss": 0.1473, + "step": 81430 + }, + { + "epoch": 3.8, + "learning_rate": 7.39673631903187e-06, + "loss": 0.1458, + "step": 81435 + }, + { + "epoch": 3.8, + "learning_rate": 7.3959525339770835e-06, + "loss": 0.1623, + "step": 81440 + }, + { + "epoch": 3.8, + "learning_rate": 7.395168748922296e-06, + "loss": 0.2272, + "step": 81445 + }, + { + "epoch": 3.8, + "learning_rate": 7.394384963867509e-06, + "loss": 0.0278, + "step": 81450 + }, + { + "epoch": 3.8, + "learning_rate": 7.393601178812723e-06, + "loss": 0.026, + "step": 81455 + }, + { + "epoch": 3.8, + "learning_rate": 7.392817393757937e-06, + "loss": 0.0562, + "step": 81460 + }, + { + "epoch": 3.8, + "learning_rate": 7.39203360870315e-06, + "loss": 0.0508, + "step": 81465 + }, + { + "epoch": 3.8, + "learning_rate": 7.3912498236483635e-06, + "loss": 0.067, + "step": 81470 + }, + { + "epoch": 3.8, + "learning_rate": 7.390466038593577e-06, + "loss": 0.1139, + "step": 81475 + }, + { + "epoch": 3.8, + "learning_rate": 7.3896822535387905e-06, + "loss": 0.0491, + "step": 81480 + }, + { + "epoch": 3.8, + "learning_rate": 7.3888984684840036e-06, + "loss": 0.1286, + "step": 81485 + }, + { + "epoch": 3.8, + "learning_rate": 7.3881146834292175e-06, + "loss": 0.2884, + "step": 81490 + }, + { + "epoch": 3.8, + "learning_rate": 7.3873308983744305e-06, + "loss": 0.1804, + "step": 81495 + }, + { + "epoch": 3.8, + "learning_rate": 7.3865471133196444e-06, + "loss": 0.0247, + "step": 81500 + }, + { + "epoch": 3.8, + "learning_rate": 7.3857633282648575e-06, + "loss": 0.0351, + "step": 81505 + }, + { + "epoch": 3.8, + "learning_rate": 7.3849795432100706e-06, + "loss": 0.0495, + "step": 81510 + }, + { + "epoch": 3.8, + "learning_rate": 7.384195758155284e-06, + "loss": 0.0383, + "step": 81515 + }, + { + "epoch": 3.8, + "learning_rate": 7.3834119731004975e-06, + "loss": 0.0481, + "step": 81520 + }, + { + "epoch": 3.8, + "learning_rate": 7.382628188045711e-06, + "loss": 0.096, + "step": 81525 + }, + { + "epoch": 3.8, + "learning_rate": 7.3818444029909245e-06, + "loss": 0.0954, + "step": 81530 + }, + { + "epoch": 3.8, + "learning_rate": 7.3810606179361375e-06, + "loss": 0.1689, + "step": 81535 + }, + { + "epoch": 3.8, + "learning_rate": 7.3802768328813515e-06, + "loss": 0.1517, + "step": 81540 + }, + { + "epoch": 3.81, + "learning_rate": 7.3794930478265645e-06, + "loss": 0.2761, + "step": 81545 + }, + { + "epoch": 3.81, + "learning_rate": 7.378709262771778e-06, + "loss": 0.0598, + "step": 81550 + }, + { + "epoch": 3.81, + "learning_rate": 7.3779254777169915e-06, + "loss": 0.035, + "step": 81555 + }, + { + "epoch": 3.81, + "learning_rate": 7.377141692662205e-06, + "loss": 0.0392, + "step": 81560 + }, + { + "epoch": 3.81, + "learning_rate": 7.3763579076074184e-06, + "loss": 0.0373, + "step": 81565 + }, + { + "epoch": 3.81, + "learning_rate": 7.375574122552632e-06, + "loss": 0.0863, + "step": 81570 + }, + { + "epoch": 3.81, + "learning_rate": 7.3747903374978446e-06, + "loss": 0.0891, + "step": 81575 + }, + { + "epoch": 3.81, + "learning_rate": 7.3740065524430585e-06, + "loss": 0.22, + "step": 81580 + }, + { + "epoch": 3.81, + "learning_rate": 7.3732227673882715e-06, + "loss": 0.1706, + "step": 81585 + }, + { + "epoch": 3.81, + "learning_rate": 7.3724389823334854e-06, + "loss": 0.1848, + "step": 81590 + }, + { + "epoch": 3.81, + "learning_rate": 7.3716551972786985e-06, + "loss": 0.2883, + "step": 81595 + }, + { + "epoch": 3.81, + "learning_rate": 7.370871412223912e-06, + "loss": 0.0452, + "step": 81600 + }, + { + "epoch": 3.81, + "learning_rate": 7.3700876271691255e-06, + "loss": 0.0269, + "step": 81605 + }, + { + "epoch": 3.81, + "learning_rate": 7.369303842114339e-06, + "loss": 0.0488, + "step": 81610 + }, + { + "epoch": 3.81, + "learning_rate": 7.368520057059552e-06, + "loss": 0.1098, + "step": 81615 + }, + { + "epoch": 3.81, + "learning_rate": 7.367736272004766e-06, + "loss": 0.044, + "step": 81620 + }, + { + "epoch": 3.81, + "learning_rate": 7.366952486949979e-06, + "loss": 0.0823, + "step": 81625 + }, + { + "epoch": 3.81, + "learning_rate": 7.366168701895193e-06, + "loss": 0.102, + "step": 81630 + }, + { + "epoch": 3.81, + "learning_rate": 7.365384916840407e-06, + "loss": 0.0871, + "step": 81635 + }, + { + "epoch": 3.81, + "learning_rate": 7.364601131785619e-06, + "loss": 0.1703, + "step": 81640 + }, + { + "epoch": 3.81, + "learning_rate": 7.3638173467308325e-06, + "loss": 0.2861, + "step": 81645 + }, + { + "epoch": 3.81, + "learning_rate": 7.363033561676046e-06, + "loss": 0.0518, + "step": 81650 + }, + { + "epoch": 3.81, + "learning_rate": 7.3622497766212594e-06, + "loss": 0.0568, + "step": 81655 + }, + { + "epoch": 3.81, + "learning_rate": 7.361465991566473e-06, + "loss": 0.0474, + "step": 81660 + }, + { + "epoch": 3.81, + "learning_rate": 7.360682206511686e-06, + "loss": 0.0214, + "step": 81665 + }, + { + "epoch": 3.81, + "learning_rate": 7.3598984214569e-06, + "loss": 0.0658, + "step": 81670 + }, + { + "epoch": 3.81, + "learning_rate": 7.359114636402113e-06, + "loss": 0.0758, + "step": 81675 + }, + { + "epoch": 3.81, + "learning_rate": 7.358330851347327e-06, + "loss": 0.0836, + "step": 81680 + }, + { + "epoch": 3.81, + "learning_rate": 7.357547066292541e-06, + "loss": 0.0765, + "step": 81685 + }, + { + "epoch": 3.81, + "learning_rate": 7.356763281237754e-06, + "loss": 0.1627, + "step": 81690 + }, + { + "epoch": 3.81, + "learning_rate": 7.355979496182968e-06, + "loss": 0.3071, + "step": 81695 + }, + { + "epoch": 3.81, + "learning_rate": 7.355195711128181e-06, + "loss": 0.1123, + "step": 81700 + }, + { + "epoch": 3.81, + "learning_rate": 7.354411926073393e-06, + "loss": 0.0162, + "step": 81705 + }, + { + "epoch": 3.81, + "learning_rate": 7.353628141018607e-06, + "loss": 0.0456, + "step": 81710 + }, + { + "epoch": 3.81, + "learning_rate": 7.35284435596382e-06, + "loss": 0.052, + "step": 81715 + }, + { + "epoch": 3.81, + "learning_rate": 7.352060570909034e-06, + "loss": 0.0999, + "step": 81720 + }, + { + "epoch": 3.81, + "learning_rate": 7.351276785854248e-06, + "loss": 0.1386, + "step": 81725 + }, + { + "epoch": 3.81, + "learning_rate": 7.350493000799461e-06, + "loss": 0.0898, + "step": 81730 + }, + { + "epoch": 3.81, + "learning_rate": 7.349709215744675e-06, + "loss": 0.1664, + "step": 81735 + }, + { + "epoch": 3.81, + "learning_rate": 7.348925430689888e-06, + "loss": 0.2276, + "step": 81740 + }, + { + "epoch": 3.81, + "learning_rate": 7.348141645635102e-06, + "loss": 0.2442, + "step": 81745 + }, + { + "epoch": 3.81, + "learning_rate": 7.347357860580315e-06, + "loss": 0.0724, + "step": 81750 + }, + { + "epoch": 3.81, + "learning_rate": 7.346574075525529e-06, + "loss": 0.0132, + "step": 81755 + }, + { + "epoch": 3.82, + "learning_rate": 7.345790290470742e-06, + "loss": 0.0106, + "step": 81760 + }, + { + "epoch": 3.82, + "learning_rate": 7.345006505415956e-06, + "loss": 0.0188, + "step": 81765 + }, + { + "epoch": 3.82, + "learning_rate": 7.344222720361168e-06, + "loss": 0.0648, + "step": 81770 + }, + { + "epoch": 3.82, + "learning_rate": 7.343438935306382e-06, + "loss": 0.1224, + "step": 81775 + }, + { + "epoch": 3.82, + "learning_rate": 7.342655150251595e-06, + "loss": 0.0454, + "step": 81780 + }, + { + "epoch": 3.82, + "learning_rate": 7.341871365196809e-06, + "loss": 0.1007, + "step": 81785 + }, + { + "epoch": 3.82, + "learning_rate": 7.341087580142022e-06, + "loss": 0.227, + "step": 81790 + }, + { + "epoch": 3.82, + "learning_rate": 7.340303795087236e-06, + "loss": 0.206, + "step": 81795 + }, + { + "epoch": 3.82, + "learning_rate": 7.339520010032449e-06, + "loss": 0.0636, + "step": 81800 + }, + { + "epoch": 3.82, + "learning_rate": 7.338736224977663e-06, + "loss": 0.0383, + "step": 81805 + }, + { + "epoch": 3.82, + "learning_rate": 7.337952439922876e-06, + "loss": 0.0408, + "step": 81810 + }, + { + "epoch": 3.82, + "learning_rate": 7.33716865486809e-06, + "loss": 0.0514, + "step": 81815 + }, + { + "epoch": 3.82, + "learning_rate": 7.336384869813303e-06, + "loss": 0.0681, + "step": 81820 + }, + { + "epoch": 3.82, + "learning_rate": 7.335601084758517e-06, + "loss": 0.0524, + "step": 81825 + }, + { + "epoch": 3.82, + "learning_rate": 7.33481729970373e-06, + "loss": 0.0948, + "step": 81830 + }, + { + "epoch": 3.82, + "learning_rate": 7.334033514648943e-06, + "loss": 0.1006, + "step": 81835 + }, + { + "epoch": 3.82, + "learning_rate": 7.333249729594156e-06, + "loss": 0.1285, + "step": 81840 + }, + { + "epoch": 3.82, + "learning_rate": 7.33246594453937e-06, + "loss": 0.2243, + "step": 81845 + }, + { + "epoch": 3.82, + "learning_rate": 7.331682159484583e-06, + "loss": 0.0631, + "step": 81850 + }, + { + "epoch": 3.82, + "learning_rate": 7.330898374429797e-06, + "loss": 0.0126, + "step": 81855 + }, + { + "epoch": 3.82, + "learning_rate": 7.33011458937501e-06, + "loss": 0.033, + "step": 81860 + }, + { + "epoch": 3.82, + "learning_rate": 7.329330804320224e-06, + "loss": 0.0864, + "step": 81865 + }, + { + "epoch": 3.82, + "learning_rate": 7.328547019265437e-06, + "loss": 0.0507, + "step": 81870 + }, + { + "epoch": 3.82, + "learning_rate": 7.327763234210651e-06, + "loss": 0.0965, + "step": 81875 + }, + { + "epoch": 3.82, + "learning_rate": 7.326979449155864e-06, + "loss": 0.1274, + "step": 81880 + }, + { + "epoch": 3.82, + "learning_rate": 7.326195664101078e-06, + "loss": 0.1476, + "step": 81885 + }, + { + "epoch": 3.82, + "learning_rate": 7.325411879046291e-06, + "loss": 0.239, + "step": 81890 + }, + { + "epoch": 3.82, + "learning_rate": 7.324628093991505e-06, + "loss": 0.2824, + "step": 81895 + }, + { + "epoch": 3.82, + "learning_rate": 7.323844308936717e-06, + "loss": 0.046, + "step": 81900 + }, + { + "epoch": 3.82, + "learning_rate": 7.323060523881931e-06, + "loss": 0.0408, + "step": 81905 + }, + { + "epoch": 3.82, + "learning_rate": 7.322276738827144e-06, + "loss": 0.0289, + "step": 81910 + }, + { + "epoch": 3.82, + "learning_rate": 7.321492953772358e-06, + "loss": 0.0431, + "step": 81915 + }, + { + "epoch": 3.82, + "learning_rate": 7.320709168717571e-06, + "loss": 0.025, + "step": 81920 + }, + { + "epoch": 3.82, + "learning_rate": 7.319925383662785e-06, + "loss": 0.0459, + "step": 81925 + }, + { + "epoch": 3.82, + "learning_rate": 7.319141598607998e-06, + "loss": 0.1007, + "step": 81930 + }, + { + "epoch": 3.82, + "learning_rate": 7.318357813553212e-06, + "loss": 0.1685, + "step": 81935 + }, + { + "epoch": 3.82, + "learning_rate": 7.317574028498425e-06, + "loss": 0.2215, + "step": 81940 + }, + { + "epoch": 3.82, + "learning_rate": 7.316790243443639e-06, + "loss": 0.2574, + "step": 81945 + }, + { + "epoch": 3.82, + "learning_rate": 7.316006458388853e-06, + "loss": 0.0614, + "step": 81950 + }, + { + "epoch": 3.82, + "learning_rate": 7.315222673334066e-06, + "loss": 0.0751, + "step": 81955 + }, + { + "epoch": 3.82, + "learning_rate": 7.31443888827928e-06, + "loss": 0.0696, + "step": 81960 + }, + { + "epoch": 3.82, + "learning_rate": 7.313655103224492e-06, + "loss": 0.0804, + "step": 81965 + }, + { + "epoch": 3.82, + "learning_rate": 7.312871318169705e-06, + "loss": 0.0579, + "step": 81970 + }, + { + "epoch": 3.83, + "learning_rate": 7.312087533114919e-06, + "loss": 0.064, + "step": 81975 + }, + { + "epoch": 3.83, + "learning_rate": 7.311303748060132e-06, + "loss": 0.0764, + "step": 81980 + }, + { + "epoch": 3.83, + "learning_rate": 7.310519963005346e-06, + "loss": 0.1372, + "step": 81985 + }, + { + "epoch": 3.83, + "learning_rate": 7.309736177950559e-06, + "loss": 0.2091, + "step": 81990 + }, + { + "epoch": 3.83, + "learning_rate": 7.308952392895773e-06, + "loss": 0.2188, + "step": 81995 + }, + { + "epoch": 3.83, + "learning_rate": 7.308168607840987e-06, + "loss": 0.0708, + "step": 82000 + }, + { + "epoch": 3.83, + "learning_rate": 7.3073848227862e-06, + "loss": 0.0232, + "step": 82005 + }, + { + "epoch": 3.83, + "learning_rate": 7.306601037731414e-06, + "loss": 0.0316, + "step": 82010 + }, + { + "epoch": 3.83, + "learning_rate": 7.305817252676627e-06, + "loss": 0.0794, + "step": 82015 + }, + { + "epoch": 3.83, + "learning_rate": 7.305033467621841e-06, + "loss": 0.0471, + "step": 82020 + }, + { + "epoch": 3.83, + "learning_rate": 7.304249682567054e-06, + "loss": 0.1317, + "step": 82025 + }, + { + "epoch": 3.83, + "learning_rate": 7.303465897512266e-06, + "loss": 0.1332, + "step": 82030 + }, + { + "epoch": 3.83, + "learning_rate": 7.30268211245748e-06, + "loss": 0.0663, + "step": 82035 + }, + { + "epoch": 3.83, + "learning_rate": 7.301898327402694e-06, + "loss": 0.1334, + "step": 82040 + }, + { + "epoch": 3.83, + "learning_rate": 7.301114542347907e-06, + "loss": 0.2608, + "step": 82045 + }, + { + "epoch": 3.83, + "learning_rate": 7.300330757293121e-06, + "loss": 0.1023, + "step": 82050 + }, + { + "epoch": 3.83, + "learning_rate": 7.299546972238334e-06, + "loss": 0.042, + "step": 82055 + }, + { + "epoch": 3.83, + "learning_rate": 7.298763187183548e-06, + "loss": 0.0138, + "step": 82060 + }, + { + "epoch": 3.83, + "learning_rate": 7.297979402128761e-06, + "loss": 0.0702, + "step": 82065 + }, + { + "epoch": 3.83, + "learning_rate": 7.297195617073975e-06, + "loss": 0.1097, + "step": 82070 + }, + { + "epoch": 3.83, + "learning_rate": 7.296411832019188e-06, + "loss": 0.0231, + "step": 82075 + }, + { + "epoch": 3.83, + "learning_rate": 7.295628046964402e-06, + "loss": 0.0528, + "step": 82080 + }, + { + "epoch": 3.83, + "learning_rate": 7.294844261909615e-06, + "loss": 0.0923, + "step": 82085 + }, + { + "epoch": 3.83, + "learning_rate": 7.2940604768548286e-06, + "loss": 0.153, + "step": 82090 + }, + { + "epoch": 3.83, + "learning_rate": 7.293276691800041e-06, + "loss": 0.2262, + "step": 82095 + }, + { + "epoch": 3.83, + "learning_rate": 7.292492906745255e-06, + "loss": 0.0514, + "step": 82100 + }, + { + "epoch": 3.83, + "learning_rate": 7.291709121690468e-06, + "loss": 0.0909, + "step": 82105 + }, + { + "epoch": 3.83, + "learning_rate": 7.290925336635682e-06, + "loss": 0.0377, + "step": 82110 + }, + { + "epoch": 3.83, + "learning_rate": 7.290141551580895e-06, + "loss": 0.0834, + "step": 82115 + }, + { + "epoch": 3.83, + "learning_rate": 7.289357766526109e-06, + "loss": 0.0565, + "step": 82120 + }, + { + "epoch": 3.83, + "learning_rate": 7.288573981471322e-06, + "loss": 0.1259, + "step": 82125 + }, + { + "epoch": 3.83, + "learning_rate": 7.287790196416536e-06, + "loss": 0.1021, + "step": 82130 + }, + { + "epoch": 3.83, + "learning_rate": 7.287006411361749e-06, + "loss": 0.1543, + "step": 82135 + }, + { + "epoch": 3.83, + "learning_rate": 7.2862226263069625e-06, + "loss": 0.1117, + "step": 82140 + }, + { + "epoch": 3.83, + "learning_rate": 7.285438841252176e-06, + "loss": 0.2498, + "step": 82145 + }, + { + "epoch": 3.83, + "learning_rate": 7.2846550561973895e-06, + "loss": 0.074, + "step": 82150 + }, + { + "epoch": 3.83, + "learning_rate": 7.2838712711426026e-06, + "loss": 0.014, + "step": 82155 + }, + { + "epoch": 3.83, + "learning_rate": 7.283087486087816e-06, + "loss": 0.0593, + "step": 82160 + }, + { + "epoch": 3.83, + "learning_rate": 7.282303701033029e-06, + "loss": 0.0556, + "step": 82165 + }, + { + "epoch": 3.83, + "learning_rate": 7.281519915978243e-06, + "loss": 0.1292, + "step": 82170 + }, + { + "epoch": 3.83, + "learning_rate": 7.280736130923456e-06, + "loss": 0.1866, + "step": 82175 + }, + { + "epoch": 3.83, + "learning_rate": 7.2799523458686696e-06, + "loss": 0.1015, + "step": 82180 + }, + { + "epoch": 3.83, + "learning_rate": 7.279168560813883e-06, + "loss": 0.1122, + "step": 82185 + }, + { + "epoch": 3.84, + "learning_rate": 7.2783847757590965e-06, + "loss": 0.2874, + "step": 82190 + }, + { + "epoch": 3.84, + "learning_rate": 7.27760099070431e-06, + "loss": 0.2783, + "step": 82195 + }, + { + "epoch": 3.84, + "learning_rate": 7.2768172056495235e-06, + "loss": 0.0762, + "step": 82200 + }, + { + "epoch": 3.84, + "learning_rate": 7.2760334205947365e-06, + "loss": 0.0206, + "step": 82205 + }, + { + "epoch": 3.84, + "learning_rate": 7.2752496355399504e-06, + "loss": 0.0353, + "step": 82210 + }, + { + "epoch": 3.84, + "learning_rate": 7.274465850485164e-06, + "loss": 0.0914, + "step": 82215 + }, + { + "epoch": 3.84, + "learning_rate": 7.273682065430377e-06, + "loss": 0.0593, + "step": 82220 + }, + { + "epoch": 3.84, + "learning_rate": 7.27289828037559e-06, + "loss": 0.0548, + "step": 82225 + }, + { + "epoch": 3.84, + "learning_rate": 7.2721144953208035e-06, + "loss": 0.1621, + "step": 82230 + }, + { + "epoch": 3.84, + "learning_rate": 7.271330710266017e-06, + "loss": 0.1095, + "step": 82235 + }, + { + "epoch": 3.84, + "learning_rate": 7.2705469252112305e-06, + "loss": 0.1579, + "step": 82240 + }, + { + "epoch": 3.84, + "learning_rate": 7.2697631401564436e-06, + "loss": 0.3142, + "step": 82245 + }, + { + "epoch": 3.84, + "learning_rate": 7.2689793551016575e-06, + "loss": 0.0763, + "step": 82250 + }, + { + "epoch": 3.84, + "learning_rate": 7.2681955700468705e-06, + "loss": 0.0297, + "step": 82255 + }, + { + "epoch": 3.84, + "learning_rate": 7.2674117849920844e-06, + "loss": 0.051, + "step": 82260 + }, + { + "epoch": 3.84, + "learning_rate": 7.266627999937298e-06, + "loss": 0.0488, + "step": 82265 + }, + { + "epoch": 3.84, + "learning_rate": 7.265844214882511e-06, + "loss": 0.069, + "step": 82270 + }, + { + "epoch": 3.84, + "learning_rate": 7.265060429827725e-06, + "loss": 0.0763, + "step": 82275 + }, + { + "epoch": 3.84, + "learning_rate": 7.264276644772938e-06, + "loss": 0.0423, + "step": 82280 + }, + { + "epoch": 3.84, + "learning_rate": 7.263492859718152e-06, + "loss": 0.0885, + "step": 82285 + }, + { + "epoch": 3.84, + "learning_rate": 7.2627090746633645e-06, + "loss": 0.1617, + "step": 82290 + }, + { + "epoch": 3.84, + "learning_rate": 7.2619252896085775e-06, + "loss": 0.3108, + "step": 82295 + }, + { + "epoch": 3.84, + "learning_rate": 7.2611415045537914e-06, + "loss": 0.0586, + "step": 82300 + }, + { + "epoch": 3.84, + "learning_rate": 7.260357719499005e-06, + "loss": 0.0536, + "step": 82305 + }, + { + "epoch": 3.84, + "learning_rate": 7.259573934444218e-06, + "loss": 0.0501, + "step": 82310 + }, + { + "epoch": 3.84, + "learning_rate": 7.258790149389432e-06, + "loss": 0.0268, + "step": 82315 + }, + { + "epoch": 3.84, + "learning_rate": 7.258006364334645e-06, + "loss": 0.1857, + "step": 82320 + }, + { + "epoch": 3.84, + "learning_rate": 7.257222579279859e-06, + "loss": 0.0637, + "step": 82325 + }, + { + "epoch": 3.84, + "learning_rate": 7.256438794225072e-06, + "loss": 0.1362, + "step": 82330 + }, + { + "epoch": 3.84, + "learning_rate": 7.255655009170286e-06, + "loss": 0.1791, + "step": 82335 + }, + { + "epoch": 3.84, + "learning_rate": 7.254871224115499e-06, + "loss": 0.1739, + "step": 82340 + }, + { + "epoch": 3.84, + "learning_rate": 7.254087439060713e-06, + "loss": 0.222, + "step": 82345 + }, + { + "epoch": 3.84, + "learning_rate": 7.253303654005926e-06, + "loss": 0.0556, + "step": 82350 + }, + { + "epoch": 3.84, + "learning_rate": 7.252519868951139e-06, + "loss": 0.0175, + "step": 82355 + }, + { + "epoch": 3.84, + "learning_rate": 7.251736083896352e-06, + "loss": 0.0208, + "step": 82360 + }, + { + "epoch": 3.84, + "learning_rate": 7.250952298841566e-06, + "loss": 0.0106, + "step": 82365 + }, + { + "epoch": 3.84, + "learning_rate": 7.250168513786779e-06, + "loss": 0.0689, + "step": 82370 + }, + { + "epoch": 3.84, + "learning_rate": 7.249384728731993e-06, + "loss": 0.0633, + "step": 82375 + }, + { + "epoch": 3.84, + "learning_rate": 7.248600943677206e-06, + "loss": 0.0498, + "step": 82380 + }, + { + "epoch": 3.84, + "learning_rate": 7.24781715862242e-06, + "loss": 0.1595, + "step": 82385 + }, + { + "epoch": 3.84, + "learning_rate": 7.247033373567633e-06, + "loss": 0.1389, + "step": 82390 + }, + { + "epoch": 3.84, + "learning_rate": 7.246249588512847e-06, + "loss": 0.2956, + "step": 82395 + }, + { + "epoch": 3.84, + "learning_rate": 7.24546580345806e-06, + "loss": 0.0601, + "step": 82400 + }, + { + "epoch": 3.85, + "learning_rate": 7.244682018403274e-06, + "loss": 0.0084, + "step": 82405 + }, + { + "epoch": 3.85, + "learning_rate": 7.243898233348487e-06, + "loss": 0.0508, + "step": 82410 + }, + { + "epoch": 3.85, + "learning_rate": 7.243114448293701e-06, + "loss": 0.0388, + "step": 82415 + }, + { + "epoch": 3.85, + "learning_rate": 7.242330663238913e-06, + "loss": 0.0569, + "step": 82420 + }, + { + "epoch": 3.85, + "learning_rate": 7.241546878184127e-06, + "loss": 0.0943, + "step": 82425 + }, + { + "epoch": 3.85, + "learning_rate": 7.24076309312934e-06, + "loss": 0.0948, + "step": 82430 + }, + { + "epoch": 3.85, + "learning_rate": 7.239979308074554e-06, + "loss": 0.1843, + "step": 82435 + }, + { + "epoch": 3.85, + "learning_rate": 7.239195523019767e-06, + "loss": 0.2833, + "step": 82440 + }, + { + "epoch": 3.85, + "learning_rate": 7.238411737964981e-06, + "loss": 0.1999, + "step": 82445 + }, + { + "epoch": 3.85, + "learning_rate": 7.237627952910194e-06, + "loss": 0.056, + "step": 82450 + }, + { + "epoch": 3.85, + "learning_rate": 7.236844167855408e-06, + "loss": 0.009, + "step": 82455 + }, + { + "epoch": 3.85, + "learning_rate": 7.236060382800621e-06, + "loss": 0.0356, + "step": 82460 + }, + { + "epoch": 3.85, + "learning_rate": 7.235276597745835e-06, + "loss": 0.0509, + "step": 82465 + }, + { + "epoch": 3.85, + "learning_rate": 7.234492812691048e-06, + "loss": 0.0332, + "step": 82470 + }, + { + "epoch": 3.85, + "learning_rate": 7.233709027636262e-06, + "loss": 0.0879, + "step": 82475 + }, + { + "epoch": 3.85, + "learning_rate": 7.232925242581476e-06, + "loss": 0.1112, + "step": 82480 + }, + { + "epoch": 3.85, + "learning_rate": 7.232141457526688e-06, + "loss": 0.1709, + "step": 82485 + }, + { + "epoch": 3.85, + "learning_rate": 7.231357672471901e-06, + "loss": 0.1559, + "step": 82490 + }, + { + "epoch": 3.85, + "learning_rate": 7.230573887417115e-06, + "loss": 0.2309, + "step": 82495 + }, + { + "epoch": 3.85, + "learning_rate": 7.229790102362328e-06, + "loss": 0.0397, + "step": 82500 + }, + { + "epoch": 3.85, + "learning_rate": 7.229006317307542e-06, + "loss": 0.0336, + "step": 82505 + }, + { + "epoch": 3.85, + "learning_rate": 7.228222532252755e-06, + "loss": 0.0623, + "step": 82510 + }, + { + "epoch": 3.85, + "learning_rate": 7.227438747197969e-06, + "loss": 0.0456, + "step": 82515 + }, + { + "epoch": 3.85, + "learning_rate": 7.226654962143182e-06, + "loss": 0.0266, + "step": 82520 + }, + { + "epoch": 3.85, + "learning_rate": 7.225871177088396e-06, + "loss": 0.067, + "step": 82525 + }, + { + "epoch": 3.85, + "learning_rate": 7.22508739203361e-06, + "loss": 0.1142, + "step": 82530 + }, + { + "epoch": 3.85, + "learning_rate": 7.224303606978823e-06, + "loss": 0.1408, + "step": 82535 + }, + { + "epoch": 3.85, + "learning_rate": 7.223519821924037e-06, + "loss": 0.1929, + "step": 82540 + }, + { + "epoch": 3.85, + "learning_rate": 7.22273603686925e-06, + "loss": 0.2066, + "step": 82545 + }, + { + "epoch": 3.85, + "learning_rate": 7.221952251814462e-06, + "loss": 0.0644, + "step": 82550 + }, + { + "epoch": 3.85, + "learning_rate": 7.221168466759676e-06, + "loss": 0.0071, + "step": 82555 + }, + { + "epoch": 3.85, + "learning_rate": 7.220384681704889e-06, + "loss": 0.0073, + "step": 82560 + }, + { + "epoch": 3.85, + "learning_rate": 7.219600896650103e-06, + "loss": 0.0488, + "step": 82565 + }, + { + "epoch": 3.85, + "learning_rate": 7.218817111595316e-06, + "loss": 0.03, + "step": 82570 + }, + { + "epoch": 3.85, + "learning_rate": 7.21803332654053e-06, + "loss": 0.0536, + "step": 82575 + }, + { + "epoch": 3.85, + "learning_rate": 7.217249541485744e-06, + "loss": 0.0703, + "step": 82580 + }, + { + "epoch": 3.85, + "learning_rate": 7.216465756430957e-06, + "loss": 0.1186, + "step": 82585 + }, + { + "epoch": 3.85, + "learning_rate": 7.215681971376171e-06, + "loss": 0.1826, + "step": 82590 + }, + { + "epoch": 3.85, + "learning_rate": 7.214898186321384e-06, + "loss": 0.2697, + "step": 82595 + }, + { + "epoch": 3.85, + "learning_rate": 7.214114401266598e-06, + "loss": 0.095, + "step": 82600 + }, + { + "epoch": 3.85, + "learning_rate": 7.213330616211811e-06, + "loss": 0.0544, + "step": 82605 + }, + { + "epoch": 3.85, + "learning_rate": 7.212546831157025e-06, + "loss": 0.0774, + "step": 82610 + }, + { + "epoch": 3.85, + "learning_rate": 7.211763046102237e-06, + "loss": 0.0261, + "step": 82615 + }, + { + "epoch": 3.86, + "learning_rate": 7.210979261047451e-06, + "loss": 0.0929, + "step": 82620 + }, + { + "epoch": 3.86, + "learning_rate": 7.210195475992664e-06, + "loss": 0.0937, + "step": 82625 + }, + { + "epoch": 3.86, + "learning_rate": 7.209411690937878e-06, + "loss": 0.1644, + "step": 82630 + }, + { + "epoch": 3.86, + "learning_rate": 7.208627905883091e-06, + "loss": 0.2539, + "step": 82635 + }, + { + "epoch": 3.86, + "learning_rate": 7.207844120828305e-06, + "loss": 0.2675, + "step": 82640 + }, + { + "epoch": 3.86, + "learning_rate": 7.207060335773518e-06, + "loss": 0.4391, + "step": 82645 + }, + { + "epoch": 3.86, + "learning_rate": 7.206276550718732e-06, + "loss": 0.1219, + "step": 82650 + }, + { + "epoch": 3.86, + "learning_rate": 7.205492765663945e-06, + "loss": 0.034, + "step": 82655 + }, + { + "epoch": 3.86, + "learning_rate": 7.204708980609159e-06, + "loss": 0.0426, + "step": 82660 + }, + { + "epoch": 3.86, + "learning_rate": 7.203925195554372e-06, + "loss": 0.0084, + "step": 82665 + }, + { + "epoch": 3.86, + "learning_rate": 7.203141410499586e-06, + "loss": 0.0198, + "step": 82670 + }, + { + "epoch": 3.86, + "learning_rate": 7.202357625444799e-06, + "loss": 0.1924, + "step": 82675 + }, + { + "epoch": 3.86, + "learning_rate": 7.201573840390012e-06, + "loss": 0.1191, + "step": 82680 + }, + { + "epoch": 3.86, + "learning_rate": 7.200790055335225e-06, + "loss": 0.1241, + "step": 82685 + }, + { + "epoch": 3.86, + "learning_rate": 7.200006270280439e-06, + "loss": 0.1177, + "step": 82690 + }, + { + "epoch": 3.86, + "learning_rate": 7.199222485225652e-06, + "loss": 0.1445, + "step": 82695 + }, + { + "epoch": 3.86, + "learning_rate": 7.198438700170866e-06, + "loss": 0.1037, + "step": 82700 + }, + { + "epoch": 3.86, + "learning_rate": 7.197654915116079e-06, + "loss": 0.0354, + "step": 82705 + }, + { + "epoch": 3.86, + "learning_rate": 7.196871130061293e-06, + "loss": 0.0342, + "step": 82710 + }, + { + "epoch": 3.86, + "learning_rate": 7.196087345006506e-06, + "loss": 0.0374, + "step": 82715 + }, + { + "epoch": 3.86, + "learning_rate": 7.19530355995172e-06, + "loss": 0.0835, + "step": 82720 + }, + { + "epoch": 3.86, + "learning_rate": 7.194519774896933e-06, + "loss": 0.065, + "step": 82725 + }, + { + "epoch": 3.86, + "learning_rate": 7.193735989842147e-06, + "loss": 0.1511, + "step": 82730 + }, + { + "epoch": 3.86, + "learning_rate": 7.19295220478736e-06, + "loss": 0.0832, + "step": 82735 + }, + { + "epoch": 3.86, + "learning_rate": 7.192168419732574e-06, + "loss": 0.3202, + "step": 82740 + }, + { + "epoch": 3.86, + "learning_rate": 7.191384634677786e-06, + "loss": 0.1847, + "step": 82745 + }, + { + "epoch": 3.86, + "learning_rate": 7.190600849623e-06, + "loss": 0.0422, + "step": 82750 + }, + { + "epoch": 3.86, + "learning_rate": 7.189817064568213e-06, + "loss": 0.0422, + "step": 82755 + }, + { + "epoch": 3.86, + "learning_rate": 7.189033279513427e-06, + "loss": 0.0557, + "step": 82760 + }, + { + "epoch": 3.86, + "learning_rate": 7.18824949445864e-06, + "loss": 0.0645, + "step": 82765 + }, + { + "epoch": 3.86, + "learning_rate": 7.187465709403854e-06, + "loss": 0.0985, + "step": 82770 + }, + { + "epoch": 3.86, + "learning_rate": 7.186681924349067e-06, + "loss": 0.1161, + "step": 82775 + }, + { + "epoch": 3.86, + "learning_rate": 7.185898139294281e-06, + "loss": 0.0945, + "step": 82780 + }, + { + "epoch": 3.86, + "learning_rate": 7.185114354239494e-06, + "loss": 0.1213, + "step": 82785 + }, + { + "epoch": 3.86, + "learning_rate": 7.184330569184708e-06, + "loss": 0.1853, + "step": 82790 + }, + { + "epoch": 3.86, + "learning_rate": 7.1835467841299215e-06, + "loss": 0.1965, + "step": 82795 + }, + { + "epoch": 3.86, + "learning_rate": 7.182762999075135e-06, + "loss": 0.0679, + "step": 82800 + }, + { + "epoch": 3.86, + "learning_rate": 7.1819792140203485e-06, + "loss": 0.0218, + "step": 82805 + }, + { + "epoch": 3.86, + "learning_rate": 7.181195428965561e-06, + "loss": 0.0262, + "step": 82810 + }, + { + "epoch": 3.86, + "learning_rate": 7.180411643910774e-06, + "loss": 0.0487, + "step": 82815 + }, + { + "epoch": 3.86, + "learning_rate": 7.179627858855988e-06, + "loss": 0.0454, + "step": 82820 + }, + { + "epoch": 3.86, + "learning_rate": 7.178844073801201e-06, + "loss": 0.064, + "step": 82825 + }, + { + "epoch": 3.86, + "learning_rate": 7.178060288746415e-06, + "loss": 0.0518, + "step": 82830 + }, + { + "epoch": 3.87, + "learning_rate": 7.177276503691628e-06, + "loss": 0.0888, + "step": 82835 + }, + { + "epoch": 3.87, + "learning_rate": 7.176492718636842e-06, + "loss": 0.2183, + "step": 82840 + }, + { + "epoch": 3.87, + "learning_rate": 7.1757089335820555e-06, + "loss": 0.3927, + "step": 82845 + }, + { + "epoch": 3.87, + "learning_rate": 7.1749251485272686e-06, + "loss": 0.0595, + "step": 82850 + }, + { + "epoch": 3.87, + "learning_rate": 7.1741413634724825e-06, + "loss": 0.0189, + "step": 82855 + }, + { + "epoch": 3.87, + "learning_rate": 7.1733575784176955e-06, + "loss": 0.0272, + "step": 82860 + }, + { + "epoch": 3.87, + "learning_rate": 7.1725737933629094e-06, + "loss": 0.0464, + "step": 82865 + }, + { + "epoch": 3.87, + "learning_rate": 7.1717900083081225e-06, + "loss": 0.0883, + "step": 82870 + }, + { + "epoch": 3.87, + "learning_rate": 7.171006223253335e-06, + "loss": 0.0915, + "step": 82875 + }, + { + "epoch": 3.87, + "learning_rate": 7.170222438198549e-06, + "loss": 0.0331, + "step": 82880 + }, + { + "epoch": 3.87, + "learning_rate": 7.1694386531437625e-06, + "loss": 0.111, + "step": 82885 + }, + { + "epoch": 3.87, + "learning_rate": 7.1686548680889756e-06, + "loss": 0.1855, + "step": 82890 + }, + { + "epoch": 3.87, + "learning_rate": 7.1678710830341895e-06, + "loss": 0.1827, + "step": 82895 + }, + { + "epoch": 3.87, + "learning_rate": 7.1670872979794025e-06, + "loss": 0.0682, + "step": 82900 + }, + { + "epoch": 3.87, + "learning_rate": 7.1663035129246164e-06, + "loss": 0.009, + "step": 82905 + }, + { + "epoch": 3.87, + "learning_rate": 7.1655197278698295e-06, + "loss": 0.0417, + "step": 82910 + }, + { + "epoch": 3.87, + "learning_rate": 7.164735942815043e-06, + "loss": 0.129, + "step": 82915 + }, + { + "epoch": 3.87, + "learning_rate": 7.1639521577602565e-06, + "loss": 0.0611, + "step": 82920 + }, + { + "epoch": 3.87, + "learning_rate": 7.16316837270547e-06, + "loss": 0.0629, + "step": 82925 + }, + { + "epoch": 3.87, + "learning_rate": 7.1623845876506834e-06, + "loss": 0.1248, + "step": 82930 + }, + { + "epoch": 3.87, + "learning_rate": 7.161600802595897e-06, + "loss": 0.1549, + "step": 82935 + }, + { + "epoch": 3.87, + "learning_rate": 7.1608170175411095e-06, + "loss": 0.2218, + "step": 82940 + }, + { + "epoch": 3.87, + "learning_rate": 7.1600332324863234e-06, + "loss": 0.433, + "step": 82945 + }, + { + "epoch": 3.87, + "learning_rate": 7.1592494474315365e-06, + "loss": 0.0611, + "step": 82950 + }, + { + "epoch": 3.87, + "learning_rate": 7.15846566237675e-06, + "loss": 0.0288, + "step": 82955 + }, + { + "epoch": 3.87, + "learning_rate": 7.1576818773219635e-06, + "loss": 0.048, + "step": 82960 + }, + { + "epoch": 3.87, + "learning_rate": 7.156898092267177e-06, + "loss": 0.0867, + "step": 82965 + }, + { + "epoch": 3.87, + "learning_rate": 7.1561143072123904e-06, + "loss": 0.0412, + "step": 82970 + }, + { + "epoch": 3.87, + "learning_rate": 7.155330522157604e-06, + "loss": 0.0969, + "step": 82975 + }, + { + "epoch": 3.87, + "learning_rate": 7.154546737102817e-06, + "loss": 0.1484, + "step": 82980 + }, + { + "epoch": 3.87, + "learning_rate": 7.153762952048031e-06, + "loss": 0.1506, + "step": 82985 + }, + { + "epoch": 3.87, + "learning_rate": 7.152979166993244e-06, + "loss": 0.2028, + "step": 82990 + }, + { + "epoch": 3.87, + "learning_rate": 7.152195381938458e-06, + "loss": 0.204, + "step": 82995 + }, + { + "epoch": 3.87, + "learning_rate": 7.151411596883671e-06, + "loss": 0.0698, + "step": 83000 + }, + { + "epoch": 3.87, + "learning_rate": 7.150627811828884e-06, + "loss": 0.0254, + "step": 83005 + }, + { + "epoch": 3.87, + "learning_rate": 7.1498440267740974e-06, + "loss": 0.0321, + "step": 83010 + }, + { + "epoch": 3.87, + "learning_rate": 7.149060241719311e-06, + "loss": 0.0639, + "step": 83015 + }, + { + "epoch": 3.87, + "learning_rate": 7.148276456664524e-06, + "loss": 0.0328, + "step": 83020 + }, + { + "epoch": 3.87, + "learning_rate": 7.147492671609738e-06, + "loss": 0.0518, + "step": 83025 + }, + { + "epoch": 3.87, + "learning_rate": 7.146708886554951e-06, + "loss": 0.108, + "step": 83030 + }, + { + "epoch": 3.87, + "learning_rate": 7.145925101500165e-06, + "loss": 0.1571, + "step": 83035 + }, + { + "epoch": 3.87, + "learning_rate": 7.145141316445378e-06, + "loss": 0.1334, + "step": 83040 + }, + { + "epoch": 3.87, + "learning_rate": 7.144357531390592e-06, + "loss": 0.3672, + "step": 83045 + }, + { + "epoch": 3.88, + "learning_rate": 7.143573746335805e-06, + "loss": 0.0319, + "step": 83050 + }, + { + "epoch": 3.88, + "learning_rate": 7.142789961281019e-06, + "loss": 0.0214, + "step": 83055 + }, + { + "epoch": 3.88, + "learning_rate": 7.142006176226233e-06, + "loss": 0.0112, + "step": 83060 + }, + { + "epoch": 3.88, + "learning_rate": 7.141222391171446e-06, + "loss": 0.0584, + "step": 83065 + }, + { + "epoch": 3.88, + "learning_rate": 7.140438606116658e-06, + "loss": 0.0678, + "step": 83070 + }, + { + "epoch": 3.88, + "learning_rate": 7.139654821061872e-06, + "loss": 0.0804, + "step": 83075 + }, + { + "epoch": 3.88, + "learning_rate": 7.138871036007085e-06, + "loss": 0.0552, + "step": 83080 + }, + { + "epoch": 3.88, + "learning_rate": 7.138087250952299e-06, + "loss": 0.0976, + "step": 83085 + }, + { + "epoch": 3.88, + "learning_rate": 7.137303465897512e-06, + "loss": 0.2082, + "step": 83090 + }, + { + "epoch": 3.88, + "learning_rate": 7.136519680842726e-06, + "loss": 0.2262, + "step": 83095 + }, + { + "epoch": 3.88, + "learning_rate": 7.135735895787939e-06, + "loss": 0.0472, + "step": 83100 + }, + { + "epoch": 3.88, + "learning_rate": 7.134952110733153e-06, + "loss": 0.0212, + "step": 83105 + }, + { + "epoch": 3.88, + "learning_rate": 7.134168325678367e-06, + "loss": 0.0167, + "step": 83110 + }, + { + "epoch": 3.88, + "learning_rate": 7.13338454062358e-06, + "loss": 0.0431, + "step": 83115 + }, + { + "epoch": 3.88, + "learning_rate": 7.132600755568794e-06, + "loss": 0.0403, + "step": 83120 + }, + { + "epoch": 3.88, + "learning_rate": 7.131816970514007e-06, + "loss": 0.0445, + "step": 83125 + }, + { + "epoch": 3.88, + "learning_rate": 7.131033185459221e-06, + "loss": 0.0775, + "step": 83130 + }, + { + "epoch": 3.88, + "learning_rate": 7.130249400404433e-06, + "loss": 0.1693, + "step": 83135 + }, + { + "epoch": 3.88, + "learning_rate": 7.129465615349646e-06, + "loss": 0.1404, + "step": 83140 + }, + { + "epoch": 3.88, + "learning_rate": 7.12868183029486e-06, + "loss": 0.1983, + "step": 83145 + }, + { + "epoch": 3.88, + "learning_rate": 7.127898045240073e-06, + "loss": 0.0576, + "step": 83150 + }, + { + "epoch": 3.88, + "learning_rate": 7.127114260185287e-06, + "loss": 0.0225, + "step": 83155 + }, + { + "epoch": 3.88, + "learning_rate": 7.126330475130501e-06, + "loss": 0.042, + "step": 83160 + }, + { + "epoch": 3.88, + "learning_rate": 7.125546690075714e-06, + "loss": 0.047, + "step": 83165 + }, + { + "epoch": 3.88, + "learning_rate": 7.124762905020928e-06, + "loss": 0.0822, + "step": 83170 + }, + { + "epoch": 3.88, + "learning_rate": 7.123979119966141e-06, + "loss": 0.0317, + "step": 83175 + }, + { + "epoch": 3.88, + "learning_rate": 7.123195334911355e-06, + "loss": 0.178, + "step": 83180 + }, + { + "epoch": 3.88, + "learning_rate": 7.122411549856568e-06, + "loss": 0.0838, + "step": 83185 + }, + { + "epoch": 3.88, + "learning_rate": 7.121627764801782e-06, + "loss": 0.1366, + "step": 83190 + }, + { + "epoch": 3.88, + "learning_rate": 7.120843979746995e-06, + "loss": 0.22, + "step": 83195 + }, + { + "epoch": 3.88, + "learning_rate": 7.120060194692208e-06, + "loss": 0.0103, + "step": 83200 + }, + { + "epoch": 3.88, + "learning_rate": 7.119276409637421e-06, + "loss": 0.0207, + "step": 83205 + }, + { + "epoch": 3.88, + "learning_rate": 7.118492624582635e-06, + "loss": 0.0341, + "step": 83210 + }, + { + "epoch": 3.88, + "learning_rate": 7.117708839527848e-06, + "loss": 0.0363, + "step": 83215 + }, + { + "epoch": 3.88, + "learning_rate": 7.116925054473062e-06, + "loss": 0.0304, + "step": 83220 + }, + { + "epoch": 3.88, + "learning_rate": 7.116141269418275e-06, + "loss": 0.1067, + "step": 83225 + }, + { + "epoch": 3.88, + "learning_rate": 7.115357484363489e-06, + "loss": 0.0815, + "step": 83230 + }, + { + "epoch": 3.88, + "learning_rate": 7.114573699308702e-06, + "loss": 0.1673, + "step": 83235 + }, + { + "epoch": 3.88, + "learning_rate": 7.113789914253916e-06, + "loss": 0.2259, + "step": 83240 + }, + { + "epoch": 3.88, + "learning_rate": 7.113006129199129e-06, + "loss": 0.2584, + "step": 83245 + }, + { + "epoch": 3.88, + "learning_rate": 7.112222344144343e-06, + "loss": 0.1109, + "step": 83250 + }, + { + "epoch": 3.88, + "learning_rate": 7.111438559089556e-06, + "loss": 0.1151, + "step": 83255 + }, + { + "epoch": 3.89, + "learning_rate": 7.11065477403477e-06, + "loss": 0.0398, + "step": 83260 + }, + { + "epoch": 3.89, + "learning_rate": 7.109870988979982e-06, + "loss": 0.0395, + "step": 83265 + }, + { + "epoch": 3.89, + "learning_rate": 7.109087203925196e-06, + "loss": 0.0706, + "step": 83270 + }, + { + "epoch": 3.89, + "learning_rate": 7.108303418870409e-06, + "loss": 0.0666, + "step": 83275 + }, + { + "epoch": 3.89, + "learning_rate": 7.107519633815623e-06, + "loss": 0.0801, + "step": 83280 + }, + { + "epoch": 3.89, + "learning_rate": 7.106735848760836e-06, + "loss": 0.119, + "step": 83285 + }, + { + "epoch": 3.89, + "learning_rate": 7.10595206370605e-06, + "loss": 0.2241, + "step": 83290 + }, + { + "epoch": 3.89, + "learning_rate": 7.105168278651263e-06, + "loss": 0.1682, + "step": 83295 + }, + { + "epoch": 3.89, + "learning_rate": 7.104384493596477e-06, + "loss": 0.0431, + "step": 83300 + }, + { + "epoch": 3.89, + "learning_rate": 7.10360070854169e-06, + "loss": 0.011, + "step": 83305 + }, + { + "epoch": 3.89, + "learning_rate": 7.102816923486904e-06, + "loss": 0.0472, + "step": 83310 + }, + { + "epoch": 3.89, + "learning_rate": 7.102033138432117e-06, + "loss": 0.0674, + "step": 83315 + }, + { + "epoch": 3.89, + "learning_rate": 7.101249353377331e-06, + "loss": 0.0227, + "step": 83320 + }, + { + "epoch": 3.89, + "learning_rate": 7.100465568322545e-06, + "loss": 0.0615, + "step": 83325 + }, + { + "epoch": 3.89, + "learning_rate": 7.099838540278714e-06, + "loss": 0.1183, + "step": 83330 + }, + { + "epoch": 3.89, + "learning_rate": 7.099054755223927e-06, + "loss": 0.0832, + "step": 83335 + }, + { + "epoch": 3.89, + "learning_rate": 7.098270970169141e-06, + "loss": 0.1191, + "step": 83340 + }, + { + "epoch": 3.89, + "learning_rate": 7.097487185114354e-06, + "loss": 0.3515, + "step": 83345 + }, + { + "epoch": 3.89, + "learning_rate": 7.096703400059568e-06, + "loss": 0.106, + "step": 83350 + }, + { + "epoch": 3.89, + "learning_rate": 7.095919615004781e-06, + "loss": 0.0136, + "step": 83355 + }, + { + "epoch": 3.89, + "learning_rate": 7.095135829949995e-06, + "loss": 0.0589, + "step": 83360 + }, + { + "epoch": 3.89, + "learning_rate": 7.094352044895208e-06, + "loss": 0.0601, + "step": 83365 + }, + { + "epoch": 3.89, + "learning_rate": 7.093568259840422e-06, + "loss": 0.0845, + "step": 83370 + }, + { + "epoch": 3.89, + "learning_rate": 7.092784474785635e-06, + "loss": 0.1721, + "step": 83375 + }, + { + "epoch": 3.89, + "learning_rate": 7.092000689730849e-06, + "loss": 0.0775, + "step": 83380 + }, + { + "epoch": 3.89, + "learning_rate": 7.091216904676062e-06, + "loss": 0.1601, + "step": 83385 + }, + { + "epoch": 3.89, + "learning_rate": 7.090433119621276e-06, + "loss": 0.1606, + "step": 83390 + }, + { + "epoch": 3.89, + "learning_rate": 7.089649334566488e-06, + "loss": 0.2723, + "step": 83395 + }, + { + "epoch": 3.89, + "learning_rate": 7.088865549511702e-06, + "loss": 0.0425, + "step": 83400 + }, + { + "epoch": 3.89, + "learning_rate": 7.088081764456915e-06, + "loss": 0.0291, + "step": 83405 + }, + { + "epoch": 3.89, + "learning_rate": 7.087297979402129e-06, + "loss": 0.038, + "step": 83410 + }, + { + "epoch": 3.89, + "learning_rate": 7.086514194347342e-06, + "loss": 0.0745, + "step": 83415 + }, + { + "epoch": 3.89, + "learning_rate": 7.085730409292556e-06, + "loss": 0.0439, + "step": 83420 + }, + { + "epoch": 3.89, + "learning_rate": 7.084946624237769e-06, + "loss": 0.0769, + "step": 83425 + }, + { + "epoch": 3.89, + "learning_rate": 7.084162839182983e-06, + "loss": 0.0507, + "step": 83430 + }, + { + "epoch": 3.89, + "learning_rate": 7.083379054128197e-06, + "loss": 0.1185, + "step": 83435 + }, + { + "epoch": 3.89, + "learning_rate": 7.08259526907341e-06, + "loss": 0.2094, + "step": 83440 + }, + { + "epoch": 3.89, + "learning_rate": 7.081811484018624e-06, + "loss": 0.3072, + "step": 83445 + }, + { + "epoch": 3.89, + "learning_rate": 7.081027698963837e-06, + "loss": 0.044, + "step": 83450 + }, + { + "epoch": 3.89, + "learning_rate": 7.080243913909051e-06, + "loss": 0.0651, + "step": 83455 + }, + { + "epoch": 3.89, + "learning_rate": 7.079460128854263e-06, + "loss": 0.0351, + "step": 83460 + }, + { + "epoch": 3.89, + "learning_rate": 7.078676343799476e-06, + "loss": 0.023, + "step": 83465 + }, + { + "epoch": 3.89, + "learning_rate": 7.07789255874469e-06, + "loss": 0.1239, + "step": 83470 + }, + { + "epoch": 3.9, + "learning_rate": 7.077108773689903e-06, + "loss": 0.1016, + "step": 83475 + }, + { + "epoch": 3.9, + "learning_rate": 7.076324988635117e-06, + "loss": 0.093, + "step": 83480 + }, + { + "epoch": 3.9, + "learning_rate": 7.075541203580331e-06, + "loss": 0.1165, + "step": 83485 + }, + { + "epoch": 3.9, + "learning_rate": 7.074757418525544e-06, + "loss": 0.165, + "step": 83490 + }, + { + "epoch": 3.9, + "learning_rate": 7.073973633470758e-06, + "loss": 0.2434, + "step": 83495 + }, + { + "epoch": 3.9, + "learning_rate": 7.073189848415971e-06, + "loss": 0.0324, + "step": 83500 + }, + { + "epoch": 3.9, + "learning_rate": 7.072406063361185e-06, + "loss": 0.0379, + "step": 83505 + }, + { + "epoch": 3.9, + "learning_rate": 7.071622278306398e-06, + "loss": 0.0729, + "step": 83510 + }, + { + "epoch": 3.9, + "learning_rate": 7.070838493251612e-06, + "loss": 0.0829, + "step": 83515 + }, + { + "epoch": 3.9, + "learning_rate": 7.070054708196825e-06, + "loss": 0.046, + "step": 83520 + }, + { + "epoch": 3.9, + "learning_rate": 7.069270923142037e-06, + "loss": 0.0305, + "step": 83525 + }, + { + "epoch": 3.9, + "learning_rate": 7.068487138087251e-06, + "loss": 0.0839, + "step": 83530 + }, + { + "epoch": 3.9, + "learning_rate": 7.067703353032465e-06, + "loss": 0.1183, + "step": 83535 + }, + { + "epoch": 3.9, + "learning_rate": 7.066919567977678e-06, + "loss": 0.1723, + "step": 83540 + }, + { + "epoch": 3.9, + "learning_rate": 7.066135782922892e-06, + "loss": 0.1977, + "step": 83545 + }, + { + "epoch": 3.9, + "learning_rate": 7.065351997868105e-06, + "loss": 0.0894, + "step": 83550 + }, + { + "epoch": 3.9, + "learning_rate": 7.064568212813319e-06, + "loss": 0.0183, + "step": 83555 + }, + { + "epoch": 3.9, + "learning_rate": 7.063784427758532e-06, + "loss": 0.0516, + "step": 83560 + }, + { + "epoch": 3.9, + "learning_rate": 7.063000642703746e-06, + "loss": 0.0505, + "step": 83565 + }, + { + "epoch": 3.9, + "learning_rate": 7.062216857648959e-06, + "loss": 0.0342, + "step": 83570 + }, + { + "epoch": 3.9, + "learning_rate": 7.061433072594173e-06, + "loss": 0.0858, + "step": 83575 + }, + { + "epoch": 3.9, + "learning_rate": 7.060649287539386e-06, + "loss": 0.0855, + "step": 83580 + }, + { + "epoch": 3.9, + "learning_rate": 7.0598655024846e-06, + "loss": 0.1751, + "step": 83585 + }, + { + "epoch": 3.9, + "learning_rate": 7.059081717429812e-06, + "loss": 0.1959, + "step": 83590 + }, + { + "epoch": 3.9, + "learning_rate": 7.058297932375026e-06, + "loss": 0.2493, + "step": 83595 + }, + { + "epoch": 3.9, + "learning_rate": 7.057514147320239e-06, + "loss": 0.0743, + "step": 83600 + }, + { + "epoch": 3.9, + "learning_rate": 7.056730362265453e-06, + "loss": 0.0131, + "step": 83605 + }, + { + "epoch": 3.9, + "learning_rate": 7.055946577210666e-06, + "loss": 0.029, + "step": 83610 + }, + { + "epoch": 3.9, + "learning_rate": 7.05516279215588e-06, + "loss": 0.0278, + "step": 83615 + }, + { + "epoch": 3.9, + "learning_rate": 7.054379007101093e-06, + "loss": 0.0613, + "step": 83620 + }, + { + "epoch": 3.9, + "learning_rate": 7.053595222046307e-06, + "loss": 0.0682, + "step": 83625 + }, + { + "epoch": 3.9, + "learning_rate": 7.05281143699152e-06, + "loss": 0.0363, + "step": 83630 + }, + { + "epoch": 3.9, + "learning_rate": 7.052027651936734e-06, + "loss": 0.1171, + "step": 83635 + }, + { + "epoch": 3.9, + "learning_rate": 7.051243866881947e-06, + "loss": 0.1654, + "step": 83640 + }, + { + "epoch": 3.9, + "learning_rate": 7.050460081827161e-06, + "loss": 0.2631, + "step": 83645 + }, + { + "epoch": 3.9, + "learning_rate": 7.049676296772374e-06, + "loss": 0.0565, + "step": 83650 + }, + { + "epoch": 3.9, + "learning_rate": 7.048892511717587e-06, + "loss": 0.0405, + "step": 83655 + }, + { + "epoch": 3.9, + "learning_rate": 7.0481087266628e-06, + "loss": 0.0411, + "step": 83660 + }, + { + "epoch": 3.9, + "learning_rate": 7.047324941608014e-06, + "loss": 0.0245, + "step": 83665 + }, + { + "epoch": 3.9, + "learning_rate": 7.046541156553227e-06, + "loss": 0.075, + "step": 83670 + }, + { + "epoch": 3.9, + "learning_rate": 7.045757371498441e-06, + "loss": 0.1692, + "step": 83675 + }, + { + "epoch": 3.9, + "learning_rate": 7.044973586443654e-06, + "loss": 0.1396, + "step": 83680 + }, + { + "epoch": 3.9, + "learning_rate": 7.044189801388868e-06, + "loss": 0.136, + "step": 83685 + }, + { + "epoch": 3.91, + "learning_rate": 7.043406016334081e-06, + "loss": 0.2351, + "step": 83690 + }, + { + "epoch": 3.91, + "learning_rate": 7.042622231279295e-06, + "loss": 0.3361, + "step": 83695 + }, + { + "epoch": 3.91, + "learning_rate": 7.041838446224508e-06, + "loss": 0.1006, + "step": 83700 + }, + { + "epoch": 3.91, + "learning_rate": 7.041054661169722e-06, + "loss": 0.03, + "step": 83705 + }, + { + "epoch": 3.91, + "learning_rate": 7.0402708761149356e-06, + "loss": 0.0043, + "step": 83710 + }, + { + "epoch": 3.91, + "learning_rate": 7.039487091060149e-06, + "loss": 0.0544, + "step": 83715 + }, + { + "epoch": 3.91, + "learning_rate": 7.038703306005361e-06, + "loss": 0.0858, + "step": 83720 + }, + { + "epoch": 3.91, + "learning_rate": 7.037919520950575e-06, + "loss": 0.1035, + "step": 83725 + }, + { + "epoch": 3.91, + "learning_rate": 7.037135735895788e-06, + "loss": 0.1786, + "step": 83730 + }, + { + "epoch": 3.91, + "learning_rate": 7.036351950841002e-06, + "loss": 0.1902, + "step": 83735 + }, + { + "epoch": 3.91, + "learning_rate": 7.035568165786215e-06, + "loss": 0.1655, + "step": 83740 + }, + { + "epoch": 3.91, + "learning_rate": 7.034784380731429e-06, + "loss": 0.3697, + "step": 83745 + }, + { + "epoch": 3.91, + "learning_rate": 7.034000595676643e-06, + "loss": 0.0693, + "step": 83750 + }, + { + "epoch": 3.91, + "learning_rate": 7.033216810621856e-06, + "loss": 0.0175, + "step": 83755 + }, + { + "epoch": 3.91, + "learning_rate": 7.0324330255670695e-06, + "loss": 0.0227, + "step": 83760 + }, + { + "epoch": 3.91, + "learning_rate": 7.031649240512283e-06, + "loss": 0.0299, + "step": 83765 + }, + { + "epoch": 3.91, + "learning_rate": 7.0308654554574965e-06, + "loss": 0.0819, + "step": 83770 + }, + { + "epoch": 3.91, + "learning_rate": 7.0300816704027096e-06, + "loss": 0.0887, + "step": 83775 + }, + { + "epoch": 3.91, + "learning_rate": 7.0292978853479235e-06, + "loss": 0.0337, + "step": 83780 + }, + { + "epoch": 3.91, + "learning_rate": 7.028514100293136e-06, + "loss": 0.1754, + "step": 83785 + }, + { + "epoch": 3.91, + "learning_rate": 7.027730315238349e-06, + "loss": 0.1529, + "step": 83790 + }, + { + "epoch": 3.91, + "learning_rate": 7.026946530183563e-06, + "loss": 0.2586, + "step": 83795 + }, + { + "epoch": 3.91, + "learning_rate": 7.0261627451287766e-06, + "loss": 0.0753, + "step": 83800 + }, + { + "epoch": 3.91, + "learning_rate": 7.02537896007399e-06, + "loss": 0.0732, + "step": 83805 + }, + { + "epoch": 3.91, + "learning_rate": 7.0245951750192035e-06, + "loss": 0.0517, + "step": 83810 + }, + { + "epoch": 3.91, + "learning_rate": 7.023811389964417e-06, + "loss": 0.0656, + "step": 83815 + }, + { + "epoch": 3.91, + "learning_rate": 7.0230276049096305e-06, + "loss": 0.0995, + "step": 83820 + }, + { + "epoch": 3.91, + "learning_rate": 7.0222438198548435e-06, + "loss": 0.0459, + "step": 83825 + }, + { + "epoch": 3.91, + "learning_rate": 7.0214600348000575e-06, + "loss": 0.0869, + "step": 83830 + }, + { + "epoch": 3.91, + "learning_rate": 7.0206762497452705e-06, + "loss": 0.0683, + "step": 83835 + }, + { + "epoch": 3.91, + "learning_rate": 7.019892464690484e-06, + "loss": 0.1981, + "step": 83840 + }, + { + "epoch": 3.91, + "learning_rate": 7.0191086796356975e-06, + "loss": 0.3106, + "step": 83845 + }, + { + "epoch": 3.91, + "learning_rate": 7.0183248945809105e-06, + "loss": 0.0214, + "step": 83850 + }, + { + "epoch": 3.91, + "learning_rate": 7.017541109526124e-06, + "loss": 0.0107, + "step": 83855 + }, + { + "epoch": 3.91, + "learning_rate": 7.0167573244713375e-06, + "loss": 0.0483, + "step": 83860 + }, + { + "epoch": 3.91, + "learning_rate": 7.0159735394165506e-06, + "loss": 0.0288, + "step": 83865 + }, + { + "epoch": 3.91, + "learning_rate": 7.0151897543617645e-06, + "loss": 0.0898, + "step": 83870 + }, + { + "epoch": 3.91, + "learning_rate": 7.0144059693069775e-06, + "loss": 0.1365, + "step": 83875 + }, + { + "epoch": 3.91, + "learning_rate": 7.0136221842521914e-06, + "loss": 0.0615, + "step": 83880 + }, + { + "epoch": 3.91, + "learning_rate": 7.0128383991974045e-06, + "loss": 0.1307, + "step": 83885 + }, + { + "epoch": 3.91, + "learning_rate": 7.012054614142618e-06, + "loss": 0.1897, + "step": 83890 + }, + { + "epoch": 3.91, + "learning_rate": 7.0112708290878315e-06, + "loss": 0.2455, + "step": 83895 + }, + { + "epoch": 3.91, + "learning_rate": 7.010487044033045e-06, + "loss": 0.0779, + "step": 83900 + }, + { + "epoch": 3.92, + "learning_rate": 7.009703258978258e-06, + "loss": 0.0304, + "step": 83905 + }, + { + "epoch": 3.92, + "learning_rate": 7.008919473923472e-06, + "loss": 0.0261, + "step": 83910 + }, + { + "epoch": 3.92, + "learning_rate": 7.0081356888686845e-06, + "loss": 0.0529, + "step": 83915 + }, + { + "epoch": 3.92, + "learning_rate": 7.0073519038138984e-06, + "loss": 0.0457, + "step": 83920 + }, + { + "epoch": 3.92, + "learning_rate": 7.0065681187591115e-06, + "loss": 0.1164, + "step": 83925 + }, + { + "epoch": 3.92, + "learning_rate": 7.005784333704325e-06, + "loss": 0.0913, + "step": 83930 + }, + { + "epoch": 3.92, + "learning_rate": 7.0050005486495385e-06, + "loss": 0.1972, + "step": 83935 + }, + { + "epoch": 3.92, + "learning_rate": 7.004216763594752e-06, + "loss": 0.137, + "step": 83940 + }, + { + "epoch": 3.92, + "learning_rate": 7.0034329785399654e-06, + "loss": 0.2245, + "step": 83945 + }, + { + "epoch": 3.92, + "learning_rate": 7.002649193485179e-06, + "loss": 0.0774, + "step": 83950 + }, + { + "epoch": 3.92, + "learning_rate": 7.001865408430392e-06, + "loss": 0.0612, + "step": 83955 + }, + { + "epoch": 3.92, + "learning_rate": 7.001081623375606e-06, + "loss": 0.0355, + "step": 83960 + }, + { + "epoch": 3.92, + "learning_rate": 7.000297838320819e-06, + "loss": 0.0663, + "step": 83965 + }, + { + "epoch": 3.92, + "learning_rate": 6.999514053266033e-06, + "loss": 0.0779, + "step": 83970 + }, + { + "epoch": 3.92, + "learning_rate": 6.998730268211247e-06, + "loss": 0.1159, + "step": 83975 + }, + { + "epoch": 3.92, + "learning_rate": 6.997946483156459e-06, + "loss": 0.1134, + "step": 83980 + }, + { + "epoch": 3.92, + "learning_rate": 6.9971626981016724e-06, + "loss": 0.0894, + "step": 83985 + }, + { + "epoch": 3.92, + "learning_rate": 6.996378913046886e-06, + "loss": 0.1549, + "step": 83990 + }, + { + "epoch": 3.92, + "learning_rate": 6.995595127992099e-06, + "loss": 0.266, + "step": 83995 + }, + { + "epoch": 3.92, + "learning_rate": 6.994811342937313e-06, + "loss": 0.064, + "step": 84000 + }, + { + "epoch": 3.92, + "learning_rate": 6.994027557882526e-06, + "loss": 0.0812, + "step": 84005 + }, + { + "epoch": 3.92, + "learning_rate": 6.99324377282774e-06, + "loss": 0.0724, + "step": 84010 + }, + { + "epoch": 3.92, + "learning_rate": 6.992459987772954e-06, + "loss": 0.056, + "step": 84015 + }, + { + "epoch": 3.92, + "learning_rate": 6.991676202718167e-06, + "loss": 0.0807, + "step": 84020 + }, + { + "epoch": 3.92, + "learning_rate": 6.990892417663381e-06, + "loss": 0.0549, + "step": 84025 + }, + { + "epoch": 3.92, + "learning_rate": 6.990108632608594e-06, + "loss": 0.0929, + "step": 84030 + }, + { + "epoch": 3.92, + "learning_rate": 6.989324847553808e-06, + "loss": 0.1392, + "step": 84035 + }, + { + "epoch": 3.92, + "learning_rate": 6.988541062499021e-06, + "loss": 0.2502, + "step": 84040 + }, + { + "epoch": 3.92, + "learning_rate": 6.987757277444233e-06, + "loss": 0.3448, + "step": 84045 + }, + { + "epoch": 3.92, + "learning_rate": 6.986973492389447e-06, + "loss": 0.0538, + "step": 84050 + }, + { + "epoch": 3.92, + "learning_rate": 6.98618970733466e-06, + "loss": 0.0152, + "step": 84055 + }, + { + "epoch": 3.92, + "learning_rate": 6.985405922279874e-06, + "loss": 0.074, + "step": 84060 + }, + { + "epoch": 3.92, + "learning_rate": 6.984622137225088e-06, + "loss": 0.0497, + "step": 84065 + }, + { + "epoch": 3.92, + "learning_rate": 6.983838352170301e-06, + "loss": 0.0891, + "step": 84070 + }, + { + "epoch": 3.92, + "learning_rate": 6.983054567115515e-06, + "loss": 0.0899, + "step": 84075 + }, + { + "epoch": 3.92, + "learning_rate": 6.982270782060728e-06, + "loss": 0.0808, + "step": 84080 + }, + { + "epoch": 3.92, + "learning_rate": 6.981486997005942e-06, + "loss": 0.1104, + "step": 84085 + }, + { + "epoch": 3.92, + "learning_rate": 6.980703211951155e-06, + "loss": 0.2043, + "step": 84090 + }, + { + "epoch": 3.92, + "learning_rate": 6.979919426896369e-06, + "loss": 0.2488, + "step": 84095 + }, + { + "epoch": 3.92, + "learning_rate": 6.979135641841582e-06, + "loss": 0.0789, + "step": 84100 + }, + { + "epoch": 3.92, + "learning_rate": 6.978351856786796e-06, + "loss": 0.0489, + "step": 84105 + }, + { + "epoch": 3.92, + "learning_rate": 6.977568071732008e-06, + "loss": 0.107, + "step": 84110 + }, + { + "epoch": 3.92, + "learning_rate": 6.976784286677222e-06, + "loss": 0.0696, + "step": 84115 + }, + { + "epoch": 3.93, + "learning_rate": 6.976000501622435e-06, + "loss": 0.1148, + "step": 84120 + }, + { + "epoch": 3.93, + "learning_rate": 6.975216716567649e-06, + "loss": 0.1428, + "step": 84125 + }, + { + "epoch": 3.93, + "learning_rate": 6.974432931512862e-06, + "loss": 0.12, + "step": 84130 + }, + { + "epoch": 3.93, + "learning_rate": 6.973649146458076e-06, + "loss": 0.2055, + "step": 84135 + }, + { + "epoch": 3.93, + "learning_rate": 6.972865361403289e-06, + "loss": 0.2817, + "step": 84140 + }, + { + "epoch": 3.93, + "learning_rate": 6.972081576348503e-06, + "loss": 0.2038, + "step": 84145 + }, + { + "epoch": 3.93, + "learning_rate": 6.971297791293716e-06, + "loss": 0.1615, + "step": 84150 + }, + { + "epoch": 3.93, + "learning_rate": 6.97051400623893e-06, + "loss": 0.0239, + "step": 84155 + }, + { + "epoch": 3.93, + "learning_rate": 6.969730221184143e-06, + "loss": 0.0394, + "step": 84160 + }, + { + "epoch": 3.93, + "learning_rate": 6.968946436129357e-06, + "loss": 0.0815, + "step": 84165 + }, + { + "epoch": 3.93, + "learning_rate": 6.96816265107457e-06, + "loss": 0.0547, + "step": 84170 + }, + { + "epoch": 3.93, + "learning_rate": 6.967378866019783e-06, + "loss": 0.0423, + "step": 84175 + }, + { + "epoch": 3.93, + "learning_rate": 6.966595080964996e-06, + "loss": 0.1092, + "step": 84180 + }, + { + "epoch": 3.93, + "learning_rate": 6.96581129591021e-06, + "loss": 0.0865, + "step": 84185 + }, + { + "epoch": 3.93, + "learning_rate": 6.965027510855423e-06, + "loss": 0.0807, + "step": 84190 + }, + { + "epoch": 3.93, + "learning_rate": 6.964243725800637e-06, + "loss": 0.2152, + "step": 84195 + }, + { + "epoch": 3.93, + "learning_rate": 6.96345994074585e-06, + "loss": 0.0474, + "step": 84200 + }, + { + "epoch": 3.93, + "learning_rate": 6.962676155691064e-06, + "loss": 0.0093, + "step": 84205 + }, + { + "epoch": 3.93, + "learning_rate": 6.961892370636277e-06, + "loss": 0.0628, + "step": 84210 + }, + { + "epoch": 3.93, + "learning_rate": 6.961108585581491e-06, + "loss": 0.0878, + "step": 84215 + }, + { + "epoch": 3.93, + "learning_rate": 6.960324800526704e-06, + "loss": 0.0281, + "step": 84220 + }, + { + "epoch": 3.93, + "learning_rate": 6.959541015471918e-06, + "loss": 0.1112, + "step": 84225 + }, + { + "epoch": 3.93, + "learning_rate": 6.958757230417131e-06, + "loss": 0.1499, + "step": 84230 + }, + { + "epoch": 3.93, + "learning_rate": 6.957973445362345e-06, + "loss": 0.0709, + "step": 84235 + }, + { + "epoch": 3.93, + "learning_rate": 6.957189660307559e-06, + "loss": 0.0987, + "step": 84240 + }, + { + "epoch": 3.93, + "learning_rate": 6.956405875252771e-06, + "loss": 0.3597, + "step": 84245 + }, + { + "epoch": 3.93, + "learning_rate": 6.955622090197984e-06, + "loss": 0.0683, + "step": 84250 + }, + { + "epoch": 3.93, + "learning_rate": 6.954838305143198e-06, + "loss": 0.043, + "step": 84255 + }, + { + "epoch": 3.93, + "learning_rate": 6.954054520088411e-06, + "loss": 0.0472, + "step": 84260 + }, + { + "epoch": 3.93, + "learning_rate": 6.953270735033625e-06, + "loss": 0.0598, + "step": 84265 + }, + { + "epoch": 3.93, + "learning_rate": 6.952486949978838e-06, + "loss": 0.0462, + "step": 84270 + }, + { + "epoch": 3.93, + "learning_rate": 6.951703164924052e-06, + "loss": 0.0809, + "step": 84275 + }, + { + "epoch": 3.93, + "learning_rate": 6.950919379869265e-06, + "loss": 0.0949, + "step": 84280 + }, + { + "epoch": 3.93, + "learning_rate": 6.950135594814479e-06, + "loss": 0.2274, + "step": 84285 + }, + { + "epoch": 3.93, + "learning_rate": 6.949351809759693e-06, + "loss": 0.1614, + "step": 84290 + }, + { + "epoch": 3.93, + "learning_rate": 6.948568024704906e-06, + "loss": 0.1468, + "step": 84295 + }, + { + "epoch": 3.93, + "learning_rate": 6.94778423965012e-06, + "loss": 0.0695, + "step": 84300 + }, + { + "epoch": 3.93, + "learning_rate": 6.947000454595333e-06, + "loss": 0.04, + "step": 84305 + }, + { + "epoch": 3.93, + "learning_rate": 6.946216669540545e-06, + "loss": 0.0326, + "step": 84310 + }, + { + "epoch": 3.93, + "learning_rate": 6.945432884485759e-06, + "loss": 0.4171, + "step": 84315 + }, + { + "epoch": 3.93, + "learning_rate": 6.944649099430972e-06, + "loss": 0.0765, + "step": 84320 + }, + { + "epoch": 3.93, + "learning_rate": 6.943865314376186e-06, + "loss": 0.0823, + "step": 84325 + }, + { + "epoch": 3.93, + "learning_rate": 6.9430815293214e-06, + "loss": 0.0574, + "step": 84330 + }, + { + "epoch": 3.94, + "learning_rate": 6.942297744266613e-06, + "loss": 0.1587, + "step": 84335 + }, + { + "epoch": 3.94, + "learning_rate": 6.941513959211827e-06, + "loss": 0.2067, + "step": 84340 + }, + { + "epoch": 3.94, + "learning_rate": 6.94073017415704e-06, + "loss": 0.3755, + "step": 84345 + }, + { + "epoch": 3.94, + "learning_rate": 6.939946389102254e-06, + "loss": 0.0658, + "step": 84350 + }, + { + "epoch": 3.94, + "learning_rate": 6.939162604047467e-06, + "loss": 0.0162, + "step": 84355 + }, + { + "epoch": 3.94, + "learning_rate": 6.938378818992681e-06, + "loss": 0.0154, + "step": 84360 + }, + { + "epoch": 3.94, + "learning_rate": 6.937595033937894e-06, + "loss": 0.0647, + "step": 84365 + }, + { + "epoch": 3.94, + "learning_rate": 6.936811248883108e-06, + "loss": 0.0973, + "step": 84370 + }, + { + "epoch": 3.94, + "learning_rate": 6.93602746382832e-06, + "loss": 0.073, + "step": 84375 + }, + { + "epoch": 3.94, + "learning_rate": 6.935243678773534e-06, + "loss": 0.2244, + "step": 84380 + }, + { + "epoch": 3.94, + "learning_rate": 6.934459893718747e-06, + "loss": 0.0851, + "step": 84385 + }, + { + "epoch": 3.94, + "learning_rate": 6.933676108663961e-06, + "loss": 0.2985, + "step": 84390 + }, + { + "epoch": 3.94, + "learning_rate": 6.932892323609174e-06, + "loss": 0.2953, + "step": 84395 + }, + { + "epoch": 3.94, + "learning_rate": 6.932108538554388e-06, + "loss": 0.0488, + "step": 84400 + }, + { + "epoch": 3.94, + "learning_rate": 6.931324753499601e-06, + "loss": 0.0575, + "step": 84405 + }, + { + "epoch": 3.94, + "learning_rate": 6.930540968444815e-06, + "loss": 0.0392, + "step": 84410 + }, + { + "epoch": 3.94, + "learning_rate": 6.929757183390028e-06, + "loss": 0.0602, + "step": 84415 + }, + { + "epoch": 3.94, + "learning_rate": 6.928973398335242e-06, + "loss": 0.1123, + "step": 84420 + }, + { + "epoch": 3.94, + "learning_rate": 6.928189613280455e-06, + "loss": 0.0457, + "step": 84425 + }, + { + "epoch": 3.94, + "learning_rate": 6.9274058282256685e-06, + "loss": 0.0504, + "step": 84430 + }, + { + "epoch": 3.94, + "learning_rate": 6.926622043170882e-06, + "loss": 0.104, + "step": 84435 + }, + { + "epoch": 3.94, + "learning_rate": 6.925838258116095e-06, + "loss": 0.1921, + "step": 84440 + }, + { + "epoch": 3.94, + "learning_rate": 6.925054473061308e-06, + "loss": 0.4111, + "step": 84445 + }, + { + "epoch": 3.94, + "learning_rate": 6.924270688006522e-06, + "loss": 0.0485, + "step": 84450 + }, + { + "epoch": 3.94, + "learning_rate": 6.923486902951735e-06, + "loss": 0.0229, + "step": 84455 + }, + { + "epoch": 3.94, + "learning_rate": 6.922703117896949e-06, + "loss": 0.0297, + "step": 84460 + }, + { + "epoch": 3.94, + "learning_rate": 6.921919332842162e-06, + "loss": 0.0345, + "step": 84465 + }, + { + "epoch": 3.94, + "learning_rate": 6.9211355477873756e-06, + "loss": 0.1021, + "step": 84470 + }, + { + "epoch": 3.94, + "learning_rate": 6.920351762732589e-06, + "loss": 0.1215, + "step": 84475 + }, + { + "epoch": 3.94, + "learning_rate": 6.9195679776778025e-06, + "loss": 0.1225, + "step": 84480 + }, + { + "epoch": 3.94, + "learning_rate": 6.918784192623016e-06, + "loss": 0.1005, + "step": 84485 + }, + { + "epoch": 3.94, + "learning_rate": 6.9180004075682295e-06, + "loss": 0.2317, + "step": 84490 + }, + { + "epoch": 3.94, + "learning_rate": 6.9172166225134425e-06, + "loss": 0.3476, + "step": 84495 + }, + { + "epoch": 3.94, + "learning_rate": 6.9164328374586565e-06, + "loss": 0.052, + "step": 84500 + }, + { + "epoch": 3.94, + "learning_rate": 6.915649052403869e-06, + "loss": 0.0212, + "step": 84505 + }, + { + "epoch": 3.94, + "learning_rate": 6.9148652673490826e-06, + "loss": 0.0163, + "step": 84510 + }, + { + "epoch": 3.94, + "learning_rate": 6.914081482294296e-06, + "loss": 0.0298, + "step": 84515 + }, + { + "epoch": 3.94, + "learning_rate": 6.9132976972395095e-06, + "loss": 0.0641, + "step": 84520 + }, + { + "epoch": 3.94, + "learning_rate": 6.912513912184723e-06, + "loss": 0.0839, + "step": 84525 + }, + { + "epoch": 3.94, + "learning_rate": 6.9117301271299365e-06, + "loss": 0.0815, + "step": 84530 + }, + { + "epoch": 3.94, + "learning_rate": 6.9109463420751496e-06, + "loss": 0.1028, + "step": 84535 + }, + { + "epoch": 3.94, + "learning_rate": 6.9101625570203635e-06, + "loss": 0.2123, + "step": 84540 + }, + { + "epoch": 3.94, + "learning_rate": 6.9093787719655765e-06, + "loss": 0.2505, + "step": 84545 + }, + { + "epoch": 3.95, + "learning_rate": 6.9085949869107904e-06, + "loss": 0.0627, + "step": 84550 + }, + { + "epoch": 3.95, + "learning_rate": 6.907811201856004e-06, + "loss": 0.0441, + "step": 84555 + }, + { + "epoch": 3.95, + "learning_rate": 6.907027416801217e-06, + "loss": 0.0371, + "step": 84560 + }, + { + "epoch": 3.95, + "learning_rate": 6.906243631746431e-06, + "loss": 0.0606, + "step": 84565 + }, + { + "epoch": 3.95, + "learning_rate": 6.9054598466916435e-06, + "loss": 0.0634, + "step": 84570 + }, + { + "epoch": 3.95, + "learning_rate": 6.9046760616368566e-06, + "loss": 0.0775, + "step": 84575 + }, + { + "epoch": 3.95, + "learning_rate": 6.9038922765820705e-06, + "loss": 0.067, + "step": 84580 + }, + { + "epoch": 3.95, + "learning_rate": 6.9031084915272835e-06, + "loss": 0.1218, + "step": 84585 + }, + { + "epoch": 3.95, + "learning_rate": 6.9023247064724974e-06, + "loss": 0.2002, + "step": 84590 + }, + { + "epoch": 3.95, + "learning_rate": 6.901540921417711e-06, + "loss": 0.3358, + "step": 84595 + }, + { + "epoch": 3.95, + "learning_rate": 6.900757136362924e-06, + "loss": 0.1052, + "step": 84600 + }, + { + "epoch": 3.95, + "learning_rate": 6.899973351308138e-06, + "loss": 0.0229, + "step": 84605 + }, + { + "epoch": 3.95, + "learning_rate": 6.899189566253351e-06, + "loss": 0.0396, + "step": 84610 + }, + { + "epoch": 3.95, + "learning_rate": 6.898405781198565e-06, + "loss": 0.0759, + "step": 84615 + }, + { + "epoch": 3.95, + "learning_rate": 6.897621996143778e-06, + "loss": 0.0415, + "step": 84620 + }, + { + "epoch": 3.95, + "learning_rate": 6.896838211088992e-06, + "loss": 0.0713, + "step": 84625 + }, + { + "epoch": 3.95, + "learning_rate": 6.896054426034205e-06, + "loss": 0.0515, + "step": 84630 + }, + { + "epoch": 3.95, + "learning_rate": 6.8952706409794175e-06, + "loss": 0.1614, + "step": 84635 + }, + { + "epoch": 3.95, + "learning_rate": 6.894486855924631e-06, + "loss": 0.1235, + "step": 84640 + }, + { + "epoch": 3.95, + "learning_rate": 6.893703070869845e-06, + "loss": 0.2719, + "step": 84645 + }, + { + "epoch": 3.95, + "learning_rate": 6.892919285815058e-06, + "loss": 0.0453, + "step": 84650 + }, + { + "epoch": 3.95, + "learning_rate": 6.892135500760272e-06, + "loss": 0.0359, + "step": 84655 + }, + { + "epoch": 3.95, + "learning_rate": 6.891351715705485e-06, + "loss": 0.0588, + "step": 84660 + }, + { + "epoch": 3.95, + "learning_rate": 6.890567930650699e-06, + "loss": 0.0396, + "step": 84665 + }, + { + "epoch": 3.95, + "learning_rate": 6.889784145595912e-06, + "loss": 0.0536, + "step": 84670 + }, + { + "epoch": 3.95, + "learning_rate": 6.889000360541126e-06, + "loss": 0.0496, + "step": 84675 + }, + { + "epoch": 3.95, + "learning_rate": 6.888216575486339e-06, + "loss": 0.1221, + "step": 84680 + }, + { + "epoch": 3.95, + "learning_rate": 6.887432790431553e-06, + "loss": 0.1342, + "step": 84685 + }, + { + "epoch": 3.95, + "learning_rate": 6.886649005376766e-06, + "loss": 0.2477, + "step": 84690 + }, + { + "epoch": 3.95, + "learning_rate": 6.88586522032198e-06, + "loss": 0.2642, + "step": 84695 + }, + { + "epoch": 3.95, + "learning_rate": 6.885081435267192e-06, + "loss": 0.0678, + "step": 84700 + }, + { + "epoch": 3.95, + "learning_rate": 6.884297650212406e-06, + "loss": 0.0121, + "step": 84705 + }, + { + "epoch": 3.95, + "learning_rate": 6.883513865157619e-06, + "loss": 0.0479, + "step": 84710 + }, + { + "epoch": 3.95, + "learning_rate": 6.882730080102833e-06, + "loss": 0.0348, + "step": 84715 + }, + { + "epoch": 3.95, + "learning_rate": 6.881946295048046e-06, + "loss": 0.0813, + "step": 84720 + }, + { + "epoch": 3.95, + "learning_rate": 6.88116250999326e-06, + "loss": 0.0826, + "step": 84725 + }, + { + "epoch": 3.95, + "learning_rate": 6.880378724938473e-06, + "loss": 0.0925, + "step": 84730 + }, + { + "epoch": 3.95, + "learning_rate": 6.879594939883687e-06, + "loss": 0.1139, + "step": 84735 + }, + { + "epoch": 3.95, + "learning_rate": 6.8788111548289e-06, + "loss": 0.1557, + "step": 84740 + }, + { + "epoch": 3.95, + "learning_rate": 6.878027369774114e-06, + "loss": 0.2379, + "step": 84745 + }, + { + "epoch": 3.95, + "learning_rate": 6.877243584719327e-06, + "loss": 0.1039, + "step": 84750 + }, + { + "epoch": 3.95, + "learning_rate": 6.876459799664541e-06, + "loss": 0.0495, + "step": 84755 + }, + { + "epoch": 3.96, + "learning_rate": 6.875676014609754e-06, + "loss": 0.0733, + "step": 84760 + }, + { + "epoch": 3.96, + "learning_rate": 6.874892229554967e-06, + "loss": 0.1012, + "step": 84765 + }, + { + "epoch": 3.96, + "learning_rate": 6.87410844450018e-06, + "loss": 0.1164, + "step": 84770 + }, + { + "epoch": 3.96, + "learning_rate": 6.873324659445394e-06, + "loss": 0.098, + "step": 84775 + }, + { + "epoch": 3.96, + "learning_rate": 6.872540874390607e-06, + "loss": 0.1502, + "step": 84780 + }, + { + "epoch": 3.96, + "learning_rate": 6.871757089335821e-06, + "loss": 0.159, + "step": 84785 + }, + { + "epoch": 3.96, + "learning_rate": 6.870973304281034e-06, + "loss": 0.1656, + "step": 84790 + }, + { + "epoch": 3.96, + "learning_rate": 6.870189519226248e-06, + "loss": 0.2007, + "step": 84795 + }, + { + "epoch": 3.96, + "learning_rate": 6.869405734171461e-06, + "loss": 0.0573, + "step": 84800 + }, + { + "epoch": 3.96, + "learning_rate": 6.868621949116675e-06, + "loss": 0.0321, + "step": 84805 + }, + { + "epoch": 3.96, + "learning_rate": 6.867838164061888e-06, + "loss": 0.0407, + "step": 84810 + }, + { + "epoch": 3.96, + "learning_rate": 6.867054379007102e-06, + "loss": 0.0898, + "step": 84815 + }, + { + "epoch": 3.96, + "learning_rate": 6.866270593952316e-06, + "loss": 0.0688, + "step": 84820 + }, + { + "epoch": 3.96, + "learning_rate": 6.865486808897529e-06, + "loss": 0.0849, + "step": 84825 + }, + { + "epoch": 3.96, + "learning_rate": 6.864703023842741e-06, + "loss": 0.055, + "step": 84830 + }, + { + "epoch": 3.96, + "learning_rate": 6.863919238787955e-06, + "loss": 0.1568, + "step": 84835 + }, + { + "epoch": 3.96, + "learning_rate": 6.863135453733168e-06, + "loss": 0.1795, + "step": 84840 + }, + { + "epoch": 3.96, + "learning_rate": 6.862351668678382e-06, + "loss": 0.2184, + "step": 84845 + }, + { + "epoch": 3.96, + "learning_rate": 6.861567883623595e-06, + "loss": 0.0367, + "step": 84850 + }, + { + "epoch": 3.96, + "learning_rate": 6.860784098568809e-06, + "loss": 0.0136, + "step": 84855 + }, + { + "epoch": 3.96, + "learning_rate": 6.860000313514022e-06, + "loss": 0.05, + "step": 84860 + }, + { + "epoch": 3.96, + "learning_rate": 6.859216528459236e-06, + "loss": 0.0288, + "step": 84865 + }, + { + "epoch": 3.96, + "learning_rate": 6.85843274340445e-06, + "loss": 0.0863, + "step": 84870 + }, + { + "epoch": 3.96, + "learning_rate": 6.857648958349663e-06, + "loss": 0.0304, + "step": 84875 + }, + { + "epoch": 3.96, + "learning_rate": 6.856865173294877e-06, + "loss": 0.1366, + "step": 84880 + }, + { + "epoch": 3.96, + "learning_rate": 6.85608138824009e-06, + "loss": 0.1891, + "step": 84885 + }, + { + "epoch": 3.96, + "learning_rate": 6.855297603185304e-06, + "loss": 0.2073, + "step": 84890 + }, + { + "epoch": 3.96, + "learning_rate": 6.854513818130516e-06, + "loss": 0.3622, + "step": 84895 + }, + { + "epoch": 3.96, + "learning_rate": 6.853730033075729e-06, + "loss": 0.0635, + "step": 84900 + }, + { + "epoch": 3.96, + "learning_rate": 6.852946248020943e-06, + "loss": 0.005, + "step": 84905 + }, + { + "epoch": 3.96, + "learning_rate": 6.852162462966157e-06, + "loss": 0.0065, + "step": 84910 + }, + { + "epoch": 3.96, + "learning_rate": 6.85137867791137e-06, + "loss": 0.0524, + "step": 84915 + }, + { + "epoch": 3.96, + "learning_rate": 6.850594892856584e-06, + "loss": 0.0971, + "step": 84920 + }, + { + "epoch": 3.96, + "learning_rate": 6.849811107801797e-06, + "loss": 0.0614, + "step": 84925 + }, + { + "epoch": 3.96, + "learning_rate": 6.849027322747011e-06, + "loss": 0.1487, + "step": 84930 + }, + { + "epoch": 3.96, + "learning_rate": 6.848243537692224e-06, + "loss": 0.146, + "step": 84935 + }, + { + "epoch": 3.96, + "learning_rate": 6.847459752637438e-06, + "loss": 0.2943, + "step": 84940 + }, + { + "epoch": 3.96, + "learning_rate": 6.846675967582651e-06, + "loss": 0.3352, + "step": 84945 + }, + { + "epoch": 3.96, + "learning_rate": 6.845892182527865e-06, + "loss": 0.034, + "step": 84950 + }, + { + "epoch": 3.96, + "learning_rate": 6.845108397473078e-06, + "loss": 0.0656, + "step": 84955 + }, + { + "epoch": 3.96, + "learning_rate": 6.844324612418291e-06, + "loss": 0.0307, + "step": 84960 + }, + { + "epoch": 3.96, + "learning_rate": 6.843540827363504e-06, + "loss": 0.0868, + "step": 84965 + }, + { + "epoch": 3.96, + "learning_rate": 6.842757042308718e-06, + "loss": 0.0561, + "step": 84970 + }, + { + "epoch": 3.97, + "learning_rate": 6.841973257253931e-06, + "loss": 0.1256, + "step": 84975 + }, + { + "epoch": 3.97, + "learning_rate": 6.841189472199145e-06, + "loss": 0.0754, + "step": 84980 + }, + { + "epoch": 3.97, + "learning_rate": 6.840405687144358e-06, + "loss": 0.1401, + "step": 84985 + }, + { + "epoch": 3.97, + "learning_rate": 6.839621902089572e-06, + "loss": 0.135, + "step": 84990 + }, + { + "epoch": 3.97, + "learning_rate": 6.838838117034785e-06, + "loss": 0.2946, + "step": 84995 + }, + { + "epoch": 3.97, + "learning_rate": 6.838054331979999e-06, + "loss": 0.0858, + "step": 85000 + }, + { + "epoch": 3.97, + "learning_rate": 6.837270546925212e-06, + "loss": 0.0219, + "step": 85005 + }, + { + "epoch": 3.97, + "learning_rate": 6.836486761870426e-06, + "loss": 0.026, + "step": 85010 + }, + { + "epoch": 3.97, + "learning_rate": 6.835702976815639e-06, + "loss": 0.0867, + "step": 85015 + }, + { + "epoch": 3.97, + "learning_rate": 6.834919191760853e-06, + "loss": 0.045, + "step": 85020 + }, + { + "epoch": 3.97, + "learning_rate": 6.834135406706065e-06, + "loss": 0.0908, + "step": 85025 + }, + { + "epoch": 3.97, + "learning_rate": 6.833351621651279e-06, + "loss": 0.2008, + "step": 85030 + }, + { + "epoch": 3.97, + "learning_rate": 6.832567836596492e-06, + "loss": 0.1749, + "step": 85035 + }, + { + "epoch": 3.97, + "learning_rate": 6.831784051541706e-06, + "loss": 0.1238, + "step": 85040 + }, + { + "epoch": 3.97, + "learning_rate": 6.831000266486919e-06, + "loss": 0.284, + "step": 85045 + }, + { + "epoch": 3.97, + "learning_rate": 6.830216481432133e-06, + "loss": 0.1071, + "step": 85050 + }, + { + "epoch": 3.97, + "learning_rate": 6.829432696377346e-06, + "loss": 0.0221, + "step": 85055 + }, + { + "epoch": 3.97, + "learning_rate": 6.82864891132256e-06, + "loss": 0.0341, + "step": 85060 + }, + { + "epoch": 3.97, + "learning_rate": 6.827865126267773e-06, + "loss": 0.0256, + "step": 85065 + }, + { + "epoch": 3.97, + "learning_rate": 6.827081341212987e-06, + "loss": 0.0422, + "step": 85070 + }, + { + "epoch": 3.97, + "learning_rate": 6.8262975561582e-06, + "loss": 0.0906, + "step": 85075 + }, + { + "epoch": 3.97, + "learning_rate": 6.825513771103414e-06, + "loss": 0.1339, + "step": 85080 + }, + { + "epoch": 3.97, + "learning_rate": 6.8247299860486275e-06, + "loss": 0.1046, + "step": 85085 + }, + { + "epoch": 3.97, + "learning_rate": 6.82394620099384e-06, + "loss": 0.1263, + "step": 85090 + }, + { + "epoch": 3.97, + "learning_rate": 6.823162415939053e-06, + "loss": 0.2744, + "step": 85095 + }, + { + "epoch": 3.97, + "learning_rate": 6.822378630884267e-06, + "loss": 0.0645, + "step": 85100 + }, + { + "epoch": 3.97, + "learning_rate": 6.82159484582948e-06, + "loss": 0.016, + "step": 85105 + }, + { + "epoch": 3.97, + "learning_rate": 6.820811060774694e-06, + "loss": 0.0759, + "step": 85110 + }, + { + "epoch": 3.97, + "learning_rate": 6.820027275719907e-06, + "loss": 0.0534, + "step": 85115 + }, + { + "epoch": 3.97, + "learning_rate": 6.819243490665121e-06, + "loss": 0.0791, + "step": 85120 + }, + { + "epoch": 3.97, + "learning_rate": 6.818459705610334e-06, + "loss": 0.1052, + "step": 85125 + }, + { + "epoch": 3.97, + "learning_rate": 6.817675920555548e-06, + "loss": 0.0868, + "step": 85130 + }, + { + "epoch": 3.97, + "learning_rate": 6.8168921355007615e-06, + "loss": 0.0865, + "step": 85135 + }, + { + "epoch": 3.97, + "learning_rate": 6.8161083504459746e-06, + "loss": 0.2504, + "step": 85140 + }, + { + "epoch": 3.97, + "learning_rate": 6.8153245653911885e-06, + "loss": 0.2567, + "step": 85145 + }, + { + "epoch": 3.97, + "learning_rate": 6.8145407803364015e-06, + "loss": 0.0648, + "step": 85150 + }, + { + "epoch": 3.97, + "learning_rate": 6.813756995281614e-06, + "loss": 0.0277, + "step": 85155 + }, + { + "epoch": 3.97, + "learning_rate": 6.812973210226828e-06, + "loss": 0.0593, + "step": 85160 + }, + { + "epoch": 3.97, + "learning_rate": 6.812189425172041e-06, + "loss": 0.1007, + "step": 85165 + }, + { + "epoch": 3.97, + "learning_rate": 6.811405640117255e-06, + "loss": 0.1356, + "step": 85170 + }, + { + "epoch": 3.97, + "learning_rate": 6.8106218550624685e-06, + "loss": 0.0813, + "step": 85175 + }, + { + "epoch": 3.97, + "learning_rate": 6.8098380700076816e-06, + "loss": 0.1681, + "step": 85180 + }, + { + "epoch": 3.97, + "learning_rate": 6.8090542849528955e-06, + "loss": 0.1017, + "step": 85185 + }, + { + "epoch": 3.98, + "learning_rate": 6.8082704998981085e-06, + "loss": 0.1854, + "step": 85190 + }, + { + "epoch": 3.98, + "learning_rate": 6.8074867148433224e-06, + "loss": 0.3727, + "step": 85195 + }, + { + "epoch": 3.98, + "learning_rate": 6.8067029297885355e-06, + "loss": 0.0719, + "step": 85200 + }, + { + "epoch": 3.98, + "learning_rate": 6.805919144733749e-06, + "loss": 0.0192, + "step": 85205 + }, + { + "epoch": 3.98, + "learning_rate": 6.8051353596789625e-06, + "loss": 0.0716, + "step": 85210 + }, + { + "epoch": 3.98, + "learning_rate": 6.804351574624176e-06, + "loss": 0.0414, + "step": 85215 + }, + { + "epoch": 3.98, + "learning_rate": 6.803567789569389e-06, + "loss": 0.0351, + "step": 85220 + }, + { + "epoch": 3.98, + "learning_rate": 6.8027840045146025e-06, + "loss": 0.075, + "step": 85225 + }, + { + "epoch": 3.98, + "learning_rate": 6.8020002194598155e-06, + "loss": 0.2622, + "step": 85230 + }, + { + "epoch": 3.98, + "learning_rate": 6.8012164344050294e-06, + "loss": 0.0557, + "step": 85235 + }, + { + "epoch": 3.98, + "learning_rate": 6.8004326493502425e-06, + "loss": 0.1608, + "step": 85240 + }, + { + "epoch": 3.98, + "learning_rate": 6.799648864295456e-06, + "loss": 0.1829, + "step": 85245 + }, + { + "epoch": 3.98, + "learning_rate": 6.7988650792406695e-06, + "loss": 0.1448, + "step": 85250 + }, + { + "epoch": 3.98, + "learning_rate": 6.798081294185883e-06, + "loss": 0.0546, + "step": 85255 + }, + { + "epoch": 3.98, + "learning_rate": 6.7972975091310964e-06, + "loss": 0.032, + "step": 85260 + }, + { + "epoch": 3.98, + "learning_rate": 6.79651372407631e-06, + "loss": 0.0511, + "step": 85265 + }, + { + "epoch": 3.98, + "learning_rate": 6.795729939021523e-06, + "loss": 0.0677, + "step": 85270 + }, + { + "epoch": 3.98, + "learning_rate": 6.794946153966737e-06, + "loss": 0.0947, + "step": 85275 + }, + { + "epoch": 3.98, + "learning_rate": 6.79416236891195e-06, + "loss": 0.1098, + "step": 85280 + }, + { + "epoch": 3.98, + "learning_rate": 6.7933785838571634e-06, + "loss": 0.1172, + "step": 85285 + }, + { + "epoch": 3.98, + "learning_rate": 6.7925947988023765e-06, + "loss": 0.1582, + "step": 85290 + }, + { + "epoch": 3.98, + "learning_rate": 6.79181101374759e-06, + "loss": 0.2443, + "step": 85295 + }, + { + "epoch": 3.98, + "learning_rate": 6.7910272286928034e-06, + "loss": 0.0805, + "step": 85300 + }, + { + "epoch": 3.98, + "learning_rate": 6.790243443638017e-06, + "loss": 0.0288, + "step": 85305 + }, + { + "epoch": 3.98, + "learning_rate": 6.78945965858323e-06, + "loss": 0.083, + "step": 85310 + }, + { + "epoch": 3.98, + "learning_rate": 6.788675873528444e-06, + "loss": 0.0465, + "step": 85315 + }, + { + "epoch": 3.98, + "learning_rate": 6.787892088473657e-06, + "loss": 0.048, + "step": 85320 + }, + { + "epoch": 3.98, + "learning_rate": 6.787108303418871e-06, + "loss": 0.0728, + "step": 85325 + }, + { + "epoch": 3.98, + "learning_rate": 6.786324518364084e-06, + "loss": 0.1453, + "step": 85330 + }, + { + "epoch": 3.98, + "learning_rate": 6.785540733309298e-06, + "loss": 0.2035, + "step": 85335 + }, + { + "epoch": 3.98, + "learning_rate": 6.784756948254511e-06, + "loss": 0.2733, + "step": 85340 + }, + { + "epoch": 3.98, + "learning_rate": 6.783973163199725e-06, + "loss": 0.1481, + "step": 85345 + }, + { + "epoch": 3.98, + "learning_rate": 6.783189378144937e-06, + "loss": 0.0662, + "step": 85350 + }, + { + "epoch": 3.98, + "learning_rate": 6.782405593090151e-06, + "loss": 0.0241, + "step": 85355 + }, + { + "epoch": 3.98, + "learning_rate": 6.781621808035364e-06, + "loss": 0.0078, + "step": 85360 + }, + { + "epoch": 3.98, + "learning_rate": 6.780838022980578e-06, + "loss": 0.0335, + "step": 85365 + }, + { + "epoch": 3.98, + "learning_rate": 6.780054237925791e-06, + "loss": 0.0504, + "step": 85370 + }, + { + "epoch": 3.98, + "learning_rate": 6.779270452871005e-06, + "loss": 0.0439, + "step": 85375 + }, + { + "epoch": 3.98, + "learning_rate": 6.778486667816218e-06, + "loss": 0.0861, + "step": 85380 + }, + { + "epoch": 3.98, + "learning_rate": 6.777702882761432e-06, + "loss": 0.1036, + "step": 85385 + }, + { + "epoch": 3.98, + "learning_rate": 6.776919097706645e-06, + "loss": 0.0982, + "step": 85390 + }, + { + "epoch": 3.98, + "learning_rate": 6.776135312651859e-06, + "loss": 0.4021, + "step": 85395 + }, + { + "epoch": 3.98, + "learning_rate": 6.775351527597073e-06, + "loss": 0.0563, + "step": 85400 + }, + { + "epoch": 3.99, + "learning_rate": 6.774567742542286e-06, + "loss": 0.0224, + "step": 85405 + }, + { + "epoch": 3.99, + "learning_rate": 6.7737839574875e-06, + "loss": 0.0658, + "step": 85410 + }, + { + "epoch": 3.99, + "learning_rate": 6.773000172432712e-06, + "loss": 0.0374, + "step": 85415 + }, + { + "epoch": 3.99, + "learning_rate": 6.772216387377925e-06, + "loss": 0.0689, + "step": 85420 + }, + { + "epoch": 3.99, + "learning_rate": 6.771432602323139e-06, + "loss": 0.0196, + "step": 85425 + }, + { + "epoch": 3.99, + "learning_rate": 6.770648817268352e-06, + "loss": 0.0726, + "step": 85430 + }, + { + "epoch": 3.99, + "learning_rate": 6.769865032213566e-06, + "loss": 0.1081, + "step": 85435 + }, + { + "epoch": 3.99, + "learning_rate": 6.769081247158779e-06, + "loss": 0.23, + "step": 85440 + }, + { + "epoch": 3.99, + "learning_rate": 6.768297462103993e-06, + "loss": 0.1952, + "step": 85445 + }, + { + "epoch": 3.99, + "learning_rate": 6.767513677049207e-06, + "loss": 0.0612, + "step": 85450 + }, + { + "epoch": 3.99, + "learning_rate": 6.76672989199442e-06, + "loss": 0.0818, + "step": 85455 + }, + { + "epoch": 3.99, + "learning_rate": 6.765946106939634e-06, + "loss": 0.0122, + "step": 85460 + }, + { + "epoch": 3.99, + "learning_rate": 6.765162321884847e-06, + "loss": 0.0459, + "step": 85465 + }, + { + "epoch": 3.99, + "learning_rate": 6.764378536830061e-06, + "loss": 0.0668, + "step": 85470 + }, + { + "epoch": 3.99, + "learning_rate": 6.763594751775274e-06, + "loss": 0.0418, + "step": 85475 + }, + { + "epoch": 3.99, + "learning_rate": 6.762810966720486e-06, + "loss": 0.0965, + "step": 85480 + }, + { + "epoch": 3.99, + "learning_rate": 6.7620271816657e-06, + "loss": 0.1879, + "step": 85485 + }, + { + "epoch": 3.99, + "learning_rate": 6.761243396610914e-06, + "loss": 0.2135, + "step": 85490 + }, + { + "epoch": 3.99, + "learning_rate": 6.760459611556127e-06, + "loss": 0.1978, + "step": 85495 + }, + { + "epoch": 3.99, + "learning_rate": 6.759675826501341e-06, + "loss": 0.111, + "step": 85500 + }, + { + "epoch": 3.99, + "learning_rate": 6.758892041446554e-06, + "loss": 0.0223, + "step": 85505 + }, + { + "epoch": 3.99, + "learning_rate": 6.758108256391768e-06, + "loss": 0.0205, + "step": 85510 + }, + { + "epoch": 3.99, + "learning_rate": 6.757324471336981e-06, + "loss": 0.0967, + "step": 85515 + }, + { + "epoch": 3.99, + "learning_rate": 6.756540686282195e-06, + "loss": 0.0362, + "step": 85520 + }, + { + "epoch": 3.99, + "learning_rate": 6.755756901227408e-06, + "loss": 0.1377, + "step": 85525 + }, + { + "epoch": 3.99, + "learning_rate": 6.754973116172622e-06, + "loss": 0.0516, + "step": 85530 + }, + { + "epoch": 3.99, + "learning_rate": 6.754189331117835e-06, + "loss": 0.1818, + "step": 85535 + }, + { + "epoch": 3.99, + "learning_rate": 6.753405546063049e-06, + "loss": 0.1526, + "step": 85540 + }, + { + "epoch": 3.99, + "learning_rate": 6.752621761008261e-06, + "loss": 0.3196, + "step": 85545 + }, + { + "epoch": 3.99, + "learning_rate": 6.751837975953475e-06, + "loss": 0.0523, + "step": 85550 + }, + { + "epoch": 3.99, + "learning_rate": 6.751054190898688e-06, + "loss": 0.0336, + "step": 85555 + }, + { + "epoch": 3.99, + "learning_rate": 6.750270405843902e-06, + "loss": 0.0413, + "step": 85560 + }, + { + "epoch": 3.99, + "learning_rate": 6.749486620789115e-06, + "loss": 0.1544, + "step": 85565 + }, + { + "epoch": 3.99, + "learning_rate": 6.748702835734329e-06, + "loss": 0.0657, + "step": 85570 + }, + { + "epoch": 3.99, + "learning_rate": 6.747919050679542e-06, + "loss": 0.0705, + "step": 85575 + }, + { + "epoch": 3.99, + "learning_rate": 6.747135265624756e-06, + "loss": 0.1632, + "step": 85580 + }, + { + "epoch": 3.99, + "learning_rate": 6.746351480569969e-06, + "loss": 0.1428, + "step": 85585 + }, + { + "epoch": 3.99, + "learning_rate": 6.745567695515183e-06, + "loss": 0.1539, + "step": 85590 + }, + { + "epoch": 3.99, + "learning_rate": 6.744783910460396e-06, + "loss": 0.2851, + "step": 85595 + }, + { + "epoch": 3.99, + "learning_rate": 6.74400012540561e-06, + "loss": 0.1251, + "step": 85600 + }, + { + "epoch": 3.99, + "learning_rate": 6.743216340350823e-06, + "loss": 0.0252, + "step": 85605 + }, + { + "epoch": 3.99, + "learning_rate": 6.742432555296036e-06, + "loss": 0.0494, + "step": 85610 + }, + { + "epoch": 3.99, + "learning_rate": 6.741648770241249e-06, + "loss": 0.0426, + "step": 85615 + }, + { + "epoch": 4.0, + "learning_rate": 6.740864985186463e-06, + "loss": 0.1323, + "step": 85620 + }, + { + "epoch": 4.0, + "learning_rate": 6.740081200131676e-06, + "loss": 0.1494, + "step": 85625 + }, + { + "epoch": 4.0, + "learning_rate": 6.73929741507689e-06, + "loss": 0.1759, + "step": 85630 + }, + { + "epoch": 4.0, + "learning_rate": 6.738513630022103e-06, + "loss": 0.1965, + "step": 85635 + }, + { + "epoch": 4.0, + "learning_rate": 6.737886601978274e-06, + "loss": 0.2621, + "step": 85640 + }, + { + "epoch": 4.0, + "learning_rate": 6.737102816923487e-06, + "loss": 0.3035, + "step": 85645 + }, + { + "epoch": 4.0, + "learning_rate": 6.736319031868701e-06, + "loss": 0.0709, + "step": 85650 + }, + { + "epoch": 4.0, + "learning_rate": 6.735535246813914e-06, + "loss": 0.0263, + "step": 85655 + }, + { + "epoch": 4.0, + "learning_rate": 6.734751461759128e-06, + "loss": 0.0484, + "step": 85660 + }, + { + "epoch": 4.0, + "learning_rate": 6.733967676704341e-06, + "loss": 0.0793, + "step": 85665 + }, + { + "epoch": 4.0, + "learning_rate": 6.733183891649555e-06, + "loss": 0.0728, + "step": 85670 + }, + { + "epoch": 4.0, + "learning_rate": 6.732400106594767e-06, + "loss": 0.0888, + "step": 85675 + }, + { + "epoch": 4.0, + "learning_rate": 6.731616321539981e-06, + "loss": 0.0878, + "step": 85680 + }, + { + "epoch": 4.0, + "learning_rate": 6.730832536485194e-06, + "loss": 0.2161, + "step": 85685 + }, + { + "epoch": 4.0, + "learning_rate": 6.730048751430408e-06, + "loss": 0.1678, + "step": 85690 + }, + { + "epoch": 4.0, + "learning_rate": 6.729264966375621e-06, + "loss": 0.2515, + "step": 85695 + }, + { + "epoch": 4.0, + "learning_rate": 6.728481181320835e-06, + "loss": 0.0519, + "step": 85700 + }, + { + "epoch": 4.0, + "learning_rate": 6.727697396266048e-06, + "loss": 0.035, + "step": 85705 + }, + { + "epoch": 4.0, + "learning_rate": 6.726913611211262e-06, + "loss": 0.0771, + "step": 85710 + }, + { + "epoch": 4.0, + "learning_rate": 6.726129826156475e-06, + "loss": 0.0916, + "step": 85715 + }, + { + "epoch": 4.0, + "learning_rate": 6.725346041101689e-06, + "loss": 0.163, + "step": 85720 + }, + { + "epoch": 4.0, + "eval_cer": 0.01181715224228095, + "eval_loss": 0.08370912075042725, + "eval_runtime": 455.3512, + "eval_samples_per_second": 41.836, + "eval_steps_per_second": 5.231, + "eval_wer": 0.10025906735751296, + "step": 85724 + }, + { + "epoch": 4.0, + "learning_rate": 6.724562256046903e-06, + "loss": 0.3869, + "step": 85725 + }, + { + "epoch": 4.0, + "learning_rate": 6.723778470992116e-06, + "loss": 0.1034, + "step": 85730 + }, + { + "epoch": 4.0, + "learning_rate": 6.72299468593733e-06, + "loss": 0.0496, + "step": 85735 + }, + { + "epoch": 4.0, + "learning_rate": 6.722210900882542e-06, + "loss": 0.0717, + "step": 85740 + }, + { + "epoch": 4.0, + "learning_rate": 6.721427115827755e-06, + "loss": 0.0621, + "step": 85745 + }, + { + "epoch": 4.0, + "learning_rate": 6.720643330772969e-06, + "loss": 0.0433, + "step": 85750 + }, + { + "epoch": 4.0, + "learning_rate": 6.719859545718182e-06, + "loss": 0.0813, + "step": 85755 + }, + { + "epoch": 4.0, + "learning_rate": 6.719075760663396e-06, + "loss": 0.0747, + "step": 85760 + }, + { + "epoch": 4.0, + "learning_rate": 6.718291975608609e-06, + "loss": 0.1336, + "step": 85765 + }, + { + "epoch": 4.0, + "learning_rate": 6.717508190553823e-06, + "loss": 0.3293, + "step": 85770 + }, + { + "epoch": 4.0, + "learning_rate": 6.716724405499037e-06, + "loss": 0.2614, + "step": 85775 + }, + { + "epoch": 4.0, + "learning_rate": 6.71594062044425e-06, + "loss": 0.1055, + "step": 85780 + }, + { + "epoch": 4.0, + "learning_rate": 6.715156835389464e-06, + "loss": 0.0226, + "step": 85785 + }, + { + "epoch": 4.0, + "learning_rate": 6.714373050334677e-06, + "loss": 0.0676, + "step": 85790 + }, + { + "epoch": 4.0, + "learning_rate": 6.713589265279891e-06, + "loss": 0.034, + "step": 85795 + }, + { + "epoch": 4.0, + "learning_rate": 6.712805480225104e-06, + "loss": 0.0403, + "step": 85800 + }, + { + "epoch": 4.0, + "learning_rate": 6.712021695170316e-06, + "loss": 0.0654, + "step": 85805 + }, + { + "epoch": 4.0, + "learning_rate": 6.71123791011553e-06, + "loss": 0.1075, + "step": 85810 + }, + { + "epoch": 4.0, + "learning_rate": 6.710454125060743e-06, + "loss": 0.0859, + "step": 85815 + }, + { + "epoch": 4.0, + "learning_rate": 6.709670340005957e-06, + "loss": 0.1162, + "step": 85820 + }, + { + "epoch": 4.0, + "learning_rate": 6.708886554951171e-06, + "loss": 0.2354, + "step": 85825 + }, + { + "epoch": 4.0, + "learning_rate": 6.708102769896384e-06, + "loss": 0.0505, + "step": 85830 + }, + { + "epoch": 4.01, + "learning_rate": 6.707318984841598e-06, + "loss": 0.0303, + "step": 85835 + }, + { + "epoch": 4.01, + "learning_rate": 6.706535199786811e-06, + "loss": 0.0591, + "step": 85840 + }, + { + "epoch": 4.01, + "learning_rate": 6.705751414732025e-06, + "loss": 0.0535, + "step": 85845 + }, + { + "epoch": 4.01, + "learning_rate": 6.704967629677238e-06, + "loss": 0.0453, + "step": 85850 + }, + { + "epoch": 4.01, + "learning_rate": 6.704183844622452e-06, + "loss": 0.0624, + "step": 85855 + }, + { + "epoch": 4.01, + "learning_rate": 6.703400059567665e-06, + "loss": 0.1451, + "step": 85860 + }, + { + "epoch": 4.01, + "learning_rate": 6.702616274512879e-06, + "loss": 0.0887, + "step": 85865 + }, + { + "epoch": 4.01, + "learning_rate": 6.701832489458091e-06, + "loss": 0.1117, + "step": 85870 + }, + { + "epoch": 4.01, + "learning_rate": 6.701048704403305e-06, + "loss": 0.2585, + "step": 85875 + }, + { + "epoch": 4.01, + "learning_rate": 6.700264919348518e-06, + "loss": 0.1133, + "step": 85880 + }, + { + "epoch": 4.01, + "learning_rate": 6.699481134293732e-06, + "loss": 0.0398, + "step": 85885 + }, + { + "epoch": 4.01, + "learning_rate": 6.698697349238945e-06, + "loss": 0.0391, + "step": 85890 + }, + { + "epoch": 4.01, + "learning_rate": 6.697913564184159e-06, + "loss": 0.0312, + "step": 85895 + }, + { + "epoch": 4.01, + "learning_rate": 6.697129779129372e-06, + "loss": 0.0566, + "step": 85900 + }, + { + "epoch": 4.01, + "learning_rate": 6.696345994074586e-06, + "loss": 0.1049, + "step": 85905 + }, + { + "epoch": 4.01, + "learning_rate": 6.695562209019799e-06, + "loss": 0.0799, + "step": 85910 + }, + { + "epoch": 4.01, + "learning_rate": 6.694778423965013e-06, + "loss": 0.1532, + "step": 85915 + }, + { + "epoch": 4.01, + "learning_rate": 6.693994638910226e-06, + "loss": 0.2829, + "step": 85920 + }, + { + "epoch": 4.01, + "learning_rate": 6.69321085385544e-06, + "loss": 0.3848, + "step": 85925 + }, + { + "epoch": 4.01, + "learning_rate": 6.692427068800653e-06, + "loss": 0.1048, + "step": 85930 + }, + { + "epoch": 4.01, + "learning_rate": 6.691643283745866e-06, + "loss": 0.0416, + "step": 85935 + }, + { + "epoch": 4.01, + "learning_rate": 6.690859498691079e-06, + "loss": 0.0483, + "step": 85940 + }, + { + "epoch": 4.01, + "learning_rate": 6.690075713636293e-06, + "loss": 0.0257, + "step": 85945 + }, + { + "epoch": 4.01, + "learning_rate": 6.689291928581506e-06, + "loss": 0.0497, + "step": 85950 + }, + { + "epoch": 4.01, + "learning_rate": 6.68850814352672e-06, + "loss": 0.1049, + "step": 85955 + }, + { + "epoch": 4.01, + "learning_rate": 6.687724358471933e-06, + "loss": 0.0786, + "step": 85960 + }, + { + "epoch": 4.01, + "learning_rate": 6.686940573417147e-06, + "loss": 0.244, + "step": 85965 + }, + { + "epoch": 4.01, + "learning_rate": 6.68615678836236e-06, + "loss": 0.1713, + "step": 85970 + }, + { + "epoch": 4.01, + "learning_rate": 6.685373003307574e-06, + "loss": 0.3134, + "step": 85975 + }, + { + "epoch": 4.01, + "learning_rate": 6.684589218252787e-06, + "loss": 0.0779, + "step": 85980 + }, + { + "epoch": 4.01, + "learning_rate": 6.683805433198001e-06, + "loss": 0.0033, + "step": 85985 + }, + { + "epoch": 4.01, + "learning_rate": 6.683021648143215e-06, + "loss": 0.0437, + "step": 85990 + }, + { + "epoch": 4.01, + "learning_rate": 6.682237863088428e-06, + "loss": 0.0269, + "step": 85995 + }, + { + "epoch": 4.01, + "learning_rate": 6.68145407803364e-06, + "loss": 0.0517, + "step": 86000 + }, + { + "epoch": 4.01, + "learning_rate": 6.680670292978854e-06, + "loss": 0.0475, + "step": 86005 + }, + { + "epoch": 4.01, + "learning_rate": 6.679886507924067e-06, + "loss": 0.1424, + "step": 86010 + }, + { + "epoch": 4.01, + "learning_rate": 6.679102722869281e-06, + "loss": 0.1523, + "step": 86015 + }, + { + "epoch": 4.01, + "learning_rate": 6.678318937814494e-06, + "loss": 0.1225, + "step": 86020 + }, + { + "epoch": 4.01, + "learning_rate": 6.677535152759708e-06, + "loss": 0.3258, + "step": 86025 + }, + { + "epoch": 4.01, + "learning_rate": 6.676751367704921e-06, + "loss": 0.0924, + "step": 86030 + }, + { + "epoch": 4.01, + "learning_rate": 6.675967582650135e-06, + "loss": 0.008, + "step": 86035 + }, + { + "epoch": 4.01, + "learning_rate": 6.675183797595349e-06, + "loss": 0.0142, + "step": 86040 + }, + { + "epoch": 4.01, + "learning_rate": 6.674400012540562e-06, + "loss": 0.0669, + "step": 86045 + }, + { + "epoch": 4.02, + "learning_rate": 6.6736162274857756e-06, + "loss": 0.0549, + "step": 86050 + }, + { + "epoch": 4.02, + "learning_rate": 6.672832442430989e-06, + "loss": 0.1676, + "step": 86055 + }, + { + "epoch": 4.02, + "learning_rate": 6.6720486573762025e-06, + "loss": 0.1218, + "step": 86060 + }, + { + "epoch": 4.02, + "learning_rate": 6.671264872321415e-06, + "loss": 0.1531, + "step": 86065 + }, + { + "epoch": 4.02, + "learning_rate": 6.670481087266628e-06, + "loss": 0.2372, + "step": 86070 + }, + { + "epoch": 4.02, + "learning_rate": 6.669697302211842e-06, + "loss": 0.201, + "step": 86075 + }, + { + "epoch": 4.02, + "learning_rate": 6.668913517157055e-06, + "loss": 0.0905, + "step": 86080 + }, + { + "epoch": 4.02, + "learning_rate": 6.668129732102269e-06, + "loss": 0.0378, + "step": 86085 + }, + { + "epoch": 4.02, + "learning_rate": 6.6673459470474826e-06, + "loss": 0.0084, + "step": 86090 + }, + { + "epoch": 4.02, + "learning_rate": 6.666562161992696e-06, + "loss": 0.0831, + "step": 86095 + }, + { + "epoch": 4.02, + "learning_rate": 6.6657783769379095e-06, + "loss": 0.0376, + "step": 86100 + }, + { + "epoch": 4.02, + "learning_rate": 6.664994591883123e-06, + "loss": 0.0516, + "step": 86105 + }, + { + "epoch": 4.02, + "learning_rate": 6.6642108068283365e-06, + "loss": 0.0656, + "step": 86110 + }, + { + "epoch": 4.02, + "learning_rate": 6.6634270217735495e-06, + "loss": 0.1976, + "step": 86115 + }, + { + "epoch": 4.02, + "learning_rate": 6.6626432367187635e-06, + "loss": 0.0929, + "step": 86120 + }, + { + "epoch": 4.02, + "learning_rate": 6.6618594516639765e-06, + "loss": 0.2294, + "step": 86125 + }, + { + "epoch": 4.02, + "learning_rate": 6.661075666609189e-06, + "loss": 0.0865, + "step": 86130 + }, + { + "epoch": 4.02, + "learning_rate": 6.660291881554403e-06, + "loss": 0.0254, + "step": 86135 + }, + { + "epoch": 4.02, + "learning_rate": 6.6595080964996165e-06, + "loss": 0.0696, + "step": 86140 + }, + { + "epoch": 4.02, + "learning_rate": 6.65872431144483e-06, + "loss": 0.03, + "step": 86145 + }, + { + "epoch": 4.02, + "learning_rate": 6.6579405263900435e-06, + "loss": 0.0553, + "step": 86150 + }, + { + "epoch": 4.02, + "learning_rate": 6.6571567413352566e-06, + "loss": 0.052, + "step": 86155 + }, + { + "epoch": 4.02, + "learning_rate": 6.6563729562804705e-06, + "loss": 0.0559, + "step": 86160 + }, + { + "epoch": 4.02, + "learning_rate": 6.6555891712256835e-06, + "loss": 0.218, + "step": 86165 + }, + { + "epoch": 4.02, + "learning_rate": 6.6548053861708974e-06, + "loss": 0.1176, + "step": 86170 + }, + { + "epoch": 4.02, + "learning_rate": 6.6540216011161105e-06, + "loss": 0.2272, + "step": 86175 + }, + { + "epoch": 4.02, + "learning_rate": 6.653237816061324e-06, + "loss": 0.0639, + "step": 86180 + }, + { + "epoch": 4.02, + "learning_rate": 6.6524540310065375e-06, + "loss": 0.0154, + "step": 86185 + }, + { + "epoch": 4.02, + "learning_rate": 6.651670245951751e-06, + "loss": 0.0691, + "step": 86190 + }, + { + "epoch": 4.02, + "learning_rate": 6.6508864608969636e-06, + "loss": 0.0433, + "step": 86195 + }, + { + "epoch": 4.02, + "learning_rate": 6.6501026758421775e-06, + "loss": 0.0739, + "step": 86200 + }, + { + "epoch": 4.02, + "learning_rate": 6.6493188907873905e-06, + "loss": 0.1184, + "step": 86205 + }, + { + "epoch": 4.02, + "learning_rate": 6.6485351057326044e-06, + "loss": 0.0466, + "step": 86210 + }, + { + "epoch": 4.02, + "learning_rate": 6.6477513206778175e-06, + "loss": 0.1509, + "step": 86215 + }, + { + "epoch": 4.02, + "learning_rate": 6.646967535623031e-06, + "loss": 0.1283, + "step": 86220 + }, + { + "epoch": 4.02, + "learning_rate": 6.6461837505682445e-06, + "loss": 0.3265, + "step": 86225 + }, + { + "epoch": 4.02, + "learning_rate": 6.645399965513458e-06, + "loss": 0.1273, + "step": 86230 + }, + { + "epoch": 4.02, + "learning_rate": 6.6446161804586714e-06, + "loss": 0.098, + "step": 86235 + }, + { + "epoch": 4.02, + "learning_rate": 6.643832395403885e-06, + "loss": 0.0169, + "step": 86240 + }, + { + "epoch": 4.02, + "learning_rate": 6.643048610349098e-06, + "loss": 0.0439, + "step": 86245 + }, + { + "epoch": 4.02, + "learning_rate": 6.642264825294312e-06, + "loss": 0.1139, + "step": 86250 + }, + { + "epoch": 4.02, + "learning_rate": 6.641481040239525e-06, + "loss": 0.057, + "step": 86255 + }, + { + "epoch": 4.03, + "learning_rate": 6.640697255184738e-06, + "loss": 0.1377, + "step": 86260 + }, + { + "epoch": 4.03, + "learning_rate": 6.6399134701299515e-06, + "loss": 0.1528, + "step": 86265 + }, + { + "epoch": 4.03, + "learning_rate": 6.639129685075165e-06, + "loss": 0.1555, + "step": 86270 + }, + { + "epoch": 4.03, + "learning_rate": 6.6383459000203784e-06, + "loss": 0.2474, + "step": 86275 + }, + { + "epoch": 4.03, + "learning_rate": 6.637562114965592e-06, + "loss": 0.0938, + "step": 86280 + }, + { + "epoch": 4.03, + "learning_rate": 6.636778329910805e-06, + "loss": 0.0148, + "step": 86285 + }, + { + "epoch": 4.03, + "learning_rate": 6.635994544856019e-06, + "loss": 0.0507, + "step": 86290 + }, + { + "epoch": 4.03, + "learning_rate": 6.635210759801232e-06, + "loss": 0.0622, + "step": 86295 + }, + { + "epoch": 4.03, + "learning_rate": 6.634426974746446e-06, + "loss": 0.059, + "step": 86300 + }, + { + "epoch": 4.03, + "learning_rate": 6.63364318969166e-06, + "loss": 0.0548, + "step": 86305 + }, + { + "epoch": 4.03, + "learning_rate": 6.632859404636873e-06, + "loss": 0.1452, + "step": 86310 + }, + { + "epoch": 4.03, + "learning_rate": 6.632075619582087e-06, + "loss": 0.1255, + "step": 86315 + }, + { + "epoch": 4.03, + "learning_rate": 6.6312918345273e-06, + "loss": 0.13, + "step": 86320 + }, + { + "epoch": 4.03, + "learning_rate": 6.630508049472512e-06, + "loss": 0.2595, + "step": 86325 + }, + { + "epoch": 4.03, + "learning_rate": 6.629724264417726e-06, + "loss": 0.0796, + "step": 86330 + }, + { + "epoch": 4.03, + "learning_rate": 6.628940479362939e-06, + "loss": 0.0438, + "step": 86335 + }, + { + "epoch": 4.03, + "learning_rate": 6.628156694308153e-06, + "loss": 0.0639, + "step": 86340 + }, + { + "epoch": 4.03, + "learning_rate": 6.627372909253366e-06, + "loss": 0.0474, + "step": 86345 + }, + { + "epoch": 4.03, + "learning_rate": 6.62658912419858e-06, + "loss": 0.0273, + "step": 86350 + }, + { + "epoch": 4.03, + "learning_rate": 6.625805339143794e-06, + "loss": 0.0312, + "step": 86355 + }, + { + "epoch": 4.03, + "learning_rate": 6.625021554089007e-06, + "loss": 0.1291, + "step": 86360 + }, + { + "epoch": 4.03, + "learning_rate": 6.624237769034221e-06, + "loss": 0.1709, + "step": 86365 + }, + { + "epoch": 4.03, + "learning_rate": 6.623453983979434e-06, + "loss": 0.14, + "step": 86370 + }, + { + "epoch": 4.03, + "learning_rate": 6.622670198924648e-06, + "loss": 0.2679, + "step": 86375 + }, + { + "epoch": 4.03, + "learning_rate": 6.621886413869861e-06, + "loss": 0.0834, + "step": 86380 + }, + { + "epoch": 4.03, + "learning_rate": 6.621102628815075e-06, + "loss": 0.0173, + "step": 86385 + }, + { + "epoch": 4.03, + "learning_rate": 6.620318843760287e-06, + "loss": 0.0317, + "step": 86390 + }, + { + "epoch": 4.03, + "learning_rate": 6.6195350587055e-06, + "loss": 0.093, + "step": 86395 + }, + { + "epoch": 4.03, + "learning_rate": 6.618751273650714e-06, + "loss": 0.0668, + "step": 86400 + }, + { + "epoch": 4.03, + "learning_rate": 6.617967488595928e-06, + "loss": 0.1152, + "step": 86405 + }, + { + "epoch": 4.03, + "learning_rate": 6.617183703541141e-06, + "loss": 0.212, + "step": 86410 + }, + { + "epoch": 4.03, + "learning_rate": 6.616399918486355e-06, + "loss": 0.1631, + "step": 86415 + }, + { + "epoch": 4.03, + "learning_rate": 6.615616133431568e-06, + "loss": 0.2263, + "step": 86420 + }, + { + "epoch": 4.03, + "learning_rate": 6.614832348376782e-06, + "loss": 0.1845, + "step": 86425 + }, + { + "epoch": 4.03, + "learning_rate": 6.614048563321995e-06, + "loss": 0.0779, + "step": 86430 + }, + { + "epoch": 4.03, + "learning_rate": 6.613264778267209e-06, + "loss": 0.03, + "step": 86435 + }, + { + "epoch": 4.03, + "learning_rate": 6.612480993212422e-06, + "loss": 0.0334, + "step": 86440 + }, + { + "epoch": 4.03, + "learning_rate": 6.611697208157636e-06, + "loss": 0.0441, + "step": 86445 + }, + { + "epoch": 4.03, + "learning_rate": 6.610913423102849e-06, + "loss": 0.0852, + "step": 86450 + }, + { + "epoch": 4.03, + "learning_rate": 6.610129638048062e-06, + "loss": 0.1316, + "step": 86455 + }, + { + "epoch": 4.03, + "learning_rate": 6.609345852993275e-06, + "loss": 0.0496, + "step": 86460 + }, + { + "epoch": 4.03, + "learning_rate": 6.608562067938489e-06, + "loss": 0.1089, + "step": 86465 + }, + { + "epoch": 4.03, + "learning_rate": 6.607778282883702e-06, + "loss": 0.104, + "step": 86470 + }, + { + "epoch": 4.04, + "learning_rate": 6.606994497828916e-06, + "loss": 0.3546, + "step": 86475 + }, + { + "epoch": 4.04, + "learning_rate": 6.606210712774129e-06, + "loss": 0.0999, + "step": 86480 + }, + { + "epoch": 4.04, + "learning_rate": 6.605426927719343e-06, + "loss": 0.0555, + "step": 86485 + }, + { + "epoch": 4.04, + "learning_rate": 6.604643142664556e-06, + "loss": 0.0279, + "step": 86490 + }, + { + "epoch": 4.04, + "learning_rate": 6.60385935760977e-06, + "loss": 0.0759, + "step": 86495 + }, + { + "epoch": 4.04, + "learning_rate": 6.603075572554983e-06, + "loss": 0.084, + "step": 86500 + }, + { + "epoch": 4.04, + "learning_rate": 6.602291787500197e-06, + "loss": 0.0451, + "step": 86505 + }, + { + "epoch": 4.04, + "learning_rate": 6.60150800244541e-06, + "loss": 0.0754, + "step": 86510 + }, + { + "epoch": 4.04, + "learning_rate": 6.600724217390624e-06, + "loss": 0.0851, + "step": 86515 + }, + { + "epoch": 4.04, + "learning_rate": 6.599940432335836e-06, + "loss": 0.1036, + "step": 86520 + }, + { + "epoch": 4.04, + "learning_rate": 6.59915664728105e-06, + "loss": 0.278, + "step": 86525 + }, + { + "epoch": 4.04, + "learning_rate": 6.598372862226263e-06, + "loss": 0.0583, + "step": 86530 + }, + { + "epoch": 4.04, + "learning_rate": 6.597589077171477e-06, + "loss": 0.0428, + "step": 86535 + }, + { + "epoch": 4.04, + "learning_rate": 6.59680529211669e-06, + "loss": 0.0496, + "step": 86540 + }, + { + "epoch": 4.04, + "learning_rate": 6.596021507061904e-06, + "loss": 0.0367, + "step": 86545 + }, + { + "epoch": 4.04, + "learning_rate": 6.595237722007117e-06, + "loss": 0.0872, + "step": 86550 + }, + { + "epoch": 4.04, + "learning_rate": 6.594453936952331e-06, + "loss": 0.0676, + "step": 86555 + }, + { + "epoch": 4.04, + "learning_rate": 6.593670151897544e-06, + "loss": 0.1954, + "step": 86560 + }, + { + "epoch": 4.04, + "learning_rate": 6.592886366842758e-06, + "loss": 0.0746, + "step": 86565 + }, + { + "epoch": 4.04, + "learning_rate": 6.592102581787972e-06, + "loss": 0.1854, + "step": 86570 + }, + { + "epoch": 4.04, + "learning_rate": 6.591318796733185e-06, + "loss": 0.1785, + "step": 86575 + }, + { + "epoch": 4.04, + "learning_rate": 6.590535011678399e-06, + "loss": 0.0884, + "step": 86580 + }, + { + "epoch": 4.04, + "learning_rate": 6.589751226623611e-06, + "loss": 0.0615, + "step": 86585 + }, + { + "epoch": 4.04, + "learning_rate": 6.588967441568824e-06, + "loss": 0.0387, + "step": 86590 + }, + { + "epoch": 4.04, + "learning_rate": 6.588183656514038e-06, + "loss": 0.0626, + "step": 86595 + }, + { + "epoch": 4.04, + "learning_rate": 6.587399871459251e-06, + "loss": 0.045, + "step": 86600 + }, + { + "epoch": 4.04, + "learning_rate": 6.586616086404465e-06, + "loss": 0.0564, + "step": 86605 + }, + { + "epoch": 4.04, + "learning_rate": 6.585832301349678e-06, + "loss": 0.0815, + "step": 86610 + }, + { + "epoch": 4.04, + "learning_rate": 6.585048516294892e-06, + "loss": 0.1293, + "step": 86615 + }, + { + "epoch": 4.04, + "learning_rate": 6.584264731240106e-06, + "loss": 0.1797, + "step": 86620 + }, + { + "epoch": 4.04, + "learning_rate": 6.583480946185319e-06, + "loss": 0.2436, + "step": 86625 + }, + { + "epoch": 4.04, + "learning_rate": 6.582697161130533e-06, + "loss": 0.1078, + "step": 86630 + }, + { + "epoch": 4.04, + "learning_rate": 6.581913376075746e-06, + "loss": 0.0091, + "step": 86635 + }, + { + "epoch": 4.04, + "learning_rate": 6.58112959102096e-06, + "loss": 0.0221, + "step": 86640 + }, + { + "epoch": 4.04, + "learning_rate": 6.580345805966173e-06, + "loss": 0.07, + "step": 86645 + }, + { + "epoch": 4.04, + "learning_rate": 6.579562020911385e-06, + "loss": 0.0768, + "step": 86650 + }, + { + "epoch": 4.04, + "learning_rate": 6.578778235856599e-06, + "loss": 0.0561, + "step": 86655 + }, + { + "epoch": 4.04, + "learning_rate": 6.577994450801812e-06, + "loss": 0.098, + "step": 86660 + }, + { + "epoch": 4.04, + "learning_rate": 6.577210665747026e-06, + "loss": 0.0594, + "step": 86665 + }, + { + "epoch": 4.04, + "learning_rate": 6.57642688069224e-06, + "loss": 0.2244, + "step": 86670 + }, + { + "epoch": 4.04, + "learning_rate": 6.575643095637453e-06, + "loss": 0.3049, + "step": 86675 + }, + { + "epoch": 4.04, + "learning_rate": 6.574859310582667e-06, + "loss": 0.1069, + "step": 86680 + }, + { + "epoch": 4.04, + "learning_rate": 6.57407552552788e-06, + "loss": 0.0324, + "step": 86685 + }, + { + "epoch": 4.05, + "learning_rate": 6.573291740473094e-06, + "loss": 0.0757, + "step": 86690 + }, + { + "epoch": 4.05, + "learning_rate": 6.572507955418307e-06, + "loss": 0.0384, + "step": 86695 + }, + { + "epoch": 4.05, + "learning_rate": 6.571724170363521e-06, + "loss": 0.0515, + "step": 86700 + }, + { + "epoch": 4.05, + "learning_rate": 6.570940385308734e-06, + "loss": 0.1113, + "step": 86705 + }, + { + "epoch": 4.05, + "learning_rate": 6.570156600253948e-06, + "loss": 0.1069, + "step": 86710 + }, + { + "epoch": 4.05, + "learning_rate": 6.56937281519916e-06, + "loss": 0.1026, + "step": 86715 + }, + { + "epoch": 4.05, + "learning_rate": 6.568589030144374e-06, + "loss": 0.1806, + "step": 86720 + }, + { + "epoch": 4.05, + "learning_rate": 6.567805245089587e-06, + "loss": 0.2835, + "step": 86725 + }, + { + "epoch": 4.05, + "learning_rate": 6.567021460034801e-06, + "loss": 0.1147, + "step": 86730 + }, + { + "epoch": 4.05, + "learning_rate": 6.566237674980014e-06, + "loss": 0.0272, + "step": 86735 + }, + { + "epoch": 4.05, + "learning_rate": 6.565453889925228e-06, + "loss": 0.0532, + "step": 86740 + }, + { + "epoch": 4.05, + "learning_rate": 6.564670104870441e-06, + "loss": 0.0573, + "step": 86745 + }, + { + "epoch": 4.05, + "learning_rate": 6.563886319815655e-06, + "loss": 0.1069, + "step": 86750 + }, + { + "epoch": 4.05, + "learning_rate": 6.563102534760868e-06, + "loss": 0.1186, + "step": 86755 + }, + { + "epoch": 4.05, + "learning_rate": 6.5623187497060816e-06, + "loss": 0.1125, + "step": 86760 + }, + { + "epoch": 4.05, + "learning_rate": 6.561534964651295e-06, + "loss": 0.0842, + "step": 86765 + }, + { + "epoch": 4.05, + "learning_rate": 6.5607511795965085e-06, + "loss": 0.1723, + "step": 86770 + }, + { + "epoch": 4.05, + "learning_rate": 6.559967394541722e-06, + "loss": 0.2628, + "step": 86775 + }, + { + "epoch": 4.05, + "learning_rate": 6.559183609486935e-06, + "loss": 0.0976, + "step": 86780 + }, + { + "epoch": 4.05, + "learning_rate": 6.558399824432148e-06, + "loss": 0.025, + "step": 86785 + }, + { + "epoch": 4.05, + "learning_rate": 6.557616039377362e-06, + "loss": 0.0342, + "step": 86790 + }, + { + "epoch": 4.05, + "learning_rate": 6.556832254322575e-06, + "loss": 0.0495, + "step": 86795 + }, + { + "epoch": 4.05, + "learning_rate": 6.5560484692677886e-06, + "loss": 0.0585, + "step": 86800 + }, + { + "epoch": 4.05, + "learning_rate": 6.555264684213002e-06, + "loss": 0.0427, + "step": 86805 + }, + { + "epoch": 4.05, + "learning_rate": 6.5544808991582155e-06, + "loss": 0.1364, + "step": 86810 + }, + { + "epoch": 4.05, + "learning_rate": 6.553697114103429e-06, + "loss": 0.1192, + "step": 86815 + }, + { + "epoch": 4.05, + "learning_rate": 6.5529133290486425e-06, + "loss": 0.148, + "step": 86820 + }, + { + "epoch": 4.05, + "learning_rate": 6.5521295439938556e-06, + "loss": 0.2054, + "step": 86825 + }, + { + "epoch": 4.05, + "learning_rate": 6.5513457589390695e-06, + "loss": 0.1116, + "step": 86830 + }, + { + "epoch": 4.05, + "learning_rate": 6.5505619738842825e-06, + "loss": 0.0213, + "step": 86835 + }, + { + "epoch": 4.05, + "learning_rate": 6.5497781888294964e-06, + "loss": 0.0252, + "step": 86840 + }, + { + "epoch": 4.05, + "learning_rate": 6.548994403774709e-06, + "loss": 0.0363, + "step": 86845 + }, + { + "epoch": 4.05, + "learning_rate": 6.5482106187199225e-06, + "loss": 0.055, + "step": 86850 + }, + { + "epoch": 4.05, + "learning_rate": 6.547426833665136e-06, + "loss": 0.063, + "step": 86855 + }, + { + "epoch": 4.05, + "learning_rate": 6.5466430486103495e-06, + "loss": 0.0967, + "step": 86860 + }, + { + "epoch": 4.05, + "learning_rate": 6.5458592635555626e-06, + "loss": 0.091, + "step": 86865 + }, + { + "epoch": 4.05, + "learning_rate": 6.5450754785007765e-06, + "loss": 0.1131, + "step": 86870 + }, + { + "epoch": 4.05, + "learning_rate": 6.5442916934459895e-06, + "loss": 0.3417, + "step": 86875 + }, + { + "epoch": 4.05, + "learning_rate": 6.5435079083912034e-06, + "loss": 0.0783, + "step": 86880 + }, + { + "epoch": 4.05, + "learning_rate": 6.542724123336417e-06, + "loss": 0.027, + "step": 86885 + }, + { + "epoch": 4.05, + "learning_rate": 6.54194033828163e-06, + "loss": 0.0212, + "step": 86890 + }, + { + "epoch": 4.05, + "learning_rate": 6.541156553226844e-06, + "loss": 0.0443, + "step": 86895 + }, + { + "epoch": 4.05, + "learning_rate": 6.540372768172057e-06, + "loss": 0.0521, + "step": 86900 + }, + { + "epoch": 4.06, + "learning_rate": 6.539588983117271e-06, + "loss": 0.0464, + "step": 86905 + }, + { + "epoch": 4.06, + "learning_rate": 6.5388051980624835e-06, + "loss": 0.2171, + "step": 86910 + }, + { + "epoch": 4.06, + "learning_rate": 6.5380214130076965e-06, + "loss": 0.1469, + "step": 86915 + }, + { + "epoch": 4.06, + "learning_rate": 6.5372376279529105e-06, + "loss": 0.1103, + "step": 86920 + }, + { + "epoch": 4.06, + "learning_rate": 6.5364538428981235e-06, + "loss": 0.2553, + "step": 86925 + }, + { + "epoch": 4.06, + "learning_rate": 6.535670057843337e-06, + "loss": 0.0803, + "step": 86930 + }, + { + "epoch": 4.06, + "learning_rate": 6.534886272788551e-06, + "loss": 0.0526, + "step": 86935 + }, + { + "epoch": 4.06, + "learning_rate": 6.534102487733764e-06, + "loss": 0.0252, + "step": 86940 + }, + { + "epoch": 4.06, + "learning_rate": 6.533318702678978e-06, + "loss": 0.0656, + "step": 86945 + }, + { + "epoch": 4.06, + "learning_rate": 6.532534917624191e-06, + "loss": 0.0772, + "step": 86950 + }, + { + "epoch": 4.06, + "learning_rate": 6.531751132569405e-06, + "loss": 0.0528, + "step": 86955 + }, + { + "epoch": 4.06, + "learning_rate": 6.530967347514618e-06, + "loss": 0.1471, + "step": 86960 + }, + { + "epoch": 4.06, + "learning_rate": 6.530183562459832e-06, + "loss": 0.0885, + "step": 86965 + }, + { + "epoch": 4.06, + "learning_rate": 6.529399777405045e-06, + "loss": 0.2131, + "step": 86970 + }, + { + "epoch": 4.06, + "learning_rate": 6.5286159923502575e-06, + "loss": 0.235, + "step": 86975 + }, + { + "epoch": 4.06, + "learning_rate": 6.527832207295471e-06, + "loss": 0.0936, + "step": 86980 + }, + { + "epoch": 4.06, + "learning_rate": 6.527048422240685e-06, + "loss": 0.0433, + "step": 86985 + }, + { + "epoch": 4.06, + "learning_rate": 6.526264637185898e-06, + "loss": 0.0312, + "step": 86990 + }, + { + "epoch": 4.06, + "learning_rate": 6.525480852131112e-06, + "loss": 0.0286, + "step": 86995 + }, + { + "epoch": 4.06, + "learning_rate": 6.524697067076325e-06, + "loss": 0.1035, + "step": 87000 + }, + { + "epoch": 4.06, + "learning_rate": 6.523913282021539e-06, + "loss": 0.1216, + "step": 87005 + }, + { + "epoch": 4.06, + "learning_rate": 6.523129496966752e-06, + "loss": 0.1191, + "step": 87010 + }, + { + "epoch": 4.06, + "learning_rate": 6.522345711911966e-06, + "loss": 0.1446, + "step": 87015 + }, + { + "epoch": 4.06, + "learning_rate": 6.521561926857179e-06, + "loss": 0.1196, + "step": 87020 + }, + { + "epoch": 4.06, + "learning_rate": 6.520778141802393e-06, + "loss": 0.1584, + "step": 87025 + }, + { + "epoch": 4.06, + "learning_rate": 6.519994356747606e-06, + "loss": 0.0627, + "step": 87030 + }, + { + "epoch": 4.06, + "learning_rate": 6.51921057169282e-06, + "loss": 0.0391, + "step": 87035 + }, + { + "epoch": 4.06, + "learning_rate": 6.518426786638032e-06, + "loss": 0.0231, + "step": 87040 + }, + { + "epoch": 4.06, + "learning_rate": 6.517643001583246e-06, + "loss": 0.0221, + "step": 87045 + }, + { + "epoch": 4.06, + "learning_rate": 6.516859216528459e-06, + "loss": 0.0765, + "step": 87050 + }, + { + "epoch": 4.06, + "learning_rate": 6.516075431473673e-06, + "loss": 0.0949, + "step": 87055 + }, + { + "epoch": 4.06, + "learning_rate": 6.515291646418886e-06, + "loss": 0.073, + "step": 87060 + }, + { + "epoch": 4.06, + "learning_rate": 6.5145078613641e-06, + "loss": 0.0697, + "step": 87065 + }, + { + "epoch": 4.06, + "learning_rate": 6.513724076309313e-06, + "loss": 0.181, + "step": 87070 + }, + { + "epoch": 4.06, + "learning_rate": 6.512940291254527e-06, + "loss": 0.4061, + "step": 87075 + }, + { + "epoch": 4.06, + "learning_rate": 6.51215650619974e-06, + "loss": 0.0987, + "step": 87080 + }, + { + "epoch": 4.06, + "learning_rate": 6.511372721144954e-06, + "loss": 0.0311, + "step": 87085 + }, + { + "epoch": 4.06, + "learning_rate": 6.510588936090167e-06, + "loss": 0.0548, + "step": 87090 + }, + { + "epoch": 4.06, + "learning_rate": 6.509805151035381e-06, + "loss": 0.0441, + "step": 87095 + }, + { + "epoch": 4.06, + "learning_rate": 6.509021365980594e-06, + "loss": 0.0354, + "step": 87100 + }, + { + "epoch": 4.06, + "learning_rate": 6.508237580925807e-06, + "loss": 0.056, + "step": 87105 + }, + { + "epoch": 4.06, + "learning_rate": 6.50745379587102e-06, + "loss": 0.1044, + "step": 87110 + }, + { + "epoch": 4.06, + "learning_rate": 6.506670010816234e-06, + "loss": 0.1385, + "step": 87115 + }, + { + "epoch": 4.07, + "learning_rate": 6.505886225761447e-06, + "loss": 0.1795, + "step": 87120 + }, + { + "epoch": 4.07, + "learning_rate": 6.505102440706661e-06, + "loss": 0.1908, + "step": 87125 + }, + { + "epoch": 4.07, + "learning_rate": 6.504318655651874e-06, + "loss": 0.0907, + "step": 87130 + }, + { + "epoch": 4.07, + "learning_rate": 6.503534870597088e-06, + "loss": 0.0217, + "step": 87135 + }, + { + "epoch": 4.07, + "learning_rate": 6.502751085542301e-06, + "loss": 0.0208, + "step": 87140 + }, + { + "epoch": 4.07, + "learning_rate": 6.501967300487515e-06, + "loss": 0.0597, + "step": 87145 + }, + { + "epoch": 4.07, + "learning_rate": 6.501183515432729e-06, + "loss": 0.0216, + "step": 87150 + }, + { + "epoch": 4.07, + "learning_rate": 6.500399730377942e-06, + "loss": 0.0424, + "step": 87155 + }, + { + "epoch": 4.07, + "learning_rate": 6.499615945323156e-06, + "loss": 0.1451, + "step": 87160 + }, + { + "epoch": 4.07, + "learning_rate": 6.498832160268369e-06, + "loss": 0.1819, + "step": 87165 + }, + { + "epoch": 4.07, + "learning_rate": 6.498048375213581e-06, + "loss": 0.1446, + "step": 87170 + }, + { + "epoch": 4.07, + "learning_rate": 6.497264590158795e-06, + "loss": 0.3621, + "step": 87175 + }, + { + "epoch": 4.07, + "learning_rate": 6.496480805104008e-06, + "loss": 0.0368, + "step": 87180 + }, + { + "epoch": 4.07, + "learning_rate": 6.495697020049222e-06, + "loss": 0.0267, + "step": 87185 + }, + { + "epoch": 4.07, + "learning_rate": 6.494913234994435e-06, + "loss": 0.0279, + "step": 87190 + }, + { + "epoch": 4.07, + "learning_rate": 6.494129449939649e-06, + "loss": 0.0602, + "step": 87195 + }, + { + "epoch": 4.07, + "learning_rate": 6.493345664884863e-06, + "loss": 0.0937, + "step": 87200 + }, + { + "epoch": 4.07, + "learning_rate": 6.492561879830076e-06, + "loss": 0.0745, + "step": 87205 + }, + { + "epoch": 4.07, + "learning_rate": 6.49177809477529e-06, + "loss": 0.0735, + "step": 87210 + }, + { + "epoch": 4.07, + "learning_rate": 6.490994309720503e-06, + "loss": 0.1595, + "step": 87215 + }, + { + "epoch": 4.07, + "learning_rate": 6.490210524665717e-06, + "loss": 0.2627, + "step": 87220 + }, + { + "epoch": 4.07, + "learning_rate": 6.48942673961093e-06, + "loss": 0.2398, + "step": 87225 + }, + { + "epoch": 4.07, + "learning_rate": 6.488642954556144e-06, + "loss": 0.0896, + "step": 87230 + }, + { + "epoch": 4.07, + "learning_rate": 6.487859169501356e-06, + "loss": 0.0176, + "step": 87235 + }, + { + "epoch": 4.07, + "learning_rate": 6.487075384446569e-06, + "loss": 0.0297, + "step": 87240 + }, + { + "epoch": 4.07, + "learning_rate": 6.486291599391783e-06, + "loss": 0.0487, + "step": 87245 + }, + { + "epoch": 4.07, + "learning_rate": 6.485507814336997e-06, + "loss": 0.045, + "step": 87250 + }, + { + "epoch": 4.07, + "learning_rate": 6.48472402928221e-06, + "loss": 0.0454, + "step": 87255 + }, + { + "epoch": 4.07, + "learning_rate": 6.483940244227424e-06, + "loss": 0.1093, + "step": 87260 + }, + { + "epoch": 4.07, + "learning_rate": 6.483156459172637e-06, + "loss": 0.1986, + "step": 87265 + }, + { + "epoch": 4.07, + "learning_rate": 6.482372674117851e-06, + "loss": 0.1638, + "step": 87270 + }, + { + "epoch": 4.07, + "learning_rate": 6.481588889063064e-06, + "loss": 0.2735, + "step": 87275 + }, + { + "epoch": 4.07, + "learning_rate": 6.480805104008278e-06, + "loss": 0.0861, + "step": 87280 + }, + { + "epoch": 4.07, + "learning_rate": 6.480021318953491e-06, + "loss": 0.0302, + "step": 87285 + }, + { + "epoch": 4.07, + "learning_rate": 6.479237533898705e-06, + "loss": 0.0729, + "step": 87290 + }, + { + "epoch": 4.07, + "learning_rate": 6.478453748843918e-06, + "loss": 0.0527, + "step": 87295 + }, + { + "epoch": 4.07, + "learning_rate": 6.477669963789131e-06, + "loss": 0.058, + "step": 87300 + }, + { + "epoch": 4.07, + "learning_rate": 6.476886178734344e-06, + "loss": 0.0528, + "step": 87305 + }, + { + "epoch": 4.07, + "learning_rate": 6.476102393679558e-06, + "loss": 0.0929, + "step": 87310 + }, + { + "epoch": 4.07, + "learning_rate": 6.475318608624771e-06, + "loss": 0.0393, + "step": 87315 + }, + { + "epoch": 4.07, + "learning_rate": 6.474534823569985e-06, + "loss": 0.1798, + "step": 87320 + }, + { + "epoch": 4.07, + "learning_rate": 6.473751038515198e-06, + "loss": 0.2319, + "step": 87325 + }, + { + "epoch": 4.07, + "learning_rate": 6.472967253460412e-06, + "loss": 0.0834, + "step": 87330 + }, + { + "epoch": 4.08, + "learning_rate": 6.472183468405625e-06, + "loss": 0.0408, + "step": 87335 + }, + { + "epoch": 4.08, + "learning_rate": 6.471399683350839e-06, + "loss": 0.0341, + "step": 87340 + }, + { + "epoch": 4.08, + "learning_rate": 6.470615898296052e-06, + "loss": 0.0598, + "step": 87345 + }, + { + "epoch": 4.08, + "learning_rate": 6.469832113241266e-06, + "loss": 0.133, + "step": 87350 + }, + { + "epoch": 4.08, + "learning_rate": 6.469048328186479e-06, + "loss": 0.0457, + "step": 87355 + }, + { + "epoch": 4.08, + "learning_rate": 6.468264543131693e-06, + "loss": 0.1221, + "step": 87360 + }, + { + "epoch": 4.08, + "learning_rate": 6.467480758076905e-06, + "loss": 0.1247, + "step": 87365 + }, + { + "epoch": 4.08, + "learning_rate": 6.466696973022119e-06, + "loss": 0.1616, + "step": 87370 + }, + { + "epoch": 4.08, + "learning_rate": 6.465913187967332e-06, + "loss": 0.3098, + "step": 87375 + }, + { + "epoch": 4.08, + "learning_rate": 6.465129402912546e-06, + "loss": 0.1429, + "step": 87380 + }, + { + "epoch": 4.08, + "learning_rate": 6.464345617857759e-06, + "loss": 0.0121, + "step": 87385 + }, + { + "epoch": 4.08, + "learning_rate": 6.463561832802973e-06, + "loss": 0.0204, + "step": 87390 + }, + { + "epoch": 4.08, + "learning_rate": 6.462778047748186e-06, + "loss": 0.0238, + "step": 87395 + }, + { + "epoch": 4.08, + "learning_rate": 6.4619942626934e-06, + "loss": 0.0535, + "step": 87400 + }, + { + "epoch": 4.08, + "learning_rate": 6.461210477638613e-06, + "loss": 0.1136, + "step": 87405 + }, + { + "epoch": 4.08, + "learning_rate": 6.460426692583827e-06, + "loss": 0.0846, + "step": 87410 + }, + { + "epoch": 4.08, + "learning_rate": 6.45964290752904e-06, + "loss": 0.0988, + "step": 87415 + }, + { + "epoch": 4.08, + "learning_rate": 6.458859122474254e-06, + "loss": 0.2105, + "step": 87420 + }, + { + "epoch": 4.08, + "learning_rate": 6.4580753374194675e-06, + "loss": 0.3813, + "step": 87425 + }, + { + "epoch": 4.08, + "learning_rate": 6.45729155236468e-06, + "loss": 0.0747, + "step": 87430 + }, + { + "epoch": 4.08, + "learning_rate": 6.456507767309893e-06, + "loss": 0.0165, + "step": 87435 + }, + { + "epoch": 4.08, + "learning_rate": 6.455723982255107e-06, + "loss": 0.0097, + "step": 87440 + }, + { + "epoch": 4.08, + "learning_rate": 6.45494019720032e-06, + "loss": 0.0734, + "step": 87445 + }, + { + "epoch": 4.08, + "learning_rate": 6.454156412145534e-06, + "loss": 0.0889, + "step": 87450 + }, + { + "epoch": 4.08, + "learning_rate": 6.453372627090747e-06, + "loss": 0.0418, + "step": 87455 + }, + { + "epoch": 4.08, + "learning_rate": 6.452588842035961e-06, + "loss": 0.0242, + "step": 87460 + }, + { + "epoch": 4.08, + "learning_rate": 6.4518050569811745e-06, + "loss": 0.1215, + "step": 87465 + }, + { + "epoch": 4.08, + "learning_rate": 6.4510212719263876e-06, + "loss": 0.1715, + "step": 87470 + }, + { + "epoch": 4.08, + "learning_rate": 6.4502374868716015e-06, + "loss": 0.2123, + "step": 87475 + }, + { + "epoch": 4.08, + "learning_rate": 6.4494537018168145e-06, + "loss": 0.0803, + "step": 87480 + }, + { + "epoch": 4.08, + "learning_rate": 6.4486699167620284e-06, + "loss": 0.0692, + "step": 87485 + }, + { + "epoch": 4.08, + "learning_rate": 6.4478861317072415e-06, + "loss": 0.0068, + "step": 87490 + }, + { + "epoch": 4.08, + "learning_rate": 6.447102346652454e-06, + "loss": 0.039, + "step": 87495 + }, + { + "epoch": 4.08, + "learning_rate": 6.446318561597668e-06, + "loss": 0.0553, + "step": 87500 + }, + { + "epoch": 4.08, + "learning_rate": 6.445534776542881e-06, + "loss": 0.1083, + "step": 87505 + }, + { + "epoch": 4.08, + "learning_rate": 6.444750991488095e-06, + "loss": 0.1061, + "step": 87510 + }, + { + "epoch": 4.08, + "learning_rate": 6.4439672064333085e-06, + "loss": 0.0711, + "step": 87515 + }, + { + "epoch": 4.08, + "learning_rate": 6.4431834213785215e-06, + "loss": 0.118, + "step": 87520 + }, + { + "epoch": 4.08, + "learning_rate": 6.4423996363237355e-06, + "loss": 0.1431, + "step": 87525 + }, + { + "epoch": 4.08, + "learning_rate": 6.4416158512689485e-06, + "loss": 0.1181, + "step": 87530 + }, + { + "epoch": 4.08, + "learning_rate": 6.440832066214162e-06, + "loss": 0.013, + "step": 87535 + }, + { + "epoch": 4.08, + "learning_rate": 6.4400482811593755e-06, + "loss": 0.0418, + "step": 87540 + }, + { + "epoch": 4.08, + "learning_rate": 6.439264496104589e-06, + "loss": 0.0703, + "step": 87545 + }, + { + "epoch": 4.09, + "learning_rate": 6.4384807110498024e-06, + "loss": 0.088, + "step": 87550 + }, + { + "epoch": 4.09, + "learning_rate": 6.437696925995016e-06, + "loss": 0.0454, + "step": 87555 + }, + { + "epoch": 4.09, + "learning_rate": 6.4369131409402286e-06, + "loss": 0.0776, + "step": 87560 + }, + { + "epoch": 4.09, + "learning_rate": 6.4361293558854425e-06, + "loss": 0.076, + "step": 87565 + }, + { + "epoch": 4.09, + "learning_rate": 6.4353455708306555e-06, + "loss": 0.1337, + "step": 87570 + }, + { + "epoch": 4.09, + "learning_rate": 6.4345617857758694e-06, + "loss": 0.1474, + "step": 87575 + }, + { + "epoch": 4.09, + "learning_rate": 6.4337780007210825e-06, + "loss": 0.0905, + "step": 87580 + }, + { + "epoch": 4.09, + "learning_rate": 6.432994215666296e-06, + "loss": 0.0118, + "step": 87585 + }, + { + "epoch": 4.09, + "learning_rate": 6.4322104306115094e-06, + "loss": 0.1066, + "step": 87590 + }, + { + "epoch": 4.09, + "learning_rate": 6.431426645556723e-06, + "loss": 0.0728, + "step": 87595 + }, + { + "epoch": 4.09, + "learning_rate": 6.430642860501936e-06, + "loss": 0.0312, + "step": 87600 + }, + { + "epoch": 4.09, + "learning_rate": 6.42985907544715e-06, + "loss": 0.0588, + "step": 87605 + }, + { + "epoch": 4.09, + "learning_rate": 6.429075290392363e-06, + "loss": 0.1211, + "step": 87610 + }, + { + "epoch": 4.09, + "learning_rate": 6.428291505337577e-06, + "loss": 0.1271, + "step": 87615 + }, + { + "epoch": 4.09, + "learning_rate": 6.42750772028279e-06, + "loss": 0.1344, + "step": 87620 + }, + { + "epoch": 4.09, + "learning_rate": 6.426723935228003e-06, + "loss": 0.3591, + "step": 87625 + }, + { + "epoch": 4.09, + "learning_rate": 6.4259401501732165e-06, + "loss": 0.0625, + "step": 87630 + }, + { + "epoch": 4.09, + "learning_rate": 6.42515636511843e-06, + "loss": 0.0176, + "step": 87635 + }, + { + "epoch": 4.09, + "learning_rate": 6.4243725800636434e-06, + "loss": 0.0641, + "step": 87640 + }, + { + "epoch": 4.09, + "learning_rate": 6.423588795008857e-06, + "loss": 0.0652, + "step": 87645 + }, + { + "epoch": 4.09, + "learning_rate": 6.42280500995407e-06, + "loss": 0.0472, + "step": 87650 + }, + { + "epoch": 4.09, + "learning_rate": 6.422021224899284e-06, + "loss": 0.1405, + "step": 87655 + }, + { + "epoch": 4.09, + "learning_rate": 6.421237439844497e-06, + "loss": 0.1497, + "step": 87660 + }, + { + "epoch": 4.09, + "learning_rate": 6.420453654789711e-06, + "loss": 0.0665, + "step": 87665 + }, + { + "epoch": 4.09, + "learning_rate": 6.419669869734924e-06, + "loss": 0.1844, + "step": 87670 + }, + { + "epoch": 4.09, + "learning_rate": 6.418886084680138e-06, + "loss": 0.2518, + "step": 87675 + }, + { + "epoch": 4.09, + "learning_rate": 6.418102299625351e-06, + "loss": 0.1124, + "step": 87680 + }, + { + "epoch": 4.09, + "learning_rate": 6.417318514570565e-06, + "loss": 0.0288, + "step": 87685 + }, + { + "epoch": 4.09, + "learning_rate": 6.416534729515777e-06, + "loss": 0.0627, + "step": 87690 + }, + { + "epoch": 4.09, + "learning_rate": 6.415750944460991e-06, + "loss": 0.049, + "step": 87695 + }, + { + "epoch": 4.09, + "learning_rate": 6.414967159406204e-06, + "loss": 0.0789, + "step": 87700 + }, + { + "epoch": 4.09, + "learning_rate": 6.414183374351418e-06, + "loss": 0.1132, + "step": 87705 + }, + { + "epoch": 4.09, + "learning_rate": 6.413399589296631e-06, + "loss": 0.081, + "step": 87710 + }, + { + "epoch": 4.09, + "learning_rate": 6.412615804241845e-06, + "loss": 0.1379, + "step": 87715 + }, + { + "epoch": 4.09, + "learning_rate": 6.411832019187058e-06, + "loss": 0.1553, + "step": 87720 + }, + { + "epoch": 4.09, + "learning_rate": 6.411048234132272e-06, + "loss": 0.1935, + "step": 87725 + }, + { + "epoch": 4.09, + "learning_rate": 6.410264449077486e-06, + "loss": 0.0936, + "step": 87730 + }, + { + "epoch": 4.09, + "learning_rate": 6.409480664022699e-06, + "loss": 0.0153, + "step": 87735 + }, + { + "epoch": 4.09, + "learning_rate": 6.408696878967913e-06, + "loss": 0.0296, + "step": 87740 + }, + { + "epoch": 4.09, + "learning_rate": 6.407913093913126e-06, + "loss": 0.0789, + "step": 87745 + }, + { + "epoch": 4.09, + "learning_rate": 6.40712930885834e-06, + "loss": 0.0188, + "step": 87750 + }, + { + "epoch": 4.09, + "learning_rate": 6.406345523803552e-06, + "loss": 0.1239, + "step": 87755 + }, + { + "epoch": 4.1, + "learning_rate": 6.405561738748765e-06, + "loss": 0.098, + "step": 87760 + }, + { + "epoch": 4.1, + "learning_rate": 6.404777953693979e-06, + "loss": 0.118, + "step": 87765 + }, + { + "epoch": 4.1, + "learning_rate": 6.403994168639192e-06, + "loss": 0.1554, + "step": 87770 + }, + { + "epoch": 4.1, + "learning_rate": 6.403210383584406e-06, + "loss": 0.1592, + "step": 87775 + }, + { + "epoch": 4.1, + "learning_rate": 6.40242659852962e-06, + "loss": 0.0485, + "step": 87780 + }, + { + "epoch": 4.1, + "learning_rate": 6.401642813474833e-06, + "loss": 0.0476, + "step": 87785 + }, + { + "epoch": 4.1, + "learning_rate": 6.400859028420047e-06, + "loss": 0.0409, + "step": 87790 + }, + { + "epoch": 4.1, + "learning_rate": 6.40007524336526e-06, + "loss": 0.0595, + "step": 87795 + }, + { + "epoch": 4.1, + "learning_rate": 6.399291458310474e-06, + "loss": 0.0683, + "step": 87800 + }, + { + "epoch": 4.1, + "learning_rate": 6.398507673255687e-06, + "loss": 0.1037, + "step": 87805 + }, + { + "epoch": 4.1, + "learning_rate": 6.397723888200901e-06, + "loss": 0.0907, + "step": 87810 + }, + { + "epoch": 4.1, + "learning_rate": 6.396940103146114e-06, + "loss": 0.0964, + "step": 87815 + }, + { + "epoch": 4.1, + "learning_rate": 6.396156318091326e-06, + "loss": 0.2069, + "step": 87820 + }, + { + "epoch": 4.1, + "learning_rate": 6.39537253303654e-06, + "loss": 0.2369, + "step": 87825 + }, + { + "epoch": 4.1, + "learning_rate": 6.394588747981754e-06, + "loss": 0.074, + "step": 87830 + }, + { + "epoch": 4.1, + "learning_rate": 6.393804962926967e-06, + "loss": 0.0256, + "step": 87835 + }, + { + "epoch": 4.1, + "learning_rate": 6.393021177872181e-06, + "loss": 0.0736, + "step": 87840 + }, + { + "epoch": 4.1, + "learning_rate": 6.392237392817394e-06, + "loss": 0.0449, + "step": 87845 + }, + { + "epoch": 4.1, + "learning_rate": 6.391453607762608e-06, + "loss": 0.0345, + "step": 87850 + }, + { + "epoch": 4.1, + "learning_rate": 6.390669822707821e-06, + "loss": 0.0579, + "step": 87855 + }, + { + "epoch": 4.1, + "learning_rate": 6.389886037653035e-06, + "loss": 0.0877, + "step": 87860 + }, + { + "epoch": 4.1, + "learning_rate": 6.389102252598248e-06, + "loss": 0.1333, + "step": 87865 + }, + { + "epoch": 4.1, + "learning_rate": 6.388318467543462e-06, + "loss": 0.1365, + "step": 87870 + }, + { + "epoch": 4.1, + "learning_rate": 6.387534682488675e-06, + "loss": 0.5326, + "step": 87875 + }, + { + "epoch": 4.1, + "learning_rate": 6.386750897433889e-06, + "loss": 0.0746, + "step": 87880 + }, + { + "epoch": 4.1, + "learning_rate": 6.385967112379101e-06, + "loss": 0.0096, + "step": 87885 + }, + { + "epoch": 4.1, + "learning_rate": 6.385183327324315e-06, + "loss": 0.0508, + "step": 87890 + }, + { + "epoch": 4.1, + "learning_rate": 6.384399542269528e-06, + "loss": 0.0339, + "step": 87895 + }, + { + "epoch": 4.1, + "learning_rate": 6.383615757214742e-06, + "loss": 0.0654, + "step": 87900 + }, + { + "epoch": 4.1, + "learning_rate": 6.382831972159955e-06, + "loss": 0.0918, + "step": 87905 + }, + { + "epoch": 4.1, + "learning_rate": 6.382048187105169e-06, + "loss": 0.0296, + "step": 87910 + }, + { + "epoch": 4.1, + "learning_rate": 6.381264402050382e-06, + "loss": 0.1635, + "step": 87915 + }, + { + "epoch": 4.1, + "learning_rate": 6.380480616995596e-06, + "loss": 0.2078, + "step": 87920 + }, + { + "epoch": 4.1, + "learning_rate": 6.379696831940809e-06, + "loss": 0.444, + "step": 87925 + }, + { + "epoch": 4.1, + "learning_rate": 6.378913046886023e-06, + "loss": 0.1321, + "step": 87930 + }, + { + "epoch": 4.1, + "learning_rate": 6.378129261831236e-06, + "loss": 0.0388, + "step": 87935 + }, + { + "epoch": 4.1, + "learning_rate": 6.37734547677645e-06, + "loss": 0.025, + "step": 87940 + }, + { + "epoch": 4.1, + "learning_rate": 6.376561691721663e-06, + "loss": 0.0972, + "step": 87945 + }, + { + "epoch": 4.1, + "learning_rate": 6.375777906666876e-06, + "loss": 0.0553, + "step": 87950 + }, + { + "epoch": 4.1, + "learning_rate": 6.374994121612089e-06, + "loss": 0.0721, + "step": 87955 + }, + { + "epoch": 4.1, + "learning_rate": 6.374210336557303e-06, + "loss": 0.0768, + "step": 87960 + }, + { + "epoch": 4.1, + "learning_rate": 6.373426551502516e-06, + "loss": 0.1282, + "step": 87965 + }, + { + "epoch": 4.1, + "learning_rate": 6.37264276644773e-06, + "loss": 0.2805, + "step": 87970 + }, + { + "epoch": 4.11, + "learning_rate": 6.371858981392943e-06, + "loss": 0.3247, + "step": 87975 + }, + { + "epoch": 4.11, + "learning_rate": 6.371075196338157e-06, + "loss": 0.0896, + "step": 87980 + }, + { + "epoch": 4.11, + "learning_rate": 6.37029141128337e-06, + "loss": 0.0147, + "step": 87985 + }, + { + "epoch": 4.11, + "learning_rate": 6.369507626228584e-06, + "loss": 0.0398, + "step": 87990 + }, + { + "epoch": 4.11, + "learning_rate": 6.368723841173797e-06, + "loss": 0.0385, + "step": 87995 + }, + { + "epoch": 4.11, + "learning_rate": 6.367940056119011e-06, + "loss": 0.0444, + "step": 88000 + }, + { + "epoch": 4.11, + "learning_rate": 6.367156271064225e-06, + "loss": 0.0426, + "step": 88005 + }, + { + "epoch": 4.11, + "learning_rate": 6.366372486009438e-06, + "loss": 0.0861, + "step": 88010 + }, + { + "epoch": 4.11, + "learning_rate": 6.36558870095465e-06, + "loss": 0.1315, + "step": 88015 + }, + { + "epoch": 4.11, + "learning_rate": 6.364804915899864e-06, + "loss": 0.1505, + "step": 88020 + }, + { + "epoch": 4.11, + "learning_rate": 6.364021130845077e-06, + "loss": 0.2346, + "step": 88025 + }, + { + "epoch": 4.11, + "learning_rate": 6.363237345790291e-06, + "loss": 0.0561, + "step": 88030 + }, + { + "epoch": 4.11, + "learning_rate": 6.362453560735504e-06, + "loss": 0.0582, + "step": 88035 + }, + { + "epoch": 4.11, + "learning_rate": 6.361669775680718e-06, + "loss": 0.0745, + "step": 88040 + }, + { + "epoch": 4.11, + "learning_rate": 6.360885990625932e-06, + "loss": 0.0559, + "step": 88045 + }, + { + "epoch": 4.11, + "learning_rate": 6.360102205571145e-06, + "loss": 0.0377, + "step": 88050 + }, + { + "epoch": 4.11, + "learning_rate": 6.359318420516359e-06, + "loss": 0.0607, + "step": 88055 + }, + { + "epoch": 4.11, + "learning_rate": 6.358534635461572e-06, + "loss": 0.1128, + "step": 88060 + }, + { + "epoch": 4.11, + "learning_rate": 6.357750850406786e-06, + "loss": 0.1015, + "step": 88065 + }, + { + "epoch": 4.11, + "learning_rate": 6.356967065351999e-06, + "loss": 0.1739, + "step": 88070 + }, + { + "epoch": 4.11, + "learning_rate": 6.3561832802972126e-06, + "loss": 0.1732, + "step": 88075 + }, + { + "epoch": 4.11, + "learning_rate": 6.355399495242425e-06, + "loss": 0.0648, + "step": 88080 + }, + { + "epoch": 4.11, + "learning_rate": 6.354615710187638e-06, + "loss": 0.0636, + "step": 88085 + }, + { + "epoch": 4.11, + "learning_rate": 6.353831925132852e-06, + "loss": 0.0788, + "step": 88090 + }, + { + "epoch": 4.11, + "learning_rate": 6.353048140078066e-06, + "loss": 0.055, + "step": 88095 + }, + { + "epoch": 4.11, + "learning_rate": 6.352264355023279e-06, + "loss": 0.0447, + "step": 88100 + }, + { + "epoch": 4.11, + "learning_rate": 6.351480569968493e-06, + "loss": 0.082, + "step": 88105 + }, + { + "epoch": 4.11, + "learning_rate": 6.350696784913706e-06, + "loss": 0.116, + "step": 88110 + }, + { + "epoch": 4.11, + "learning_rate": 6.34991299985892e-06, + "loss": 0.1232, + "step": 88115 + }, + { + "epoch": 4.11, + "learning_rate": 6.349129214804133e-06, + "loss": 0.0898, + "step": 88120 + }, + { + "epoch": 4.11, + "learning_rate": 6.3483454297493465e-06, + "loss": 0.3035, + "step": 88125 + }, + { + "epoch": 4.11, + "learning_rate": 6.34756164469456e-06, + "loss": 0.0733, + "step": 88130 + }, + { + "epoch": 4.11, + "learning_rate": 6.3467778596397735e-06, + "loss": 0.027, + "step": 88135 + }, + { + "epoch": 4.11, + "learning_rate": 6.3459940745849866e-06, + "loss": 0.0534, + "step": 88140 + }, + { + "epoch": 4.11, + "learning_rate": 6.3452102895302e-06, + "loss": 0.0438, + "step": 88145 + }, + { + "epoch": 4.11, + "learning_rate": 6.344426504475413e-06, + "loss": 0.035, + "step": 88150 + }, + { + "epoch": 4.11, + "learning_rate": 6.343642719420627e-06, + "loss": 0.075, + "step": 88155 + }, + { + "epoch": 4.11, + "learning_rate": 6.34285893436584e-06, + "loss": 0.1387, + "step": 88160 + }, + { + "epoch": 4.11, + "learning_rate": 6.3420751493110536e-06, + "loss": 0.0755, + "step": 88165 + }, + { + "epoch": 4.11, + "learning_rate": 6.341291364256267e-06, + "loss": 0.2312, + "step": 88170 + }, + { + "epoch": 4.11, + "learning_rate": 6.3405075792014805e-06, + "loss": 0.1952, + "step": 88175 + }, + { + "epoch": 4.11, + "learning_rate": 6.339723794146694e-06, + "loss": 0.0571, + "step": 88180 + }, + { + "epoch": 4.11, + "learning_rate": 6.3389400090919075e-06, + "loss": 0.0349, + "step": 88185 + }, + { + "epoch": 4.12, + "learning_rate": 6.3381562240371205e-06, + "loss": 0.0373, + "step": 88190 + }, + { + "epoch": 4.12, + "learning_rate": 6.3373724389823344e-06, + "loss": 0.0718, + "step": 88195 + }, + { + "epoch": 4.12, + "learning_rate": 6.3365886539275475e-06, + "loss": 0.0581, + "step": 88200 + }, + { + "epoch": 4.12, + "learning_rate": 6.335804868872761e-06, + "loss": 0.0827, + "step": 88205 + }, + { + "epoch": 4.12, + "learning_rate": 6.335021083817974e-06, + "loss": 0.0851, + "step": 88210 + }, + { + "epoch": 4.12, + "learning_rate": 6.3342372987631875e-06, + "loss": 0.1056, + "step": 88215 + }, + { + "epoch": 4.12, + "learning_rate": 6.333453513708401e-06, + "loss": 0.0726, + "step": 88220 + }, + { + "epoch": 4.12, + "learning_rate": 6.3326697286536145e-06, + "loss": 0.211, + "step": 88225 + }, + { + "epoch": 4.12, + "learning_rate": 6.3318859435988276e-06, + "loss": 0.1066, + "step": 88230 + }, + { + "epoch": 4.12, + "learning_rate": 6.3311021585440415e-06, + "loss": 0.0176, + "step": 88235 + }, + { + "epoch": 4.12, + "learning_rate": 6.3303183734892545e-06, + "loss": 0.0497, + "step": 88240 + }, + { + "epoch": 4.12, + "learning_rate": 6.3295345884344684e-06, + "loss": 0.0497, + "step": 88245 + }, + { + "epoch": 4.12, + "learning_rate": 6.3287508033796815e-06, + "loss": 0.0895, + "step": 88250 + }, + { + "epoch": 4.12, + "learning_rate": 6.327967018324895e-06, + "loss": 0.0476, + "step": 88255 + }, + { + "epoch": 4.12, + "learning_rate": 6.3271832332701084e-06, + "loss": 0.028, + "step": 88260 + }, + { + "epoch": 4.12, + "learning_rate": 6.326399448215322e-06, + "loss": 0.1155, + "step": 88265 + }, + { + "epoch": 4.12, + "learning_rate": 6.325615663160536e-06, + "loss": 0.1764, + "step": 88270 + }, + { + "epoch": 4.12, + "learning_rate": 6.3248318781057485e-06, + "loss": 0.2684, + "step": 88275 + }, + { + "epoch": 4.12, + "learning_rate": 6.3240480930509615e-06, + "loss": 0.1124, + "step": 88280 + }, + { + "epoch": 4.12, + "learning_rate": 6.3232643079961754e-06, + "loss": 0.0361, + "step": 88285 + }, + { + "epoch": 4.12, + "learning_rate": 6.3224805229413885e-06, + "loss": 0.0578, + "step": 88290 + }, + { + "epoch": 4.12, + "learning_rate": 6.321696737886602e-06, + "loss": 0.0501, + "step": 88295 + }, + { + "epoch": 4.12, + "learning_rate": 6.3209129528318155e-06, + "loss": 0.0494, + "step": 88300 + }, + { + "epoch": 4.12, + "learning_rate": 6.320129167777029e-06, + "loss": 0.0669, + "step": 88305 + }, + { + "epoch": 4.12, + "learning_rate": 6.319345382722243e-06, + "loss": 0.0575, + "step": 88310 + }, + { + "epoch": 4.12, + "learning_rate": 6.318561597667456e-06, + "loss": 0.2526, + "step": 88315 + }, + { + "epoch": 4.12, + "learning_rate": 6.31777781261267e-06, + "loss": 0.1299, + "step": 88320 + }, + { + "epoch": 4.12, + "learning_rate": 6.316994027557883e-06, + "loss": 0.3254, + "step": 88325 + }, + { + "epoch": 4.12, + "learning_rate": 6.316210242503097e-06, + "loss": 0.1457, + "step": 88330 + }, + { + "epoch": 4.12, + "learning_rate": 6.31542645744831e-06, + "loss": 0.0232, + "step": 88335 + }, + { + "epoch": 4.12, + "learning_rate": 6.3146426723935225e-06, + "loss": 0.0495, + "step": 88340 + }, + { + "epoch": 4.12, + "learning_rate": 6.313858887338736e-06, + "loss": 0.0355, + "step": 88345 + }, + { + "epoch": 4.12, + "learning_rate": 6.3130751022839494e-06, + "loss": 0.1044, + "step": 88350 + }, + { + "epoch": 4.12, + "learning_rate": 6.312291317229163e-06, + "loss": 0.0989, + "step": 88355 + }, + { + "epoch": 4.12, + "learning_rate": 6.311507532174377e-06, + "loss": 0.0778, + "step": 88360 + }, + { + "epoch": 4.12, + "learning_rate": 6.31072374711959e-06, + "loss": 0.0731, + "step": 88365 + }, + { + "epoch": 4.12, + "learning_rate": 6.309939962064804e-06, + "loss": 0.1624, + "step": 88370 + }, + { + "epoch": 4.12, + "learning_rate": 6.309156177010017e-06, + "loss": 0.214, + "step": 88375 + }, + { + "epoch": 4.12, + "learning_rate": 6.308372391955231e-06, + "loss": 0.103, + "step": 88380 + }, + { + "epoch": 4.12, + "learning_rate": 6.307588606900444e-06, + "loss": 0.0078, + "step": 88385 + }, + { + "epoch": 4.12, + "learning_rate": 6.306804821845658e-06, + "loss": 0.0153, + "step": 88390 + }, + { + "epoch": 4.12, + "learning_rate": 6.306021036790871e-06, + "loss": 0.0967, + "step": 88395 + }, + { + "epoch": 4.12, + "learning_rate": 6.305237251736085e-06, + "loss": 0.0975, + "step": 88400 + }, + { + "epoch": 4.13, + "learning_rate": 6.304453466681297e-06, + "loss": 0.1028, + "step": 88405 + }, + { + "epoch": 4.13, + "learning_rate": 6.303669681626511e-06, + "loss": 0.0814, + "step": 88410 + }, + { + "epoch": 4.13, + "learning_rate": 6.302885896571724e-06, + "loss": 0.1753, + "step": 88415 + }, + { + "epoch": 4.13, + "learning_rate": 6.302102111516938e-06, + "loss": 0.1501, + "step": 88420 + }, + { + "epoch": 4.13, + "learning_rate": 6.301318326462151e-06, + "loss": 0.2274, + "step": 88425 + }, + { + "epoch": 4.13, + "learning_rate": 6.300534541407365e-06, + "loss": 0.0729, + "step": 88430 + }, + { + "epoch": 4.13, + "learning_rate": 6.299750756352578e-06, + "loss": 0.0138, + "step": 88435 + }, + { + "epoch": 4.13, + "learning_rate": 6.298966971297792e-06, + "loss": 0.0052, + "step": 88440 + }, + { + "epoch": 4.13, + "learning_rate": 6.298183186243005e-06, + "loss": 0.0546, + "step": 88445 + }, + { + "epoch": 4.13, + "learning_rate": 6.297399401188219e-06, + "loss": 0.063, + "step": 88450 + }, + { + "epoch": 4.13, + "learning_rate": 6.296615616133432e-06, + "loss": 0.0907, + "step": 88455 + }, + { + "epoch": 4.13, + "learning_rate": 6.295831831078646e-06, + "loss": 0.0612, + "step": 88460 + }, + { + "epoch": 4.13, + "learning_rate": 6.295048046023859e-06, + "loss": 0.1018, + "step": 88465 + }, + { + "epoch": 4.13, + "learning_rate": 6.294264260969072e-06, + "loss": 0.2148, + "step": 88470 + }, + { + "epoch": 4.13, + "learning_rate": 6.293480475914285e-06, + "loss": 0.4048, + "step": 88475 + }, + { + "epoch": 4.13, + "learning_rate": 6.292696690859499e-06, + "loss": 0.1049, + "step": 88480 + }, + { + "epoch": 4.13, + "learning_rate": 6.291912905804712e-06, + "loss": 0.0336, + "step": 88485 + }, + { + "epoch": 4.13, + "learning_rate": 6.291129120749926e-06, + "loss": 0.0259, + "step": 88490 + }, + { + "epoch": 4.13, + "learning_rate": 6.290345335695139e-06, + "loss": 0.0528, + "step": 88495 + }, + { + "epoch": 4.13, + "learning_rate": 6.289561550640353e-06, + "loss": 0.0865, + "step": 88500 + }, + { + "epoch": 4.13, + "learning_rate": 6.288777765585566e-06, + "loss": 0.1226, + "step": 88505 + }, + { + "epoch": 4.13, + "learning_rate": 6.28799398053078e-06, + "loss": 0.0952, + "step": 88510 + }, + { + "epoch": 4.13, + "learning_rate": 6.287210195475993e-06, + "loss": 0.2186, + "step": 88515 + }, + { + "epoch": 4.13, + "learning_rate": 6.286426410421207e-06, + "loss": 0.2144, + "step": 88520 + }, + { + "epoch": 4.13, + "learning_rate": 6.28564262536642e-06, + "loss": 0.3712, + "step": 88525 + }, + { + "epoch": 4.13, + "learning_rate": 6.284858840311634e-06, + "loss": 0.1564, + "step": 88530 + }, + { + "epoch": 4.13, + "learning_rate": 6.284075055256846e-06, + "loss": 0.065, + "step": 88535 + }, + { + "epoch": 4.13, + "learning_rate": 6.28329127020206e-06, + "loss": 0.0444, + "step": 88540 + }, + { + "epoch": 4.13, + "learning_rate": 6.282507485147273e-06, + "loss": 0.0389, + "step": 88545 + }, + { + "epoch": 4.13, + "learning_rate": 6.281723700092487e-06, + "loss": 0.0365, + "step": 88550 + }, + { + "epoch": 4.13, + "learning_rate": 6.2809399150377e-06, + "loss": 0.1061, + "step": 88555 + }, + { + "epoch": 4.13, + "learning_rate": 6.280156129982914e-06, + "loss": 0.1723, + "step": 88560 + }, + { + "epoch": 4.13, + "learning_rate": 6.279372344928127e-06, + "loss": 0.1295, + "step": 88565 + }, + { + "epoch": 4.13, + "learning_rate": 6.278588559873341e-06, + "loss": 0.2212, + "step": 88570 + }, + { + "epoch": 4.13, + "learning_rate": 6.277804774818554e-06, + "loss": 0.2194, + "step": 88575 + }, + { + "epoch": 4.13, + "learning_rate": 6.277020989763768e-06, + "loss": 0.0349, + "step": 88580 + }, + { + "epoch": 4.13, + "learning_rate": 6.276237204708982e-06, + "loss": 0.0238, + "step": 88585 + }, + { + "epoch": 4.13, + "learning_rate": 6.275453419654195e-06, + "loss": 0.0562, + "step": 88590 + }, + { + "epoch": 4.13, + "learning_rate": 6.274669634599409e-06, + "loss": 0.0424, + "step": 88595 + }, + { + "epoch": 4.13, + "learning_rate": 6.273885849544621e-06, + "loss": 0.0506, + "step": 88600 + }, + { + "epoch": 4.13, + "learning_rate": 6.273102064489834e-06, + "loss": 0.0527, + "step": 88605 + }, + { + "epoch": 4.13, + "learning_rate": 6.272318279435048e-06, + "loss": 0.077, + "step": 88610 + }, + { + "epoch": 4.13, + "learning_rate": 6.271534494380261e-06, + "loss": 0.1514, + "step": 88615 + }, + { + "epoch": 4.14, + "learning_rate": 6.270750709325475e-06, + "loss": 0.1529, + "step": 88620 + }, + { + "epoch": 4.14, + "learning_rate": 6.269966924270689e-06, + "loss": 0.2726, + "step": 88625 + }, + { + "epoch": 4.14, + "learning_rate": 6.269183139215902e-06, + "loss": 0.0565, + "step": 88630 + }, + { + "epoch": 4.14, + "learning_rate": 6.268399354161116e-06, + "loss": 0.0463, + "step": 88635 + }, + { + "epoch": 4.14, + "learning_rate": 6.267615569106329e-06, + "loss": 0.0227, + "step": 88640 + }, + { + "epoch": 4.14, + "learning_rate": 6.266831784051543e-06, + "loss": 0.0636, + "step": 88645 + }, + { + "epoch": 4.14, + "learning_rate": 6.266047998996756e-06, + "loss": 0.053, + "step": 88650 + }, + { + "epoch": 4.14, + "learning_rate": 6.26526421394197e-06, + "loss": 0.0368, + "step": 88655 + }, + { + "epoch": 4.14, + "learning_rate": 6.264480428887183e-06, + "loss": 0.0499, + "step": 88660 + }, + { + "epoch": 4.14, + "learning_rate": 6.263696643832395e-06, + "loss": 0.1441, + "step": 88665 + }, + { + "epoch": 4.14, + "learning_rate": 6.262912858777609e-06, + "loss": 0.1812, + "step": 88670 + }, + { + "epoch": 4.14, + "learning_rate": 6.262129073722823e-06, + "loss": 0.2325, + "step": 88675 + }, + { + "epoch": 4.14, + "learning_rate": 6.261345288668036e-06, + "loss": 0.0733, + "step": 88680 + }, + { + "epoch": 4.14, + "learning_rate": 6.26056150361325e-06, + "loss": 0.0314, + "step": 88685 + }, + { + "epoch": 4.14, + "learning_rate": 6.259777718558463e-06, + "loss": 0.0476, + "step": 88690 + }, + { + "epoch": 4.14, + "learning_rate": 6.258993933503677e-06, + "loss": 0.0603, + "step": 88695 + }, + { + "epoch": 4.14, + "learning_rate": 6.25821014844889e-06, + "loss": 0.0249, + "step": 88700 + }, + { + "epoch": 4.14, + "learning_rate": 6.257426363394104e-06, + "loss": 0.0983, + "step": 88705 + }, + { + "epoch": 4.14, + "learning_rate": 6.256642578339317e-06, + "loss": 0.2059, + "step": 88710 + }, + { + "epoch": 4.14, + "learning_rate": 6.255858793284531e-06, + "loss": 0.0781, + "step": 88715 + }, + { + "epoch": 4.14, + "learning_rate": 6.255075008229744e-06, + "loss": 0.1035, + "step": 88720 + }, + { + "epoch": 4.14, + "learning_rate": 6.254291223174958e-06, + "loss": 0.5021, + "step": 88725 + }, + { + "epoch": 4.14, + "learning_rate": 6.25350743812017e-06, + "loss": 0.0905, + "step": 88730 + }, + { + "epoch": 4.14, + "learning_rate": 6.252723653065384e-06, + "loss": 0.0015, + "step": 88735 + }, + { + "epoch": 4.14, + "learning_rate": 6.251939868010597e-06, + "loss": 0.0715, + "step": 88740 + }, + { + "epoch": 4.14, + "learning_rate": 6.251156082955811e-06, + "loss": 0.0508, + "step": 88745 + }, + { + "epoch": 4.14, + "learning_rate": 6.250372297901024e-06, + "loss": 0.0438, + "step": 88750 + }, + { + "epoch": 4.14, + "learning_rate": 6.249588512846238e-06, + "loss": 0.0913, + "step": 88755 + }, + { + "epoch": 4.14, + "learning_rate": 6.248804727791451e-06, + "loss": 0.0922, + "step": 88760 + }, + { + "epoch": 4.14, + "learning_rate": 6.248020942736665e-06, + "loss": 0.1287, + "step": 88765 + }, + { + "epoch": 4.14, + "learning_rate": 6.247237157681878e-06, + "loss": 0.1945, + "step": 88770 + }, + { + "epoch": 4.14, + "learning_rate": 6.246453372627092e-06, + "loss": 0.4309, + "step": 88775 + }, + { + "epoch": 4.14, + "learning_rate": 6.245669587572305e-06, + "loss": 0.0991, + "step": 88780 + }, + { + "epoch": 4.14, + "learning_rate": 6.244885802517519e-06, + "loss": 0.1138, + "step": 88785 + }, + { + "epoch": 4.14, + "learning_rate": 6.244102017462732e-06, + "loss": 0.0286, + "step": 88790 + }, + { + "epoch": 4.14, + "learning_rate": 6.243318232407945e-06, + "loss": 0.0282, + "step": 88795 + }, + { + "epoch": 4.14, + "learning_rate": 6.242534447353158e-06, + "loss": 0.0464, + "step": 88800 + }, + { + "epoch": 4.14, + "learning_rate": 6.241750662298372e-06, + "loss": 0.1241, + "step": 88805 + }, + { + "epoch": 4.14, + "learning_rate": 6.240966877243585e-06, + "loss": 0.0799, + "step": 88810 + }, + { + "epoch": 4.14, + "learning_rate": 6.240183092188799e-06, + "loss": 0.0658, + "step": 88815 + }, + { + "epoch": 4.14, + "learning_rate": 6.239399307134012e-06, + "loss": 0.1682, + "step": 88820 + }, + { + "epoch": 4.14, + "learning_rate": 6.238615522079226e-06, + "loss": 0.1947, + "step": 88825 + }, + { + "epoch": 4.14, + "learning_rate": 6.237831737024439e-06, + "loss": 0.109, + "step": 88830 + }, + { + "epoch": 4.15, + "learning_rate": 6.2370479519696526e-06, + "loss": 0.0184, + "step": 88835 + }, + { + "epoch": 4.15, + "learning_rate": 6.236264166914866e-06, + "loss": 0.0241, + "step": 88840 + }, + { + "epoch": 4.15, + "learning_rate": 6.2354803818600795e-06, + "loss": 0.0518, + "step": 88845 + }, + { + "epoch": 4.15, + "learning_rate": 6.2346965968052934e-06, + "loss": 0.0389, + "step": 88850 + }, + { + "epoch": 4.15, + "learning_rate": 6.2339128117505065e-06, + "loss": 0.0605, + "step": 88855 + }, + { + "epoch": 4.15, + "learning_rate": 6.233129026695719e-06, + "loss": 0.1007, + "step": 88860 + }, + { + "epoch": 4.15, + "learning_rate": 6.232345241640933e-06, + "loss": 0.1136, + "step": 88865 + }, + { + "epoch": 4.15, + "learning_rate": 6.231561456586146e-06, + "loss": 0.1695, + "step": 88870 + }, + { + "epoch": 4.15, + "learning_rate": 6.2307776715313596e-06, + "loss": 0.3442, + "step": 88875 + }, + { + "epoch": 4.15, + "learning_rate": 6.229993886476573e-06, + "loss": 0.13, + "step": 88880 + }, + { + "epoch": 4.15, + "learning_rate": 6.2292101014217865e-06, + "loss": 0.0032, + "step": 88885 + }, + { + "epoch": 4.15, + "learning_rate": 6.2284263163670004e-06, + "loss": 0.0323, + "step": 88890 + }, + { + "epoch": 4.15, + "learning_rate": 6.2276425313122135e-06, + "loss": 0.0368, + "step": 88895 + }, + { + "epoch": 4.15, + "learning_rate": 6.226858746257427e-06, + "loss": 0.0523, + "step": 88900 + }, + { + "epoch": 4.15, + "learning_rate": 6.2260749612026405e-06, + "loss": 0.0824, + "step": 88905 + }, + { + "epoch": 4.15, + "learning_rate": 6.225291176147854e-06, + "loss": 0.1514, + "step": 88910 + }, + { + "epoch": 4.15, + "learning_rate": 6.224507391093067e-06, + "loss": 0.1334, + "step": 88915 + }, + { + "epoch": 4.15, + "learning_rate": 6.223723606038281e-06, + "loss": 0.1909, + "step": 88920 + }, + { + "epoch": 4.15, + "learning_rate": 6.2229398209834935e-06, + "loss": 0.2378, + "step": 88925 + }, + { + "epoch": 4.15, + "learning_rate": 6.222156035928707e-06, + "loss": 0.1063, + "step": 88930 + }, + { + "epoch": 4.15, + "learning_rate": 6.2213722508739205e-06, + "loss": 0.0204, + "step": 88935 + }, + { + "epoch": 4.15, + "learning_rate": 6.220588465819134e-06, + "loss": 0.0123, + "step": 88940 + }, + { + "epoch": 4.15, + "learning_rate": 6.2198046807643475e-06, + "loss": 0.0628, + "step": 88945 + }, + { + "epoch": 4.15, + "learning_rate": 6.219020895709561e-06, + "loss": 0.02, + "step": 88950 + }, + { + "epoch": 4.15, + "learning_rate": 6.2182371106547744e-06, + "loss": 0.1009, + "step": 88955 + }, + { + "epoch": 4.15, + "learning_rate": 6.217453325599988e-06, + "loss": 0.071, + "step": 88960 + }, + { + "epoch": 4.15, + "learning_rate": 6.216669540545201e-06, + "loss": 0.08, + "step": 88965 + }, + { + "epoch": 4.15, + "learning_rate": 6.215885755490415e-06, + "loss": 0.0997, + "step": 88970 + }, + { + "epoch": 4.15, + "learning_rate": 6.215101970435628e-06, + "loss": 0.277, + "step": 88975 + }, + { + "epoch": 4.15, + "learning_rate": 6.214318185380842e-06, + "loss": 0.0999, + "step": 88980 + }, + { + "epoch": 4.15, + "learning_rate": 6.213534400326055e-06, + "loss": 0.0273, + "step": 88985 + }, + { + "epoch": 4.15, + "learning_rate": 6.212750615271268e-06, + "loss": 0.0476, + "step": 88990 + }, + { + "epoch": 4.15, + "learning_rate": 6.2119668302164814e-06, + "loss": 0.0303, + "step": 88995 + }, + { + "epoch": 4.15, + "learning_rate": 6.211183045161695e-06, + "loss": 0.0347, + "step": 89000 + }, + { + "epoch": 4.15, + "learning_rate": 6.210399260106908e-06, + "loss": 0.0411, + "step": 89005 + }, + { + "epoch": 4.15, + "learning_rate": 6.209615475052122e-06, + "loss": 0.0864, + "step": 89010 + }, + { + "epoch": 4.15, + "learning_rate": 6.208831689997335e-06, + "loss": 0.1301, + "step": 89015 + }, + { + "epoch": 4.15, + "learning_rate": 6.208047904942549e-06, + "loss": 0.1993, + "step": 89020 + }, + { + "epoch": 4.15, + "learning_rate": 6.207264119887762e-06, + "loss": 0.0947, + "step": 89025 + }, + { + "epoch": 4.15, + "learning_rate": 6.206480334832976e-06, + "loss": 0.078, + "step": 89030 + }, + { + "epoch": 4.15, + "learning_rate": 6.205696549778189e-06, + "loss": 0.0447, + "step": 89035 + }, + { + "epoch": 4.15, + "learning_rate": 6.204912764723403e-06, + "loss": 0.032, + "step": 89040 + }, + { + "epoch": 4.15, + "learning_rate": 6.204128979668616e-06, + "loss": 0.11, + "step": 89045 + }, + { + "epoch": 4.16, + "learning_rate": 6.20334519461383e-06, + "loss": 0.1048, + "step": 89050 + }, + { + "epoch": 4.16, + "learning_rate": 6.202561409559042e-06, + "loss": 0.0573, + "step": 89055 + }, + { + "epoch": 4.16, + "learning_rate": 6.201777624504256e-06, + "loss": 0.0839, + "step": 89060 + }, + { + "epoch": 4.16, + "learning_rate": 6.200993839449469e-06, + "loss": 0.1672, + "step": 89065 + }, + { + "epoch": 4.16, + "learning_rate": 6.200210054394683e-06, + "loss": 0.2303, + "step": 89070 + }, + { + "epoch": 4.16, + "learning_rate": 6.199426269339896e-06, + "loss": 0.4011, + "step": 89075 + }, + { + "epoch": 4.16, + "learning_rate": 6.19864248428511e-06, + "loss": 0.0853, + "step": 89080 + }, + { + "epoch": 4.16, + "learning_rate": 6.197858699230323e-06, + "loss": 0.0321, + "step": 89085 + }, + { + "epoch": 4.16, + "learning_rate": 6.197074914175537e-06, + "loss": 0.0147, + "step": 89090 + }, + { + "epoch": 4.16, + "learning_rate": 6.19629112912075e-06, + "loss": 0.051, + "step": 89095 + }, + { + "epoch": 4.16, + "learning_rate": 6.195507344065964e-06, + "loss": 0.0454, + "step": 89100 + }, + { + "epoch": 4.16, + "learning_rate": 6.194723559011177e-06, + "loss": 0.0369, + "step": 89105 + }, + { + "epoch": 4.16, + "learning_rate": 6.193939773956391e-06, + "loss": 0.1615, + "step": 89110 + }, + { + "epoch": 4.16, + "learning_rate": 6.193155988901605e-06, + "loss": 0.1234, + "step": 89115 + }, + { + "epoch": 4.16, + "learning_rate": 6.192372203846817e-06, + "loss": 0.1306, + "step": 89120 + }, + { + "epoch": 4.16, + "learning_rate": 6.19158841879203e-06, + "loss": 0.305, + "step": 89125 + }, + { + "epoch": 4.16, + "learning_rate": 6.190804633737244e-06, + "loss": 0.067, + "step": 89130 + }, + { + "epoch": 4.16, + "learning_rate": 6.190020848682457e-06, + "loss": 0.0449, + "step": 89135 + }, + { + "epoch": 4.16, + "learning_rate": 6.189237063627671e-06, + "loss": 0.0631, + "step": 89140 + }, + { + "epoch": 4.16, + "learning_rate": 6.188453278572884e-06, + "loss": 0.0649, + "step": 89145 + }, + { + "epoch": 4.16, + "learning_rate": 6.187669493518098e-06, + "loss": 0.0574, + "step": 89150 + }, + { + "epoch": 4.16, + "learning_rate": 6.186885708463311e-06, + "loss": 0.0685, + "step": 89155 + }, + { + "epoch": 4.16, + "learning_rate": 6.186101923408525e-06, + "loss": 0.0932, + "step": 89160 + }, + { + "epoch": 4.16, + "learning_rate": 6.185318138353739e-06, + "loss": 0.1932, + "step": 89165 + }, + { + "epoch": 4.16, + "learning_rate": 6.184534353298952e-06, + "loss": 0.2358, + "step": 89170 + }, + { + "epoch": 4.16, + "learning_rate": 6.183750568244166e-06, + "loss": 0.2233, + "step": 89175 + }, + { + "epoch": 4.16, + "learning_rate": 6.182966783189379e-06, + "loss": 0.0727, + "step": 89180 + }, + { + "epoch": 4.16, + "learning_rate": 6.182182998134591e-06, + "loss": 0.0821, + "step": 89185 + }, + { + "epoch": 4.16, + "learning_rate": 6.181399213079805e-06, + "loss": 0.0073, + "step": 89190 + }, + { + "epoch": 4.16, + "learning_rate": 6.180615428025018e-06, + "loss": 0.0236, + "step": 89195 + }, + { + "epoch": 4.16, + "learning_rate": 6.179831642970232e-06, + "loss": 0.0535, + "step": 89200 + }, + { + "epoch": 4.16, + "learning_rate": 6.179047857915446e-06, + "loss": 0.0256, + "step": 89205 + }, + { + "epoch": 4.16, + "learning_rate": 6.178264072860659e-06, + "loss": 0.0649, + "step": 89210 + }, + { + "epoch": 4.16, + "learning_rate": 6.177480287805873e-06, + "loss": 0.065, + "step": 89215 + }, + { + "epoch": 4.16, + "learning_rate": 6.176696502751086e-06, + "loss": 0.2857, + "step": 89220 + }, + { + "epoch": 4.16, + "learning_rate": 6.1759127176963e-06, + "loss": 0.2933, + "step": 89225 + }, + { + "epoch": 4.16, + "learning_rate": 6.175128932641513e-06, + "loss": 0.1056, + "step": 89230 + }, + { + "epoch": 4.16, + "learning_rate": 6.174345147586727e-06, + "loss": 0.0446, + "step": 89235 + }, + { + "epoch": 4.16, + "learning_rate": 6.17356136253194e-06, + "loss": 0.0522, + "step": 89240 + }, + { + "epoch": 4.16, + "learning_rate": 6.172777577477154e-06, + "loss": 0.0394, + "step": 89245 + }, + { + "epoch": 4.16, + "learning_rate": 6.171993792422366e-06, + "loss": 0.0233, + "step": 89250 + }, + { + "epoch": 4.16, + "learning_rate": 6.17121000736758e-06, + "loss": 0.0849, + "step": 89255 + }, + { + "epoch": 4.16, + "learning_rate": 6.170426222312793e-06, + "loss": 0.079, + "step": 89260 + }, + { + "epoch": 4.17, + "learning_rate": 6.169642437258007e-06, + "loss": 0.1592, + "step": 89265 + }, + { + "epoch": 4.17, + "learning_rate": 6.16885865220322e-06, + "loss": 0.0925, + "step": 89270 + }, + { + "epoch": 4.17, + "learning_rate": 6.168074867148434e-06, + "loss": 0.1958, + "step": 89275 + }, + { + "epoch": 4.17, + "learning_rate": 6.167291082093647e-06, + "loss": 0.1201, + "step": 89280 + }, + { + "epoch": 4.17, + "learning_rate": 6.166507297038861e-06, + "loss": 0.0116, + "step": 89285 + }, + { + "epoch": 4.17, + "learning_rate": 6.165723511984074e-06, + "loss": 0.0647, + "step": 89290 + }, + { + "epoch": 4.17, + "learning_rate": 6.164939726929288e-06, + "loss": 0.0438, + "step": 89295 + }, + { + "epoch": 4.17, + "learning_rate": 6.164155941874501e-06, + "loss": 0.0513, + "step": 89300 + }, + { + "epoch": 4.17, + "learning_rate": 6.163372156819715e-06, + "loss": 0.1133, + "step": 89305 + }, + { + "epoch": 4.17, + "learning_rate": 6.162588371764928e-06, + "loss": 0.1608, + "step": 89310 + }, + { + "epoch": 4.17, + "learning_rate": 6.161804586710141e-06, + "loss": 0.0674, + "step": 89315 + }, + { + "epoch": 4.17, + "learning_rate": 6.161020801655354e-06, + "loss": 0.1883, + "step": 89320 + }, + { + "epoch": 4.17, + "learning_rate": 6.160237016600568e-06, + "loss": 0.3976, + "step": 89325 + }, + { + "epoch": 4.17, + "learning_rate": 6.159453231545781e-06, + "loss": 0.0882, + "step": 89330 + }, + { + "epoch": 4.17, + "learning_rate": 6.158669446490995e-06, + "loss": 0.042, + "step": 89335 + }, + { + "epoch": 4.17, + "learning_rate": 6.157885661436208e-06, + "loss": 0.0464, + "step": 89340 + }, + { + "epoch": 4.17, + "learning_rate": 6.157101876381422e-06, + "loss": 0.0564, + "step": 89345 + }, + { + "epoch": 4.17, + "learning_rate": 6.156318091326635e-06, + "loss": 0.0426, + "step": 89350 + }, + { + "epoch": 4.17, + "learning_rate": 6.155534306271849e-06, + "loss": 0.0953, + "step": 89355 + }, + { + "epoch": 4.17, + "learning_rate": 6.154750521217062e-06, + "loss": 0.0831, + "step": 89360 + }, + { + "epoch": 4.17, + "learning_rate": 6.153966736162276e-06, + "loss": 0.1253, + "step": 89365 + }, + { + "epoch": 4.17, + "learning_rate": 6.153182951107489e-06, + "loss": 0.131, + "step": 89370 + }, + { + "epoch": 4.17, + "learning_rate": 6.152399166052703e-06, + "loss": 0.2002, + "step": 89375 + }, + { + "epoch": 4.17, + "learning_rate": 6.151615380997915e-06, + "loss": 0.0783, + "step": 89380 + }, + { + "epoch": 4.17, + "learning_rate": 6.150831595943129e-06, + "loss": 0.0061, + "step": 89385 + }, + { + "epoch": 4.17, + "learning_rate": 6.150047810888342e-06, + "loss": 0.012, + "step": 89390 + }, + { + "epoch": 4.17, + "learning_rate": 6.149264025833556e-06, + "loss": 0.0333, + "step": 89395 + }, + { + "epoch": 4.17, + "learning_rate": 6.148480240778769e-06, + "loss": 0.0697, + "step": 89400 + }, + { + "epoch": 4.17, + "learning_rate": 6.147696455723983e-06, + "loss": 0.0735, + "step": 89405 + }, + { + "epoch": 4.17, + "learning_rate": 6.146912670669196e-06, + "loss": 0.0776, + "step": 89410 + }, + { + "epoch": 4.17, + "learning_rate": 6.14612888561441e-06, + "loss": 0.172, + "step": 89415 + }, + { + "epoch": 4.17, + "learning_rate": 6.145345100559623e-06, + "loss": 0.1534, + "step": 89420 + }, + { + "epoch": 4.17, + "learning_rate": 6.144561315504837e-06, + "loss": 0.3467, + "step": 89425 + }, + { + "epoch": 4.17, + "learning_rate": 6.143777530450051e-06, + "loss": 0.0825, + "step": 89430 + }, + { + "epoch": 4.17, + "learning_rate": 6.142993745395264e-06, + "loss": 0.046, + "step": 89435 + }, + { + "epoch": 4.17, + "learning_rate": 6.1422099603404776e-06, + "loss": 0.0452, + "step": 89440 + }, + { + "epoch": 4.17, + "learning_rate": 6.14142617528569e-06, + "loss": 0.0702, + "step": 89445 + }, + { + "epoch": 4.17, + "learning_rate": 6.140642390230903e-06, + "loss": 0.0492, + "step": 89450 + }, + { + "epoch": 4.17, + "learning_rate": 6.139858605176117e-06, + "loss": 0.0686, + "step": 89455 + }, + { + "epoch": 4.17, + "learning_rate": 6.13907482012133e-06, + "loss": 0.0756, + "step": 89460 + }, + { + "epoch": 4.17, + "learning_rate": 6.138291035066544e-06, + "loss": 0.1233, + "step": 89465 + }, + { + "epoch": 4.17, + "learning_rate": 6.137507250011758e-06, + "loss": 0.1597, + "step": 89470 + }, + { + "epoch": 4.18, + "learning_rate": 6.136723464956971e-06, + "loss": 0.1574, + "step": 89475 + }, + { + "epoch": 4.18, + "learning_rate": 6.1359396799021846e-06, + "loss": 0.0584, + "step": 89480 + }, + { + "epoch": 4.18, + "learning_rate": 6.135155894847398e-06, + "loss": 0.0504, + "step": 89485 + }, + { + "epoch": 4.18, + "learning_rate": 6.1343721097926115e-06, + "loss": 0.0132, + "step": 89490 + }, + { + "epoch": 4.18, + "learning_rate": 6.133588324737825e-06, + "loss": 0.1002, + "step": 89495 + }, + { + "epoch": 4.18, + "learning_rate": 6.1328045396830385e-06, + "loss": 0.0326, + "step": 89500 + }, + { + "epoch": 4.18, + "learning_rate": 6.1320207546282516e-06, + "loss": 0.15, + "step": 89505 + }, + { + "epoch": 4.18, + "learning_rate": 6.131236969573464e-06, + "loss": 0.0347, + "step": 89510 + }, + { + "epoch": 4.18, + "learning_rate": 6.130453184518678e-06, + "loss": 0.1959, + "step": 89515 + }, + { + "epoch": 4.18, + "learning_rate": 6.1296693994638916e-06, + "loss": 0.2061, + "step": 89520 + }, + { + "epoch": 4.18, + "learning_rate": 6.128885614409105e-06, + "loss": 0.2217, + "step": 89525 + }, + { + "epoch": 4.18, + "learning_rate": 6.1281018293543185e-06, + "loss": 0.077, + "step": 89530 + }, + { + "epoch": 4.18, + "learning_rate": 6.127318044299532e-06, + "loss": 0.0137, + "step": 89535 + }, + { + "epoch": 4.18, + "learning_rate": 6.1265342592447455e-06, + "loss": 0.0232, + "step": 89540 + }, + { + "epoch": 4.18, + "learning_rate": 6.1257504741899586e-06, + "loss": 0.0565, + "step": 89545 + }, + { + "epoch": 4.18, + "learning_rate": 6.1249666891351725e-06, + "loss": 0.0814, + "step": 89550 + }, + { + "epoch": 4.18, + "learning_rate": 6.1241829040803855e-06, + "loss": 0.056, + "step": 89555 + }, + { + "epoch": 4.18, + "learning_rate": 6.1233991190255994e-06, + "loss": 0.1184, + "step": 89560 + }, + { + "epoch": 4.18, + "learning_rate": 6.1226153339708125e-06, + "loss": 0.0755, + "step": 89565 + }, + { + "epoch": 4.18, + "learning_rate": 6.121831548916026e-06, + "loss": 0.244, + "step": 89570 + }, + { + "epoch": 4.18, + "learning_rate": 6.121047763861239e-06, + "loss": 0.187, + "step": 89575 + }, + { + "epoch": 4.18, + "learning_rate": 6.1202639788064525e-06, + "loss": 0.104, + "step": 89580 + }, + { + "epoch": 4.18, + "learning_rate": 6.1194801937516656e-06, + "loss": 0.0128, + "step": 89585 + }, + { + "epoch": 4.18, + "learning_rate": 6.1186964086968795e-06, + "loss": 0.1322, + "step": 89590 + }, + { + "epoch": 4.18, + "learning_rate": 6.1179126236420925e-06, + "loss": 0.0376, + "step": 89595 + }, + { + "epoch": 4.18, + "learning_rate": 6.1171288385873064e-06, + "loss": 0.0948, + "step": 89600 + }, + { + "epoch": 4.18, + "learning_rate": 6.1163450535325195e-06, + "loss": 0.0404, + "step": 89605 + }, + { + "epoch": 4.18, + "learning_rate": 6.115561268477733e-06, + "loss": 0.0668, + "step": 89610 + }, + { + "epoch": 4.18, + "learning_rate": 6.1147774834229465e-06, + "loss": 0.1415, + "step": 89615 + }, + { + "epoch": 4.18, + "learning_rate": 6.11399369836816e-06, + "loss": 0.1557, + "step": 89620 + }, + { + "epoch": 4.18, + "learning_rate": 6.1132099133133734e-06, + "loss": 0.3609, + "step": 89625 + }, + { + "epoch": 4.18, + "learning_rate": 6.112426128258587e-06, + "loss": 0.1221, + "step": 89630 + }, + { + "epoch": 4.18, + "learning_rate": 6.1116423432038e-06, + "loss": 0.0591, + "step": 89635 + }, + { + "epoch": 4.18, + "learning_rate": 6.1108585581490135e-06, + "loss": 0.0227, + "step": 89640 + }, + { + "epoch": 4.18, + "learning_rate": 6.1100747730942265e-06, + "loss": 0.0375, + "step": 89645 + }, + { + "epoch": 4.18, + "learning_rate": 6.10929098803944e-06, + "loss": 0.0431, + "step": 89650 + }, + { + "epoch": 4.18, + "learning_rate": 6.1085072029846535e-06, + "loss": 0.063, + "step": 89655 + }, + { + "epoch": 4.18, + "learning_rate": 6.107723417929867e-06, + "loss": 0.0882, + "step": 89660 + }, + { + "epoch": 4.18, + "learning_rate": 6.1069396328750804e-06, + "loss": 0.1451, + "step": 89665 + }, + { + "epoch": 4.18, + "learning_rate": 6.106155847820294e-06, + "loss": 0.1139, + "step": 89670 + }, + { + "epoch": 4.18, + "learning_rate": 6.105372062765507e-06, + "loss": 0.2785, + "step": 89675 + }, + { + "epoch": 4.18, + "learning_rate": 6.104588277710721e-06, + "loss": 0.064, + "step": 89680 + }, + { + "epoch": 4.18, + "learning_rate": 6.103804492655934e-06, + "loss": 0.0145, + "step": 89685 + }, + { + "epoch": 4.19, + "learning_rate": 6.103020707601148e-06, + "loss": 0.017, + "step": 89690 + }, + { + "epoch": 4.19, + "learning_rate": 6.102236922546362e-06, + "loss": 0.0564, + "step": 89695 + }, + { + "epoch": 4.19, + "learning_rate": 6.101453137491575e-06, + "loss": 0.0964, + "step": 89700 + }, + { + "epoch": 4.19, + "learning_rate": 6.1006693524367875e-06, + "loss": 0.0644, + "step": 89705 + }, + { + "epoch": 4.19, + "learning_rate": 6.099885567382001e-06, + "loss": 0.0778, + "step": 89710 + }, + { + "epoch": 4.19, + "learning_rate": 6.099101782327214e-06, + "loss": 0.1623, + "step": 89715 + }, + { + "epoch": 4.19, + "learning_rate": 6.098317997272428e-06, + "loss": 0.2354, + "step": 89720 + }, + { + "epoch": 4.19, + "learning_rate": 6.097534212217641e-06, + "loss": 0.2151, + "step": 89725 + }, + { + "epoch": 4.19, + "learning_rate": 6.096750427162855e-06, + "loss": 0.1062, + "step": 89730 + }, + { + "epoch": 4.19, + "learning_rate": 6.095966642108068e-06, + "loss": 0.0041, + "step": 89735 + }, + { + "epoch": 4.19, + "learning_rate": 6.095182857053282e-06, + "loss": 0.0345, + "step": 89740 + }, + { + "epoch": 4.19, + "learning_rate": 6.094399071998496e-06, + "loss": 0.0483, + "step": 89745 + }, + { + "epoch": 4.19, + "learning_rate": 6.093615286943709e-06, + "loss": 0.0438, + "step": 89750 + }, + { + "epoch": 4.19, + "learning_rate": 6.092831501888923e-06, + "loss": 0.0892, + "step": 89755 + }, + { + "epoch": 4.19, + "learning_rate": 6.092047716834136e-06, + "loss": 0.0652, + "step": 89760 + }, + { + "epoch": 4.19, + "learning_rate": 6.09126393177935e-06, + "loss": 0.133, + "step": 89765 + }, + { + "epoch": 4.19, + "learning_rate": 6.090480146724562e-06, + "loss": 0.2187, + "step": 89770 + }, + { + "epoch": 4.19, + "learning_rate": 6.089696361669775e-06, + "loss": 0.2867, + "step": 89775 + }, + { + "epoch": 4.19, + "learning_rate": 6.088912576614989e-06, + "loss": 0.0944, + "step": 89780 + }, + { + "epoch": 4.19, + "learning_rate": 6.088128791560203e-06, + "loss": 0.0482, + "step": 89785 + }, + { + "epoch": 4.19, + "learning_rate": 6.087345006505416e-06, + "loss": 0.0895, + "step": 89790 + }, + { + "epoch": 4.19, + "learning_rate": 6.08656122145063e-06, + "loss": 0.0182, + "step": 89795 + }, + { + "epoch": 4.19, + "learning_rate": 6.085777436395843e-06, + "loss": 0.1007, + "step": 89800 + }, + { + "epoch": 4.19, + "learning_rate": 6.084993651341057e-06, + "loss": 0.054, + "step": 89805 + }, + { + "epoch": 4.19, + "learning_rate": 6.08420986628627e-06, + "loss": 0.0982, + "step": 89810 + }, + { + "epoch": 4.19, + "learning_rate": 6.083426081231484e-06, + "loss": 0.1718, + "step": 89815 + }, + { + "epoch": 4.19, + "learning_rate": 6.082642296176697e-06, + "loss": 0.1158, + "step": 89820 + }, + { + "epoch": 4.19, + "learning_rate": 6.081858511121911e-06, + "loss": 0.2076, + "step": 89825 + }, + { + "epoch": 4.19, + "learning_rate": 6.081074726067124e-06, + "loss": 0.108, + "step": 89830 + }, + { + "epoch": 4.19, + "learning_rate": 6.080290941012337e-06, + "loss": 0.0433, + "step": 89835 + }, + { + "epoch": 4.19, + "learning_rate": 6.07950715595755e-06, + "loss": 0.0423, + "step": 89840 + }, + { + "epoch": 4.19, + "learning_rate": 6.078723370902764e-06, + "loss": 0.049, + "step": 89845 + }, + { + "epoch": 4.19, + "learning_rate": 6.077939585847977e-06, + "loss": 0.0125, + "step": 89850 + }, + { + "epoch": 4.19, + "learning_rate": 6.077155800793191e-06, + "loss": 0.0529, + "step": 89855 + }, + { + "epoch": 4.19, + "learning_rate": 6.076372015738404e-06, + "loss": 0.4706, + "step": 89860 + }, + { + "epoch": 4.19, + "learning_rate": 6.075588230683618e-06, + "loss": 0.0698, + "step": 89865 + }, + { + "epoch": 4.19, + "learning_rate": 6.074804445628831e-06, + "loss": 0.1114, + "step": 89870 + }, + { + "epoch": 4.19, + "learning_rate": 6.074020660574045e-06, + "loss": 0.318, + "step": 89875 + }, + { + "epoch": 4.19, + "learning_rate": 6.073236875519258e-06, + "loss": 0.0609, + "step": 89880 + }, + { + "epoch": 4.19, + "learning_rate": 6.072453090464472e-06, + "loss": 0.0141, + "step": 89885 + }, + { + "epoch": 4.19, + "learning_rate": 6.071669305409685e-06, + "loss": 0.0518, + "step": 89890 + }, + { + "epoch": 4.19, + "learning_rate": 6.070885520354899e-06, + "loss": 0.0432, + "step": 89895 + }, + { + "epoch": 4.19, + "learning_rate": 6.070101735300111e-06, + "loss": 0.0527, + "step": 89900 + }, + { + "epoch": 4.2, + "learning_rate": 6.069317950245325e-06, + "loss": 0.0655, + "step": 89905 + }, + { + "epoch": 4.2, + "learning_rate": 6.068534165190538e-06, + "loss": 0.1085, + "step": 89910 + }, + { + "epoch": 4.2, + "learning_rate": 6.067750380135752e-06, + "loss": 0.1045, + "step": 89915 + }, + { + "epoch": 4.2, + "learning_rate": 6.066966595080965e-06, + "loss": 0.0901, + "step": 89920 + }, + { + "epoch": 4.2, + "learning_rate": 6.066182810026179e-06, + "loss": 0.2014, + "step": 89925 + }, + { + "epoch": 4.2, + "learning_rate": 6.065399024971392e-06, + "loss": 0.0804, + "step": 89930 + }, + { + "epoch": 4.2, + "learning_rate": 6.064615239916606e-06, + "loss": 0.0183, + "step": 89935 + }, + { + "epoch": 4.2, + "learning_rate": 6.063831454861819e-06, + "loss": 0.0362, + "step": 89940 + }, + { + "epoch": 4.2, + "learning_rate": 6.063047669807033e-06, + "loss": 0.034, + "step": 89945 + }, + { + "epoch": 4.2, + "learning_rate": 6.062263884752246e-06, + "loss": 0.0835, + "step": 89950 + }, + { + "epoch": 4.2, + "learning_rate": 6.06148009969746e-06, + "loss": 0.0713, + "step": 89955 + }, + { + "epoch": 4.2, + "learning_rate": 6.060696314642674e-06, + "loss": 0.123, + "step": 89960 + }, + { + "epoch": 4.2, + "learning_rate": 6.059912529587886e-06, + "loss": 0.049, + "step": 89965 + }, + { + "epoch": 4.2, + "learning_rate": 6.059128744533099e-06, + "loss": 0.1066, + "step": 89970 + }, + { + "epoch": 4.2, + "learning_rate": 6.058344959478313e-06, + "loss": 0.3859, + "step": 89975 + }, + { + "epoch": 4.2, + "learning_rate": 6.057561174423526e-06, + "loss": 0.0998, + "step": 89980 + }, + { + "epoch": 4.2, + "learning_rate": 6.05677738936874e-06, + "loss": 0.0131, + "step": 89985 + }, + { + "epoch": 4.2, + "learning_rate": 6.055993604313953e-06, + "loss": 0.0514, + "step": 89990 + }, + { + "epoch": 4.2, + "learning_rate": 6.055209819259167e-06, + "loss": 0.0365, + "step": 89995 + }, + { + "epoch": 4.2, + "learning_rate": 6.05442603420438e-06, + "loss": 0.0505, + "step": 90000 + }, + { + "epoch": 4.2, + "learning_rate": 6.053642249149594e-06, + "loss": 0.0449, + "step": 90005 + }, + { + "epoch": 4.2, + "learning_rate": 6.052858464094808e-06, + "loss": 0.0937, + "step": 90010 + }, + { + "epoch": 4.2, + "learning_rate": 6.052074679040021e-06, + "loss": 0.071, + "step": 90015 + }, + { + "epoch": 4.2, + "learning_rate": 6.051290893985235e-06, + "loss": 0.2377, + "step": 90020 + }, + { + "epoch": 4.2, + "learning_rate": 6.050507108930448e-06, + "loss": 0.3692, + "step": 90025 + }, + { + "epoch": 4.2, + "learning_rate": 6.04972332387566e-06, + "loss": 0.0788, + "step": 90030 + }, + { + "epoch": 4.2, + "learning_rate": 6.048939538820874e-06, + "loss": 0.0178, + "step": 90035 + }, + { + "epoch": 4.2, + "learning_rate": 6.048155753766087e-06, + "loss": 0.0402, + "step": 90040 + }, + { + "epoch": 4.2, + "learning_rate": 6.047371968711301e-06, + "loss": 0.0688, + "step": 90045 + }, + { + "epoch": 4.2, + "learning_rate": 6.046588183656515e-06, + "loss": 0.0793, + "step": 90050 + }, + { + "epoch": 4.2, + "learning_rate": 6.045804398601728e-06, + "loss": 0.0935, + "step": 90055 + }, + { + "epoch": 4.2, + "learning_rate": 6.045020613546942e-06, + "loss": 0.1183, + "step": 90060 + }, + { + "epoch": 4.2, + "learning_rate": 6.044236828492155e-06, + "loss": 0.1245, + "step": 90065 + }, + { + "epoch": 4.2, + "learning_rate": 6.043453043437369e-06, + "loss": 0.2181, + "step": 90070 + }, + { + "epoch": 4.2, + "learning_rate": 6.042669258382582e-06, + "loss": 0.1639, + "step": 90075 + }, + { + "epoch": 4.2, + "learning_rate": 6.041885473327796e-06, + "loss": 0.0822, + "step": 90080 + }, + { + "epoch": 4.2, + "learning_rate": 6.041101688273009e-06, + "loss": 0.0224, + "step": 90085 + }, + { + "epoch": 4.2, + "learning_rate": 6.040317903218223e-06, + "loss": 0.0694, + "step": 90090 + }, + { + "epoch": 4.2, + "learning_rate": 6.039534118163435e-06, + "loss": 0.0438, + "step": 90095 + }, + { + "epoch": 4.2, + "learning_rate": 6.038750333108649e-06, + "loss": 0.0986, + "step": 90100 + }, + { + "epoch": 4.2, + "learning_rate": 6.037966548053862e-06, + "loss": 0.0518, + "step": 90105 + }, + { + "epoch": 4.2, + "learning_rate": 6.037182762999076e-06, + "loss": 0.1075, + "step": 90110 + }, + { + "epoch": 4.2, + "learning_rate": 6.036398977944289e-06, + "loss": 0.0888, + "step": 90115 + }, + { + "epoch": 4.21, + "learning_rate": 6.035615192889503e-06, + "loss": 0.1886, + "step": 90120 + }, + { + "epoch": 4.21, + "learning_rate": 6.034831407834716e-06, + "loss": 0.242, + "step": 90125 + }, + { + "epoch": 4.21, + "learning_rate": 6.03404762277993e-06, + "loss": 0.0662, + "step": 90130 + }, + { + "epoch": 4.21, + "learning_rate": 6.033263837725143e-06, + "loss": 0.0289, + "step": 90135 + }, + { + "epoch": 4.21, + "learning_rate": 6.032480052670357e-06, + "loss": 0.0234, + "step": 90140 + }, + { + "epoch": 4.21, + "learning_rate": 6.03169626761557e-06, + "loss": 0.0416, + "step": 90145 + }, + { + "epoch": 4.21, + "learning_rate": 6.0309124825607836e-06, + "loss": 0.0352, + "step": 90150 + }, + { + "epoch": 4.21, + "learning_rate": 6.030128697505997e-06, + "loss": 0.0751, + "step": 90155 + }, + { + "epoch": 4.21, + "learning_rate": 6.02934491245121e-06, + "loss": 0.1329, + "step": 90160 + }, + { + "epoch": 4.21, + "learning_rate": 6.028561127396423e-06, + "loss": 0.1839, + "step": 90165 + }, + { + "epoch": 4.21, + "learning_rate": 6.027777342341637e-06, + "loss": 0.2593, + "step": 90170 + }, + { + "epoch": 4.21, + "learning_rate": 6.02699355728685e-06, + "loss": 0.284, + "step": 90175 + }, + { + "epoch": 4.21, + "learning_rate": 6.026209772232064e-06, + "loss": 0.0334, + "step": 90180 + }, + { + "epoch": 4.21, + "learning_rate": 6.025425987177277e-06, + "loss": 0.0346, + "step": 90185 + }, + { + "epoch": 4.21, + "learning_rate": 6.0246422021224906e-06, + "loss": 0.0354, + "step": 90190 + }, + { + "epoch": 4.21, + "learning_rate": 6.023858417067704e-06, + "loss": 0.0417, + "step": 90195 + }, + { + "epoch": 4.21, + "learning_rate": 6.0230746320129175e-06, + "loss": 0.1311, + "step": 90200 + }, + { + "epoch": 4.21, + "learning_rate": 6.022290846958131e-06, + "loss": 0.0548, + "step": 90205 + }, + { + "epoch": 4.21, + "learning_rate": 6.0215070619033445e-06, + "loss": 0.0819, + "step": 90210 + }, + { + "epoch": 4.21, + "learning_rate": 6.0207232768485576e-06, + "loss": 0.1508, + "step": 90215 + }, + { + "epoch": 4.21, + "learning_rate": 6.0199394917937715e-06, + "loss": 0.2033, + "step": 90220 + }, + { + "epoch": 4.21, + "learning_rate": 6.019155706738984e-06, + "loss": 0.2608, + "step": 90225 + }, + { + "epoch": 4.21, + "learning_rate": 6.018371921684198e-06, + "loss": 0.1337, + "step": 90230 + }, + { + "epoch": 4.21, + "learning_rate": 6.017588136629411e-06, + "loss": 0.0074, + "step": 90235 + }, + { + "epoch": 4.21, + "learning_rate": 6.0168043515746245e-06, + "loss": 0.0424, + "step": 90240 + }, + { + "epoch": 4.21, + "learning_rate": 6.016020566519838e-06, + "loss": 0.0388, + "step": 90245 + }, + { + "epoch": 4.21, + "learning_rate": 6.0152367814650515e-06, + "loss": 0.0768, + "step": 90250 + }, + { + "epoch": 4.21, + "learning_rate": 6.0144529964102646e-06, + "loss": 0.0497, + "step": 90255 + }, + { + "epoch": 4.21, + "learning_rate": 6.0136692113554785e-06, + "loss": 0.0285, + "step": 90260 + }, + { + "epoch": 4.21, + "learning_rate": 6.0128854263006915e-06, + "loss": 0.0737, + "step": 90265 + }, + { + "epoch": 4.21, + "learning_rate": 6.0121016412459054e-06, + "loss": 0.185, + "step": 90270 + }, + { + "epoch": 4.21, + "learning_rate": 6.011317856191119e-06, + "loss": 0.2338, + "step": 90275 + }, + { + "epoch": 4.21, + "learning_rate": 6.010534071136332e-06, + "loss": 0.058, + "step": 90280 + }, + { + "epoch": 4.21, + "learning_rate": 6.009750286081546e-06, + "loss": 0.0251, + "step": 90285 + }, + { + "epoch": 4.21, + "learning_rate": 6.0089665010267585e-06, + "loss": 0.0317, + "step": 90290 + }, + { + "epoch": 4.21, + "learning_rate": 6.008182715971972e-06, + "loss": 0.0508, + "step": 90295 + }, + { + "epoch": 4.21, + "learning_rate": 6.0073989309171855e-06, + "loss": 0.0738, + "step": 90300 + }, + { + "epoch": 4.21, + "learning_rate": 6.0066151458623985e-06, + "loss": 0.0557, + "step": 90305 + }, + { + "epoch": 4.21, + "learning_rate": 6.0058313608076125e-06, + "loss": 0.085, + "step": 90310 + }, + { + "epoch": 4.21, + "learning_rate": 6.0050475757528255e-06, + "loss": 0.1645, + "step": 90315 + }, + { + "epoch": 4.21, + "learning_rate": 6.004263790698039e-06, + "loss": 0.1144, + "step": 90320 + }, + { + "epoch": 4.21, + "learning_rate": 6.003480005643253e-06, + "loss": 0.2678, + "step": 90325 + }, + { + "epoch": 4.21, + "learning_rate": 6.002696220588466e-06, + "loss": 0.1039, + "step": 90330 + }, + { + "epoch": 4.22, + "learning_rate": 6.00191243553368e-06, + "loss": 0.0477, + "step": 90335 + }, + { + "epoch": 4.22, + "learning_rate": 6.001128650478893e-06, + "loss": 0.0343, + "step": 90340 + }, + { + "epoch": 4.22, + "learning_rate": 6.000344865424107e-06, + "loss": 0.0525, + "step": 90345 + }, + { + "epoch": 4.22, + "learning_rate": 5.99956108036932e-06, + "loss": 0.0651, + "step": 90350 + }, + { + "epoch": 4.22, + "learning_rate": 5.9987772953145325e-06, + "loss": 0.1345, + "step": 90355 + }, + { + "epoch": 4.22, + "learning_rate": 5.9979935102597464e-06, + "loss": 0.0969, + "step": 90360 + }, + { + "epoch": 4.22, + "learning_rate": 5.99720972520496e-06, + "loss": 0.1482, + "step": 90365 + }, + { + "epoch": 4.22, + "learning_rate": 5.996425940150173e-06, + "loss": 0.1354, + "step": 90370 + }, + { + "epoch": 4.22, + "learning_rate": 5.995642155095387e-06, + "loss": 0.3137, + "step": 90375 + }, + { + "epoch": 4.22, + "learning_rate": 5.9948583700406e-06, + "loss": 0.0598, + "step": 90380 + }, + { + "epoch": 4.22, + "learning_rate": 5.994074584985814e-06, + "loss": 0.0126, + "step": 90385 + }, + { + "epoch": 4.22, + "learning_rate": 5.993290799931027e-06, + "loss": 0.0265, + "step": 90390 + }, + { + "epoch": 4.22, + "learning_rate": 5.992507014876241e-06, + "loss": 0.0334, + "step": 90395 + }, + { + "epoch": 4.22, + "learning_rate": 5.991723229821454e-06, + "loss": 0.0125, + "step": 90400 + }, + { + "epoch": 4.22, + "learning_rate": 5.990939444766668e-06, + "loss": 0.0424, + "step": 90405 + }, + { + "epoch": 4.22, + "learning_rate": 5.990155659711881e-06, + "loss": 0.0839, + "step": 90410 + }, + { + "epoch": 4.22, + "learning_rate": 5.989371874657095e-06, + "loss": 0.0869, + "step": 90415 + }, + { + "epoch": 4.22, + "learning_rate": 5.988588089602307e-06, + "loss": 0.2141, + "step": 90420 + }, + { + "epoch": 4.22, + "learning_rate": 5.987804304547521e-06, + "loss": 0.284, + "step": 90425 + }, + { + "epoch": 4.22, + "learning_rate": 5.987020519492734e-06, + "loss": 0.1168, + "step": 90430 + }, + { + "epoch": 4.22, + "learning_rate": 5.986236734437948e-06, + "loss": 0.069, + "step": 90435 + }, + { + "epoch": 4.22, + "learning_rate": 5.985452949383161e-06, + "loss": 0.0413, + "step": 90440 + }, + { + "epoch": 4.22, + "learning_rate": 5.984669164328375e-06, + "loss": 0.0248, + "step": 90445 + }, + { + "epoch": 4.22, + "learning_rate": 5.983885379273588e-06, + "loss": 0.0796, + "step": 90450 + }, + { + "epoch": 4.22, + "learning_rate": 5.983101594218802e-06, + "loss": 0.1198, + "step": 90455 + }, + { + "epoch": 4.22, + "learning_rate": 5.982317809164015e-06, + "loss": 0.1314, + "step": 90460 + }, + { + "epoch": 4.22, + "learning_rate": 5.981534024109229e-06, + "loss": 0.1196, + "step": 90465 + }, + { + "epoch": 4.22, + "learning_rate": 5.980750239054442e-06, + "loss": 0.253, + "step": 90470 + }, + { + "epoch": 4.22, + "learning_rate": 5.979966453999656e-06, + "loss": 0.2472, + "step": 90475 + }, + { + "epoch": 4.22, + "learning_rate": 5.979182668944869e-06, + "loss": 0.13, + "step": 90480 + }, + { + "epoch": 4.22, + "learning_rate": 5.978398883890082e-06, + "loss": 0.008, + "step": 90485 + }, + { + "epoch": 4.22, + "learning_rate": 5.977615098835295e-06, + "loss": 0.0168, + "step": 90490 + }, + { + "epoch": 4.22, + "learning_rate": 5.976831313780509e-06, + "loss": 0.0762, + "step": 90495 + }, + { + "epoch": 4.22, + "learning_rate": 5.976047528725722e-06, + "loss": 0.0723, + "step": 90500 + }, + { + "epoch": 4.22, + "learning_rate": 5.975263743670936e-06, + "loss": 0.0596, + "step": 90505 + }, + { + "epoch": 4.22, + "learning_rate": 5.974479958616149e-06, + "loss": 0.1083, + "step": 90510 + }, + { + "epoch": 4.22, + "learning_rate": 5.973696173561363e-06, + "loss": 0.1273, + "step": 90515 + }, + { + "epoch": 4.22, + "learning_rate": 5.972912388506576e-06, + "loss": 0.1285, + "step": 90520 + }, + { + "epoch": 4.22, + "learning_rate": 5.97212860345179e-06, + "loss": 0.2257, + "step": 90525 + }, + { + "epoch": 4.22, + "learning_rate": 5.9715015754079605e-06, + "loss": 0.0868, + "step": 90530 + }, + { + "epoch": 4.22, + "learning_rate": 5.9707177903531744e-06, + "loss": 0.058, + "step": 90535 + }, + { + "epoch": 4.22, + "learning_rate": 5.9699340052983875e-06, + "loss": 0.0261, + "step": 90540 + }, + { + "epoch": 4.22, + "learning_rate": 5.969150220243601e-06, + "loss": 0.0416, + "step": 90545 + }, + { + "epoch": 4.23, + "learning_rate": 5.968366435188814e-06, + "loss": 0.0643, + "step": 90550 + }, + { + "epoch": 4.23, + "learning_rate": 5.9675826501340275e-06, + "loss": 0.0362, + "step": 90555 + }, + { + "epoch": 4.23, + "learning_rate": 5.9667988650792406e-06, + "loss": 0.1408, + "step": 90560 + }, + { + "epoch": 4.23, + "learning_rate": 5.9660150800244545e-06, + "loss": 0.1165, + "step": 90565 + }, + { + "epoch": 4.23, + "learning_rate": 5.9652312949696675e-06, + "loss": 0.0919, + "step": 90570 + }, + { + "epoch": 4.23, + "learning_rate": 5.9644475099148814e-06, + "loss": 0.2328, + "step": 90575 + }, + { + "epoch": 4.23, + "learning_rate": 5.9636637248600945e-06, + "loss": 0.1449, + "step": 90580 + }, + { + "epoch": 4.23, + "learning_rate": 5.962879939805308e-06, + "loss": 0.01, + "step": 90585 + }, + { + "epoch": 4.23, + "learning_rate": 5.9620961547505215e-06, + "loss": 0.0488, + "step": 90590 + }, + { + "epoch": 4.23, + "learning_rate": 5.961312369695735e-06, + "loss": 0.0404, + "step": 90595 + }, + { + "epoch": 4.23, + "learning_rate": 5.960528584640949e-06, + "loss": 0.0393, + "step": 90600 + }, + { + "epoch": 4.23, + "learning_rate": 5.959744799586162e-06, + "loss": 0.0907, + "step": 90605 + }, + { + "epoch": 4.23, + "learning_rate": 5.958961014531376e-06, + "loss": 0.0856, + "step": 90610 + }, + { + "epoch": 4.23, + "learning_rate": 5.9581772294765884e-06, + "loss": 0.0902, + "step": 90615 + }, + { + "epoch": 4.23, + "learning_rate": 5.9573934444218015e-06, + "loss": 0.1232, + "step": 90620 + }, + { + "epoch": 4.23, + "learning_rate": 5.956609659367015e-06, + "loss": 0.2334, + "step": 90625 + }, + { + "epoch": 4.23, + "learning_rate": 5.9558258743122285e-06, + "loss": 0.0768, + "step": 90630 + }, + { + "epoch": 4.23, + "learning_rate": 5.955042089257442e-06, + "loss": 0.0328, + "step": 90635 + }, + { + "epoch": 4.23, + "learning_rate": 5.9542583042026554e-06, + "loss": 0.0109, + "step": 90640 + }, + { + "epoch": 4.23, + "learning_rate": 5.953474519147869e-06, + "loss": 0.0723, + "step": 90645 + }, + { + "epoch": 4.23, + "learning_rate": 5.952690734093083e-06, + "loss": 0.1425, + "step": 90650 + }, + { + "epoch": 4.23, + "learning_rate": 5.951906949038296e-06, + "loss": 0.0614, + "step": 90655 + }, + { + "epoch": 4.23, + "learning_rate": 5.95112316398351e-06, + "loss": 0.1139, + "step": 90660 + }, + { + "epoch": 4.23, + "learning_rate": 5.950339378928723e-06, + "loss": 0.1506, + "step": 90665 + }, + { + "epoch": 4.23, + "learning_rate": 5.949555593873937e-06, + "loss": 0.1338, + "step": 90670 + }, + { + "epoch": 4.23, + "learning_rate": 5.94877180881915e-06, + "loss": 0.2628, + "step": 90675 + }, + { + "epoch": 4.23, + "learning_rate": 5.9479880237643624e-06, + "loss": 0.1299, + "step": 90680 + }, + { + "epoch": 4.23, + "learning_rate": 5.947204238709576e-06, + "loss": 0.0146, + "step": 90685 + }, + { + "epoch": 4.23, + "learning_rate": 5.946420453654789e-06, + "loss": 0.0128, + "step": 90690 + }, + { + "epoch": 4.23, + "learning_rate": 5.945636668600003e-06, + "loss": 0.0892, + "step": 90695 + }, + { + "epoch": 4.23, + "learning_rate": 5.944852883545217e-06, + "loss": 0.0325, + "step": 90700 + }, + { + "epoch": 4.23, + "learning_rate": 5.94406909849043e-06, + "loss": 0.0615, + "step": 90705 + }, + { + "epoch": 4.23, + "learning_rate": 5.943285313435644e-06, + "loss": 0.0741, + "step": 90710 + }, + { + "epoch": 4.23, + "learning_rate": 5.942501528380857e-06, + "loss": 0.0939, + "step": 90715 + }, + { + "epoch": 4.23, + "learning_rate": 5.941717743326071e-06, + "loss": 0.1592, + "step": 90720 + }, + { + "epoch": 4.23, + "learning_rate": 5.940933958271284e-06, + "loss": 0.2863, + "step": 90725 + }, + { + "epoch": 4.23, + "learning_rate": 5.940150173216498e-06, + "loss": 0.0575, + "step": 90730 + }, + { + "epoch": 4.23, + "learning_rate": 5.939366388161711e-06, + "loss": 0.017, + "step": 90735 + }, + { + "epoch": 4.23, + "learning_rate": 5.938582603106925e-06, + "loss": 0.0257, + "step": 90740 + }, + { + "epoch": 4.23, + "learning_rate": 5.937798818052137e-06, + "loss": 0.0217, + "step": 90745 + }, + { + "epoch": 4.23, + "learning_rate": 5.937015032997351e-06, + "loss": 0.0478, + "step": 90750 + }, + { + "epoch": 4.23, + "learning_rate": 5.936231247942564e-06, + "loss": 0.0328, + "step": 90755 + }, + { + "epoch": 4.23, + "learning_rate": 5.935447462887778e-06, + "loss": 0.1046, + "step": 90760 + }, + { + "epoch": 4.24, + "learning_rate": 5.934663677832991e-06, + "loss": 0.07, + "step": 90765 + }, + { + "epoch": 4.24, + "learning_rate": 5.933879892778205e-06, + "loss": 0.1482, + "step": 90770 + }, + { + "epoch": 4.24, + "learning_rate": 5.933096107723418e-06, + "loss": 0.312, + "step": 90775 + }, + { + "epoch": 4.24, + "learning_rate": 5.932312322668632e-06, + "loss": 0.0711, + "step": 90780 + }, + { + "epoch": 4.24, + "learning_rate": 5.931528537613845e-06, + "loss": 0.0331, + "step": 90785 + }, + { + "epoch": 4.24, + "learning_rate": 5.930744752559059e-06, + "loss": 0.0518, + "step": 90790 + }, + { + "epoch": 4.24, + "learning_rate": 5.929960967504272e-06, + "loss": 0.0243, + "step": 90795 + }, + { + "epoch": 4.24, + "learning_rate": 5.929177182449486e-06, + "loss": 0.0966, + "step": 90800 + }, + { + "epoch": 4.24, + "learning_rate": 5.928393397394699e-06, + "loss": 0.0981, + "step": 90805 + }, + { + "epoch": 4.24, + "learning_rate": 5.927609612339912e-06, + "loss": 0.1148, + "step": 90810 + }, + { + "epoch": 4.24, + "learning_rate": 5.926825827285125e-06, + "loss": 0.119, + "step": 90815 + }, + { + "epoch": 4.24, + "learning_rate": 5.926042042230339e-06, + "loss": 0.1786, + "step": 90820 + }, + { + "epoch": 4.24, + "learning_rate": 5.925258257175552e-06, + "loss": 0.3331, + "step": 90825 + }, + { + "epoch": 4.24, + "learning_rate": 5.924474472120766e-06, + "loss": 0.074, + "step": 90830 + }, + { + "epoch": 4.24, + "learning_rate": 5.923690687065979e-06, + "loss": 0.0332, + "step": 90835 + }, + { + "epoch": 4.24, + "learning_rate": 5.922906902011193e-06, + "loss": 0.0213, + "step": 90840 + }, + { + "epoch": 4.24, + "learning_rate": 5.922123116956406e-06, + "loss": 0.0228, + "step": 90845 + }, + { + "epoch": 4.24, + "learning_rate": 5.92133933190162e-06, + "loss": 0.0234, + "step": 90850 + }, + { + "epoch": 4.24, + "learning_rate": 5.920555546846833e-06, + "loss": 0.0954, + "step": 90855 + }, + { + "epoch": 4.24, + "learning_rate": 5.919771761792047e-06, + "loss": 0.0538, + "step": 90860 + }, + { + "epoch": 4.24, + "learning_rate": 5.918987976737261e-06, + "loss": 0.1178, + "step": 90865 + }, + { + "epoch": 4.24, + "learning_rate": 5.918204191682474e-06, + "loss": 0.1837, + "step": 90870 + }, + { + "epoch": 4.24, + "learning_rate": 5.917420406627686e-06, + "loss": 0.3523, + "step": 90875 + }, + { + "epoch": 4.24, + "learning_rate": 5.9166366215729e-06, + "loss": 0.078, + "step": 90880 + }, + { + "epoch": 4.24, + "learning_rate": 5.915852836518113e-06, + "loss": 0.0019, + "step": 90885 + }, + { + "epoch": 4.24, + "learning_rate": 5.915069051463327e-06, + "loss": 0.0377, + "step": 90890 + }, + { + "epoch": 4.24, + "learning_rate": 5.91428526640854e-06, + "loss": 0.0181, + "step": 90895 + }, + { + "epoch": 4.24, + "learning_rate": 5.913501481353754e-06, + "loss": 0.0752, + "step": 90900 + }, + { + "epoch": 4.24, + "learning_rate": 5.912717696298967e-06, + "loss": 0.1215, + "step": 90905 + }, + { + "epoch": 4.24, + "learning_rate": 5.911933911244181e-06, + "loss": 0.0927, + "step": 90910 + }, + { + "epoch": 4.24, + "learning_rate": 5.911150126189395e-06, + "loss": 0.1609, + "step": 90915 + }, + { + "epoch": 4.24, + "learning_rate": 5.910366341134608e-06, + "loss": 0.1373, + "step": 90920 + }, + { + "epoch": 4.24, + "learning_rate": 5.909582556079822e-06, + "loss": 0.167, + "step": 90925 + }, + { + "epoch": 4.24, + "learning_rate": 5.908798771025035e-06, + "loss": 0.0965, + "step": 90930 + }, + { + "epoch": 4.24, + "learning_rate": 5.908014985970249e-06, + "loss": 0.0219, + "step": 90935 + }, + { + "epoch": 4.24, + "learning_rate": 5.907231200915461e-06, + "loss": 0.0882, + "step": 90940 + }, + { + "epoch": 4.24, + "learning_rate": 5.906447415860674e-06, + "loss": 0.0619, + "step": 90945 + }, + { + "epoch": 4.24, + "learning_rate": 5.905663630805888e-06, + "loss": 0.0978, + "step": 90950 + }, + { + "epoch": 4.24, + "learning_rate": 5.904879845751101e-06, + "loss": 0.0855, + "step": 90955 + }, + { + "epoch": 4.24, + "learning_rate": 5.904096060696315e-06, + "loss": 0.1471, + "step": 90960 + }, + { + "epoch": 4.24, + "learning_rate": 5.903312275641529e-06, + "loss": 0.184, + "step": 90965 + }, + { + "epoch": 4.24, + "learning_rate": 5.902528490586742e-06, + "loss": 0.1576, + "step": 90970 + }, + { + "epoch": 4.25, + "learning_rate": 5.901744705531956e-06, + "loss": 0.2986, + "step": 90975 + }, + { + "epoch": 4.25, + "learning_rate": 5.900960920477169e-06, + "loss": 0.1072, + "step": 90980 + }, + { + "epoch": 4.25, + "learning_rate": 5.900177135422383e-06, + "loss": 0.0297, + "step": 90985 + }, + { + "epoch": 4.25, + "learning_rate": 5.899393350367596e-06, + "loss": 0.0241, + "step": 90990 + }, + { + "epoch": 4.25, + "learning_rate": 5.89860956531281e-06, + "loss": 0.11, + "step": 90995 + }, + { + "epoch": 4.25, + "learning_rate": 5.897825780258023e-06, + "loss": 0.0495, + "step": 91000 + }, + { + "epoch": 4.25, + "learning_rate": 5.897041995203235e-06, + "loss": 0.1527, + "step": 91005 + }, + { + "epoch": 4.25, + "learning_rate": 5.896258210148449e-06, + "loss": 0.1269, + "step": 91010 + }, + { + "epoch": 4.25, + "learning_rate": 5.895474425093663e-06, + "loss": 0.0705, + "step": 91015 + }, + { + "epoch": 4.25, + "learning_rate": 5.894690640038876e-06, + "loss": 0.1735, + "step": 91020 + }, + { + "epoch": 4.25, + "learning_rate": 5.89390685498409e-06, + "loss": 0.4046, + "step": 91025 + }, + { + "epoch": 4.25, + "learning_rate": 5.893123069929303e-06, + "loss": 0.0778, + "step": 91030 + }, + { + "epoch": 4.25, + "learning_rate": 5.892339284874517e-06, + "loss": 0.0062, + "step": 91035 + }, + { + "epoch": 4.25, + "learning_rate": 5.89155549981973e-06, + "loss": 0.0657, + "step": 91040 + }, + { + "epoch": 4.25, + "learning_rate": 5.890771714764944e-06, + "loss": 0.0449, + "step": 91045 + }, + { + "epoch": 4.25, + "learning_rate": 5.889987929710157e-06, + "loss": 0.0381, + "step": 91050 + }, + { + "epoch": 4.25, + "learning_rate": 5.889204144655371e-06, + "loss": 0.0849, + "step": 91055 + }, + { + "epoch": 4.25, + "learning_rate": 5.888420359600584e-06, + "loss": 0.1032, + "step": 91060 + }, + { + "epoch": 4.25, + "learning_rate": 5.887636574545798e-06, + "loss": 0.1318, + "step": 91065 + }, + { + "epoch": 4.25, + "learning_rate": 5.886852789491011e-06, + "loss": 0.2191, + "step": 91070 + }, + { + "epoch": 4.25, + "learning_rate": 5.886069004436224e-06, + "loss": 0.1968, + "step": 91075 + }, + { + "epoch": 4.25, + "learning_rate": 5.885285219381437e-06, + "loss": 0.0889, + "step": 91080 + }, + { + "epoch": 4.25, + "learning_rate": 5.884501434326651e-06, + "loss": 0.0098, + "step": 91085 + }, + { + "epoch": 4.25, + "learning_rate": 5.883717649271864e-06, + "loss": 0.0416, + "step": 91090 + }, + { + "epoch": 4.25, + "learning_rate": 5.882933864217078e-06, + "loss": 0.0278, + "step": 91095 + }, + { + "epoch": 4.25, + "learning_rate": 5.882150079162291e-06, + "loss": 0.1008, + "step": 91100 + }, + { + "epoch": 4.25, + "learning_rate": 5.881366294107505e-06, + "loss": 0.1075, + "step": 91105 + }, + { + "epoch": 4.25, + "learning_rate": 5.880582509052718e-06, + "loss": 0.1326, + "step": 91110 + }, + { + "epoch": 4.25, + "learning_rate": 5.879798723997932e-06, + "loss": 0.1041, + "step": 91115 + }, + { + "epoch": 4.25, + "learning_rate": 5.879014938943145e-06, + "loss": 0.1203, + "step": 91120 + }, + { + "epoch": 4.25, + "learning_rate": 5.8782311538883586e-06, + "loss": 0.3442, + "step": 91125 + }, + { + "epoch": 4.25, + "learning_rate": 5.877447368833572e-06, + "loss": 0.0813, + "step": 91130 + }, + { + "epoch": 4.25, + "learning_rate": 5.8766635837787855e-06, + "loss": 0.0219, + "step": 91135 + }, + { + "epoch": 4.25, + "learning_rate": 5.875879798723998e-06, + "loss": 0.0666, + "step": 91140 + }, + { + "epoch": 4.25, + "learning_rate": 5.875096013669212e-06, + "loss": 0.0523, + "step": 91145 + }, + { + "epoch": 4.25, + "learning_rate": 5.874312228614425e-06, + "loss": 0.0992, + "step": 91150 + }, + { + "epoch": 4.25, + "learning_rate": 5.873528443559639e-06, + "loss": 0.0499, + "step": 91155 + }, + { + "epoch": 4.25, + "learning_rate": 5.872744658504852e-06, + "loss": 0.1551, + "step": 91160 + }, + { + "epoch": 4.25, + "learning_rate": 5.8719608734500656e-06, + "loss": 0.1377, + "step": 91165 + }, + { + "epoch": 4.25, + "learning_rate": 5.871177088395279e-06, + "loss": 0.2001, + "step": 91170 + }, + { + "epoch": 4.25, + "learning_rate": 5.8703933033404925e-06, + "loss": 0.3385, + "step": 91175 + }, + { + "epoch": 4.25, + "learning_rate": 5.8696095182857064e-06, + "loss": 0.0546, + "step": 91180 + }, + { + "epoch": 4.25, + "learning_rate": 5.8688257332309195e-06, + "loss": 0.0133, + "step": 91185 + }, + { + "epoch": 4.26, + "learning_rate": 5.868041948176133e-06, + "loss": 0.0212, + "step": 91190 + }, + { + "epoch": 4.26, + "learning_rate": 5.8672581631213465e-06, + "loss": 0.1172, + "step": 91195 + }, + { + "epoch": 4.26, + "learning_rate": 5.86647437806656e-06, + "loss": 0.0705, + "step": 91200 + }, + { + "epoch": 4.26, + "learning_rate": 5.865690593011773e-06, + "loss": 0.039, + "step": 91205 + }, + { + "epoch": 4.26, + "learning_rate": 5.864906807956986e-06, + "loss": 0.0975, + "step": 91210 + }, + { + "epoch": 4.26, + "learning_rate": 5.8641230229021995e-06, + "loss": 0.1576, + "step": 91215 + }, + { + "epoch": 4.26, + "learning_rate": 5.863339237847413e-06, + "loss": 0.1465, + "step": 91220 + }, + { + "epoch": 4.26, + "learning_rate": 5.8625554527926265e-06, + "loss": 0.2648, + "step": 91225 + }, + { + "epoch": 4.26, + "learning_rate": 5.86177166773784e-06, + "loss": 0.0647, + "step": 91230 + }, + { + "epoch": 4.26, + "learning_rate": 5.8609878826830535e-06, + "loss": 0.0208, + "step": 91235 + }, + { + "epoch": 4.26, + "learning_rate": 5.860204097628267e-06, + "loss": 0.0095, + "step": 91240 + }, + { + "epoch": 4.26, + "learning_rate": 5.8594203125734804e-06, + "loss": 0.019, + "step": 91245 + }, + { + "epoch": 4.26, + "learning_rate": 5.858636527518694e-06, + "loss": 0.054, + "step": 91250 + }, + { + "epoch": 4.26, + "learning_rate": 5.857852742463907e-06, + "loss": 0.0704, + "step": 91255 + }, + { + "epoch": 4.26, + "learning_rate": 5.857068957409121e-06, + "loss": 0.0985, + "step": 91260 + }, + { + "epoch": 4.26, + "learning_rate": 5.856285172354334e-06, + "loss": 0.1225, + "step": 91265 + }, + { + "epoch": 4.26, + "learning_rate": 5.8555013872995466e-06, + "loss": 0.117, + "step": 91270 + }, + { + "epoch": 4.26, + "learning_rate": 5.8547176022447605e-06, + "loss": 0.2884, + "step": 91275 + }, + { + "epoch": 4.26, + "learning_rate": 5.853933817189974e-06, + "loss": 0.0266, + "step": 91280 + }, + { + "epoch": 4.26, + "learning_rate": 5.8531500321351874e-06, + "loss": 0.0104, + "step": 91285 + }, + { + "epoch": 4.26, + "learning_rate": 5.852366247080401e-06, + "loss": 0.0138, + "step": 91290 + }, + { + "epoch": 4.26, + "learning_rate": 5.851582462025614e-06, + "loss": 0.0621, + "step": 91295 + }, + { + "epoch": 4.26, + "learning_rate": 5.850798676970828e-06, + "loss": 0.0707, + "step": 91300 + }, + { + "epoch": 4.26, + "learning_rate": 5.850014891916041e-06, + "loss": 0.1172, + "step": 91305 + }, + { + "epoch": 4.26, + "learning_rate": 5.849231106861255e-06, + "loss": 0.0875, + "step": 91310 + }, + { + "epoch": 4.26, + "learning_rate": 5.848447321806468e-06, + "loss": 0.134, + "step": 91315 + }, + { + "epoch": 4.26, + "learning_rate": 5.847663536751682e-06, + "loss": 0.2569, + "step": 91320 + }, + { + "epoch": 4.26, + "learning_rate": 5.846879751696895e-06, + "loss": 0.3717, + "step": 91325 + }, + { + "epoch": 4.26, + "learning_rate": 5.846095966642109e-06, + "loss": 0.1126, + "step": 91330 + }, + { + "epoch": 4.26, + "learning_rate": 5.845312181587321e-06, + "loss": 0.023, + "step": 91335 + }, + { + "epoch": 4.26, + "learning_rate": 5.844528396532535e-06, + "loss": 0.0438, + "step": 91340 + }, + { + "epoch": 4.26, + "learning_rate": 5.843744611477748e-06, + "loss": 0.0307, + "step": 91345 + }, + { + "epoch": 4.26, + "learning_rate": 5.842960826422962e-06, + "loss": 0.0288, + "step": 91350 + }, + { + "epoch": 4.26, + "learning_rate": 5.842177041368175e-06, + "loss": 0.0755, + "step": 91355 + }, + { + "epoch": 4.26, + "learning_rate": 5.841393256313389e-06, + "loss": 0.121, + "step": 91360 + }, + { + "epoch": 4.26, + "learning_rate": 5.840609471258602e-06, + "loss": 0.077, + "step": 91365 + }, + { + "epoch": 4.26, + "learning_rate": 5.839825686203816e-06, + "loss": 0.2005, + "step": 91370 + }, + { + "epoch": 4.26, + "learning_rate": 5.839041901149029e-06, + "loss": 0.2854, + "step": 91375 + }, + { + "epoch": 4.26, + "learning_rate": 5.838258116094243e-06, + "loss": 0.061, + "step": 91380 + }, + { + "epoch": 4.26, + "learning_rate": 5.837474331039456e-06, + "loss": 0.0745, + "step": 91385 + }, + { + "epoch": 4.26, + "learning_rate": 5.83669054598467e-06, + "loss": 0.011, + "step": 91390 + }, + { + "epoch": 4.26, + "learning_rate": 5.835906760929883e-06, + "loss": 0.0118, + "step": 91395 + }, + { + "epoch": 4.26, + "learning_rate": 5.835122975875096e-06, + "loss": 0.0725, + "step": 91400 + }, + { + "epoch": 4.27, + "learning_rate": 5.834339190820309e-06, + "loss": 0.1007, + "step": 91405 + }, + { + "epoch": 4.27, + "learning_rate": 5.833555405765523e-06, + "loss": 0.1028, + "step": 91410 + }, + { + "epoch": 4.27, + "learning_rate": 5.832771620710736e-06, + "loss": 0.0941, + "step": 91415 + }, + { + "epoch": 4.27, + "learning_rate": 5.83198783565595e-06, + "loss": 0.1198, + "step": 91420 + }, + { + "epoch": 4.27, + "learning_rate": 5.831204050601163e-06, + "loss": 0.2649, + "step": 91425 + }, + { + "epoch": 4.27, + "learning_rate": 5.830420265546377e-06, + "loss": 0.0966, + "step": 91430 + }, + { + "epoch": 4.27, + "learning_rate": 5.82963648049159e-06, + "loss": 0.026, + "step": 91435 + }, + { + "epoch": 4.27, + "learning_rate": 5.828852695436804e-06, + "loss": 0.02, + "step": 91440 + }, + { + "epoch": 4.27, + "learning_rate": 5.828068910382018e-06, + "loss": 0.0755, + "step": 91445 + }, + { + "epoch": 4.27, + "learning_rate": 5.827285125327231e-06, + "loss": 0.085, + "step": 91450 + }, + { + "epoch": 4.27, + "learning_rate": 5.826501340272445e-06, + "loss": 0.1138, + "step": 91455 + }, + { + "epoch": 4.27, + "learning_rate": 5.825717555217658e-06, + "loss": 0.0904, + "step": 91460 + }, + { + "epoch": 4.27, + "learning_rate": 5.82493377016287e-06, + "loss": 0.148, + "step": 91465 + }, + { + "epoch": 4.27, + "learning_rate": 5.824149985108084e-06, + "loss": 0.2005, + "step": 91470 + }, + { + "epoch": 4.27, + "learning_rate": 5.823366200053297e-06, + "loss": 0.2441, + "step": 91475 + }, + { + "epoch": 4.27, + "learning_rate": 5.822582414998511e-06, + "loss": 0.1025, + "step": 91480 + }, + { + "epoch": 4.27, + "learning_rate": 5.821798629943724e-06, + "loss": 0.0482, + "step": 91485 + }, + { + "epoch": 4.27, + "learning_rate": 5.821014844888938e-06, + "loss": 0.0478, + "step": 91490 + }, + { + "epoch": 4.27, + "learning_rate": 5.820231059834152e-06, + "loss": 0.085, + "step": 91495 + }, + { + "epoch": 4.27, + "learning_rate": 5.819447274779365e-06, + "loss": 0.0428, + "step": 91500 + }, + { + "epoch": 4.27, + "learning_rate": 5.818663489724579e-06, + "loss": 0.0218, + "step": 91505 + }, + { + "epoch": 4.27, + "learning_rate": 5.817879704669792e-06, + "loss": 0.1121, + "step": 91510 + }, + { + "epoch": 4.27, + "learning_rate": 5.817095919615006e-06, + "loss": 0.1174, + "step": 91515 + }, + { + "epoch": 4.27, + "learning_rate": 5.816312134560219e-06, + "loss": 0.1658, + "step": 91520 + }, + { + "epoch": 4.27, + "learning_rate": 5.815528349505433e-06, + "loss": 0.2823, + "step": 91525 + }, + { + "epoch": 4.27, + "learning_rate": 5.814744564450645e-06, + "loss": 0.0966, + "step": 91530 + }, + { + "epoch": 4.27, + "learning_rate": 5.813960779395858e-06, + "loss": 0.0231, + "step": 91535 + }, + { + "epoch": 4.27, + "learning_rate": 5.813176994341072e-06, + "loss": 0.0423, + "step": 91540 + }, + { + "epoch": 4.27, + "learning_rate": 5.812393209286286e-06, + "loss": 0.0486, + "step": 91545 + }, + { + "epoch": 4.27, + "learning_rate": 5.811609424231499e-06, + "loss": 0.044, + "step": 91550 + }, + { + "epoch": 4.27, + "learning_rate": 5.810825639176713e-06, + "loss": 0.0484, + "step": 91555 + }, + { + "epoch": 4.27, + "learning_rate": 5.810041854121926e-06, + "loss": 0.1279, + "step": 91560 + }, + { + "epoch": 4.27, + "learning_rate": 5.80925806906714e-06, + "loss": 0.0939, + "step": 91565 + }, + { + "epoch": 4.27, + "learning_rate": 5.808474284012353e-06, + "loss": 0.1795, + "step": 91570 + }, + { + "epoch": 4.27, + "learning_rate": 5.807690498957567e-06, + "loss": 0.2573, + "step": 91575 + }, + { + "epoch": 4.27, + "learning_rate": 5.80690671390278e-06, + "loss": 0.0619, + "step": 91580 + }, + { + "epoch": 4.27, + "learning_rate": 5.806122928847994e-06, + "loss": 0.0259, + "step": 91585 + }, + { + "epoch": 4.27, + "learning_rate": 5.805339143793207e-06, + "loss": 0.0099, + "step": 91590 + }, + { + "epoch": 4.27, + "learning_rate": 5.80455535873842e-06, + "loss": 0.0308, + "step": 91595 + }, + { + "epoch": 4.27, + "learning_rate": 5.803771573683633e-06, + "loss": 0.049, + "step": 91600 + }, + { + "epoch": 4.27, + "learning_rate": 5.802987788628847e-06, + "loss": 0.0281, + "step": 91605 + }, + { + "epoch": 4.27, + "learning_rate": 5.80220400357406e-06, + "loss": 0.0662, + "step": 91610 + }, + { + "epoch": 4.27, + "learning_rate": 5.801420218519274e-06, + "loss": 0.1175, + "step": 91615 + }, + { + "epoch": 4.28, + "learning_rate": 5.800636433464487e-06, + "loss": 0.135, + "step": 91620 + }, + { + "epoch": 4.28, + "learning_rate": 5.799852648409701e-06, + "loss": 0.1982, + "step": 91625 + }, + { + "epoch": 4.28, + "learning_rate": 5.799068863354914e-06, + "loss": 0.0358, + "step": 91630 + }, + { + "epoch": 4.28, + "learning_rate": 5.798285078300128e-06, + "loss": 0.0624, + "step": 91635 + }, + { + "epoch": 4.28, + "learning_rate": 5.797501293245341e-06, + "loss": 0.1193, + "step": 91640 + }, + { + "epoch": 4.28, + "learning_rate": 5.796717508190555e-06, + "loss": 0.0967, + "step": 91645 + }, + { + "epoch": 4.28, + "learning_rate": 5.795933723135768e-06, + "loss": 0.0787, + "step": 91650 + }, + { + "epoch": 4.28, + "learning_rate": 5.795149938080982e-06, + "loss": 0.0629, + "step": 91655 + }, + { + "epoch": 4.28, + "learning_rate": 5.794366153026194e-06, + "loss": 0.1216, + "step": 91660 + }, + { + "epoch": 4.28, + "learning_rate": 5.793582367971408e-06, + "loss": 0.1739, + "step": 91665 + }, + { + "epoch": 4.28, + "learning_rate": 5.792798582916621e-06, + "loss": 0.1603, + "step": 91670 + }, + { + "epoch": 4.28, + "learning_rate": 5.792014797861835e-06, + "loss": 0.2816, + "step": 91675 + }, + { + "epoch": 4.28, + "learning_rate": 5.791231012807048e-06, + "loss": 0.0642, + "step": 91680 + }, + { + "epoch": 4.28, + "learning_rate": 5.790447227752262e-06, + "loss": 0.0517, + "step": 91685 + }, + { + "epoch": 4.28, + "learning_rate": 5.789663442697475e-06, + "loss": 0.0424, + "step": 91690 + }, + { + "epoch": 4.28, + "learning_rate": 5.788879657642689e-06, + "loss": 0.0467, + "step": 91695 + }, + { + "epoch": 4.28, + "learning_rate": 5.788095872587902e-06, + "loss": 0.1118, + "step": 91700 + }, + { + "epoch": 4.28, + "learning_rate": 5.787312087533116e-06, + "loss": 0.0512, + "step": 91705 + }, + { + "epoch": 4.28, + "learning_rate": 5.786528302478329e-06, + "loss": 0.0556, + "step": 91710 + }, + { + "epoch": 4.28, + "learning_rate": 5.785744517423543e-06, + "loss": 0.1616, + "step": 91715 + }, + { + "epoch": 4.28, + "learning_rate": 5.784960732368757e-06, + "loss": 0.1424, + "step": 91720 + }, + { + "epoch": 4.28, + "learning_rate": 5.784176947313969e-06, + "loss": 0.361, + "step": 91725 + }, + { + "epoch": 4.28, + "learning_rate": 5.783393162259182e-06, + "loss": 0.0721, + "step": 91730 + }, + { + "epoch": 4.28, + "learning_rate": 5.782609377204396e-06, + "loss": 0.0352, + "step": 91735 + }, + { + "epoch": 4.28, + "learning_rate": 5.781825592149609e-06, + "loss": 0.0332, + "step": 91740 + }, + { + "epoch": 4.28, + "learning_rate": 5.781041807094823e-06, + "loss": 0.0272, + "step": 91745 + }, + { + "epoch": 4.28, + "learning_rate": 5.780258022040036e-06, + "loss": 0.0199, + "step": 91750 + }, + { + "epoch": 4.28, + "learning_rate": 5.77947423698525e-06, + "loss": 0.1228, + "step": 91755 + }, + { + "epoch": 4.28, + "learning_rate": 5.778690451930464e-06, + "loss": 0.106, + "step": 91760 + }, + { + "epoch": 4.28, + "learning_rate": 5.777906666875677e-06, + "loss": 0.1563, + "step": 91765 + }, + { + "epoch": 4.28, + "learning_rate": 5.7771228818208906e-06, + "loss": 0.1254, + "step": 91770 + }, + { + "epoch": 4.28, + "learning_rate": 5.776339096766104e-06, + "loss": 0.2847, + "step": 91775 + }, + { + "epoch": 4.28, + "learning_rate": 5.7755553117113175e-06, + "loss": 0.1171, + "step": 91780 + }, + { + "epoch": 4.28, + "learning_rate": 5.774771526656531e-06, + "loss": 0.0268, + "step": 91785 + }, + { + "epoch": 4.28, + "learning_rate": 5.773987741601743e-06, + "loss": 0.0125, + "step": 91790 + }, + { + "epoch": 4.28, + "learning_rate": 5.773203956546957e-06, + "loss": 0.0511, + "step": 91795 + }, + { + "epoch": 4.28, + "learning_rate": 5.77242017149217e-06, + "loss": 0.0268, + "step": 91800 + }, + { + "epoch": 4.28, + "learning_rate": 5.771636386437384e-06, + "loss": 0.0658, + "step": 91805 + }, + { + "epoch": 4.28, + "learning_rate": 5.770852601382598e-06, + "loss": 0.0453, + "step": 91810 + }, + { + "epoch": 4.28, + "learning_rate": 5.770068816327811e-06, + "loss": 0.1421, + "step": 91815 + }, + { + "epoch": 4.28, + "learning_rate": 5.7692850312730245e-06, + "loss": 0.1635, + "step": 91820 + }, + { + "epoch": 4.28, + "learning_rate": 5.768501246218238e-06, + "loss": 0.3275, + "step": 91825 + }, + { + "epoch": 4.28, + "learning_rate": 5.7677174611634515e-06, + "loss": 0.0868, + "step": 91830 + }, + { + "epoch": 4.29, + "learning_rate": 5.7669336761086646e-06, + "loss": 0.0254, + "step": 91835 + }, + { + "epoch": 4.29, + "learning_rate": 5.7661498910538785e-06, + "loss": 0.0464, + "step": 91840 + }, + { + "epoch": 4.29, + "learning_rate": 5.7653661059990915e-06, + "loss": 0.0873, + "step": 91845 + }, + { + "epoch": 4.29, + "learning_rate": 5.7645823209443054e-06, + "loss": 0.1071, + "step": 91850 + }, + { + "epoch": 4.29, + "learning_rate": 5.763798535889518e-06, + "loss": 0.0801, + "step": 91855 + }, + { + "epoch": 4.29, + "learning_rate": 5.7630147508347316e-06, + "loss": 0.11, + "step": 91860 + }, + { + "epoch": 4.29, + "learning_rate": 5.762230965779945e-06, + "loss": 0.0467, + "step": 91865 + }, + { + "epoch": 4.29, + "learning_rate": 5.7614471807251585e-06, + "loss": 0.2116, + "step": 91870 + }, + { + "epoch": 4.29, + "learning_rate": 5.7606633956703716e-06, + "loss": 0.2985, + "step": 91875 + }, + { + "epoch": 4.29, + "learning_rate": 5.7598796106155855e-06, + "loss": 0.0908, + "step": 91880 + }, + { + "epoch": 4.29, + "learning_rate": 5.7590958255607985e-06, + "loss": 0.0278, + "step": 91885 + }, + { + "epoch": 4.29, + "learning_rate": 5.7583120405060124e-06, + "loss": 0.0273, + "step": 91890 + }, + { + "epoch": 4.29, + "learning_rate": 5.7575282554512255e-06, + "loss": 0.0394, + "step": 91895 + }, + { + "epoch": 4.29, + "learning_rate": 5.756744470396439e-06, + "loss": 0.0433, + "step": 91900 + }, + { + "epoch": 4.29, + "learning_rate": 5.7559606853416525e-06, + "loss": 0.0555, + "step": 91905 + }, + { + "epoch": 4.29, + "learning_rate": 5.755176900286866e-06, + "loss": 0.0859, + "step": 91910 + }, + { + "epoch": 4.29, + "learning_rate": 5.7543931152320794e-06, + "loss": 0.0759, + "step": 91915 + }, + { + "epoch": 4.29, + "learning_rate": 5.7536093301772925e-06, + "loss": 0.092, + "step": 91920 + }, + { + "epoch": 4.29, + "learning_rate": 5.7528255451225056e-06, + "loss": 0.3305, + "step": 91925 + }, + { + "epoch": 4.29, + "learning_rate": 5.7520417600677195e-06, + "loss": 0.0687, + "step": 91930 + }, + { + "epoch": 4.29, + "learning_rate": 5.7512579750129325e-06, + "loss": 0.0257, + "step": 91935 + }, + { + "epoch": 4.29, + "learning_rate": 5.750474189958146e-06, + "loss": 0.0409, + "step": 91940 + }, + { + "epoch": 4.29, + "learning_rate": 5.7496904049033595e-06, + "loss": 0.0293, + "step": 91945 + }, + { + "epoch": 4.29, + "learning_rate": 5.748906619848573e-06, + "loss": 0.0237, + "step": 91950 + }, + { + "epoch": 4.29, + "learning_rate": 5.7481228347937864e-06, + "loss": 0.1458, + "step": 91955 + }, + { + "epoch": 4.29, + "learning_rate": 5.747339049739e-06, + "loss": 0.0827, + "step": 91960 + }, + { + "epoch": 4.29, + "learning_rate": 5.746555264684213e-06, + "loss": 0.0964, + "step": 91965 + }, + { + "epoch": 4.29, + "learning_rate": 5.745771479629427e-06, + "loss": 0.2677, + "step": 91970 + }, + { + "epoch": 4.29, + "learning_rate": 5.74498769457464e-06, + "loss": 0.3508, + "step": 91975 + }, + { + "epoch": 4.29, + "learning_rate": 5.744203909519854e-06, + "loss": 0.0687, + "step": 91980 + }, + { + "epoch": 4.29, + "learning_rate": 5.7434201244650665e-06, + "loss": 0.0129, + "step": 91985 + }, + { + "epoch": 4.29, + "learning_rate": 5.74263633941028e-06, + "loss": 0.0404, + "step": 91990 + }, + { + "epoch": 4.29, + "learning_rate": 5.7418525543554935e-06, + "loss": 0.0596, + "step": 91995 + }, + { + "epoch": 4.29, + "learning_rate": 5.741068769300707e-06, + "loss": 0.0869, + "step": 92000 + }, + { + "epoch": 4.29, + "learning_rate": 5.74028498424592e-06, + "loss": 0.0776, + "step": 92005 + }, + { + "epoch": 4.29, + "learning_rate": 5.739501199191134e-06, + "loss": 0.1546, + "step": 92010 + }, + { + "epoch": 4.29, + "learning_rate": 5.738717414136347e-06, + "loss": 0.19, + "step": 92015 + }, + { + "epoch": 4.29, + "learning_rate": 5.737933629081561e-06, + "loss": 0.1823, + "step": 92020 + }, + { + "epoch": 4.29, + "learning_rate": 5.737149844026774e-06, + "loss": 0.3525, + "step": 92025 + }, + { + "epoch": 4.29, + "learning_rate": 5.736366058971988e-06, + "loss": 0.0582, + "step": 92030 + }, + { + "epoch": 4.29, + "learning_rate": 5.735582273917202e-06, + "loss": 0.014, + "step": 92035 + }, + { + "epoch": 4.29, + "learning_rate": 5.734798488862415e-06, + "loss": 0.0688, + "step": 92040 + }, + { + "epoch": 4.29, + "learning_rate": 5.734014703807629e-06, + "loss": 0.0203, + "step": 92045 + }, + { + "epoch": 4.3, + "learning_rate": 5.733230918752841e-06, + "loss": 0.0462, + "step": 92050 + }, + { + "epoch": 4.3, + "learning_rate": 5.732447133698054e-06, + "loss": 0.0837, + "step": 92055 + }, + { + "epoch": 4.3, + "learning_rate": 5.731663348643268e-06, + "loss": 0.0777, + "step": 92060 + }, + { + "epoch": 4.3, + "learning_rate": 5.730879563588481e-06, + "loss": 0.1023, + "step": 92065 + }, + { + "epoch": 4.3, + "learning_rate": 5.730095778533695e-06, + "loss": 0.3469, + "step": 92070 + }, + { + "epoch": 4.3, + "learning_rate": 5.729311993478909e-06, + "loss": 0.3225, + "step": 92075 + }, + { + "epoch": 4.3, + "learning_rate": 5.728528208424122e-06, + "loss": 0.1317, + "step": 92080 + }, + { + "epoch": 4.3, + "learning_rate": 5.727744423369336e-06, + "loss": 0.0505, + "step": 92085 + }, + { + "epoch": 4.3, + "learning_rate": 5.726960638314549e-06, + "loss": 0.0072, + "step": 92090 + }, + { + "epoch": 4.3, + "learning_rate": 5.726176853259763e-06, + "loss": 0.0432, + "step": 92095 + }, + { + "epoch": 4.3, + "learning_rate": 5.725393068204976e-06, + "loss": 0.0355, + "step": 92100 + }, + { + "epoch": 4.3, + "learning_rate": 5.72460928315019e-06, + "loss": 0.081, + "step": 92105 + }, + { + "epoch": 4.3, + "learning_rate": 5.723825498095403e-06, + "loss": 0.1141, + "step": 92110 + }, + { + "epoch": 4.3, + "learning_rate": 5.723041713040615e-06, + "loss": 0.0998, + "step": 92115 + }, + { + "epoch": 4.3, + "learning_rate": 5.722257927985829e-06, + "loss": 0.1343, + "step": 92120 + }, + { + "epoch": 4.3, + "learning_rate": 5.721474142931043e-06, + "loss": 0.2115, + "step": 92125 + }, + { + "epoch": 4.3, + "learning_rate": 5.720690357876256e-06, + "loss": 0.1211, + "step": 92130 + }, + { + "epoch": 4.3, + "learning_rate": 5.71990657282147e-06, + "loss": 0.0248, + "step": 92135 + }, + { + "epoch": 4.3, + "learning_rate": 5.719122787766683e-06, + "loss": 0.0224, + "step": 92140 + }, + { + "epoch": 4.3, + "learning_rate": 5.718339002711897e-06, + "loss": 0.02, + "step": 92145 + }, + { + "epoch": 4.3, + "learning_rate": 5.71755521765711e-06, + "loss": 0.0683, + "step": 92150 + }, + { + "epoch": 4.3, + "learning_rate": 5.716771432602324e-06, + "loss": 0.1205, + "step": 92155 + }, + { + "epoch": 4.3, + "learning_rate": 5.715987647547537e-06, + "loss": 0.0763, + "step": 92160 + }, + { + "epoch": 4.3, + "learning_rate": 5.715203862492751e-06, + "loss": 0.0742, + "step": 92165 + }, + { + "epoch": 4.3, + "learning_rate": 5.714420077437964e-06, + "loss": 0.1694, + "step": 92170 + }, + { + "epoch": 4.3, + "learning_rate": 5.713636292383178e-06, + "loss": 0.3072, + "step": 92175 + }, + { + "epoch": 4.3, + "learning_rate": 5.71285250732839e-06, + "loss": 0.0853, + "step": 92180 + }, + { + "epoch": 4.3, + "learning_rate": 5.712068722273604e-06, + "loss": 0.0238, + "step": 92185 + }, + { + "epoch": 4.3, + "learning_rate": 5.711284937218817e-06, + "loss": 0.0184, + "step": 92190 + }, + { + "epoch": 4.3, + "learning_rate": 5.710501152164031e-06, + "loss": 0.0385, + "step": 92195 + }, + { + "epoch": 4.3, + "learning_rate": 5.709717367109244e-06, + "loss": 0.0656, + "step": 92200 + }, + { + "epoch": 4.3, + "learning_rate": 5.708933582054458e-06, + "loss": 0.0727, + "step": 92205 + }, + { + "epoch": 4.3, + "learning_rate": 5.708149796999671e-06, + "loss": 0.0878, + "step": 92210 + }, + { + "epoch": 4.3, + "learning_rate": 5.707366011944885e-06, + "loss": 0.084, + "step": 92215 + }, + { + "epoch": 4.3, + "learning_rate": 5.706582226890098e-06, + "loss": 0.2025, + "step": 92220 + }, + { + "epoch": 4.3, + "learning_rate": 5.705798441835312e-06, + "loss": 0.3276, + "step": 92225 + }, + { + "epoch": 4.3, + "learning_rate": 5.705014656780525e-06, + "loss": 0.0611, + "step": 92230 + }, + { + "epoch": 4.3, + "learning_rate": 5.704230871725739e-06, + "loss": 0.1299, + "step": 92235 + }, + { + "epoch": 4.3, + "learning_rate": 5.703447086670952e-06, + "loss": 0.0497, + "step": 92240 + }, + { + "epoch": 4.3, + "learning_rate": 5.702663301616165e-06, + "loss": 0.0629, + "step": 92245 + }, + { + "epoch": 4.3, + "learning_rate": 5.701879516561378e-06, + "loss": 0.0587, + "step": 92250 + }, + { + "epoch": 4.3, + "learning_rate": 5.701095731506592e-06, + "loss": 0.0905, + "step": 92255 + }, + { + "epoch": 4.3, + "learning_rate": 5.700311946451805e-06, + "loss": 0.1049, + "step": 92260 + }, + { + "epoch": 4.31, + "learning_rate": 5.699528161397019e-06, + "loss": 0.1427, + "step": 92265 + }, + { + "epoch": 4.31, + "learning_rate": 5.698744376342232e-06, + "loss": 0.2616, + "step": 92270 + }, + { + "epoch": 4.31, + "learning_rate": 5.697960591287446e-06, + "loss": 0.1968, + "step": 92275 + }, + { + "epoch": 4.31, + "learning_rate": 5.697176806232659e-06, + "loss": 0.0754, + "step": 92280 + }, + { + "epoch": 4.31, + "learning_rate": 5.696393021177873e-06, + "loss": 0.0246, + "step": 92285 + }, + { + "epoch": 4.31, + "learning_rate": 5.695609236123086e-06, + "loss": 0.0334, + "step": 92290 + }, + { + "epoch": 4.31, + "learning_rate": 5.6948254510683e-06, + "loss": 0.0211, + "step": 92295 + }, + { + "epoch": 4.31, + "learning_rate": 5.694041666013514e-06, + "loss": 0.0471, + "step": 92300 + }, + { + "epoch": 4.31, + "learning_rate": 5.693257880958727e-06, + "loss": 0.1016, + "step": 92305 + }, + { + "epoch": 4.31, + "learning_rate": 5.692474095903939e-06, + "loss": 0.1201, + "step": 92310 + }, + { + "epoch": 4.31, + "learning_rate": 5.691690310849153e-06, + "loss": 0.0935, + "step": 92315 + }, + { + "epoch": 4.31, + "learning_rate": 5.690906525794366e-06, + "loss": 0.2127, + "step": 92320 + }, + { + "epoch": 4.31, + "learning_rate": 5.69012274073958e-06, + "loss": 0.299, + "step": 92325 + }, + { + "epoch": 4.31, + "learning_rate": 5.689338955684793e-06, + "loss": 0.089, + "step": 92330 + }, + { + "epoch": 4.31, + "learning_rate": 5.688555170630007e-06, + "loss": 0.0127, + "step": 92335 + }, + { + "epoch": 4.31, + "learning_rate": 5.687771385575221e-06, + "loss": 0.0356, + "step": 92340 + }, + { + "epoch": 4.31, + "learning_rate": 5.686987600520434e-06, + "loss": 0.0709, + "step": 92345 + }, + { + "epoch": 4.31, + "learning_rate": 5.686203815465648e-06, + "loss": 0.0501, + "step": 92350 + }, + { + "epoch": 4.31, + "learning_rate": 5.685420030410861e-06, + "loss": 0.0524, + "step": 92355 + }, + { + "epoch": 4.31, + "learning_rate": 5.684636245356075e-06, + "loss": 0.0441, + "step": 92360 + }, + { + "epoch": 4.31, + "learning_rate": 5.683852460301288e-06, + "loss": 0.1178, + "step": 92365 + }, + { + "epoch": 4.31, + "learning_rate": 5.683068675246502e-06, + "loss": 0.2904, + "step": 92370 + }, + { + "epoch": 4.31, + "learning_rate": 5.682284890191714e-06, + "loss": 0.2871, + "step": 92375 + }, + { + "epoch": 4.31, + "learning_rate": 5.681501105136927e-06, + "loss": 0.1098, + "step": 92380 + }, + { + "epoch": 4.31, + "learning_rate": 5.680717320082141e-06, + "loss": 0.0876, + "step": 92385 + }, + { + "epoch": 4.31, + "learning_rate": 5.679933535027355e-06, + "loss": 0.0149, + "step": 92390 + }, + { + "epoch": 4.31, + "learning_rate": 5.679149749972568e-06, + "loss": 0.036, + "step": 92395 + }, + { + "epoch": 4.31, + "learning_rate": 5.678365964917782e-06, + "loss": 0.0441, + "step": 92400 + }, + { + "epoch": 4.31, + "learning_rate": 5.677582179862995e-06, + "loss": 0.0723, + "step": 92405 + }, + { + "epoch": 4.31, + "learning_rate": 5.676798394808209e-06, + "loss": 0.0428, + "step": 92410 + }, + { + "epoch": 4.31, + "learning_rate": 5.676014609753422e-06, + "loss": 0.1119, + "step": 92415 + }, + { + "epoch": 4.31, + "learning_rate": 5.675230824698636e-06, + "loss": 0.0758, + "step": 92420 + }, + { + "epoch": 4.31, + "learning_rate": 5.674447039643849e-06, + "loss": 0.3472, + "step": 92425 + }, + { + "epoch": 4.31, + "learning_rate": 5.673663254589063e-06, + "loss": 0.0403, + "step": 92430 + }, + { + "epoch": 4.31, + "learning_rate": 5.672879469534276e-06, + "loss": 0.0232, + "step": 92435 + }, + { + "epoch": 4.31, + "learning_rate": 5.672095684479489e-06, + "loss": 0.015, + "step": 92440 + }, + { + "epoch": 4.31, + "learning_rate": 5.671311899424702e-06, + "loss": 0.0632, + "step": 92445 + }, + { + "epoch": 4.31, + "learning_rate": 5.670528114369916e-06, + "loss": 0.0359, + "step": 92450 + }, + { + "epoch": 4.31, + "learning_rate": 5.669744329315129e-06, + "loss": 0.0974, + "step": 92455 + }, + { + "epoch": 4.31, + "learning_rate": 5.668960544260343e-06, + "loss": 0.0389, + "step": 92460 + }, + { + "epoch": 4.31, + "learning_rate": 5.668176759205556e-06, + "loss": 0.1899, + "step": 92465 + }, + { + "epoch": 4.31, + "learning_rate": 5.66739297415077e-06, + "loss": 0.1237, + "step": 92470 + }, + { + "epoch": 4.32, + "learning_rate": 5.666609189095983e-06, + "loss": 0.4016, + "step": 92475 + }, + { + "epoch": 4.32, + "learning_rate": 5.6658254040411966e-06, + "loss": 0.0711, + "step": 92480 + }, + { + "epoch": 4.32, + "learning_rate": 5.66504161898641e-06, + "loss": 0.0164, + "step": 92485 + }, + { + "epoch": 4.32, + "learning_rate": 5.6642578339316235e-06, + "loss": 0.0291, + "step": 92490 + }, + { + "epoch": 4.32, + "learning_rate": 5.663474048876837e-06, + "loss": 0.0205, + "step": 92495 + }, + { + "epoch": 4.32, + "learning_rate": 5.6626902638220505e-06, + "loss": 0.1012, + "step": 92500 + }, + { + "epoch": 4.32, + "learning_rate": 5.661906478767263e-06, + "loss": 0.1101, + "step": 92505 + }, + { + "epoch": 4.32, + "learning_rate": 5.661122693712477e-06, + "loss": 0.1274, + "step": 92510 + }, + { + "epoch": 4.32, + "learning_rate": 5.66033890865769e-06, + "loss": 0.0668, + "step": 92515 + }, + { + "epoch": 4.32, + "learning_rate": 5.659555123602904e-06, + "loss": 0.1267, + "step": 92520 + }, + { + "epoch": 4.32, + "learning_rate": 5.658771338548117e-06, + "loss": 0.2288, + "step": 92525 + }, + { + "epoch": 4.32, + "learning_rate": 5.6579875534933306e-06, + "loss": 0.1182, + "step": 92530 + }, + { + "epoch": 4.32, + "learning_rate": 5.657203768438544e-06, + "loss": 0.0092, + "step": 92535 + }, + { + "epoch": 4.32, + "learning_rate": 5.6564199833837575e-06, + "loss": 0.0518, + "step": 92540 + }, + { + "epoch": 4.32, + "learning_rate": 5.6556361983289706e-06, + "loss": 0.0696, + "step": 92545 + }, + { + "epoch": 4.32, + "learning_rate": 5.6548524132741845e-06, + "loss": 0.0519, + "step": 92550 + }, + { + "epoch": 4.32, + "learning_rate": 5.6540686282193975e-06, + "loss": 0.0841, + "step": 92555 + }, + { + "epoch": 4.32, + "learning_rate": 5.6532848431646114e-06, + "loss": 0.0735, + "step": 92560 + }, + { + "epoch": 4.32, + "learning_rate": 5.652501058109825e-06, + "loss": 0.1308, + "step": 92565 + }, + { + "epoch": 4.32, + "learning_rate": 5.6517172730550376e-06, + "loss": 0.1185, + "step": 92570 + }, + { + "epoch": 4.32, + "learning_rate": 5.650933488000251e-06, + "loss": 0.2907, + "step": 92575 + }, + { + "epoch": 4.32, + "learning_rate": 5.6501497029454645e-06, + "loss": 0.0912, + "step": 92580 + }, + { + "epoch": 4.32, + "learning_rate": 5.649365917890678e-06, + "loss": 0.0116, + "step": 92585 + }, + { + "epoch": 4.32, + "learning_rate": 5.6485821328358915e-06, + "loss": 0.0385, + "step": 92590 + }, + { + "epoch": 4.32, + "learning_rate": 5.6477983477811045e-06, + "loss": 0.0395, + "step": 92595 + }, + { + "epoch": 4.32, + "learning_rate": 5.6470145627263185e-06, + "loss": 0.0793, + "step": 92600 + }, + { + "epoch": 4.32, + "learning_rate": 5.6462307776715315e-06, + "loss": 0.0814, + "step": 92605 + }, + { + "epoch": 4.32, + "learning_rate": 5.645446992616745e-06, + "loss": 0.0984, + "step": 92610 + }, + { + "epoch": 4.32, + "learning_rate": 5.644663207561959e-06, + "loss": 0.0662, + "step": 92615 + }, + { + "epoch": 4.32, + "learning_rate": 5.643879422507172e-06, + "loss": 0.2063, + "step": 92620 + }, + { + "epoch": 4.32, + "learning_rate": 5.643095637452386e-06, + "loss": 0.2029, + "step": 92625 + }, + { + "epoch": 4.32, + "learning_rate": 5.642311852397599e-06, + "loss": 0.0741, + "step": 92630 + }, + { + "epoch": 4.32, + "learning_rate": 5.6415280673428116e-06, + "loss": 0.0396, + "step": 92635 + }, + { + "epoch": 4.32, + "learning_rate": 5.6407442822880255e-06, + "loss": 0.0642, + "step": 92640 + }, + { + "epoch": 4.32, + "learning_rate": 5.6399604972332385e-06, + "loss": 0.0635, + "step": 92645 + }, + { + "epoch": 4.32, + "learning_rate": 5.6391767121784524e-06, + "loss": 0.0769, + "step": 92650 + }, + { + "epoch": 4.32, + "learning_rate": 5.638392927123666e-06, + "loss": 0.077, + "step": 92655 + }, + { + "epoch": 4.32, + "learning_rate": 5.637609142068879e-06, + "loss": 0.0887, + "step": 92660 + }, + { + "epoch": 4.32, + "learning_rate": 5.636825357014093e-06, + "loss": 0.0834, + "step": 92665 + }, + { + "epoch": 4.32, + "learning_rate": 5.636041571959306e-06, + "loss": 0.1668, + "step": 92670 + }, + { + "epoch": 4.32, + "learning_rate": 5.63525778690452e-06, + "loss": 0.122, + "step": 92675 + }, + { + "epoch": 4.32, + "learning_rate": 5.634474001849733e-06, + "loss": 0.1064, + "step": 92680 + }, + { + "epoch": 4.32, + "learning_rate": 5.633690216794947e-06, + "loss": 0.0488, + "step": 92685 + }, + { + "epoch": 4.33, + "learning_rate": 5.63290643174016e-06, + "loss": 0.017, + "step": 92690 + }, + { + "epoch": 4.33, + "learning_rate": 5.632122646685374e-06, + "loss": 0.0428, + "step": 92695 + }, + { + "epoch": 4.33, + "learning_rate": 5.631338861630586e-06, + "loss": 0.1279, + "step": 92700 + }, + { + "epoch": 4.33, + "learning_rate": 5.6305550765758e-06, + "loss": 0.029, + "step": 92705 + }, + { + "epoch": 4.33, + "learning_rate": 5.629771291521013e-06, + "loss": 0.078, + "step": 92710 + }, + { + "epoch": 4.33, + "learning_rate": 5.628987506466227e-06, + "loss": 0.0682, + "step": 92715 + }, + { + "epoch": 4.33, + "learning_rate": 5.62820372141144e-06, + "loss": 0.1448, + "step": 92720 + }, + { + "epoch": 4.33, + "learning_rate": 5.627419936356654e-06, + "loss": 0.2363, + "step": 92725 + }, + { + "epoch": 4.33, + "learning_rate": 5.626636151301867e-06, + "loss": 0.0887, + "step": 92730 + }, + { + "epoch": 4.33, + "learning_rate": 5.625852366247081e-06, + "loss": 0.0445, + "step": 92735 + }, + { + "epoch": 4.33, + "learning_rate": 5.625068581192294e-06, + "loss": 0.0443, + "step": 92740 + }, + { + "epoch": 4.33, + "learning_rate": 5.624284796137508e-06, + "loss": 0.0344, + "step": 92745 + }, + { + "epoch": 4.33, + "learning_rate": 5.623501011082721e-06, + "loss": 0.0571, + "step": 92750 + }, + { + "epoch": 4.33, + "learning_rate": 5.622717226027935e-06, + "loss": 0.0273, + "step": 92755 + }, + { + "epoch": 4.33, + "learning_rate": 5.621933440973148e-06, + "loss": 0.056, + "step": 92760 + }, + { + "epoch": 4.33, + "learning_rate": 5.621149655918361e-06, + "loss": 0.1013, + "step": 92765 + }, + { + "epoch": 4.33, + "learning_rate": 5.620365870863574e-06, + "loss": 0.2048, + "step": 92770 + }, + { + "epoch": 4.33, + "learning_rate": 5.619582085808788e-06, + "loss": 0.3133, + "step": 92775 + }, + { + "epoch": 4.33, + "learning_rate": 5.618798300754001e-06, + "loss": 0.1164, + "step": 92780 + }, + { + "epoch": 4.33, + "learning_rate": 5.618014515699215e-06, + "loss": 0.0282, + "step": 92785 + }, + { + "epoch": 4.33, + "learning_rate": 5.617230730644428e-06, + "loss": 0.0423, + "step": 92790 + }, + { + "epoch": 4.33, + "learning_rate": 5.616446945589642e-06, + "loss": 0.0698, + "step": 92795 + }, + { + "epoch": 4.33, + "learning_rate": 5.615663160534855e-06, + "loss": 0.0539, + "step": 92800 + }, + { + "epoch": 4.33, + "learning_rate": 5.614879375480069e-06, + "loss": 0.1238, + "step": 92805 + }, + { + "epoch": 4.33, + "learning_rate": 5.614095590425282e-06, + "loss": 0.0739, + "step": 92810 + }, + { + "epoch": 4.33, + "learning_rate": 5.613311805370496e-06, + "loss": 0.1163, + "step": 92815 + }, + { + "epoch": 4.33, + "learning_rate": 5.612528020315709e-06, + "loss": 0.1494, + "step": 92820 + }, + { + "epoch": 4.33, + "learning_rate": 5.611744235260923e-06, + "loss": 0.3012, + "step": 92825 + }, + { + "epoch": 4.33, + "learning_rate": 5.610960450206135e-06, + "loss": 0.1179, + "step": 92830 + }, + { + "epoch": 4.33, + "learning_rate": 5.610176665151349e-06, + "loss": 0.023, + "step": 92835 + }, + { + "epoch": 4.33, + "learning_rate": 5.609392880096562e-06, + "loss": 0.0438, + "step": 92840 + }, + { + "epoch": 4.33, + "learning_rate": 5.608609095041776e-06, + "loss": 0.0163, + "step": 92845 + }, + { + "epoch": 4.33, + "learning_rate": 5.607825309986989e-06, + "loss": 0.0502, + "step": 92850 + }, + { + "epoch": 4.33, + "learning_rate": 5.607041524932203e-06, + "loss": 0.1047, + "step": 92855 + }, + { + "epoch": 4.33, + "learning_rate": 5.606257739877416e-06, + "loss": 0.1257, + "step": 92860 + }, + { + "epoch": 4.33, + "learning_rate": 5.60547395482263e-06, + "loss": 0.1548, + "step": 92865 + }, + { + "epoch": 4.33, + "learning_rate": 5.604690169767843e-06, + "loss": 0.2065, + "step": 92870 + }, + { + "epoch": 4.33, + "learning_rate": 5.603906384713057e-06, + "loss": 0.3251, + "step": 92875 + }, + { + "epoch": 4.33, + "learning_rate": 5.603122599658271e-06, + "loss": 0.0702, + "step": 92880 + }, + { + "epoch": 4.33, + "learning_rate": 5.602338814603484e-06, + "loss": 0.0147, + "step": 92885 + }, + { + "epoch": 4.33, + "learning_rate": 5.601555029548698e-06, + "loss": 0.0529, + "step": 92890 + }, + { + "epoch": 4.33, + "learning_rate": 5.60077124449391e-06, + "loss": 0.0425, + "step": 92895 + }, + { + "epoch": 4.33, + "learning_rate": 5.599987459439123e-06, + "loss": 0.0575, + "step": 92900 + }, + { + "epoch": 4.34, + "learning_rate": 5.599203674384337e-06, + "loss": 0.0861, + "step": 92905 + }, + { + "epoch": 4.34, + "learning_rate": 5.59841988932955e-06, + "loss": 0.0507, + "step": 92910 + }, + { + "epoch": 4.34, + "learning_rate": 5.597636104274764e-06, + "loss": 0.0715, + "step": 92915 + }, + { + "epoch": 4.34, + "learning_rate": 5.596852319219978e-06, + "loss": 0.2377, + "step": 92920 + }, + { + "epoch": 4.34, + "learning_rate": 5.596068534165191e-06, + "loss": 0.3196, + "step": 92925 + }, + { + "epoch": 4.34, + "learning_rate": 5.595284749110405e-06, + "loss": 0.0734, + "step": 92930 + }, + { + "epoch": 4.34, + "learning_rate": 5.594500964055618e-06, + "loss": 0.0267, + "step": 92935 + }, + { + "epoch": 4.34, + "learning_rate": 5.593717179000832e-06, + "loss": 0.0639, + "step": 92940 + }, + { + "epoch": 4.34, + "learning_rate": 5.592933393946045e-06, + "loss": 0.0708, + "step": 92945 + }, + { + "epoch": 4.34, + "learning_rate": 5.592149608891259e-06, + "loss": 0.0917, + "step": 92950 + }, + { + "epoch": 4.34, + "learning_rate": 5.591365823836472e-06, + "loss": 0.0304, + "step": 92955 + }, + { + "epoch": 4.34, + "learning_rate": 5.590582038781684e-06, + "loss": 0.2022, + "step": 92960 + }, + { + "epoch": 4.34, + "learning_rate": 5.589798253726898e-06, + "loss": 0.1388, + "step": 92965 + }, + { + "epoch": 4.34, + "learning_rate": 5.589014468672112e-06, + "loss": 0.1263, + "step": 92970 + }, + { + "epoch": 4.34, + "learning_rate": 5.588230683617325e-06, + "loss": 0.2642, + "step": 92975 + }, + { + "epoch": 4.34, + "learning_rate": 5.587446898562539e-06, + "loss": 0.09, + "step": 92980 + }, + { + "epoch": 4.34, + "learning_rate": 5.586663113507752e-06, + "loss": 0.0289, + "step": 92985 + }, + { + "epoch": 4.34, + "learning_rate": 5.585879328452966e-06, + "loss": 0.0786, + "step": 92990 + }, + { + "epoch": 4.34, + "learning_rate": 5.585095543398179e-06, + "loss": 0.0513, + "step": 92995 + }, + { + "epoch": 4.34, + "learning_rate": 5.584311758343393e-06, + "loss": 0.0352, + "step": 93000 + }, + { + "epoch": 4.34, + "learning_rate": 5.583527973288606e-06, + "loss": 0.0673, + "step": 93005 + }, + { + "epoch": 4.34, + "learning_rate": 5.58274418823382e-06, + "loss": 0.0303, + "step": 93010 + }, + { + "epoch": 4.34, + "learning_rate": 5.581960403179033e-06, + "loss": 0.1573, + "step": 93015 + }, + { + "epoch": 4.34, + "learning_rate": 5.581176618124247e-06, + "loss": 0.1667, + "step": 93020 + }, + { + "epoch": 4.34, + "learning_rate": 5.580392833069459e-06, + "loss": 0.3154, + "step": 93025 + }, + { + "epoch": 4.34, + "learning_rate": 5.579609048014673e-06, + "loss": 0.0641, + "step": 93030 + }, + { + "epoch": 4.34, + "learning_rate": 5.578825262959886e-06, + "loss": 0.0214, + "step": 93035 + }, + { + "epoch": 4.34, + "learning_rate": 5.5780414779051e-06, + "loss": 0.0258, + "step": 93040 + }, + { + "epoch": 4.34, + "learning_rate": 5.577257692850313e-06, + "loss": 0.049, + "step": 93045 + }, + { + "epoch": 4.34, + "learning_rate": 5.576473907795527e-06, + "loss": 0.0479, + "step": 93050 + }, + { + "epoch": 4.34, + "learning_rate": 5.57569012274074e-06, + "loss": 0.0786, + "step": 93055 + }, + { + "epoch": 4.34, + "learning_rate": 5.574906337685954e-06, + "loss": 0.0668, + "step": 93060 + }, + { + "epoch": 4.34, + "learning_rate": 5.574122552631167e-06, + "loss": 0.1106, + "step": 93065 + }, + { + "epoch": 4.34, + "learning_rate": 5.573338767576381e-06, + "loss": 0.0765, + "step": 93070 + }, + { + "epoch": 4.34, + "learning_rate": 5.572554982521594e-06, + "loss": 0.2523, + "step": 93075 + }, + { + "epoch": 4.34, + "learning_rate": 5.571771197466808e-06, + "loss": 0.1044, + "step": 93080 + }, + { + "epoch": 4.34, + "learning_rate": 5.570987412412021e-06, + "loss": 0.036, + "step": 93085 + }, + { + "epoch": 4.34, + "learning_rate": 5.570203627357234e-06, + "loss": 0.0114, + "step": 93090 + }, + { + "epoch": 4.34, + "learning_rate": 5.569419842302447e-06, + "loss": 0.052, + "step": 93095 + }, + { + "epoch": 4.34, + "learning_rate": 5.568636057247661e-06, + "loss": 0.0453, + "step": 93100 + }, + { + "epoch": 4.34, + "learning_rate": 5.567852272192874e-06, + "loss": 0.0893, + "step": 93105 + }, + { + "epoch": 4.34, + "learning_rate": 5.567068487138088e-06, + "loss": 0.1599, + "step": 93110 + }, + { + "epoch": 4.34, + "learning_rate": 5.566284702083301e-06, + "loss": 0.1558, + "step": 93115 + }, + { + "epoch": 4.35, + "learning_rate": 5.565500917028515e-06, + "loss": 0.1103, + "step": 93120 + }, + { + "epoch": 4.35, + "learning_rate": 5.564717131973728e-06, + "loss": 0.2208, + "step": 93125 + }, + { + "epoch": 4.35, + "learning_rate": 5.563933346918942e-06, + "loss": 0.0891, + "step": 93130 + }, + { + "epoch": 4.35, + "learning_rate": 5.563149561864155e-06, + "loss": 0.0128, + "step": 93135 + }, + { + "epoch": 4.35, + "learning_rate": 5.562365776809369e-06, + "loss": 0.1125, + "step": 93140 + }, + { + "epoch": 4.35, + "learning_rate": 5.5615819917545825e-06, + "loss": 0.0648, + "step": 93145 + }, + { + "epoch": 4.35, + "learning_rate": 5.5607982066997956e-06, + "loss": 0.0401, + "step": 93150 + }, + { + "epoch": 4.35, + "learning_rate": 5.560014421645008e-06, + "loss": 0.0634, + "step": 93155 + }, + { + "epoch": 4.35, + "learning_rate": 5.559230636590222e-06, + "loss": 0.0621, + "step": 93160 + }, + { + "epoch": 4.35, + "learning_rate": 5.558446851535435e-06, + "loss": 0.1719, + "step": 93165 + }, + { + "epoch": 4.35, + "learning_rate": 5.557663066480649e-06, + "loss": 0.1685, + "step": 93170 + }, + { + "epoch": 4.35, + "learning_rate": 5.556879281425862e-06, + "loss": 0.289, + "step": 93175 + }, + { + "epoch": 4.35, + "learning_rate": 5.556095496371076e-06, + "loss": 0.0859, + "step": 93180 + }, + { + "epoch": 4.35, + "learning_rate": 5.555311711316289e-06, + "loss": 0.0215, + "step": 93185 + }, + { + "epoch": 4.35, + "learning_rate": 5.554527926261503e-06, + "loss": 0.0543, + "step": 93190 + }, + { + "epoch": 4.35, + "learning_rate": 5.5537441412067165e-06, + "loss": 0.0316, + "step": 93195 + }, + { + "epoch": 4.35, + "learning_rate": 5.5529603561519295e-06, + "loss": 0.0465, + "step": 93200 + }, + { + "epoch": 4.35, + "learning_rate": 5.5521765710971435e-06, + "loss": 0.0542, + "step": 93205 + }, + { + "epoch": 4.35, + "learning_rate": 5.5513927860423565e-06, + "loss": 0.063, + "step": 93210 + }, + { + "epoch": 4.35, + "learning_rate": 5.55060900098757e-06, + "loss": 0.1363, + "step": 93215 + }, + { + "epoch": 4.35, + "learning_rate": 5.549825215932783e-06, + "loss": 0.1544, + "step": 93220 + }, + { + "epoch": 4.35, + "learning_rate": 5.549041430877996e-06, + "loss": 0.2124, + "step": 93225 + }, + { + "epoch": 4.35, + "learning_rate": 5.54825764582321e-06, + "loss": 0.1124, + "step": 93230 + }, + { + "epoch": 4.35, + "learning_rate": 5.5474738607684235e-06, + "loss": 0.0105, + "step": 93235 + }, + { + "epoch": 4.35, + "learning_rate": 5.5466900757136366e-06, + "loss": 0.0801, + "step": 93240 + }, + { + "epoch": 4.35, + "learning_rate": 5.5459062906588505e-06, + "loss": 0.0604, + "step": 93245 + }, + { + "epoch": 4.35, + "learning_rate": 5.5451225056040635e-06, + "loss": 0.0618, + "step": 93250 + }, + { + "epoch": 4.35, + "learning_rate": 5.5443387205492774e-06, + "loss": 0.0735, + "step": 93255 + }, + { + "epoch": 4.35, + "learning_rate": 5.5435549354944905e-06, + "loss": 0.2767, + "step": 93260 + }, + { + "epoch": 4.35, + "learning_rate": 5.542771150439704e-06, + "loss": 0.0636, + "step": 93265 + }, + { + "epoch": 4.35, + "learning_rate": 5.5419873653849175e-06, + "loss": 0.2251, + "step": 93270 + }, + { + "epoch": 4.35, + "learning_rate": 5.541203580330131e-06, + "loss": 0.2216, + "step": 93275 + }, + { + "epoch": 4.35, + "learning_rate": 5.540419795275344e-06, + "loss": 0.064, + "step": 93280 + }, + { + "epoch": 4.35, + "learning_rate": 5.5396360102205575e-06, + "loss": 0.0138, + "step": 93285 + }, + { + "epoch": 4.35, + "learning_rate": 5.5388522251657705e-06, + "loss": 0.0296, + "step": 93290 + }, + { + "epoch": 4.35, + "learning_rate": 5.5380684401109844e-06, + "loss": 0.0228, + "step": 93295 + }, + { + "epoch": 4.35, + "learning_rate": 5.5372846550561975e-06, + "loss": 0.059, + "step": 93300 + }, + { + "epoch": 4.35, + "learning_rate": 5.536500870001411e-06, + "loss": 0.0708, + "step": 93305 + }, + { + "epoch": 4.35, + "learning_rate": 5.5357170849466245e-06, + "loss": 0.1692, + "step": 93310 + }, + { + "epoch": 4.35, + "learning_rate": 5.534933299891838e-06, + "loss": 0.1297, + "step": 93315 + }, + { + "epoch": 4.35, + "learning_rate": 5.5341495148370514e-06, + "loss": 0.1018, + "step": 93320 + }, + { + "epoch": 4.35, + "learning_rate": 5.533365729782265e-06, + "loss": 0.2941, + "step": 93325 + }, + { + "epoch": 4.35, + "learning_rate": 5.532581944727478e-06, + "loss": 0.0471, + "step": 93330 + }, + { + "epoch": 4.36, + "learning_rate": 5.531798159672692e-06, + "loss": 0.0068, + "step": 93335 + }, + { + "epoch": 4.36, + "learning_rate": 5.531014374617905e-06, + "loss": 0.0373, + "step": 93340 + }, + { + "epoch": 4.36, + "learning_rate": 5.530230589563119e-06, + "loss": 0.0521, + "step": 93345 + }, + { + "epoch": 4.36, + "learning_rate": 5.5294468045083315e-06, + "loss": 0.0578, + "step": 93350 + }, + { + "epoch": 4.36, + "learning_rate": 5.528663019453545e-06, + "loss": 0.0775, + "step": 93355 + }, + { + "epoch": 4.36, + "learning_rate": 5.5278792343987584e-06, + "loss": 0.0369, + "step": 93360 + }, + { + "epoch": 4.36, + "learning_rate": 5.527095449343972e-06, + "loss": 0.1059, + "step": 93365 + }, + { + "epoch": 4.36, + "learning_rate": 5.526311664289185e-06, + "loss": 0.1973, + "step": 93370 + }, + { + "epoch": 4.36, + "learning_rate": 5.525527879234399e-06, + "loss": 0.3005, + "step": 93375 + }, + { + "epoch": 4.36, + "learning_rate": 5.524744094179612e-06, + "loss": 0.0914, + "step": 93380 + }, + { + "epoch": 4.36, + "learning_rate": 5.523960309124826e-06, + "loss": 0.0168, + "step": 93385 + }, + { + "epoch": 4.36, + "learning_rate": 5.523176524070039e-06, + "loss": 0.04, + "step": 93390 + }, + { + "epoch": 4.36, + "learning_rate": 5.522392739015253e-06, + "loss": 0.0474, + "step": 93395 + }, + { + "epoch": 4.36, + "learning_rate": 5.521608953960466e-06, + "loss": 0.055, + "step": 93400 + }, + { + "epoch": 4.36, + "learning_rate": 5.52082516890568e-06, + "loss": 0.0748, + "step": 93405 + }, + { + "epoch": 4.36, + "learning_rate": 5.520041383850894e-06, + "loss": 0.0981, + "step": 93410 + }, + { + "epoch": 4.36, + "learning_rate": 5.519257598796106e-06, + "loss": 0.0634, + "step": 93415 + }, + { + "epoch": 4.36, + "learning_rate": 5.518473813741319e-06, + "loss": 0.1274, + "step": 93420 + }, + { + "epoch": 4.36, + "learning_rate": 5.517690028686533e-06, + "loss": 0.3517, + "step": 93425 + }, + { + "epoch": 4.36, + "learning_rate": 5.516906243631746e-06, + "loss": 0.1113, + "step": 93430 + }, + { + "epoch": 4.36, + "learning_rate": 5.51612245857696e-06, + "loss": 0.0349, + "step": 93435 + }, + { + "epoch": 4.36, + "learning_rate": 5.515338673522173e-06, + "loss": 0.0222, + "step": 93440 + }, + { + "epoch": 4.36, + "learning_rate": 5.514554888467387e-06, + "loss": 0.0488, + "step": 93445 + }, + { + "epoch": 4.36, + "learning_rate": 5.5137711034126e-06, + "loss": 0.0884, + "step": 93450 + }, + { + "epoch": 4.36, + "learning_rate": 5.512987318357814e-06, + "loss": 0.0643, + "step": 93455 + }, + { + "epoch": 4.36, + "learning_rate": 5.512203533303028e-06, + "loss": 0.0958, + "step": 93460 + }, + { + "epoch": 4.36, + "learning_rate": 5.511419748248241e-06, + "loss": 0.1162, + "step": 93465 + }, + { + "epoch": 4.36, + "learning_rate": 5.510635963193455e-06, + "loss": 0.1336, + "step": 93470 + }, + { + "epoch": 4.36, + "learning_rate": 5.509852178138668e-06, + "loss": 0.35, + "step": 93475 + }, + { + "epoch": 4.36, + "learning_rate": 5.50906839308388e-06, + "loss": 0.1198, + "step": 93480 + }, + { + "epoch": 4.36, + "learning_rate": 5.508284608029094e-06, + "loss": 0.0225, + "step": 93485 + }, + { + "epoch": 4.36, + "learning_rate": 5.507500822974307e-06, + "loss": 0.0169, + "step": 93490 + }, + { + "epoch": 4.36, + "learning_rate": 5.506717037919521e-06, + "loss": 0.0478, + "step": 93495 + }, + { + "epoch": 4.36, + "learning_rate": 5.505933252864735e-06, + "loss": 0.0886, + "step": 93500 + }, + { + "epoch": 4.36, + "learning_rate": 5.505149467809948e-06, + "loss": 0.0692, + "step": 93505 + }, + { + "epoch": 4.36, + "learning_rate": 5.504365682755162e-06, + "loss": 0.1396, + "step": 93510 + }, + { + "epoch": 4.36, + "learning_rate": 5.503581897700375e-06, + "loss": 0.1287, + "step": 93515 + }, + { + "epoch": 4.36, + "learning_rate": 5.502798112645589e-06, + "loss": 0.2369, + "step": 93520 + }, + { + "epoch": 4.36, + "learning_rate": 5.502014327590802e-06, + "loss": 0.2279, + "step": 93525 + }, + { + "epoch": 4.36, + "learning_rate": 5.501230542536016e-06, + "loss": 0.0846, + "step": 93530 + }, + { + "epoch": 4.36, + "learning_rate": 5.500446757481229e-06, + "loss": 0.0063, + "step": 93535 + }, + { + "epoch": 4.36, + "learning_rate": 5.499662972426443e-06, + "loss": 0.0303, + "step": 93540 + }, + { + "epoch": 4.36, + "learning_rate": 5.498879187371655e-06, + "loss": 0.0422, + "step": 93545 + }, + { + "epoch": 4.37, + "learning_rate": 5.498095402316869e-06, + "loss": 0.0655, + "step": 93550 + }, + { + "epoch": 4.37, + "learning_rate": 5.497311617262082e-06, + "loss": 0.0336, + "step": 93555 + }, + { + "epoch": 4.37, + "learning_rate": 5.496527832207296e-06, + "loss": 0.1132, + "step": 93560 + }, + { + "epoch": 4.37, + "learning_rate": 5.495744047152509e-06, + "loss": 0.1235, + "step": 93565 + }, + { + "epoch": 4.37, + "learning_rate": 5.494960262097723e-06, + "loss": 0.1276, + "step": 93570 + }, + { + "epoch": 4.37, + "learning_rate": 5.494176477042936e-06, + "loss": 0.2389, + "step": 93575 + }, + { + "epoch": 4.37, + "learning_rate": 5.49339269198815e-06, + "loss": 0.0394, + "step": 93580 + }, + { + "epoch": 4.37, + "learning_rate": 5.492608906933363e-06, + "loss": 0.0187, + "step": 93585 + }, + { + "epoch": 4.37, + "learning_rate": 5.491825121878577e-06, + "loss": 0.0364, + "step": 93590 + }, + { + "epoch": 4.37, + "learning_rate": 5.49104133682379e-06, + "loss": 0.0756, + "step": 93595 + }, + { + "epoch": 4.37, + "learning_rate": 5.490257551769004e-06, + "loss": 0.1331, + "step": 93600 + }, + { + "epoch": 4.37, + "learning_rate": 5.489473766714217e-06, + "loss": 0.0545, + "step": 93605 + }, + { + "epoch": 4.37, + "learning_rate": 5.48868998165943e-06, + "loss": 0.0692, + "step": 93610 + }, + { + "epoch": 4.37, + "learning_rate": 5.487906196604643e-06, + "loss": 0.0762, + "step": 93615 + }, + { + "epoch": 4.37, + "learning_rate": 5.487122411549857e-06, + "loss": 0.1696, + "step": 93620 + }, + { + "epoch": 4.37, + "learning_rate": 5.48633862649507e-06, + "loss": 0.2664, + "step": 93625 + }, + { + "epoch": 4.37, + "learning_rate": 5.485554841440284e-06, + "loss": 0.1063, + "step": 93630 + }, + { + "epoch": 4.37, + "learning_rate": 5.484771056385497e-06, + "loss": 0.0347, + "step": 93635 + }, + { + "epoch": 4.37, + "learning_rate": 5.483987271330711e-06, + "loss": 0.0382, + "step": 93640 + }, + { + "epoch": 4.37, + "learning_rate": 5.483203486275924e-06, + "loss": 0.0234, + "step": 93645 + }, + { + "epoch": 4.37, + "learning_rate": 5.482419701221138e-06, + "loss": 0.0562, + "step": 93650 + }, + { + "epoch": 4.37, + "learning_rate": 5.481635916166351e-06, + "loss": 0.1205, + "step": 93655 + }, + { + "epoch": 4.37, + "learning_rate": 5.480852131111565e-06, + "loss": 0.1197, + "step": 93660 + }, + { + "epoch": 4.37, + "learning_rate": 5.480068346056778e-06, + "loss": 0.1209, + "step": 93665 + }, + { + "epoch": 4.37, + "learning_rate": 5.479284561001992e-06, + "loss": 0.1612, + "step": 93670 + }, + { + "epoch": 4.37, + "learning_rate": 5.478500775947204e-06, + "loss": 0.263, + "step": 93675 + }, + { + "epoch": 4.37, + "learning_rate": 5.477716990892418e-06, + "loss": 0.071, + "step": 93680 + }, + { + "epoch": 4.37, + "learning_rate": 5.476933205837631e-06, + "loss": 0.0193, + "step": 93685 + }, + { + "epoch": 4.37, + "learning_rate": 5.476149420782845e-06, + "loss": 0.0398, + "step": 93690 + }, + { + "epoch": 4.37, + "learning_rate": 5.475365635728058e-06, + "loss": 0.0171, + "step": 93695 + }, + { + "epoch": 4.37, + "learning_rate": 5.474581850673272e-06, + "loss": 0.0671, + "step": 93700 + }, + { + "epoch": 4.37, + "learning_rate": 5.473798065618485e-06, + "loss": 0.0807, + "step": 93705 + }, + { + "epoch": 4.37, + "learning_rate": 5.473014280563699e-06, + "loss": 0.0761, + "step": 93710 + }, + { + "epoch": 4.37, + "learning_rate": 5.472230495508912e-06, + "loss": 0.0736, + "step": 93715 + }, + { + "epoch": 4.37, + "learning_rate": 5.471446710454126e-06, + "loss": 0.3259, + "step": 93720 + }, + { + "epoch": 4.37, + "learning_rate": 5.47066292539934e-06, + "loss": 0.3525, + "step": 93725 + }, + { + "epoch": 4.37, + "learning_rate": 5.469879140344553e-06, + "loss": 0.1093, + "step": 93730 + }, + { + "epoch": 4.37, + "learning_rate": 5.469095355289767e-06, + "loss": 0.0386, + "step": 93735 + }, + { + "epoch": 4.37, + "learning_rate": 5.468311570234979e-06, + "loss": 0.0511, + "step": 93740 + }, + { + "epoch": 4.37, + "learning_rate": 5.467527785180192e-06, + "loss": 0.0422, + "step": 93745 + }, + { + "epoch": 4.37, + "learning_rate": 5.466744000125406e-06, + "loss": 0.0714, + "step": 93750 + }, + { + "epoch": 4.37, + "learning_rate": 5.465960215070619e-06, + "loss": 0.021, + "step": 93755 + }, + { + "epoch": 4.37, + "learning_rate": 5.465176430015833e-06, + "loss": 0.074, + "step": 93760 + }, + { + "epoch": 4.38, + "learning_rate": 5.464549401972003e-06, + "loss": 0.1566, + "step": 93765 + }, + { + "epoch": 4.38, + "learning_rate": 5.463765616917217e-06, + "loss": 0.1308, + "step": 93770 + }, + { + "epoch": 4.38, + "learning_rate": 5.46298183186243e-06, + "loss": 0.2837, + "step": 93775 + }, + { + "epoch": 4.38, + "learning_rate": 5.462198046807644e-06, + "loss": 0.0954, + "step": 93780 + }, + { + "epoch": 4.38, + "learning_rate": 5.461414261752858e-06, + "loss": 0.0132, + "step": 93785 + }, + { + "epoch": 4.38, + "learning_rate": 5.460630476698071e-06, + "loss": 0.026, + "step": 93790 + }, + { + "epoch": 4.38, + "learning_rate": 5.459846691643285e-06, + "loss": 0.065, + "step": 93795 + }, + { + "epoch": 4.38, + "learning_rate": 5.459062906588498e-06, + "loss": 0.0837, + "step": 93800 + }, + { + "epoch": 4.38, + "learning_rate": 5.45827912153371e-06, + "loss": 0.1, + "step": 93805 + }, + { + "epoch": 4.38, + "learning_rate": 5.457495336478924e-06, + "loss": 0.0343, + "step": 93810 + }, + { + "epoch": 4.38, + "learning_rate": 5.456711551424137e-06, + "loss": 0.0407, + "step": 93815 + }, + { + "epoch": 4.38, + "learning_rate": 5.455927766369351e-06, + "loss": 0.1016, + "step": 93820 + }, + { + "epoch": 4.38, + "learning_rate": 5.455143981314564e-06, + "loss": 0.2205, + "step": 93825 + }, + { + "epoch": 4.38, + "learning_rate": 5.454360196259778e-06, + "loss": 0.0554, + "step": 93830 + }, + { + "epoch": 4.38, + "learning_rate": 5.453576411204992e-06, + "loss": 0.0252, + "step": 93835 + }, + { + "epoch": 4.38, + "learning_rate": 5.452792626150205e-06, + "loss": 0.0667, + "step": 93840 + }, + { + "epoch": 4.38, + "learning_rate": 5.452008841095419e-06, + "loss": 0.0099, + "step": 93845 + }, + { + "epoch": 4.38, + "learning_rate": 5.451225056040632e-06, + "loss": 0.0591, + "step": 93850 + }, + { + "epoch": 4.38, + "learning_rate": 5.450441270985846e-06, + "loss": 0.0771, + "step": 93855 + }, + { + "epoch": 4.38, + "learning_rate": 5.449657485931059e-06, + "loss": 0.0646, + "step": 93860 + }, + { + "epoch": 4.38, + "learning_rate": 5.448873700876273e-06, + "loss": 0.0979, + "step": 93865 + }, + { + "epoch": 4.38, + "learning_rate": 5.448089915821485e-06, + "loss": 0.1096, + "step": 93870 + }, + { + "epoch": 4.38, + "learning_rate": 5.447306130766698e-06, + "loss": 0.3933, + "step": 93875 + }, + { + "epoch": 4.38, + "learning_rate": 5.446522345711912e-06, + "loss": 0.1126, + "step": 93880 + }, + { + "epoch": 4.38, + "learning_rate": 5.445738560657126e-06, + "loss": 0.022, + "step": 93885 + }, + { + "epoch": 4.38, + "learning_rate": 5.444954775602339e-06, + "loss": 0.0044, + "step": 93890 + }, + { + "epoch": 4.38, + "learning_rate": 5.444170990547553e-06, + "loss": 0.0406, + "step": 93895 + }, + { + "epoch": 4.38, + "learning_rate": 5.443387205492766e-06, + "loss": 0.042, + "step": 93900 + }, + { + "epoch": 4.38, + "learning_rate": 5.44260342043798e-06, + "loss": 0.0792, + "step": 93905 + }, + { + "epoch": 4.38, + "learning_rate": 5.441819635383193e-06, + "loss": 0.0912, + "step": 93910 + }, + { + "epoch": 4.38, + "learning_rate": 5.441035850328407e-06, + "loss": 0.1177, + "step": 93915 + }, + { + "epoch": 4.38, + "learning_rate": 5.44025206527362e-06, + "loss": 0.2086, + "step": 93920 + }, + { + "epoch": 4.38, + "learning_rate": 5.439468280218834e-06, + "loss": 0.2436, + "step": 93925 + }, + { + "epoch": 4.38, + "learning_rate": 5.438684495164047e-06, + "loss": 0.0886, + "step": 93930 + }, + { + "epoch": 4.38, + "learning_rate": 5.43790071010926e-06, + "loss": 0.0233, + "step": 93935 + }, + { + "epoch": 4.38, + "learning_rate": 5.437116925054473e-06, + "loss": 0.0095, + "step": 93940 + }, + { + "epoch": 4.38, + "learning_rate": 5.436333139999687e-06, + "loss": 0.1044, + "step": 93945 + }, + { + "epoch": 4.38, + "learning_rate": 5.4355493549449e-06, + "loss": 0.0649, + "step": 93950 + }, + { + "epoch": 4.38, + "learning_rate": 5.434765569890114e-06, + "loss": 0.0504, + "step": 93955 + }, + { + "epoch": 4.38, + "learning_rate": 5.433981784835327e-06, + "loss": 0.0792, + "step": 93960 + }, + { + "epoch": 4.38, + "learning_rate": 5.433197999780541e-06, + "loss": 0.1823, + "step": 93965 + }, + { + "epoch": 4.38, + "learning_rate": 5.432414214725754e-06, + "loss": 0.14, + "step": 93970 + }, + { + "epoch": 4.39, + "learning_rate": 5.431630429670968e-06, + "loss": 0.2092, + "step": 93975 + }, + { + "epoch": 4.39, + "learning_rate": 5.430846644616181e-06, + "loss": 0.0764, + "step": 93980 + }, + { + "epoch": 4.39, + "learning_rate": 5.430062859561395e-06, + "loss": 0.0467, + "step": 93985 + }, + { + "epoch": 4.39, + "learning_rate": 5.429279074506608e-06, + "loss": 0.0643, + "step": 93990 + }, + { + "epoch": 4.39, + "learning_rate": 5.428495289451822e-06, + "loss": 0.042, + "step": 93995 + }, + { + "epoch": 4.39, + "learning_rate": 5.427711504397034e-06, + "loss": 0.1081, + "step": 94000 + }, + { + "epoch": 4.39, + "learning_rate": 5.426927719342248e-06, + "loss": 0.0642, + "step": 94005 + }, + { + "epoch": 4.39, + "learning_rate": 5.426143934287461e-06, + "loss": 0.1115, + "step": 94010 + }, + { + "epoch": 4.39, + "learning_rate": 5.425360149232675e-06, + "loss": 0.1697, + "step": 94015 + }, + { + "epoch": 4.39, + "learning_rate": 5.424576364177888e-06, + "loss": 0.0586, + "step": 94020 + }, + { + "epoch": 4.39, + "learning_rate": 5.423792579123102e-06, + "loss": 0.1889, + "step": 94025 + }, + { + "epoch": 4.39, + "learning_rate": 5.423008794068315e-06, + "loss": 0.0461, + "step": 94030 + }, + { + "epoch": 4.39, + "learning_rate": 5.422225009013529e-06, + "loss": 0.0606, + "step": 94035 + }, + { + "epoch": 4.39, + "learning_rate": 5.421441223958742e-06, + "loss": 0.0025, + "step": 94040 + }, + { + "epoch": 4.39, + "learning_rate": 5.420657438903956e-06, + "loss": 0.0234, + "step": 94045 + }, + { + "epoch": 4.39, + "learning_rate": 5.41987365384917e-06, + "loss": 0.0614, + "step": 94050 + }, + { + "epoch": 4.39, + "learning_rate": 5.419089868794383e-06, + "loss": 0.1099, + "step": 94055 + }, + { + "epoch": 4.39, + "learning_rate": 5.4183060837395966e-06, + "loss": 0.0848, + "step": 94060 + }, + { + "epoch": 4.39, + "learning_rate": 5.417522298684809e-06, + "loss": 0.1277, + "step": 94065 + }, + { + "epoch": 4.39, + "learning_rate": 5.416738513630022e-06, + "loss": 0.1046, + "step": 94070 + }, + { + "epoch": 4.39, + "learning_rate": 5.415954728575236e-06, + "loss": 0.278, + "step": 94075 + }, + { + "epoch": 4.39, + "learning_rate": 5.415170943520449e-06, + "loss": 0.056, + "step": 94080 + }, + { + "epoch": 4.39, + "learning_rate": 5.414387158465663e-06, + "loss": 0.0217, + "step": 94085 + }, + { + "epoch": 4.39, + "learning_rate": 5.413603373410876e-06, + "loss": 0.044, + "step": 94090 + }, + { + "epoch": 4.39, + "learning_rate": 5.41281958835609e-06, + "loss": 0.0647, + "step": 94095 + }, + { + "epoch": 4.39, + "learning_rate": 5.412035803301304e-06, + "loss": 0.0422, + "step": 94100 + }, + { + "epoch": 4.39, + "learning_rate": 5.411252018246517e-06, + "loss": 0.0544, + "step": 94105 + }, + { + "epoch": 4.39, + "learning_rate": 5.4104682331917305e-06, + "loss": 0.0294, + "step": 94110 + }, + { + "epoch": 4.39, + "learning_rate": 5.409684448136944e-06, + "loss": 0.098, + "step": 94115 + }, + { + "epoch": 4.39, + "learning_rate": 5.4089006630821575e-06, + "loss": 0.1774, + "step": 94120 + }, + { + "epoch": 4.39, + "learning_rate": 5.4081168780273706e-06, + "loss": 0.9083, + "step": 94125 + }, + { + "epoch": 4.39, + "learning_rate": 5.407333092972583e-06, + "loss": 0.0559, + "step": 94130 + }, + { + "epoch": 4.39, + "learning_rate": 5.406549307917797e-06, + "loss": 0.0424, + "step": 94135 + }, + { + "epoch": 4.39, + "learning_rate": 5.40576552286301e-06, + "loss": 0.0275, + "step": 94140 + }, + { + "epoch": 4.39, + "learning_rate": 5.404981737808224e-06, + "loss": 0.0261, + "step": 94145 + }, + { + "epoch": 4.39, + "learning_rate": 5.4041979527534376e-06, + "loss": 0.1056, + "step": 94150 + }, + { + "epoch": 4.39, + "learning_rate": 5.403414167698651e-06, + "loss": 0.0855, + "step": 94155 + }, + { + "epoch": 4.39, + "learning_rate": 5.4026303826438645e-06, + "loss": 0.0668, + "step": 94160 + }, + { + "epoch": 4.39, + "learning_rate": 5.401846597589078e-06, + "loss": 0.0595, + "step": 94165 + }, + { + "epoch": 4.39, + "learning_rate": 5.4010628125342915e-06, + "loss": 0.1655, + "step": 94170 + }, + { + "epoch": 4.39, + "learning_rate": 5.4002790274795045e-06, + "loss": 0.4077, + "step": 94175 + }, + { + "epoch": 4.39, + "learning_rate": 5.3994952424247184e-06, + "loss": 0.112, + "step": 94180 + }, + { + "epoch": 4.39, + "learning_rate": 5.3987114573699315e-06, + "loss": 0.0193, + "step": 94185 + }, + { + "epoch": 4.4, + "learning_rate": 5.397927672315145e-06, + "loss": 0.0298, + "step": 94190 + }, + { + "epoch": 4.4, + "learning_rate": 5.397143887260358e-06, + "loss": 0.1002, + "step": 94195 + }, + { + "epoch": 4.4, + "learning_rate": 5.3963601022055715e-06, + "loss": 0.1018, + "step": 94200 + }, + { + "epoch": 4.4, + "learning_rate": 5.395576317150785e-06, + "loss": 0.092, + "step": 94205 + }, + { + "epoch": 4.4, + "learning_rate": 5.3947925320959985e-06, + "loss": 0.0637, + "step": 94210 + }, + { + "epoch": 4.4, + "learning_rate": 5.3940087470412116e-06, + "loss": 0.0959, + "step": 94215 + }, + { + "epoch": 4.4, + "learning_rate": 5.3932249619864255e-06, + "loss": 0.3055, + "step": 94220 + }, + { + "epoch": 4.4, + "learning_rate": 5.3924411769316385e-06, + "loss": 0.2291, + "step": 94225 + }, + { + "epoch": 4.4, + "learning_rate": 5.391657391876852e-06, + "loss": 0.0576, + "step": 94230 + }, + { + "epoch": 4.4, + "learning_rate": 5.3908736068220655e-06, + "loss": 0.0308, + "step": 94235 + }, + { + "epoch": 4.4, + "learning_rate": 5.390089821767279e-06, + "loss": 0.0825, + "step": 94240 + }, + { + "epoch": 4.4, + "learning_rate": 5.3893060367124924e-06, + "loss": 0.0327, + "step": 94245 + }, + { + "epoch": 4.4, + "learning_rate": 5.388522251657706e-06, + "loss": 0.101, + "step": 94250 + }, + { + "epoch": 4.4, + "learning_rate": 5.387738466602919e-06, + "loss": 0.1118, + "step": 94255 + }, + { + "epoch": 4.4, + "learning_rate": 5.3869546815481325e-06, + "loss": 0.0906, + "step": 94260 + }, + { + "epoch": 4.4, + "learning_rate": 5.3861708964933455e-06, + "loss": 0.1445, + "step": 94265 + }, + { + "epoch": 4.4, + "learning_rate": 5.3853871114385594e-06, + "loss": 0.1794, + "step": 94270 + }, + { + "epoch": 4.4, + "learning_rate": 5.3846033263837725e-06, + "loss": 0.3127, + "step": 94275 + }, + { + "epoch": 4.4, + "learning_rate": 5.383819541328986e-06, + "loss": 0.1108, + "step": 94280 + }, + { + "epoch": 4.4, + "learning_rate": 5.3830357562741995e-06, + "loss": 0.0173, + "step": 94285 + }, + { + "epoch": 4.4, + "learning_rate": 5.382251971219413e-06, + "loss": 0.0147, + "step": 94290 + }, + { + "epoch": 4.4, + "learning_rate": 5.381468186164626e-06, + "loss": 0.0375, + "step": 94295 + }, + { + "epoch": 4.4, + "learning_rate": 5.38068440110984e-06, + "loss": 0.0357, + "step": 94300 + }, + { + "epoch": 4.4, + "learning_rate": 5.379900616055053e-06, + "loss": 0.0637, + "step": 94305 + }, + { + "epoch": 4.4, + "learning_rate": 5.379116831000267e-06, + "loss": 0.0859, + "step": 94310 + }, + { + "epoch": 4.4, + "learning_rate": 5.378333045945481e-06, + "loss": 0.0535, + "step": 94315 + }, + { + "epoch": 4.4, + "learning_rate": 5.377549260890694e-06, + "loss": 0.0934, + "step": 94320 + }, + { + "epoch": 4.4, + "learning_rate": 5.3767654758359065e-06, + "loss": 0.1851, + "step": 94325 + }, + { + "epoch": 4.4, + "learning_rate": 5.37598169078112e-06, + "loss": 0.0937, + "step": 94330 + }, + { + "epoch": 4.4, + "learning_rate": 5.3751979057263334e-06, + "loss": 0.0283, + "step": 94335 + }, + { + "epoch": 4.4, + "learning_rate": 5.374414120671547e-06, + "loss": 0.0502, + "step": 94340 + }, + { + "epoch": 4.4, + "learning_rate": 5.37363033561676e-06, + "loss": 0.0551, + "step": 94345 + }, + { + "epoch": 4.4, + "learning_rate": 5.372846550561974e-06, + "loss": 0.0547, + "step": 94350 + }, + { + "epoch": 4.4, + "learning_rate": 5.372062765507187e-06, + "loss": 0.0846, + "step": 94355 + }, + { + "epoch": 4.4, + "learning_rate": 5.371278980452401e-06, + "loss": 0.0917, + "step": 94360 + }, + { + "epoch": 4.4, + "learning_rate": 5.370495195397615e-06, + "loss": 0.0626, + "step": 94365 + }, + { + "epoch": 4.4, + "learning_rate": 5.369711410342828e-06, + "loss": 0.3187, + "step": 94370 + }, + { + "epoch": 4.4, + "learning_rate": 5.368927625288042e-06, + "loss": 0.3131, + "step": 94375 + }, + { + "epoch": 4.4, + "learning_rate": 5.368143840233255e-06, + "loss": 0.0509, + "step": 94380 + }, + { + "epoch": 4.4, + "learning_rate": 5.367360055178469e-06, + "loss": 0.0031, + "step": 94385 + }, + { + "epoch": 4.4, + "learning_rate": 5.366576270123681e-06, + "loss": 0.042, + "step": 94390 + }, + { + "epoch": 4.4, + "learning_rate": 5.365792485068894e-06, + "loss": 0.0337, + "step": 94395 + }, + { + "epoch": 4.4, + "learning_rate": 5.365008700014108e-06, + "loss": 0.0632, + "step": 94400 + }, + { + "epoch": 4.41, + "learning_rate": 5.364224914959321e-06, + "loss": 0.0857, + "step": 94405 + }, + { + "epoch": 4.41, + "learning_rate": 5.363441129904535e-06, + "loss": 0.1284, + "step": 94410 + }, + { + "epoch": 4.41, + "learning_rate": 5.362657344849749e-06, + "loss": 0.1608, + "step": 94415 + }, + { + "epoch": 4.41, + "learning_rate": 5.361873559794962e-06, + "loss": 0.2023, + "step": 94420 + }, + { + "epoch": 4.41, + "learning_rate": 5.361089774740176e-06, + "loss": 0.3012, + "step": 94425 + }, + { + "epoch": 4.41, + "learning_rate": 5.360305989685389e-06, + "loss": 0.0722, + "step": 94430 + }, + { + "epoch": 4.41, + "learning_rate": 5.359522204630603e-06, + "loss": 0.0794, + "step": 94435 + }, + { + "epoch": 4.41, + "learning_rate": 5.358738419575816e-06, + "loss": 0.0341, + "step": 94440 + }, + { + "epoch": 4.41, + "learning_rate": 5.35795463452103e-06, + "loss": 0.0394, + "step": 94445 + }, + { + "epoch": 4.41, + "learning_rate": 5.357170849466243e-06, + "loss": 0.0824, + "step": 94450 + }, + { + "epoch": 4.41, + "learning_rate": 5.356387064411455e-06, + "loss": 0.107, + "step": 94455 + }, + { + "epoch": 4.41, + "learning_rate": 5.355603279356669e-06, + "loss": 0.1359, + "step": 94460 + }, + { + "epoch": 4.41, + "learning_rate": 5.354819494301883e-06, + "loss": 0.0842, + "step": 94465 + }, + { + "epoch": 4.41, + "learning_rate": 5.354035709247096e-06, + "loss": 0.1772, + "step": 94470 + }, + { + "epoch": 4.41, + "learning_rate": 5.35325192419231e-06, + "loss": 0.2817, + "step": 94475 + }, + { + "epoch": 4.41, + "learning_rate": 5.352468139137523e-06, + "loss": 0.0653, + "step": 94480 + }, + { + "epoch": 4.41, + "learning_rate": 5.351684354082737e-06, + "loss": 0.0257, + "step": 94485 + }, + { + "epoch": 4.41, + "learning_rate": 5.35090056902795e-06, + "loss": 0.0314, + "step": 94490 + }, + { + "epoch": 4.41, + "learning_rate": 5.350116783973164e-06, + "loss": 0.03, + "step": 94495 + }, + { + "epoch": 4.41, + "learning_rate": 5.349332998918377e-06, + "loss": 0.0837, + "step": 94500 + }, + { + "epoch": 4.41, + "learning_rate": 5.348549213863591e-06, + "loss": 0.068, + "step": 94505 + }, + { + "epoch": 4.41, + "learning_rate": 5.347765428808804e-06, + "loss": 0.1035, + "step": 94510 + }, + { + "epoch": 4.41, + "learning_rate": 5.346981643754018e-06, + "loss": 0.1012, + "step": 94515 + }, + { + "epoch": 4.41, + "learning_rate": 5.34619785869923e-06, + "loss": 0.1492, + "step": 94520 + }, + { + "epoch": 4.41, + "learning_rate": 5.345414073644444e-06, + "loss": 0.3077, + "step": 94525 + }, + { + "epoch": 4.41, + "learning_rate": 5.344630288589657e-06, + "loss": 0.1079, + "step": 94530 + }, + { + "epoch": 4.41, + "learning_rate": 5.343846503534871e-06, + "loss": 0.0021, + "step": 94535 + }, + { + "epoch": 4.41, + "learning_rate": 5.343062718480084e-06, + "loss": 0.0187, + "step": 94540 + }, + { + "epoch": 4.41, + "learning_rate": 5.342278933425298e-06, + "loss": 0.0476, + "step": 94545 + }, + { + "epoch": 4.41, + "learning_rate": 5.341495148370511e-06, + "loss": 0.0735, + "step": 94550 + }, + { + "epoch": 4.41, + "learning_rate": 5.340711363315725e-06, + "loss": 0.0585, + "step": 94555 + }, + { + "epoch": 4.41, + "learning_rate": 5.339927578260938e-06, + "loss": 0.0936, + "step": 94560 + }, + { + "epoch": 4.41, + "learning_rate": 5.339143793206152e-06, + "loss": 0.1221, + "step": 94565 + }, + { + "epoch": 4.41, + "learning_rate": 5.338360008151365e-06, + "loss": 0.1027, + "step": 94570 + }, + { + "epoch": 4.41, + "learning_rate": 5.337576223096579e-06, + "loss": 0.3455, + "step": 94575 + }, + { + "epoch": 4.41, + "learning_rate": 5.336792438041792e-06, + "loss": 0.1167, + "step": 94580 + }, + { + "epoch": 4.41, + "learning_rate": 5.336008652987005e-06, + "loss": 0.0593, + "step": 94585 + }, + { + "epoch": 4.41, + "learning_rate": 5.335224867932218e-06, + "loss": 0.0618, + "step": 94590 + }, + { + "epoch": 4.41, + "learning_rate": 5.334441082877432e-06, + "loss": 0.0215, + "step": 94595 + }, + { + "epoch": 4.41, + "learning_rate": 5.333657297822645e-06, + "loss": 0.0478, + "step": 94600 + }, + { + "epoch": 4.41, + "learning_rate": 5.332873512767859e-06, + "loss": 0.0374, + "step": 94605 + }, + { + "epoch": 4.41, + "learning_rate": 5.332089727713072e-06, + "loss": 0.1203, + "step": 94610 + }, + { + "epoch": 4.41, + "learning_rate": 5.331305942658286e-06, + "loss": 0.096, + "step": 94615 + }, + { + "epoch": 4.42, + "learning_rate": 5.330522157603499e-06, + "loss": 0.1288, + "step": 94620 + }, + { + "epoch": 4.42, + "learning_rate": 5.329738372548713e-06, + "loss": 0.3298, + "step": 94625 + }, + { + "epoch": 4.42, + "learning_rate": 5.328954587493927e-06, + "loss": 0.0654, + "step": 94630 + }, + { + "epoch": 4.42, + "learning_rate": 5.32817080243914e-06, + "loss": 0.0372, + "step": 94635 + }, + { + "epoch": 4.42, + "learning_rate": 5.327387017384354e-06, + "loss": 0.0341, + "step": 94640 + }, + { + "epoch": 4.42, + "learning_rate": 5.326603232329567e-06, + "loss": 0.1115, + "step": 94645 + }, + { + "epoch": 4.42, + "learning_rate": 5.325819447274779e-06, + "loss": 0.1194, + "step": 94650 + }, + { + "epoch": 4.42, + "learning_rate": 5.325035662219993e-06, + "loss": 0.0683, + "step": 94655 + }, + { + "epoch": 4.42, + "learning_rate": 5.324251877165206e-06, + "loss": 0.125, + "step": 94660 + }, + { + "epoch": 4.42, + "learning_rate": 5.32346809211042e-06, + "loss": 0.1841, + "step": 94665 + }, + { + "epoch": 4.42, + "learning_rate": 5.322684307055633e-06, + "loss": 0.1564, + "step": 94670 + }, + { + "epoch": 4.42, + "learning_rate": 5.321900522000847e-06, + "loss": 0.2087, + "step": 94675 + }, + { + "epoch": 4.42, + "learning_rate": 5.321116736946061e-06, + "loss": 0.067, + "step": 94680 + }, + { + "epoch": 4.42, + "learning_rate": 5.320332951891274e-06, + "loss": 0.0378, + "step": 94685 + }, + { + "epoch": 4.42, + "learning_rate": 5.319549166836488e-06, + "loss": 0.0236, + "step": 94690 + }, + { + "epoch": 4.42, + "learning_rate": 5.318765381781701e-06, + "loss": 0.0267, + "step": 94695 + }, + { + "epoch": 4.42, + "learning_rate": 5.317981596726915e-06, + "loss": 0.0426, + "step": 94700 + }, + { + "epoch": 4.42, + "learning_rate": 5.317197811672128e-06, + "loss": 0.0339, + "step": 94705 + }, + { + "epoch": 4.42, + "learning_rate": 5.316414026617342e-06, + "loss": 0.086, + "step": 94710 + }, + { + "epoch": 4.42, + "learning_rate": 5.315630241562554e-06, + "loss": 0.1495, + "step": 94715 + }, + { + "epoch": 4.42, + "learning_rate": 5.314846456507767e-06, + "loss": 0.1409, + "step": 94720 + }, + { + "epoch": 4.42, + "learning_rate": 5.314062671452981e-06, + "loss": 0.2498, + "step": 94725 + }, + { + "epoch": 4.42, + "learning_rate": 5.313278886398195e-06, + "loss": 0.1208, + "step": 94730 + }, + { + "epoch": 4.42, + "learning_rate": 5.312495101343408e-06, + "loss": 0.0606, + "step": 94735 + }, + { + "epoch": 4.42, + "learning_rate": 5.311711316288622e-06, + "loss": 0.0261, + "step": 94740 + }, + { + "epoch": 4.42, + "learning_rate": 5.310927531233835e-06, + "loss": 0.1263, + "step": 94745 + }, + { + "epoch": 4.42, + "learning_rate": 5.310143746179049e-06, + "loss": 0.0717, + "step": 94750 + }, + { + "epoch": 4.42, + "learning_rate": 5.309359961124262e-06, + "loss": 0.0305, + "step": 94755 + }, + { + "epoch": 4.42, + "learning_rate": 5.308576176069476e-06, + "loss": 0.0571, + "step": 94760 + }, + { + "epoch": 4.42, + "learning_rate": 5.307792391014689e-06, + "loss": 0.2048, + "step": 94765 + }, + { + "epoch": 4.42, + "learning_rate": 5.307008605959903e-06, + "loss": 0.1052, + "step": 94770 + }, + { + "epoch": 4.42, + "learning_rate": 5.306224820905116e-06, + "loss": 0.3887, + "step": 94775 + }, + { + "epoch": 4.42, + "learning_rate": 5.305441035850329e-06, + "loss": 0.1179, + "step": 94780 + }, + { + "epoch": 4.42, + "learning_rate": 5.304657250795542e-06, + "loss": 0.0314, + "step": 94785 + }, + { + "epoch": 4.42, + "learning_rate": 5.303873465740756e-06, + "loss": 0.0317, + "step": 94790 + }, + { + "epoch": 4.42, + "learning_rate": 5.303089680685969e-06, + "loss": 0.0489, + "step": 94795 + }, + { + "epoch": 4.42, + "learning_rate": 5.302305895631183e-06, + "loss": 0.0492, + "step": 94800 + }, + { + "epoch": 4.42, + "learning_rate": 5.301522110576396e-06, + "loss": 0.0911, + "step": 94805 + }, + { + "epoch": 4.42, + "learning_rate": 5.30073832552161e-06, + "loss": 0.073, + "step": 94810 + }, + { + "epoch": 4.42, + "learning_rate": 5.299954540466823e-06, + "loss": 0.1565, + "step": 94815 + }, + { + "epoch": 4.42, + "learning_rate": 5.2991707554120366e-06, + "loss": 0.1232, + "step": 94820 + }, + { + "epoch": 4.42, + "learning_rate": 5.29838697035725e-06, + "loss": 0.4846, + "step": 94825 + }, + { + "epoch": 4.42, + "learning_rate": 5.2976031853024635e-06, + "loss": 0.0942, + "step": 94830 + }, + { + "epoch": 4.43, + "learning_rate": 5.2968194002476766e-06, + "loss": 0.0406, + "step": 94835 + }, + { + "epoch": 4.43, + "learning_rate": 5.2960356151928905e-06, + "loss": 0.0519, + "step": 94840 + }, + { + "epoch": 4.43, + "learning_rate": 5.295251830138103e-06, + "loss": 0.0323, + "step": 94845 + }, + { + "epoch": 4.43, + "learning_rate": 5.294468045083317e-06, + "loss": 0.0845, + "step": 94850 + }, + { + "epoch": 4.43, + "learning_rate": 5.29368426002853e-06, + "loss": 0.0372, + "step": 94855 + }, + { + "epoch": 4.43, + "learning_rate": 5.2929004749737436e-06, + "loss": 0.1241, + "step": 94860 + }, + { + "epoch": 4.43, + "learning_rate": 5.292116689918957e-06, + "loss": 0.0852, + "step": 94865 + }, + { + "epoch": 4.43, + "learning_rate": 5.2913329048641705e-06, + "loss": 0.1992, + "step": 94870 + }, + { + "epoch": 4.43, + "learning_rate": 5.290549119809384e-06, + "loss": 0.3362, + "step": 94875 + }, + { + "epoch": 4.43, + "learning_rate": 5.2897653347545975e-06, + "loss": 0.0508, + "step": 94880 + }, + { + "epoch": 4.43, + "learning_rate": 5.2889815496998106e-06, + "loss": 0.017, + "step": 94885 + }, + { + "epoch": 4.43, + "learning_rate": 5.2881977646450245e-06, + "loss": 0.0439, + "step": 94890 + }, + { + "epoch": 4.43, + "learning_rate": 5.287413979590238e-06, + "loss": 0.0687, + "step": 94895 + }, + { + "epoch": 4.43, + "learning_rate": 5.286630194535451e-06, + "loss": 0.0565, + "step": 94900 + }, + { + "epoch": 4.43, + "learning_rate": 5.285846409480665e-06, + "loss": 0.0778, + "step": 94905 + }, + { + "epoch": 4.43, + "learning_rate": 5.2850626244258775e-06, + "loss": 0.0566, + "step": 94910 + }, + { + "epoch": 4.43, + "learning_rate": 5.284278839371091e-06, + "loss": 0.1712, + "step": 94915 + }, + { + "epoch": 4.43, + "learning_rate": 5.2834950543163045e-06, + "loss": 0.2344, + "step": 94920 + }, + { + "epoch": 4.43, + "learning_rate": 5.2827112692615176e-06, + "loss": 0.3864, + "step": 94925 + }, + { + "epoch": 4.43, + "learning_rate": 5.2819274842067315e-06, + "loss": 0.0749, + "step": 94930 + }, + { + "epoch": 4.43, + "learning_rate": 5.2811436991519445e-06, + "loss": 0.0081, + "step": 94935 + }, + { + "epoch": 4.43, + "learning_rate": 5.2803599140971584e-06, + "loss": 0.0292, + "step": 94940 + }, + { + "epoch": 4.43, + "learning_rate": 5.279576129042372e-06, + "loss": 0.0565, + "step": 94945 + }, + { + "epoch": 4.43, + "learning_rate": 5.278792343987585e-06, + "loss": 0.0914, + "step": 94950 + }, + { + "epoch": 4.43, + "learning_rate": 5.278008558932799e-06, + "loss": 0.0806, + "step": 94955 + }, + { + "epoch": 4.43, + "learning_rate": 5.277224773878012e-06, + "loss": 0.1211, + "step": 94960 + }, + { + "epoch": 4.43, + "learning_rate": 5.276440988823226e-06, + "loss": 0.0831, + "step": 94965 + }, + { + "epoch": 4.43, + "learning_rate": 5.275657203768439e-06, + "loss": 0.1333, + "step": 94970 + }, + { + "epoch": 4.43, + "learning_rate": 5.2748734187136515e-06, + "loss": 0.3398, + "step": 94975 + }, + { + "epoch": 4.43, + "learning_rate": 5.2740896336588654e-06, + "loss": 0.0687, + "step": 94980 + }, + { + "epoch": 4.43, + "learning_rate": 5.2733058486040785e-06, + "loss": 0.023, + "step": 94985 + }, + { + "epoch": 4.43, + "learning_rate": 5.272522063549292e-06, + "loss": 0.0358, + "step": 94990 + }, + { + "epoch": 4.43, + "learning_rate": 5.271738278494506e-06, + "loss": 0.0276, + "step": 94995 + }, + { + "epoch": 4.43, + "learning_rate": 5.270954493439719e-06, + "loss": 0.0435, + "step": 95000 + }, + { + "epoch": 4.43, + "learning_rate": 5.270170708384933e-06, + "loss": 0.0563, + "step": 95005 + }, + { + "epoch": 4.43, + "learning_rate": 5.269386923330146e-06, + "loss": 0.0479, + "step": 95010 + }, + { + "epoch": 4.43, + "learning_rate": 5.26860313827536e-06, + "loss": 0.1242, + "step": 95015 + }, + { + "epoch": 4.43, + "learning_rate": 5.267819353220573e-06, + "loss": 0.1067, + "step": 95020 + }, + { + "epoch": 4.43, + "learning_rate": 5.267035568165787e-06, + "loss": 0.187, + "step": 95025 + }, + { + "epoch": 4.43, + "learning_rate": 5.266251783111e-06, + "loss": 0.1048, + "step": 95030 + }, + { + "epoch": 4.43, + "learning_rate": 5.265467998056214e-06, + "loss": 0.0121, + "step": 95035 + }, + { + "epoch": 4.43, + "learning_rate": 5.264684213001426e-06, + "loss": 0.0388, + "step": 95040 + }, + { + "epoch": 4.43, + "learning_rate": 5.26390042794664e-06, + "loss": 0.039, + "step": 95045 + }, + { + "epoch": 4.44, + "learning_rate": 5.263116642891853e-06, + "loss": 0.0672, + "step": 95050 + }, + { + "epoch": 4.44, + "learning_rate": 5.262332857837067e-06, + "loss": 0.0662, + "step": 95055 + }, + { + "epoch": 4.44, + "learning_rate": 5.26154907278228e-06, + "loss": 0.0824, + "step": 95060 + }, + { + "epoch": 4.44, + "learning_rate": 5.260765287727494e-06, + "loss": 0.1178, + "step": 95065 + }, + { + "epoch": 4.44, + "learning_rate": 5.259981502672707e-06, + "loss": 0.1222, + "step": 95070 + }, + { + "epoch": 4.44, + "learning_rate": 5.259197717617921e-06, + "loss": 0.2457, + "step": 95075 + }, + { + "epoch": 4.44, + "learning_rate": 5.258413932563134e-06, + "loss": 0.1489, + "step": 95080 + }, + { + "epoch": 4.44, + "learning_rate": 5.257630147508348e-06, + "loss": 0.0279, + "step": 95085 + }, + { + "epoch": 4.44, + "learning_rate": 5.256846362453561e-06, + "loss": 0.0245, + "step": 95090 + }, + { + "epoch": 4.44, + "learning_rate": 5.256062577398775e-06, + "loss": 0.0522, + "step": 95095 + }, + { + "epoch": 4.44, + "learning_rate": 5.255278792343988e-06, + "loss": 0.0658, + "step": 95100 + }, + { + "epoch": 4.44, + "learning_rate": 5.254495007289201e-06, + "loss": 0.0312, + "step": 95105 + }, + { + "epoch": 4.44, + "learning_rate": 5.253711222234414e-06, + "loss": 0.0863, + "step": 95110 + }, + { + "epoch": 4.44, + "learning_rate": 5.252927437179628e-06, + "loss": 0.1052, + "step": 95115 + }, + { + "epoch": 4.44, + "learning_rate": 5.252143652124841e-06, + "loss": 0.2086, + "step": 95120 + }, + { + "epoch": 4.44, + "learning_rate": 5.251359867070055e-06, + "loss": 0.3319, + "step": 95125 + }, + { + "epoch": 4.44, + "learning_rate": 5.250576082015268e-06, + "loss": 0.0817, + "step": 95130 + }, + { + "epoch": 4.44, + "learning_rate": 5.249792296960482e-06, + "loss": 0.033, + "step": 95135 + }, + { + "epoch": 4.44, + "learning_rate": 5.249008511905695e-06, + "loss": 0.0278, + "step": 95140 + }, + { + "epoch": 4.44, + "learning_rate": 5.248224726850909e-06, + "loss": 0.0582, + "step": 95145 + }, + { + "epoch": 4.44, + "learning_rate": 5.247440941796122e-06, + "loss": 0.0525, + "step": 95150 + }, + { + "epoch": 4.44, + "learning_rate": 5.246657156741336e-06, + "loss": 0.0768, + "step": 95155 + }, + { + "epoch": 4.44, + "learning_rate": 5.245873371686549e-06, + "loss": 0.0597, + "step": 95160 + }, + { + "epoch": 4.44, + "learning_rate": 5.245089586631763e-06, + "loss": 0.1307, + "step": 95165 + }, + { + "epoch": 4.44, + "learning_rate": 5.244305801576975e-06, + "loss": 0.298, + "step": 95170 + }, + { + "epoch": 4.44, + "learning_rate": 5.243522016522189e-06, + "loss": 0.2818, + "step": 95175 + }, + { + "epoch": 4.44, + "learning_rate": 5.242738231467402e-06, + "loss": 0.0809, + "step": 95180 + }, + { + "epoch": 4.44, + "learning_rate": 5.241954446412616e-06, + "loss": 0.0503, + "step": 95185 + }, + { + "epoch": 4.44, + "learning_rate": 5.241170661357829e-06, + "loss": 0.0217, + "step": 95190 + }, + { + "epoch": 4.44, + "learning_rate": 5.240386876303043e-06, + "loss": 0.0091, + "step": 95195 + }, + { + "epoch": 4.44, + "learning_rate": 5.239603091248256e-06, + "loss": 0.2524, + "step": 95200 + }, + { + "epoch": 4.44, + "learning_rate": 5.23881930619347e-06, + "loss": 0.1054, + "step": 95205 + }, + { + "epoch": 4.44, + "learning_rate": 5.238035521138684e-06, + "loss": 0.1351, + "step": 95210 + }, + { + "epoch": 4.44, + "learning_rate": 5.237251736083897e-06, + "loss": 0.1043, + "step": 95215 + }, + { + "epoch": 4.44, + "learning_rate": 5.236467951029111e-06, + "loss": 0.1674, + "step": 95220 + }, + { + "epoch": 4.44, + "learning_rate": 5.235684165974324e-06, + "loss": 0.2654, + "step": 95225 + }, + { + "epoch": 4.44, + "learning_rate": 5.234900380919538e-06, + "loss": 0.0535, + "step": 95230 + }, + { + "epoch": 4.44, + "learning_rate": 5.23411659586475e-06, + "loss": 0.0409, + "step": 95235 + }, + { + "epoch": 4.44, + "learning_rate": 5.233332810809963e-06, + "loss": 0.0328, + "step": 95240 + }, + { + "epoch": 4.44, + "learning_rate": 5.232549025755177e-06, + "loss": 0.0177, + "step": 95245 + }, + { + "epoch": 4.44, + "learning_rate": 5.23176524070039e-06, + "loss": 0.0562, + "step": 95250 + }, + { + "epoch": 4.44, + "learning_rate": 5.230981455645604e-06, + "loss": 0.0974, + "step": 95255 + }, + { + "epoch": 4.44, + "learning_rate": 5.230197670590818e-06, + "loss": 0.1391, + "step": 95260 + }, + { + "epoch": 4.45, + "learning_rate": 5.229413885536031e-06, + "loss": 0.1046, + "step": 95265 + }, + { + "epoch": 4.45, + "learning_rate": 5.228630100481245e-06, + "loss": 0.1406, + "step": 95270 + }, + { + "epoch": 4.45, + "learning_rate": 5.227846315426458e-06, + "loss": 0.3384, + "step": 95275 + }, + { + "epoch": 4.45, + "learning_rate": 5.227062530371672e-06, + "loss": 0.0687, + "step": 95280 + }, + { + "epoch": 4.45, + "learning_rate": 5.226278745316885e-06, + "loss": 0.0178, + "step": 95285 + }, + { + "epoch": 4.45, + "learning_rate": 5.225494960262099e-06, + "loss": 0.0261, + "step": 95290 + }, + { + "epoch": 4.45, + "learning_rate": 5.224711175207312e-06, + "loss": 0.0263, + "step": 95295 + }, + { + "epoch": 4.45, + "learning_rate": 5.223927390152524e-06, + "loss": 0.026, + "step": 95300 + }, + { + "epoch": 4.45, + "learning_rate": 5.223143605097738e-06, + "loss": 0.054, + "step": 95305 + }, + { + "epoch": 4.45, + "learning_rate": 5.222359820042952e-06, + "loss": 0.1107, + "step": 95310 + }, + { + "epoch": 4.45, + "learning_rate": 5.221576034988165e-06, + "loss": 0.1421, + "step": 95315 + }, + { + "epoch": 4.45, + "learning_rate": 5.220792249933379e-06, + "loss": 0.1066, + "step": 95320 + }, + { + "epoch": 4.45, + "learning_rate": 5.220008464878592e-06, + "loss": 0.3201, + "step": 95325 + }, + { + "epoch": 4.45, + "learning_rate": 5.219224679823806e-06, + "loss": 0.1401, + "step": 95330 + }, + { + "epoch": 4.45, + "learning_rate": 5.218440894769019e-06, + "loss": 0.0372, + "step": 95335 + }, + { + "epoch": 4.45, + "learning_rate": 5.217657109714233e-06, + "loss": 0.0276, + "step": 95340 + }, + { + "epoch": 4.45, + "learning_rate": 5.216873324659446e-06, + "loss": 0.055, + "step": 95345 + }, + { + "epoch": 4.45, + "learning_rate": 5.21608953960466e-06, + "loss": 0.0375, + "step": 95350 + }, + { + "epoch": 4.45, + "learning_rate": 5.215305754549873e-06, + "loss": 0.0857, + "step": 95355 + }, + { + "epoch": 4.45, + "learning_rate": 5.214521969495087e-06, + "loss": 0.0805, + "step": 95360 + }, + { + "epoch": 4.45, + "learning_rate": 5.213738184440299e-06, + "loss": 0.0871, + "step": 95365 + }, + { + "epoch": 4.45, + "learning_rate": 5.212954399385513e-06, + "loss": 0.1381, + "step": 95370 + }, + { + "epoch": 4.45, + "learning_rate": 5.212170614330726e-06, + "loss": 0.3392, + "step": 95375 + }, + { + "epoch": 4.45, + "learning_rate": 5.21138682927594e-06, + "loss": 0.0491, + "step": 95380 + }, + { + "epoch": 4.45, + "learning_rate": 5.210603044221153e-06, + "loss": 0.0252, + "step": 95385 + }, + { + "epoch": 4.45, + "learning_rate": 5.209819259166367e-06, + "loss": 0.0284, + "step": 95390 + }, + { + "epoch": 4.45, + "learning_rate": 5.20903547411158e-06, + "loss": 0.0535, + "step": 95395 + }, + { + "epoch": 4.45, + "learning_rate": 5.208251689056794e-06, + "loss": 0.0496, + "step": 95400 + }, + { + "epoch": 4.45, + "learning_rate": 5.207467904002007e-06, + "loss": 0.0431, + "step": 95405 + }, + { + "epoch": 4.45, + "learning_rate": 5.206684118947221e-06, + "loss": 0.0613, + "step": 95410 + }, + { + "epoch": 4.45, + "learning_rate": 5.205900333892434e-06, + "loss": 0.1329, + "step": 95415 + }, + { + "epoch": 4.45, + "learning_rate": 5.205116548837648e-06, + "loss": 0.2121, + "step": 95420 + }, + { + "epoch": 4.45, + "learning_rate": 5.204332763782861e-06, + "loss": 0.1938, + "step": 95425 + }, + { + "epoch": 4.45, + "learning_rate": 5.203548978728074e-06, + "loss": 0.1142, + "step": 95430 + }, + { + "epoch": 4.45, + "learning_rate": 5.202765193673287e-06, + "loss": 0.0108, + "step": 95435 + }, + { + "epoch": 4.45, + "learning_rate": 5.201981408618501e-06, + "loss": 0.0168, + "step": 95440 + }, + { + "epoch": 4.45, + "learning_rate": 5.201197623563714e-06, + "loss": 0.078, + "step": 95445 + }, + { + "epoch": 4.45, + "learning_rate": 5.200413838508928e-06, + "loss": 0.0936, + "step": 95450 + }, + { + "epoch": 4.45, + "learning_rate": 5.199630053454141e-06, + "loss": 0.0835, + "step": 95455 + }, + { + "epoch": 4.45, + "learning_rate": 5.198846268399355e-06, + "loss": 0.144, + "step": 95460 + }, + { + "epoch": 4.45, + "learning_rate": 5.198062483344568e-06, + "loss": 0.1013, + "step": 95465 + }, + { + "epoch": 4.45, + "learning_rate": 5.197278698289782e-06, + "loss": 0.1307, + "step": 95470 + }, + { + "epoch": 4.45, + "learning_rate": 5.1964949132349955e-06, + "loss": 0.2666, + "step": 95475 + }, + { + "epoch": 4.46, + "learning_rate": 5.195711128180209e-06, + "loss": 0.1471, + "step": 95480 + }, + { + "epoch": 4.46, + "learning_rate": 5.1949273431254225e-06, + "loss": 0.0119, + "step": 95485 + }, + { + "epoch": 4.46, + "learning_rate": 5.1941435580706356e-06, + "loss": 0.0086, + "step": 95490 + }, + { + "epoch": 4.46, + "learning_rate": 5.193359773015848e-06, + "loss": 0.0188, + "step": 95495 + }, + { + "epoch": 4.46, + "learning_rate": 5.192575987961062e-06, + "loss": 0.0327, + "step": 95500 + }, + { + "epoch": 4.46, + "learning_rate": 5.191792202906275e-06, + "loss": 0.0809, + "step": 95505 + }, + { + "epoch": 4.46, + "learning_rate": 5.191008417851489e-06, + "loss": 0.1441, + "step": 95510 + }, + { + "epoch": 4.46, + "learning_rate": 5.190224632796702e-06, + "loss": 0.1278, + "step": 95515 + }, + { + "epoch": 4.46, + "learning_rate": 5.189440847741916e-06, + "loss": 0.0984, + "step": 95520 + }, + { + "epoch": 4.46, + "learning_rate": 5.1886570626871295e-06, + "loss": 0.1737, + "step": 95525 + }, + { + "epoch": 4.46, + "learning_rate": 5.1878732776323426e-06, + "loss": 0.0491, + "step": 95530 + }, + { + "epoch": 4.46, + "learning_rate": 5.1870894925775565e-06, + "loss": 0.0195, + "step": 95535 + }, + { + "epoch": 4.46, + "learning_rate": 5.1863057075227695e-06, + "loss": 0.0383, + "step": 95540 + }, + { + "epoch": 4.46, + "learning_rate": 5.1855219224679834e-06, + "loss": 0.0671, + "step": 95545 + }, + { + "epoch": 4.46, + "learning_rate": 5.1847381374131965e-06, + "loss": 0.0446, + "step": 95550 + }, + { + "epoch": 4.46, + "learning_rate": 5.18395435235841e-06, + "loss": 0.0744, + "step": 95555 + }, + { + "epoch": 4.46, + "learning_rate": 5.183170567303623e-06, + "loss": 0.1538, + "step": 95560 + }, + { + "epoch": 4.46, + "learning_rate": 5.182386782248836e-06, + "loss": 0.1187, + "step": 95565 + }, + { + "epoch": 4.46, + "learning_rate": 5.1816029971940496e-06, + "loss": 0.1129, + "step": 95570 + }, + { + "epoch": 4.46, + "learning_rate": 5.1808192121392635e-06, + "loss": 0.2866, + "step": 95575 + }, + { + "epoch": 4.46, + "learning_rate": 5.1800354270844765e-06, + "loss": 0.0671, + "step": 95580 + }, + { + "epoch": 4.46, + "learning_rate": 5.1792516420296904e-06, + "loss": 0.0258, + "step": 95585 + }, + { + "epoch": 4.46, + "learning_rate": 5.1784678569749035e-06, + "loss": 0.0303, + "step": 95590 + }, + { + "epoch": 4.46, + "learning_rate": 5.177684071920117e-06, + "loss": 0.0669, + "step": 95595 + }, + { + "epoch": 4.46, + "learning_rate": 5.1769002868653305e-06, + "loss": 0.1529, + "step": 95600 + }, + { + "epoch": 4.46, + "learning_rate": 5.176116501810544e-06, + "loss": 0.096, + "step": 95605 + }, + { + "epoch": 4.46, + "learning_rate": 5.1753327167557574e-06, + "loss": 0.038, + "step": 95610 + }, + { + "epoch": 4.46, + "learning_rate": 5.174548931700971e-06, + "loss": 0.0692, + "step": 95615 + }, + { + "epoch": 4.46, + "learning_rate": 5.173765146646184e-06, + "loss": 0.1471, + "step": 95620 + }, + { + "epoch": 4.46, + "learning_rate": 5.1729813615913975e-06, + "loss": 0.2807, + "step": 95625 + }, + { + "epoch": 4.46, + "learning_rate": 5.1721975765366105e-06, + "loss": 0.066, + "step": 95630 + }, + { + "epoch": 4.46, + "learning_rate": 5.171413791481824e-06, + "loss": 0.0502, + "step": 95635 + }, + { + "epoch": 4.46, + "learning_rate": 5.1706300064270375e-06, + "loss": 0.0317, + "step": 95640 + }, + { + "epoch": 4.46, + "learning_rate": 5.169846221372251e-06, + "loss": 0.0687, + "step": 95645 + }, + { + "epoch": 4.46, + "learning_rate": 5.1690624363174644e-06, + "loss": 0.0326, + "step": 95650 + }, + { + "epoch": 4.46, + "learning_rate": 5.168278651262678e-06, + "loss": 0.047, + "step": 95655 + }, + { + "epoch": 4.46, + "learning_rate": 5.167494866207891e-06, + "loss": 0.1274, + "step": 95660 + }, + { + "epoch": 4.46, + "learning_rate": 5.166711081153105e-06, + "loss": 0.0978, + "step": 95665 + }, + { + "epoch": 4.46, + "learning_rate": 5.165927296098318e-06, + "loss": 0.2717, + "step": 95670 + }, + { + "epoch": 4.46, + "learning_rate": 5.165143511043532e-06, + "loss": 0.2618, + "step": 95675 + }, + { + "epoch": 4.46, + "learning_rate": 5.164359725988745e-06, + "loss": 0.1012, + "step": 95680 + }, + { + "epoch": 4.46, + "learning_rate": 5.163575940933959e-06, + "loss": 0.0164, + "step": 95685 + }, + { + "epoch": 4.47, + "learning_rate": 5.1627921558791715e-06, + "loss": 0.0476, + "step": 95690 + }, + { + "epoch": 4.47, + "learning_rate": 5.162008370824385e-06, + "loss": 0.0308, + "step": 95695 + }, + { + "epoch": 4.47, + "learning_rate": 5.161224585769598e-06, + "loss": 0.0313, + "step": 95700 + }, + { + "epoch": 4.47, + "learning_rate": 5.160440800714812e-06, + "loss": 0.0541, + "step": 95705 + }, + { + "epoch": 4.47, + "learning_rate": 5.159657015660025e-06, + "loss": 0.0743, + "step": 95710 + }, + { + "epoch": 4.47, + "learning_rate": 5.158873230605239e-06, + "loss": 0.0944, + "step": 95715 + }, + { + "epoch": 4.47, + "learning_rate": 5.158089445550452e-06, + "loss": 0.1476, + "step": 95720 + }, + { + "epoch": 4.47, + "learning_rate": 5.157305660495666e-06, + "loss": 0.5071, + "step": 95725 + }, + { + "epoch": 4.47, + "learning_rate": 5.156521875440879e-06, + "loss": 0.0948, + "step": 95730 + }, + { + "epoch": 4.47, + "learning_rate": 5.155738090386093e-06, + "loss": 0.0133, + "step": 95735 + }, + { + "epoch": 4.47, + "learning_rate": 5.154954305331306e-06, + "loss": 0.034, + "step": 95740 + }, + { + "epoch": 4.47, + "learning_rate": 5.15417052027652e-06, + "loss": 0.0286, + "step": 95745 + }, + { + "epoch": 4.47, + "learning_rate": 5.153386735221734e-06, + "loss": 0.0403, + "step": 95750 + }, + { + "epoch": 4.47, + "learning_rate": 5.152602950166946e-06, + "loss": 0.0946, + "step": 95755 + }, + { + "epoch": 4.47, + "learning_rate": 5.151819165112159e-06, + "loss": 0.0878, + "step": 95760 + }, + { + "epoch": 4.47, + "learning_rate": 5.151035380057373e-06, + "loss": 0.1599, + "step": 95765 + }, + { + "epoch": 4.47, + "learning_rate": 5.150251595002586e-06, + "loss": 0.1451, + "step": 95770 + }, + { + "epoch": 4.47, + "learning_rate": 5.1494678099478e-06, + "loss": 0.2104, + "step": 95775 + }, + { + "epoch": 4.47, + "learning_rate": 5.148684024893013e-06, + "loss": 0.063, + "step": 95780 + }, + { + "epoch": 4.47, + "learning_rate": 5.147900239838227e-06, + "loss": 0.0221, + "step": 95785 + }, + { + "epoch": 4.47, + "learning_rate": 5.147116454783441e-06, + "loss": 0.0417, + "step": 95790 + }, + { + "epoch": 4.47, + "learning_rate": 5.146332669728654e-06, + "loss": 0.082, + "step": 95795 + }, + { + "epoch": 4.47, + "learning_rate": 5.145548884673868e-06, + "loss": 0.0665, + "step": 95800 + }, + { + "epoch": 4.47, + "learning_rate": 5.144765099619081e-06, + "loss": 0.0728, + "step": 95805 + }, + { + "epoch": 4.47, + "learning_rate": 5.143981314564295e-06, + "loss": 0.1377, + "step": 95810 + }, + { + "epoch": 4.47, + "learning_rate": 5.143197529509508e-06, + "loss": 0.1398, + "step": 95815 + }, + { + "epoch": 4.47, + "learning_rate": 5.14241374445472e-06, + "loss": 0.101, + "step": 95820 + }, + { + "epoch": 4.47, + "learning_rate": 5.141629959399934e-06, + "loss": 0.3332, + "step": 95825 + }, + { + "epoch": 4.47, + "learning_rate": 5.140846174345147e-06, + "loss": 0.0265, + "step": 95830 + }, + { + "epoch": 4.47, + "learning_rate": 5.140062389290361e-06, + "loss": 0.0367, + "step": 95835 + }, + { + "epoch": 4.47, + "learning_rate": 5.139278604235575e-06, + "loss": 0.0642, + "step": 95840 + }, + { + "epoch": 4.47, + "learning_rate": 5.138494819180788e-06, + "loss": 0.0294, + "step": 95845 + }, + { + "epoch": 4.47, + "learning_rate": 5.137711034126002e-06, + "loss": 0.0645, + "step": 95850 + }, + { + "epoch": 4.47, + "learning_rate": 5.136927249071215e-06, + "loss": 0.0848, + "step": 95855 + }, + { + "epoch": 4.47, + "learning_rate": 5.136143464016429e-06, + "loss": 0.0672, + "step": 95860 + }, + { + "epoch": 4.47, + "learning_rate": 5.135359678961642e-06, + "loss": 0.0753, + "step": 95865 + }, + { + "epoch": 4.47, + "learning_rate": 5.134575893906856e-06, + "loss": 0.1993, + "step": 95870 + }, + { + "epoch": 4.47, + "learning_rate": 5.133792108852069e-06, + "loss": 0.3417, + "step": 95875 + }, + { + "epoch": 4.47, + "learning_rate": 5.133008323797283e-06, + "loss": 0.1209, + "step": 95880 + }, + { + "epoch": 4.47, + "learning_rate": 5.132224538742495e-06, + "loss": 0.0442, + "step": 95885 + }, + { + "epoch": 4.47, + "learning_rate": 5.131440753687709e-06, + "loss": 0.0433, + "step": 95890 + }, + { + "epoch": 4.47, + "learning_rate": 5.130656968632922e-06, + "loss": 0.04, + "step": 95895 + }, + { + "epoch": 4.47, + "learning_rate": 5.129873183578136e-06, + "loss": 0.0476, + "step": 95900 + }, + { + "epoch": 4.48, + "learning_rate": 5.129089398523349e-06, + "loss": 0.0567, + "step": 95905 + }, + { + "epoch": 4.48, + "learning_rate": 5.128305613468563e-06, + "loss": 0.1427, + "step": 95910 + }, + { + "epoch": 4.48, + "learning_rate": 5.127521828413776e-06, + "loss": 0.1675, + "step": 95915 + }, + { + "epoch": 4.48, + "learning_rate": 5.12673804335899e-06, + "loss": 0.1834, + "step": 95920 + }, + { + "epoch": 4.48, + "learning_rate": 5.125954258304203e-06, + "loss": 0.2382, + "step": 95925 + }, + { + "epoch": 4.48, + "learning_rate": 5.125170473249417e-06, + "loss": 0.0803, + "step": 95930 + }, + { + "epoch": 4.48, + "learning_rate": 5.12438668819463e-06, + "loss": 0.0404, + "step": 95935 + }, + { + "epoch": 4.48, + "learning_rate": 5.123602903139844e-06, + "loss": 0.0186, + "step": 95940 + }, + { + "epoch": 4.48, + "learning_rate": 5.122819118085057e-06, + "loss": 0.049, + "step": 95945 + }, + { + "epoch": 4.48, + "learning_rate": 5.12203533303027e-06, + "loss": 0.0433, + "step": 95950 + }, + { + "epoch": 4.48, + "learning_rate": 5.121251547975483e-06, + "loss": 0.0587, + "step": 95955 + }, + { + "epoch": 4.48, + "learning_rate": 5.120467762920697e-06, + "loss": 0.0541, + "step": 95960 + }, + { + "epoch": 4.48, + "learning_rate": 5.11968397786591e-06, + "loss": 0.1232, + "step": 95965 + }, + { + "epoch": 4.48, + "learning_rate": 5.118900192811124e-06, + "loss": 0.215, + "step": 95970 + }, + { + "epoch": 4.48, + "learning_rate": 5.118116407756337e-06, + "loss": 0.2574, + "step": 95975 + }, + { + "epoch": 4.48, + "learning_rate": 5.117332622701551e-06, + "loss": 0.0626, + "step": 95980 + }, + { + "epoch": 4.48, + "learning_rate": 5.116548837646764e-06, + "loss": 0.0274, + "step": 95985 + }, + { + "epoch": 4.48, + "learning_rate": 5.115765052591978e-06, + "loss": 0.0429, + "step": 95990 + }, + { + "epoch": 4.48, + "learning_rate": 5.114981267537191e-06, + "loss": 0.0308, + "step": 95995 + }, + { + "epoch": 4.48, + "learning_rate": 5.114197482482405e-06, + "loss": 0.0277, + "step": 96000 + }, + { + "epoch": 4.48, + "learning_rate": 5.113413697427618e-06, + "loss": 0.0708, + "step": 96005 + }, + { + "epoch": 4.48, + "learning_rate": 5.112629912372832e-06, + "loss": 0.0683, + "step": 96010 + }, + { + "epoch": 4.48, + "learning_rate": 5.111846127318044e-06, + "loss": 0.1439, + "step": 96015 + }, + { + "epoch": 4.48, + "learning_rate": 5.111062342263258e-06, + "loss": 0.2307, + "step": 96020 + }, + { + "epoch": 4.48, + "learning_rate": 5.110278557208471e-06, + "loss": 0.2271, + "step": 96025 + }, + { + "epoch": 4.48, + "learning_rate": 5.109494772153685e-06, + "loss": 0.0971, + "step": 96030 + }, + { + "epoch": 4.48, + "learning_rate": 5.108710987098898e-06, + "loss": 0.0569, + "step": 96035 + }, + { + "epoch": 4.48, + "learning_rate": 5.107927202044112e-06, + "loss": 0.0343, + "step": 96040 + }, + { + "epoch": 4.48, + "learning_rate": 5.107143416989325e-06, + "loss": 0.0377, + "step": 96045 + }, + { + "epoch": 4.48, + "learning_rate": 5.106359631934539e-06, + "loss": 0.0933, + "step": 96050 + }, + { + "epoch": 4.48, + "learning_rate": 5.105575846879753e-06, + "loss": 0.0564, + "step": 96055 + }, + { + "epoch": 4.48, + "learning_rate": 5.104792061824966e-06, + "loss": 0.0657, + "step": 96060 + }, + { + "epoch": 4.48, + "learning_rate": 5.10400827677018e-06, + "loss": 0.0747, + "step": 96065 + }, + { + "epoch": 4.48, + "learning_rate": 5.103224491715393e-06, + "loss": 0.1499, + "step": 96070 + }, + { + "epoch": 4.48, + "learning_rate": 5.102440706660607e-06, + "loss": 0.2215, + "step": 96075 + }, + { + "epoch": 4.48, + "learning_rate": 5.101656921605819e-06, + "loss": 0.0642, + "step": 96080 + }, + { + "epoch": 4.48, + "learning_rate": 5.100873136551032e-06, + "loss": 0.0254, + "step": 96085 + }, + { + "epoch": 4.48, + "learning_rate": 5.100089351496246e-06, + "loss": 0.0405, + "step": 96090 + }, + { + "epoch": 4.48, + "learning_rate": 5.099305566441459e-06, + "loss": 0.0456, + "step": 96095 + }, + { + "epoch": 4.48, + "learning_rate": 5.098521781386673e-06, + "loss": 0.0553, + "step": 96100 + }, + { + "epoch": 4.48, + "learning_rate": 5.097737996331887e-06, + "loss": 0.0686, + "step": 96105 + }, + { + "epoch": 4.48, + "learning_rate": 5.0969542112771e-06, + "loss": 0.1628, + "step": 96110 + }, + { + "epoch": 4.48, + "learning_rate": 5.096170426222314e-06, + "loss": 0.1578, + "step": 96115 + }, + { + "epoch": 4.49, + "learning_rate": 5.095386641167527e-06, + "loss": 0.1804, + "step": 96120 + }, + { + "epoch": 4.49, + "learning_rate": 5.094602856112741e-06, + "loss": 0.2932, + "step": 96125 + }, + { + "epoch": 4.49, + "learning_rate": 5.093819071057954e-06, + "loss": 0.0573, + "step": 96130 + }, + { + "epoch": 4.49, + "learning_rate": 5.0930352860031676e-06, + "loss": 0.0141, + "step": 96135 + }, + { + "epoch": 4.49, + "learning_rate": 5.092251500948381e-06, + "loss": 0.0573, + "step": 96140 + }, + { + "epoch": 4.49, + "learning_rate": 5.091467715893593e-06, + "loss": 0.0622, + "step": 96145 + }, + { + "epoch": 4.49, + "learning_rate": 5.090683930838807e-06, + "loss": 0.0876, + "step": 96150 + }, + { + "epoch": 4.49, + "learning_rate": 5.089900145784021e-06, + "loss": 0.091, + "step": 96155 + }, + { + "epoch": 4.49, + "learning_rate": 5.089116360729234e-06, + "loss": 0.064, + "step": 96160 + }, + { + "epoch": 4.49, + "learning_rate": 5.088332575674448e-06, + "loss": 0.1033, + "step": 96165 + }, + { + "epoch": 4.49, + "learning_rate": 5.087548790619661e-06, + "loss": 0.1584, + "step": 96170 + }, + { + "epoch": 4.49, + "learning_rate": 5.0867650055648746e-06, + "loss": 0.3323, + "step": 96175 + }, + { + "epoch": 4.49, + "learning_rate": 5.085981220510088e-06, + "loss": 0.0875, + "step": 96180 + }, + { + "epoch": 4.49, + "learning_rate": 5.0851974354553015e-06, + "loss": 0.0535, + "step": 96185 + }, + { + "epoch": 4.49, + "learning_rate": 5.084413650400515e-06, + "loss": 0.0159, + "step": 96190 + }, + { + "epoch": 4.49, + "learning_rate": 5.0836298653457285e-06, + "loss": 0.0927, + "step": 96195 + }, + { + "epoch": 4.49, + "learning_rate": 5.0828460802909416e-06, + "loss": 0.0372, + "step": 96200 + }, + { + "epoch": 4.49, + "learning_rate": 5.0820622952361555e-06, + "loss": 0.0431, + "step": 96205 + }, + { + "epoch": 4.49, + "learning_rate": 5.081278510181368e-06, + "loss": 0.0643, + "step": 96210 + }, + { + "epoch": 4.49, + "learning_rate": 5.080494725126582e-06, + "loss": 0.036, + "step": 96215 + }, + { + "epoch": 4.49, + "learning_rate": 5.079710940071795e-06, + "loss": 0.1155, + "step": 96220 + }, + { + "epoch": 4.49, + "learning_rate": 5.0789271550170085e-06, + "loss": 0.2546, + "step": 96225 + }, + { + "epoch": 4.49, + "learning_rate": 5.078143369962222e-06, + "loss": 0.0981, + "step": 96230 + }, + { + "epoch": 4.49, + "learning_rate": 5.0773595849074355e-06, + "loss": 0.0233, + "step": 96235 + }, + { + "epoch": 4.49, + "learning_rate": 5.0765757998526486e-06, + "loss": 0.022, + "step": 96240 + }, + { + "epoch": 4.49, + "learning_rate": 5.0757920147978625e-06, + "loss": 0.0384, + "step": 96245 + }, + { + "epoch": 4.49, + "learning_rate": 5.0750082297430755e-06, + "loss": 0.0542, + "step": 96250 + }, + { + "epoch": 4.49, + "learning_rate": 5.0742244446882894e-06, + "loss": 0.061, + "step": 96255 + }, + { + "epoch": 4.49, + "learning_rate": 5.0734406596335025e-06, + "loss": 0.1771, + "step": 96260 + }, + { + "epoch": 4.49, + "learning_rate": 5.072656874578716e-06, + "loss": 0.0665, + "step": 96265 + }, + { + "epoch": 4.49, + "learning_rate": 5.0718730895239295e-06, + "loss": 0.1996, + "step": 96270 + }, + { + "epoch": 4.49, + "learning_rate": 5.0710893044691425e-06, + "loss": 0.2324, + "step": 96275 + }, + { + "epoch": 4.49, + "learning_rate": 5.070305519414356e-06, + "loss": 0.0724, + "step": 96280 + }, + { + "epoch": 4.49, + "learning_rate": 5.0695217343595695e-06, + "loss": 0.0442, + "step": 96285 + }, + { + "epoch": 4.49, + "learning_rate": 5.0687379493047825e-06, + "loss": 0.0201, + "step": 96290 + }, + { + "epoch": 4.49, + "learning_rate": 5.0679541642499965e-06, + "loss": 0.0359, + "step": 96295 + }, + { + "epoch": 4.49, + "learning_rate": 5.0671703791952095e-06, + "loss": 0.0216, + "step": 96300 + }, + { + "epoch": 4.49, + "learning_rate": 5.066386594140423e-06, + "loss": 0.0556, + "step": 96305 + }, + { + "epoch": 4.49, + "learning_rate": 5.0656028090856365e-06, + "loss": 0.1117, + "step": 96310 + }, + { + "epoch": 4.49, + "learning_rate": 5.06481902403085e-06, + "loss": 0.0927, + "step": 96315 + }, + { + "epoch": 4.49, + "learning_rate": 5.0640352389760634e-06, + "loss": 0.1306, + "step": 96320 + }, + { + "epoch": 4.49, + "learning_rate": 5.063251453921277e-06, + "loss": 0.4051, + "step": 96325 + }, + { + "epoch": 4.49, + "learning_rate": 5.062467668866491e-06, + "loss": 0.0428, + "step": 96330 + }, + { + "epoch": 4.5, + "learning_rate": 5.061683883811704e-06, + "loss": 0.024, + "step": 96335 + }, + { + "epoch": 4.5, + "learning_rate": 5.0609000987569165e-06, + "loss": 0.067, + "step": 96340 + }, + { + "epoch": 4.5, + "learning_rate": 5.0601163137021304e-06, + "loss": 0.0682, + "step": 96345 + }, + { + "epoch": 4.5, + "learning_rate": 5.0593325286473435e-06, + "loss": 0.0241, + "step": 96350 + }, + { + "epoch": 4.5, + "learning_rate": 5.058548743592557e-06, + "loss": 0.0588, + "step": 96355 + }, + { + "epoch": 4.5, + "learning_rate": 5.0577649585377705e-06, + "loss": 0.0678, + "step": 96360 + }, + { + "epoch": 4.5, + "learning_rate": 5.056981173482984e-06, + "loss": 0.0742, + "step": 96365 + }, + { + "epoch": 4.5, + "learning_rate": 5.056197388428198e-06, + "loss": 0.2118, + "step": 96370 + }, + { + "epoch": 4.5, + "learning_rate": 5.055413603373411e-06, + "loss": 0.1926, + "step": 96375 + }, + { + "epoch": 4.5, + "learning_rate": 5.054629818318625e-06, + "loss": 0.1095, + "step": 96380 + }, + { + "epoch": 4.5, + "learning_rate": 5.053846033263838e-06, + "loss": 0.037, + "step": 96385 + }, + { + "epoch": 4.5, + "learning_rate": 5.053062248209052e-06, + "loss": 0.0292, + "step": 96390 + }, + { + "epoch": 4.5, + "learning_rate": 5.052278463154265e-06, + "loss": 0.0827, + "step": 96395 + }, + { + "epoch": 4.5, + "learning_rate": 5.051494678099479e-06, + "loss": 0.043, + "step": 96400 + }, + { + "epoch": 4.5, + "learning_rate": 5.050710893044691e-06, + "loss": 0.0861, + "step": 96405 + }, + { + "epoch": 4.5, + "learning_rate": 5.0499271079899044e-06, + "loss": 0.0493, + "step": 96410 + }, + { + "epoch": 4.5, + "learning_rate": 5.049143322935118e-06, + "loss": 0.0618, + "step": 96415 + }, + { + "epoch": 4.5, + "learning_rate": 5.048359537880332e-06, + "loss": 0.1525, + "step": 96420 + }, + { + "epoch": 4.5, + "learning_rate": 5.047575752825545e-06, + "loss": 0.3023, + "step": 96425 + }, + { + "epoch": 4.5, + "learning_rate": 5.046791967770759e-06, + "loss": 0.0925, + "step": 96430 + }, + { + "epoch": 4.5, + "learning_rate": 5.046008182715972e-06, + "loss": 0.0056, + "step": 96435 + }, + { + "epoch": 4.5, + "learning_rate": 5.045224397661186e-06, + "loss": 0.0344, + "step": 96440 + }, + { + "epoch": 4.5, + "learning_rate": 5.044440612606399e-06, + "loss": 0.0139, + "step": 96445 + }, + { + "epoch": 4.5, + "learning_rate": 5.043656827551613e-06, + "loss": 0.0358, + "step": 96450 + }, + { + "epoch": 4.5, + "learning_rate": 5.042873042496826e-06, + "loss": 0.0974, + "step": 96455 + }, + { + "epoch": 4.5, + "learning_rate": 5.04208925744204e-06, + "loss": 0.0716, + "step": 96460 + }, + { + "epoch": 4.5, + "learning_rate": 5.041305472387253e-06, + "loss": 0.0823, + "step": 96465 + }, + { + "epoch": 4.5, + "learning_rate": 5.040521687332466e-06, + "loss": 0.1722, + "step": 96470 + }, + { + "epoch": 4.5, + "learning_rate": 5.039737902277679e-06, + "loss": 0.3896, + "step": 96475 + }, + { + "epoch": 4.5, + "learning_rate": 5.038954117222893e-06, + "loss": 0.0872, + "step": 96480 + }, + { + "epoch": 4.5, + "learning_rate": 5.038170332168106e-06, + "loss": 0.016, + "step": 96485 + }, + { + "epoch": 4.5, + "learning_rate": 5.03738654711332e-06, + "loss": 0.0659, + "step": 96490 + }, + { + "epoch": 4.5, + "learning_rate": 5.036602762058533e-06, + "loss": 0.0786, + "step": 96495 + }, + { + "epoch": 4.5, + "learning_rate": 5.035818977003747e-06, + "loss": 0.0762, + "step": 96500 + }, + { + "epoch": 4.5, + "learning_rate": 5.03503519194896e-06, + "loss": 0.1367, + "step": 96505 + }, + { + "epoch": 4.5, + "learning_rate": 5.034251406894174e-06, + "loss": 0.0624, + "step": 96510 + }, + { + "epoch": 4.5, + "learning_rate": 5.033467621839387e-06, + "loss": 0.1344, + "step": 96515 + }, + { + "epoch": 4.5, + "learning_rate": 5.032683836784601e-06, + "loss": 0.1567, + "step": 96520 + }, + { + "epoch": 4.5, + "learning_rate": 5.031900051729814e-06, + "loss": 0.2945, + "step": 96525 + }, + { + "epoch": 4.5, + "learning_rate": 5.031116266675028e-06, + "loss": 0.1039, + "step": 96530 + }, + { + "epoch": 4.5, + "learning_rate": 5.03033248162024e-06, + "loss": 0.0075, + "step": 96535 + }, + { + "epoch": 4.5, + "learning_rate": 5.029548696565454e-06, + "loss": 0.0324, + "step": 96540 + }, + { + "epoch": 4.5, + "learning_rate": 5.028764911510667e-06, + "loss": 0.047, + "step": 96545 + }, + { + "epoch": 4.51, + "learning_rate": 5.027981126455881e-06, + "loss": 0.0376, + "step": 96550 + }, + { + "epoch": 4.51, + "learning_rate": 5.027197341401094e-06, + "loss": 0.058, + "step": 96555 + }, + { + "epoch": 4.51, + "learning_rate": 5.026413556346308e-06, + "loss": 0.145, + "step": 96560 + }, + { + "epoch": 4.51, + "learning_rate": 5.025629771291521e-06, + "loss": 0.0721, + "step": 96565 + }, + { + "epoch": 4.51, + "learning_rate": 5.024845986236735e-06, + "loss": 0.196, + "step": 96570 + }, + { + "epoch": 4.51, + "learning_rate": 5.024062201181948e-06, + "loss": 0.3162, + "step": 96575 + }, + { + "epoch": 4.51, + "learning_rate": 5.023278416127162e-06, + "loss": 0.0714, + "step": 96580 + }, + { + "epoch": 4.51, + "learning_rate": 5.022494631072375e-06, + "loss": 0.0322, + "step": 96585 + }, + { + "epoch": 4.51, + "learning_rate": 5.021710846017589e-06, + "loss": 0.0425, + "step": 96590 + }, + { + "epoch": 4.51, + "learning_rate": 5.020927060962803e-06, + "loss": 0.0744, + "step": 96595 + }, + { + "epoch": 4.51, + "learning_rate": 5.020143275908015e-06, + "loss": 0.0285, + "step": 96600 + }, + { + "epoch": 4.51, + "learning_rate": 5.019359490853228e-06, + "loss": 0.123, + "step": 96605 + }, + { + "epoch": 4.51, + "learning_rate": 5.018575705798442e-06, + "loss": 0.0877, + "step": 96610 + }, + { + "epoch": 4.51, + "learning_rate": 5.017791920743655e-06, + "loss": 0.0745, + "step": 96615 + }, + { + "epoch": 4.51, + "learning_rate": 5.017008135688869e-06, + "loss": 0.1371, + "step": 96620 + }, + { + "epoch": 4.51, + "learning_rate": 5.016224350634082e-06, + "loss": 0.2937, + "step": 96625 + }, + { + "epoch": 4.51, + "learning_rate": 5.015440565579296e-06, + "loss": 0.0385, + "step": 96630 + }, + { + "epoch": 4.51, + "learning_rate": 5.01465678052451e-06, + "loss": 0.0205, + "step": 96635 + }, + { + "epoch": 4.51, + "learning_rate": 5.013872995469723e-06, + "loss": 0.071, + "step": 96640 + }, + { + "epoch": 4.51, + "learning_rate": 5.013089210414937e-06, + "loss": 0.0438, + "step": 96645 + }, + { + "epoch": 4.51, + "learning_rate": 5.01230542536015e-06, + "loss": 0.2251, + "step": 96650 + }, + { + "epoch": 4.51, + "learning_rate": 5.011521640305364e-06, + "loss": 0.1259, + "step": 96655 + }, + { + "epoch": 4.51, + "learning_rate": 5.010737855250577e-06, + "loss": 0.0676, + "step": 96660 + }, + { + "epoch": 4.51, + "learning_rate": 5.009954070195789e-06, + "loss": 0.1151, + "step": 96665 + }, + { + "epoch": 4.51, + "learning_rate": 5.009170285141003e-06, + "loss": 0.1968, + "step": 96670 + }, + { + "epoch": 4.51, + "learning_rate": 5.008386500086216e-06, + "loss": 0.3049, + "step": 96675 + }, + { + "epoch": 4.51, + "learning_rate": 5.00760271503143e-06, + "loss": 0.0801, + "step": 96680 + }, + { + "epoch": 4.51, + "learning_rate": 5.006818929976644e-06, + "loss": 0.0246, + "step": 96685 + }, + { + "epoch": 4.51, + "learning_rate": 5.006035144921857e-06, + "loss": 0.0264, + "step": 96690 + }, + { + "epoch": 4.51, + "learning_rate": 5.005251359867071e-06, + "loss": 0.0641, + "step": 96695 + }, + { + "epoch": 4.51, + "learning_rate": 5.004467574812284e-06, + "loss": 0.0993, + "step": 96700 + }, + { + "epoch": 4.51, + "learning_rate": 5.003683789757498e-06, + "loss": 0.0571, + "step": 96705 + }, + { + "epoch": 4.51, + "learning_rate": 5.002900004702711e-06, + "loss": 0.0889, + "step": 96710 + }, + { + "epoch": 4.51, + "learning_rate": 5.002116219647925e-06, + "loss": 0.0814, + "step": 96715 + }, + { + "epoch": 4.51, + "learning_rate": 5.001332434593138e-06, + "loss": 0.2205, + "step": 96720 + }, + { + "epoch": 4.51, + "learning_rate": 5.000548649538352e-06, + "loss": 0.2633, + "step": 96725 + }, + { + "epoch": 4.51, + "learning_rate": 4.999764864483565e-06, + "loss": 0.1192, + "step": 96730 + }, + { + "epoch": 4.51, + "learning_rate": 4.998981079428778e-06, + "loss": 0.0091, + "step": 96735 + }, + { + "epoch": 4.51, + "learning_rate": 4.998197294373992e-06, + "loss": 0.0728, + "step": 96740 + }, + { + "epoch": 4.51, + "learning_rate": 4.997413509319205e-06, + "loss": 0.048, + "step": 96745 + }, + { + "epoch": 4.51, + "learning_rate": 4.996629724264418e-06, + "loss": 0.0502, + "step": 96750 + }, + { + "epoch": 4.51, + "learning_rate": 4.995845939209632e-06, + "loss": 0.1211, + "step": 96755 + }, + { + "epoch": 4.51, + "learning_rate": 4.995062154154845e-06, + "loss": 0.0787, + "step": 96760 + }, + { + "epoch": 4.52, + "learning_rate": 4.994278369100059e-06, + "loss": 0.1444, + "step": 96765 + }, + { + "epoch": 4.52, + "learning_rate": 4.993494584045272e-06, + "loss": 0.1242, + "step": 96770 + }, + { + "epoch": 4.52, + "learning_rate": 4.992710798990486e-06, + "loss": 0.313, + "step": 96775 + }, + { + "epoch": 4.52, + "learning_rate": 4.991927013935699e-06, + "loss": 0.0916, + "step": 96780 + }, + { + "epoch": 4.52, + "learning_rate": 4.991143228880912e-06, + "loss": 0.0141, + "step": 96785 + }, + { + "epoch": 4.52, + "learning_rate": 4.990359443826126e-06, + "loss": 0.0885, + "step": 96790 + }, + { + "epoch": 4.52, + "learning_rate": 4.989575658771339e-06, + "loss": 0.0595, + "step": 96795 + }, + { + "epoch": 4.52, + "learning_rate": 4.988791873716553e-06, + "loss": 0.0519, + "step": 96800 + }, + { + "epoch": 4.52, + "learning_rate": 4.988008088661766e-06, + "loss": 0.0709, + "step": 96805 + }, + { + "epoch": 4.52, + "learning_rate": 4.987224303606979e-06, + "loss": 0.0593, + "step": 96810 + }, + { + "epoch": 4.52, + "learning_rate": 4.986440518552193e-06, + "loss": 0.0647, + "step": 96815 + }, + { + "epoch": 4.52, + "learning_rate": 4.985656733497406e-06, + "loss": 0.1616, + "step": 96820 + }, + { + "epoch": 4.52, + "learning_rate": 4.98487294844262e-06, + "loss": 0.1433, + "step": 96825 + }, + { + "epoch": 4.52, + "learning_rate": 4.984089163387833e-06, + "loss": 0.0904, + "step": 96830 + }, + { + "epoch": 4.52, + "learning_rate": 4.983305378333047e-06, + "loss": 0.0042, + "step": 96835 + }, + { + "epoch": 4.52, + "learning_rate": 4.982678350289217e-06, + "loss": 0.0378, + "step": 96840 + }, + { + "epoch": 4.52, + "learning_rate": 4.98189456523443e-06, + "loss": 0.0806, + "step": 96845 + }, + { + "epoch": 4.52, + "learning_rate": 4.981110780179644e-06, + "loss": 0.0883, + "step": 96850 + }, + { + "epoch": 4.52, + "learning_rate": 4.980326995124857e-06, + "loss": 0.0702, + "step": 96855 + }, + { + "epoch": 4.52, + "learning_rate": 4.979543210070071e-06, + "loss": 0.0795, + "step": 96860 + }, + { + "epoch": 4.52, + "learning_rate": 4.978759425015284e-06, + "loss": 0.2138, + "step": 96865 + }, + { + "epoch": 4.52, + "learning_rate": 4.977975639960498e-06, + "loss": 0.0481, + "step": 96870 + }, + { + "epoch": 4.52, + "learning_rate": 4.977191854905711e-06, + "loss": 0.3093, + "step": 96875 + }, + { + "epoch": 4.52, + "learning_rate": 4.976408069850924e-06, + "loss": 0.0415, + "step": 96880 + }, + { + "epoch": 4.52, + "learning_rate": 4.975624284796138e-06, + "loss": 0.0263, + "step": 96885 + }, + { + "epoch": 4.52, + "learning_rate": 4.974840499741351e-06, + "loss": 0.0565, + "step": 96890 + }, + { + "epoch": 4.52, + "learning_rate": 4.974056714686565e-06, + "loss": 0.0206, + "step": 96895 + }, + { + "epoch": 4.52, + "learning_rate": 4.973272929631778e-06, + "loss": 0.0273, + "step": 96900 + }, + { + "epoch": 4.52, + "learning_rate": 4.972489144576992e-06, + "loss": 0.0562, + "step": 96905 + }, + { + "epoch": 4.52, + "learning_rate": 4.971705359522205e-06, + "loss": 0.1214, + "step": 96910 + }, + { + "epoch": 4.52, + "learning_rate": 4.970921574467418e-06, + "loss": 0.059, + "step": 96915 + }, + { + "epoch": 4.52, + "learning_rate": 4.970137789412632e-06, + "loss": 0.1313, + "step": 96920 + }, + { + "epoch": 4.52, + "learning_rate": 4.969354004357845e-06, + "loss": 0.1764, + "step": 96925 + }, + { + "epoch": 4.52, + "learning_rate": 4.968570219303059e-06, + "loss": 0.0834, + "step": 96930 + }, + { + "epoch": 4.52, + "learning_rate": 4.967786434248272e-06, + "loss": 0.0578, + "step": 96935 + }, + { + "epoch": 4.52, + "learning_rate": 4.967002649193486e-06, + "loss": 0.0122, + "step": 96940 + }, + { + "epoch": 4.52, + "learning_rate": 4.966218864138699e-06, + "loss": 0.0402, + "step": 96945 + }, + { + "epoch": 4.52, + "learning_rate": 4.965435079083912e-06, + "loss": 0.0754, + "step": 96950 + }, + { + "epoch": 4.52, + "learning_rate": 4.964651294029126e-06, + "loss": 0.1326, + "step": 96955 + }, + { + "epoch": 4.52, + "learning_rate": 4.963867508974339e-06, + "loss": 0.0854, + "step": 96960 + }, + { + "epoch": 4.52, + "learning_rate": 4.963083723919553e-06, + "loss": 0.1199, + "step": 96965 + }, + { + "epoch": 4.52, + "learning_rate": 4.962299938864767e-06, + "loss": 0.157, + "step": 96970 + }, + { + "epoch": 4.52, + "learning_rate": 4.961516153809979e-06, + "loss": 0.2244, + "step": 96975 + }, + { + "epoch": 4.53, + "learning_rate": 4.960732368755193e-06, + "loss": 0.0522, + "step": 96980 + }, + { + "epoch": 4.53, + "learning_rate": 4.959948583700406e-06, + "loss": 0.03, + "step": 96985 + }, + { + "epoch": 4.53, + "learning_rate": 4.95916479864562e-06, + "loss": 0.0559, + "step": 96990 + }, + { + "epoch": 4.53, + "learning_rate": 4.958381013590834e-06, + "loss": 0.029, + "step": 96995 + }, + { + "epoch": 4.53, + "learning_rate": 4.957597228536047e-06, + "loss": 0.017, + "step": 97000 + }, + { + "epoch": 4.53, + "learning_rate": 4.956813443481261e-06, + "loss": 0.0463, + "step": 97005 + }, + { + "epoch": 4.53, + "learning_rate": 4.956029658426473e-06, + "loss": 0.0514, + "step": 97010 + }, + { + "epoch": 4.53, + "learning_rate": 4.955245873371687e-06, + "loss": 0.1119, + "step": 97015 + }, + { + "epoch": 4.53, + "learning_rate": 4.954462088316901e-06, + "loss": 0.1411, + "step": 97020 + }, + { + "epoch": 4.53, + "learning_rate": 4.953678303262114e-06, + "loss": 0.4395, + "step": 97025 + }, + { + "epoch": 4.53, + "learning_rate": 4.952894518207328e-06, + "loss": 0.1218, + "step": 97030 + }, + { + "epoch": 4.53, + "learning_rate": 4.952110733152541e-06, + "loss": 0.0304, + "step": 97035 + }, + { + "epoch": 4.53, + "learning_rate": 4.951326948097754e-06, + "loss": 0.0219, + "step": 97040 + }, + { + "epoch": 4.53, + "learning_rate": 4.950543163042968e-06, + "loss": 0.0266, + "step": 97045 + }, + { + "epoch": 4.53, + "learning_rate": 4.949759377988181e-06, + "loss": 0.0438, + "step": 97050 + }, + { + "epoch": 4.53, + "learning_rate": 4.948975592933395e-06, + "loss": 0.0769, + "step": 97055 + }, + { + "epoch": 4.53, + "learning_rate": 4.948191807878608e-06, + "loss": 0.0346, + "step": 97060 + }, + { + "epoch": 4.53, + "learning_rate": 4.947408022823822e-06, + "loss": 0.1147, + "step": 97065 + }, + { + "epoch": 4.53, + "learning_rate": 4.946624237769035e-06, + "loss": 0.1371, + "step": 97070 + }, + { + "epoch": 4.53, + "learning_rate": 4.945840452714248e-06, + "loss": 0.3511, + "step": 97075 + }, + { + "epoch": 4.53, + "learning_rate": 4.945056667659462e-06, + "loss": 0.1057, + "step": 97080 + }, + { + "epoch": 4.53, + "learning_rate": 4.944272882604675e-06, + "loss": 0.0914, + "step": 97085 + }, + { + "epoch": 4.53, + "learning_rate": 4.943489097549889e-06, + "loss": 0.0389, + "step": 97090 + }, + { + "epoch": 4.53, + "learning_rate": 4.942705312495102e-06, + "loss": 0.0314, + "step": 97095 + }, + { + "epoch": 4.53, + "learning_rate": 4.941921527440316e-06, + "loss": 0.0409, + "step": 97100 + }, + { + "epoch": 4.53, + "learning_rate": 4.941137742385529e-06, + "loss": 0.0336, + "step": 97105 + }, + { + "epoch": 4.53, + "learning_rate": 4.940353957330742e-06, + "loss": 0.0996, + "step": 97110 + }, + { + "epoch": 4.53, + "learning_rate": 4.939570172275956e-06, + "loss": 0.1607, + "step": 97115 + }, + { + "epoch": 4.53, + "learning_rate": 4.938786387221169e-06, + "loss": 0.1231, + "step": 97120 + }, + { + "epoch": 4.53, + "learning_rate": 4.938002602166383e-06, + "loss": 0.3094, + "step": 97125 + }, + { + "epoch": 4.53, + "learning_rate": 4.937218817111596e-06, + "loss": 0.0607, + "step": 97130 + }, + { + "epoch": 4.53, + "learning_rate": 4.9364350320568095e-06, + "loss": 0.0167, + "step": 97135 + }, + { + "epoch": 4.53, + "learning_rate": 4.935651247002023e-06, + "loss": 0.0365, + "step": 97140 + }, + { + "epoch": 4.53, + "learning_rate": 4.934867461947236e-06, + "loss": 0.0406, + "step": 97145 + }, + { + "epoch": 4.53, + "learning_rate": 4.9340836768924496e-06, + "loss": 0.0798, + "step": 97150 + }, + { + "epoch": 4.53, + "learning_rate": 4.933299891837663e-06, + "loss": 0.0943, + "step": 97155 + }, + { + "epoch": 4.53, + "learning_rate": 4.9325161067828765e-06, + "loss": 0.1016, + "step": 97160 + }, + { + "epoch": 4.53, + "learning_rate": 4.93173232172809e-06, + "loss": 0.1101, + "step": 97165 + }, + { + "epoch": 4.53, + "learning_rate": 4.930948536673303e-06, + "loss": 0.1823, + "step": 97170 + }, + { + "epoch": 4.53, + "learning_rate": 4.9301647516185166e-06, + "loss": 0.3086, + "step": 97175 + }, + { + "epoch": 4.53, + "learning_rate": 4.92938096656373e-06, + "loss": 0.0975, + "step": 97180 + }, + { + "epoch": 4.53, + "learning_rate": 4.9285971815089435e-06, + "loss": 0.0212, + "step": 97185 + }, + { + "epoch": 4.54, + "learning_rate": 4.9278133964541566e-06, + "loss": 0.0369, + "step": 97190 + }, + { + "epoch": 4.54, + "learning_rate": 4.9270296113993705e-06, + "loss": 0.0369, + "step": 97195 + }, + { + "epoch": 4.54, + "learning_rate": 4.9262458263445835e-06, + "loss": 0.0732, + "step": 97200 + }, + { + "epoch": 4.54, + "learning_rate": 4.925462041289797e-06, + "loss": 0.0529, + "step": 97205 + }, + { + "epoch": 4.54, + "learning_rate": 4.9246782562350105e-06, + "loss": 0.1272, + "step": 97210 + }, + { + "epoch": 4.54, + "learning_rate": 4.9238944711802236e-06, + "loss": 0.0712, + "step": 97215 + }, + { + "epoch": 4.54, + "learning_rate": 4.9231106861254375e-06, + "loss": 0.1005, + "step": 97220 + }, + { + "epoch": 4.54, + "learning_rate": 4.9223269010706505e-06, + "loss": 0.2915, + "step": 97225 + }, + { + "epoch": 4.54, + "learning_rate": 4.9215431160158644e-06, + "loss": 0.1004, + "step": 97230 + }, + { + "epoch": 4.54, + "learning_rate": 4.9207593309610775e-06, + "loss": 0.0223, + "step": 97235 + }, + { + "epoch": 4.54, + "learning_rate": 4.9199755459062906e-06, + "loss": 0.0694, + "step": 97240 + }, + { + "epoch": 4.54, + "learning_rate": 4.9191917608515045e-06, + "loss": 0.0434, + "step": 97245 + }, + { + "epoch": 4.54, + "learning_rate": 4.9184079757967175e-06, + "loss": 0.0165, + "step": 97250 + }, + { + "epoch": 4.54, + "learning_rate": 4.917780947752889e-06, + "loss": 0.0741, + "step": 97255 + }, + { + "epoch": 4.54, + "learning_rate": 4.916997162698102e-06, + "loss": 0.0778, + "step": 97260 + }, + { + "epoch": 4.54, + "learning_rate": 4.916213377643316e-06, + "loss": 0.1341, + "step": 97265 + }, + { + "epoch": 4.54, + "learning_rate": 4.915429592588529e-06, + "loss": 0.1537, + "step": 97270 + }, + { + "epoch": 4.54, + "learning_rate": 4.914645807533742e-06, + "loss": 0.148, + "step": 97275 + }, + { + "epoch": 4.54, + "learning_rate": 4.913862022478956e-06, + "loss": 0.0608, + "step": 97280 + }, + { + "epoch": 4.54, + "learning_rate": 4.913078237424169e-06, + "loss": 0.0336, + "step": 97285 + }, + { + "epoch": 4.54, + "learning_rate": 4.912294452369383e-06, + "loss": 0.036, + "step": 97290 + }, + { + "epoch": 4.54, + "learning_rate": 4.911510667314597e-06, + "loss": 0.0348, + "step": 97295 + }, + { + "epoch": 4.54, + "learning_rate": 4.910726882259809e-06, + "loss": 0.02, + "step": 97300 + }, + { + "epoch": 4.54, + "learning_rate": 4.909943097205023e-06, + "loss": 0.045, + "step": 97305 + }, + { + "epoch": 4.54, + "learning_rate": 4.909159312150236e-06, + "loss": 0.0736, + "step": 97310 + }, + { + "epoch": 4.54, + "learning_rate": 4.90837552709545e-06, + "loss": 0.1461, + "step": 97315 + }, + { + "epoch": 4.54, + "learning_rate": 4.907591742040664e-06, + "loss": 0.1316, + "step": 97320 + }, + { + "epoch": 4.54, + "learning_rate": 4.906807956985877e-06, + "loss": 0.2203, + "step": 97325 + }, + { + "epoch": 4.54, + "learning_rate": 4.906024171931091e-06, + "loss": 0.1042, + "step": 97330 + }, + { + "epoch": 4.54, + "learning_rate": 4.905240386876303e-06, + "loss": 0.0091, + "step": 97335 + }, + { + "epoch": 4.54, + "learning_rate": 4.904456601821517e-06, + "loss": 0.0188, + "step": 97340 + }, + { + "epoch": 4.54, + "learning_rate": 4.903672816766731e-06, + "loss": 0.0192, + "step": 97345 + }, + { + "epoch": 4.54, + "learning_rate": 4.902889031711944e-06, + "loss": 0.0486, + "step": 97350 + }, + { + "epoch": 4.54, + "learning_rate": 4.902105246657158e-06, + "loss": 0.1115, + "step": 97355 + }, + { + "epoch": 4.54, + "learning_rate": 4.901321461602371e-06, + "loss": 0.1391, + "step": 97360 + }, + { + "epoch": 4.54, + "learning_rate": 4.900537676547584e-06, + "loss": 0.1454, + "step": 97365 + }, + { + "epoch": 4.54, + "learning_rate": 4.899753891492798e-06, + "loss": 0.0928, + "step": 97370 + }, + { + "epoch": 4.54, + "learning_rate": 4.898970106438011e-06, + "loss": 0.4642, + "step": 97375 + }, + { + "epoch": 4.54, + "learning_rate": 4.898186321383225e-06, + "loss": 0.1042, + "step": 97380 + }, + { + "epoch": 4.54, + "learning_rate": 4.897402536328438e-06, + "loss": 0.0262, + "step": 97385 + }, + { + "epoch": 4.54, + "learning_rate": 4.8966187512736516e-06, + "loss": 0.0667, + "step": 97390 + }, + { + "epoch": 4.54, + "learning_rate": 4.895834966218865e-06, + "loss": 0.0581, + "step": 97395 + }, + { + "epoch": 4.54, + "learning_rate": 4.895051181164078e-06, + "loss": 0.0437, + "step": 97400 + }, + { + "epoch": 4.55, + "learning_rate": 4.894267396109292e-06, + "loss": 0.0215, + "step": 97405 + }, + { + "epoch": 4.55, + "learning_rate": 4.893483611054505e-06, + "loss": 0.0662, + "step": 97410 + }, + { + "epoch": 4.55, + "learning_rate": 4.8926998259997185e-06, + "loss": 0.1121, + "step": 97415 + }, + { + "epoch": 4.55, + "learning_rate": 4.891916040944932e-06, + "loss": 0.1535, + "step": 97420 + }, + { + "epoch": 4.55, + "learning_rate": 4.8911322558901455e-06, + "loss": 0.3421, + "step": 97425 + }, + { + "epoch": 4.55, + "learning_rate": 4.8903484708353586e-06, + "loss": 0.0633, + "step": 97430 + }, + { + "epoch": 4.55, + "learning_rate": 4.889564685780572e-06, + "loss": 0.0489, + "step": 97435 + }, + { + "epoch": 4.55, + "learning_rate": 4.8887809007257855e-06, + "loss": 0.0765, + "step": 97440 + }, + { + "epoch": 4.55, + "learning_rate": 4.887997115670999e-06, + "loss": 0.0729, + "step": 97445 + }, + { + "epoch": 4.55, + "learning_rate": 4.8872133306162125e-06, + "loss": 0.0675, + "step": 97450 + }, + { + "epoch": 4.55, + "learning_rate": 4.8864295455614256e-06, + "loss": 0.0514, + "step": 97455 + }, + { + "epoch": 4.55, + "learning_rate": 4.8856457605066395e-06, + "loss": 0.0788, + "step": 97460 + }, + { + "epoch": 4.55, + "learning_rate": 4.8848619754518525e-06, + "loss": 0.042, + "step": 97465 + }, + { + "epoch": 4.55, + "learning_rate": 4.884078190397066e-06, + "loss": 0.1569, + "step": 97470 + }, + { + "epoch": 4.55, + "learning_rate": 4.8832944053422795e-06, + "loss": 0.3501, + "step": 97475 + }, + { + "epoch": 4.55, + "learning_rate": 4.8825106202874925e-06, + "loss": 0.098, + "step": 97480 + }, + { + "epoch": 4.55, + "learning_rate": 4.8817268352327065e-06, + "loss": 0.0277, + "step": 97485 + }, + { + "epoch": 4.55, + "learning_rate": 4.8809430501779195e-06, + "loss": 0.0588, + "step": 97490 + }, + { + "epoch": 4.55, + "learning_rate": 4.8801592651231326e-06, + "loss": 0.0777, + "step": 97495 + }, + { + "epoch": 4.55, + "learning_rate": 4.8793754800683465e-06, + "loss": 0.0718, + "step": 97500 + }, + { + "epoch": 4.55, + "learning_rate": 4.8785916950135595e-06, + "loss": 0.1042, + "step": 97505 + }, + { + "epoch": 4.55, + "learning_rate": 4.8778079099587734e-06, + "loss": 0.0908, + "step": 97510 + }, + { + "epoch": 4.55, + "learning_rate": 4.8770241249039865e-06, + "loss": 0.1177, + "step": 97515 + }, + { + "epoch": 4.55, + "learning_rate": 4.8762403398492e-06, + "loss": 0.1094, + "step": 97520 + }, + { + "epoch": 4.55, + "learning_rate": 4.8754565547944135e-06, + "loss": 0.3185, + "step": 97525 + }, + { + "epoch": 4.55, + "learning_rate": 4.8746727697396265e-06, + "loss": 0.0354, + "step": 97530 + }, + { + "epoch": 4.55, + "learning_rate": 4.8738889846848404e-06, + "loss": 0.0217, + "step": 97535 + }, + { + "epoch": 4.55, + "learning_rate": 4.8731051996300535e-06, + "loss": 0.0749, + "step": 97540 + }, + { + "epoch": 4.55, + "learning_rate": 4.872321414575267e-06, + "loss": 0.0125, + "step": 97545 + }, + { + "epoch": 4.55, + "learning_rate": 4.8715376295204805e-06, + "loss": 0.0637, + "step": 97550 + }, + { + "epoch": 4.55, + "learning_rate": 4.870753844465694e-06, + "loss": 0.0765, + "step": 97555 + }, + { + "epoch": 4.55, + "learning_rate": 4.869970059410907e-06, + "loss": 0.0634, + "step": 97560 + }, + { + "epoch": 4.55, + "learning_rate": 4.8691862743561205e-06, + "loss": 0.064, + "step": 97565 + }, + { + "epoch": 4.55, + "learning_rate": 4.868402489301334e-06, + "loss": 0.2301, + "step": 97570 + }, + { + "epoch": 4.55, + "learning_rate": 4.8676187042465474e-06, + "loss": 0.2494, + "step": 97575 + }, + { + "epoch": 4.55, + "learning_rate": 4.866834919191761e-06, + "loss": 0.1085, + "step": 97580 + }, + { + "epoch": 4.55, + "learning_rate": 4.866051134136975e-06, + "loss": 0.0254, + "step": 97585 + }, + { + "epoch": 4.55, + "learning_rate": 4.865267349082188e-06, + "loss": 0.0543, + "step": 97590 + }, + { + "epoch": 4.55, + "learning_rate": 4.864483564027401e-06, + "loss": 0.0431, + "step": 97595 + }, + { + "epoch": 4.55, + "learning_rate": 4.8636997789726144e-06, + "loss": 0.0635, + "step": 97600 + }, + { + "epoch": 4.55, + "learning_rate": 4.862915993917828e-06, + "loss": 0.0282, + "step": 97605 + }, + { + "epoch": 4.55, + "learning_rate": 4.862132208863042e-06, + "loss": 0.142, + "step": 97610 + }, + { + "epoch": 4.55, + "learning_rate": 4.861348423808255e-06, + "loss": 0.1611, + "step": 97615 + }, + { + "epoch": 4.56, + "learning_rate": 4.860564638753469e-06, + "loss": 0.1703, + "step": 97620 + }, + { + "epoch": 4.56, + "learning_rate": 4.859780853698681e-06, + "loss": 0.2248, + "step": 97625 + }, + { + "epoch": 4.56, + "learning_rate": 4.858997068643895e-06, + "loss": 0.0997, + "step": 97630 + }, + { + "epoch": 4.56, + "learning_rate": 4.858213283589109e-06, + "loss": 0.0345, + "step": 97635 + }, + { + "epoch": 4.56, + "learning_rate": 4.857429498534322e-06, + "loss": 0.0514, + "step": 97640 + }, + { + "epoch": 4.56, + "learning_rate": 4.856645713479536e-06, + "loss": 0.0553, + "step": 97645 + }, + { + "epoch": 4.56, + "learning_rate": 4.855861928424749e-06, + "loss": 0.0776, + "step": 97650 + }, + { + "epoch": 4.56, + "learning_rate": 4.855078143369963e-06, + "loss": 0.0566, + "step": 97655 + }, + { + "epoch": 4.56, + "learning_rate": 4.854294358315176e-06, + "loss": 0.1198, + "step": 97660 + }, + { + "epoch": 4.56, + "learning_rate": 4.853510573260389e-06, + "loss": 0.123, + "step": 97665 + }, + { + "epoch": 4.56, + "learning_rate": 4.852726788205603e-06, + "loss": 0.2919, + "step": 97670 + }, + { + "epoch": 4.56, + "learning_rate": 4.851943003150816e-06, + "loss": 0.1835, + "step": 97675 + }, + { + "epoch": 4.56, + "learning_rate": 4.85115921809603e-06, + "loss": 0.0758, + "step": 97680 + }, + { + "epoch": 4.56, + "learning_rate": 4.850375433041243e-06, + "loss": 0.0462, + "step": 97685 + }, + { + "epoch": 4.56, + "learning_rate": 4.849591647986456e-06, + "loss": 0.0409, + "step": 97690 + }, + { + "epoch": 4.56, + "learning_rate": 4.84880786293167e-06, + "loss": 0.0213, + "step": 97695 + }, + { + "epoch": 4.56, + "learning_rate": 4.848024077876883e-06, + "loss": 0.044, + "step": 97700 + }, + { + "epoch": 4.56, + "learning_rate": 4.847240292822097e-06, + "loss": 0.0829, + "step": 97705 + }, + { + "epoch": 4.56, + "learning_rate": 4.84645650776731e-06, + "loss": 0.113, + "step": 97710 + }, + { + "epoch": 4.56, + "learning_rate": 4.845672722712524e-06, + "loss": 0.1968, + "step": 97715 + }, + { + "epoch": 4.56, + "learning_rate": 4.844888937657737e-06, + "loss": 0.2569, + "step": 97720 + }, + { + "epoch": 4.56, + "learning_rate": 4.84410515260295e-06, + "loss": 0.3715, + "step": 97725 + }, + { + "epoch": 4.56, + "learning_rate": 4.843321367548164e-06, + "loss": 0.077, + "step": 97730 + }, + { + "epoch": 4.56, + "learning_rate": 4.842537582493377e-06, + "loss": 0.0426, + "step": 97735 + }, + { + "epoch": 4.56, + "learning_rate": 4.841753797438591e-06, + "loss": 0.0269, + "step": 97740 + }, + { + "epoch": 4.56, + "learning_rate": 4.840970012383804e-06, + "loss": 0.0554, + "step": 97745 + }, + { + "epoch": 4.56, + "learning_rate": 4.840186227329018e-06, + "loss": 0.1202, + "step": 97750 + }, + { + "epoch": 4.56, + "learning_rate": 4.839402442274231e-06, + "loss": 0.0734, + "step": 97755 + }, + { + "epoch": 4.56, + "learning_rate": 4.838618657219444e-06, + "loss": 0.1045, + "step": 97760 + }, + { + "epoch": 4.56, + "learning_rate": 4.837834872164658e-06, + "loss": 0.1338, + "step": 97765 + }, + { + "epoch": 4.56, + "learning_rate": 4.837051087109871e-06, + "loss": 0.1308, + "step": 97770 + }, + { + "epoch": 4.56, + "learning_rate": 4.836267302055085e-06, + "loss": 0.2176, + "step": 97775 + }, + { + "epoch": 4.56, + "learning_rate": 4.835483517000298e-06, + "loss": 0.0666, + "step": 97780 + }, + { + "epoch": 4.56, + "learning_rate": 4.834699731945512e-06, + "loss": 0.0196, + "step": 97785 + }, + { + "epoch": 4.56, + "learning_rate": 4.833915946890725e-06, + "loss": 0.0603, + "step": 97790 + }, + { + "epoch": 4.56, + "learning_rate": 4.833132161835938e-06, + "loss": 0.0616, + "step": 97795 + }, + { + "epoch": 4.56, + "learning_rate": 4.832348376781152e-06, + "loss": 0.0545, + "step": 97800 + }, + { + "epoch": 4.56, + "learning_rate": 4.831564591726365e-06, + "loss": 0.0742, + "step": 97805 + }, + { + "epoch": 4.56, + "learning_rate": 4.830780806671579e-06, + "loss": 0.065, + "step": 97810 + }, + { + "epoch": 4.56, + "learning_rate": 4.829997021616792e-06, + "loss": 0.1645, + "step": 97815 + }, + { + "epoch": 4.56, + "learning_rate": 4.829213236562005e-06, + "loss": 0.1676, + "step": 97820 + }, + { + "epoch": 4.56, + "learning_rate": 4.828429451507219e-06, + "loss": 0.2441, + "step": 97825 + }, + { + "epoch": 4.56, + "learning_rate": 4.827645666452432e-06, + "loss": 0.0652, + "step": 97830 + }, + { + "epoch": 4.57, + "learning_rate": 4.826861881397646e-06, + "loss": 0.0025, + "step": 97835 + }, + { + "epoch": 4.57, + "learning_rate": 4.826078096342859e-06, + "loss": 0.0241, + "step": 97840 + }, + { + "epoch": 4.57, + "learning_rate": 4.825294311288073e-06, + "loss": 0.0687, + "step": 97845 + }, + { + "epoch": 4.57, + "learning_rate": 4.824510526233286e-06, + "loss": 0.0281, + "step": 97850 + }, + { + "epoch": 4.57, + "learning_rate": 4.823726741178499e-06, + "loss": 0.0065, + "step": 97855 + }, + { + "epoch": 4.57, + "learning_rate": 4.822942956123713e-06, + "loss": 0.132, + "step": 97860 + }, + { + "epoch": 4.57, + "learning_rate": 4.822159171068926e-06, + "loss": 0.0904, + "step": 97865 + }, + { + "epoch": 4.57, + "learning_rate": 4.82137538601414e-06, + "loss": 0.133, + "step": 97870 + }, + { + "epoch": 4.57, + "learning_rate": 4.820591600959354e-06, + "loss": 0.308, + "step": 97875 + }, + { + "epoch": 4.57, + "learning_rate": 4.819807815904567e-06, + "loss": 0.1002, + "step": 97880 + }, + { + "epoch": 4.57, + "learning_rate": 4.81902403084978e-06, + "loss": 0.0092, + "step": 97885 + }, + { + "epoch": 4.57, + "learning_rate": 4.818240245794993e-06, + "loss": 0.0551, + "step": 97890 + }, + { + "epoch": 4.57, + "learning_rate": 4.817456460740207e-06, + "loss": 0.0431, + "step": 97895 + }, + { + "epoch": 4.57, + "learning_rate": 4.816672675685421e-06, + "loss": 0.0457, + "step": 97900 + }, + { + "epoch": 4.57, + "learning_rate": 4.815888890630634e-06, + "loss": 0.0607, + "step": 97905 + }, + { + "epoch": 4.57, + "learning_rate": 4.815105105575848e-06, + "loss": 0.1423, + "step": 97910 + }, + { + "epoch": 4.57, + "learning_rate": 4.814321320521061e-06, + "loss": 0.1044, + "step": 97915 + }, + { + "epoch": 4.57, + "learning_rate": 4.813537535466274e-06, + "loss": 0.1191, + "step": 97920 + }, + { + "epoch": 4.57, + "learning_rate": 4.812753750411488e-06, + "loss": 0.3935, + "step": 97925 + }, + { + "epoch": 4.57, + "learning_rate": 4.811969965356701e-06, + "loss": 0.1108, + "step": 97930 + }, + { + "epoch": 4.57, + "learning_rate": 4.811186180301915e-06, + "loss": 0.0128, + "step": 97935 + }, + { + "epoch": 4.57, + "learning_rate": 4.810402395247128e-06, + "loss": 0.0096, + "step": 97940 + }, + { + "epoch": 4.57, + "learning_rate": 4.809618610192342e-06, + "loss": 0.0065, + "step": 97945 + }, + { + "epoch": 4.57, + "learning_rate": 4.808834825137555e-06, + "loss": 0.0238, + "step": 97950 + }, + { + "epoch": 4.57, + "learning_rate": 4.808051040082768e-06, + "loss": 0.0254, + "step": 97955 + }, + { + "epoch": 4.57, + "learning_rate": 4.807267255027982e-06, + "loss": 0.0706, + "step": 97960 + }, + { + "epoch": 4.57, + "learning_rate": 4.806483469973195e-06, + "loss": 0.0878, + "step": 97965 + }, + { + "epoch": 4.57, + "learning_rate": 4.805699684918409e-06, + "loss": 0.1129, + "step": 97970 + }, + { + "epoch": 4.57, + "learning_rate": 4.804915899863622e-06, + "loss": 0.247, + "step": 97975 + }, + { + "epoch": 4.57, + "learning_rate": 4.804132114808836e-06, + "loss": 0.088, + "step": 97980 + }, + { + "epoch": 4.57, + "learning_rate": 4.803348329754049e-06, + "loss": 0.0146, + "step": 97985 + }, + { + "epoch": 4.57, + "learning_rate": 4.802564544699262e-06, + "loss": 0.0342, + "step": 97990 + }, + { + "epoch": 4.57, + "learning_rate": 4.801780759644476e-06, + "loss": 0.0343, + "step": 97995 + }, + { + "epoch": 4.57, + "learning_rate": 4.800996974589689e-06, + "loss": 0.0808, + "step": 98000 + }, + { + "epoch": 4.57, + "learning_rate": 4.800213189534903e-06, + "loss": 0.0295, + "step": 98005 + }, + { + "epoch": 4.57, + "learning_rate": 4.799429404480116e-06, + "loss": 0.0679, + "step": 98010 + }, + { + "epoch": 4.57, + "learning_rate": 4.798645619425329e-06, + "loss": 0.1157, + "step": 98015 + }, + { + "epoch": 4.57, + "learning_rate": 4.797861834370543e-06, + "loss": 0.224, + "step": 98020 + }, + { + "epoch": 4.57, + "learning_rate": 4.797078049315756e-06, + "loss": 0.1635, + "step": 98025 + }, + { + "epoch": 4.57, + "learning_rate": 4.79629426426097e-06, + "loss": 0.068, + "step": 98030 + }, + { + "epoch": 4.57, + "learning_rate": 4.795510479206183e-06, + "loss": 0.0249, + "step": 98035 + }, + { + "epoch": 4.57, + "learning_rate": 4.794726694151397e-06, + "loss": 0.0808, + "step": 98040 + }, + { + "epoch": 4.57, + "learning_rate": 4.79394290909661e-06, + "loss": 0.0745, + "step": 98045 + }, + { + "epoch": 4.58, + "learning_rate": 4.793159124041823e-06, + "loss": 0.0261, + "step": 98050 + }, + { + "epoch": 4.58, + "learning_rate": 4.792375338987037e-06, + "loss": 0.0553, + "step": 98055 + }, + { + "epoch": 4.58, + "learning_rate": 4.79159155393225e-06, + "loss": 0.0576, + "step": 98060 + }, + { + "epoch": 4.58, + "learning_rate": 4.790807768877464e-06, + "loss": 0.0739, + "step": 98065 + }, + { + "epoch": 4.58, + "learning_rate": 4.790023983822677e-06, + "loss": 0.2026, + "step": 98070 + }, + { + "epoch": 4.58, + "learning_rate": 4.789240198767891e-06, + "loss": 0.3587, + "step": 98075 + }, + { + "epoch": 4.58, + "learning_rate": 4.788456413713104e-06, + "loss": 0.1066, + "step": 98080 + }, + { + "epoch": 4.58, + "learning_rate": 4.787672628658317e-06, + "loss": 0.0227, + "step": 98085 + }, + { + "epoch": 4.58, + "learning_rate": 4.786888843603531e-06, + "loss": 0.0534, + "step": 98090 + }, + { + "epoch": 4.58, + "learning_rate": 4.786105058548744e-06, + "loss": 0.0241, + "step": 98095 + }, + { + "epoch": 4.58, + "learning_rate": 4.7853212734939576e-06, + "loss": 0.0361, + "step": 98100 + }, + { + "epoch": 4.58, + "learning_rate": 4.784537488439171e-06, + "loss": 0.1102, + "step": 98105 + }, + { + "epoch": 4.58, + "learning_rate": 4.7837537033843845e-06, + "loss": 0.0765, + "step": 98110 + }, + { + "epoch": 4.58, + "learning_rate": 4.782969918329598e-06, + "loss": 0.1188, + "step": 98115 + }, + { + "epoch": 4.58, + "learning_rate": 4.782186133274811e-06, + "loss": 0.1429, + "step": 98120 + }, + { + "epoch": 4.58, + "learning_rate": 4.7814023482200246e-06, + "loss": 0.3327, + "step": 98125 + }, + { + "epoch": 4.58, + "learning_rate": 4.780618563165238e-06, + "loss": 0.073, + "step": 98130 + }, + { + "epoch": 4.58, + "learning_rate": 4.7798347781104515e-06, + "loss": 0.0439, + "step": 98135 + }, + { + "epoch": 4.58, + "learning_rate": 4.779050993055665e-06, + "loss": 0.0206, + "step": 98140 + }, + { + "epoch": 4.58, + "learning_rate": 4.778267208000878e-06, + "loss": 0.0281, + "step": 98145 + }, + { + "epoch": 4.58, + "learning_rate": 4.7774834229460915e-06, + "loss": 0.0482, + "step": 98150 + }, + { + "epoch": 4.58, + "learning_rate": 4.776699637891305e-06, + "loss": 0.0913, + "step": 98155 + }, + { + "epoch": 4.58, + "learning_rate": 4.7759158528365185e-06, + "loss": 0.1045, + "step": 98160 + }, + { + "epoch": 4.58, + "learning_rate": 4.775132067781732e-06, + "loss": 0.0822, + "step": 98165 + }, + { + "epoch": 4.58, + "learning_rate": 4.7743482827269455e-06, + "loss": 0.1039, + "step": 98170 + }, + { + "epoch": 4.58, + "learning_rate": 4.773564497672159e-06, + "loss": 0.3043, + "step": 98175 + }, + { + "epoch": 4.58, + "learning_rate": 4.772780712617372e-06, + "loss": 0.0674, + "step": 98180 + }, + { + "epoch": 4.58, + "learning_rate": 4.7719969275625855e-06, + "loss": 0.0219, + "step": 98185 + }, + { + "epoch": 4.58, + "learning_rate": 4.771213142507799e-06, + "loss": 0.0163, + "step": 98190 + }, + { + "epoch": 4.58, + "learning_rate": 4.7704293574530125e-06, + "loss": 0.0547, + "step": 98195 + }, + { + "epoch": 4.58, + "learning_rate": 4.769645572398226e-06, + "loss": 0.0748, + "step": 98200 + }, + { + "epoch": 4.58, + "learning_rate": 4.7688617873434394e-06, + "loss": 0.0547, + "step": 98205 + }, + { + "epoch": 4.58, + "learning_rate": 4.7680780022886525e-06, + "loss": 0.1277, + "step": 98210 + }, + { + "epoch": 4.58, + "learning_rate": 4.767294217233866e-06, + "loss": 0.1327, + "step": 98215 + }, + { + "epoch": 4.58, + "learning_rate": 4.7665104321790795e-06, + "loss": 0.1201, + "step": 98220 + }, + { + "epoch": 4.58, + "learning_rate": 4.765726647124293e-06, + "loss": 0.3079, + "step": 98225 + }, + { + "epoch": 4.58, + "learning_rate": 4.764942862069506e-06, + "loss": 0.075, + "step": 98230 + }, + { + "epoch": 4.58, + "learning_rate": 4.76415907701472e-06, + "loss": 0.0479, + "step": 98235 + }, + { + "epoch": 4.58, + "learning_rate": 4.763375291959933e-06, + "loss": 0.0288, + "step": 98240 + }, + { + "epoch": 4.58, + "learning_rate": 4.7625915069051464e-06, + "loss": 0.0471, + "step": 98245 + }, + { + "epoch": 4.58, + "learning_rate": 4.76180772185036e-06, + "loss": 0.0723, + "step": 98250 + }, + { + "epoch": 4.58, + "learning_rate": 4.761023936795573e-06, + "loss": 0.0457, + "step": 98255 + }, + { + "epoch": 4.58, + "learning_rate": 4.760240151740787e-06, + "loss": 0.162, + "step": 98260 + }, + { + "epoch": 4.59, + "learning_rate": 4.759456366686e-06, + "loss": 0.164, + "step": 98265 + }, + { + "epoch": 4.59, + "learning_rate": 4.758672581631214e-06, + "loss": 0.0886, + "step": 98270 + }, + { + "epoch": 4.59, + "learning_rate": 4.757888796576427e-06, + "loss": 0.4929, + "step": 98275 + }, + { + "epoch": 4.59, + "learning_rate": 4.75710501152164e-06, + "loss": 0.0793, + "step": 98280 + }, + { + "epoch": 4.59, + "learning_rate": 4.756321226466854e-06, + "loss": 0.0243, + "step": 98285 + }, + { + "epoch": 4.59, + "learning_rate": 4.755537441412067e-06, + "loss": 0.0293, + "step": 98290 + }, + { + "epoch": 4.59, + "learning_rate": 4.754753656357281e-06, + "loss": 0.024, + "step": 98295 + }, + { + "epoch": 4.59, + "learning_rate": 4.753969871302494e-06, + "loss": 0.0699, + "step": 98300 + }, + { + "epoch": 4.59, + "learning_rate": 4.753186086247708e-06, + "loss": 0.0388, + "step": 98305 + }, + { + "epoch": 4.59, + "learning_rate": 4.752402301192921e-06, + "loss": 0.0463, + "step": 98310 + }, + { + "epoch": 4.59, + "learning_rate": 4.751618516138134e-06, + "loss": 0.0508, + "step": 98315 + }, + { + "epoch": 4.59, + "learning_rate": 4.750834731083348e-06, + "loss": 0.1005, + "step": 98320 + }, + { + "epoch": 4.59, + "learning_rate": 4.750050946028561e-06, + "loss": 0.1858, + "step": 98325 + }, + { + "epoch": 4.59, + "learning_rate": 4.749267160973775e-06, + "loss": 0.0884, + "step": 98330 + }, + { + "epoch": 4.59, + "learning_rate": 4.748483375918988e-06, + "loss": 0.0103, + "step": 98335 + }, + { + "epoch": 4.59, + "learning_rate": 4.747699590864201e-06, + "loss": 0.029, + "step": 98340 + }, + { + "epoch": 4.59, + "learning_rate": 4.746915805809415e-06, + "loss": 0.0599, + "step": 98345 + }, + { + "epoch": 4.59, + "learning_rate": 4.746132020754628e-06, + "loss": 0.093, + "step": 98350 + }, + { + "epoch": 4.59, + "learning_rate": 4.745348235699842e-06, + "loss": 0.056, + "step": 98355 + }, + { + "epoch": 4.59, + "learning_rate": 4.744564450645055e-06, + "loss": 0.0937, + "step": 98360 + }, + { + "epoch": 4.59, + "learning_rate": 4.743780665590269e-06, + "loss": 0.1793, + "step": 98365 + }, + { + "epoch": 4.59, + "learning_rate": 4.742996880535482e-06, + "loss": 0.1542, + "step": 98370 + }, + { + "epoch": 4.59, + "learning_rate": 4.742213095480695e-06, + "loss": 0.292, + "step": 98375 + }, + { + "epoch": 4.59, + "learning_rate": 4.741429310425909e-06, + "loss": 0.0954, + "step": 98380 + }, + { + "epoch": 4.59, + "learning_rate": 4.740645525371122e-06, + "loss": 0.0101, + "step": 98385 + }, + { + "epoch": 4.59, + "learning_rate": 4.739861740316336e-06, + "loss": 0.0294, + "step": 98390 + }, + { + "epoch": 4.59, + "learning_rate": 4.739077955261549e-06, + "loss": 0.0526, + "step": 98395 + }, + { + "epoch": 4.59, + "learning_rate": 4.738294170206763e-06, + "loss": 0.1069, + "step": 98400 + }, + { + "epoch": 4.59, + "learning_rate": 4.737510385151976e-06, + "loss": 0.07, + "step": 98405 + }, + { + "epoch": 4.59, + "learning_rate": 4.736726600097189e-06, + "loss": 0.083, + "step": 98410 + }, + { + "epoch": 4.59, + "learning_rate": 4.735942815042403e-06, + "loss": 0.114, + "step": 98415 + }, + { + "epoch": 4.59, + "learning_rate": 4.735159029987616e-06, + "loss": 0.1524, + "step": 98420 + }, + { + "epoch": 4.59, + "learning_rate": 4.73437524493283e-06, + "loss": 0.2291, + "step": 98425 + }, + { + "epoch": 4.59, + "learning_rate": 4.733591459878043e-06, + "loss": 0.0698, + "step": 98430 + }, + { + "epoch": 4.59, + "learning_rate": 4.732807674823257e-06, + "loss": 0.0594, + "step": 98435 + }, + { + "epoch": 4.59, + "learning_rate": 4.73202388976847e-06, + "loss": 0.0253, + "step": 98440 + }, + { + "epoch": 4.59, + "learning_rate": 4.731240104713683e-06, + "loss": 0.0151, + "step": 98445 + }, + { + "epoch": 4.59, + "learning_rate": 4.730456319658897e-06, + "loss": 0.0529, + "step": 98450 + }, + { + "epoch": 4.59, + "learning_rate": 4.729672534604111e-06, + "loss": 0.0582, + "step": 98455 + }, + { + "epoch": 4.59, + "learning_rate": 4.728888749549324e-06, + "loss": 0.1311, + "step": 98460 + }, + { + "epoch": 4.59, + "learning_rate": 4.728104964494538e-06, + "loss": 0.1393, + "step": 98465 + }, + { + "epoch": 4.59, + "learning_rate": 4.727321179439751e-06, + "loss": 0.1408, + "step": 98470 + }, + { + "epoch": 4.59, + "learning_rate": 4.726537394384964e-06, + "loss": 0.204, + "step": 98475 + }, + { + "epoch": 4.6, + "learning_rate": 4.725753609330178e-06, + "loss": 0.0507, + "step": 98480 + }, + { + "epoch": 4.6, + "learning_rate": 4.724969824275391e-06, + "loss": 0.0056, + "step": 98485 + }, + { + "epoch": 4.6, + "learning_rate": 4.724186039220605e-06, + "loss": 0.0239, + "step": 98490 + }, + { + "epoch": 4.6, + "learning_rate": 4.723402254165818e-06, + "loss": 0.0197, + "step": 98495 + }, + { + "epoch": 4.6, + "learning_rate": 4.722618469111032e-06, + "loss": 0.03, + "step": 98500 + }, + { + "epoch": 4.6, + "learning_rate": 4.721834684056245e-06, + "loss": 0.0459, + "step": 98505 + }, + { + "epoch": 4.6, + "learning_rate": 4.721050899001458e-06, + "loss": 0.1789, + "step": 98510 + }, + { + "epoch": 4.6, + "learning_rate": 4.720267113946672e-06, + "loss": 0.1291, + "step": 98515 + }, + { + "epoch": 4.6, + "learning_rate": 4.719483328891885e-06, + "loss": 0.1693, + "step": 98520 + }, + { + "epoch": 4.6, + "learning_rate": 4.718699543837099e-06, + "loss": 0.3627, + "step": 98525 + }, + { + "epoch": 4.6, + "learning_rate": 4.717915758782312e-06, + "loss": 0.0747, + "step": 98530 + }, + { + "epoch": 4.6, + "learning_rate": 4.717131973727526e-06, + "loss": 0.0521, + "step": 98535 + }, + { + "epoch": 4.6, + "learning_rate": 4.716348188672739e-06, + "loss": 0.0338, + "step": 98540 + }, + { + "epoch": 4.6, + "learning_rate": 4.715564403617952e-06, + "loss": 0.0316, + "step": 98545 + }, + { + "epoch": 4.6, + "learning_rate": 4.714780618563166e-06, + "loss": 0.0676, + "step": 98550 + }, + { + "epoch": 4.6, + "learning_rate": 4.713996833508379e-06, + "loss": 0.1055, + "step": 98555 + }, + { + "epoch": 4.6, + "learning_rate": 4.713213048453593e-06, + "loss": 0.111, + "step": 98560 + }, + { + "epoch": 4.6, + "learning_rate": 4.712429263398806e-06, + "loss": 0.1715, + "step": 98565 + }, + { + "epoch": 4.6, + "learning_rate": 4.711645478344019e-06, + "loss": 0.1806, + "step": 98570 + }, + { + "epoch": 4.6, + "learning_rate": 4.710861693289233e-06, + "loss": 0.2132, + "step": 98575 + }, + { + "epoch": 4.6, + "learning_rate": 4.710077908234446e-06, + "loss": 0.0975, + "step": 98580 + }, + { + "epoch": 4.6, + "learning_rate": 4.70929412317966e-06, + "loss": 0.0238, + "step": 98585 + }, + { + "epoch": 4.6, + "learning_rate": 4.708510338124873e-06, + "loss": 0.0189, + "step": 98590 + }, + { + "epoch": 4.6, + "learning_rate": 4.707726553070087e-06, + "loss": 0.0709, + "step": 98595 + }, + { + "epoch": 4.6, + "learning_rate": 4.7069427680153e-06, + "loss": 0.0812, + "step": 98600 + }, + { + "epoch": 4.6, + "learning_rate": 4.706158982960513e-06, + "loss": 0.0753, + "step": 98605 + }, + { + "epoch": 4.6, + "learning_rate": 4.705375197905727e-06, + "loss": 0.0977, + "step": 98610 + }, + { + "epoch": 4.6, + "learning_rate": 4.70459141285094e-06, + "loss": 0.0735, + "step": 98615 + }, + { + "epoch": 4.6, + "learning_rate": 4.703807627796154e-06, + "loss": 0.114, + "step": 98620 + }, + { + "epoch": 4.6, + "learning_rate": 4.703023842741367e-06, + "loss": 0.2105, + "step": 98625 + }, + { + "epoch": 4.6, + "learning_rate": 4.702240057686581e-06, + "loss": 0.09, + "step": 98630 + }, + { + "epoch": 4.6, + "learning_rate": 4.701456272631794e-06, + "loss": 0.028, + "step": 98635 + }, + { + "epoch": 4.6, + "learning_rate": 4.700672487577007e-06, + "loss": 0.0076, + "step": 98640 + }, + { + "epoch": 4.6, + "learning_rate": 4.699888702522221e-06, + "loss": 0.069, + "step": 98645 + }, + { + "epoch": 4.6, + "learning_rate": 4.699104917467434e-06, + "loss": 0.0845, + "step": 98650 + }, + { + "epoch": 4.6, + "learning_rate": 4.698321132412648e-06, + "loss": 0.1237, + "step": 98655 + }, + { + "epoch": 4.6, + "learning_rate": 4.697537347357861e-06, + "loss": 0.0693, + "step": 98660 + }, + { + "epoch": 4.6, + "learning_rate": 4.696753562303075e-06, + "loss": 0.0838, + "step": 98665 + }, + { + "epoch": 4.6, + "learning_rate": 4.695969777248288e-06, + "loss": 0.2664, + "step": 98670 + }, + { + "epoch": 4.6, + "learning_rate": 4.695185992193501e-06, + "loss": 0.3182, + "step": 98675 + }, + { + "epoch": 4.6, + "learning_rate": 4.694402207138715e-06, + "loss": 0.1123, + "step": 98680 + }, + { + "epoch": 4.6, + "learning_rate": 4.693618422083928e-06, + "loss": 0.0013, + "step": 98685 + }, + { + "epoch": 4.61, + "learning_rate": 4.692834637029142e-06, + "loss": 0.046, + "step": 98690 + }, + { + "epoch": 4.61, + "learning_rate": 4.692050851974355e-06, + "loss": 0.0534, + "step": 98695 + }, + { + "epoch": 4.61, + "learning_rate": 4.691267066919568e-06, + "loss": 0.068, + "step": 98700 + }, + { + "epoch": 4.61, + "learning_rate": 4.690483281864782e-06, + "loss": 0.0488, + "step": 98705 + }, + { + "epoch": 4.61, + "learning_rate": 4.689699496809995e-06, + "loss": 0.1138, + "step": 98710 + }, + { + "epoch": 4.61, + "learning_rate": 4.688915711755209e-06, + "loss": 0.0755, + "step": 98715 + }, + { + "epoch": 4.61, + "learning_rate": 4.688131926700422e-06, + "loss": 0.0916, + "step": 98720 + }, + { + "epoch": 4.61, + "learning_rate": 4.687348141645636e-06, + "loss": 0.2622, + "step": 98725 + }, + { + "epoch": 4.61, + "learning_rate": 4.6865643565908496e-06, + "loss": 0.1045, + "step": 98730 + }, + { + "epoch": 4.61, + "learning_rate": 4.685780571536062e-06, + "loss": 0.0511, + "step": 98735 + }, + { + "epoch": 4.61, + "learning_rate": 4.684996786481276e-06, + "loss": 0.0175, + "step": 98740 + }, + { + "epoch": 4.61, + "learning_rate": 4.68421300142649e-06, + "loss": 0.0457, + "step": 98745 + }, + { + "epoch": 4.61, + "learning_rate": 4.683429216371703e-06, + "loss": 0.0618, + "step": 98750 + }, + { + "epoch": 4.61, + "learning_rate": 4.6826454313169165e-06, + "loss": 0.0635, + "step": 98755 + }, + { + "epoch": 4.61, + "learning_rate": 4.68186164626213e-06, + "loss": 0.1034, + "step": 98760 + }, + { + "epoch": 4.61, + "learning_rate": 4.681077861207343e-06, + "loss": 0.1423, + "step": 98765 + }, + { + "epoch": 4.61, + "learning_rate": 4.6802940761525566e-06, + "loss": 0.0631, + "step": 98770 + }, + { + "epoch": 4.61, + "learning_rate": 4.67951029109777e-06, + "loss": 0.3452, + "step": 98775 + }, + { + "epoch": 4.61, + "learning_rate": 4.6787265060429835e-06, + "loss": 0.0838, + "step": 98780 + }, + { + "epoch": 4.61, + "learning_rate": 4.677942720988197e-06, + "loss": 0.0484, + "step": 98785 + }, + { + "epoch": 4.61, + "learning_rate": 4.6771589359334105e-06, + "loss": 0.0497, + "step": 98790 + }, + { + "epoch": 4.61, + "learning_rate": 4.6763751508786236e-06, + "loss": 0.1333, + "step": 98795 + }, + { + "epoch": 4.61, + "learning_rate": 4.675591365823837e-06, + "loss": 0.0566, + "step": 98800 + }, + { + "epoch": 4.61, + "learning_rate": 4.6748075807690505e-06, + "loss": 0.0735, + "step": 98805 + }, + { + "epoch": 4.61, + "learning_rate": 4.674023795714264e-06, + "loss": 0.0919, + "step": 98810 + }, + { + "epoch": 4.61, + "learning_rate": 4.6732400106594775e-06, + "loss": 0.0704, + "step": 98815 + }, + { + "epoch": 4.61, + "learning_rate": 4.6724562256046905e-06, + "loss": 0.1036, + "step": 98820 + }, + { + "epoch": 4.61, + "learning_rate": 4.6716724405499044e-06, + "loss": 0.3106, + "step": 98825 + }, + { + "epoch": 4.61, + "learning_rate": 4.6708886554951175e-06, + "loss": 0.1045, + "step": 98830 + }, + { + "epoch": 4.61, + "learning_rate": 4.6701048704403306e-06, + "loss": 0.0328, + "step": 98835 + }, + { + "epoch": 4.61, + "learning_rate": 4.6693210853855445e-06, + "loss": 0.0274, + "step": 98840 + }, + { + "epoch": 4.61, + "learning_rate": 4.6685373003307575e-06, + "loss": 0.0643, + "step": 98845 + }, + { + "epoch": 4.61, + "learning_rate": 4.6677535152759714e-06, + "loss": 0.046, + "step": 98850 + }, + { + "epoch": 4.61, + "learning_rate": 4.6669697302211845e-06, + "loss": 0.085, + "step": 98855 + }, + { + "epoch": 4.61, + "learning_rate": 4.666185945166398e-06, + "loss": 0.1397, + "step": 98860 + }, + { + "epoch": 4.61, + "learning_rate": 4.6654021601116115e-06, + "loss": 0.1457, + "step": 98865 + }, + { + "epoch": 4.61, + "learning_rate": 4.6646183750568245e-06, + "loss": 0.2372, + "step": 98870 + }, + { + "epoch": 4.61, + "learning_rate": 4.6638345900020384e-06, + "loss": 0.2352, + "step": 98875 + }, + { + "epoch": 4.61, + "learning_rate": 4.6630508049472515e-06, + "loss": 0.0608, + "step": 98880 + }, + { + "epoch": 4.61, + "learning_rate": 4.662267019892465e-06, + "loss": 0.0154, + "step": 98885 + }, + { + "epoch": 4.61, + "learning_rate": 4.6614832348376784e-06, + "loss": 0.0452, + "step": 98890 + }, + { + "epoch": 4.61, + "learning_rate": 4.6606994497828915e-06, + "loss": 0.0785, + "step": 98895 + }, + { + "epoch": 4.61, + "learning_rate": 4.659915664728105e-06, + "loss": 0.0461, + "step": 98900 + }, + { + "epoch": 4.62, + "learning_rate": 4.6591318796733185e-06, + "loss": 0.0656, + "step": 98905 + }, + { + "epoch": 4.62, + "learning_rate": 4.658348094618532e-06, + "loss": 0.0561, + "step": 98910 + }, + { + "epoch": 4.62, + "learning_rate": 4.6575643095637454e-06, + "loss": 0.1176, + "step": 98915 + }, + { + "epoch": 4.62, + "learning_rate": 4.656780524508959e-06, + "loss": 0.2113, + "step": 98920 + }, + { + "epoch": 4.62, + "learning_rate": 4.655996739454172e-06, + "loss": 0.2794, + "step": 98925 + }, + { + "epoch": 4.62, + "learning_rate": 4.6552129543993855e-06, + "loss": 0.0784, + "step": 98930 + }, + { + "epoch": 4.62, + "learning_rate": 4.654429169344599e-06, + "loss": 0.0445, + "step": 98935 + }, + { + "epoch": 4.62, + "learning_rate": 4.6536453842898124e-06, + "loss": 0.0491, + "step": 98940 + }, + { + "epoch": 4.62, + "learning_rate": 4.652861599235026e-06, + "loss": 0.0283, + "step": 98945 + }, + { + "epoch": 4.62, + "learning_rate": 4.652077814180239e-06, + "loss": 0.0677, + "step": 98950 + }, + { + "epoch": 4.62, + "learning_rate": 4.651294029125453e-06, + "loss": 0.0318, + "step": 98955 + }, + { + "epoch": 4.62, + "learning_rate": 4.650510244070666e-06, + "loss": 0.0479, + "step": 98960 + }, + { + "epoch": 4.62, + "learning_rate": 4.649726459015879e-06, + "loss": 0.0969, + "step": 98965 + }, + { + "epoch": 4.62, + "learning_rate": 4.648942673961093e-06, + "loss": 0.1436, + "step": 98970 + }, + { + "epoch": 4.62, + "learning_rate": 4.648158888906306e-06, + "loss": 0.4882, + "step": 98975 + }, + { + "epoch": 4.62, + "learning_rate": 4.64737510385152e-06, + "loss": 0.0895, + "step": 98980 + }, + { + "epoch": 4.62, + "learning_rate": 4.646591318796733e-06, + "loss": 0.0144, + "step": 98985 + }, + { + "epoch": 4.62, + "learning_rate": 4.645807533741947e-06, + "loss": 0.0107, + "step": 98990 + }, + { + "epoch": 4.62, + "learning_rate": 4.64502374868716e-06, + "loss": 0.0203, + "step": 98995 + }, + { + "epoch": 4.62, + "learning_rate": 4.644239963632373e-06, + "loss": 0.0975, + "step": 99000 + }, + { + "epoch": 4.62, + "learning_rate": 4.643456178577587e-06, + "loss": 0.0675, + "step": 99005 + }, + { + "epoch": 4.62, + "learning_rate": 4.6426723935228e-06, + "loss": 0.105, + "step": 99010 + }, + { + "epoch": 4.62, + "learning_rate": 4.641888608468014e-06, + "loss": 0.1148, + "step": 99015 + }, + { + "epoch": 4.62, + "learning_rate": 4.641104823413228e-06, + "loss": 0.1345, + "step": 99020 + }, + { + "epoch": 4.62, + "learning_rate": 4.64032103835844e-06, + "loss": 0.2658, + "step": 99025 + }, + { + "epoch": 4.62, + "learning_rate": 4.639537253303654e-06, + "loss": 0.0614, + "step": 99030 + }, + { + "epoch": 4.62, + "learning_rate": 4.638753468248868e-06, + "loss": 0.0439, + "step": 99035 + }, + { + "epoch": 4.62, + "learning_rate": 4.637969683194081e-06, + "loss": 0.0631, + "step": 99040 + }, + { + "epoch": 4.62, + "learning_rate": 4.637185898139295e-06, + "loss": 0.031, + "step": 99045 + }, + { + "epoch": 4.62, + "learning_rate": 4.636402113084508e-06, + "loss": 0.0375, + "step": 99050 + }, + { + "epoch": 4.62, + "learning_rate": 4.635618328029722e-06, + "loss": 0.1032, + "step": 99055 + }, + { + "epoch": 4.62, + "learning_rate": 4.634834542974935e-06, + "loss": 0.1198, + "step": 99060 + }, + { + "epoch": 4.62, + "learning_rate": 4.634050757920148e-06, + "loss": 0.158, + "step": 99065 + }, + { + "epoch": 4.62, + "learning_rate": 4.633266972865362e-06, + "loss": 0.1609, + "step": 99070 + }, + { + "epoch": 4.62, + "learning_rate": 4.632483187810575e-06, + "loss": 0.3688, + "step": 99075 + }, + { + "epoch": 4.62, + "learning_rate": 4.631699402755789e-06, + "loss": 0.1057, + "step": 99080 + }, + { + "epoch": 4.62, + "learning_rate": 4.630915617701002e-06, + "loss": 0.0403, + "step": 99085 + }, + { + "epoch": 4.62, + "learning_rate": 4.630131832646215e-06, + "loss": 0.0434, + "step": 99090 + }, + { + "epoch": 4.62, + "learning_rate": 4.629348047591429e-06, + "loss": 0.0496, + "step": 99095 + }, + { + "epoch": 4.62, + "learning_rate": 4.628564262536642e-06, + "loss": 0.0234, + "step": 99100 + }, + { + "epoch": 4.62, + "learning_rate": 4.627780477481856e-06, + "loss": 0.0994, + "step": 99105 + }, + { + "epoch": 4.62, + "learning_rate": 4.626996692427069e-06, + "loss": 0.0785, + "step": 99110 + }, + { + "epoch": 4.62, + "learning_rate": 4.626212907372283e-06, + "loss": 0.155, + "step": 99115 + }, + { + "epoch": 4.63, + "learning_rate": 4.625429122317496e-06, + "loss": 0.1887, + "step": 99120 + }, + { + "epoch": 4.63, + "learning_rate": 4.624645337262709e-06, + "loss": 0.2303, + "step": 99125 + }, + { + "epoch": 4.63, + "learning_rate": 4.623861552207923e-06, + "loss": 0.0835, + "step": 99130 + }, + { + "epoch": 4.63, + "learning_rate": 4.623077767153136e-06, + "loss": 0.0357, + "step": 99135 + }, + { + "epoch": 4.63, + "learning_rate": 4.62229398209835e-06, + "loss": 0.0441, + "step": 99140 + }, + { + "epoch": 4.63, + "learning_rate": 4.621510197043563e-06, + "loss": 0.008, + "step": 99145 + }, + { + "epoch": 4.63, + "learning_rate": 4.620726411988777e-06, + "loss": 0.0457, + "step": 99150 + }, + { + "epoch": 4.63, + "learning_rate": 4.61994262693399e-06, + "loss": 0.0527, + "step": 99155 + }, + { + "epoch": 4.63, + "learning_rate": 4.619158841879203e-06, + "loss": 0.0727, + "step": 99160 + }, + { + "epoch": 4.63, + "learning_rate": 4.618375056824417e-06, + "loss": 0.1954, + "step": 99165 + }, + { + "epoch": 4.63, + "learning_rate": 4.61759127176963e-06, + "loss": 0.1393, + "step": 99170 + }, + { + "epoch": 4.63, + "learning_rate": 4.616807486714844e-06, + "loss": 0.2573, + "step": 99175 + }, + { + "epoch": 4.63, + "learning_rate": 4.616023701660057e-06, + "loss": 0.1002, + "step": 99180 + }, + { + "epoch": 4.63, + "learning_rate": 4.615239916605271e-06, + "loss": 0.0111, + "step": 99185 + }, + { + "epoch": 4.63, + "learning_rate": 4.614456131550484e-06, + "loss": 0.0747, + "step": 99190 + }, + { + "epoch": 4.63, + "learning_rate": 4.613672346495697e-06, + "loss": 0.0535, + "step": 99195 + }, + { + "epoch": 4.63, + "learning_rate": 4.612888561440911e-06, + "loss": 0.0439, + "step": 99200 + }, + { + "epoch": 4.63, + "learning_rate": 4.612104776386124e-06, + "loss": 0.068, + "step": 99205 + }, + { + "epoch": 4.63, + "learning_rate": 4.611320991331338e-06, + "loss": 0.085, + "step": 99210 + }, + { + "epoch": 4.63, + "learning_rate": 4.610537206276551e-06, + "loss": 0.1352, + "step": 99215 + }, + { + "epoch": 4.63, + "learning_rate": 4.609753421221764e-06, + "loss": 0.1039, + "step": 99220 + }, + { + "epoch": 4.63, + "learning_rate": 4.608969636166978e-06, + "loss": 0.296, + "step": 99225 + }, + { + "epoch": 4.63, + "learning_rate": 4.608185851112191e-06, + "loss": 0.0724, + "step": 99230 + }, + { + "epoch": 4.63, + "learning_rate": 4.607402066057405e-06, + "loss": 0.008, + "step": 99235 + }, + { + "epoch": 4.63, + "learning_rate": 4.606618281002618e-06, + "loss": 0.0358, + "step": 99240 + }, + { + "epoch": 4.63, + "learning_rate": 4.605834495947832e-06, + "loss": 0.0265, + "step": 99245 + }, + { + "epoch": 4.63, + "learning_rate": 4.605050710893045e-06, + "loss": 0.0432, + "step": 99250 + }, + { + "epoch": 4.63, + "learning_rate": 4.604266925838258e-06, + "loss": 0.0298, + "step": 99255 + }, + { + "epoch": 4.63, + "learning_rate": 4.603483140783472e-06, + "loss": 0.101, + "step": 99260 + }, + { + "epoch": 4.63, + "learning_rate": 4.602699355728685e-06, + "loss": 0.1657, + "step": 99265 + }, + { + "epoch": 4.63, + "learning_rate": 4.601915570673899e-06, + "loss": 0.2017, + "step": 99270 + }, + { + "epoch": 4.63, + "learning_rate": 4.601131785619112e-06, + "loss": 0.2428, + "step": 99275 + }, + { + "epoch": 4.63, + "learning_rate": 4.600348000564326e-06, + "loss": 0.0889, + "step": 99280 + }, + { + "epoch": 4.63, + "learning_rate": 4.599564215509539e-06, + "loss": 0.0376, + "step": 99285 + }, + { + "epoch": 4.63, + "learning_rate": 4.598780430454752e-06, + "loss": 0.0571, + "step": 99290 + }, + { + "epoch": 4.63, + "learning_rate": 4.597996645399966e-06, + "loss": 0.0663, + "step": 99295 + }, + { + "epoch": 4.63, + "learning_rate": 4.597212860345179e-06, + "loss": 0.0245, + "step": 99300 + }, + { + "epoch": 4.63, + "learning_rate": 4.596429075290393e-06, + "loss": 0.084, + "step": 99305 + }, + { + "epoch": 4.63, + "learning_rate": 4.595645290235607e-06, + "loss": 0.1281, + "step": 99310 + }, + { + "epoch": 4.63, + "learning_rate": 4.59486150518082e-06, + "loss": 0.2, + "step": 99315 + }, + { + "epoch": 4.63, + "learning_rate": 4.594077720126033e-06, + "loss": 0.0928, + "step": 99320 + }, + { + "epoch": 4.63, + "learning_rate": 4.593293935071247e-06, + "loss": 0.2263, + "step": 99325 + }, + { + "epoch": 4.63, + "learning_rate": 4.59251015001646e-06, + "loss": 0.0963, + "step": 99330 + }, + { + "epoch": 4.64, + "learning_rate": 4.591726364961674e-06, + "loss": 0.0292, + "step": 99335 + }, + { + "epoch": 4.64, + "learning_rate": 4.590942579906887e-06, + "loss": 0.0582, + "step": 99340 + }, + { + "epoch": 4.64, + "learning_rate": 4.590158794852101e-06, + "loss": 0.046, + "step": 99345 + }, + { + "epoch": 4.64, + "learning_rate": 4.589375009797314e-06, + "loss": 0.043, + "step": 99350 + }, + { + "epoch": 4.64, + "learning_rate": 4.588591224742527e-06, + "loss": 0.0467, + "step": 99355 + }, + { + "epoch": 4.64, + "learning_rate": 4.587807439687741e-06, + "loss": 0.1622, + "step": 99360 + }, + { + "epoch": 4.64, + "learning_rate": 4.587023654632954e-06, + "loss": 0.0681, + "step": 99365 + }, + { + "epoch": 4.64, + "learning_rate": 4.586239869578168e-06, + "loss": 0.1997, + "step": 99370 + }, + { + "epoch": 4.64, + "learning_rate": 4.585456084523381e-06, + "loss": 0.2206, + "step": 99375 + }, + { + "epoch": 4.64, + "learning_rate": 4.584672299468595e-06, + "loss": 0.0906, + "step": 99380 + }, + { + "epoch": 4.64, + "learning_rate": 4.583888514413808e-06, + "loss": 0.0321, + "step": 99385 + }, + { + "epoch": 4.64, + "learning_rate": 4.583104729359021e-06, + "loss": 0.0637, + "step": 99390 + }, + { + "epoch": 4.64, + "learning_rate": 4.582320944304235e-06, + "loss": 0.0361, + "step": 99395 + }, + { + "epoch": 4.64, + "learning_rate": 4.581537159249448e-06, + "loss": 0.0721, + "step": 99400 + }, + { + "epoch": 4.64, + "learning_rate": 4.580753374194662e-06, + "loss": 0.1105, + "step": 99405 + }, + { + "epoch": 4.64, + "learning_rate": 4.579969589139875e-06, + "loss": 0.0972, + "step": 99410 + }, + { + "epoch": 4.64, + "learning_rate": 4.579185804085088e-06, + "loss": 0.1117, + "step": 99415 + }, + { + "epoch": 4.64, + "learning_rate": 4.578402019030302e-06, + "loss": 0.165, + "step": 99420 + }, + { + "epoch": 4.64, + "learning_rate": 4.577618233975515e-06, + "loss": 0.1939, + "step": 99425 + }, + { + "epoch": 4.64, + "learning_rate": 4.576834448920729e-06, + "loss": 0.0918, + "step": 99430 + }, + { + "epoch": 4.64, + "learning_rate": 4.576050663865942e-06, + "loss": 0.0138, + "step": 99435 + }, + { + "epoch": 4.64, + "learning_rate": 4.5752668788111556e-06, + "loss": 0.0605, + "step": 99440 + }, + { + "epoch": 4.64, + "learning_rate": 4.574483093756369e-06, + "loss": 0.0318, + "step": 99445 + }, + { + "epoch": 4.64, + "learning_rate": 4.573699308701582e-06, + "loss": 0.0423, + "step": 99450 + }, + { + "epoch": 4.64, + "learning_rate": 4.572915523646796e-06, + "loss": 0.0584, + "step": 99455 + }, + { + "epoch": 4.64, + "learning_rate": 4.572131738592009e-06, + "loss": 0.0923, + "step": 99460 + }, + { + "epoch": 4.64, + "learning_rate": 4.5713479535372226e-06, + "loss": 0.1019, + "step": 99465 + }, + { + "epoch": 4.64, + "learning_rate": 4.570564168482436e-06, + "loss": 0.2771, + "step": 99470 + }, + { + "epoch": 4.64, + "learning_rate": 4.5697803834276495e-06, + "loss": 0.4815, + "step": 99475 + }, + { + "epoch": 4.64, + "learning_rate": 4.568996598372863e-06, + "loss": 0.0602, + "step": 99480 + }, + { + "epoch": 4.64, + "learning_rate": 4.568212813318076e-06, + "loss": 0.0252, + "step": 99485 + }, + { + "epoch": 4.64, + "learning_rate": 4.5674290282632895e-06, + "loss": 0.0183, + "step": 99490 + }, + { + "epoch": 4.64, + "learning_rate": 4.566645243208503e-06, + "loss": 0.0262, + "step": 99495 + }, + { + "epoch": 4.64, + "learning_rate": 4.5658614581537165e-06, + "loss": 0.0902, + "step": 99500 + }, + { + "epoch": 4.64, + "learning_rate": 4.5650776730989296e-06, + "loss": 0.099, + "step": 99505 + }, + { + "epoch": 4.64, + "learning_rate": 4.5642938880441435e-06, + "loss": 0.0742, + "step": 99510 + }, + { + "epoch": 4.64, + "learning_rate": 4.5635101029893565e-06, + "loss": 0.1073, + "step": 99515 + }, + { + "epoch": 4.64, + "learning_rate": 4.56272631793457e-06, + "loss": 0.2168, + "step": 99520 + }, + { + "epoch": 4.64, + "learning_rate": 4.5619425328797835e-06, + "loss": 0.3789, + "step": 99525 + }, + { + "epoch": 4.64, + "learning_rate": 4.5611587478249966e-06, + "loss": 0.0901, + "step": 99530 + }, + { + "epoch": 4.64, + "learning_rate": 4.5603749627702105e-06, + "loss": 0.0096, + "step": 99535 + }, + { + "epoch": 4.64, + "learning_rate": 4.5595911777154235e-06, + "loss": 0.0438, + "step": 99540 + }, + { + "epoch": 4.64, + "learning_rate": 4.558807392660637e-06, + "loss": 0.0671, + "step": 99545 + }, + { + "epoch": 4.65, + "learning_rate": 4.5580236076058505e-06, + "loss": 0.0396, + "step": 99550 + }, + { + "epoch": 4.65, + "learning_rate": 4.5572398225510635e-06, + "loss": 0.0452, + "step": 99555 + }, + { + "epoch": 4.65, + "learning_rate": 4.5564560374962774e-06, + "loss": 0.0448, + "step": 99560 + }, + { + "epoch": 4.65, + "learning_rate": 4.5556722524414905e-06, + "loss": 0.1305, + "step": 99565 + }, + { + "epoch": 4.65, + "learning_rate": 4.554888467386704e-06, + "loss": 0.1631, + "step": 99570 + }, + { + "epoch": 4.65, + "learning_rate": 4.554104682331918e-06, + "loss": 0.2078, + "step": 99575 + }, + { + "epoch": 4.65, + "learning_rate": 4.5533208972771305e-06, + "loss": 0.0835, + "step": 99580 + }, + { + "epoch": 4.65, + "learning_rate": 4.5525371122223444e-06, + "loss": 0.0182, + "step": 99585 + }, + { + "epoch": 4.65, + "learning_rate": 4.5517533271675575e-06, + "loss": 0.0456, + "step": 99590 + }, + { + "epoch": 4.65, + "learning_rate": 4.550969542112771e-06, + "loss": 0.0753, + "step": 99595 + }, + { + "epoch": 4.65, + "learning_rate": 4.550185757057985e-06, + "loss": 0.0423, + "step": 99600 + }, + { + "epoch": 4.65, + "learning_rate": 4.549401972003198e-06, + "loss": 0.1024, + "step": 99605 + }, + { + "epoch": 4.65, + "learning_rate": 4.548618186948411e-06, + "loss": 0.0633, + "step": 99610 + }, + { + "epoch": 4.65, + "learning_rate": 4.547834401893625e-06, + "loss": 0.1306, + "step": 99615 + }, + { + "epoch": 4.65, + "learning_rate": 4.547050616838838e-06, + "loss": 0.0914, + "step": 99620 + }, + { + "epoch": 4.65, + "learning_rate": 4.546266831784052e-06, + "loss": 0.3363, + "step": 99625 + }, + { + "epoch": 4.65, + "learning_rate": 4.545483046729265e-06, + "loss": 0.1153, + "step": 99630 + }, + { + "epoch": 4.65, + "learning_rate": 4.544699261674479e-06, + "loss": 0.0315, + "step": 99635 + }, + { + "epoch": 4.65, + "learning_rate": 4.543915476619692e-06, + "loss": 0.0346, + "step": 99640 + }, + { + "epoch": 4.65, + "learning_rate": 4.543131691564905e-06, + "loss": 0.0418, + "step": 99645 + }, + { + "epoch": 4.65, + "learning_rate": 4.542347906510119e-06, + "loss": 0.0681, + "step": 99650 + }, + { + "epoch": 4.65, + "learning_rate": 4.541564121455332e-06, + "loss": 0.0773, + "step": 99655 + }, + { + "epoch": 4.65, + "learning_rate": 4.540780336400546e-06, + "loss": 0.1295, + "step": 99660 + }, + { + "epoch": 4.65, + "learning_rate": 4.539996551345759e-06, + "loss": 0.1052, + "step": 99665 + }, + { + "epoch": 4.65, + "learning_rate": 4.539212766290973e-06, + "loss": 0.1512, + "step": 99670 + }, + { + "epoch": 4.65, + "learning_rate": 4.538428981236186e-06, + "loss": 0.1871, + "step": 99675 + }, + { + "epoch": 4.65, + "learning_rate": 4.537645196181399e-06, + "loss": 0.0809, + "step": 99680 + }, + { + "epoch": 4.65, + "learning_rate": 4.536861411126613e-06, + "loss": 0.0047, + "step": 99685 + }, + { + "epoch": 4.65, + "learning_rate": 4.536077626071826e-06, + "loss": 0.0064, + "step": 99690 + }, + { + "epoch": 4.65, + "learning_rate": 4.53529384101704e-06, + "loss": 0.0203, + "step": 99695 + }, + { + "epoch": 4.65, + "learning_rate": 4.534510055962253e-06, + "loss": 0.0274, + "step": 99700 + }, + { + "epoch": 4.65, + "learning_rate": 4.533726270907467e-06, + "loss": 0.056, + "step": 99705 + }, + { + "epoch": 4.65, + "learning_rate": 4.53294248585268e-06, + "loss": 0.0571, + "step": 99710 + }, + { + "epoch": 4.65, + "learning_rate": 4.532158700797893e-06, + "loss": 0.0781, + "step": 99715 + }, + { + "epoch": 4.65, + "learning_rate": 4.531374915743107e-06, + "loss": 0.2368, + "step": 99720 + }, + { + "epoch": 4.65, + "learning_rate": 4.53059113068832e-06, + "loss": 0.2704, + "step": 99725 + }, + { + "epoch": 4.65, + "learning_rate": 4.529807345633534e-06, + "loss": 0.0933, + "step": 99730 + }, + { + "epoch": 4.65, + "learning_rate": 4.529023560578747e-06, + "loss": 0.0763, + "step": 99735 + }, + { + "epoch": 4.65, + "learning_rate": 4.52823977552396e-06, + "loss": 0.0332, + "step": 99740 + }, + { + "epoch": 4.65, + "learning_rate": 4.527455990469174e-06, + "loss": 0.0182, + "step": 99745 + }, + { + "epoch": 4.65, + "learning_rate": 4.526672205414387e-06, + "loss": 0.0203, + "step": 99750 + }, + { + "epoch": 4.65, + "learning_rate": 4.525888420359601e-06, + "loss": 0.0483, + "step": 99755 + }, + { + "epoch": 4.65, + "learning_rate": 4.525104635304814e-06, + "loss": 0.0245, + "step": 99760 + }, + { + "epoch": 4.66, + "learning_rate": 4.524320850250028e-06, + "loss": 0.1421, + "step": 99765 + }, + { + "epoch": 4.66, + "learning_rate": 4.523537065195241e-06, + "loss": 0.1428, + "step": 99770 + }, + { + "epoch": 4.66, + "learning_rate": 4.522753280140454e-06, + "loss": 0.3552, + "step": 99775 + }, + { + "epoch": 4.66, + "learning_rate": 4.521969495085668e-06, + "loss": 0.0537, + "step": 99780 + }, + { + "epoch": 4.66, + "learning_rate": 4.521185710030881e-06, + "loss": 0.0148, + "step": 99785 + }, + { + "epoch": 4.66, + "learning_rate": 4.520401924976095e-06, + "loss": 0.0154, + "step": 99790 + }, + { + "epoch": 4.66, + "learning_rate": 4.519618139921308e-06, + "loss": 0.028, + "step": 99795 + }, + { + "epoch": 4.66, + "learning_rate": 4.518834354866522e-06, + "loss": 0.0268, + "step": 99800 + }, + { + "epoch": 4.66, + "learning_rate": 4.518050569811735e-06, + "loss": 0.0685, + "step": 99805 + }, + { + "epoch": 4.66, + "learning_rate": 4.517266784756948e-06, + "loss": 0.1018, + "step": 99810 + }, + { + "epoch": 4.66, + "learning_rate": 4.516482999702162e-06, + "loss": 0.1029, + "step": 99815 + }, + { + "epoch": 4.66, + "learning_rate": 4.515699214647375e-06, + "loss": 0.1975, + "step": 99820 + }, + { + "epoch": 4.66, + "learning_rate": 4.514915429592589e-06, + "loss": 0.3415, + "step": 99825 + }, + { + "epoch": 4.66, + "learning_rate": 4.514131644537802e-06, + "loss": 0.0967, + "step": 99830 + }, + { + "epoch": 4.66, + "learning_rate": 4.513347859483016e-06, + "loss": 0.0452, + "step": 99835 + }, + { + "epoch": 4.66, + "learning_rate": 4.512564074428229e-06, + "loss": 0.0264, + "step": 99840 + }, + { + "epoch": 4.66, + "learning_rate": 4.511780289373442e-06, + "loss": 0.0325, + "step": 99845 + }, + { + "epoch": 4.66, + "learning_rate": 4.510996504318656e-06, + "loss": 0.079, + "step": 99850 + }, + { + "epoch": 4.66, + "learning_rate": 4.510212719263869e-06, + "loss": 0.0827, + "step": 99855 + }, + { + "epoch": 4.66, + "learning_rate": 4.509428934209083e-06, + "loss": 0.1163, + "step": 99860 + }, + { + "epoch": 4.66, + "learning_rate": 4.508645149154297e-06, + "loss": 0.0894, + "step": 99865 + }, + { + "epoch": 4.66, + "learning_rate": 4.507861364099509e-06, + "loss": 0.1934, + "step": 99870 + }, + { + "epoch": 4.66, + "learning_rate": 4.507077579044723e-06, + "loss": 0.4402, + "step": 99875 + }, + { + "epoch": 4.66, + "learning_rate": 4.506293793989936e-06, + "loss": 0.0838, + "step": 99880 + }, + { + "epoch": 4.66, + "learning_rate": 4.50551000893515e-06, + "loss": 0.0098, + "step": 99885 + }, + { + "epoch": 4.66, + "learning_rate": 4.504726223880364e-06, + "loss": 0.0539, + "step": 99890 + }, + { + "epoch": 4.66, + "learning_rate": 4.503942438825577e-06, + "loss": 0.0893, + "step": 99895 + }, + { + "epoch": 4.66, + "learning_rate": 4.503158653770791e-06, + "loss": 0.0316, + "step": 99900 + }, + { + "epoch": 4.66, + "learning_rate": 4.502374868716004e-06, + "loss": 0.0923, + "step": 99905 + }, + { + "epoch": 4.66, + "learning_rate": 4.501591083661217e-06, + "loss": 0.1255, + "step": 99910 + }, + { + "epoch": 4.66, + "learning_rate": 4.500807298606431e-06, + "loss": 0.065, + "step": 99915 + }, + { + "epoch": 4.66, + "learning_rate": 4.500023513551644e-06, + "loss": 0.1271, + "step": 99920 + }, + { + "epoch": 4.66, + "learning_rate": 4.499239728496858e-06, + "loss": 0.3634, + "step": 99925 + }, + { + "epoch": 4.66, + "learning_rate": 4.498455943442071e-06, + "loss": 0.1126, + "step": 99930 + }, + { + "epoch": 4.66, + "learning_rate": 4.497672158387284e-06, + "loss": 0.0364, + "step": 99935 + }, + { + "epoch": 4.66, + "learning_rate": 4.496888373332498e-06, + "loss": 0.018, + "step": 99940 + }, + { + "epoch": 4.66, + "learning_rate": 4.496104588277711e-06, + "loss": 0.016, + "step": 99945 + }, + { + "epoch": 4.66, + "learning_rate": 4.495320803222925e-06, + "loss": 0.0291, + "step": 99950 + }, + { + "epoch": 4.66, + "learning_rate": 4.494537018168138e-06, + "loss": 0.0379, + "step": 99955 + }, + { + "epoch": 4.66, + "learning_rate": 4.493753233113352e-06, + "loss": 0.0958, + "step": 99960 + }, + { + "epoch": 4.66, + "learning_rate": 4.492969448058565e-06, + "loss": 0.0772, + "step": 99965 + }, + { + "epoch": 4.66, + "learning_rate": 4.492185663003778e-06, + "loss": 0.1524, + "step": 99970 + }, + { + "epoch": 4.66, + "learning_rate": 4.491401877948992e-06, + "loss": 0.1936, + "step": 99975 + }, + { + "epoch": 4.67, + "learning_rate": 4.490618092894205e-06, + "loss": 0.0644, + "step": 99980 + }, + { + "epoch": 4.67, + "learning_rate": 4.489834307839419e-06, + "loss": 0.0495, + "step": 99985 + }, + { + "epoch": 4.67, + "learning_rate": 4.489050522784632e-06, + "loss": 0.0511, + "step": 99990 + }, + { + "epoch": 4.67, + "learning_rate": 4.488266737729846e-06, + "loss": 0.043, + "step": 99995 + }, + { + "epoch": 4.67, + "learning_rate": 4.487482952675059e-06, + "loss": 0.0653, + "step": 100000 + }, + { + "epoch": 4.67, + "learning_rate": 4.486699167620272e-06, + "loss": 0.1064, + "step": 100005 + }, + { + "epoch": 4.67, + "learning_rate": 4.485915382565486e-06, + "loss": 0.1228, + "step": 100010 + }, + { + "epoch": 4.67, + "learning_rate": 4.485131597510699e-06, + "loss": 0.0842, + "step": 100015 + }, + { + "epoch": 4.67, + "learning_rate": 4.484347812455913e-06, + "loss": 0.163, + "step": 100020 + }, + { + "epoch": 4.67, + "learning_rate": 4.483564027401126e-06, + "loss": 0.2458, + "step": 100025 + }, + { + "epoch": 4.67, + "learning_rate": 4.48278024234634e-06, + "loss": 0.0753, + "step": 100030 + }, + { + "epoch": 4.67, + "learning_rate": 4.481996457291553e-06, + "loss": 0.0374, + "step": 100035 + }, + { + "epoch": 4.67, + "learning_rate": 4.481212672236766e-06, + "loss": 0.0364, + "step": 100040 + }, + { + "epoch": 4.67, + "learning_rate": 4.48042888718198e-06, + "loss": 0.0259, + "step": 100045 + }, + { + "epoch": 4.67, + "learning_rate": 4.479645102127193e-06, + "loss": 0.0265, + "step": 100050 + }, + { + "epoch": 4.67, + "learning_rate": 4.478861317072407e-06, + "loss": 0.0263, + "step": 100055 + }, + { + "epoch": 4.67, + "learning_rate": 4.47807753201762e-06, + "loss": 0.085, + "step": 100060 + }, + { + "epoch": 4.67, + "learning_rate": 4.477293746962833e-06, + "loss": 0.1516, + "step": 100065 + }, + { + "epoch": 4.67, + "learning_rate": 4.476509961908047e-06, + "loss": 0.1031, + "step": 100070 + }, + { + "epoch": 4.67, + "learning_rate": 4.47572617685326e-06, + "loss": 0.2501, + "step": 100075 + }, + { + "epoch": 4.67, + "learning_rate": 4.474942391798474e-06, + "loss": 0.0514, + "step": 100080 + }, + { + "epoch": 4.67, + "learning_rate": 4.474158606743687e-06, + "loss": 0.0301, + "step": 100085 + }, + { + "epoch": 4.67, + "learning_rate": 4.473374821688901e-06, + "loss": 0.0145, + "step": 100090 + }, + { + "epoch": 4.67, + "learning_rate": 4.472591036634114e-06, + "loss": 0.0621, + "step": 100095 + }, + { + "epoch": 4.67, + "learning_rate": 4.471807251579327e-06, + "loss": 0.0611, + "step": 100100 + }, + { + "epoch": 4.67, + "learning_rate": 4.471023466524541e-06, + "loss": 0.1244, + "step": 100105 + }, + { + "epoch": 4.67, + "learning_rate": 4.470239681469754e-06, + "loss": 0.066, + "step": 100110 + }, + { + "epoch": 4.67, + "learning_rate": 4.469455896414968e-06, + "loss": 0.1101, + "step": 100115 + }, + { + "epoch": 4.67, + "learning_rate": 4.468672111360181e-06, + "loss": 0.1427, + "step": 100120 + }, + { + "epoch": 4.67, + "learning_rate": 4.467888326305395e-06, + "loss": 0.4447, + "step": 100125 + }, + { + "epoch": 4.67, + "learning_rate": 4.467104541250608e-06, + "loss": 0.0871, + "step": 100130 + }, + { + "epoch": 4.67, + "learning_rate": 4.466320756195821e-06, + "loss": 0.0325, + "step": 100135 + }, + { + "epoch": 4.67, + "learning_rate": 4.465536971141035e-06, + "loss": 0.0313, + "step": 100140 + }, + { + "epoch": 4.67, + "learning_rate": 4.464753186086248e-06, + "loss": 0.1006, + "step": 100145 + }, + { + "epoch": 4.67, + "learning_rate": 4.463969401031462e-06, + "loss": 0.0335, + "step": 100150 + }, + { + "epoch": 4.67, + "learning_rate": 4.4631856159766755e-06, + "loss": 0.0333, + "step": 100155 + }, + { + "epoch": 4.67, + "learning_rate": 4.4624018309218885e-06, + "loss": 0.1575, + "step": 100160 + }, + { + "epoch": 4.67, + "learning_rate": 4.461618045867102e-06, + "loss": 0.1068, + "step": 100165 + }, + { + "epoch": 4.67, + "learning_rate": 4.460834260812315e-06, + "loss": 0.1535, + "step": 100170 + }, + { + "epoch": 4.67, + "learning_rate": 4.4600504757575286e-06, + "loss": 0.3579, + "step": 100175 + }, + { + "epoch": 4.67, + "learning_rate": 4.4592666907027425e-06, + "loss": 0.0549, + "step": 100180 + }, + { + "epoch": 4.67, + "learning_rate": 4.4584829056479555e-06, + "loss": 0.029, + "step": 100185 + }, + { + "epoch": 4.68, + "learning_rate": 4.4576991205931694e-06, + "loss": 0.0116, + "step": 100190 + }, + { + "epoch": 4.68, + "learning_rate": 4.4569153355383825e-06, + "loss": 0.0231, + "step": 100195 + }, + { + "epoch": 4.68, + "learning_rate": 4.4561315504835956e-06, + "loss": 0.0429, + "step": 100200 + }, + { + "epoch": 4.68, + "learning_rate": 4.4553477654288095e-06, + "loss": 0.05, + "step": 100205 + }, + { + "epoch": 4.68, + "learning_rate": 4.4545639803740225e-06, + "loss": 0.1529, + "step": 100210 + }, + { + "epoch": 4.68, + "learning_rate": 4.453780195319236e-06, + "loss": 0.1274, + "step": 100215 + }, + { + "epoch": 4.68, + "learning_rate": 4.4529964102644495e-06, + "loss": 0.1877, + "step": 100220 + }, + { + "epoch": 4.68, + "learning_rate": 4.452212625209663e-06, + "loss": 0.4301, + "step": 100225 + }, + { + "epoch": 4.68, + "learning_rate": 4.4514288401548764e-06, + "loss": 0.0988, + "step": 100230 + }, + { + "epoch": 4.68, + "learning_rate": 4.4506450551000895e-06, + "loss": 0.0515, + "step": 100235 + }, + { + "epoch": 4.68, + "learning_rate": 4.449861270045303e-06, + "loss": 0.0358, + "step": 100240 + }, + { + "epoch": 4.68, + "learning_rate": 4.4490774849905165e-06, + "loss": 0.0721, + "step": 100245 + }, + { + "epoch": 4.68, + "learning_rate": 4.44829369993573e-06, + "loss": 0.0414, + "step": 100250 + }, + { + "epoch": 4.68, + "learning_rate": 4.4475099148809434e-06, + "loss": 0.0563, + "step": 100255 + }, + { + "epoch": 4.68, + "learning_rate": 4.4467261298261565e-06, + "loss": 0.1743, + "step": 100260 + }, + { + "epoch": 4.68, + "learning_rate": 4.44594234477137e-06, + "loss": 0.0877, + "step": 100265 + }, + { + "epoch": 4.68, + "learning_rate": 4.4451585597165835e-06, + "loss": 0.1266, + "step": 100270 + }, + { + "epoch": 4.68, + "learning_rate": 4.444374774661797e-06, + "loss": 0.287, + "step": 100275 + }, + { + "epoch": 4.68, + "learning_rate": 4.44359098960701e-06, + "loss": 0.1152, + "step": 100280 + }, + { + "epoch": 4.68, + "learning_rate": 4.442807204552224e-06, + "loss": 0.0285, + "step": 100285 + }, + { + "epoch": 4.68, + "learning_rate": 4.442023419497437e-06, + "loss": 0.0665, + "step": 100290 + }, + { + "epoch": 4.68, + "learning_rate": 4.4412396344426504e-06, + "loss": 0.0313, + "step": 100295 + }, + { + "epoch": 4.68, + "learning_rate": 4.440455849387864e-06, + "loss": 0.052, + "step": 100300 + }, + { + "epoch": 4.68, + "learning_rate": 4.439672064333077e-06, + "loss": 0.0535, + "step": 100305 + }, + { + "epoch": 4.68, + "learning_rate": 4.438888279278291e-06, + "loss": 0.0936, + "step": 100310 + }, + { + "epoch": 4.68, + "learning_rate": 4.438104494223504e-06, + "loss": 0.0685, + "step": 100315 + }, + { + "epoch": 4.68, + "learning_rate": 4.437320709168718e-06, + "loss": 0.2202, + "step": 100320 + }, + { + "epoch": 4.68, + "learning_rate": 4.436536924113931e-06, + "loss": 0.1813, + "step": 100325 + }, + { + "epoch": 4.68, + "learning_rate": 4.435753139059144e-06, + "loss": 0.0361, + "step": 100330 + }, + { + "epoch": 4.68, + "learning_rate": 4.434969354004358e-06, + "loss": 0.0256, + "step": 100335 + }, + { + "epoch": 4.68, + "learning_rate": 4.434185568949571e-06, + "loss": 0.0306, + "step": 100340 + }, + { + "epoch": 4.68, + "learning_rate": 4.433401783894785e-06, + "loss": 0.0158, + "step": 100345 + }, + { + "epoch": 4.68, + "learning_rate": 4.432617998839998e-06, + "loss": 0.0229, + "step": 100350 + }, + { + "epoch": 4.68, + "learning_rate": 4.431834213785212e-06, + "loss": 0.081, + "step": 100355 + }, + { + "epoch": 4.68, + "learning_rate": 4.431050428730425e-06, + "loss": 0.1585, + "step": 100360 + }, + { + "epoch": 4.68, + "learning_rate": 4.430266643675638e-06, + "loss": 0.1321, + "step": 100365 + }, + { + "epoch": 4.68, + "learning_rate": 4.429482858620852e-06, + "loss": 0.1093, + "step": 100370 + }, + { + "epoch": 4.68, + "learning_rate": 4.428699073566065e-06, + "loss": 0.1172, + "step": 100375 + }, + { + "epoch": 4.68, + "learning_rate": 4.427915288511279e-06, + "loss": 0.0643, + "step": 100380 + }, + { + "epoch": 4.68, + "learning_rate": 4.427131503456492e-06, + "loss": 0.0622, + "step": 100385 + }, + { + "epoch": 4.68, + "learning_rate": 4.426347718401705e-06, + "loss": 0.0311, + "step": 100390 + }, + { + "epoch": 4.68, + "learning_rate": 4.425563933346919e-06, + "loss": 0.0394, + "step": 100395 + }, + { + "epoch": 4.68, + "learning_rate": 4.424780148292132e-06, + "loss": 0.0623, + "step": 100400 + }, + { + "epoch": 4.69, + "learning_rate": 4.423996363237346e-06, + "loss": 0.0637, + "step": 100405 + }, + { + "epoch": 4.69, + "learning_rate": 4.423212578182559e-06, + "loss": 0.0639, + "step": 100410 + }, + { + "epoch": 4.69, + "learning_rate": 4.422428793127773e-06, + "loss": 0.145, + "step": 100415 + }, + { + "epoch": 4.69, + "learning_rate": 4.421645008072987e-06, + "loss": 0.2184, + "step": 100420 + }, + { + "epoch": 4.69, + "learning_rate": 4.420861223018199e-06, + "loss": 0.3487, + "step": 100425 + }, + { + "epoch": 4.69, + "learning_rate": 4.420077437963413e-06, + "loss": 0.0608, + "step": 100430 + }, + { + "epoch": 4.69, + "learning_rate": 4.419293652908626e-06, + "loss": 0.0113, + "step": 100435 + }, + { + "epoch": 4.69, + "learning_rate": 4.41850986785384e-06, + "loss": 0.0447, + "step": 100440 + }, + { + "epoch": 4.69, + "learning_rate": 4.417726082799054e-06, + "loss": 0.0929, + "step": 100445 + }, + { + "epoch": 4.69, + "learning_rate": 4.416942297744267e-06, + "loss": 0.0528, + "step": 100450 + }, + { + "epoch": 4.69, + "learning_rate": 4.41615851268948e-06, + "loss": 0.0348, + "step": 100455 + }, + { + "epoch": 4.69, + "learning_rate": 4.415374727634693e-06, + "loss": 0.0728, + "step": 100460 + }, + { + "epoch": 4.69, + "learning_rate": 4.414590942579907e-06, + "loss": 0.0971, + "step": 100465 + }, + { + "epoch": 4.69, + "learning_rate": 4.413807157525121e-06, + "loss": 0.1023, + "step": 100470 + }, + { + "epoch": 4.69, + "learning_rate": 4.413023372470334e-06, + "loss": 0.3496, + "step": 100475 + }, + { + "epoch": 4.69, + "learning_rate": 4.412239587415548e-06, + "loss": 0.0878, + "step": 100480 + }, + { + "epoch": 4.69, + "learning_rate": 4.411455802360761e-06, + "loss": 0.0398, + "step": 100485 + }, + { + "epoch": 4.69, + "learning_rate": 4.410672017305974e-06, + "loss": 0.053, + "step": 100490 + }, + { + "epoch": 4.69, + "learning_rate": 4.409888232251188e-06, + "loss": 0.0338, + "step": 100495 + }, + { + "epoch": 4.69, + "learning_rate": 4.409104447196401e-06, + "loss": 0.0303, + "step": 100500 + }, + { + "epoch": 4.69, + "learning_rate": 4.408320662141615e-06, + "loss": 0.0656, + "step": 100505 + }, + { + "epoch": 4.69, + "learning_rate": 4.407536877086828e-06, + "loss": 0.046, + "step": 100510 + }, + { + "epoch": 4.69, + "learning_rate": 4.406753092032042e-06, + "loss": 0.0531, + "step": 100515 + }, + { + "epoch": 4.69, + "learning_rate": 4.405969306977255e-06, + "loss": 0.2252, + "step": 100520 + }, + { + "epoch": 4.69, + "learning_rate": 4.405185521922468e-06, + "loss": 0.2124, + "step": 100525 + }, + { + "epoch": 4.69, + "learning_rate": 4.404401736867682e-06, + "loss": 0.0876, + "step": 100530 + }, + { + "epoch": 4.69, + "learning_rate": 4.403617951812895e-06, + "loss": 0.0197, + "step": 100535 + }, + { + "epoch": 4.69, + "learning_rate": 4.402834166758109e-06, + "loss": 0.0438, + "step": 100540 + }, + { + "epoch": 4.69, + "learning_rate": 4.402050381703322e-06, + "loss": 0.0313, + "step": 100545 + }, + { + "epoch": 4.69, + "learning_rate": 4.401266596648536e-06, + "loss": 0.0481, + "step": 100550 + }, + { + "epoch": 4.69, + "learning_rate": 4.400482811593749e-06, + "loss": 0.0594, + "step": 100555 + }, + { + "epoch": 4.69, + "learning_rate": 4.399699026538962e-06, + "loss": 0.1185, + "step": 100560 + }, + { + "epoch": 4.69, + "learning_rate": 4.398915241484176e-06, + "loss": 0.129, + "step": 100565 + }, + { + "epoch": 4.69, + "learning_rate": 4.398131456429389e-06, + "loss": 0.1099, + "step": 100570 + }, + { + "epoch": 4.69, + "learning_rate": 4.397347671374603e-06, + "loss": 0.3222, + "step": 100575 + }, + { + "epoch": 4.69, + "learning_rate": 4.396563886319816e-06, + "loss": 0.0973, + "step": 100580 + }, + { + "epoch": 4.69, + "learning_rate": 4.395780101265029e-06, + "loss": 0.0292, + "step": 100585 + }, + { + "epoch": 4.69, + "learning_rate": 4.394996316210243e-06, + "loss": 0.0147, + "step": 100590 + }, + { + "epoch": 4.69, + "learning_rate": 4.394212531155456e-06, + "loss": 0.0115, + "step": 100595 + }, + { + "epoch": 4.69, + "learning_rate": 4.39342874610067e-06, + "loss": 0.0616, + "step": 100600 + }, + { + "epoch": 4.69, + "learning_rate": 4.392644961045883e-06, + "loss": 0.0233, + "step": 100605 + }, + { + "epoch": 4.69, + "learning_rate": 4.391861175991097e-06, + "loss": 0.105, + "step": 100610 + }, + { + "epoch": 4.69, + "learning_rate": 4.39107739093631e-06, + "loss": 0.1102, + "step": 100615 + }, + { + "epoch": 4.7, + "learning_rate": 4.390293605881523e-06, + "loss": 0.1404, + "step": 100620 + }, + { + "epoch": 4.7, + "learning_rate": 4.389509820826737e-06, + "loss": 0.2775, + "step": 100625 + }, + { + "epoch": 4.7, + "learning_rate": 4.38872603577195e-06, + "loss": 0.0757, + "step": 100630 + }, + { + "epoch": 4.7, + "learning_rate": 4.387942250717164e-06, + "loss": 0.0186, + "step": 100635 + }, + { + "epoch": 4.7, + "learning_rate": 4.387158465662377e-06, + "loss": 0.0139, + "step": 100640 + }, + { + "epoch": 4.7, + "learning_rate": 4.386374680607591e-06, + "loss": 0.0193, + "step": 100645 + }, + { + "epoch": 4.7, + "learning_rate": 4.385590895552804e-06, + "loss": 0.0621, + "step": 100650 + }, + { + "epoch": 4.7, + "learning_rate": 4.384807110498017e-06, + "loss": 0.1055, + "step": 100655 + }, + { + "epoch": 4.7, + "learning_rate": 4.384023325443231e-06, + "loss": 0.0997, + "step": 100660 + }, + { + "epoch": 4.7, + "learning_rate": 4.383239540388444e-06, + "loss": 0.0955, + "step": 100665 + }, + { + "epoch": 4.7, + "learning_rate": 4.382455755333658e-06, + "loss": 0.0834, + "step": 100670 + }, + { + "epoch": 4.7, + "learning_rate": 4.381671970278871e-06, + "loss": 0.1876, + "step": 100675 + }, + { + "epoch": 4.7, + "learning_rate": 4.380888185224085e-06, + "loss": 0.1019, + "step": 100680 + }, + { + "epoch": 4.7, + "learning_rate": 4.380104400169298e-06, + "loss": 0.0129, + "step": 100685 + }, + { + "epoch": 4.7, + "learning_rate": 4.379320615114511e-06, + "loss": 0.0639, + "step": 100690 + }, + { + "epoch": 4.7, + "learning_rate": 4.378536830059725e-06, + "loss": 0.036, + "step": 100695 + }, + { + "epoch": 4.7, + "learning_rate": 4.377753045004938e-06, + "loss": 0.0129, + "step": 100700 + }, + { + "epoch": 4.7, + "learning_rate": 4.376969259950152e-06, + "loss": 0.0503, + "step": 100705 + }, + { + "epoch": 4.7, + "learning_rate": 4.376185474895366e-06, + "loss": 0.1166, + "step": 100710 + }, + { + "epoch": 4.7, + "learning_rate": 4.375401689840578e-06, + "loss": 0.0856, + "step": 100715 + }, + { + "epoch": 4.7, + "learning_rate": 4.374617904785792e-06, + "loss": 0.1412, + "step": 100720 + }, + { + "epoch": 4.7, + "learning_rate": 4.373834119731005e-06, + "loss": 0.2159, + "step": 100725 + }, + { + "epoch": 4.7, + "learning_rate": 4.373050334676219e-06, + "loss": 0.1446, + "step": 100730 + }, + { + "epoch": 4.7, + "learning_rate": 4.372266549621433e-06, + "loss": 0.0103, + "step": 100735 + }, + { + "epoch": 4.7, + "learning_rate": 4.371482764566646e-06, + "loss": 0.0469, + "step": 100740 + }, + { + "epoch": 4.7, + "learning_rate": 4.37069897951186e-06, + "loss": 0.0531, + "step": 100745 + }, + { + "epoch": 4.7, + "learning_rate": 4.369915194457072e-06, + "loss": 0.0338, + "step": 100750 + }, + { + "epoch": 4.7, + "learning_rate": 4.369131409402286e-06, + "loss": 0.0647, + "step": 100755 + }, + { + "epoch": 4.7, + "learning_rate": 4.3683476243475e-06, + "loss": 0.0474, + "step": 100760 + }, + { + "epoch": 4.7, + "learning_rate": 4.367563839292713e-06, + "loss": 0.135, + "step": 100765 + }, + { + "epoch": 4.7, + "learning_rate": 4.366780054237927e-06, + "loss": 0.1614, + "step": 100770 + }, + { + "epoch": 4.7, + "learning_rate": 4.36599626918314e-06, + "loss": 0.4187, + "step": 100775 + }, + { + "epoch": 4.7, + "learning_rate": 4.365212484128353e-06, + "loss": 0.0599, + "step": 100780 + }, + { + "epoch": 4.7, + "learning_rate": 4.364428699073567e-06, + "loss": 0.0413, + "step": 100785 + }, + { + "epoch": 4.7, + "learning_rate": 4.36364491401878e-06, + "loss": 0.0776, + "step": 100790 + }, + { + "epoch": 4.7, + "learning_rate": 4.362861128963994e-06, + "loss": 0.1015, + "step": 100795 + }, + { + "epoch": 4.7, + "learning_rate": 4.362077343909207e-06, + "loss": 0.0761, + "step": 100800 + }, + { + "epoch": 4.7, + "learning_rate": 4.3612935588544206e-06, + "loss": 0.0833, + "step": 100805 + }, + { + "epoch": 4.7, + "learning_rate": 4.360509773799634e-06, + "loss": 0.0517, + "step": 100810 + }, + { + "epoch": 4.7, + "learning_rate": 4.359725988744847e-06, + "loss": 0.1294, + "step": 100815 + }, + { + "epoch": 4.7, + "learning_rate": 4.3589422036900606e-06, + "loss": 0.1531, + "step": 100820 + }, + { + "epoch": 4.7, + "learning_rate": 4.358158418635274e-06, + "loss": 0.2448, + "step": 100825 + }, + { + "epoch": 4.7, + "learning_rate": 4.3573746335804875e-06, + "loss": 0.071, + "step": 100830 + }, + { + "epoch": 4.71, + "learning_rate": 4.356590848525701e-06, + "loss": 0.0241, + "step": 100835 + }, + { + "epoch": 4.71, + "learning_rate": 4.3558070634709145e-06, + "loss": 0.0162, + "step": 100840 + }, + { + "epoch": 4.71, + "learning_rate": 4.3550232784161276e-06, + "loss": 0.0181, + "step": 100845 + }, + { + "epoch": 4.71, + "learning_rate": 4.354239493361341e-06, + "loss": 0.0426, + "step": 100850 + }, + { + "epoch": 4.71, + "learning_rate": 4.3534557083065545e-06, + "loss": 0.1073, + "step": 100855 + }, + { + "epoch": 4.71, + "learning_rate": 4.352671923251768e-06, + "loss": 0.1044, + "step": 100860 + }, + { + "epoch": 4.71, + "learning_rate": 4.3518881381969815e-06, + "loss": 0.1284, + "step": 100865 + }, + { + "epoch": 4.71, + "learning_rate": 4.3511043531421946e-06, + "loss": 0.2058, + "step": 100870 + }, + { + "epoch": 4.71, + "learning_rate": 4.3503205680874085e-06, + "loss": 0.3124, + "step": 100875 + }, + { + "epoch": 4.71, + "learning_rate": 4.3495367830326215e-06, + "loss": 0.1156, + "step": 100880 + }, + { + "epoch": 4.71, + "learning_rate": 4.3487529979778346e-06, + "loss": 0.0418, + "step": 100885 + }, + { + "epoch": 4.71, + "learning_rate": 4.3479692129230485e-06, + "loss": 0.0096, + "step": 100890 + }, + { + "epoch": 4.71, + "learning_rate": 4.3471854278682615e-06, + "loss": 0.036, + "step": 100895 + }, + { + "epoch": 4.71, + "learning_rate": 4.3464016428134754e-06, + "loss": 0.0427, + "step": 100900 + }, + { + "epoch": 4.71, + "learning_rate": 4.3456178577586885e-06, + "loss": 0.0462, + "step": 100905 + }, + { + "epoch": 4.71, + "learning_rate": 4.3448340727039016e-06, + "loss": 0.0838, + "step": 100910 + }, + { + "epoch": 4.71, + "learning_rate": 4.3440502876491155e-06, + "loss": 0.0962, + "step": 100915 + }, + { + "epoch": 4.71, + "learning_rate": 4.3432665025943285e-06, + "loss": 0.1006, + "step": 100920 + }, + { + "epoch": 4.71, + "learning_rate": 4.3424827175395424e-06, + "loss": 0.2515, + "step": 100925 + }, + { + "epoch": 4.71, + "learning_rate": 4.3416989324847555e-06, + "loss": 0.085, + "step": 100930 + }, + { + "epoch": 4.71, + "learning_rate": 4.340915147429969e-06, + "loss": 0.0464, + "step": 100935 + }, + { + "epoch": 4.71, + "learning_rate": 4.3401313623751825e-06, + "loss": 0.0113, + "step": 100940 + }, + { + "epoch": 4.71, + "learning_rate": 4.3393475773203955e-06, + "loss": 0.0565, + "step": 100945 + }, + { + "epoch": 4.71, + "learning_rate": 4.338563792265609e-06, + "loss": 0.0574, + "step": 100950 + }, + { + "epoch": 4.71, + "learning_rate": 4.3377800072108225e-06, + "loss": 0.0767, + "step": 100955 + }, + { + "epoch": 4.71, + "learning_rate": 4.336996222156036e-06, + "loss": 0.148, + "step": 100960 + }, + { + "epoch": 4.71, + "learning_rate": 4.3362124371012494e-06, + "loss": 0.0429, + "step": 100965 + }, + { + "epoch": 4.71, + "learning_rate": 4.335428652046463e-06, + "loss": 0.1585, + "step": 100970 + }, + { + "epoch": 4.71, + "learning_rate": 4.334644866991676e-06, + "loss": 0.3714, + "step": 100975 + }, + { + "epoch": 4.71, + "learning_rate": 4.3338610819368895e-06, + "loss": 0.1246, + "step": 100980 + }, + { + "epoch": 4.71, + "learning_rate": 4.333077296882103e-06, + "loss": 0.0045, + "step": 100985 + }, + { + "epoch": 4.71, + "learning_rate": 4.3322935118273164e-06, + "loss": 0.0094, + "step": 100990 + }, + { + "epoch": 4.71, + "learning_rate": 4.33150972677253e-06, + "loss": 0.1085, + "step": 100995 + }, + { + "epoch": 4.71, + "learning_rate": 4.330725941717744e-06, + "loss": 0.0222, + "step": 101000 + }, + { + "epoch": 4.71, + "learning_rate": 4.329942156662957e-06, + "loss": 0.0283, + "step": 101005 + }, + { + "epoch": 4.71, + "learning_rate": 4.32915837160817e-06, + "loss": 0.0531, + "step": 101010 + }, + { + "epoch": 4.71, + "learning_rate": 4.328374586553383e-06, + "loss": 0.1308, + "step": 101015 + }, + { + "epoch": 4.71, + "learning_rate": 4.327590801498597e-06, + "loss": 0.122, + "step": 101020 + }, + { + "epoch": 4.71, + "learning_rate": 4.326807016443811e-06, + "loss": 0.2022, + "step": 101025 + }, + { + "epoch": 4.71, + "learning_rate": 4.326023231389024e-06, + "loss": 0.0853, + "step": 101030 + }, + { + "epoch": 4.71, + "learning_rate": 4.325239446334238e-06, + "loss": 0.0304, + "step": 101035 + }, + { + "epoch": 4.71, + "learning_rate": 4.32445566127945e-06, + "loss": 0.0448, + "step": 101040 + }, + { + "epoch": 4.71, + "learning_rate": 4.323671876224664e-06, + "loss": 0.0421, + "step": 101045 + }, + { + "epoch": 4.72, + "learning_rate": 4.322888091169878e-06, + "loss": 0.0299, + "step": 101050 + }, + { + "epoch": 4.72, + "learning_rate": 4.322104306115091e-06, + "loss": 0.1093, + "step": 101055 + }, + { + "epoch": 4.72, + "learning_rate": 4.321320521060305e-06, + "loss": 0.1019, + "step": 101060 + }, + { + "epoch": 4.72, + "learning_rate": 4.320536736005518e-06, + "loss": 0.1684, + "step": 101065 + }, + { + "epoch": 4.72, + "learning_rate": 4.319752950950732e-06, + "loss": 0.1416, + "step": 101070 + }, + { + "epoch": 4.72, + "learning_rate": 4.318969165895945e-06, + "loss": 0.1617, + "step": 101075 + }, + { + "epoch": 4.72, + "learning_rate": 4.318185380841158e-06, + "loss": 0.0798, + "step": 101080 + }, + { + "epoch": 4.72, + "learning_rate": 4.317401595786372e-06, + "loss": 0.0269, + "step": 101085 + }, + { + "epoch": 4.72, + "learning_rate": 4.316617810731585e-06, + "loss": 0.0215, + "step": 101090 + }, + { + "epoch": 4.72, + "learning_rate": 4.315834025676799e-06, + "loss": 0.0319, + "step": 101095 + }, + { + "epoch": 4.72, + "learning_rate": 4.315050240622012e-06, + "loss": 0.1137, + "step": 101100 + }, + { + "epoch": 4.72, + "learning_rate": 4.314266455567225e-06, + "loss": 0.0468, + "step": 101105 + }, + { + "epoch": 4.72, + "learning_rate": 4.313482670512439e-06, + "loss": 0.1588, + "step": 101110 + }, + { + "epoch": 4.72, + "learning_rate": 4.312698885457652e-06, + "loss": 0.1657, + "step": 101115 + }, + { + "epoch": 4.72, + "learning_rate": 4.311915100402866e-06, + "loss": 0.1466, + "step": 101120 + }, + { + "epoch": 4.72, + "learning_rate": 4.311131315348079e-06, + "loss": 0.2507, + "step": 101125 + }, + { + "epoch": 4.72, + "learning_rate": 4.310347530293293e-06, + "loss": 0.1321, + "step": 101130 + }, + { + "epoch": 4.72, + "learning_rate": 4.309563745238506e-06, + "loss": 0.0487, + "step": 101135 + }, + { + "epoch": 4.72, + "learning_rate": 4.308779960183719e-06, + "loss": 0.0144, + "step": 101140 + }, + { + "epoch": 4.72, + "learning_rate": 4.307996175128933e-06, + "loss": 0.0329, + "step": 101145 + }, + { + "epoch": 4.72, + "learning_rate": 4.307212390074146e-06, + "loss": 0.0198, + "step": 101150 + }, + { + "epoch": 4.72, + "learning_rate": 4.30642860501936e-06, + "loss": 0.0716, + "step": 101155 + }, + { + "epoch": 4.72, + "learning_rate": 4.305644819964573e-06, + "loss": 0.0512, + "step": 101160 + }, + { + "epoch": 4.72, + "learning_rate": 4.304861034909787e-06, + "loss": 0.1013, + "step": 101165 + }, + { + "epoch": 4.72, + "learning_rate": 4.304077249855e-06, + "loss": 0.1608, + "step": 101170 + }, + { + "epoch": 4.72, + "learning_rate": 4.303293464800213e-06, + "loss": 0.2271, + "step": 101175 + }, + { + "epoch": 4.72, + "learning_rate": 4.302509679745427e-06, + "loss": 0.1106, + "step": 101180 + }, + { + "epoch": 4.72, + "learning_rate": 4.30172589469064e-06, + "loss": 0.0343, + "step": 101185 + }, + { + "epoch": 4.72, + "learning_rate": 4.300942109635854e-06, + "loss": 0.0277, + "step": 101190 + }, + { + "epoch": 4.72, + "learning_rate": 4.300158324581067e-06, + "loss": 0.0313, + "step": 101195 + }, + { + "epoch": 4.72, + "learning_rate": 4.299374539526281e-06, + "loss": 0.0511, + "step": 101200 + }, + { + "epoch": 4.72, + "learning_rate": 4.298590754471494e-06, + "loss": 0.0606, + "step": 101205 + }, + { + "epoch": 4.72, + "learning_rate": 4.297806969416707e-06, + "loss": 0.0386, + "step": 101210 + }, + { + "epoch": 4.72, + "learning_rate": 4.297023184361921e-06, + "loss": 0.0875, + "step": 101215 + }, + { + "epoch": 4.72, + "learning_rate": 4.296239399307134e-06, + "loss": 0.1837, + "step": 101220 + }, + { + "epoch": 4.72, + "learning_rate": 4.295455614252348e-06, + "loss": 0.3151, + "step": 101225 + }, + { + "epoch": 4.72, + "learning_rate": 4.294671829197561e-06, + "loss": 0.1033, + "step": 101230 + }, + { + "epoch": 4.72, + "learning_rate": 4.293888044142774e-06, + "loss": 0.0009, + "step": 101235 + }, + { + "epoch": 4.72, + "learning_rate": 4.293104259087988e-06, + "loss": 0.0203, + "step": 101240 + }, + { + "epoch": 4.72, + "learning_rate": 4.292320474033201e-06, + "loss": 0.0396, + "step": 101245 + }, + { + "epoch": 4.72, + "learning_rate": 4.291536688978415e-06, + "loss": 0.0467, + "step": 101250 + }, + { + "epoch": 4.72, + "learning_rate": 4.290752903923628e-06, + "loss": 0.0414, + "step": 101255 + }, + { + "epoch": 4.72, + "learning_rate": 4.289969118868842e-06, + "loss": 0.0745, + "step": 101260 + }, + { + "epoch": 4.73, + "learning_rate": 4.289185333814056e-06, + "loss": 0.1038, + "step": 101265 + }, + { + "epoch": 4.73, + "learning_rate": 4.288401548759268e-06, + "loss": 0.2039, + "step": 101270 + }, + { + "epoch": 4.73, + "learning_rate": 4.287617763704482e-06, + "loss": 0.2246, + "step": 101275 + }, + { + "epoch": 4.73, + "learning_rate": 4.286833978649695e-06, + "loss": 0.1132, + "step": 101280 + }, + { + "epoch": 4.73, + "learning_rate": 4.286050193594909e-06, + "loss": 0.0273, + "step": 101285 + }, + { + "epoch": 4.73, + "learning_rate": 4.285266408540123e-06, + "loss": 0.0254, + "step": 101290 + }, + { + "epoch": 4.73, + "learning_rate": 4.284482623485336e-06, + "loss": 0.0892, + "step": 101295 + }, + { + "epoch": 4.73, + "learning_rate": 4.283698838430549e-06, + "loss": 0.0356, + "step": 101300 + }, + { + "epoch": 4.73, + "learning_rate": 4.282915053375762e-06, + "loss": 0.0393, + "step": 101305 + }, + { + "epoch": 4.73, + "learning_rate": 4.282131268320976e-06, + "loss": 0.08, + "step": 101310 + }, + { + "epoch": 4.73, + "learning_rate": 4.28134748326619e-06, + "loss": 0.1312, + "step": 101315 + }, + { + "epoch": 4.73, + "learning_rate": 4.280563698211403e-06, + "loss": 0.213, + "step": 101320 + }, + { + "epoch": 4.73, + "learning_rate": 4.279779913156617e-06, + "loss": 0.2586, + "step": 101325 + }, + { + "epoch": 4.73, + "learning_rate": 4.27899612810183e-06, + "loss": 0.0852, + "step": 101330 + }, + { + "epoch": 4.73, + "learning_rate": 4.278212343047043e-06, + "loss": 0.0176, + "step": 101335 + }, + { + "epoch": 4.73, + "learning_rate": 4.277428557992257e-06, + "loss": 0.0589, + "step": 101340 + }, + { + "epoch": 4.73, + "learning_rate": 4.27664477293747e-06, + "loss": 0.0514, + "step": 101345 + }, + { + "epoch": 4.73, + "learning_rate": 4.275860987882684e-06, + "loss": 0.0651, + "step": 101350 + }, + { + "epoch": 4.73, + "learning_rate": 4.275077202827897e-06, + "loss": 0.0596, + "step": 101355 + }, + { + "epoch": 4.73, + "learning_rate": 4.274293417773111e-06, + "loss": 0.1311, + "step": 101360 + }, + { + "epoch": 4.73, + "learning_rate": 4.273509632718324e-06, + "loss": 0.0907, + "step": 101365 + }, + { + "epoch": 4.73, + "learning_rate": 4.272725847663537e-06, + "loss": 0.0822, + "step": 101370 + }, + { + "epoch": 4.73, + "learning_rate": 4.271942062608751e-06, + "loss": 0.3338, + "step": 101375 + }, + { + "epoch": 4.73, + "learning_rate": 4.271158277553964e-06, + "loss": 0.0661, + "step": 101380 + }, + { + "epoch": 4.73, + "learning_rate": 4.270374492499178e-06, + "loss": 0.0185, + "step": 101385 + }, + { + "epoch": 4.73, + "learning_rate": 4.269590707444391e-06, + "loss": 0.0155, + "step": 101390 + }, + { + "epoch": 4.73, + "learning_rate": 4.268806922389605e-06, + "loss": 0.0548, + "step": 101395 + }, + { + "epoch": 4.73, + "learning_rate": 4.268023137334818e-06, + "loss": 0.112, + "step": 101400 + }, + { + "epoch": 4.73, + "learning_rate": 4.267239352280031e-06, + "loss": 0.0483, + "step": 101405 + }, + { + "epoch": 4.73, + "learning_rate": 4.266455567225245e-06, + "loss": 0.0991, + "step": 101410 + }, + { + "epoch": 4.73, + "learning_rate": 4.265671782170458e-06, + "loss": 0.0935, + "step": 101415 + }, + { + "epoch": 4.73, + "learning_rate": 4.264887997115672e-06, + "loss": 0.1664, + "step": 101420 + }, + { + "epoch": 4.73, + "learning_rate": 4.264104212060885e-06, + "loss": 0.3062, + "step": 101425 + }, + { + "epoch": 4.73, + "learning_rate": 4.263320427006098e-06, + "loss": 0.0956, + "step": 101430 + }, + { + "epoch": 4.73, + "learning_rate": 4.262536641951312e-06, + "loss": 0.0355, + "step": 101435 + }, + { + "epoch": 4.73, + "learning_rate": 4.261752856896525e-06, + "loss": 0.0291, + "step": 101440 + }, + { + "epoch": 4.73, + "learning_rate": 4.260969071841739e-06, + "loss": 0.0411, + "step": 101445 + }, + { + "epoch": 4.73, + "learning_rate": 4.260185286786952e-06, + "loss": 0.0863, + "step": 101450 + }, + { + "epoch": 4.73, + "learning_rate": 4.259401501732166e-06, + "loss": 0.128, + "step": 101455 + }, + { + "epoch": 4.73, + "learning_rate": 4.258617716677379e-06, + "loss": 0.0704, + "step": 101460 + }, + { + "epoch": 4.73, + "learning_rate": 4.257833931622592e-06, + "loss": 0.0494, + "step": 101465 + }, + { + "epoch": 4.73, + "learning_rate": 4.257050146567806e-06, + "loss": 0.1606, + "step": 101470 + }, + { + "epoch": 4.73, + "learning_rate": 4.256266361513019e-06, + "loss": 0.4529, + "step": 101475 + }, + { + "epoch": 4.74, + "learning_rate": 4.255482576458233e-06, + "loss": 0.075, + "step": 101480 + }, + { + "epoch": 4.74, + "learning_rate": 4.254698791403446e-06, + "loss": 0.0068, + "step": 101485 + }, + { + "epoch": 4.74, + "learning_rate": 4.2539150063486596e-06, + "loss": 0.3438, + "step": 101490 + }, + { + "epoch": 4.74, + "learning_rate": 4.253131221293873e-06, + "loss": 0.0544, + "step": 101495 + }, + { + "epoch": 4.74, + "learning_rate": 4.252347436239086e-06, + "loss": 0.0778, + "step": 101500 + }, + { + "epoch": 4.74, + "learning_rate": 4.2515636511843e-06, + "loss": 0.0615, + "step": 101505 + }, + { + "epoch": 4.74, + "learning_rate": 4.250779866129513e-06, + "loss": 0.0813, + "step": 101510 + }, + { + "epoch": 4.74, + "learning_rate": 4.2499960810747266e-06, + "loss": 0.0782, + "step": 101515 + }, + { + "epoch": 4.74, + "learning_rate": 4.24921229601994e-06, + "loss": 0.1386, + "step": 101520 + }, + { + "epoch": 4.74, + "learning_rate": 4.2484285109651535e-06, + "loss": 0.1643, + "step": 101525 + }, + { + "epoch": 4.74, + "learning_rate": 4.247644725910367e-06, + "loss": 0.0699, + "step": 101530 + }, + { + "epoch": 4.74, + "learning_rate": 4.24686094085558e-06, + "loss": 0.001, + "step": 101535 + }, + { + "epoch": 4.74, + "learning_rate": 4.2460771558007935e-06, + "loss": 0.0392, + "step": 101540 + }, + { + "epoch": 4.74, + "learning_rate": 4.245293370746007e-06, + "loss": 0.037, + "step": 101545 + }, + { + "epoch": 4.74, + "learning_rate": 4.2445095856912205e-06, + "loss": 0.0647, + "step": 101550 + }, + { + "epoch": 4.74, + "learning_rate": 4.243725800636434e-06, + "loss": 0.1063, + "step": 101555 + }, + { + "epoch": 4.74, + "learning_rate": 4.242942015581647e-06, + "loss": 0.101, + "step": 101560 + }, + { + "epoch": 4.74, + "learning_rate": 4.2421582305268605e-06, + "loss": 0.0777, + "step": 101565 + }, + { + "epoch": 4.74, + "learning_rate": 4.241374445472074e-06, + "loss": 0.2107, + "step": 101570 + }, + { + "epoch": 4.74, + "learning_rate": 4.2405906604172875e-06, + "loss": 0.3052, + "step": 101575 + }, + { + "epoch": 4.74, + "learning_rate": 4.239806875362501e-06, + "loss": 0.1206, + "step": 101580 + }, + { + "epoch": 4.74, + "learning_rate": 4.2390230903077145e-06, + "loss": 0.0267, + "step": 101585 + }, + { + "epoch": 4.74, + "learning_rate": 4.238239305252928e-06, + "loss": 0.0488, + "step": 101590 + }, + { + "epoch": 4.74, + "learning_rate": 4.237455520198141e-06, + "loss": 0.0565, + "step": 101595 + }, + { + "epoch": 4.74, + "learning_rate": 4.2366717351433545e-06, + "loss": 0.078, + "step": 101600 + }, + { + "epoch": 4.74, + "learning_rate": 4.235887950088568e-06, + "loss": 0.0374, + "step": 101605 + }, + { + "epoch": 4.74, + "learning_rate": 4.2351041650337815e-06, + "loss": 0.0958, + "step": 101610 + }, + { + "epoch": 4.74, + "learning_rate": 4.234320379978995e-06, + "loss": 0.1608, + "step": 101615 + }, + { + "epoch": 4.74, + "learning_rate": 4.233536594924208e-06, + "loss": 0.2207, + "step": 101620 + }, + { + "epoch": 4.74, + "learning_rate": 4.2327528098694215e-06, + "loss": 0.3258, + "step": 101625 + }, + { + "epoch": 4.74, + "learning_rate": 4.231969024814635e-06, + "loss": 0.0515, + "step": 101630 + }, + { + "epoch": 4.74, + "learning_rate": 4.2311852397598484e-06, + "loss": 0.0243, + "step": 101635 + }, + { + "epoch": 4.74, + "learning_rate": 4.230401454705062e-06, + "loss": 0.0392, + "step": 101640 + }, + { + "epoch": 4.74, + "learning_rate": 4.229617669650275e-06, + "loss": 0.012, + "step": 101645 + }, + { + "epoch": 4.74, + "learning_rate": 4.228833884595489e-06, + "loss": 0.0937, + "step": 101650 + }, + { + "epoch": 4.74, + "learning_rate": 4.228050099540702e-06, + "loss": 0.0467, + "step": 101655 + }, + { + "epoch": 4.74, + "learning_rate": 4.2272663144859154e-06, + "loss": 0.1344, + "step": 101660 + }, + { + "epoch": 4.74, + "learning_rate": 4.226482529431129e-06, + "loss": 0.0932, + "step": 101665 + }, + { + "epoch": 4.74, + "learning_rate": 4.225698744376342e-06, + "loss": 0.2049, + "step": 101670 + }, + { + "epoch": 4.74, + "learning_rate": 4.224914959321556e-06, + "loss": 0.2129, + "step": 101675 + }, + { + "epoch": 4.74, + "learning_rate": 4.224131174266769e-06, + "loss": 0.1162, + "step": 101680 + }, + { + "epoch": 4.74, + "learning_rate": 4.223347389211983e-06, + "loss": 0.0149, + "step": 101685 + }, + { + "epoch": 4.74, + "learning_rate": 4.222563604157196e-06, + "loss": 0.0502, + "step": 101690 + }, + { + "epoch": 4.75, + "learning_rate": 4.221779819102409e-06, + "loss": 0.0724, + "step": 101695 + }, + { + "epoch": 4.75, + "learning_rate": 4.220996034047623e-06, + "loss": 0.0126, + "step": 101700 + }, + { + "epoch": 4.75, + "learning_rate": 4.220212248992836e-06, + "loss": 0.0404, + "step": 101705 + }, + { + "epoch": 4.75, + "learning_rate": 4.21942846393805e-06, + "loss": 0.0577, + "step": 101710 + }, + { + "epoch": 4.75, + "learning_rate": 4.218644678883263e-06, + "loss": 0.0525, + "step": 101715 + }, + { + "epoch": 4.75, + "learning_rate": 4.217860893828477e-06, + "loss": 0.1736, + "step": 101720 + }, + { + "epoch": 4.75, + "learning_rate": 4.21707710877369e-06, + "loss": 0.1916, + "step": 101725 + }, + { + "epoch": 4.75, + "learning_rate": 4.216293323718903e-06, + "loss": 0.1016, + "step": 101730 + }, + { + "epoch": 4.75, + "learning_rate": 4.215509538664117e-06, + "loss": 0.0209, + "step": 101735 + }, + { + "epoch": 4.75, + "learning_rate": 4.21472575360933e-06, + "loss": 0.0245, + "step": 101740 + }, + { + "epoch": 4.75, + "learning_rate": 4.213941968554544e-06, + "loss": 0.0573, + "step": 101745 + }, + { + "epoch": 4.75, + "learning_rate": 4.213158183499757e-06, + "loss": 0.0473, + "step": 101750 + }, + { + "epoch": 4.75, + "learning_rate": 4.21237439844497e-06, + "loss": 0.0722, + "step": 101755 + }, + { + "epoch": 4.75, + "learning_rate": 4.211590613390184e-06, + "loss": 0.0564, + "step": 101760 + }, + { + "epoch": 4.75, + "learning_rate": 4.210806828335397e-06, + "loss": 0.1332, + "step": 101765 + }, + { + "epoch": 4.75, + "learning_rate": 4.210023043280611e-06, + "loss": 0.0761, + "step": 101770 + }, + { + "epoch": 4.75, + "learning_rate": 4.209239258225824e-06, + "loss": 0.2551, + "step": 101775 + }, + { + "epoch": 4.75, + "learning_rate": 4.208455473171038e-06, + "loss": 0.0988, + "step": 101780 + }, + { + "epoch": 4.75, + "learning_rate": 4.207671688116251e-06, + "loss": 0.0175, + "step": 101785 + }, + { + "epoch": 4.75, + "learning_rate": 4.206887903061464e-06, + "loss": 0.0135, + "step": 101790 + }, + { + "epoch": 4.75, + "learning_rate": 4.206104118006678e-06, + "loss": 0.0462, + "step": 101795 + }, + { + "epoch": 4.75, + "learning_rate": 4.205320332951891e-06, + "loss": 0.0332, + "step": 101800 + }, + { + "epoch": 4.75, + "learning_rate": 4.204536547897105e-06, + "loss": 0.068, + "step": 101805 + }, + { + "epoch": 4.75, + "learning_rate": 4.203752762842318e-06, + "loss": 0.1954, + "step": 101810 + }, + { + "epoch": 4.75, + "learning_rate": 4.202968977787532e-06, + "loss": 0.1114, + "step": 101815 + }, + { + "epoch": 4.75, + "learning_rate": 4.202185192732745e-06, + "loss": 0.1719, + "step": 101820 + }, + { + "epoch": 4.75, + "learning_rate": 4.201401407677958e-06, + "loss": 0.2613, + "step": 101825 + }, + { + "epoch": 4.75, + "learning_rate": 4.200617622623172e-06, + "loss": 0.0916, + "step": 101830 + }, + { + "epoch": 4.75, + "learning_rate": 4.199833837568385e-06, + "loss": 0.0624, + "step": 101835 + }, + { + "epoch": 4.75, + "learning_rate": 4.199050052513599e-06, + "loss": 0.0462, + "step": 101840 + }, + { + "epoch": 4.75, + "learning_rate": 4.198266267458813e-06, + "loss": 0.0621, + "step": 101845 + }, + { + "epoch": 4.75, + "learning_rate": 4.197482482404026e-06, + "loss": 0.0872, + "step": 101850 + }, + { + "epoch": 4.75, + "learning_rate": 4.196698697349239e-06, + "loss": 0.0472, + "step": 101855 + }, + { + "epoch": 4.75, + "learning_rate": 4.195914912294452e-06, + "loss": 0.073, + "step": 101860 + }, + { + "epoch": 4.75, + "learning_rate": 4.195131127239666e-06, + "loss": 0.0993, + "step": 101865 + }, + { + "epoch": 4.75, + "learning_rate": 4.19434734218488e-06, + "loss": 0.1161, + "step": 101870 + }, + { + "epoch": 4.75, + "learning_rate": 4.193563557130093e-06, + "loss": 0.3303, + "step": 101875 + }, + { + "epoch": 4.75, + "learning_rate": 4.192779772075307e-06, + "loss": 0.0925, + "step": 101880 + }, + { + "epoch": 4.75, + "learning_rate": 4.191995987020519e-06, + "loss": 0.0127, + "step": 101885 + }, + { + "epoch": 4.75, + "learning_rate": 4.191212201965733e-06, + "loss": 0.0241, + "step": 101890 + }, + { + "epoch": 4.75, + "learning_rate": 4.190428416910947e-06, + "loss": 0.009, + "step": 101895 + }, + { + "epoch": 4.75, + "learning_rate": 4.18964463185616e-06, + "loss": 0.0566, + "step": 101900 + }, + { + "epoch": 4.76, + "learning_rate": 4.188860846801374e-06, + "loss": 0.037, + "step": 101905 + }, + { + "epoch": 4.76, + "learning_rate": 4.188077061746587e-06, + "loss": 0.0406, + "step": 101910 + }, + { + "epoch": 4.76, + "learning_rate": 4.187293276691801e-06, + "loss": 0.069, + "step": 101915 + }, + { + "epoch": 4.76, + "learning_rate": 4.186509491637014e-06, + "loss": 0.1592, + "step": 101920 + }, + { + "epoch": 4.76, + "learning_rate": 4.185725706582227e-06, + "loss": 0.3098, + "step": 101925 + }, + { + "epoch": 4.76, + "learning_rate": 4.184941921527441e-06, + "loss": 0.1094, + "step": 101930 + }, + { + "epoch": 4.76, + "learning_rate": 4.184158136472654e-06, + "loss": 0.0389, + "step": 101935 + }, + { + "epoch": 4.76, + "learning_rate": 4.183374351417868e-06, + "loss": 0.0345, + "step": 101940 + }, + { + "epoch": 4.76, + "learning_rate": 4.182590566363081e-06, + "loss": 0.0811, + "step": 101945 + }, + { + "epoch": 4.76, + "learning_rate": 4.181806781308294e-06, + "loss": 0.0345, + "step": 101950 + }, + { + "epoch": 4.76, + "learning_rate": 4.181022996253508e-06, + "loss": 0.0336, + "step": 101955 + }, + { + "epoch": 4.76, + "learning_rate": 4.180239211198721e-06, + "loss": 0.09, + "step": 101960 + }, + { + "epoch": 4.76, + "learning_rate": 4.179455426143935e-06, + "loss": 0.0354, + "step": 101965 + }, + { + "epoch": 4.76, + "learning_rate": 4.178671641089148e-06, + "loss": 0.157, + "step": 101970 + }, + { + "epoch": 4.76, + "learning_rate": 4.177887856034362e-06, + "loss": 0.2399, + "step": 101975 + }, + { + "epoch": 4.76, + "learning_rate": 4.177104070979575e-06, + "loss": 0.0876, + "step": 101980 + }, + { + "epoch": 4.76, + "learning_rate": 4.176320285924788e-06, + "loss": 0.0391, + "step": 101985 + }, + { + "epoch": 4.76, + "learning_rate": 4.175536500870002e-06, + "loss": 0.0371, + "step": 101990 + }, + { + "epoch": 4.76, + "learning_rate": 4.174752715815215e-06, + "loss": 0.0353, + "step": 101995 + }, + { + "epoch": 4.76, + "learning_rate": 4.173968930760429e-06, + "loss": 0.0632, + "step": 102000 + }, + { + "epoch": 4.76, + "learning_rate": 4.173185145705642e-06, + "loss": 0.0878, + "step": 102005 + }, + { + "epoch": 4.76, + "learning_rate": 4.172401360650856e-06, + "loss": 0.0843, + "step": 102010 + }, + { + "epoch": 4.76, + "learning_rate": 4.171617575596069e-06, + "loss": 0.0658, + "step": 102015 + }, + { + "epoch": 4.76, + "learning_rate": 4.170833790541282e-06, + "loss": 0.0957, + "step": 102020 + }, + { + "epoch": 4.76, + "learning_rate": 4.170050005486496e-06, + "loss": 0.1669, + "step": 102025 + }, + { + "epoch": 4.76, + "learning_rate": 4.169266220431709e-06, + "loss": 0.0512, + "step": 102030 + }, + { + "epoch": 4.76, + "learning_rate": 4.168482435376923e-06, + "loss": 0.0405, + "step": 102035 + }, + { + "epoch": 4.76, + "learning_rate": 4.167698650322136e-06, + "loss": 0.0266, + "step": 102040 + }, + { + "epoch": 4.76, + "learning_rate": 4.16691486526735e-06, + "loss": 0.0463, + "step": 102045 + }, + { + "epoch": 4.76, + "learning_rate": 4.166131080212563e-06, + "loss": 0.0861, + "step": 102050 + }, + { + "epoch": 4.76, + "learning_rate": 4.165347295157776e-06, + "loss": 0.0911, + "step": 102055 + }, + { + "epoch": 4.76, + "learning_rate": 4.16456351010299e-06, + "loss": 0.0714, + "step": 102060 + }, + { + "epoch": 4.76, + "learning_rate": 4.163779725048203e-06, + "loss": 0.0743, + "step": 102065 + }, + { + "epoch": 4.76, + "learning_rate": 4.162995939993417e-06, + "loss": 0.2581, + "step": 102070 + }, + { + "epoch": 4.76, + "learning_rate": 4.16221215493863e-06, + "loss": 0.3153, + "step": 102075 + }, + { + "epoch": 4.76, + "learning_rate": 4.161428369883843e-06, + "loss": 0.0794, + "step": 102080 + }, + { + "epoch": 4.76, + "learning_rate": 4.160644584829057e-06, + "loss": 0.0138, + "step": 102085 + }, + { + "epoch": 4.76, + "learning_rate": 4.15986079977427e-06, + "loss": 0.0238, + "step": 102090 + }, + { + "epoch": 4.76, + "learning_rate": 4.159077014719484e-06, + "loss": 0.0386, + "step": 102095 + }, + { + "epoch": 4.76, + "learning_rate": 4.158293229664697e-06, + "loss": 0.0344, + "step": 102100 + }, + { + "epoch": 4.76, + "learning_rate": 4.157509444609911e-06, + "loss": 0.0561, + "step": 102105 + }, + { + "epoch": 4.76, + "learning_rate": 4.156725659555125e-06, + "loss": 0.052, + "step": 102110 + }, + { + "epoch": 4.76, + "learning_rate": 4.155941874500337e-06, + "loss": 0.135, + "step": 102115 + }, + { + "epoch": 4.77, + "learning_rate": 4.155158089445551e-06, + "loss": 0.0471, + "step": 102120 + }, + { + "epoch": 4.77, + "learning_rate": 4.154374304390764e-06, + "loss": 0.2026, + "step": 102125 + }, + { + "epoch": 4.77, + "learning_rate": 4.153590519335978e-06, + "loss": 0.0847, + "step": 102130 + }, + { + "epoch": 4.77, + "learning_rate": 4.152806734281192e-06, + "loss": 0.0033, + "step": 102135 + }, + { + "epoch": 4.77, + "learning_rate": 4.152022949226405e-06, + "loss": 0.0082, + "step": 102140 + }, + { + "epoch": 4.77, + "learning_rate": 4.151239164171618e-06, + "loss": 0.0512, + "step": 102145 + }, + { + "epoch": 4.77, + "learning_rate": 4.150455379116831e-06, + "loss": 0.0657, + "step": 102150 + }, + { + "epoch": 4.77, + "learning_rate": 4.149671594062045e-06, + "loss": 0.0595, + "step": 102155 + }, + { + "epoch": 4.77, + "learning_rate": 4.1488878090072586e-06, + "loss": 0.1565, + "step": 102160 + }, + { + "epoch": 4.77, + "learning_rate": 4.148104023952472e-06, + "loss": 0.0536, + "step": 102165 + }, + { + "epoch": 4.77, + "learning_rate": 4.1473202388976855e-06, + "loss": 0.1117, + "step": 102170 + }, + { + "epoch": 4.77, + "learning_rate": 4.146536453842899e-06, + "loss": 0.2592, + "step": 102175 + }, + { + "epoch": 4.77, + "learning_rate": 4.145752668788112e-06, + "loss": 0.1001, + "step": 102180 + }, + { + "epoch": 4.77, + "learning_rate": 4.1449688837333256e-06, + "loss": 0.0175, + "step": 102185 + }, + { + "epoch": 4.77, + "learning_rate": 4.144185098678539e-06, + "loss": 0.0481, + "step": 102190 + }, + { + "epoch": 4.77, + "learning_rate": 4.1434013136237525e-06, + "loss": 0.1071, + "step": 102195 + }, + { + "epoch": 4.77, + "learning_rate": 4.142617528568966e-06, + "loss": 0.0326, + "step": 102200 + }, + { + "epoch": 4.77, + "learning_rate": 4.1418337435141795e-06, + "loss": 0.037, + "step": 102205 + }, + { + "epoch": 4.77, + "learning_rate": 4.1410499584593925e-06, + "loss": 0.1006, + "step": 102210 + }, + { + "epoch": 4.77, + "learning_rate": 4.140266173404606e-06, + "loss": 0.1192, + "step": 102215 + }, + { + "epoch": 4.77, + "learning_rate": 4.1394823883498195e-06, + "loss": 0.3034, + "step": 102220 + }, + { + "epoch": 4.77, + "learning_rate": 4.1386986032950326e-06, + "loss": 0.2872, + "step": 102225 + }, + { + "epoch": 4.77, + "learning_rate": 4.1379148182402465e-06, + "loss": 0.0789, + "step": 102230 + }, + { + "epoch": 4.77, + "learning_rate": 4.1371310331854595e-06, + "loss": 0.0159, + "step": 102235 + }, + { + "epoch": 4.77, + "learning_rate": 4.1363472481306734e-06, + "loss": 0.0175, + "step": 102240 + }, + { + "epoch": 4.77, + "learning_rate": 4.1355634630758865e-06, + "loss": 0.0292, + "step": 102245 + }, + { + "epoch": 4.77, + "learning_rate": 4.1347796780210996e-06, + "loss": 0.0747, + "step": 102250 + }, + { + "epoch": 4.77, + "learning_rate": 4.1339958929663135e-06, + "loss": 0.0835, + "step": 102255 + }, + { + "epoch": 4.77, + "learning_rate": 4.1332121079115265e-06, + "loss": 0.0788, + "step": 102260 + }, + { + "epoch": 4.77, + "learning_rate": 4.1324283228567404e-06, + "loss": 0.0587, + "step": 102265 + }, + { + "epoch": 4.77, + "learning_rate": 4.1316445378019535e-06, + "loss": 0.122, + "step": 102270 + }, + { + "epoch": 4.77, + "learning_rate": 4.1308607527471665e-06, + "loss": 0.3336, + "step": 102275 + }, + { + "epoch": 4.77, + "learning_rate": 4.1300769676923805e-06, + "loss": 0.0666, + "step": 102280 + }, + { + "epoch": 4.77, + "learning_rate": 4.1292931826375935e-06, + "loss": 0.0359, + "step": 102285 + }, + { + "epoch": 4.77, + "learning_rate": 4.128509397582807e-06, + "loss": 0.0399, + "step": 102290 + }, + { + "epoch": 4.77, + "learning_rate": 4.1277256125280205e-06, + "loss": 0.0313, + "step": 102295 + }, + { + "epoch": 4.77, + "learning_rate": 4.126941827473234e-06, + "loss": 0.0549, + "step": 102300 + }, + { + "epoch": 4.77, + "learning_rate": 4.1261580424184474e-06, + "loss": 0.0659, + "step": 102305 + }, + { + "epoch": 4.77, + "learning_rate": 4.1253742573636605e-06, + "loss": 0.0922, + "step": 102310 + }, + { + "epoch": 4.77, + "learning_rate": 4.124590472308874e-06, + "loss": 0.0788, + "step": 102315 + }, + { + "epoch": 4.77, + "learning_rate": 4.1238066872540875e-06, + "loss": 0.099, + "step": 102320 + }, + { + "epoch": 4.77, + "learning_rate": 4.123022902199301e-06, + "loss": 0.26, + "step": 102325 + }, + { + "epoch": 4.77, + "learning_rate": 4.1222391171445144e-06, + "loss": 0.0631, + "step": 102330 + }, + { + "epoch": 4.78, + "learning_rate": 4.121455332089728e-06, + "loss": 0.0161, + "step": 102335 + }, + { + "epoch": 4.78, + "learning_rate": 4.120671547034941e-06, + "loss": 0.0277, + "step": 102340 + }, + { + "epoch": 4.78, + "learning_rate": 4.1198877619801545e-06, + "loss": 0.0392, + "step": 102345 + }, + { + "epoch": 4.78, + "learning_rate": 4.119103976925368e-06, + "loss": 0.0096, + "step": 102350 + }, + { + "epoch": 4.78, + "learning_rate": 4.118320191870581e-06, + "loss": 0.0451, + "step": 102355 + }, + { + "epoch": 4.78, + "learning_rate": 4.117536406815795e-06, + "loss": 0.0639, + "step": 102360 + }, + { + "epoch": 4.78, + "learning_rate": 4.116752621761008e-06, + "loss": 0.0636, + "step": 102365 + }, + { + "epoch": 4.78, + "learning_rate": 4.115968836706222e-06, + "loss": 0.2446, + "step": 102370 + }, + { + "epoch": 4.78, + "learning_rate": 4.115185051651435e-06, + "loss": 0.2221, + "step": 102375 + }, + { + "epoch": 4.78, + "learning_rate": 4.114401266596648e-06, + "loss": 0.0824, + "step": 102380 + }, + { + "epoch": 4.78, + "learning_rate": 4.113617481541862e-06, + "loss": 0.0295, + "step": 102385 + }, + { + "epoch": 4.78, + "learning_rate": 4.112833696487075e-06, + "loss": 0.0207, + "step": 102390 + }, + { + "epoch": 4.78, + "learning_rate": 4.112049911432289e-06, + "loss": 0.0631, + "step": 102395 + }, + { + "epoch": 4.78, + "learning_rate": 4.111266126377503e-06, + "loss": 0.102, + "step": 102400 + }, + { + "epoch": 4.78, + "learning_rate": 4.110482341322715e-06, + "loss": 0.0641, + "step": 102405 + }, + { + "epoch": 4.78, + "learning_rate": 4.109698556267929e-06, + "loss": 0.0622, + "step": 102410 + }, + { + "epoch": 4.78, + "learning_rate": 4.108914771213142e-06, + "loss": 0.0673, + "step": 102415 + }, + { + "epoch": 4.78, + "learning_rate": 4.108130986158356e-06, + "loss": 0.131, + "step": 102420 + }, + { + "epoch": 4.78, + "learning_rate": 4.10734720110357e-06, + "loss": 0.2828, + "step": 102425 + }, + { + "epoch": 4.78, + "learning_rate": 4.106563416048783e-06, + "loss": 0.0347, + "step": 102430 + }, + { + "epoch": 4.78, + "learning_rate": 4.105779630993997e-06, + "loss": 0.0513, + "step": 102435 + }, + { + "epoch": 4.78, + "learning_rate": 4.104995845939209e-06, + "loss": 0.0369, + "step": 102440 + }, + { + "epoch": 4.78, + "learning_rate": 4.104212060884423e-06, + "loss": 0.0293, + "step": 102445 + }, + { + "epoch": 4.78, + "learning_rate": 4.103428275829637e-06, + "loss": 0.0855, + "step": 102450 + }, + { + "epoch": 4.78, + "learning_rate": 4.10264449077485e-06, + "loss": 0.0622, + "step": 102455 + }, + { + "epoch": 4.78, + "learning_rate": 4.101860705720064e-06, + "loss": 0.0408, + "step": 102460 + }, + { + "epoch": 4.78, + "learning_rate": 4.101076920665277e-06, + "loss": 0.0885, + "step": 102465 + }, + { + "epoch": 4.78, + "learning_rate": 4.10029313561049e-06, + "loss": 0.129, + "step": 102470 + }, + { + "epoch": 4.78, + "learning_rate": 4.099509350555704e-06, + "loss": 0.1714, + "step": 102475 + }, + { + "epoch": 4.78, + "learning_rate": 4.098725565500917e-06, + "loss": 0.0854, + "step": 102480 + }, + { + "epoch": 4.78, + "learning_rate": 4.097941780446131e-06, + "loss": 0.0391, + "step": 102485 + }, + { + "epoch": 4.78, + "learning_rate": 4.097157995391344e-06, + "loss": 0.005, + "step": 102490 + }, + { + "epoch": 4.78, + "learning_rate": 4.096374210336558e-06, + "loss": 0.103, + "step": 102495 + }, + { + "epoch": 4.78, + "learning_rate": 4.095590425281771e-06, + "loss": 0.0286, + "step": 102500 + }, + { + "epoch": 4.78, + "learning_rate": 4.094806640226984e-06, + "loss": 0.0226, + "step": 102505 + }, + { + "epoch": 4.78, + "learning_rate": 4.094022855172198e-06, + "loss": 0.0568, + "step": 102510 + }, + { + "epoch": 4.78, + "learning_rate": 4.093239070117411e-06, + "loss": 0.1216, + "step": 102515 + }, + { + "epoch": 4.78, + "learning_rate": 4.092455285062625e-06, + "loss": 0.2009, + "step": 102520 + }, + { + "epoch": 4.78, + "learning_rate": 4.091671500007838e-06, + "loss": 0.2164, + "step": 102525 + }, + { + "epoch": 4.78, + "learning_rate": 4.090887714953052e-06, + "loss": 0.0642, + "step": 102530 + }, + { + "epoch": 4.78, + "learning_rate": 4.090103929898265e-06, + "loss": 0.0415, + "step": 102535 + }, + { + "epoch": 4.78, + "learning_rate": 4.089320144843478e-06, + "loss": 0.0613, + "step": 102540 + }, + { + "epoch": 4.78, + "learning_rate": 4.088536359788692e-06, + "loss": 0.0675, + "step": 102545 + }, + { + "epoch": 4.79, + "learning_rate": 4.087752574733905e-06, + "loss": 0.1069, + "step": 102550 + }, + { + "epoch": 4.79, + "learning_rate": 4.086968789679119e-06, + "loss": 0.0902, + "step": 102555 + }, + { + "epoch": 4.79, + "learning_rate": 4.086185004624332e-06, + "loss": 0.0892, + "step": 102560 + }, + { + "epoch": 4.79, + "learning_rate": 4.085401219569546e-06, + "loss": 0.1369, + "step": 102565 + }, + { + "epoch": 4.79, + "learning_rate": 4.084617434514759e-06, + "loss": 0.137, + "step": 102570 + }, + { + "epoch": 4.79, + "learning_rate": 4.083833649459972e-06, + "loss": 0.389, + "step": 102575 + }, + { + "epoch": 4.79, + "learning_rate": 4.083049864405186e-06, + "loss": 0.0436, + "step": 102580 + }, + { + "epoch": 4.79, + "learning_rate": 4.082266079350399e-06, + "loss": 0.0341, + "step": 102585 + }, + { + "epoch": 4.79, + "learning_rate": 4.081482294295613e-06, + "loss": 0.0508, + "step": 102590 + }, + { + "epoch": 4.79, + "learning_rate": 4.080698509240826e-06, + "loss": 0.0604, + "step": 102595 + }, + { + "epoch": 4.79, + "learning_rate": 4.079914724186039e-06, + "loss": 0.0976, + "step": 102600 + }, + { + "epoch": 4.79, + "learning_rate": 4.079130939131253e-06, + "loss": 0.1002, + "step": 102605 + }, + { + "epoch": 4.79, + "learning_rate": 4.078347154076466e-06, + "loss": 0.0617, + "step": 102610 + }, + { + "epoch": 4.79, + "learning_rate": 4.07756336902168e-06, + "loss": 0.0887, + "step": 102615 + }, + { + "epoch": 4.79, + "learning_rate": 4.076779583966893e-06, + "loss": 0.115, + "step": 102620 + }, + { + "epoch": 4.79, + "learning_rate": 4.075995798912107e-06, + "loss": 0.3442, + "step": 102625 + }, + { + "epoch": 4.79, + "learning_rate": 4.07521201385732e-06, + "loss": 0.0787, + "step": 102630 + }, + { + "epoch": 4.79, + "learning_rate": 4.074428228802533e-06, + "loss": 0.0072, + "step": 102635 + }, + { + "epoch": 4.79, + "learning_rate": 4.073644443747747e-06, + "loss": 0.0509, + "step": 102640 + }, + { + "epoch": 4.79, + "learning_rate": 4.07286065869296e-06, + "loss": 0.0265, + "step": 102645 + }, + { + "epoch": 4.79, + "learning_rate": 4.072076873638174e-06, + "loss": 0.065, + "step": 102650 + }, + { + "epoch": 4.79, + "learning_rate": 4.071293088583387e-06, + "loss": 0.044, + "step": 102655 + }, + { + "epoch": 4.79, + "learning_rate": 4.070509303528601e-06, + "loss": 0.1027, + "step": 102660 + }, + { + "epoch": 4.79, + "learning_rate": 4.069725518473814e-06, + "loss": 0.0917, + "step": 102665 + }, + { + "epoch": 4.79, + "learning_rate": 4.068941733419027e-06, + "loss": 0.1766, + "step": 102670 + }, + { + "epoch": 4.79, + "learning_rate": 4.068157948364241e-06, + "loss": 0.2478, + "step": 102675 + }, + { + "epoch": 4.79, + "learning_rate": 4.067374163309454e-06, + "loss": 0.1261, + "step": 102680 + }, + { + "epoch": 4.79, + "learning_rate": 4.066590378254668e-06, + "loss": 0.0209, + "step": 102685 + }, + { + "epoch": 4.79, + "learning_rate": 4.065806593199882e-06, + "loss": 0.0438, + "step": 102690 + }, + { + "epoch": 4.79, + "learning_rate": 4.065022808145095e-06, + "loss": 0.0216, + "step": 102695 + }, + { + "epoch": 4.79, + "learning_rate": 4.064239023090308e-06, + "loss": 0.0541, + "step": 102700 + }, + { + "epoch": 4.79, + "learning_rate": 4.063455238035521e-06, + "loss": 0.0367, + "step": 102705 + }, + { + "epoch": 4.79, + "learning_rate": 4.062671452980735e-06, + "loss": 0.1052, + "step": 102710 + }, + { + "epoch": 4.79, + "learning_rate": 4.061887667925949e-06, + "loss": 0.1206, + "step": 102715 + }, + { + "epoch": 4.79, + "learning_rate": 4.061103882871162e-06, + "loss": 0.1606, + "step": 102720 + }, + { + "epoch": 4.79, + "learning_rate": 4.060320097816376e-06, + "loss": 0.347, + "step": 102725 + }, + { + "epoch": 4.79, + "learning_rate": 4.059536312761588e-06, + "loss": 0.0909, + "step": 102730 + }, + { + "epoch": 4.79, + "learning_rate": 4.058752527706802e-06, + "loss": 0.046, + "step": 102735 + }, + { + "epoch": 4.79, + "learning_rate": 4.057968742652016e-06, + "loss": 0.0274, + "step": 102740 + }, + { + "epoch": 4.79, + "learning_rate": 4.057184957597229e-06, + "loss": 0.0535, + "step": 102745 + }, + { + "epoch": 4.79, + "learning_rate": 4.056401172542443e-06, + "loss": 0.0331, + "step": 102750 + }, + { + "epoch": 4.79, + "learning_rate": 4.055617387487656e-06, + "loss": 0.1107, + "step": 102755 + }, + { + "epoch": 4.79, + "learning_rate": 4.05483360243287e-06, + "loss": 0.0586, + "step": 102760 + }, + { + "epoch": 4.8, + "learning_rate": 4.054049817378083e-06, + "loss": 0.1788, + "step": 102765 + }, + { + "epoch": 4.8, + "learning_rate": 4.053266032323296e-06, + "loss": 0.1341, + "step": 102770 + }, + { + "epoch": 4.8, + "learning_rate": 4.05248224726851e-06, + "loss": 0.2509, + "step": 102775 + }, + { + "epoch": 4.8, + "learning_rate": 4.051698462213723e-06, + "loss": 0.1412, + "step": 102780 + }, + { + "epoch": 4.8, + "learning_rate": 4.050914677158937e-06, + "loss": 0.0198, + "step": 102785 + }, + { + "epoch": 4.8, + "learning_rate": 4.05013089210415e-06, + "loss": 0.0228, + "step": 102790 + }, + { + "epoch": 4.8, + "learning_rate": 4.049347107049363e-06, + "loss": 0.0901, + "step": 102795 + }, + { + "epoch": 4.8, + "learning_rate": 4.048563321994577e-06, + "loss": 0.0139, + "step": 102800 + }, + { + "epoch": 4.8, + "learning_rate": 4.04777953693979e-06, + "loss": 0.0793, + "step": 102805 + }, + { + "epoch": 4.8, + "learning_rate": 4.046995751885004e-06, + "loss": 0.0542, + "step": 102810 + }, + { + "epoch": 4.8, + "learning_rate": 4.046211966830217e-06, + "loss": 0.0913, + "step": 102815 + }, + { + "epoch": 4.8, + "learning_rate": 4.045428181775431e-06, + "loss": 0.1549, + "step": 102820 + }, + { + "epoch": 4.8, + "learning_rate": 4.044644396720644e-06, + "loss": 0.4346, + "step": 102825 + }, + { + "epoch": 4.8, + "learning_rate": 4.043860611665857e-06, + "loss": 0.0802, + "step": 102830 + }, + { + "epoch": 4.8, + "learning_rate": 4.043076826611071e-06, + "loss": 0.0731, + "step": 102835 + }, + { + "epoch": 4.8, + "learning_rate": 4.042293041556284e-06, + "loss": 0.0238, + "step": 102840 + }, + { + "epoch": 4.8, + "learning_rate": 4.041509256501498e-06, + "loss": 0.021, + "step": 102845 + }, + { + "epoch": 4.8, + "learning_rate": 4.040725471446711e-06, + "loss": 0.0786, + "step": 102850 + }, + { + "epoch": 4.8, + "learning_rate": 4.0399416863919246e-06, + "loss": 0.0648, + "step": 102855 + }, + { + "epoch": 4.8, + "learning_rate": 4.039157901337138e-06, + "loss": 0.0493, + "step": 102860 + }, + { + "epoch": 4.8, + "learning_rate": 4.038374116282351e-06, + "loss": 0.0754, + "step": 102865 + }, + { + "epoch": 4.8, + "learning_rate": 4.037590331227565e-06, + "loss": 0.123, + "step": 102870 + }, + { + "epoch": 4.8, + "learning_rate": 4.036806546172778e-06, + "loss": 0.3846, + "step": 102875 + }, + { + "epoch": 4.8, + "learning_rate": 4.0360227611179915e-06, + "loss": 0.1, + "step": 102880 + }, + { + "epoch": 4.8, + "learning_rate": 4.035238976063205e-06, + "loss": 0.0153, + "step": 102885 + }, + { + "epoch": 4.8, + "learning_rate": 4.0344551910084185e-06, + "loss": 0.0395, + "step": 102890 + }, + { + "epoch": 4.8, + "learning_rate": 4.0336714059536316e-06, + "loss": 0.0692, + "step": 102895 + }, + { + "epoch": 4.8, + "learning_rate": 4.032887620898845e-06, + "loss": 0.0354, + "step": 102900 + }, + { + "epoch": 4.8, + "learning_rate": 4.0321038358440585e-06, + "loss": 0.0385, + "step": 102905 + }, + { + "epoch": 4.8, + "learning_rate": 4.031320050789272e-06, + "loss": 0.137, + "step": 102910 + }, + { + "epoch": 4.8, + "learning_rate": 4.0305362657344855e-06, + "loss": 0.0659, + "step": 102915 + }, + { + "epoch": 4.8, + "learning_rate": 4.0297524806796986e-06, + "loss": 0.1472, + "step": 102920 + }, + { + "epoch": 4.8, + "learning_rate": 4.028968695624912e-06, + "loss": 0.2286, + "step": 102925 + }, + { + "epoch": 4.8, + "learning_rate": 4.0281849105701255e-06, + "loss": 0.0944, + "step": 102930 + }, + { + "epoch": 4.8, + "learning_rate": 4.027401125515339e-06, + "loss": 0.0156, + "step": 102935 + }, + { + "epoch": 4.8, + "learning_rate": 4.02677409747151e-06, + "loss": 0.0433, + "step": 102940 + }, + { + "epoch": 4.8, + "learning_rate": 4.025990312416723e-06, + "loss": 0.0392, + "step": 102945 + }, + { + "epoch": 4.8, + "learning_rate": 4.025206527361937e-06, + "loss": 0.104, + "step": 102950 + }, + { + "epoch": 4.8, + "learning_rate": 4.02442274230715e-06, + "loss": 0.0597, + "step": 102955 + }, + { + "epoch": 4.8, + "learning_rate": 4.023638957252363e-06, + "loss": 0.0833, + "step": 102960 + }, + { + "epoch": 4.8, + "learning_rate": 4.022855172197577e-06, + "loss": 0.0934, + "step": 102965 + }, + { + "epoch": 4.8, + "learning_rate": 4.02207138714279e-06, + "loss": 0.1016, + "step": 102970 + }, + { + "epoch": 4.8, + "learning_rate": 4.021287602088004e-06, + "loss": 0.1621, + "step": 102975 + }, + { + "epoch": 4.81, + "learning_rate": 4.020503817033217e-06, + "loss": 0.1038, + "step": 102980 + }, + { + "epoch": 4.81, + "learning_rate": 4.019720031978431e-06, + "loss": 0.0445, + "step": 102985 + }, + { + "epoch": 4.81, + "learning_rate": 4.018936246923644e-06, + "loss": 0.0221, + "step": 102990 + }, + { + "epoch": 4.81, + "learning_rate": 4.018152461868857e-06, + "loss": 0.013, + "step": 102995 + }, + { + "epoch": 4.81, + "learning_rate": 4.017368676814071e-06, + "loss": 0.0599, + "step": 103000 + }, + { + "epoch": 4.81, + "learning_rate": 4.016584891759284e-06, + "loss": 0.0751, + "step": 103005 + }, + { + "epoch": 4.81, + "learning_rate": 4.015801106704498e-06, + "loss": 0.0242, + "step": 103010 + }, + { + "epoch": 4.81, + "learning_rate": 4.015017321649711e-06, + "loss": 0.0305, + "step": 103015 + }, + { + "epoch": 4.81, + "learning_rate": 4.014233536594925e-06, + "loss": 0.0854, + "step": 103020 + }, + { + "epoch": 4.81, + "learning_rate": 4.013449751540138e-06, + "loss": 0.2494, + "step": 103025 + }, + { + "epoch": 4.81, + "learning_rate": 4.012665966485351e-06, + "loss": 0.101, + "step": 103030 + }, + { + "epoch": 4.81, + "learning_rate": 4.011882181430565e-06, + "loss": 0.015, + "step": 103035 + }, + { + "epoch": 4.81, + "learning_rate": 4.011098396375778e-06, + "loss": 0.0303, + "step": 103040 + }, + { + "epoch": 4.81, + "learning_rate": 4.010314611320992e-06, + "loss": 0.0303, + "step": 103045 + }, + { + "epoch": 4.81, + "learning_rate": 4.009530826266206e-06, + "loss": 0.0611, + "step": 103050 + }, + { + "epoch": 4.81, + "learning_rate": 4.008747041211419e-06, + "loss": 0.0483, + "step": 103055 + }, + { + "epoch": 4.81, + "learning_rate": 4.007963256156632e-06, + "loss": 0.0349, + "step": 103060 + }, + { + "epoch": 4.81, + "learning_rate": 4.007179471101846e-06, + "loss": 0.0967, + "step": 103065 + }, + { + "epoch": 4.81, + "learning_rate": 4.006395686047059e-06, + "loss": 0.1971, + "step": 103070 + }, + { + "epoch": 4.81, + "learning_rate": 4.005611900992273e-06, + "loss": 0.304, + "step": 103075 + }, + { + "epoch": 4.81, + "learning_rate": 4.004828115937486e-06, + "loss": 0.0572, + "step": 103080 + }, + { + "epoch": 4.81, + "learning_rate": 4.0040443308827e-06, + "loss": 0.0324, + "step": 103085 + }, + { + "epoch": 4.81, + "learning_rate": 4.003260545827913e-06, + "loss": 0.0645, + "step": 103090 + }, + { + "epoch": 4.81, + "learning_rate": 4.002476760773126e-06, + "loss": 0.0743, + "step": 103095 + }, + { + "epoch": 4.81, + "learning_rate": 4.00169297571834e-06, + "loss": 0.0575, + "step": 103100 + }, + { + "epoch": 4.81, + "learning_rate": 4.000909190663553e-06, + "loss": 0.058, + "step": 103105 + }, + { + "epoch": 4.81, + "learning_rate": 4.000125405608767e-06, + "loss": 0.0634, + "step": 103110 + }, + { + "epoch": 4.81, + "learning_rate": 3.99934162055398e-06, + "loss": 0.1741, + "step": 103115 + }, + { + "epoch": 4.81, + "learning_rate": 3.9985578354991935e-06, + "loss": 0.2418, + "step": 103120 + }, + { + "epoch": 4.81, + "learning_rate": 3.997774050444407e-06, + "loss": 0.2416, + "step": 103125 + }, + { + "epoch": 4.81, + "learning_rate": 3.99699026538962e-06, + "loss": 0.0668, + "step": 103130 + }, + { + "epoch": 4.81, + "learning_rate": 3.9962064803348336e-06, + "loss": 0.0078, + "step": 103135 + }, + { + "epoch": 4.81, + "learning_rate": 3.995422695280047e-06, + "loss": 0.0146, + "step": 103140 + }, + { + "epoch": 4.81, + "learning_rate": 3.9946389102252605e-06, + "loss": 0.0574, + "step": 103145 + }, + { + "epoch": 4.81, + "learning_rate": 3.993855125170474e-06, + "loss": 0.0532, + "step": 103150 + }, + { + "epoch": 4.81, + "learning_rate": 3.993071340115687e-06, + "loss": 0.1054, + "step": 103155 + }, + { + "epoch": 4.81, + "learning_rate": 3.9922875550609006e-06, + "loss": 0.0364, + "step": 103160 + }, + { + "epoch": 4.81, + "learning_rate": 3.991503770006114e-06, + "loss": 0.1254, + "step": 103165 + }, + { + "epoch": 4.81, + "learning_rate": 3.9907199849513275e-06, + "loss": 0.1064, + "step": 103170 + }, + { + "epoch": 4.81, + "learning_rate": 3.9899361998965406e-06, + "loss": 0.1906, + "step": 103175 + }, + { + "epoch": 4.81, + "learning_rate": 3.9891524148417545e-06, + "loss": 0.1021, + "step": 103180 + }, + { + "epoch": 4.81, + "learning_rate": 3.9883686297869675e-06, + "loss": 0.0421, + "step": 103185 + }, + { + "epoch": 4.81, + "learning_rate": 3.987584844732181e-06, + "loss": 0.0177, + "step": 103190 + }, + { + "epoch": 4.82, + "learning_rate": 3.9868010596773945e-06, + "loss": 0.0091, + "step": 103195 + }, + { + "epoch": 4.82, + "learning_rate": 3.9860172746226076e-06, + "loss": 0.0355, + "step": 103200 + }, + { + "epoch": 4.82, + "learning_rate": 3.9852334895678215e-06, + "loss": 0.1109, + "step": 103205 + }, + { + "epoch": 4.82, + "learning_rate": 3.9844497045130345e-06, + "loss": 0.096, + "step": 103210 + }, + { + "epoch": 4.82, + "learning_rate": 3.9836659194582484e-06, + "loss": 0.1427, + "step": 103215 + }, + { + "epoch": 4.82, + "learning_rate": 3.9828821344034615e-06, + "loss": 0.1414, + "step": 103220 + }, + { + "epoch": 4.82, + "learning_rate": 3.9820983493486746e-06, + "loss": 0.3227, + "step": 103225 + }, + { + "epoch": 4.82, + "learning_rate": 3.9813145642938885e-06, + "loss": 0.1128, + "step": 103230 + }, + { + "epoch": 4.82, + "learning_rate": 3.9805307792391015e-06, + "loss": 0.037, + "step": 103235 + }, + { + "epoch": 4.82, + "learning_rate": 3.979746994184315e-06, + "loss": 0.0061, + "step": 103240 + }, + { + "epoch": 4.82, + "learning_rate": 3.9789632091295285e-06, + "loss": 0.0445, + "step": 103245 + }, + { + "epoch": 4.82, + "learning_rate": 3.978179424074742e-06, + "loss": 0.0726, + "step": 103250 + }, + { + "epoch": 4.82, + "learning_rate": 3.9773956390199554e-06, + "loss": 0.0325, + "step": 103255 + }, + { + "epoch": 4.82, + "learning_rate": 3.9766118539651685e-06, + "loss": 0.0483, + "step": 103260 + }, + { + "epoch": 4.82, + "learning_rate": 3.975828068910382e-06, + "loss": 0.1595, + "step": 103265 + }, + { + "epoch": 4.82, + "learning_rate": 3.9750442838555955e-06, + "loss": 0.1825, + "step": 103270 + }, + { + "epoch": 4.82, + "learning_rate": 3.974260498800809e-06, + "loss": 0.2561, + "step": 103275 + }, + { + "epoch": 4.82, + "learning_rate": 3.9734767137460224e-06, + "loss": 0.0649, + "step": 103280 + }, + { + "epoch": 4.82, + "learning_rate": 3.9726929286912355e-06, + "loss": 0.0169, + "step": 103285 + }, + { + "epoch": 4.82, + "learning_rate": 3.971909143636449e-06, + "loss": 0.0352, + "step": 103290 + }, + { + "epoch": 4.82, + "learning_rate": 3.9711253585816625e-06, + "loss": 0.0377, + "step": 103295 + }, + { + "epoch": 4.82, + "learning_rate": 3.970341573526876e-06, + "loss": 0.107, + "step": 103300 + }, + { + "epoch": 4.82, + "learning_rate": 3.969557788472089e-06, + "loss": 0.0849, + "step": 103305 + }, + { + "epoch": 4.82, + "learning_rate": 3.968774003417303e-06, + "loss": 0.0782, + "step": 103310 + }, + { + "epoch": 4.82, + "learning_rate": 3.967990218362517e-06, + "loss": 0.048, + "step": 103315 + }, + { + "epoch": 4.82, + "learning_rate": 3.9672064333077294e-06, + "loss": 0.1307, + "step": 103320 + }, + { + "epoch": 4.82, + "learning_rate": 3.966422648252943e-06, + "loss": 0.311, + "step": 103325 + }, + { + "epoch": 4.82, + "learning_rate": 3.965638863198156e-06, + "loss": 0.0546, + "step": 103330 + }, + { + "epoch": 4.82, + "learning_rate": 3.96485507814337e-06, + "loss": 0.0187, + "step": 103335 + }, + { + "epoch": 4.82, + "learning_rate": 3.964071293088584e-06, + "loss": 0.0921, + "step": 103340 + }, + { + "epoch": 4.82, + "learning_rate": 3.963287508033797e-06, + "loss": 0.0385, + "step": 103345 + }, + { + "epoch": 4.82, + "learning_rate": 3.96250372297901e-06, + "loss": 0.0431, + "step": 103350 + }, + { + "epoch": 4.82, + "learning_rate": 3.961719937924224e-06, + "loss": 0.0755, + "step": 103355 + }, + { + "epoch": 4.82, + "learning_rate": 3.960936152869437e-06, + "loss": 0.1237, + "step": 103360 + }, + { + "epoch": 4.82, + "learning_rate": 3.960152367814651e-06, + "loss": 0.1876, + "step": 103365 + }, + { + "epoch": 4.82, + "learning_rate": 3.959368582759864e-06, + "loss": 0.2078, + "step": 103370 + }, + { + "epoch": 4.82, + "learning_rate": 3.958584797705078e-06, + "loss": 0.3256, + "step": 103375 + }, + { + "epoch": 4.82, + "learning_rate": 3.957801012650291e-06, + "loss": 0.1022, + "step": 103380 + }, + { + "epoch": 4.82, + "learning_rate": 3.957017227595504e-06, + "loss": 0.0328, + "step": 103385 + }, + { + "epoch": 4.82, + "learning_rate": 3.956233442540718e-06, + "loss": 0.0335, + "step": 103390 + }, + { + "epoch": 4.82, + "learning_rate": 3.955449657485931e-06, + "loss": 0.0269, + "step": 103395 + }, + { + "epoch": 4.82, + "learning_rate": 3.954665872431145e-06, + "loss": 0.0909, + "step": 103400 + }, + { + "epoch": 4.83, + "learning_rate": 3.953882087376358e-06, + "loss": 0.0272, + "step": 103405 + }, + { + "epoch": 4.83, + "learning_rate": 3.953098302321572e-06, + "loss": 0.1165, + "step": 103410 + }, + { + "epoch": 4.83, + "learning_rate": 3.952314517266785e-06, + "loss": 0.0829, + "step": 103415 + }, + { + "epoch": 4.83, + "learning_rate": 3.951530732211998e-06, + "loss": 0.1673, + "step": 103420 + }, + { + "epoch": 4.83, + "learning_rate": 3.950746947157212e-06, + "loss": 0.3563, + "step": 103425 + }, + { + "epoch": 4.83, + "learning_rate": 3.949963162102425e-06, + "loss": 0.1074, + "step": 103430 + }, + { + "epoch": 4.83, + "learning_rate": 3.949179377047639e-06, + "loss": 0.0321, + "step": 103435 + }, + { + "epoch": 4.83, + "learning_rate": 3.948395591992852e-06, + "loss": 0.0144, + "step": 103440 + }, + { + "epoch": 4.83, + "learning_rate": 3.947611806938066e-06, + "loss": 0.0367, + "step": 103445 + }, + { + "epoch": 4.83, + "learning_rate": 3.946828021883279e-06, + "loss": 0.0579, + "step": 103450 + }, + { + "epoch": 4.83, + "learning_rate": 3.946044236828492e-06, + "loss": 0.086, + "step": 103455 + }, + { + "epoch": 4.83, + "learning_rate": 3.945260451773706e-06, + "loss": 0.1298, + "step": 103460 + }, + { + "epoch": 4.83, + "learning_rate": 3.944476666718919e-06, + "loss": 0.1572, + "step": 103465 + }, + { + "epoch": 4.83, + "learning_rate": 3.943692881664133e-06, + "loss": 0.1239, + "step": 103470 + }, + { + "epoch": 4.83, + "learning_rate": 3.942909096609346e-06, + "loss": 0.3083, + "step": 103475 + }, + { + "epoch": 4.83, + "learning_rate": 3.942125311554559e-06, + "loss": 0.1227, + "step": 103480 + }, + { + "epoch": 4.83, + "learning_rate": 3.941341526499773e-06, + "loss": 0.043, + "step": 103485 + }, + { + "epoch": 4.83, + "learning_rate": 3.940557741444986e-06, + "loss": 0.0544, + "step": 103490 + }, + { + "epoch": 4.83, + "learning_rate": 3.9397739563902e-06, + "loss": 0.0572, + "step": 103495 + }, + { + "epoch": 4.83, + "learning_rate": 3.938990171335413e-06, + "loss": 0.0971, + "step": 103500 + }, + { + "epoch": 4.83, + "learning_rate": 3.938206386280627e-06, + "loss": 0.0853, + "step": 103505 + }, + { + "epoch": 4.83, + "learning_rate": 3.93742260122584e-06, + "loss": 0.107, + "step": 103510 + }, + { + "epoch": 4.83, + "learning_rate": 3.936638816171053e-06, + "loss": 0.0928, + "step": 103515 + }, + { + "epoch": 4.83, + "learning_rate": 3.935855031116267e-06, + "loss": 0.0694, + "step": 103520 + }, + { + "epoch": 4.83, + "learning_rate": 3.93507124606148e-06, + "loss": 0.2613, + "step": 103525 + }, + { + "epoch": 4.83, + "learning_rate": 3.934287461006694e-06, + "loss": 0.064, + "step": 103530 + }, + { + "epoch": 4.83, + "learning_rate": 3.933503675951907e-06, + "loss": 0.0321, + "step": 103535 + }, + { + "epoch": 4.83, + "learning_rate": 3.932719890897121e-06, + "loss": 0.0268, + "step": 103540 + }, + { + "epoch": 4.83, + "learning_rate": 3.931936105842334e-06, + "loss": 0.0935, + "step": 103545 + }, + { + "epoch": 4.83, + "learning_rate": 3.931152320787547e-06, + "loss": 0.0853, + "step": 103550 + }, + { + "epoch": 4.83, + "learning_rate": 3.930368535732761e-06, + "loss": 0.0589, + "step": 103555 + }, + { + "epoch": 4.83, + "learning_rate": 3.929584750677974e-06, + "loss": 0.0774, + "step": 103560 + }, + { + "epoch": 4.83, + "learning_rate": 3.928800965623188e-06, + "loss": 0.1165, + "step": 103565 + }, + { + "epoch": 4.83, + "learning_rate": 3.928017180568401e-06, + "loss": 0.1892, + "step": 103570 + }, + { + "epoch": 4.83, + "learning_rate": 3.927233395513615e-06, + "loss": 0.2343, + "step": 103575 + }, + { + "epoch": 4.83, + "learning_rate": 3.926449610458828e-06, + "loss": 0.114, + "step": 103580 + }, + { + "epoch": 4.83, + "learning_rate": 3.925665825404041e-06, + "loss": 0.0522, + "step": 103585 + }, + { + "epoch": 4.83, + "learning_rate": 3.924882040349255e-06, + "loss": 0.0052, + "step": 103590 + }, + { + "epoch": 4.83, + "learning_rate": 3.924098255294468e-06, + "loss": 0.0324, + "step": 103595 + }, + { + "epoch": 4.83, + "learning_rate": 3.923314470239682e-06, + "loss": 0.0281, + "step": 103600 + }, + { + "epoch": 4.83, + "learning_rate": 3.922530685184896e-06, + "loss": 0.0895, + "step": 103605 + }, + { + "epoch": 4.83, + "learning_rate": 3.921746900130108e-06, + "loss": 0.0671, + "step": 103610 + }, + { + "epoch": 4.83, + "learning_rate": 3.920963115075322e-06, + "loss": 0.1254, + "step": 103615 + }, + { + "epoch": 4.84, + "learning_rate": 3.920179330020535e-06, + "loss": 0.1376, + "step": 103620 + }, + { + "epoch": 4.84, + "learning_rate": 3.919395544965749e-06, + "loss": 0.2706, + "step": 103625 + }, + { + "epoch": 4.84, + "learning_rate": 3.918611759910963e-06, + "loss": 0.0861, + "step": 103630 + }, + { + "epoch": 4.84, + "learning_rate": 3.917827974856176e-06, + "loss": 0.023, + "step": 103635 + }, + { + "epoch": 4.84, + "learning_rate": 3.91704418980139e-06, + "loss": 0.0176, + "step": 103640 + }, + { + "epoch": 4.84, + "learning_rate": 3.916260404746603e-06, + "loss": 0.0464, + "step": 103645 + }, + { + "epoch": 4.84, + "learning_rate": 3.915476619691816e-06, + "loss": 0.0682, + "step": 103650 + }, + { + "epoch": 4.84, + "learning_rate": 3.91469283463703e-06, + "loss": 0.0477, + "step": 103655 + }, + { + "epoch": 4.84, + "learning_rate": 3.913909049582243e-06, + "loss": 0.1352, + "step": 103660 + }, + { + "epoch": 4.84, + "learning_rate": 3.913125264527457e-06, + "loss": 0.0775, + "step": 103665 + }, + { + "epoch": 4.84, + "learning_rate": 3.91234147947267e-06, + "loss": 0.1178, + "step": 103670 + }, + { + "epoch": 4.84, + "learning_rate": 3.911557694417883e-06, + "loss": 0.2288, + "step": 103675 + }, + { + "epoch": 4.84, + "learning_rate": 3.910773909363097e-06, + "loss": 0.1158, + "step": 103680 + }, + { + "epoch": 4.84, + "learning_rate": 3.90999012430831e-06, + "loss": 0.0239, + "step": 103685 + }, + { + "epoch": 4.84, + "learning_rate": 3.909206339253524e-06, + "loss": 0.0458, + "step": 103690 + }, + { + "epoch": 4.84, + "learning_rate": 3.908422554198737e-06, + "loss": 0.0558, + "step": 103695 + }, + { + "epoch": 4.84, + "learning_rate": 3.907638769143951e-06, + "loss": 0.051, + "step": 103700 + }, + { + "epoch": 4.84, + "learning_rate": 3.906854984089164e-06, + "loss": 0.0805, + "step": 103705 + }, + { + "epoch": 4.84, + "learning_rate": 3.906071199034377e-06, + "loss": 0.1299, + "step": 103710 + }, + { + "epoch": 4.84, + "learning_rate": 3.905287413979591e-06, + "loss": 0.1771, + "step": 103715 + }, + { + "epoch": 4.84, + "learning_rate": 3.904503628924804e-06, + "loss": 0.1707, + "step": 103720 + }, + { + "epoch": 4.84, + "learning_rate": 3.903719843870018e-06, + "loss": 0.3106, + "step": 103725 + }, + { + "epoch": 4.84, + "learning_rate": 3.902936058815231e-06, + "loss": 0.0489, + "step": 103730 + }, + { + "epoch": 4.84, + "learning_rate": 3.902152273760445e-06, + "loss": 0.0107, + "step": 103735 + }, + { + "epoch": 4.84, + "learning_rate": 3.901368488705658e-06, + "loss": 0.0066, + "step": 103740 + }, + { + "epoch": 4.84, + "learning_rate": 3.900584703650871e-06, + "loss": 0.0215, + "step": 103745 + }, + { + "epoch": 4.84, + "learning_rate": 3.899800918596085e-06, + "loss": 0.0477, + "step": 103750 + }, + { + "epoch": 4.84, + "learning_rate": 3.899017133541298e-06, + "loss": 0.0937, + "step": 103755 + }, + { + "epoch": 4.84, + "learning_rate": 3.898233348486512e-06, + "loss": 0.1393, + "step": 103760 + }, + { + "epoch": 4.84, + "learning_rate": 3.897449563431725e-06, + "loss": 0.1036, + "step": 103765 + }, + { + "epoch": 4.84, + "learning_rate": 3.896665778376939e-06, + "loss": 0.1716, + "step": 103770 + }, + { + "epoch": 4.84, + "learning_rate": 3.895881993322152e-06, + "loss": 0.3847, + "step": 103775 + }, + { + "epoch": 4.84, + "learning_rate": 3.895098208267365e-06, + "loss": 0.0385, + "step": 103780 + }, + { + "epoch": 4.84, + "learning_rate": 3.894314423212579e-06, + "loss": 0.0286, + "step": 103785 + }, + { + "epoch": 4.84, + "learning_rate": 3.893530638157792e-06, + "loss": 0.0342, + "step": 103790 + }, + { + "epoch": 4.84, + "learning_rate": 3.892746853103006e-06, + "loss": 0.0366, + "step": 103795 + }, + { + "epoch": 4.84, + "learning_rate": 3.891963068048219e-06, + "loss": 0.0518, + "step": 103800 + }, + { + "epoch": 4.84, + "learning_rate": 3.891179282993432e-06, + "loss": 0.0312, + "step": 103805 + }, + { + "epoch": 4.84, + "learning_rate": 3.890395497938646e-06, + "loss": 0.1374, + "step": 103810 + }, + { + "epoch": 4.84, + "learning_rate": 3.889611712883859e-06, + "loss": 0.6932, + "step": 103815 + }, + { + "epoch": 4.84, + "learning_rate": 3.888827927829073e-06, + "loss": 0.1461, + "step": 103820 + }, + { + "epoch": 4.84, + "learning_rate": 3.888044142774286e-06, + "loss": 0.1767, + "step": 103825 + }, + { + "epoch": 4.84, + "learning_rate": 3.8872603577194996e-06, + "loss": 0.0822, + "step": 103830 + }, + { + "epoch": 4.85, + "learning_rate": 3.886476572664713e-06, + "loss": 0.0385, + "step": 103835 + }, + { + "epoch": 4.85, + "learning_rate": 3.885692787609926e-06, + "loss": 0.0192, + "step": 103840 + }, + { + "epoch": 4.85, + "learning_rate": 3.8849090025551396e-06, + "loss": 0.0313, + "step": 103845 + }, + { + "epoch": 4.85, + "learning_rate": 3.884125217500353e-06, + "loss": 0.0084, + "step": 103850 + }, + { + "epoch": 4.85, + "learning_rate": 3.8833414324455665e-06, + "loss": 0.0516, + "step": 103855 + }, + { + "epoch": 4.85, + "learning_rate": 3.88255764739078e-06, + "loss": 0.1076, + "step": 103860 + }, + { + "epoch": 4.85, + "learning_rate": 3.8817738623359935e-06, + "loss": 0.1215, + "step": 103865 + }, + { + "epoch": 4.85, + "learning_rate": 3.8809900772812066e-06, + "loss": 0.1948, + "step": 103870 + }, + { + "epoch": 4.85, + "learning_rate": 3.88020629222642e-06, + "loss": 0.2432, + "step": 103875 + }, + { + "epoch": 4.85, + "learning_rate": 3.8794225071716335e-06, + "loss": 0.0687, + "step": 103880 + }, + { + "epoch": 4.85, + "learning_rate": 3.878638722116847e-06, + "loss": 0.0108, + "step": 103885 + }, + { + "epoch": 4.85, + "learning_rate": 3.8778549370620605e-06, + "loss": 0.0307, + "step": 103890 + }, + { + "epoch": 4.85, + "learning_rate": 3.877071152007274e-06, + "loss": 0.0369, + "step": 103895 + }, + { + "epoch": 4.85, + "learning_rate": 3.8762873669524875e-06, + "loss": 0.0361, + "step": 103900 + }, + { + "epoch": 4.85, + "learning_rate": 3.8755035818977005e-06, + "loss": 0.1103, + "step": 103905 + }, + { + "epoch": 4.85, + "learning_rate": 3.8747197968429136e-06, + "loss": 0.0409, + "step": 103910 + }, + { + "epoch": 4.85, + "learning_rate": 3.8739360117881275e-06, + "loss": 0.2798, + "step": 103915 + }, + { + "epoch": 4.85, + "learning_rate": 3.873152226733341e-06, + "loss": 0.1021, + "step": 103920 + }, + { + "epoch": 4.85, + "learning_rate": 3.8723684416785544e-06, + "loss": 0.2691, + "step": 103925 + }, + { + "epoch": 4.85, + "learning_rate": 3.871584656623768e-06, + "loss": 0.0768, + "step": 103930 + }, + { + "epoch": 4.85, + "learning_rate": 3.870800871568981e-06, + "loss": 0.0344, + "step": 103935 + }, + { + "epoch": 4.85, + "learning_rate": 3.8700170865141945e-06, + "loss": 0.035, + "step": 103940 + }, + { + "epoch": 4.85, + "learning_rate": 3.869233301459408e-06, + "loss": 0.013, + "step": 103945 + }, + { + "epoch": 4.85, + "learning_rate": 3.8684495164046214e-06, + "loss": 0.0583, + "step": 103950 + }, + { + "epoch": 4.85, + "learning_rate": 3.867665731349835e-06, + "loss": 0.0442, + "step": 103955 + }, + { + "epoch": 4.85, + "learning_rate": 3.866881946295048e-06, + "loss": 0.0508, + "step": 103960 + }, + { + "epoch": 4.85, + "learning_rate": 3.866098161240262e-06, + "loss": 0.2123, + "step": 103965 + }, + { + "epoch": 4.85, + "learning_rate": 3.865314376185475e-06, + "loss": 0.1634, + "step": 103970 + }, + { + "epoch": 4.85, + "learning_rate": 3.864530591130688e-06, + "loss": 0.3576, + "step": 103975 + }, + { + "epoch": 4.85, + "learning_rate": 3.863746806075902e-06, + "loss": 0.0756, + "step": 103980 + }, + { + "epoch": 4.85, + "learning_rate": 3.862963021021115e-06, + "loss": 0.0478, + "step": 103985 + }, + { + "epoch": 4.85, + "learning_rate": 3.862179235966329e-06, + "loss": 0.022, + "step": 103990 + }, + { + "epoch": 4.85, + "learning_rate": 3.861395450911542e-06, + "loss": 0.0396, + "step": 103995 + }, + { + "epoch": 4.85, + "learning_rate": 3.860611665856755e-06, + "loss": 0.0329, + "step": 104000 + }, + { + "epoch": 4.85, + "learning_rate": 3.859827880801969e-06, + "loss": 0.0604, + "step": 104005 + }, + { + "epoch": 4.85, + "learning_rate": 3.859044095747182e-06, + "loss": 0.0841, + "step": 104010 + }, + { + "epoch": 4.85, + "learning_rate": 3.858260310692396e-06, + "loss": 0.1389, + "step": 104015 + }, + { + "epoch": 4.85, + "learning_rate": 3.857476525637609e-06, + "loss": 0.1726, + "step": 104020 + }, + { + "epoch": 4.85, + "learning_rate": 3.856692740582823e-06, + "loss": 0.1992, + "step": 104025 + }, + { + "epoch": 4.85, + "learning_rate": 3.855908955528036e-06, + "loss": 0.0709, + "step": 104030 + }, + { + "epoch": 4.85, + "learning_rate": 3.855125170473249e-06, + "loss": 0.0322, + "step": 104035 + }, + { + "epoch": 4.85, + "learning_rate": 3.854341385418463e-06, + "loss": 0.0241, + "step": 104040 + }, + { + "epoch": 4.85, + "learning_rate": 3.853557600363676e-06, + "loss": 0.0664, + "step": 104045 + }, + { + "epoch": 4.86, + "learning_rate": 3.85277381530889e-06, + "loss": 0.0412, + "step": 104050 + }, + { + "epoch": 4.86, + "learning_rate": 3.851990030254103e-06, + "loss": 0.0373, + "step": 104055 + }, + { + "epoch": 4.86, + "learning_rate": 3.851206245199317e-06, + "loss": 0.101, + "step": 104060 + }, + { + "epoch": 4.86, + "learning_rate": 3.85042246014453e-06, + "loss": 0.0951, + "step": 104065 + }, + { + "epoch": 4.86, + "learning_rate": 3.849638675089743e-06, + "loss": 0.1652, + "step": 104070 + }, + { + "epoch": 4.86, + "learning_rate": 3.848854890034957e-06, + "loss": 0.2592, + "step": 104075 + }, + { + "epoch": 4.86, + "learning_rate": 3.84807110498017e-06, + "loss": 0.1164, + "step": 104080 + }, + { + "epoch": 4.86, + "learning_rate": 3.847287319925384e-06, + "loss": 0.0214, + "step": 104085 + }, + { + "epoch": 4.86, + "learning_rate": 3.846503534870597e-06, + "loss": 0.022, + "step": 104090 + }, + { + "epoch": 4.86, + "learning_rate": 3.845719749815811e-06, + "loss": 0.06, + "step": 104095 + }, + { + "epoch": 4.86, + "learning_rate": 3.844935964761024e-06, + "loss": 0.039, + "step": 104100 + }, + { + "epoch": 4.86, + "learning_rate": 3.844152179706237e-06, + "loss": 0.0806, + "step": 104105 + }, + { + "epoch": 4.86, + "learning_rate": 3.843368394651451e-06, + "loss": 0.1003, + "step": 104110 + }, + { + "epoch": 4.86, + "learning_rate": 3.842584609596664e-06, + "loss": 0.1216, + "step": 104115 + }, + { + "epoch": 4.86, + "learning_rate": 3.841800824541878e-06, + "loss": 0.1058, + "step": 104120 + }, + { + "epoch": 4.86, + "learning_rate": 3.841017039487091e-06, + "loss": 0.3209, + "step": 104125 + }, + { + "epoch": 4.86, + "learning_rate": 3.840233254432304e-06, + "loss": 0.063, + "step": 104130 + }, + { + "epoch": 4.86, + "learning_rate": 3.839449469377518e-06, + "loss": 0.0431, + "step": 104135 + }, + { + "epoch": 4.86, + "learning_rate": 3.838665684322731e-06, + "loss": 0.0974, + "step": 104140 + }, + { + "epoch": 4.86, + "learning_rate": 3.837881899267945e-06, + "loss": 0.097, + "step": 104145 + }, + { + "epoch": 4.86, + "learning_rate": 3.837098114213158e-06, + "loss": 0.0729, + "step": 104150 + }, + { + "epoch": 4.86, + "learning_rate": 3.836314329158372e-06, + "loss": 0.1119, + "step": 104155 + }, + { + "epoch": 4.86, + "learning_rate": 3.8356873011145425e-06, + "loss": 0.0985, + "step": 104160 + }, + { + "epoch": 4.86, + "learning_rate": 3.834903516059756e-06, + "loss": 0.1325, + "step": 104165 + }, + { + "epoch": 4.86, + "learning_rate": 3.8341197310049695e-06, + "loss": 0.1151, + "step": 104170 + }, + { + "epoch": 4.86, + "learning_rate": 3.8333359459501826e-06, + "loss": 0.2696, + "step": 104175 + }, + { + "epoch": 4.86, + "learning_rate": 3.8325521608953965e-06, + "loss": 0.0508, + "step": 104180 + }, + { + "epoch": 4.86, + "learning_rate": 3.8317683758406095e-06, + "loss": 0.0219, + "step": 104185 + }, + { + "epoch": 4.86, + "learning_rate": 3.8309845907858234e-06, + "loss": 0.0272, + "step": 104190 + }, + { + "epoch": 4.86, + "learning_rate": 3.8302008057310365e-06, + "loss": 0.0218, + "step": 104195 + }, + { + "epoch": 4.86, + "learning_rate": 3.8294170206762495e-06, + "loss": 0.031, + "step": 104200 + }, + { + "epoch": 4.86, + "learning_rate": 3.8286332356214634e-06, + "loss": 0.0838, + "step": 104205 + }, + { + "epoch": 4.86, + "learning_rate": 3.8278494505666765e-06, + "loss": 0.115, + "step": 104210 + }, + { + "epoch": 4.86, + "learning_rate": 3.82706566551189e-06, + "loss": 0.1272, + "step": 104215 + }, + { + "epoch": 4.86, + "learning_rate": 3.826281880457104e-06, + "loss": 0.0836, + "step": 104220 + }, + { + "epoch": 4.86, + "learning_rate": 3.825498095402317e-06, + "loss": 0.2306, + "step": 104225 + }, + { + "epoch": 4.86, + "learning_rate": 3.8247143103475304e-06, + "loss": 0.0771, + "step": 104230 + }, + { + "epoch": 4.86, + "learning_rate": 3.8239305252927435e-06, + "loss": 0.0475, + "step": 104235 + }, + { + "epoch": 4.86, + "learning_rate": 3.823146740237957e-06, + "loss": 0.0571, + "step": 104240 + }, + { + "epoch": 4.86, + "learning_rate": 3.822362955183171e-06, + "loss": 0.0217, + "step": 104245 + }, + { + "epoch": 4.86, + "learning_rate": 3.821579170128384e-06, + "loss": 0.0567, + "step": 104250 + }, + { + "epoch": 4.86, + "learning_rate": 3.820795385073598e-06, + "loss": 0.0462, + "step": 104255 + }, + { + "epoch": 4.86, + "learning_rate": 3.8200116000188105e-06, + "loss": 0.0543, + "step": 104260 + }, + { + "epoch": 4.87, + "learning_rate": 3.819227814964024e-06, + "loss": 0.0734, + "step": 104265 + }, + { + "epoch": 4.87, + "learning_rate": 3.818444029909238e-06, + "loss": 0.1393, + "step": 104270 + }, + { + "epoch": 4.87, + "learning_rate": 3.817660244854451e-06, + "loss": 0.4331, + "step": 104275 + }, + { + "epoch": 4.87, + "learning_rate": 3.816876459799665e-06, + "loss": 0.0945, + "step": 104280 + }, + { + "epoch": 4.87, + "learning_rate": 3.816092674744878e-06, + "loss": 0.0238, + "step": 104285 + }, + { + "epoch": 4.87, + "learning_rate": 3.815308889690092e-06, + "loss": 0.0089, + "step": 104290 + }, + { + "epoch": 4.87, + "learning_rate": 3.814525104635305e-06, + "loss": 0.0413, + "step": 104295 + }, + { + "epoch": 4.87, + "learning_rate": 3.8137413195805183e-06, + "loss": 0.0926, + "step": 104300 + }, + { + "epoch": 4.87, + "learning_rate": 3.812957534525732e-06, + "loss": 0.0629, + "step": 104305 + }, + { + "epoch": 4.87, + "learning_rate": 3.8121737494709453e-06, + "loss": 0.0692, + "step": 104310 + }, + { + "epoch": 4.87, + "learning_rate": 3.8113899644161588e-06, + "loss": 0.1042, + "step": 104315 + }, + { + "epoch": 4.87, + "learning_rate": 3.8106061793613723e-06, + "loss": 0.2014, + "step": 104320 + }, + { + "epoch": 4.87, + "learning_rate": 3.8098223943065853e-06, + "loss": 0.3619, + "step": 104325 + }, + { + "epoch": 4.87, + "learning_rate": 3.809038609251799e-06, + "loss": 0.0675, + "step": 104330 + }, + { + "epoch": 4.87, + "learning_rate": 3.8082548241970123e-06, + "loss": 0.0589, + "step": 104335 + }, + { + "epoch": 4.87, + "learning_rate": 3.8074710391422258e-06, + "loss": 0.0188, + "step": 104340 + }, + { + "epoch": 4.87, + "learning_rate": 3.8066872540874393e-06, + "loss": 0.0493, + "step": 104345 + }, + { + "epoch": 4.87, + "learning_rate": 3.805903469032653e-06, + "loss": 0.0553, + "step": 104350 + }, + { + "epoch": 4.87, + "learning_rate": 3.8051196839778666e-06, + "loss": 0.0499, + "step": 104355 + }, + { + "epoch": 4.87, + "learning_rate": 3.8043358989230793e-06, + "loss": 0.1176, + "step": 104360 + }, + { + "epoch": 4.87, + "learning_rate": 3.8035521138682928e-06, + "loss": 0.1711, + "step": 104365 + }, + { + "epoch": 4.87, + "learning_rate": 3.8027683288135062e-06, + "loss": 0.1206, + "step": 104370 + }, + { + "epoch": 4.87, + "learning_rate": 3.80198454375872e-06, + "loss": 0.3171, + "step": 104375 + }, + { + "epoch": 4.87, + "learning_rate": 3.8012007587039336e-06, + "loss": 0.1069, + "step": 104380 + }, + { + "epoch": 4.87, + "learning_rate": 3.800416973649147e-06, + "loss": 0.0294, + "step": 104385 + }, + { + "epoch": 4.87, + "learning_rate": 3.7996331885943598e-06, + "loss": 0.0244, + "step": 104390 + }, + { + "epoch": 4.87, + "learning_rate": 3.7988494035395732e-06, + "loss": 0.0496, + "step": 104395 + }, + { + "epoch": 4.87, + "learning_rate": 3.798065618484787e-06, + "loss": 0.0659, + "step": 104400 + }, + { + "epoch": 4.87, + "learning_rate": 3.7972818334300006e-06, + "loss": 0.0906, + "step": 104405 + }, + { + "epoch": 4.87, + "learning_rate": 3.796498048375214e-06, + "loss": 0.0464, + "step": 104410 + }, + { + "epoch": 4.87, + "learning_rate": 3.7957142633204276e-06, + "loss": 0.074, + "step": 104415 + }, + { + "epoch": 4.87, + "learning_rate": 3.794930478265641e-06, + "loss": 0.1876, + "step": 104420 + }, + { + "epoch": 4.87, + "learning_rate": 3.794146693210854e-06, + "loss": 0.1694, + "step": 104425 + }, + { + "epoch": 4.87, + "learning_rate": 3.7933629081560676e-06, + "loss": 0.0543, + "step": 104430 + }, + { + "epoch": 4.87, + "learning_rate": 3.792579123101281e-06, + "loss": 0.0542, + "step": 104435 + }, + { + "epoch": 4.87, + "learning_rate": 3.7917953380464946e-06, + "loss": 0.0331, + "step": 104440 + }, + { + "epoch": 4.87, + "learning_rate": 3.791011552991708e-06, + "loss": 0.0342, + "step": 104445 + }, + { + "epoch": 4.87, + "learning_rate": 3.7902277679369215e-06, + "loss": 0.0512, + "step": 104450 + }, + { + "epoch": 4.87, + "learning_rate": 3.7894439828821346e-06, + "loss": 0.0409, + "step": 104455 + }, + { + "epoch": 4.87, + "learning_rate": 3.788660197827348e-06, + "loss": 0.0698, + "step": 104460 + }, + { + "epoch": 4.87, + "learning_rate": 3.7878764127725616e-06, + "loss": 0.1367, + "step": 104465 + }, + { + "epoch": 4.87, + "learning_rate": 3.787092627717775e-06, + "loss": 0.0767, + "step": 104470 + }, + { + "epoch": 4.87, + "learning_rate": 3.7863088426629885e-06, + "loss": 0.2956, + "step": 104475 + }, + { + "epoch": 4.88, + "learning_rate": 3.785525057608202e-06, + "loss": 0.1179, + "step": 104480 + }, + { + "epoch": 4.88, + "learning_rate": 3.7847412725534155e-06, + "loss": 0.0188, + "step": 104485 + }, + { + "epoch": 4.88, + "learning_rate": 3.7839574874986285e-06, + "loss": 0.0236, + "step": 104490 + }, + { + "epoch": 4.88, + "learning_rate": 3.783173702443842e-06, + "loss": 0.0645, + "step": 104495 + }, + { + "epoch": 4.88, + "learning_rate": 3.7823899173890555e-06, + "loss": 0.0343, + "step": 104500 + }, + { + "epoch": 4.88, + "learning_rate": 3.781606132334269e-06, + "loss": 0.0502, + "step": 104505 + }, + { + "epoch": 4.88, + "learning_rate": 3.7808223472794825e-06, + "loss": 0.0718, + "step": 104510 + }, + { + "epoch": 4.88, + "learning_rate": 3.780038562224696e-06, + "loss": 0.1089, + "step": 104515 + }, + { + "epoch": 4.88, + "learning_rate": 3.779254777169909e-06, + "loss": 0.1572, + "step": 104520 + }, + { + "epoch": 4.88, + "learning_rate": 3.7784709921151225e-06, + "loss": 0.2072, + "step": 104525 + }, + { + "epoch": 4.88, + "learning_rate": 3.777687207060336e-06, + "loss": 0.0979, + "step": 104530 + }, + { + "epoch": 4.88, + "learning_rate": 3.7769034220055495e-06, + "loss": 0.0279, + "step": 104535 + }, + { + "epoch": 4.88, + "learning_rate": 3.776119636950763e-06, + "loss": 0.0278, + "step": 104540 + }, + { + "epoch": 4.88, + "learning_rate": 3.7753358518959764e-06, + "loss": 0.0386, + "step": 104545 + }, + { + "epoch": 4.88, + "learning_rate": 3.77455206684119e-06, + "loss": 0.091, + "step": 104550 + }, + { + "epoch": 4.88, + "learning_rate": 3.773768281786403e-06, + "loss": 0.0301, + "step": 104555 + }, + { + "epoch": 4.88, + "learning_rate": 3.7729844967316165e-06, + "loss": 0.0991, + "step": 104560 + }, + { + "epoch": 4.88, + "learning_rate": 3.77220071167683e-06, + "loss": 0.1363, + "step": 104565 + }, + { + "epoch": 4.88, + "learning_rate": 3.7714169266220434e-06, + "loss": 0.1163, + "step": 104570 + }, + { + "epoch": 4.88, + "learning_rate": 3.770633141567257e-06, + "loss": 0.1723, + "step": 104575 + }, + { + "epoch": 4.88, + "learning_rate": 3.7698493565124704e-06, + "loss": 0.0797, + "step": 104580 + }, + { + "epoch": 4.88, + "learning_rate": 3.7690655714576834e-06, + "loss": 0.039, + "step": 104585 + }, + { + "epoch": 4.88, + "learning_rate": 3.768281786402897e-06, + "loss": 0.0169, + "step": 104590 + }, + { + "epoch": 4.88, + "learning_rate": 3.7674980013481104e-06, + "loss": 0.0126, + "step": 104595 + }, + { + "epoch": 4.88, + "learning_rate": 3.766714216293324e-06, + "loss": 0.0455, + "step": 104600 + }, + { + "epoch": 4.88, + "learning_rate": 3.7659304312385374e-06, + "loss": 0.1075, + "step": 104605 + }, + { + "epoch": 4.88, + "learning_rate": 3.765146646183751e-06, + "loss": 0.0571, + "step": 104610 + }, + { + "epoch": 4.88, + "learning_rate": 3.7643628611289643e-06, + "loss": 0.0888, + "step": 104615 + }, + { + "epoch": 4.88, + "learning_rate": 3.7635790760741774e-06, + "loss": 0.2116, + "step": 104620 + }, + { + "epoch": 4.88, + "learning_rate": 3.762795291019391e-06, + "loss": 0.2732, + "step": 104625 + }, + { + "epoch": 4.88, + "learning_rate": 3.7620115059646044e-06, + "loss": 0.0612, + "step": 104630 + }, + { + "epoch": 4.88, + "learning_rate": 3.761227720909818e-06, + "loss": 0.0124, + "step": 104635 + }, + { + "epoch": 4.88, + "learning_rate": 3.7604439358550317e-06, + "loss": 0.0435, + "step": 104640 + }, + { + "epoch": 4.88, + "learning_rate": 3.7596601508002452e-06, + "loss": 0.0506, + "step": 104645 + }, + { + "epoch": 4.88, + "learning_rate": 3.758876365745458e-06, + "loss": 0.0708, + "step": 104650 + }, + { + "epoch": 4.88, + "learning_rate": 3.7580925806906713e-06, + "loss": 0.1099, + "step": 104655 + }, + { + "epoch": 4.88, + "learning_rate": 3.757308795635885e-06, + "loss": 0.0849, + "step": 104660 + }, + { + "epoch": 4.88, + "learning_rate": 3.7565250105810987e-06, + "loss": 0.081, + "step": 104665 + }, + { + "epoch": 4.88, + "learning_rate": 3.7557412255263122e-06, + "loss": 0.1532, + "step": 104670 + }, + { + "epoch": 4.88, + "learning_rate": 3.7549574404715257e-06, + "loss": 0.2863, + "step": 104675 + }, + { + "epoch": 4.88, + "learning_rate": 3.754173655416739e-06, + "loss": 0.0662, + "step": 104680 + }, + { + "epoch": 4.88, + "learning_rate": 3.753389870361952e-06, + "loss": 0.0086, + "step": 104685 + }, + { + "epoch": 4.88, + "learning_rate": 3.7526060853071657e-06, + "loss": 0.0236, + "step": 104690 + }, + { + "epoch": 4.89, + "learning_rate": 3.751822300252379e-06, + "loss": 0.0273, + "step": 104695 + }, + { + "epoch": 4.89, + "learning_rate": 3.7510385151975927e-06, + "loss": 0.0747, + "step": 104700 + }, + { + "epoch": 4.89, + "learning_rate": 3.750254730142806e-06, + "loss": 0.0386, + "step": 104705 + }, + { + "epoch": 4.89, + "learning_rate": 3.7494709450880197e-06, + "loss": 0.0544, + "step": 104710 + }, + { + "epoch": 4.89, + "learning_rate": 3.7486871600332327e-06, + "loss": 0.0868, + "step": 104715 + }, + { + "epoch": 4.89, + "learning_rate": 3.747903374978446e-06, + "loss": 0.1007, + "step": 104720 + }, + { + "epoch": 4.89, + "learning_rate": 3.7471195899236597e-06, + "loss": 0.2702, + "step": 104725 + }, + { + "epoch": 4.89, + "learning_rate": 3.746335804868873e-06, + "loss": 0.0489, + "step": 104730 + }, + { + "epoch": 4.89, + "learning_rate": 3.7455520198140866e-06, + "loss": 0.0162, + "step": 104735 + }, + { + "epoch": 4.89, + "learning_rate": 3.7447682347593e-06, + "loss": 0.0074, + "step": 104740 + }, + { + "epoch": 4.89, + "learning_rate": 3.7439844497045136e-06, + "loss": 0.0208, + "step": 104745 + }, + { + "epoch": 4.89, + "learning_rate": 3.7432006646497267e-06, + "loss": 0.0309, + "step": 104750 + }, + { + "epoch": 4.89, + "learning_rate": 3.74241687959494e-06, + "loss": 0.1404, + "step": 104755 + }, + { + "epoch": 4.89, + "learning_rate": 3.7416330945401536e-06, + "loss": 0.0762, + "step": 104760 + }, + { + "epoch": 4.89, + "learning_rate": 3.740849309485367e-06, + "loss": 0.0931, + "step": 104765 + }, + { + "epoch": 4.89, + "learning_rate": 3.7400655244305806e-06, + "loss": 0.1815, + "step": 104770 + }, + { + "epoch": 4.89, + "learning_rate": 3.739281739375794e-06, + "loss": 0.204, + "step": 104775 + }, + { + "epoch": 4.89, + "learning_rate": 3.738497954321007e-06, + "loss": 0.0989, + "step": 104780 + }, + { + "epoch": 4.89, + "learning_rate": 3.7377141692662206e-06, + "loss": 0.0097, + "step": 104785 + }, + { + "epoch": 4.89, + "learning_rate": 3.736930384211434e-06, + "loss": 0.0749, + "step": 104790 + }, + { + "epoch": 4.89, + "learning_rate": 3.7361465991566476e-06, + "loss": 0.054, + "step": 104795 + }, + { + "epoch": 4.89, + "learning_rate": 3.735362814101861e-06, + "loss": 0.1055, + "step": 104800 + }, + { + "epoch": 4.89, + "learning_rate": 3.7345790290470745e-06, + "loss": 0.1324, + "step": 104805 + }, + { + "epoch": 4.89, + "learning_rate": 3.733795243992288e-06, + "loss": 0.117, + "step": 104810 + }, + { + "epoch": 4.89, + "learning_rate": 3.733011458937501e-06, + "loss": 0.1579, + "step": 104815 + }, + { + "epoch": 4.89, + "learning_rate": 3.7322276738827146e-06, + "loss": 0.267, + "step": 104820 + }, + { + "epoch": 4.89, + "learning_rate": 3.731443888827928e-06, + "loss": 0.3815, + "step": 104825 + }, + { + "epoch": 4.89, + "learning_rate": 3.7306601037731415e-06, + "loss": 0.0846, + "step": 104830 + }, + { + "epoch": 4.89, + "learning_rate": 3.729876318718355e-06, + "loss": 0.0701, + "step": 104835 + }, + { + "epoch": 4.89, + "learning_rate": 3.7290925336635685e-06, + "loss": 0.0461, + "step": 104840 + }, + { + "epoch": 4.89, + "learning_rate": 3.7283087486087816e-06, + "loss": 0.1035, + "step": 104845 + }, + { + "epoch": 4.89, + "learning_rate": 3.727524963553995e-06, + "loss": 0.0412, + "step": 104850 + }, + { + "epoch": 4.89, + "learning_rate": 3.7267411784992085e-06, + "loss": 0.0638, + "step": 104855 + }, + { + "epoch": 4.89, + "learning_rate": 3.725957393444422e-06, + "loss": 0.0766, + "step": 104860 + }, + { + "epoch": 4.89, + "learning_rate": 3.7251736083896355e-06, + "loss": 0.126, + "step": 104865 + }, + { + "epoch": 4.89, + "learning_rate": 3.724389823334849e-06, + "loss": 0.2201, + "step": 104870 + }, + { + "epoch": 4.89, + "learning_rate": 3.7236060382800624e-06, + "loss": 0.2927, + "step": 104875 + }, + { + "epoch": 4.89, + "learning_rate": 3.7228222532252755e-06, + "loss": 0.0888, + "step": 104880 + }, + { + "epoch": 4.89, + "learning_rate": 3.722038468170489e-06, + "loss": 0.0045, + "step": 104885 + }, + { + "epoch": 4.89, + "learning_rate": 3.7212546831157025e-06, + "loss": 0.0513, + "step": 104890 + }, + { + "epoch": 4.89, + "learning_rate": 3.720470898060916e-06, + "loss": 0.0512, + "step": 104895 + }, + { + "epoch": 4.89, + "learning_rate": 3.7196871130061294e-06, + "loss": 0.0232, + "step": 104900 + }, + { + "epoch": 4.9, + "learning_rate": 3.718903327951343e-06, + "loss": 0.1024, + "step": 104905 + }, + { + "epoch": 4.9, + "learning_rate": 3.718119542896556e-06, + "loss": 0.0991, + "step": 104910 + }, + { + "epoch": 4.9, + "learning_rate": 3.7173357578417695e-06, + "loss": 0.1334, + "step": 104915 + }, + { + "epoch": 4.9, + "learning_rate": 3.716551972786983e-06, + "loss": 0.1207, + "step": 104920 + }, + { + "epoch": 4.9, + "learning_rate": 3.7157681877321964e-06, + "loss": 0.1867, + "step": 104925 + }, + { + "epoch": 4.9, + "learning_rate": 3.7149844026774103e-06, + "loss": 0.1111, + "step": 104930 + }, + { + "epoch": 4.9, + "learning_rate": 3.714200617622624e-06, + "loss": 0.0232, + "step": 104935 + }, + { + "epoch": 4.9, + "learning_rate": 3.7134168325678373e-06, + "loss": 0.0331, + "step": 104940 + }, + { + "epoch": 4.9, + "learning_rate": 3.71263304751305e-06, + "loss": 0.0229, + "step": 104945 + }, + { + "epoch": 4.9, + "learning_rate": 3.7118492624582634e-06, + "loss": 0.0565, + "step": 104950 + }, + { + "epoch": 4.9, + "learning_rate": 3.7110654774034773e-06, + "loss": 0.0266, + "step": 104955 + }, + { + "epoch": 4.9, + "learning_rate": 3.710281692348691e-06, + "loss": 0.0631, + "step": 104960 + }, + { + "epoch": 4.9, + "learning_rate": 3.7094979072939043e-06, + "loss": 0.0721, + "step": 104965 + }, + { + "epoch": 4.9, + "learning_rate": 3.7087141222391178e-06, + "loss": 0.1649, + "step": 104970 + }, + { + "epoch": 4.9, + "learning_rate": 3.7079303371843304e-06, + "loss": 0.2464, + "step": 104975 + }, + { + "epoch": 4.9, + "learning_rate": 3.7071465521295443e-06, + "loss": 0.0854, + "step": 104980 + }, + { + "epoch": 4.9, + "learning_rate": 3.7063627670747578e-06, + "loss": 0.0231, + "step": 104985 + }, + { + "epoch": 4.9, + "learning_rate": 3.7055789820199713e-06, + "loss": 0.0248, + "step": 104990 + }, + { + "epoch": 4.9, + "learning_rate": 3.7047951969651848e-06, + "loss": 0.0292, + "step": 104995 + }, + { + "epoch": 4.9, + "learning_rate": 3.7040114119103982e-06, + "loss": 0.0154, + "step": 105000 + }, + { + "epoch": 4.9, + "learning_rate": 3.7032276268556117e-06, + "loss": 0.0512, + "step": 105005 + }, + { + "epoch": 4.9, + "learning_rate": 3.7024438418008248e-06, + "loss": 0.0861, + "step": 105010 + }, + { + "epoch": 4.9, + "learning_rate": 3.7016600567460383e-06, + "loss": 0.1469, + "step": 105015 + }, + { + "epoch": 4.9, + "learning_rate": 3.7008762716912517e-06, + "loss": 0.1815, + "step": 105020 + }, + { + "epoch": 4.9, + "learning_rate": 3.7000924866364652e-06, + "loss": 0.2861, + "step": 105025 + }, + { + "epoch": 4.9, + "learning_rate": 3.6993087015816787e-06, + "loss": 0.1016, + "step": 105030 + }, + { + "epoch": 4.9, + "learning_rate": 3.698524916526892e-06, + "loss": 0.0331, + "step": 105035 + }, + { + "epoch": 4.9, + "learning_rate": 3.6977411314721052e-06, + "loss": 0.0587, + "step": 105040 + }, + { + "epoch": 4.9, + "learning_rate": 3.6969573464173187e-06, + "loss": 0.0411, + "step": 105045 + }, + { + "epoch": 4.9, + "learning_rate": 3.696173561362532e-06, + "loss": 0.0557, + "step": 105050 + }, + { + "epoch": 4.9, + "learning_rate": 3.6953897763077457e-06, + "loss": 0.0695, + "step": 105055 + }, + { + "epoch": 4.9, + "learning_rate": 3.694605991252959e-06, + "loss": 0.0538, + "step": 105060 + }, + { + "epoch": 4.9, + "learning_rate": 3.6938222061981727e-06, + "loss": 0.1193, + "step": 105065 + }, + { + "epoch": 4.9, + "learning_rate": 3.693038421143386e-06, + "loss": 0.2479, + "step": 105070 + }, + { + "epoch": 4.9, + "learning_rate": 3.692254636088599e-06, + "loss": 0.1821, + "step": 105075 + }, + { + "epoch": 4.9, + "learning_rate": 3.6914708510338127e-06, + "loss": 0.0823, + "step": 105080 + }, + { + "epoch": 4.9, + "learning_rate": 3.690687065979026e-06, + "loss": 0.0265, + "step": 105085 + }, + { + "epoch": 4.9, + "learning_rate": 3.6899032809242396e-06, + "loss": 0.038, + "step": 105090 + }, + { + "epoch": 4.9, + "learning_rate": 3.689119495869453e-06, + "loss": 0.0093, + "step": 105095 + }, + { + "epoch": 4.9, + "learning_rate": 3.6883357108146666e-06, + "loss": 0.0537, + "step": 105100 + }, + { + "epoch": 4.9, + "learning_rate": 3.6875519257598797e-06, + "loss": 0.0959, + "step": 105105 + }, + { + "epoch": 4.9, + "learning_rate": 3.686768140705093e-06, + "loss": 0.1264, + "step": 105110 + }, + { + "epoch": 4.9, + "learning_rate": 3.6859843556503066e-06, + "loss": 0.071, + "step": 105115 + }, + { + "epoch": 4.91, + "learning_rate": 3.68520057059552e-06, + "loss": 0.1434, + "step": 105120 + }, + { + "epoch": 4.91, + "learning_rate": 3.6844167855407336e-06, + "loss": 0.564, + "step": 105125 + }, + { + "epoch": 4.91, + "learning_rate": 3.683633000485947e-06, + "loss": 0.0735, + "step": 105130 + }, + { + "epoch": 4.91, + "learning_rate": 3.6828492154311606e-06, + "loss": 0.0766, + "step": 105135 + }, + { + "epoch": 4.91, + "learning_rate": 3.6820654303763736e-06, + "loss": 0.0182, + "step": 105140 + }, + { + "epoch": 4.91, + "learning_rate": 3.681281645321587e-06, + "loss": 0.0329, + "step": 105145 + }, + { + "epoch": 4.91, + "learning_rate": 3.6804978602668006e-06, + "loss": 0.0504, + "step": 105150 + }, + { + "epoch": 4.91, + "learning_rate": 3.679714075212014e-06, + "loss": 0.0693, + "step": 105155 + }, + { + "epoch": 4.91, + "learning_rate": 3.6789302901572275e-06, + "loss": 0.1481, + "step": 105160 + }, + { + "epoch": 4.91, + "learning_rate": 3.678146505102441e-06, + "loss": 0.1619, + "step": 105165 + }, + { + "epoch": 4.91, + "learning_rate": 3.677362720047654e-06, + "loss": 0.197, + "step": 105170 + }, + { + "epoch": 4.91, + "learning_rate": 3.6765789349928676e-06, + "loss": 0.2148, + "step": 105175 + }, + { + "epoch": 4.91, + "learning_rate": 3.675795149938081e-06, + "loss": 0.0953, + "step": 105180 + }, + { + "epoch": 4.91, + "learning_rate": 3.6750113648832945e-06, + "loss": 0.0041, + "step": 105185 + }, + { + "epoch": 4.91, + "learning_rate": 3.674227579828508e-06, + "loss": 0.028, + "step": 105190 + }, + { + "epoch": 4.91, + "learning_rate": 3.6734437947737215e-06, + "loss": 0.0801, + "step": 105195 + }, + { + "epoch": 4.91, + "learning_rate": 3.6726600097189354e-06, + "loss": 0.0918, + "step": 105200 + }, + { + "epoch": 4.91, + "learning_rate": 3.671876224664148e-06, + "loss": 0.1148, + "step": 105205 + }, + { + "epoch": 4.91, + "learning_rate": 3.6710924396093615e-06, + "loss": 0.0428, + "step": 105210 + }, + { + "epoch": 4.91, + "learning_rate": 3.670308654554575e-06, + "loss": 0.1085, + "step": 105215 + }, + { + "epoch": 4.91, + "learning_rate": 3.669524869499789e-06, + "loss": 0.1252, + "step": 105220 + }, + { + "epoch": 4.91, + "learning_rate": 3.6687410844450024e-06, + "loss": 0.2745, + "step": 105225 + }, + { + "epoch": 4.91, + "learning_rate": 3.667957299390216e-06, + "loss": 0.0748, + "step": 105230 + }, + { + "epoch": 4.91, + "learning_rate": 3.6671735143354285e-06, + "loss": 0.0161, + "step": 105235 + }, + { + "epoch": 4.91, + "learning_rate": 3.666389729280642e-06, + "loss": 0.0287, + "step": 105240 + }, + { + "epoch": 4.91, + "learning_rate": 3.665605944225856e-06, + "loss": 0.0626, + "step": 105245 + }, + { + "epoch": 4.91, + "learning_rate": 3.6648221591710694e-06, + "loss": 0.1104, + "step": 105250 + }, + { + "epoch": 4.91, + "learning_rate": 3.664038374116283e-06, + "loss": 0.0796, + "step": 105255 + }, + { + "epoch": 4.91, + "learning_rate": 3.6632545890614963e-06, + "loss": 0.1188, + "step": 105260 + }, + { + "epoch": 4.91, + "learning_rate": 3.66247080400671e-06, + "loss": 0.1062, + "step": 105265 + }, + { + "epoch": 4.91, + "learning_rate": 3.661687018951923e-06, + "loss": 0.1849, + "step": 105270 + }, + { + "epoch": 4.91, + "learning_rate": 3.6609032338971364e-06, + "loss": 0.3491, + "step": 105275 + }, + { + "epoch": 4.91, + "learning_rate": 3.66011944884235e-06, + "loss": 0.1149, + "step": 105280 + }, + { + "epoch": 4.91, + "learning_rate": 3.6593356637875633e-06, + "loss": 0.0833, + "step": 105285 + }, + { + "epoch": 4.91, + "learning_rate": 3.658551878732777e-06, + "loss": 0.0371, + "step": 105290 + }, + { + "epoch": 4.91, + "learning_rate": 3.6577680936779903e-06, + "loss": 0.0682, + "step": 105295 + }, + { + "epoch": 4.91, + "learning_rate": 3.6569843086232038e-06, + "loss": 0.0367, + "step": 105300 + }, + { + "epoch": 4.91, + "learning_rate": 3.656200523568417e-06, + "loss": 0.0784, + "step": 105305 + }, + { + "epoch": 4.91, + "learning_rate": 3.6554167385136303e-06, + "loss": 0.1564, + "step": 105310 + }, + { + "epoch": 4.91, + "learning_rate": 3.654632953458844e-06, + "loss": 0.1572, + "step": 105315 + }, + { + "epoch": 4.91, + "learning_rate": 3.6538491684040573e-06, + "loss": 0.1604, + "step": 105320 + }, + { + "epoch": 4.91, + "learning_rate": 3.6530653833492708e-06, + "loss": 0.2611, + "step": 105325 + }, + { + "epoch": 4.91, + "learning_rate": 3.6522815982944843e-06, + "loss": 0.1115, + "step": 105330 + }, + { + "epoch": 4.92, + "learning_rate": 3.6514978132396973e-06, + "loss": 0.0122, + "step": 105335 + }, + { + "epoch": 4.92, + "learning_rate": 3.650714028184911e-06, + "loss": 0.0497, + "step": 105340 + }, + { + "epoch": 4.92, + "learning_rate": 3.6499302431301243e-06, + "loss": 0.0518, + "step": 105345 + }, + { + "epoch": 4.92, + "learning_rate": 3.6491464580753378e-06, + "loss": 0.0271, + "step": 105350 + }, + { + "epoch": 4.92, + "learning_rate": 3.6483626730205512e-06, + "loss": 0.1048, + "step": 105355 + }, + { + "epoch": 4.92, + "learning_rate": 3.6475788879657647e-06, + "loss": 0.1923, + "step": 105360 + }, + { + "epoch": 4.92, + "learning_rate": 3.646795102910978e-06, + "loss": 0.1279, + "step": 105365 + }, + { + "epoch": 4.92, + "learning_rate": 3.6460113178561913e-06, + "loss": 0.1805, + "step": 105370 + }, + { + "epoch": 4.92, + "learning_rate": 3.6452275328014047e-06, + "loss": 0.3541, + "step": 105375 + }, + { + "epoch": 4.92, + "learning_rate": 3.6444437477466182e-06, + "loss": 0.0504, + "step": 105380 + }, + { + "epoch": 4.92, + "learning_rate": 3.6436599626918317e-06, + "loss": 0.0187, + "step": 105385 + }, + { + "epoch": 4.92, + "learning_rate": 3.642876177637045e-06, + "loss": 0.0183, + "step": 105390 + }, + { + "epoch": 4.92, + "learning_rate": 3.6420923925822587e-06, + "loss": 0.0423, + "step": 105395 + }, + { + "epoch": 4.92, + "learning_rate": 3.6413086075274717e-06, + "loss": 0.0516, + "step": 105400 + }, + { + "epoch": 4.92, + "learning_rate": 3.6405248224726852e-06, + "loss": 0.0368, + "step": 105405 + }, + { + "epoch": 4.92, + "learning_rate": 3.6397410374178987e-06, + "loss": 0.1547, + "step": 105410 + }, + { + "epoch": 4.92, + "learning_rate": 3.638957252363112e-06, + "loss": 0.085, + "step": 105415 + }, + { + "epoch": 4.92, + "learning_rate": 3.6381734673083257e-06, + "loss": 0.2139, + "step": 105420 + }, + { + "epoch": 4.92, + "learning_rate": 3.637389682253539e-06, + "loss": 0.3813, + "step": 105425 + }, + { + "epoch": 4.92, + "learning_rate": 3.6366058971987526e-06, + "loss": 0.0819, + "step": 105430 + }, + { + "epoch": 4.92, + "learning_rate": 3.6358221121439657e-06, + "loss": 0.0529, + "step": 105435 + }, + { + "epoch": 4.92, + "learning_rate": 3.635038327089179e-06, + "loss": 0.0524, + "step": 105440 + }, + { + "epoch": 4.92, + "learning_rate": 3.6342545420343926e-06, + "loss": 0.041, + "step": 105445 + }, + { + "epoch": 4.92, + "learning_rate": 3.633470756979606e-06, + "loss": 0.0679, + "step": 105450 + }, + { + "epoch": 4.92, + "learning_rate": 3.6326869719248196e-06, + "loss": 0.0766, + "step": 105455 + }, + { + "epoch": 4.92, + "learning_rate": 3.631903186870033e-06, + "loss": 0.1548, + "step": 105460 + }, + { + "epoch": 4.92, + "learning_rate": 3.631119401815246e-06, + "loss": 0.1758, + "step": 105465 + }, + { + "epoch": 4.92, + "learning_rate": 3.6303356167604596e-06, + "loss": 0.1583, + "step": 105470 + }, + { + "epoch": 4.92, + "learning_rate": 3.629551831705673e-06, + "loss": 0.2218, + "step": 105475 + }, + { + "epoch": 4.92, + "learning_rate": 3.6287680466508866e-06, + "loss": 0.056, + "step": 105480 + }, + { + "epoch": 4.92, + "learning_rate": 3.6279842615961e-06, + "loss": 0.03, + "step": 105485 + }, + { + "epoch": 4.92, + "learning_rate": 3.627200476541314e-06, + "loss": 0.0319, + "step": 105490 + }, + { + "epoch": 4.92, + "learning_rate": 3.6264166914865275e-06, + "loss": 0.0156, + "step": 105495 + }, + { + "epoch": 4.92, + "learning_rate": 3.62563290643174e-06, + "loss": 0.0721, + "step": 105500 + }, + { + "epoch": 4.92, + "learning_rate": 3.6248491213769536e-06, + "loss": 0.0434, + "step": 105505 + }, + { + "epoch": 4.92, + "learning_rate": 3.6240653363221675e-06, + "loss": 0.0501, + "step": 105510 + }, + { + "epoch": 4.92, + "learning_rate": 3.623281551267381e-06, + "loss": 0.1003, + "step": 105515 + }, + { + "epoch": 4.92, + "learning_rate": 3.6224977662125945e-06, + "loss": 0.1763, + "step": 105520 + }, + { + "epoch": 4.92, + "learning_rate": 3.621713981157808e-06, + "loss": 0.1113, + "step": 105525 + }, + { + "epoch": 4.92, + "learning_rate": 3.6209301961030206e-06, + "loss": 0.1485, + "step": 105530 + }, + { + "epoch": 4.92, + "learning_rate": 3.6201464110482345e-06, + "loss": 0.0027, + "step": 105535 + }, + { + "epoch": 4.92, + "learning_rate": 3.619362625993448e-06, + "loss": 0.0314, + "step": 105540 + }, + { + "epoch": 4.92, + "learning_rate": 3.6185788409386614e-06, + "loss": 0.0288, + "step": 105545 + }, + { + "epoch": 4.93, + "learning_rate": 3.617795055883875e-06, + "loss": 0.0805, + "step": 105550 + }, + { + "epoch": 4.93, + "learning_rate": 3.6170112708290884e-06, + "loss": 0.0234, + "step": 105555 + }, + { + "epoch": 4.93, + "learning_rate": 3.616227485774302e-06, + "loss": 0.0523, + "step": 105560 + }, + { + "epoch": 4.93, + "learning_rate": 3.615443700719515e-06, + "loss": 0.1578, + "step": 105565 + }, + { + "epoch": 4.93, + "learning_rate": 3.6146599156647284e-06, + "loss": 0.1157, + "step": 105570 + }, + { + "epoch": 4.93, + "learning_rate": 3.613876130609942e-06, + "loss": 0.1026, + "step": 105575 + }, + { + "epoch": 4.93, + "learning_rate": 3.6130923455551554e-06, + "loss": 0.0726, + "step": 105580 + }, + { + "epoch": 4.93, + "learning_rate": 3.612308560500369e-06, + "loss": 0.0271, + "step": 105585 + }, + { + "epoch": 4.93, + "learning_rate": 3.6115247754455824e-06, + "loss": 0.03, + "step": 105590 + }, + { + "epoch": 4.93, + "learning_rate": 3.6107409903907954e-06, + "loss": 0.0297, + "step": 105595 + }, + { + "epoch": 4.93, + "learning_rate": 3.609957205336009e-06, + "loss": 0.0933, + "step": 105600 + }, + { + "epoch": 4.93, + "learning_rate": 3.6091734202812224e-06, + "loss": 0.0728, + "step": 105605 + }, + { + "epoch": 4.93, + "learning_rate": 3.608389635226436e-06, + "loss": 0.0844, + "step": 105610 + }, + { + "epoch": 4.93, + "learning_rate": 3.6076058501716494e-06, + "loss": 0.0902, + "step": 105615 + }, + { + "epoch": 4.93, + "learning_rate": 3.606822065116863e-06, + "loss": 0.1571, + "step": 105620 + }, + { + "epoch": 4.93, + "learning_rate": 3.6060382800620763e-06, + "loss": 0.3609, + "step": 105625 + }, + { + "epoch": 4.93, + "learning_rate": 3.6052544950072894e-06, + "loss": 0.1032, + "step": 105630 + }, + { + "epoch": 4.93, + "learning_rate": 3.604470709952503e-06, + "loss": 0.0413, + "step": 105635 + }, + { + "epoch": 4.93, + "learning_rate": 3.6036869248977163e-06, + "loss": 0.0128, + "step": 105640 + }, + { + "epoch": 4.93, + "learning_rate": 3.60290313984293e-06, + "loss": 0.0673, + "step": 105645 + }, + { + "epoch": 4.93, + "learning_rate": 3.6021193547881433e-06, + "loss": 0.0585, + "step": 105650 + }, + { + "epoch": 4.93, + "learning_rate": 3.6013355697333568e-06, + "loss": 0.0956, + "step": 105655 + }, + { + "epoch": 4.93, + "learning_rate": 3.60055178467857e-06, + "loss": 0.1386, + "step": 105660 + }, + { + "epoch": 4.93, + "learning_rate": 3.5997679996237833e-06, + "loss": 0.0706, + "step": 105665 + }, + { + "epoch": 4.93, + "learning_rate": 3.598984214568997e-06, + "loss": 0.0991, + "step": 105670 + }, + { + "epoch": 4.93, + "learning_rate": 3.5982004295142103e-06, + "loss": 0.2258, + "step": 105675 + }, + { + "epoch": 4.93, + "learning_rate": 3.5974166444594238e-06, + "loss": 0.0855, + "step": 105680 + }, + { + "epoch": 4.93, + "learning_rate": 3.5966328594046373e-06, + "loss": 0.0263, + "step": 105685 + }, + { + "epoch": 4.93, + "learning_rate": 3.5958490743498507e-06, + "loss": 0.0155, + "step": 105690 + }, + { + "epoch": 4.93, + "learning_rate": 3.595065289295064e-06, + "loss": 0.0519, + "step": 105695 + }, + { + "epoch": 4.93, + "learning_rate": 3.5942815042402773e-06, + "loss": 0.0827, + "step": 105700 + }, + { + "epoch": 4.93, + "learning_rate": 3.5934977191854908e-06, + "loss": 0.0672, + "step": 105705 + }, + { + "epoch": 4.93, + "learning_rate": 3.5927139341307042e-06, + "loss": 0.105, + "step": 105710 + }, + { + "epoch": 4.93, + "learning_rate": 3.5919301490759177e-06, + "loss": 0.0844, + "step": 105715 + }, + { + "epoch": 4.93, + "learning_rate": 3.591146364021131e-06, + "loss": 0.1419, + "step": 105720 + }, + { + "epoch": 4.93, + "learning_rate": 3.5903625789663443e-06, + "loss": 0.3696, + "step": 105725 + }, + { + "epoch": 4.93, + "learning_rate": 3.5895787939115577e-06, + "loss": 0.077, + "step": 105730 + }, + { + "epoch": 4.93, + "learning_rate": 3.5887950088567712e-06, + "loss": 0.0292, + "step": 105735 + }, + { + "epoch": 4.93, + "learning_rate": 3.5880112238019847e-06, + "loss": 0.0378, + "step": 105740 + }, + { + "epoch": 4.93, + "learning_rate": 3.587227438747198e-06, + "loss": 0.0408, + "step": 105745 + }, + { + "epoch": 4.93, + "learning_rate": 3.5864436536924117e-06, + "loss": 0.0388, + "step": 105750 + }, + { + "epoch": 4.93, + "learning_rate": 3.5856598686376256e-06, + "loss": 0.0425, + "step": 105755 + }, + { + "epoch": 4.93, + "learning_rate": 3.5848760835828382e-06, + "loss": 0.0547, + "step": 105760 + }, + { + "epoch": 4.94, + "learning_rate": 3.5840922985280517e-06, + "loss": 0.1345, + "step": 105765 + }, + { + "epoch": 4.94, + "learning_rate": 3.583308513473265e-06, + "loss": 0.1975, + "step": 105770 + }, + { + "epoch": 4.94, + "learning_rate": 3.5825247284184787e-06, + "loss": 0.2356, + "step": 105775 + }, + { + "epoch": 4.94, + "learning_rate": 3.5817409433636926e-06, + "loss": 0.0878, + "step": 105780 + }, + { + "epoch": 4.94, + "learning_rate": 3.580957158308906e-06, + "loss": 0.0163, + "step": 105785 + }, + { + "epoch": 4.94, + "learning_rate": 3.5801733732541187e-06, + "loss": 0.0549, + "step": 105790 + }, + { + "epoch": 4.94, + "learning_rate": 3.579389588199332e-06, + "loss": 0.0457, + "step": 105795 + }, + { + "epoch": 4.94, + "learning_rate": 3.578605803144546e-06, + "loss": 0.022, + "step": 105800 + }, + { + "epoch": 4.94, + "learning_rate": 3.5778220180897596e-06, + "loss": 0.0487, + "step": 105805 + }, + { + "epoch": 4.94, + "learning_rate": 3.577038233034973e-06, + "loss": 0.0787, + "step": 105810 + }, + { + "epoch": 4.94, + "learning_rate": 3.5762544479801865e-06, + "loss": 0.104, + "step": 105815 + }, + { + "epoch": 4.94, + "learning_rate": 3.5754706629254e-06, + "loss": 0.0664, + "step": 105820 + }, + { + "epoch": 4.94, + "learning_rate": 3.574686877870613e-06, + "loss": 0.2142, + "step": 105825 + }, + { + "epoch": 4.94, + "learning_rate": 3.5739030928158265e-06, + "loss": 0.0669, + "step": 105830 + }, + { + "epoch": 4.94, + "learning_rate": 3.57311930776104e-06, + "loss": 0.0148, + "step": 105835 + }, + { + "epoch": 4.94, + "learning_rate": 3.5723355227062535e-06, + "loss": 0.0472, + "step": 105840 + }, + { + "epoch": 4.94, + "learning_rate": 3.571551737651467e-06, + "loss": 0.0227, + "step": 105845 + }, + { + "epoch": 4.94, + "learning_rate": 3.5707679525966805e-06, + "loss": 0.0485, + "step": 105850 + }, + { + "epoch": 4.94, + "learning_rate": 3.5699841675418935e-06, + "loss": 0.052, + "step": 105855 + }, + { + "epoch": 4.94, + "learning_rate": 3.569200382487107e-06, + "loss": 0.0552, + "step": 105860 + }, + { + "epoch": 4.94, + "learning_rate": 3.5684165974323205e-06, + "loss": 0.1724, + "step": 105865 + }, + { + "epoch": 4.94, + "learning_rate": 3.567632812377534e-06, + "loss": 0.2415, + "step": 105870 + }, + { + "epoch": 4.94, + "learning_rate": 3.5668490273227475e-06, + "loss": 0.252, + "step": 105875 + }, + { + "epoch": 4.94, + "learning_rate": 3.566065242267961e-06, + "loss": 0.0695, + "step": 105880 + }, + { + "epoch": 4.94, + "learning_rate": 3.5652814572131744e-06, + "loss": 0.0181, + "step": 105885 + }, + { + "epoch": 4.94, + "learning_rate": 3.5644976721583875e-06, + "loss": 0.0542, + "step": 105890 + }, + { + "epoch": 4.94, + "learning_rate": 3.563713887103601e-06, + "loss": 0.0415, + "step": 105895 + }, + { + "epoch": 4.94, + "learning_rate": 3.5629301020488145e-06, + "loss": 0.0706, + "step": 105900 + }, + { + "epoch": 4.94, + "learning_rate": 3.562146316994028e-06, + "loss": 0.1483, + "step": 105905 + }, + { + "epoch": 4.94, + "learning_rate": 3.5613625319392414e-06, + "loss": 0.0904, + "step": 105910 + }, + { + "epoch": 4.94, + "learning_rate": 3.560578746884455e-06, + "loss": 0.1684, + "step": 105915 + }, + { + "epoch": 4.94, + "learning_rate": 3.559794961829668e-06, + "loss": 0.2194, + "step": 105920 + }, + { + "epoch": 4.94, + "learning_rate": 3.5590111767748814e-06, + "loss": 0.2801, + "step": 105925 + }, + { + "epoch": 4.94, + "learning_rate": 3.558227391720095e-06, + "loss": 0.1085, + "step": 105930 + }, + { + "epoch": 4.94, + "learning_rate": 3.5574436066653084e-06, + "loss": 0.026, + "step": 105935 + }, + { + "epoch": 4.94, + "learning_rate": 3.556659821610522e-06, + "loss": 0.0574, + "step": 105940 + }, + { + "epoch": 4.94, + "learning_rate": 3.5558760365557354e-06, + "loss": 0.0476, + "step": 105945 + }, + { + "epoch": 4.94, + "learning_rate": 3.555092251500949e-06, + "loss": 0.0264, + "step": 105950 + }, + { + "epoch": 4.94, + "learning_rate": 3.554308466446162e-06, + "loss": 0.1067, + "step": 105955 + }, + { + "epoch": 4.94, + "learning_rate": 3.5535246813913754e-06, + "loss": 0.2285, + "step": 105960 + }, + { + "epoch": 4.94, + "learning_rate": 3.552740896336589e-06, + "loss": 0.0687, + "step": 105965 + }, + { + "epoch": 4.94, + "learning_rate": 3.5519571112818024e-06, + "loss": 0.169, + "step": 105970 + }, + { + "epoch": 4.94, + "learning_rate": 3.551173326227016e-06, + "loss": 0.2477, + "step": 105975 + }, + { + "epoch": 4.95, + "learning_rate": 3.5503895411722293e-06, + "loss": 0.1724, + "step": 105980 + }, + { + "epoch": 4.95, + "learning_rate": 3.5496057561174424e-06, + "loss": 0.0145, + "step": 105985 + }, + { + "epoch": 4.95, + "learning_rate": 3.548821971062656e-06, + "loss": 0.0335, + "step": 105990 + }, + { + "epoch": 4.95, + "learning_rate": 3.5480381860078693e-06, + "loss": 0.0486, + "step": 105995 + }, + { + "epoch": 4.95, + "learning_rate": 3.547254400953083e-06, + "loss": 0.0692, + "step": 106000 + }, + { + "epoch": 4.95, + "learning_rate": 3.5464706158982963e-06, + "loss": 0.0605, + "step": 106005 + }, + { + "epoch": 4.95, + "learning_rate": 3.54568683084351e-06, + "loss": 0.0902, + "step": 106010 + }, + { + "epoch": 4.95, + "learning_rate": 3.5449030457887233e-06, + "loss": 0.0785, + "step": 106015 + }, + { + "epoch": 4.95, + "learning_rate": 3.5441192607339363e-06, + "loss": 0.2275, + "step": 106020 + }, + { + "epoch": 4.95, + "learning_rate": 3.54333547567915e-06, + "loss": 0.2043, + "step": 106025 + }, + { + "epoch": 4.95, + "learning_rate": 3.5425516906243633e-06, + "loss": 0.1101, + "step": 106030 + }, + { + "epoch": 4.95, + "learning_rate": 3.5417679055695768e-06, + "loss": 0.025, + "step": 106035 + }, + { + "epoch": 4.95, + "learning_rate": 3.5409841205147903e-06, + "loss": 0.0647, + "step": 106040 + }, + { + "epoch": 4.95, + "learning_rate": 3.540200335460004e-06, + "loss": 0.0437, + "step": 106045 + }, + { + "epoch": 4.95, + "learning_rate": 3.539416550405217e-06, + "loss": 0.0448, + "step": 106050 + }, + { + "epoch": 4.95, + "learning_rate": 3.5386327653504303e-06, + "loss": 0.0725, + "step": 106055 + }, + { + "epoch": 4.95, + "learning_rate": 3.5378489802956438e-06, + "loss": 0.227, + "step": 106060 + }, + { + "epoch": 4.95, + "learning_rate": 3.5370651952408572e-06, + "loss": 0.1955, + "step": 106065 + }, + { + "epoch": 4.95, + "learning_rate": 3.536281410186071e-06, + "loss": 0.081, + "step": 106070 + }, + { + "epoch": 4.95, + "learning_rate": 3.5354976251312846e-06, + "loss": 0.254, + "step": 106075 + }, + { + "epoch": 4.95, + "learning_rate": 3.534713840076498e-06, + "loss": 0.0661, + "step": 106080 + }, + { + "epoch": 4.95, + "learning_rate": 3.5339300550217108e-06, + "loss": 0.0277, + "step": 106085 + }, + { + "epoch": 4.95, + "learning_rate": 3.5331462699669242e-06, + "loss": 0.0263, + "step": 106090 + }, + { + "epoch": 4.95, + "learning_rate": 3.532362484912138e-06, + "loss": 0.032, + "step": 106095 + }, + { + "epoch": 4.95, + "learning_rate": 3.5315786998573516e-06, + "loss": 0.0333, + "step": 106100 + }, + { + "epoch": 4.95, + "learning_rate": 3.530794914802565e-06, + "loss": 0.0421, + "step": 106105 + }, + { + "epoch": 4.95, + "learning_rate": 3.5300111297477786e-06, + "loss": 0.0675, + "step": 106110 + }, + { + "epoch": 4.95, + "learning_rate": 3.5292273446929916e-06, + "loss": 0.4258, + "step": 106115 + }, + { + "epoch": 4.95, + "learning_rate": 3.528443559638205e-06, + "loss": 0.161, + "step": 106120 + }, + { + "epoch": 4.95, + "learning_rate": 3.5276597745834186e-06, + "loss": 0.278, + "step": 106125 + }, + { + "epoch": 4.95, + "learning_rate": 3.526875989528632e-06, + "loss": 0.0798, + "step": 106130 + }, + { + "epoch": 4.95, + "learning_rate": 3.5260922044738456e-06, + "loss": 0.0228, + "step": 106135 + }, + { + "epoch": 4.95, + "learning_rate": 3.525308419419059e-06, + "loss": 0.0544, + "step": 106140 + }, + { + "epoch": 4.95, + "learning_rate": 3.5245246343642725e-06, + "loss": 0.0382, + "step": 106145 + }, + { + "epoch": 4.95, + "learning_rate": 3.5237408493094856e-06, + "loss": 0.0646, + "step": 106150 + }, + { + "epoch": 4.95, + "learning_rate": 3.522957064254699e-06, + "loss": 0.0415, + "step": 106155 + }, + { + "epoch": 4.95, + "learning_rate": 3.5221732791999126e-06, + "loss": 0.0916, + "step": 106160 + }, + { + "epoch": 4.95, + "learning_rate": 3.521389494145126e-06, + "loss": 0.107, + "step": 106165 + }, + { + "epoch": 4.95, + "learning_rate": 3.5206057090903395e-06, + "loss": 0.0806, + "step": 106170 + }, + { + "epoch": 4.95, + "learning_rate": 3.519821924035553e-06, + "loss": 0.1467, + "step": 106175 + }, + { + "epoch": 4.95, + "learning_rate": 3.519038138980766e-06, + "loss": 0.0809, + "step": 106180 + }, + { + "epoch": 4.95, + "learning_rate": 3.5182543539259796e-06, + "loss": 0.0925, + "step": 106185 + }, + { + "epoch": 4.95, + "learning_rate": 3.517470568871193e-06, + "loss": 0.0654, + "step": 106190 + }, + { + "epoch": 4.96, + "learning_rate": 3.5166867838164065e-06, + "loss": 0.043, + "step": 106195 + }, + { + "epoch": 4.96, + "learning_rate": 3.51590299876162e-06, + "loss": 0.1209, + "step": 106200 + }, + { + "epoch": 4.96, + "learning_rate": 3.5151192137068335e-06, + "loss": 0.0499, + "step": 106205 + }, + { + "epoch": 4.96, + "learning_rate": 3.514335428652047e-06, + "loss": 0.0794, + "step": 106210 + }, + { + "epoch": 4.96, + "learning_rate": 3.51355164359726e-06, + "loss": 0.091, + "step": 106215 + }, + { + "epoch": 4.96, + "learning_rate": 3.5127678585424735e-06, + "loss": 0.0741, + "step": 106220 + }, + { + "epoch": 4.96, + "learning_rate": 3.511984073487687e-06, + "loss": 0.2565, + "step": 106225 + }, + { + "epoch": 4.96, + "learning_rate": 3.5112002884329005e-06, + "loss": 0.0434, + "step": 106230 + }, + { + "epoch": 4.96, + "learning_rate": 3.510416503378114e-06, + "loss": 0.0221, + "step": 106235 + }, + { + "epoch": 4.96, + "learning_rate": 3.5096327183233274e-06, + "loss": 0.0363, + "step": 106240 + }, + { + "epoch": 4.96, + "learning_rate": 3.5088489332685405e-06, + "loss": 0.0437, + "step": 106245 + }, + { + "epoch": 4.96, + "learning_rate": 3.508065148213754e-06, + "loss": 0.0652, + "step": 106250 + }, + { + "epoch": 4.96, + "learning_rate": 3.5072813631589675e-06, + "loss": 0.0989, + "step": 106255 + }, + { + "epoch": 4.96, + "learning_rate": 3.506497578104181e-06, + "loss": 0.1147, + "step": 106260 + }, + { + "epoch": 4.96, + "learning_rate": 3.5057137930493944e-06, + "loss": 0.1318, + "step": 106265 + }, + { + "epoch": 4.96, + "learning_rate": 3.504930007994608e-06, + "loss": 0.157, + "step": 106270 + }, + { + "epoch": 4.96, + "learning_rate": 3.5041462229398214e-06, + "loss": 0.4112, + "step": 106275 + }, + { + "epoch": 4.96, + "learning_rate": 3.5033624378850344e-06, + "loss": 0.0757, + "step": 106280 + }, + { + "epoch": 4.96, + "learning_rate": 3.502578652830248e-06, + "loss": 0.0156, + "step": 106285 + }, + { + "epoch": 4.96, + "learning_rate": 3.5017948677754614e-06, + "loss": 0.0737, + "step": 106290 + }, + { + "epoch": 4.96, + "learning_rate": 3.501011082720675e-06, + "loss": 0.0418, + "step": 106295 + }, + { + "epoch": 4.96, + "learning_rate": 3.5002272976658884e-06, + "loss": 0.0456, + "step": 106300 + }, + { + "epoch": 4.96, + "learning_rate": 3.499443512611102e-06, + "loss": 0.0979, + "step": 106305 + }, + { + "epoch": 4.96, + "learning_rate": 3.498659727556315e-06, + "loss": 0.0544, + "step": 106310 + }, + { + "epoch": 4.96, + "learning_rate": 3.4978759425015284e-06, + "loss": 0.1414, + "step": 106315 + }, + { + "epoch": 4.96, + "learning_rate": 3.497092157446742e-06, + "loss": 0.0642, + "step": 106320 + }, + { + "epoch": 4.96, + "learning_rate": 3.4963083723919554e-06, + "loss": 0.2217, + "step": 106325 + }, + { + "epoch": 4.96, + "learning_rate": 3.495524587337169e-06, + "loss": 0.075, + "step": 106330 + }, + { + "epoch": 4.96, + "learning_rate": 3.4947408022823827e-06, + "loss": 0.0124, + "step": 106335 + }, + { + "epoch": 4.96, + "learning_rate": 3.4939570172275962e-06, + "loss": 0.0152, + "step": 106340 + }, + { + "epoch": 4.96, + "learning_rate": 3.493173232172809e-06, + "loss": 0.0246, + "step": 106345 + }, + { + "epoch": 4.96, + "learning_rate": 3.4923894471180223e-06, + "loss": 0.0479, + "step": 106350 + }, + { + "epoch": 4.96, + "learning_rate": 3.491605662063236e-06, + "loss": 0.0742, + "step": 106355 + }, + { + "epoch": 4.96, + "learning_rate": 3.4908218770084497e-06, + "loss": 0.0471, + "step": 106360 + }, + { + "epoch": 4.96, + "learning_rate": 3.4900380919536632e-06, + "loss": 0.1336, + "step": 106365 + }, + { + "epoch": 4.96, + "learning_rate": 3.4892543068988767e-06, + "loss": 0.1388, + "step": 106370 + }, + { + "epoch": 4.96, + "learning_rate": 3.4884705218440893e-06, + "loss": 0.3218, + "step": 106375 + }, + { + "epoch": 4.96, + "learning_rate": 3.487686736789303e-06, + "loss": 0.0743, + "step": 106380 + }, + { + "epoch": 4.96, + "learning_rate": 3.4869029517345167e-06, + "loss": 0.0092, + "step": 106385 + }, + { + "epoch": 4.96, + "learning_rate": 3.48611916667973e-06, + "loss": 0.0394, + "step": 106390 + }, + { + "epoch": 4.96, + "learning_rate": 3.4853353816249437e-06, + "loss": 0.0466, + "step": 106395 + }, + { + "epoch": 4.96, + "learning_rate": 3.484551596570157e-06, + "loss": 0.0722, + "step": 106400 + }, + { + "epoch": 4.97, + "learning_rate": 3.4837678115153707e-06, + "loss": 0.0559, + "step": 106405 + }, + { + "epoch": 4.97, + "learning_rate": 3.4829840264605837e-06, + "loss": 0.1109, + "step": 106410 + }, + { + "epoch": 4.97, + "learning_rate": 3.482200241405797e-06, + "loss": 0.1401, + "step": 106415 + }, + { + "epoch": 4.97, + "learning_rate": 3.4814164563510107e-06, + "loss": 0.128, + "step": 106420 + }, + { + "epoch": 4.97, + "learning_rate": 3.480632671296224e-06, + "loss": 0.3628, + "step": 106425 + }, + { + "epoch": 4.97, + "learning_rate": 3.4798488862414376e-06, + "loss": 0.1111, + "step": 106430 + }, + { + "epoch": 4.97, + "learning_rate": 3.479065101186651e-06, + "loss": 0.0278, + "step": 106435 + }, + { + "epoch": 4.97, + "learning_rate": 3.478281316131864e-06, + "loss": 0.0346, + "step": 106440 + }, + { + "epoch": 4.97, + "learning_rate": 3.4774975310770777e-06, + "loss": 0.0373, + "step": 106445 + }, + { + "epoch": 4.97, + "learning_rate": 3.476713746022291e-06, + "loss": 0.0616, + "step": 106450 + }, + { + "epoch": 4.97, + "learning_rate": 3.4759299609675046e-06, + "loss": 0.0689, + "step": 106455 + }, + { + "epoch": 4.97, + "learning_rate": 3.475146175912718e-06, + "loss": 0.0737, + "step": 106460 + }, + { + "epoch": 4.97, + "learning_rate": 3.4743623908579316e-06, + "loss": 0.1977, + "step": 106465 + }, + { + "epoch": 4.97, + "learning_rate": 3.473578605803145e-06, + "loss": 0.1243, + "step": 106470 + }, + { + "epoch": 4.97, + "learning_rate": 3.472794820748358e-06, + "loss": 0.2419, + "step": 106475 + }, + { + "epoch": 4.97, + "learning_rate": 3.4720110356935716e-06, + "loss": 0.1013, + "step": 106480 + }, + { + "epoch": 4.97, + "learning_rate": 3.471227250638785e-06, + "loss": 0.0313, + "step": 106485 + }, + { + "epoch": 4.97, + "learning_rate": 3.4704434655839986e-06, + "loss": 0.0174, + "step": 106490 + }, + { + "epoch": 4.97, + "learning_rate": 3.469659680529212e-06, + "loss": 0.0356, + "step": 106495 + }, + { + "epoch": 4.97, + "learning_rate": 3.4688758954744255e-06, + "loss": 0.0787, + "step": 106500 + }, + { + "epoch": 4.97, + "learning_rate": 3.4680921104196386e-06, + "loss": 0.05, + "step": 106505 + }, + { + "epoch": 4.97, + "learning_rate": 3.467308325364852e-06, + "loss": 0.1218, + "step": 106510 + }, + { + "epoch": 4.97, + "learning_rate": 3.4665245403100656e-06, + "loss": 0.0471, + "step": 106515 + }, + { + "epoch": 4.97, + "learning_rate": 3.465740755255279e-06, + "loss": 0.2304, + "step": 106520 + }, + { + "epoch": 4.97, + "learning_rate": 3.4649569702004925e-06, + "loss": 0.3126, + "step": 106525 + }, + { + "epoch": 4.97, + "learning_rate": 3.464173185145706e-06, + "loss": 0.1508, + "step": 106530 + }, + { + "epoch": 4.97, + "learning_rate": 3.4633894000909195e-06, + "loss": 0.0197, + "step": 106535 + }, + { + "epoch": 4.97, + "learning_rate": 3.4626056150361326e-06, + "loss": 0.0725, + "step": 106540 + }, + { + "epoch": 4.97, + "learning_rate": 3.461821829981346e-06, + "loss": 0.0503, + "step": 106545 + }, + { + "epoch": 4.97, + "learning_rate": 3.4610380449265595e-06, + "loss": 0.0553, + "step": 106550 + }, + { + "epoch": 4.97, + "learning_rate": 3.460254259871773e-06, + "loss": 0.0367, + "step": 106555 + }, + { + "epoch": 4.97, + "learning_rate": 3.4594704748169865e-06, + "loss": 0.0992, + "step": 106560 + }, + { + "epoch": 4.97, + "learning_rate": 3.4586866897622e-06, + "loss": 0.0794, + "step": 106565 + }, + { + "epoch": 4.97, + "learning_rate": 3.457902904707413e-06, + "loss": 0.193, + "step": 106570 + }, + { + "epoch": 4.97, + "learning_rate": 3.4571191196526265e-06, + "loss": 0.2616, + "step": 106575 + }, + { + "epoch": 4.97, + "learning_rate": 3.45633533459784e-06, + "loss": 0.1108, + "step": 106580 + }, + { + "epoch": 4.97, + "learning_rate": 3.4555515495430535e-06, + "loss": 0.0291, + "step": 106585 + }, + { + "epoch": 4.97, + "learning_rate": 3.454767764488267e-06, + "loss": 0.0615, + "step": 106590 + }, + { + "epoch": 4.97, + "learning_rate": 3.4539839794334804e-06, + "loss": 0.0407, + "step": 106595 + }, + { + "epoch": 4.97, + "learning_rate": 3.4532001943786943e-06, + "loss": 0.0377, + "step": 106600 + }, + { + "epoch": 4.97, + "learning_rate": 3.452416409323907e-06, + "loss": 0.0444, + "step": 106605 + }, + { + "epoch": 4.97, + "learning_rate": 3.4516326242691205e-06, + "loss": 0.0721, + "step": 106610 + }, + { + "epoch": 4.97, + "learning_rate": 3.450848839214334e-06, + "loss": 0.0693, + "step": 106615 + }, + { + "epoch": 4.98, + "learning_rate": 3.4500650541595474e-06, + "loss": 0.2011, + "step": 106620 + }, + { + "epoch": 4.98, + "learning_rate": 3.4492812691047613e-06, + "loss": 0.3003, + "step": 106625 + }, + { + "epoch": 4.98, + "learning_rate": 3.448497484049975e-06, + "loss": 0.1171, + "step": 106630 + }, + { + "epoch": 4.98, + "learning_rate": 3.4477136989951874e-06, + "loss": 0.0382, + "step": 106635 + }, + { + "epoch": 4.98, + "learning_rate": 3.446929913940401e-06, + "loss": 0.0392, + "step": 106640 + }, + { + "epoch": 4.98, + "learning_rate": 3.4461461288856144e-06, + "loss": 0.0206, + "step": 106645 + }, + { + "epoch": 4.98, + "learning_rate": 3.4453623438308283e-06, + "loss": 0.0474, + "step": 106650 + }, + { + "epoch": 4.98, + "learning_rate": 3.444578558776042e-06, + "loss": 0.0398, + "step": 106655 + }, + { + "epoch": 4.98, + "learning_rate": 3.4437947737212553e-06, + "loss": 0.1376, + "step": 106660 + }, + { + "epoch": 4.98, + "learning_rate": 3.4430109886664688e-06, + "loss": 0.0836, + "step": 106665 + }, + { + "epoch": 4.98, + "learning_rate": 3.4422272036116814e-06, + "loss": 0.168, + "step": 106670 + }, + { + "epoch": 4.98, + "learning_rate": 3.4414434185568953e-06, + "loss": 0.2874, + "step": 106675 + }, + { + "epoch": 4.98, + "learning_rate": 3.4406596335021088e-06, + "loss": 0.0857, + "step": 106680 + }, + { + "epoch": 4.98, + "learning_rate": 3.4398758484473223e-06, + "loss": 0.0456, + "step": 106685 + }, + { + "epoch": 4.98, + "learning_rate": 3.4390920633925358e-06, + "loss": 0.0468, + "step": 106690 + }, + { + "epoch": 4.98, + "learning_rate": 3.4383082783377492e-06, + "loss": 0.0476, + "step": 106695 + }, + { + "epoch": 4.98, + "learning_rate": 3.4375244932829623e-06, + "loss": 0.0507, + "step": 106700 + }, + { + "epoch": 4.98, + "learning_rate": 3.4367407082281758e-06, + "loss": 0.041, + "step": 106705 + }, + { + "epoch": 4.98, + "learning_rate": 3.4359569231733893e-06, + "loss": 0.0779, + "step": 106710 + }, + { + "epoch": 4.98, + "learning_rate": 3.4351731381186027e-06, + "loss": 0.1492, + "step": 106715 + }, + { + "epoch": 4.98, + "learning_rate": 3.4343893530638162e-06, + "loss": 0.1987, + "step": 106720 + }, + { + "epoch": 4.98, + "learning_rate": 3.4336055680090297e-06, + "loss": 0.1688, + "step": 106725 + }, + { + "epoch": 4.98, + "learning_rate": 3.432821782954243e-06, + "loss": 0.1182, + "step": 106730 + }, + { + "epoch": 4.98, + "learning_rate": 3.4320379978994562e-06, + "loss": 0.0452, + "step": 106735 + }, + { + "epoch": 4.98, + "learning_rate": 3.4312542128446697e-06, + "loss": 0.026, + "step": 106740 + }, + { + "epoch": 4.98, + "learning_rate": 3.430470427789883e-06, + "loss": 0.0615, + "step": 106745 + }, + { + "epoch": 4.98, + "learning_rate": 3.4296866427350967e-06, + "loss": 0.054, + "step": 106750 + }, + { + "epoch": 4.98, + "learning_rate": 3.42890285768031e-06, + "loss": 0.0355, + "step": 106755 + }, + { + "epoch": 4.98, + "learning_rate": 3.4281190726255237e-06, + "loss": 0.0659, + "step": 106760 + }, + { + "epoch": 4.98, + "learning_rate": 3.4273352875707367e-06, + "loss": 0.1147, + "step": 106765 + }, + { + "epoch": 4.98, + "learning_rate": 3.42655150251595e-06, + "loss": 0.0859, + "step": 106770 + }, + { + "epoch": 4.98, + "learning_rate": 3.4257677174611637e-06, + "loss": 0.1798, + "step": 106775 + }, + { + "epoch": 4.98, + "learning_rate": 3.424983932406377e-06, + "loss": 0.0941, + "step": 106780 + }, + { + "epoch": 4.98, + "learning_rate": 3.4242001473515906e-06, + "loss": 0.0168, + "step": 106785 + }, + { + "epoch": 4.98, + "learning_rate": 3.423416362296804e-06, + "loss": 0.0317, + "step": 106790 + }, + { + "epoch": 4.98, + "learning_rate": 3.4226325772420176e-06, + "loss": 0.0486, + "step": 106795 + }, + { + "epoch": 4.98, + "learning_rate": 3.4218487921872307e-06, + "loss": 0.0382, + "step": 106800 + }, + { + "epoch": 4.98, + "learning_rate": 3.421065007132444e-06, + "loss": 0.0989, + "step": 106805 + }, + { + "epoch": 4.98, + "learning_rate": 3.4202812220776576e-06, + "loss": 0.0674, + "step": 106810 + }, + { + "epoch": 4.98, + "learning_rate": 3.419497437022871e-06, + "loss": 0.0986, + "step": 106815 + }, + { + "epoch": 4.98, + "learning_rate": 3.4187136519680846e-06, + "loss": 0.1039, + "step": 106820 + }, + { + "epoch": 4.98, + "learning_rate": 3.417929866913298e-06, + "loss": 0.2248, + "step": 106825 + }, + { + "epoch": 4.98, + "learning_rate": 3.417146081858511e-06, + "loss": 0.1063, + "step": 106830 + }, + { + "epoch": 4.99, + "learning_rate": 3.4163622968037246e-06, + "loss": 0.0143, + "step": 106835 + }, + { + "epoch": 4.99, + "learning_rate": 3.415578511748938e-06, + "loss": 0.035, + "step": 106840 + }, + { + "epoch": 4.99, + "learning_rate": 3.4147947266941516e-06, + "loss": 0.0637, + "step": 106845 + }, + { + "epoch": 4.99, + "learning_rate": 3.414010941639365e-06, + "loss": 0.1092, + "step": 106850 + }, + { + "epoch": 4.99, + "learning_rate": 3.4132271565845785e-06, + "loss": 0.1324, + "step": 106855 + }, + { + "epoch": 4.99, + "learning_rate": 3.412443371529792e-06, + "loss": 0.0607, + "step": 106860 + }, + { + "epoch": 4.99, + "learning_rate": 3.411659586475005e-06, + "loss": 0.1144, + "step": 106865 + }, + { + "epoch": 4.99, + "learning_rate": 3.4108758014202186e-06, + "loss": 0.1065, + "step": 106870 + }, + { + "epoch": 4.99, + "learning_rate": 3.410092016365432e-06, + "loss": 0.2655, + "step": 106875 + }, + { + "epoch": 4.99, + "learning_rate": 3.4093082313106455e-06, + "loss": 0.082, + "step": 106880 + }, + { + "epoch": 4.99, + "learning_rate": 3.408524446255859e-06, + "loss": 0.0192, + "step": 106885 + }, + { + "epoch": 4.99, + "learning_rate": 3.407740661201073e-06, + "loss": 0.0386, + "step": 106890 + }, + { + "epoch": 4.99, + "learning_rate": 3.4069568761462856e-06, + "loss": 0.086, + "step": 106895 + }, + { + "epoch": 4.99, + "learning_rate": 3.406173091091499e-06, + "loss": 0.0492, + "step": 106900 + }, + { + "epoch": 4.99, + "learning_rate": 3.4053893060367125e-06, + "loss": 0.1052, + "step": 106905 + }, + { + "epoch": 4.99, + "learning_rate": 3.404605520981926e-06, + "loss": 0.1199, + "step": 106910 + }, + { + "epoch": 4.99, + "learning_rate": 3.40382173592714e-06, + "loss": 0.1372, + "step": 106915 + }, + { + "epoch": 4.99, + "learning_rate": 3.4030379508723534e-06, + "loss": 0.1377, + "step": 106920 + }, + { + "epoch": 4.99, + "learning_rate": 3.402254165817567e-06, + "loss": 0.4274, + "step": 106925 + }, + { + "epoch": 4.99, + "learning_rate": 3.4014703807627795e-06, + "loss": 0.0971, + "step": 106930 + }, + { + "epoch": 4.99, + "learning_rate": 3.400686595707993e-06, + "loss": 0.0229, + "step": 106935 + }, + { + "epoch": 4.99, + "learning_rate": 3.399902810653207e-06, + "loss": 0.0343, + "step": 106940 + }, + { + "epoch": 4.99, + "learning_rate": 3.3991190255984204e-06, + "loss": 0.1156, + "step": 106945 + }, + { + "epoch": 4.99, + "learning_rate": 3.398335240543634e-06, + "loss": 0.0992, + "step": 106950 + }, + { + "epoch": 4.99, + "learning_rate": 3.3975514554888473e-06, + "loss": 0.0567, + "step": 106955 + }, + { + "epoch": 4.99, + "learning_rate": 3.39676767043406e-06, + "loss": 0.1088, + "step": 106960 + }, + { + "epoch": 4.99, + "learning_rate": 3.395983885379274e-06, + "loss": 0.1391, + "step": 106965 + }, + { + "epoch": 4.99, + "learning_rate": 3.3952001003244874e-06, + "loss": 0.2341, + "step": 106970 + }, + { + "epoch": 4.99, + "learning_rate": 3.394416315269701e-06, + "loss": 0.2564, + "step": 106975 + }, + { + "epoch": 4.99, + "learning_rate": 3.3936325302149143e-06, + "loss": 0.0925, + "step": 106980 + }, + { + "epoch": 4.99, + "learning_rate": 3.392848745160128e-06, + "loss": 0.0425, + "step": 106985 + }, + { + "epoch": 4.99, + "learning_rate": 3.3920649601053413e-06, + "loss": 0.0186, + "step": 106990 + }, + { + "epoch": 4.99, + "learning_rate": 3.3912811750505544e-06, + "loss": 0.0554, + "step": 106995 + }, + { + "epoch": 4.99, + "learning_rate": 3.390497389995768e-06, + "loss": 0.0929, + "step": 107000 + }, + { + "epoch": 4.99, + "learning_rate": 3.3897136049409813e-06, + "loss": 0.0481, + "step": 107005 + }, + { + "epoch": 4.99, + "learning_rate": 3.388929819886195e-06, + "loss": 0.1231, + "step": 107010 + }, + { + "epoch": 4.99, + "learning_rate": 3.3881460348314083e-06, + "loss": 0.1097, + "step": 107015 + }, + { + "epoch": 4.99, + "learning_rate": 3.3873622497766218e-06, + "loss": 0.1566, + "step": 107020 + }, + { + "epoch": 4.99, + "learning_rate": 3.386578464721835e-06, + "loss": 0.3127, + "step": 107025 + }, + { + "epoch": 4.99, + "learning_rate": 3.3857946796670483e-06, + "loss": 0.0991, + "step": 107030 + }, + { + "epoch": 4.99, + "learning_rate": 3.385010894612262e-06, + "loss": 0.0376, + "step": 107035 + }, + { + "epoch": 4.99, + "learning_rate": 3.3842271095574753e-06, + "loss": 0.0389, + "step": 107040 + }, + { + "epoch": 4.99, + "learning_rate": 3.3834433245026888e-06, + "loss": 0.0523, + "step": 107045 + }, + { + "epoch": 5.0, + "learning_rate": 3.3826595394479022e-06, + "loss": 0.0397, + "step": 107050 + }, + { + "epoch": 5.0, + "learning_rate": 3.3818757543931157e-06, + "loss": 0.0767, + "step": 107055 + }, + { + "epoch": 5.0, + "learning_rate": 3.3810919693383288e-06, + "loss": 0.0496, + "step": 107060 + }, + { + "epoch": 5.0, + "learning_rate": 3.3803081842835423e-06, + "loss": 0.1208, + "step": 107065 + }, + { + "epoch": 5.0, + "learning_rate": 3.3795243992287557e-06, + "loss": 0.1989, + "step": 107070 + }, + { + "epoch": 5.0, + "learning_rate": 3.3787406141739692e-06, + "loss": 0.1885, + "step": 107075 + }, + { + "epoch": 5.0, + "learning_rate": 3.3779568291191827e-06, + "loss": 0.1041, + "step": 107080 + }, + { + "epoch": 5.0, + "learning_rate": 3.377173044064396e-06, + "loss": 0.0277, + "step": 107085 + }, + { + "epoch": 5.0, + "learning_rate": 3.3763892590096093e-06, + "loss": 0.0263, + "step": 107090 + }, + { + "epoch": 5.0, + "learning_rate": 3.3756054739548227e-06, + "loss": 0.0636, + "step": 107095 + }, + { + "epoch": 5.0, + "learning_rate": 3.3748216889000362e-06, + "loss": 0.1094, + "step": 107100 + }, + { + "epoch": 5.0, + "learning_rate": 3.3740379038452497e-06, + "loss": 0.0627, + "step": 107105 + }, + { + "epoch": 5.0, + "learning_rate": 3.373254118790463e-06, + "loss": 0.1205, + "step": 107110 + }, + { + "epoch": 5.0, + "learning_rate": 3.3724703337356767e-06, + "loss": 0.0908, + "step": 107115 + }, + { + "epoch": 5.0, + "learning_rate": 3.37168654868089e-06, + "loss": 0.109, + "step": 107120 + }, + { + "epoch": 5.0, + "learning_rate": 3.370902763626103e-06, + "loss": 0.157, + "step": 107125 + }, + { + "epoch": 5.0, + "learning_rate": 3.3701189785713167e-06, + "loss": 0.0523, + "step": 107130 + }, + { + "epoch": 5.0, + "learning_rate": 3.36933519351653e-06, + "loss": 0.0538, + "step": 107135 + }, + { + "epoch": 5.0, + "learning_rate": 3.3685514084617436e-06, + "loss": 0.0309, + "step": 107140 + }, + { + "epoch": 5.0, + "learning_rate": 3.367767623406957e-06, + "loss": 0.1201, + "step": 107145 + }, + { + "epoch": 5.0, + "learning_rate": 3.3669838383521706e-06, + "loss": 0.0899, + "step": 107150 + }, + { + "epoch": 5.0, + "learning_rate": 3.3662000532973837e-06, + "loss": 0.3519, + "step": 107155 + }, + { + "epoch": 5.0, + "eval_cer": 0.011018513087304345, + "eval_loss": 0.06931844353675842, + "eval_runtime": 470.9171, + "eval_samples_per_second": 40.453, + "eval_steps_per_second": 5.058, + "eval_wer": 0.09341228719467061, + "step": 107155 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 1.2861471382520263e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-107155/training_args.bin b/checkpoint-107155/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-107155/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/checkpoint-128586/config.json b/checkpoint-128586/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-128586/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-128586/optimizer.pt b/checkpoint-128586/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ffb1e9f2e313498540a3bed3e2b39165097106b --- /dev/null +++ b/checkpoint-128586/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2913c5c60ffd7e8abe7467b852f19331ccfb6208d1f9d7e55a69c09da32364c +size 1847865 diff --git a/checkpoint-128586/preprocessor_config.json b/checkpoint-128586/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-128586/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-128586/pytorch_model.bin b/checkpoint-128586/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e06d0b689a8726781e3c389810401d857e2015c --- /dev/null +++ b/checkpoint-128586/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc536a14d1997d45a25843da45bbb7321eda240d64a790d1197cc89525d8a30 +size 377656855 diff --git a/checkpoint-128586/rng_state.pth b/checkpoint-128586/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..564d045f522452548b731e618c69870e0df2d7e3 --- /dev/null +++ b/checkpoint-128586/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e94057726ed90ce2b131bf31ca259338b87d4438c256f8e3e70ff1c86d0f55 +size 14567 diff --git a/checkpoint-128586/scaler.pt b/checkpoint-128586/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..00925fd6d229acf29dc6f5438b0ca9958c76d0cf --- /dev/null +++ b/checkpoint-128586/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dab08c7d2e7b3c2262eec249bccea43bb9b6a5aef9faeb9f763dd5a4ad21232 +size 559 diff --git a/checkpoint-128586/scheduler.pt b/checkpoint-128586/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7003ac19d7d939ef4615895c70f0059d2ca0dea3 --- /dev/null +++ b/checkpoint-128586/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee45e655c4610bc0a5ddb7a3fe99bc89d3cbf8997df69dc70ff490420bbd7db +size 623 diff --git a/checkpoint-128586/trainer_state.json b/checkpoint-128586/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aa1235d0dad561e0bbba8d34842fef7d6cd7321c --- /dev/null +++ b/checkpoint-128586/trainer_state.json @@ -0,0 +1,154378 @@ +{ + "best_metric": 0.03575053811073303, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-64293", + "epoch": 6.0, + "global_step": 128586, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + }, + { + "epoch": 1.0, + "learning_rate": 1.6798708322229715e-05, + "loss": 0.2708, + "step": 21435 + }, + { + "epoch": 1.0, + "learning_rate": 1.6797924537174925e-05, + "loss": 0.0632, + "step": 21440 + }, + { + "epoch": 1.0, + "learning_rate": 1.679714075212014e-05, + "loss": 0.0625, + "step": 21445 + }, + { + "epoch": 1.0, + "learning_rate": 1.6796356967065353e-05, + "loss": 0.1169, + "step": 21450 + }, + { + "epoch": 1.0, + "learning_rate": 1.6795573182010567e-05, + "loss": 0.1674, + "step": 21455 + }, + { + "epoch": 1.0, + "learning_rate": 1.679478939695578e-05, + "loss": 0.1875, + "step": 21460 + }, + { + "epoch": 1.0, + "learning_rate": 1.6794005611900995e-05, + "loss": 0.2132, + "step": 21465 + }, + { + "epoch": 1.0, + "learning_rate": 1.6793221826846205e-05, + "loss": 0.2001, + "step": 21470 + }, + { + "epoch": 1.0, + "learning_rate": 1.6792438041791422e-05, + "loss": 0.2851, + "step": 21475 + }, + { + "epoch": 1.0, + "learning_rate": 1.6791654256736633e-05, + "loss": 0.2083, + "step": 21480 + }, + { + "epoch": 1.0, + "learning_rate": 1.6790870471681847e-05, + "loss": 0.4056, + "step": 21485 + }, + { + "epoch": 1.0, + "learning_rate": 1.679008668662706e-05, + "loss": 0.0784, + "step": 21490 + }, + { + "epoch": 1.0, + "learning_rate": 1.6789302901572275e-05, + "loss": 0.1198, + "step": 21495 + }, + { + "epoch": 1.0, + "learning_rate": 1.678851911651749e-05, + "loss": 0.0637, + "step": 21500 + }, + { + "epoch": 1.0, + "learning_rate": 1.67877353314627e-05, + "loss": 0.0893, + "step": 21505 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786951546407913e-05, + "loss": 0.1339, + "step": 21510 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786167761353127e-05, + "loss": 0.2976, + "step": 21515 + }, + { + "epoch": 1.0, + "learning_rate": 1.678538397629834e-05, + "loss": 0.2174, + "step": 21520 + }, + { + "epoch": 1.0, + "learning_rate": 1.6784600191243555e-05, + "loss": 0.2785, + "step": 21525 + }, + { + "epoch": 1.0, + "learning_rate": 1.678381640618877e-05, + "loss": 0.5756, + "step": 21530 + }, + { + "epoch": 1.0, + "learning_rate": 1.6783032621133983e-05, + "loss": 0.3013, + "step": 21535 + }, + { + "epoch": 1.01, + "learning_rate": 1.6782248836079196e-05, + "loss": 0.0721, + "step": 21540 + }, + { + "epoch": 1.01, + "learning_rate": 1.6781465051024407e-05, + "loss": 0.0953, + "step": 21545 + }, + { + "epoch": 1.01, + "learning_rate": 1.6780681265969624e-05, + "loss": 0.0683, + "step": 21550 + }, + { + "epoch": 1.01, + "learning_rate": 1.6779897480914835e-05, + "loss": 0.155, + "step": 21555 + }, + { + "epoch": 1.01, + "learning_rate": 1.677911369586005e-05, + "loss": 0.2342, + "step": 21560 + }, + { + "epoch": 1.01, + "learning_rate": 1.6778329910805263e-05, + "loss": 0.1579, + "step": 21565 + }, + { + "epoch": 1.01, + "learning_rate": 1.6777546125750473e-05, + "loss": 0.2137, + "step": 21570 + }, + { + "epoch": 1.01, + "learning_rate": 1.677676234069569e-05, + "loss": 0.3298, + "step": 21575 + }, + { + "epoch": 1.01, + "learning_rate": 1.67759785556409e-05, + "loss": 0.3294, + "step": 21580 + }, + { + "epoch": 1.01, + "learning_rate": 1.6775194770586115e-05, + "loss": 0.347, + "step": 21585 + }, + { + "epoch": 1.01, + "learning_rate": 1.677441098553133e-05, + "loss": 0.0707, + "step": 21590 + }, + { + "epoch": 1.01, + "learning_rate": 1.6773627200476543e-05, + "loss": 0.0494, + "step": 21595 + }, + { + "epoch": 1.01, + "learning_rate": 1.6772843415421757e-05, + "loss": 0.0614, + "step": 21600 + }, + { + "epoch": 1.01, + "learning_rate": 1.677205963036697e-05, + "loss": 0.1719, + "step": 21605 + }, + { + "epoch": 1.01, + "learning_rate": 1.6771275845312184e-05, + "loss": 0.1739, + "step": 21610 + }, + { + "epoch": 1.01, + "learning_rate": 1.6770492060257398e-05, + "loss": 0.2605, + "step": 21615 + }, + { + "epoch": 1.01, + "learning_rate": 1.676970827520261e-05, + "loss": 0.2799, + "step": 21620 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768924490147823e-05, + "loss": 0.3259, + "step": 21625 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768140705093037e-05, + "loss": 0.3949, + "step": 21630 + }, + { + "epoch": 1.01, + "learning_rate": 1.676735692003825e-05, + "loss": 0.4475, + "step": 21635 + }, + { + "epoch": 1.01, + "learning_rate": 1.6766573134983464e-05, + "loss": 0.067, + "step": 21640 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765789349928675e-05, + "loss": 0.0889, + "step": 21645 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765005564873892e-05, + "loss": 0.0926, + "step": 21650 + }, + { + "epoch": 1.01, + "learning_rate": 1.6764221779819103e-05, + "loss": 0.169, + "step": 21655 + }, + { + "epoch": 1.01, + "learning_rate": 1.6763437994764317e-05, + "loss": 0.1749, + "step": 21660 + }, + { + "epoch": 1.01, + "learning_rate": 1.676265420970953e-05, + "loss": 0.2628, + "step": 21665 + }, + { + "epoch": 1.01, + "learning_rate": 1.6761870424654744e-05, + "loss": 0.2144, + "step": 21670 + }, + { + "epoch": 1.01, + "learning_rate": 1.676108663959996e-05, + "loss": 0.274, + "step": 21675 + }, + { + "epoch": 1.01, + "learning_rate": 1.6760302854545172e-05, + "loss": 0.3579, + "step": 21680 + }, + { + "epoch": 1.01, + "learning_rate": 1.6759519069490383e-05, + "loss": 0.3807, + "step": 21685 + }, + { + "epoch": 1.01, + "learning_rate": 1.67587352844356e-05, + "loss": 0.1214, + "step": 21690 + }, + { + "epoch": 1.01, + "learning_rate": 1.675795149938081e-05, + "loss": 0.1011, + "step": 21695 + }, + { + "epoch": 1.01, + "learning_rate": 1.6757167714326024e-05, + "loss": 0.0696, + "step": 21700 + }, + { + "epoch": 1.01, + "learning_rate": 1.675638392927124e-05, + "loss": 0.1463, + "step": 21705 + }, + { + "epoch": 1.01, + "learning_rate": 1.6755600144216452e-05, + "loss": 0.1416, + "step": 21710 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754816359161666e-05, + "loss": 0.128, + "step": 21715 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754032574106877e-05, + "loss": 0.1893, + "step": 21720 + }, + { + "epoch": 1.01, + "learning_rate": 1.675324878905209e-05, + "loss": 0.3064, + "step": 21725 + }, + { + "epoch": 1.01, + "learning_rate": 1.6752465003997305e-05, + "loss": 0.3098, + "step": 21730 + }, + { + "epoch": 1.01, + "learning_rate": 1.675168121894252e-05, + "loss": 0.3172, + "step": 21735 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750897433887732e-05, + "loss": 0.0526, + "step": 21740 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750113648832946e-05, + "loss": 0.0767, + "step": 21745 + }, + { + "epoch": 1.01, + "learning_rate": 1.674932986377816e-05, + "loss": 0.105, + "step": 21750 + }, + { + "epoch": 1.02, + "learning_rate": 1.6748546078723374e-05, + "loss": 0.128, + "step": 21755 + }, + { + "epoch": 1.02, + "learning_rate": 1.6747762293668585e-05, + "loss": 0.1071, + "step": 21760 + }, + { + "epoch": 1.02, + "learning_rate": 1.67469785086138e-05, + "loss": 0.1624, + "step": 21765 + }, + { + "epoch": 1.02, + "learning_rate": 1.6746194723559012e-05, + "loss": 0.3446, + "step": 21770 + }, + { + "epoch": 1.02, + "learning_rate": 1.6745410938504226e-05, + "loss": 0.3188, + "step": 21775 + }, + { + "epoch": 1.02, + "learning_rate": 1.674462715344944e-05, + "loss": 0.5742, + "step": 21780 + }, + { + "epoch": 1.02, + "learning_rate": 1.674384336839465e-05, + "loss": 0.2419, + "step": 21785 + }, + { + "epoch": 1.02, + "learning_rate": 1.6743059583339868e-05, + "loss": 0.0936, + "step": 21790 + }, + { + "epoch": 1.02, + "learning_rate": 1.674227579828508e-05, + "loss": 0.0601, + "step": 21795 + }, + { + "epoch": 1.02, + "learning_rate": 1.6741492013230292e-05, + "loss": 0.0906, + "step": 21800 + }, + { + "epoch": 1.02, + "learning_rate": 1.6740708228175506e-05, + "loss": 0.104, + "step": 21805 + }, + { + "epoch": 1.02, + "learning_rate": 1.673992444312072e-05, + "loss": 0.1736, + "step": 21810 + }, + { + "epoch": 1.02, + "learning_rate": 1.6739140658065934e-05, + "loss": 0.1642, + "step": 21815 + }, + { + "epoch": 1.02, + "learning_rate": 1.6738356873011148e-05, + "loss": 0.194, + "step": 21820 + }, + { + "epoch": 1.02, + "learning_rate": 1.673757308795636e-05, + "loss": 0.1698, + "step": 21825 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736789302901572e-05, + "loss": 0.5151, + "step": 21830 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736005517846786e-05, + "loss": 0.4388, + "step": 21835 + }, + { + "epoch": 1.02, + "learning_rate": 1.6735221732792e-05, + "loss": 0.0625, + "step": 21840 + }, + { + "epoch": 1.02, + "learning_rate": 1.6734437947737214e-05, + "loss": 0.0551, + "step": 21845 + }, + { + "epoch": 1.02, + "learning_rate": 1.6733654162682428e-05, + "loss": 0.086, + "step": 21850 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732870377627642e-05, + "loss": 0.107, + "step": 21855 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732086592572853e-05, + "loss": 0.1138, + "step": 21860 + }, + { + "epoch": 1.02, + "learning_rate": 1.673130280751807e-05, + "loss": 0.1885, + "step": 21865 + }, + { + "epoch": 1.02, + "learning_rate": 1.673051902246328e-05, + "loss": 0.2082, + "step": 21870 + }, + { + "epoch": 1.02, + "learning_rate": 1.6729735237408494e-05, + "loss": 0.2015, + "step": 21875 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728951452353708e-05, + "loss": 0.4405, + "step": 21880 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728167667298922e-05, + "loss": 0.2858, + "step": 21885 + }, + { + "epoch": 1.02, + "learning_rate": 1.6727383882244136e-05, + "loss": 0.0538, + "step": 21890 + }, + { + "epoch": 1.02, + "learning_rate": 1.6726600097189346e-05, + "loss": 0.0779, + "step": 21895 + }, + { + "epoch": 1.02, + "learning_rate": 1.672581631213456e-05, + "loss": 0.142, + "step": 21900 + }, + { + "epoch": 1.02, + "learning_rate": 1.6725032527079774e-05, + "loss": 0.1888, + "step": 21905 + }, + { + "epoch": 1.02, + "learning_rate": 1.6724248742024988e-05, + "loss": 0.1195, + "step": 21910 + }, + { + "epoch": 1.02, + "learning_rate": 1.6723464956970202e-05, + "loss": 0.2157, + "step": 21915 + }, + { + "epoch": 1.02, + "learning_rate": 1.6722681171915416e-05, + "loss": 0.2258, + "step": 21920 + }, + { + "epoch": 1.02, + "learning_rate": 1.672189738686063e-05, + "loss": 0.3311, + "step": 21925 + }, + { + "epoch": 1.02, + "learning_rate": 1.6721113601805844e-05, + "loss": 0.3652, + "step": 21930 + }, + { + "epoch": 1.02, + "learning_rate": 1.6720329816751054e-05, + "loss": 0.3514, + "step": 21935 + }, + { + "epoch": 1.02, + "learning_rate": 1.6719546031696268e-05, + "loss": 0.0737, + "step": 21940 + }, + { + "epoch": 1.02, + "learning_rate": 1.6718762246641482e-05, + "loss": 0.0702, + "step": 21945 + }, + { + "epoch": 1.02, + "learning_rate": 1.6717978461586696e-05, + "loss": 0.1228, + "step": 21950 + }, + { + "epoch": 1.02, + "learning_rate": 1.671719467653191e-05, + "loss": 0.1717, + "step": 21955 + }, + { + "epoch": 1.02, + "learning_rate": 1.671641089147712e-05, + "loss": 0.1327, + "step": 21960 + }, + { + "epoch": 1.02, + "learning_rate": 1.6715627106422338e-05, + "loss": 0.18, + "step": 21965 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714843321367548e-05, + "loss": 0.1339, + "step": 21970 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714059536312762e-05, + "loss": 0.4464, + "step": 21975 + }, + { + "epoch": 1.03, + "learning_rate": 1.6713275751257976e-05, + "loss": 0.4614, + "step": 21980 + }, + { + "epoch": 1.03, + "learning_rate": 1.671249196620319e-05, + "loss": 0.4731, + "step": 21985 + }, + { + "epoch": 1.03, + "learning_rate": 1.6711708181148404e-05, + "loss": 0.0743, + "step": 21990 + }, + { + "epoch": 1.03, + "learning_rate": 1.6710924396093618e-05, + "loss": 0.0772, + "step": 21995 + }, + { + "epoch": 1.03, + "learning_rate": 1.671014061103883e-05, + "loss": 0.1295, + "step": 22000 + }, + { + "epoch": 1.03, + "learning_rate": 1.6709356825984046e-05, + "loss": 0.1665, + "step": 22005 + }, + { + "epoch": 1.03, + "learning_rate": 1.6708573040929256e-05, + "loss": 0.1362, + "step": 22010 + }, + { + "epoch": 1.03, + "learning_rate": 1.670778925587447e-05, + "loss": 0.1636, + "step": 22015 + }, + { + "epoch": 1.03, + "learning_rate": 1.6707005470819684e-05, + "loss": 0.2899, + "step": 22020 + }, + { + "epoch": 1.03, + "learning_rate": 1.6706221685764898e-05, + "loss": 0.2897, + "step": 22025 + }, + { + "epoch": 1.03, + "learning_rate": 1.6705437900710112e-05, + "loss": 0.3886, + "step": 22030 + }, + { + "epoch": 1.03, + "learning_rate": 1.6704654115655322e-05, + "loss": 0.2882, + "step": 22035 + }, + { + "epoch": 1.03, + "learning_rate": 1.6703870330600536e-05, + "loss": 0.0652, + "step": 22040 + }, + { + "epoch": 1.03, + "learning_rate": 1.670308654554575e-05, + "loss": 0.0749, + "step": 22045 + }, + { + "epoch": 1.03, + "learning_rate": 1.6702302760490964e-05, + "loss": 0.0939, + "step": 22050 + }, + { + "epoch": 1.03, + "learning_rate": 1.6701518975436178e-05, + "loss": 0.1791, + "step": 22055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6700735190381392e-05, + "loss": 0.1139, + "step": 22060 + }, + { + "epoch": 1.03, + "learning_rate": 1.6699951405326606e-05, + "loss": 0.2075, + "step": 22065 + }, + { + "epoch": 1.03, + "learning_rate": 1.669916762027182e-05, + "loss": 0.2175, + "step": 22070 + }, + { + "epoch": 1.03, + "learning_rate": 1.669838383521703e-05, + "loss": 0.2589, + "step": 22075 + }, + { + "epoch": 1.03, + "learning_rate": 1.6697600050162247e-05, + "loss": 0.42, + "step": 22080 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696816265107458e-05, + "loss": 0.3279, + "step": 22085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696032480052672e-05, + "loss": 0.0295, + "step": 22090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6695248694997886e-05, + "loss": 0.0318, + "step": 22095 + }, + { + "epoch": 1.03, + "learning_rate": 1.6694464909943096e-05, + "loss": 0.1887, + "step": 22100 + }, + { + "epoch": 1.03, + "learning_rate": 1.6693681124888314e-05, + "loss": 0.1017, + "step": 22105 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692897339833524e-05, + "loss": 0.145, + "step": 22110 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692113554778738e-05, + "loss": 0.195, + "step": 22115 + }, + { + "epoch": 1.03, + "learning_rate": 1.6691329769723952e-05, + "loss": 0.2584, + "step": 22120 + }, + { + "epoch": 1.03, + "learning_rate": 1.6690545984669166e-05, + "loss": 0.2965, + "step": 22125 + }, + { + "epoch": 1.03, + "learning_rate": 1.668976219961438e-05, + "loss": 0.3912, + "step": 22130 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688978414559594e-05, + "loss": 0.3964, + "step": 22135 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688194629504804e-05, + "loss": 0.0447, + "step": 22140 + }, + { + "epoch": 1.03, + "learning_rate": 1.668741084445002e-05, + "loss": 0.1708, + "step": 22145 + }, + { + "epoch": 1.03, + "learning_rate": 1.6686627059395232e-05, + "loss": 0.1072, + "step": 22150 + }, + { + "epoch": 1.03, + "learning_rate": 1.6685843274340446e-05, + "loss": 0.1223, + "step": 22155 + }, + { + "epoch": 1.03, + "learning_rate": 1.668505948928566e-05, + "loss": 0.1006, + "step": 22160 + }, + { + "epoch": 1.03, + "learning_rate": 1.6684275704230874e-05, + "loss": 0.1096, + "step": 22165 + }, + { + "epoch": 1.03, + "learning_rate": 1.6683491919176088e-05, + "loss": 0.2708, + "step": 22170 + }, + { + "epoch": 1.03, + "learning_rate": 1.6682708134121298e-05, + "loss": 0.3093, + "step": 22175 + }, + { + "epoch": 1.03, + "learning_rate": 1.6681924349066515e-05, + "loss": 0.3701, + "step": 22180 + }, + { + "epoch": 1.04, + "learning_rate": 1.6681140564011726e-05, + "loss": 0.251, + "step": 22185 + }, + { + "epoch": 1.04, + "learning_rate": 1.668035677895694e-05, + "loss": 0.0674, + "step": 22190 + }, + { + "epoch": 1.04, + "learning_rate": 1.6679572993902154e-05, + "loss": 0.1336, + "step": 22195 + }, + { + "epoch": 1.04, + "learning_rate": 1.6678789208847368e-05, + "loss": 0.136, + "step": 22200 + }, + { + "epoch": 1.04, + "learning_rate": 1.667800542379258e-05, + "loss": 0.0998, + "step": 22205 + }, + { + "epoch": 1.04, + "learning_rate": 1.6677221638737795e-05, + "loss": 0.13, + "step": 22210 + }, + { + "epoch": 1.04, + "learning_rate": 1.6676437853683006e-05, + "loss": 0.1497, + "step": 22215 + }, + { + "epoch": 1.04, + "learning_rate": 1.667565406862822e-05, + "loss": 0.1846, + "step": 22220 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674870283573434e-05, + "loss": 0.2499, + "step": 22225 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674086498518648e-05, + "loss": 0.4191, + "step": 22230 + }, + { + "epoch": 1.04, + "learning_rate": 1.667330271346386e-05, + "loss": 0.3157, + "step": 22235 + }, + { + "epoch": 1.04, + "learning_rate": 1.6672518928409075e-05, + "loss": 0.0555, + "step": 22240 + }, + { + "epoch": 1.04, + "learning_rate": 1.667173514335429e-05, + "loss": 0.1057, + "step": 22245 + }, + { + "epoch": 1.04, + "learning_rate": 1.66709513582995e-05, + "loss": 0.0907, + "step": 22250 + }, + { + "epoch": 1.04, + "learning_rate": 1.6670167573244714e-05, + "loss": 0.1564, + "step": 22255 + }, + { + "epoch": 1.04, + "learning_rate": 1.6669383788189928e-05, + "loss": 0.1607, + "step": 22260 + }, + { + "epoch": 1.04, + "learning_rate": 1.666860000313514e-05, + "loss": 0.1959, + "step": 22265 + }, + { + "epoch": 1.04, + "learning_rate": 1.6667816218080356e-05, + "loss": 0.2283, + "step": 22270 + }, + { + "epoch": 1.04, + "learning_rate": 1.666703243302557e-05, + "loss": 0.3232, + "step": 22275 + }, + { + "epoch": 1.04, + "learning_rate": 1.6666248647970783e-05, + "loss": 0.344, + "step": 22280 + }, + { + "epoch": 1.04, + "learning_rate": 1.6665464862915994e-05, + "loss": 0.2879, + "step": 22285 + }, + { + "epoch": 1.04, + "learning_rate": 1.6664681077861208e-05, + "loss": 0.077, + "step": 22290 + }, + { + "epoch": 1.04, + "learning_rate": 1.666389729280642e-05, + "loss": 0.0681, + "step": 22295 + }, + { + "epoch": 1.04, + "learning_rate": 1.6663113507751636e-05, + "loss": 0.0788, + "step": 22300 + }, + { + "epoch": 1.04, + "learning_rate": 1.666232972269685e-05, + "loss": 0.1024, + "step": 22305 + }, + { + "epoch": 1.04, + "learning_rate": 1.6661545937642063e-05, + "loss": 0.1455, + "step": 22310 + }, + { + "epoch": 1.04, + "learning_rate": 1.6660762152587274e-05, + "loss": 0.2212, + "step": 22315 + }, + { + "epoch": 1.04, + "learning_rate": 1.665997836753249e-05, + "loss": 0.1622, + "step": 22320 + }, + { + "epoch": 1.04, + "learning_rate": 1.6659194582477702e-05, + "loss": 0.257, + "step": 22325 + }, + { + "epoch": 1.04, + "learning_rate": 1.6658410797422916e-05, + "loss": 0.386, + "step": 22330 + }, + { + "epoch": 1.04, + "learning_rate": 1.665762701236813e-05, + "loss": 0.3866, + "step": 22335 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656843227313343e-05, + "loss": 0.068, + "step": 22340 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656059442258557e-05, + "loss": 0.1259, + "step": 22345 + }, + { + "epoch": 1.04, + "learning_rate": 1.6655275657203768e-05, + "loss": 0.1199, + "step": 22350 + }, + { + "epoch": 1.04, + "learning_rate": 1.6654491872148982e-05, + "loss": 0.1675, + "step": 22355 + }, + { + "epoch": 1.04, + "learning_rate": 1.6653708087094196e-05, + "loss": 0.1525, + "step": 22360 + }, + { + "epoch": 1.04, + "learning_rate": 1.665292430203941e-05, + "loss": 0.1875, + "step": 22365 + }, + { + "epoch": 1.04, + "learning_rate": 1.6652140516984623e-05, + "loss": 0.2257, + "step": 22370 + }, + { + "epoch": 1.04, + "learning_rate": 1.6651356731929837e-05, + "loss": 0.195, + "step": 22375 + }, + { + "epoch": 1.04, + "learning_rate": 1.665057294687505e-05, + "loss": 0.3703, + "step": 22380 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649789161820265e-05, + "loss": 0.3111, + "step": 22385 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649005376765476e-05, + "loss": 0.0501, + "step": 22390 + }, + { + "epoch": 1.04, + "learning_rate": 1.6648221591710693e-05, + "loss": 0.1009, + "step": 22395 + }, + { + "epoch": 1.05, + "learning_rate": 1.6647437806655904e-05, + "loss": 0.1188, + "step": 22400 + }, + { + "epoch": 1.05, + "learning_rate": 1.6646654021601117e-05, + "loss": 0.1223, + "step": 22405 + }, + { + "epoch": 1.05, + "learning_rate": 1.664587023654633e-05, + "loss": 0.2532, + "step": 22410 + }, + { + "epoch": 1.05, + "learning_rate": 1.6645086451491542e-05, + "loss": 0.2103, + "step": 22415 + }, + { + "epoch": 1.05, + "learning_rate": 1.664430266643676e-05, + "loss": 0.2827, + "step": 22420 + }, + { + "epoch": 1.05, + "learning_rate": 1.664351888138197e-05, + "loss": 0.3525, + "step": 22425 + }, + { + "epoch": 1.05, + "learning_rate": 1.6642735096327184e-05, + "loss": 0.4255, + "step": 22430 + }, + { + "epoch": 1.05, + "learning_rate": 1.6641951311272397e-05, + "loss": 0.3211, + "step": 22435 + }, + { + "epoch": 1.05, + "learning_rate": 1.664116752621761e-05, + "loss": 0.0945, + "step": 22440 + }, + { + "epoch": 1.05, + "learning_rate": 1.6640383741162825e-05, + "loss": 0.0941, + "step": 22445 + }, + { + "epoch": 1.05, + "learning_rate": 1.663959995610804e-05, + "loss": 0.1018, + "step": 22450 + }, + { + "epoch": 1.05, + "learning_rate": 1.663881617105325e-05, + "loss": 0.1377, + "step": 22455 + }, + { + "epoch": 1.05, + "learning_rate": 1.6638032385998467e-05, + "loss": 0.1353, + "step": 22460 + }, + { + "epoch": 1.05, + "learning_rate": 1.6637248600943678e-05, + "loss": 0.1506, + "step": 22465 + }, + { + "epoch": 1.05, + "learning_rate": 1.663646481588889e-05, + "loss": 0.2194, + "step": 22470 + }, + { + "epoch": 1.05, + "learning_rate": 1.6635681030834105e-05, + "loss": 0.2673, + "step": 22475 + }, + { + "epoch": 1.05, + "learning_rate": 1.663489724577932e-05, + "loss": 0.4444, + "step": 22480 + }, + { + "epoch": 1.05, + "learning_rate": 1.6634113460724533e-05, + "loss": 0.4203, + "step": 22485 + }, + { + "epoch": 1.05, + "learning_rate": 1.6633329675669744e-05, + "loss": 0.0998, + "step": 22490 + }, + { + "epoch": 1.05, + "learning_rate": 1.663254589061496e-05, + "loss": 0.0645, + "step": 22495 + }, + { + "epoch": 1.05, + "learning_rate": 1.663176210556017e-05, + "loss": 0.0815, + "step": 22500 + }, + { + "epoch": 1.05, + "learning_rate": 1.6630978320505385e-05, + "loss": 0.1239, + "step": 22505 + }, + { + "epoch": 1.05, + "learning_rate": 1.66301945354506e-05, + "loss": 0.1158, + "step": 22510 + }, + { + "epoch": 1.05, + "learning_rate": 1.6629410750395813e-05, + "loss": 0.1965, + "step": 22515 + }, + { + "epoch": 1.05, + "learning_rate": 1.6628626965341027e-05, + "loss": 0.2295, + "step": 22520 + }, + { + "epoch": 1.05, + "learning_rate": 1.662784318028624e-05, + "loss": 0.3215, + "step": 22525 + }, + { + "epoch": 1.05, + "learning_rate": 1.662705939523145e-05, + "loss": 0.3832, + "step": 22530 + }, + { + "epoch": 1.05, + "learning_rate": 1.662627561017667e-05, + "loss": 0.3457, + "step": 22535 + }, + { + "epoch": 1.05, + "learning_rate": 1.662549182512188e-05, + "loss": 0.1041, + "step": 22540 + }, + { + "epoch": 1.05, + "learning_rate": 1.6624708040067093e-05, + "loss": 0.0946, + "step": 22545 + }, + { + "epoch": 1.05, + "learning_rate": 1.6623924255012307e-05, + "loss": 0.0797, + "step": 22550 + }, + { + "epoch": 1.05, + "learning_rate": 1.662314046995752e-05, + "loss": 0.1227, + "step": 22555 + }, + { + "epoch": 1.05, + "learning_rate": 1.6622356684902735e-05, + "loss": 0.1388, + "step": 22560 + }, + { + "epoch": 1.05, + "learning_rate": 1.6621572899847945e-05, + "loss": 0.1356, + "step": 22565 + }, + { + "epoch": 1.05, + "learning_rate": 1.662078911479316e-05, + "loss": 0.1936, + "step": 22570 + }, + { + "epoch": 1.05, + "learning_rate": 1.6620005329738373e-05, + "loss": 0.3707, + "step": 22575 + }, + { + "epoch": 1.05, + "learning_rate": 1.6619221544683587e-05, + "loss": 0.4417, + "step": 22580 + }, + { + "epoch": 1.05, + "learning_rate": 1.66184377596288e-05, + "loss": 0.4056, + "step": 22585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6617653974574015e-05, + "loss": 0.1009, + "step": 22590 + }, + { + "epoch": 1.05, + "learning_rate": 1.661687018951923e-05, + "loss": 0.1221, + "step": 22595 + }, + { + "epoch": 1.05, + "learning_rate": 1.6616086404464443e-05, + "loss": 0.1144, + "step": 22600 + }, + { + "epoch": 1.05, + "learning_rate": 1.6615302619409653e-05, + "loss": 0.1499, + "step": 22605 + }, + { + "epoch": 1.06, + "learning_rate": 1.6614518834354867e-05, + "loss": 0.1002, + "step": 22610 + }, + { + "epoch": 1.06, + "learning_rate": 1.661373504930008e-05, + "loss": 0.1595, + "step": 22615 + }, + { + "epoch": 1.06, + "learning_rate": 1.6612951264245295e-05, + "loss": 0.2121, + "step": 22620 + }, + { + "epoch": 1.06, + "learning_rate": 1.661216747919051e-05, + "loss": 0.2338, + "step": 22625 + }, + { + "epoch": 1.06, + "learning_rate": 1.661138369413572e-05, + "loss": 0.375, + "step": 22630 + }, + { + "epoch": 1.06, + "learning_rate": 1.6610599909080937e-05, + "loss": 0.2275, + "step": 22635 + }, + { + "epoch": 1.06, + "learning_rate": 1.6609816124026147e-05, + "loss": 0.0468, + "step": 22640 + }, + { + "epoch": 1.06, + "learning_rate": 1.660903233897136e-05, + "loss": 0.0573, + "step": 22645 + }, + { + "epoch": 1.06, + "learning_rate": 1.6608248553916575e-05, + "loss": 0.0989, + "step": 22650 + }, + { + "epoch": 1.06, + "learning_rate": 1.660746476886179e-05, + "loss": 0.1472, + "step": 22655 + }, + { + "epoch": 1.06, + "learning_rate": 1.6606680983807003e-05, + "loss": 0.1244, + "step": 22660 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605897198752217e-05, + "loss": 0.2461, + "step": 22665 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605113413697427e-05, + "loss": 0.1669, + "step": 22670 + }, + { + "epoch": 1.06, + "learning_rate": 1.660432962864264e-05, + "loss": 0.2483, + "step": 22675 + }, + { + "epoch": 1.06, + "learning_rate": 1.6603545843587855e-05, + "loss": 0.3463, + "step": 22680 + }, + { + "epoch": 1.06, + "learning_rate": 1.660276205853307e-05, + "loss": 0.3456, + "step": 22685 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601978273478283e-05, + "loss": 0.0599, + "step": 22690 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601194488423497e-05, + "loss": 0.0639, + "step": 22695 + }, + { + "epoch": 1.06, + "learning_rate": 1.660041070336871e-05, + "loss": 0.1262, + "step": 22700 + }, + { + "epoch": 1.06, + "learning_rate": 1.659962691831392e-05, + "loss": 0.1424, + "step": 22705 + }, + { + "epoch": 1.06, + "learning_rate": 1.659884313325914e-05, + "loss": 0.1273, + "step": 22710 + }, + { + "epoch": 1.06, + "learning_rate": 1.659805934820435e-05, + "loss": 0.2208, + "step": 22715 + }, + { + "epoch": 1.06, + "learning_rate": 1.6597275563149563e-05, + "loss": 0.1814, + "step": 22720 + }, + { + "epoch": 1.06, + "learning_rate": 1.6596491778094777e-05, + "loss": 0.2325, + "step": 22725 + }, + { + "epoch": 1.06, + "learning_rate": 1.659570799303999e-05, + "loss": 0.4139, + "step": 22730 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594924207985205e-05, + "loss": 0.2675, + "step": 22735 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594140422930415e-05, + "loss": 0.0246, + "step": 22740 + }, + { + "epoch": 1.06, + "learning_rate": 1.659335663787563e-05, + "loss": 0.0829, + "step": 22745 + }, + { + "epoch": 1.06, + "learning_rate": 1.6592572852820843e-05, + "loss": 0.1863, + "step": 22750 + }, + { + "epoch": 1.06, + "learning_rate": 1.6591789067766057e-05, + "loss": 0.1843, + "step": 22755 + }, + { + "epoch": 1.06, + "learning_rate": 1.659100528271127e-05, + "loss": 0.1143, + "step": 22760 + }, + { + "epoch": 1.06, + "learning_rate": 1.6590221497656485e-05, + "loss": 0.1305, + "step": 22765 + }, + { + "epoch": 1.06, + "learning_rate": 1.65894377126017e-05, + "loss": 0.229, + "step": 22770 + }, + { + "epoch": 1.06, + "learning_rate": 1.6588653927546913e-05, + "loss": 0.2498, + "step": 22775 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587870142492123e-05, + "loss": 0.3766, + "step": 22780 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587086357437337e-05, + "loss": 0.2961, + "step": 22785 + }, + { + "epoch": 1.06, + "learning_rate": 1.658630257238255e-05, + "loss": 0.069, + "step": 22790 + }, + { + "epoch": 1.06, + "learning_rate": 1.6585518787327765e-05, + "loss": 0.0735, + "step": 22795 + }, + { + "epoch": 1.06, + "learning_rate": 1.658473500227298e-05, + "loss": 0.1352, + "step": 22800 + }, + { + "epoch": 1.06, + "learning_rate": 1.658395121721819e-05, + "loss": 0.1563, + "step": 22805 + }, + { + "epoch": 1.06, + "learning_rate": 1.6583167432163407e-05, + "loss": 0.155, + "step": 22810 + }, + { + "epoch": 1.06, + "learning_rate": 1.6582383647108617e-05, + "loss": 0.2341, + "step": 22815 + }, + { + "epoch": 1.06, + "learning_rate": 1.658159986205383e-05, + "loss": 0.22, + "step": 22820 + }, + { + "epoch": 1.07, + "learning_rate": 1.6580816076999045e-05, + "loss": 0.2533, + "step": 22825 + }, + { + "epoch": 1.07, + "learning_rate": 1.658003229194426e-05, + "loss": 0.5077, + "step": 22830 + }, + { + "epoch": 1.07, + "learning_rate": 1.6579248506889473e-05, + "loss": 0.49, + "step": 22835 + }, + { + "epoch": 1.07, + "learning_rate": 1.6578464721834687e-05, + "loss": 0.166, + "step": 22840 + }, + { + "epoch": 1.07, + "learning_rate": 1.6577680936779897e-05, + "loss": 0.0973, + "step": 22845 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576897151725114e-05, + "loss": 0.0986, + "step": 22850 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576113366670325e-05, + "loss": 0.1956, + "step": 22855 + }, + { + "epoch": 1.07, + "learning_rate": 1.657532958161554e-05, + "loss": 0.2051, + "step": 22860 + }, + { + "epoch": 1.07, + "learning_rate": 1.6574545796560753e-05, + "loss": 0.1668, + "step": 22865 + }, + { + "epoch": 1.07, + "learning_rate": 1.6573762011505967e-05, + "loss": 0.2645, + "step": 22870 + }, + { + "epoch": 1.07, + "learning_rate": 1.657297822645118e-05, + "loss": 0.2412, + "step": 22875 + }, + { + "epoch": 1.07, + "learning_rate": 1.657219444139639e-05, + "loss": 0.36, + "step": 22880 + }, + { + "epoch": 1.07, + "learning_rate": 1.6571410656341605e-05, + "loss": 0.3128, + "step": 22885 + }, + { + "epoch": 1.07, + "learning_rate": 1.657062687128682e-05, + "loss": 0.0542, + "step": 22890 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569843086232033e-05, + "loss": 0.0474, + "step": 22895 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569059301177247e-05, + "loss": 0.1289, + "step": 22900 + }, + { + "epoch": 1.07, + "learning_rate": 1.656827551612246e-05, + "loss": 0.0668, + "step": 22905 + }, + { + "epoch": 1.07, + "learning_rate": 1.6567491731067674e-05, + "loss": 0.2174, + "step": 22910 + }, + { + "epoch": 1.07, + "learning_rate": 1.656670794601289e-05, + "loss": 0.1433, + "step": 22915 + }, + { + "epoch": 1.07, + "learning_rate": 1.65659241609581e-05, + "loss": 0.1519, + "step": 22920 + }, + { + "epoch": 1.07, + "learning_rate": 1.6565140375903316e-05, + "loss": 0.2903, + "step": 22925 + }, + { + "epoch": 1.07, + "learning_rate": 1.6564356590848527e-05, + "loss": 0.4346, + "step": 22930 + }, + { + "epoch": 1.07, + "learning_rate": 1.656357280579374e-05, + "loss": 0.3024, + "step": 22935 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562789020738955e-05, + "loss": 0.031, + "step": 22940 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562005235684165e-05, + "loss": 0.1083, + "step": 22945 + }, + { + "epoch": 1.07, + "learning_rate": 1.6561221450629382e-05, + "loss": 0.0789, + "step": 22950 + }, + { + "epoch": 1.07, + "learning_rate": 1.6560437665574593e-05, + "loss": 0.193, + "step": 22955 + }, + { + "epoch": 1.07, + "learning_rate": 1.6559653880519807e-05, + "loss": 0.2532, + "step": 22960 + }, + { + "epoch": 1.07, + "learning_rate": 1.655887009546502e-05, + "loss": 0.2134, + "step": 22965 + }, + { + "epoch": 1.07, + "learning_rate": 1.6558086310410235e-05, + "loss": 0.2538, + "step": 22970 + }, + { + "epoch": 1.07, + "learning_rate": 1.655730252535545e-05, + "loss": 0.2492, + "step": 22975 + }, + { + "epoch": 1.07, + "learning_rate": 1.6556518740300662e-05, + "loss": 0.4392, + "step": 22980 + }, + { + "epoch": 1.07, + "learning_rate": 1.6555734955245873e-05, + "loss": 0.2968, + "step": 22985 + }, + { + "epoch": 1.07, + "learning_rate": 1.655495117019109e-05, + "loss": 0.0663, + "step": 22990 + }, + { + "epoch": 1.07, + "learning_rate": 1.65541673851363e-05, + "loss": 0.1486, + "step": 22995 + }, + { + "epoch": 1.07, + "learning_rate": 1.6553383600081515e-05, + "loss": 0.0821, + "step": 23000 + }, + { + "epoch": 1.07, + "learning_rate": 1.655259981502673e-05, + "loss": 0.1134, + "step": 23005 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551816029971942e-05, + "loss": 0.2011, + "step": 23010 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551032244917156e-05, + "loss": 0.2416, + "step": 23015 + }, + { + "epoch": 1.07, + "learning_rate": 1.6550248459862367e-05, + "loss": 0.2838, + "step": 23020 + }, + { + "epoch": 1.07, + "learning_rate": 1.6549464674807584e-05, + "loss": 0.285, + "step": 23025 + }, + { + "epoch": 1.07, + "learning_rate": 1.6548680889752795e-05, + "loss": 0.3597, + "step": 23030 + }, + { + "epoch": 1.07, + "learning_rate": 1.654789710469801e-05, + "loss": 0.4682, + "step": 23035 + }, + { + "epoch": 1.08, + "learning_rate": 1.6547113319643222e-05, + "loss": 0.0424, + "step": 23040 + }, + { + "epoch": 1.08, + "learning_rate": 1.6546329534588436e-05, + "loss": 0.0739, + "step": 23045 + }, + { + "epoch": 1.08, + "learning_rate": 1.654554574953365e-05, + "loss": 0.0871, + "step": 23050 + }, + { + "epoch": 1.08, + "learning_rate": 1.6544761964478864e-05, + "loss": 0.0759, + "step": 23055 + }, + { + "epoch": 1.08, + "learning_rate": 1.6543978179424075e-05, + "loss": 0.1438, + "step": 23060 + }, + { + "epoch": 1.08, + "learning_rate": 1.654319439436929e-05, + "loss": 0.1683, + "step": 23065 + }, + { + "epoch": 1.08, + "learning_rate": 1.6542410609314503e-05, + "loss": 0.2168, + "step": 23070 + }, + { + "epoch": 1.08, + "learning_rate": 1.6541626824259716e-05, + "loss": 0.2413, + "step": 23075 + }, + { + "epoch": 1.08, + "learning_rate": 1.654084303920493e-05, + "loss": 0.3585, + "step": 23080 + }, + { + "epoch": 1.08, + "learning_rate": 1.6540059254150144e-05, + "loss": 0.2754, + "step": 23085 + }, + { + "epoch": 1.08, + "learning_rate": 1.6539275469095358e-05, + "loss": 0.0734, + "step": 23090 + }, + { + "epoch": 1.08, + "learning_rate": 1.653849168404057e-05, + "loss": 0.1821, + "step": 23095 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537707898985783e-05, + "loss": 0.136, + "step": 23100 + }, + { + "epoch": 1.08, + "learning_rate": 1.6536924113930996e-05, + "loss": 0.1626, + "step": 23105 + }, + { + "epoch": 1.08, + "learning_rate": 1.653614032887621e-05, + "loss": 0.1497, + "step": 23110 + }, + { + "epoch": 1.08, + "learning_rate": 1.6535356543821424e-05, + "loss": 0.1505, + "step": 23115 + }, + { + "epoch": 1.08, + "learning_rate": 1.6534572758766638e-05, + "loss": 0.2225, + "step": 23120 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533788973711852e-05, + "loss": 0.2445, + "step": 23125 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533005188657063e-05, + "loss": 0.4143, + "step": 23130 + }, + { + "epoch": 1.08, + "learning_rate": 1.6532221403602277e-05, + "loss": 0.322, + "step": 23135 + }, + { + "epoch": 1.08, + "learning_rate": 1.653143761854749e-05, + "loss": 0.0707, + "step": 23140 + }, + { + "epoch": 1.08, + "learning_rate": 1.6530653833492704e-05, + "loss": 0.0468, + "step": 23145 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529870048437918e-05, + "loss": 0.1199, + "step": 23150 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529086263383132e-05, + "loss": 0.1203, + "step": 23155 + }, + { + "epoch": 1.08, + "learning_rate": 1.6528302478328343e-05, + "loss": 0.1562, + "step": 23160 + }, + { + "epoch": 1.08, + "learning_rate": 1.652751869327356e-05, + "loss": 0.1764, + "step": 23165 + }, + { + "epoch": 1.08, + "learning_rate": 1.652673490821877e-05, + "loss": 0.1951, + "step": 23170 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525951123163984e-05, + "loss": 0.3135, + "step": 23175 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525167338109198e-05, + "loss": 0.2562, + "step": 23180 + }, + { + "epoch": 1.08, + "learning_rate": 1.6524383553054412e-05, + "loss": 0.2707, + "step": 23185 + }, + { + "epoch": 1.08, + "learning_rate": 1.6523599767999626e-05, + "loss": 0.0834, + "step": 23190 + }, + { + "epoch": 1.08, + "learning_rate": 1.6522815982944837e-05, + "loss": 0.1467, + "step": 23195 + }, + { + "epoch": 1.08, + "learning_rate": 1.652203219789005e-05, + "loss": 0.093, + "step": 23200 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521248412835264e-05, + "loss": 0.0902, + "step": 23205 + }, + { + "epoch": 1.08, + "learning_rate": 1.652046462778048e-05, + "loss": 0.2016, + "step": 23210 + }, + { + "epoch": 1.08, + "learning_rate": 1.6519680842725692e-05, + "loss": 0.2145, + "step": 23215 + }, + { + "epoch": 1.08, + "learning_rate": 1.6518897057670906e-05, + "loss": 0.2803, + "step": 23220 + }, + { + "epoch": 1.08, + "learning_rate": 1.651811327261612e-05, + "loss": 0.2825, + "step": 23225 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517329487561334e-05, + "loss": 0.3829, + "step": 23230 + }, + { + "epoch": 1.08, + "learning_rate": 1.6516545702506544e-05, + "loss": 0.2447, + "step": 23235 + }, + { + "epoch": 1.08, + "learning_rate": 1.6515761917451762e-05, + "loss": 0.0754, + "step": 23240 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514978132396972e-05, + "loss": 0.0598, + "step": 23245 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514194347342186e-05, + "loss": 0.1389, + "step": 23250 + }, + { + "epoch": 1.09, + "learning_rate": 1.65134105622874e-05, + "loss": 0.156, + "step": 23255 + }, + { + "epoch": 1.09, + "learning_rate": 1.651262677723261e-05, + "loss": 0.1372, + "step": 23260 + }, + { + "epoch": 1.09, + "learning_rate": 1.6511842992177828e-05, + "loss": 0.2297, + "step": 23265 + }, + { + "epoch": 1.09, + "learning_rate": 1.651105920712304e-05, + "loss": 0.2115, + "step": 23270 + }, + { + "epoch": 1.09, + "learning_rate": 1.6510275422068252e-05, + "loss": 0.2469, + "step": 23275 + }, + { + "epoch": 1.09, + "learning_rate": 1.6509491637013466e-05, + "loss": 0.2814, + "step": 23280 + }, + { + "epoch": 1.09, + "learning_rate": 1.650870785195868e-05, + "loss": 0.3825, + "step": 23285 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507924066903894e-05, + "loss": 0.0548, + "step": 23290 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507140281849108e-05, + "loss": 0.0556, + "step": 23295 + }, + { + "epoch": 1.09, + "learning_rate": 1.650635649679432e-05, + "loss": 0.0974, + "step": 23300 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505572711739536e-05, + "loss": 0.1694, + "step": 23305 + }, + { + "epoch": 1.09, + "learning_rate": 1.6504788926684746e-05, + "loss": 0.2936, + "step": 23310 + }, + { + "epoch": 1.09, + "learning_rate": 1.650400514162996e-05, + "loss": 0.1766, + "step": 23315 + }, + { + "epoch": 1.09, + "learning_rate": 1.6503221356575174e-05, + "loss": 0.1843, + "step": 23320 + }, + { + "epoch": 1.09, + "learning_rate": 1.6502437571520388e-05, + "loss": 0.3095, + "step": 23325 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501653786465602e-05, + "loss": 0.3444, + "step": 23330 + }, + { + "epoch": 1.09, + "learning_rate": 1.6500870001410812e-05, + "loss": 0.2731, + "step": 23335 + }, + { + "epoch": 1.09, + "learning_rate": 1.650008621635603e-05, + "loss": 0.0252, + "step": 23340 + }, + { + "epoch": 1.09, + "learning_rate": 1.649930243130124e-05, + "loss": 0.1032, + "step": 23345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6498518646246454e-05, + "loss": 0.1265, + "step": 23350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497734861191668e-05, + "loss": 0.1087, + "step": 23355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496951076136882e-05, + "loss": 0.1427, + "step": 23360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496167291082096e-05, + "loss": 0.2091, + "step": 23365 + }, + { + "epoch": 1.09, + "learning_rate": 1.649538350602731e-05, + "loss": 0.7426, + "step": 23370 + }, + { + "epoch": 1.09, + "learning_rate": 1.649459972097252e-05, + "loss": 0.2259, + "step": 23375 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493815935917738e-05, + "loss": 0.4487, + "step": 23380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493032150862948e-05, + "loss": 0.2265, + "step": 23385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6492248365808162e-05, + "loss": 0.0922, + "step": 23390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6491464580753376e-05, + "loss": 0.1548, + "step": 23395 + }, + { + "epoch": 1.09, + "learning_rate": 1.649068079569859e-05, + "loss": 0.1354, + "step": 23400 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489897010643804e-05, + "loss": 0.1246, + "step": 23405 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489113225589014e-05, + "loss": 0.1558, + "step": 23410 + }, + { + "epoch": 1.09, + "learning_rate": 1.6488329440534228e-05, + "loss": 0.1481, + "step": 23415 + }, + { + "epoch": 1.09, + "learning_rate": 1.6487545655479442e-05, + "loss": 0.2439, + "step": 23420 + }, + { + "epoch": 1.09, + "learning_rate": 1.6486761870424656e-05, + "loss": 0.3442, + "step": 23425 + }, + { + "epoch": 1.09, + "learning_rate": 1.648597808536987e-05, + "loss": 0.3016, + "step": 23430 + }, + { + "epoch": 1.09, + "learning_rate": 1.6485194300315084e-05, + "loss": 0.2837, + "step": 23435 + }, + { + "epoch": 1.09, + "learning_rate": 1.6484410515260298e-05, + "loss": 0.0948, + "step": 23440 + }, + { + "epoch": 1.09, + "learning_rate": 1.648362673020551e-05, + "loss": 0.0681, + "step": 23445 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482842945150722e-05, + "loss": 0.107, + "step": 23450 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482059160095936e-05, + "loss": 0.1297, + "step": 23455 + }, + { + "epoch": 1.09, + "learning_rate": 1.648127537504115e-05, + "loss": 0.1361, + "step": 23460 + }, + { + "epoch": 1.09, + "learning_rate": 1.6480491589986364e-05, + "loss": 0.142, + "step": 23465 + }, + { + "epoch": 1.1, + "learning_rate": 1.6479707804931578e-05, + "loss": 0.2334, + "step": 23470 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478924019876788e-05, + "loss": 0.277, + "step": 23475 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478140234822006e-05, + "loss": 0.341, + "step": 23480 + }, + { + "epoch": 1.1, + "learning_rate": 1.6477356449767216e-05, + "loss": 0.3929, + "step": 23485 + }, + { + "epoch": 1.1, + "learning_rate": 1.647657266471243e-05, + "loss": 0.1686, + "step": 23490 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475788879657644e-05, + "loss": 0.0867, + "step": 23495 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475005094602858e-05, + "loss": 0.0441, + "step": 23500 + }, + { + "epoch": 1.1, + "learning_rate": 1.647422130954807e-05, + "loss": 0.1218, + "step": 23505 + }, + { + "epoch": 1.1, + "learning_rate": 1.6473437524493286e-05, + "loss": 0.1216, + "step": 23510 + }, + { + "epoch": 1.1, + "learning_rate": 1.6472653739438496e-05, + "loss": 0.2273, + "step": 23515 + }, + { + "epoch": 1.1, + "learning_rate": 1.647186995438371e-05, + "loss": 0.3123, + "step": 23520 + }, + { + "epoch": 1.1, + "learning_rate": 1.6471086169328924e-05, + "loss": 0.2573, + "step": 23525 + }, + { + "epoch": 1.1, + "learning_rate": 1.6470302384274138e-05, + "loss": 0.4518, + "step": 23530 + }, + { + "epoch": 1.1, + "learning_rate": 1.6469518599219352e-05, + "loss": 0.2675, + "step": 23535 + }, + { + "epoch": 1.1, + "learning_rate": 1.6468734814164566e-05, + "loss": 0.0457, + "step": 23540 + }, + { + "epoch": 1.1, + "learning_rate": 1.646795102910978e-05, + "loss": 0.0892, + "step": 23545 + }, + { + "epoch": 1.1, + "learning_rate": 1.646716724405499e-05, + "loss": 0.07, + "step": 23550 + }, + { + "epoch": 1.1, + "learning_rate": 1.6466383459000207e-05, + "loss": 0.1443, + "step": 23555 + }, + { + "epoch": 1.1, + "learning_rate": 1.6465599673945418e-05, + "loss": 0.1848, + "step": 23560 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464815888890632e-05, + "loss": 0.1461, + "step": 23565 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464032103835846e-05, + "loss": 0.1837, + "step": 23570 + }, + { + "epoch": 1.1, + "learning_rate": 1.646324831878106e-05, + "loss": 0.178, + "step": 23575 + }, + { + "epoch": 1.1, + "learning_rate": 1.6462464533726273e-05, + "loss": 0.5349, + "step": 23580 + }, + { + "epoch": 1.1, + "learning_rate": 1.6461680748671484e-05, + "loss": 0.3265, + "step": 23585 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460896963616698e-05, + "loss": 0.0742, + "step": 23590 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460113178561912e-05, + "loss": 0.0978, + "step": 23595 + }, + { + "epoch": 1.1, + "learning_rate": 1.6459329393507126e-05, + "loss": 0.1308, + "step": 23600 + }, + { + "epoch": 1.1, + "learning_rate": 1.645854560845234e-05, + "loss": 0.1915, + "step": 23605 + }, + { + "epoch": 1.1, + "learning_rate": 1.6457761823397554e-05, + "loss": 0.094, + "step": 23610 + }, + { + "epoch": 1.1, + "learning_rate": 1.6456978038342764e-05, + "loss": 0.1869, + "step": 23615 + }, + { + "epoch": 1.1, + "learning_rate": 1.645619425328798e-05, + "loss": 0.1502, + "step": 23620 + }, + { + "epoch": 1.1, + "learning_rate": 1.6455410468233192e-05, + "loss": 0.2174, + "step": 23625 + }, + { + "epoch": 1.1, + "learning_rate": 1.6454626683178406e-05, + "loss": 0.4411, + "step": 23630 + }, + { + "epoch": 1.1, + "learning_rate": 1.645384289812362e-05, + "loss": 0.2565, + "step": 23635 + }, + { + "epoch": 1.1, + "learning_rate": 1.6453059113068834e-05, + "loss": 0.0362, + "step": 23640 + }, + { + "epoch": 1.1, + "learning_rate": 1.6452275328014047e-05, + "loss": 0.0526, + "step": 23645 + }, + { + "epoch": 1.1, + "learning_rate": 1.6451491542959258e-05, + "loss": 0.1324, + "step": 23650 + }, + { + "epoch": 1.1, + "learning_rate": 1.6450707757904475e-05, + "loss": 0.0992, + "step": 23655 + }, + { + "epoch": 1.1, + "learning_rate": 1.6449923972849686e-05, + "loss": 0.1667, + "step": 23660 + }, + { + "epoch": 1.1, + "learning_rate": 1.64491401877949e-05, + "loss": 0.2788, + "step": 23665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6448356402740114e-05, + "loss": 0.2025, + "step": 23670 + }, + { + "epoch": 1.1, + "learning_rate": 1.6447572617685328e-05, + "loss": 0.2577, + "step": 23675 + }, + { + "epoch": 1.1, + "learning_rate": 1.644678883263054e-05, + "loss": 0.2843, + "step": 23680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446005047575755e-05, + "loss": 0.2967, + "step": 23685 + }, + { + "epoch": 1.11, + "learning_rate": 1.6445221262520966e-05, + "loss": 0.0835, + "step": 23690 + }, + { + "epoch": 1.11, + "learning_rate": 1.6444437477466183e-05, + "loss": 0.0873, + "step": 23695 + }, + { + "epoch": 1.11, + "learning_rate": 1.6443653692411394e-05, + "loss": 0.1089, + "step": 23700 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442869907356608e-05, + "loss": 0.1707, + "step": 23705 + }, + { + "epoch": 1.11, + "learning_rate": 1.644208612230182e-05, + "loss": 0.191, + "step": 23710 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441302337247035e-05, + "loss": 0.124, + "step": 23715 + }, + { + "epoch": 1.11, + "learning_rate": 1.644051855219225e-05, + "loss": 0.2111, + "step": 23720 + }, + { + "epoch": 1.11, + "learning_rate": 1.643973476713746e-05, + "loss": 0.1918, + "step": 23725 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438950982082674e-05, + "loss": 0.4829, + "step": 23730 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438167197027888e-05, + "loss": 0.3964, + "step": 23735 + }, + { + "epoch": 1.11, + "learning_rate": 1.64373834119731e-05, + "loss": 0.0372, + "step": 23740 + }, + { + "epoch": 1.11, + "learning_rate": 1.6436599626918315e-05, + "loss": 0.0559, + "step": 23745 + }, + { + "epoch": 1.11, + "learning_rate": 1.643581584186353e-05, + "loss": 0.1288, + "step": 23750 + }, + { + "epoch": 1.11, + "learning_rate": 1.6435032056808743e-05, + "loss": 0.1367, + "step": 23755 + }, + { + "epoch": 1.11, + "learning_rate": 1.6434248271753957e-05, + "loss": 0.0864, + "step": 23760 + }, + { + "epoch": 1.11, + "learning_rate": 1.6433464486699168e-05, + "loss": 0.1882, + "step": 23765 + }, + { + "epoch": 1.11, + "learning_rate": 1.6432680701644385e-05, + "loss": 0.2035, + "step": 23770 + }, + { + "epoch": 1.11, + "learning_rate": 1.6431896916589595e-05, + "loss": 0.3376, + "step": 23775 + }, + { + "epoch": 1.11, + "learning_rate": 1.643111313153481e-05, + "loss": 0.5252, + "step": 23780 + }, + { + "epoch": 1.11, + "learning_rate": 1.6430329346480023e-05, + "loss": 0.2832, + "step": 23785 + }, + { + "epoch": 1.11, + "learning_rate": 1.6429545561425234e-05, + "loss": 0.0732, + "step": 23790 + }, + { + "epoch": 1.11, + "learning_rate": 1.642876177637045e-05, + "loss": 0.0599, + "step": 23795 + }, + { + "epoch": 1.11, + "learning_rate": 1.642797799131566e-05, + "loss": 0.1289, + "step": 23800 + }, + { + "epoch": 1.11, + "learning_rate": 1.6427194206260876e-05, + "loss": 0.1547, + "step": 23805 + }, + { + "epoch": 1.11, + "learning_rate": 1.642641042120609e-05, + "loss": 0.0893, + "step": 23810 + }, + { + "epoch": 1.11, + "learning_rate": 1.6425626636151303e-05, + "loss": 0.1896, + "step": 23815 + }, + { + "epoch": 1.11, + "learning_rate": 1.6424842851096517e-05, + "loss": 0.2018, + "step": 23820 + }, + { + "epoch": 1.11, + "learning_rate": 1.642405906604173e-05, + "loss": 0.2314, + "step": 23825 + }, + { + "epoch": 1.11, + "learning_rate": 1.642327528098694e-05, + "loss": 0.6314, + "step": 23830 + }, + { + "epoch": 1.11, + "learning_rate": 1.642249149593216e-05, + "loss": 0.2099, + "step": 23835 + }, + { + "epoch": 1.11, + "learning_rate": 1.642170771087737e-05, + "loss": 0.0935, + "step": 23840 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420923925822583e-05, + "loss": 0.0824, + "step": 23845 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420140140767797e-05, + "loss": 0.1094, + "step": 23850 + }, + { + "epoch": 1.11, + "learning_rate": 1.641935635571301e-05, + "loss": 0.1203, + "step": 23855 + }, + { + "epoch": 1.11, + "learning_rate": 1.6418572570658225e-05, + "loss": 0.0716, + "step": 23860 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417788785603436e-05, + "loss": 0.0886, + "step": 23865 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417005000548653e-05, + "loss": 0.2079, + "step": 23870 + }, + { + "epoch": 1.11, + "learning_rate": 1.6416221215493863e-05, + "loss": 0.2192, + "step": 23875 + }, + { + "epoch": 1.11, + "learning_rate": 1.6415437430439077e-05, + "loss": 0.1609, + "step": 23880 + }, + { + "epoch": 1.11, + "learning_rate": 1.641465364538429e-05, + "loss": 0.427, + "step": 23885 + }, + { + "epoch": 1.11, + "learning_rate": 1.6413869860329505e-05, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.11, + "learning_rate": 1.641308607527472e-05, + "loss": 0.0976, + "step": 23895 + }, + { + "epoch": 1.12, + "learning_rate": 1.6412302290219933e-05, + "loss": 0.1183, + "step": 23900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6411518505165143e-05, + "loss": 0.1117, + "step": 23905 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410734720110357e-05, + "loss": 0.1788, + "step": 23910 + }, + { + "epoch": 1.12, + "learning_rate": 1.640995093505557e-05, + "loss": 0.1834, + "step": 23915 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409167150000785e-05, + "loss": 0.2126, + "step": 23920 + }, + { + "epoch": 1.12, + "learning_rate": 1.6408383364946e-05, + "loss": 0.4336, + "step": 23925 + }, + { + "epoch": 1.12, + "learning_rate": 1.6407599579891213e-05, + "loss": 0.3505, + "step": 23930 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406815794836427e-05, + "loss": 0.3065, + "step": 23935 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406032009781637e-05, + "loss": 0.0399, + "step": 23940 + }, + { + "epoch": 1.12, + "learning_rate": 1.640524822472685e-05, + "loss": 0.0772, + "step": 23945 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404464439672065e-05, + "loss": 0.0448, + "step": 23950 + }, + { + "epoch": 1.12, + "learning_rate": 1.640368065461728e-05, + "loss": 0.0798, + "step": 23955 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402896869562493e-05, + "loss": 0.0925, + "step": 23960 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402113084507707e-05, + "loss": 0.1361, + "step": 23965 + }, + { + "epoch": 1.12, + "learning_rate": 1.640132929945292e-05, + "loss": 0.1452, + "step": 23970 + }, + { + "epoch": 1.12, + "learning_rate": 1.640054551439813e-05, + "loss": 0.2325, + "step": 23975 + }, + { + "epoch": 1.12, + "learning_rate": 1.6399761729343345e-05, + "loss": 0.3527, + "step": 23980 + }, + { + "epoch": 1.12, + "learning_rate": 1.639897794428856e-05, + "loss": 0.2864, + "step": 23985 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398194159233773e-05, + "loss": 0.0622, + "step": 23990 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397410374178987e-05, + "loss": 0.0723, + "step": 23995 + }, + { + "epoch": 1.12, + "learning_rate": 1.63966265891242e-05, + "loss": 0.1408, + "step": 24000 + }, + { + "epoch": 1.12, + "learning_rate": 1.639584280406941e-05, + "loss": 0.1051, + "step": 24005 + }, + { + "epoch": 1.12, + "learning_rate": 1.639505901901463e-05, + "loss": 0.1084, + "step": 24010 + }, + { + "epoch": 1.12, + "learning_rate": 1.639427523395984e-05, + "loss": 0.2103, + "step": 24015 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393491448905053e-05, + "loss": 0.2023, + "step": 24020 + }, + { + "epoch": 1.12, + "learning_rate": 1.6392707663850267e-05, + "loss": 0.2348, + "step": 24025 + }, + { + "epoch": 1.12, + "learning_rate": 1.639192387879548e-05, + "loss": 0.3672, + "step": 24030 + }, + { + "epoch": 1.12, + "learning_rate": 1.6391140093740695e-05, + "loss": 0.3526, + "step": 24035 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390356308685905e-05, + "loss": 0.0778, + "step": 24040 + }, + { + "epoch": 1.12, + "learning_rate": 1.638957252363112e-05, + "loss": 0.1023, + "step": 24045 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388788738576333e-05, + "loss": 0.0873, + "step": 24050 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388004953521547e-05, + "loss": 0.1086, + "step": 24055 + }, + { + "epoch": 1.12, + "learning_rate": 1.638722116846676e-05, + "loss": 0.1551, + "step": 24060 + }, + { + "epoch": 1.12, + "learning_rate": 1.6386437383411975e-05, + "loss": 0.1625, + "step": 24065 + }, + { + "epoch": 1.12, + "learning_rate": 1.638565359835719e-05, + "loss": 0.2734, + "step": 24070 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384869813302403e-05, + "loss": 0.2265, + "step": 24075 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384086028247613e-05, + "loss": 0.2489, + "step": 24080 + }, + { + "epoch": 1.12, + "learning_rate": 1.638330224319283e-05, + "loss": 0.3076, + "step": 24085 + }, + { + "epoch": 1.12, + "learning_rate": 1.638251845813804e-05, + "loss": 0.1064, + "step": 24090 + }, + { + "epoch": 1.12, + "learning_rate": 1.6381734673083255e-05, + "loss": 0.0911, + "step": 24095 + }, + { + "epoch": 1.12, + "learning_rate": 1.638095088802847e-05, + "loss": 0.0852, + "step": 24100 + }, + { + "epoch": 1.12, + "learning_rate": 1.638016710297368e-05, + "loss": 0.1027, + "step": 24105 + }, + { + "epoch": 1.13, + "learning_rate": 1.6379383317918897e-05, + "loss": 0.1949, + "step": 24110 + }, + { + "epoch": 1.13, + "learning_rate": 1.6378599532864107e-05, + "loss": 0.1723, + "step": 24115 + }, + { + "epoch": 1.13, + "learning_rate": 1.637781574780932e-05, + "loss": 0.207, + "step": 24120 + }, + { + "epoch": 1.13, + "learning_rate": 1.6377031962754535e-05, + "loss": 0.2483, + "step": 24125 + }, + { + "epoch": 1.13, + "learning_rate": 1.637624817769975e-05, + "loss": 0.4968, + "step": 24130 + }, + { + "epoch": 1.13, + "learning_rate": 1.6375464392644963e-05, + "loss": 0.3258, + "step": 24135 + }, + { + "epoch": 1.13, + "learning_rate": 1.6374680607590177e-05, + "loss": 0.0432, + "step": 24140 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373896822535387e-05, + "loss": 0.0878, + "step": 24145 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373113037480605e-05, + "loss": 0.163, + "step": 24150 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372329252425815e-05, + "loss": 0.2356, + "step": 24155 + }, + { + "epoch": 1.13, + "learning_rate": 1.637154546737103e-05, + "loss": 0.1573, + "step": 24160 + }, + { + "epoch": 1.13, + "learning_rate": 1.6370761682316243e-05, + "loss": 0.1774, + "step": 24165 + }, + { + "epoch": 1.13, + "learning_rate": 1.6369977897261457e-05, + "loss": 0.1667, + "step": 24170 + }, + { + "epoch": 1.13, + "learning_rate": 1.636919411220667e-05, + "loss": 0.1914, + "step": 24175 + }, + { + "epoch": 1.13, + "learning_rate": 1.636841032715188e-05, + "loss": 0.3366, + "step": 24180 + }, + { + "epoch": 1.13, + "learning_rate": 1.63676265420971e-05, + "loss": 0.389, + "step": 24185 + }, + { + "epoch": 1.13, + "learning_rate": 1.636684275704231e-05, + "loss": 0.0601, + "step": 24190 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366058971987523e-05, + "loss": 0.0784, + "step": 24195 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365275186932737e-05, + "loss": 0.1066, + "step": 24200 + }, + { + "epoch": 1.13, + "learning_rate": 1.636449140187795e-05, + "loss": 0.1525, + "step": 24205 + }, + { + "epoch": 1.13, + "learning_rate": 1.6363707616823165e-05, + "loss": 0.1988, + "step": 24210 + }, + { + "epoch": 1.13, + "learning_rate": 1.636292383176838e-05, + "loss": 0.252, + "step": 24215 + }, + { + "epoch": 1.13, + "learning_rate": 1.636214004671359e-05, + "loss": 0.3322, + "step": 24220 + }, + { + "epoch": 1.13, + "learning_rate": 1.6361356261658806e-05, + "loss": 0.2519, + "step": 24225 + }, + { + "epoch": 1.13, + "learning_rate": 1.6360572476604017e-05, + "loss": 0.2909, + "step": 24230 + }, + { + "epoch": 1.13, + "learning_rate": 1.635978869154923e-05, + "loss": 0.3463, + "step": 24235 + }, + { + "epoch": 1.13, + "learning_rate": 1.6359004906494445e-05, + "loss": 0.0224, + "step": 24240 + }, + { + "epoch": 1.13, + "learning_rate": 1.635822112143966e-05, + "loss": 0.0675, + "step": 24245 + }, + { + "epoch": 1.13, + "learning_rate": 1.6357437336384872e-05, + "loss": 0.1372, + "step": 24250 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356653551330083e-05, + "loss": 0.1832, + "step": 24255 + }, + { + "epoch": 1.13, + "learning_rate": 1.6355869766275297e-05, + "loss": 0.1108, + "step": 24260 + }, + { + "epoch": 1.13, + "learning_rate": 1.635508598122051e-05, + "loss": 0.1944, + "step": 24265 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354302196165725e-05, + "loss": 0.1828, + "step": 24270 + }, + { + "epoch": 1.13, + "learning_rate": 1.635351841111094e-05, + "loss": 0.2393, + "step": 24275 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352734626056153e-05, + "loss": 0.4225, + "step": 24280 + }, + { + "epoch": 1.13, + "learning_rate": 1.6351950841001366e-05, + "loss": 0.3735, + "step": 24285 + }, + { + "epoch": 1.13, + "learning_rate": 1.635116705594658e-05, + "loss": 0.0227, + "step": 24290 + }, + { + "epoch": 1.13, + "learning_rate": 1.635038327089179e-05, + "loss": 0.0606, + "step": 24295 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349599485837005e-05, + "loss": 0.0897, + "step": 24300 + }, + { + "epoch": 1.13, + "learning_rate": 1.634881570078222e-05, + "loss": 0.1893, + "step": 24305 + }, + { + "epoch": 1.13, + "learning_rate": 1.6348031915727433e-05, + "loss": 0.1348, + "step": 24310 + }, + { + "epoch": 1.13, + "learning_rate": 1.6347248130672646e-05, + "loss": 0.1975, + "step": 24315 + }, + { + "epoch": 1.13, + "learning_rate": 1.6346464345617857e-05, + "loss": 0.2427, + "step": 24320 + }, + { + "epoch": 1.14, + "learning_rate": 1.6345680560563074e-05, + "loss": 0.2059, + "step": 24325 + }, + { + "epoch": 1.14, + "learning_rate": 1.6344896775508285e-05, + "loss": 0.4388, + "step": 24330 + }, + { + "epoch": 1.14, + "learning_rate": 1.63441129904535e-05, + "loss": 0.2305, + "step": 24335 + }, + { + "epoch": 1.14, + "learning_rate": 1.6343329205398713e-05, + "loss": 0.0722, + "step": 24340 + }, + { + "epoch": 1.14, + "learning_rate": 1.6342545420343927e-05, + "loss": 0.1261, + "step": 24345 + }, + { + "epoch": 1.14, + "learning_rate": 1.634176163528914e-05, + "loss": 0.0991, + "step": 24350 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340977850234354e-05, + "loss": 0.1313, + "step": 24355 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340194065179565e-05, + "loss": 0.1284, + "step": 24360 + }, + { + "epoch": 1.14, + "learning_rate": 1.633941028012478e-05, + "loss": 0.2371, + "step": 24365 + }, + { + "epoch": 1.14, + "learning_rate": 1.6338626495069993e-05, + "loss": 0.2578, + "step": 24370 + }, + { + "epoch": 1.14, + "learning_rate": 1.6337842710015207e-05, + "loss": 0.2852, + "step": 24375 + }, + { + "epoch": 1.14, + "learning_rate": 1.633705892496042e-05, + "loss": 0.3184, + "step": 24380 + }, + { + "epoch": 1.14, + "learning_rate": 1.6336275139905634e-05, + "loss": 0.2727, + "step": 24385 + }, + { + "epoch": 1.14, + "learning_rate": 1.6335491354850848e-05, + "loss": 0.0995, + "step": 24390 + }, + { + "epoch": 1.14, + "learning_rate": 1.633470756979606e-05, + "loss": 0.075, + "step": 24395 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333923784741276e-05, + "loss": 0.1169, + "step": 24400 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333139999686487e-05, + "loss": 0.1186, + "step": 24405 + }, + { + "epoch": 1.14, + "learning_rate": 1.63323562146317e-05, + "loss": 0.1519, + "step": 24410 + }, + { + "epoch": 1.14, + "learning_rate": 1.6331572429576914e-05, + "loss": 0.1897, + "step": 24415 + }, + { + "epoch": 1.14, + "learning_rate": 1.633078864452213e-05, + "loss": 0.2469, + "step": 24420 + }, + { + "epoch": 1.14, + "learning_rate": 1.6330004859467342e-05, + "loss": 0.2405, + "step": 24425 + }, + { + "epoch": 1.14, + "learning_rate": 1.6329221074412553e-05, + "loss": 0.5484, + "step": 24430 + }, + { + "epoch": 1.14, + "learning_rate": 1.6328437289357767e-05, + "loss": 0.3294, + "step": 24435 + }, + { + "epoch": 1.14, + "learning_rate": 1.632765350430298e-05, + "loss": 0.0427, + "step": 24440 + }, + { + "epoch": 1.14, + "learning_rate": 1.6326869719248194e-05, + "loss": 0.1192, + "step": 24445 + }, + { + "epoch": 1.14, + "learning_rate": 1.632608593419341e-05, + "loss": 0.0817, + "step": 24450 + }, + { + "epoch": 1.14, + "learning_rate": 1.6325302149138622e-05, + "loss": 0.0814, + "step": 24455 + }, + { + "epoch": 1.14, + "learning_rate": 1.6324518364083833e-05, + "loss": 0.1558, + "step": 24460 + }, + { + "epoch": 1.14, + "learning_rate": 1.632373457902905e-05, + "loss": 0.149, + "step": 24465 + }, + { + "epoch": 1.14, + "learning_rate": 1.632295079397426e-05, + "loss": 0.1717, + "step": 24470 + }, + { + "epoch": 1.14, + "learning_rate": 1.6322167008919475e-05, + "loss": 0.2424, + "step": 24475 + }, + { + "epoch": 1.14, + "learning_rate": 1.632138322386469e-05, + "loss": 0.2916, + "step": 24480 + }, + { + "epoch": 1.14, + "learning_rate": 1.6320599438809902e-05, + "loss": 0.3675, + "step": 24485 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319815653755116e-05, + "loss": 0.1092, + "step": 24490 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319031868700327e-05, + "loss": 0.086, + "step": 24495 + }, + { + "epoch": 1.14, + "learning_rate": 1.6318248083645544e-05, + "loss": 0.1483, + "step": 24500 + }, + { + "epoch": 1.14, + "learning_rate": 1.6317464298590755e-05, + "loss": 0.1295, + "step": 24505 + }, + { + "epoch": 1.14, + "learning_rate": 1.631668051353597e-05, + "loss": 0.1388, + "step": 24510 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315896728481182e-05, + "loss": 0.2248, + "step": 24515 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315112943426396e-05, + "loss": 0.229, + "step": 24520 + }, + { + "epoch": 1.14, + "learning_rate": 1.631432915837161e-05, + "loss": 0.2604, + "step": 24525 + }, + { + "epoch": 1.14, + "learning_rate": 1.6313545373316824e-05, + "loss": 0.3488, + "step": 24530 + }, + { + "epoch": 1.14, + "learning_rate": 1.6312761588262035e-05, + "loss": 0.2816, + "step": 24535 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311977803207252e-05, + "loss": 0.0694, + "step": 24540 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311194018152462e-05, + "loss": 0.0728, + "step": 24545 + }, + { + "epoch": 1.15, + "learning_rate": 1.6310410233097676e-05, + "loss": 0.058, + "step": 24550 + }, + { + "epoch": 1.15, + "learning_rate": 1.630962644804289e-05, + "loss": 0.0766, + "step": 24555 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308842662988104e-05, + "loss": 0.1177, + "step": 24560 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308058877933318e-05, + "loss": 0.1314, + "step": 24565 + }, + { + "epoch": 1.15, + "learning_rate": 1.630727509287853e-05, + "loss": 0.1472, + "step": 24570 + }, + { + "epoch": 1.15, + "learning_rate": 1.6306491307823742e-05, + "loss": 0.3199, + "step": 24575 + }, + { + "epoch": 1.15, + "learning_rate": 1.6305707522768956e-05, + "loss": 0.3317, + "step": 24580 + }, + { + "epoch": 1.15, + "learning_rate": 1.630492373771417e-05, + "loss": 0.3662, + "step": 24585 + }, + { + "epoch": 1.15, + "learning_rate": 1.6304139952659384e-05, + "loss": 0.0684, + "step": 24590 + }, + { + "epoch": 1.15, + "learning_rate": 1.6303356167604598e-05, + "loss": 0.0825, + "step": 24595 + }, + { + "epoch": 1.15, + "learning_rate": 1.6302572382549812e-05, + "loss": 0.0647, + "step": 24600 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301788597495026e-05, + "loss": 0.101, + "step": 24605 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301004812440236e-05, + "loss": 0.1126, + "step": 24610 + }, + { + "epoch": 1.15, + "learning_rate": 1.6300221027385454e-05, + "loss": 0.1619, + "step": 24615 + }, + { + "epoch": 1.15, + "learning_rate": 1.6299437242330664e-05, + "loss": 0.248, + "step": 24620 + }, + { + "epoch": 1.15, + "learning_rate": 1.6298653457275878e-05, + "loss": 0.2743, + "step": 24625 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297869672221092e-05, + "loss": 0.4154, + "step": 24630 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297085887166303e-05, + "loss": 0.3563, + "step": 24635 + }, + { + "epoch": 1.15, + "learning_rate": 1.629630210211152e-05, + "loss": 0.0512, + "step": 24640 + }, + { + "epoch": 1.15, + "learning_rate": 1.629551831705673e-05, + "loss": 0.0548, + "step": 24645 + }, + { + "epoch": 1.15, + "learning_rate": 1.6294734532001944e-05, + "loss": 0.0815, + "step": 24650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293950746947158e-05, + "loss": 0.1276, + "step": 24655 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293166961892372e-05, + "loss": 0.141, + "step": 24660 + }, + { + "epoch": 1.15, + "learning_rate": 1.6292383176837586e-05, + "loss": 0.2118, + "step": 24665 + }, + { + "epoch": 1.15, + "learning_rate": 1.62915993917828e-05, + "loss": 0.1749, + "step": 24670 + }, + { + "epoch": 1.15, + "learning_rate": 1.629081560672801e-05, + "loss": 0.2312, + "step": 24675 + }, + { + "epoch": 1.15, + "learning_rate": 1.6290031821673228e-05, + "loss": 0.5181, + "step": 24680 + }, + { + "epoch": 1.15, + "learning_rate": 1.6289248036618438e-05, + "loss": 0.2571, + "step": 24685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6288464251563652e-05, + "loss": 0.0409, + "step": 24690 + }, + { + "epoch": 1.15, + "learning_rate": 1.6287680466508866e-05, + "loss": 0.0747, + "step": 24695 + }, + { + "epoch": 1.15, + "learning_rate": 1.628689668145408e-05, + "loss": 0.1181, + "step": 24700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6286112896399294e-05, + "loss": 0.1395, + "step": 24705 + }, + { + "epoch": 1.15, + "learning_rate": 1.6285329111344504e-05, + "loss": 0.1435, + "step": 24710 + }, + { + "epoch": 1.15, + "learning_rate": 1.628454532628972e-05, + "loss": 0.174, + "step": 24715 + }, + { + "epoch": 1.15, + "learning_rate": 1.6283761541234932e-05, + "loss": 0.1197, + "step": 24720 + }, + { + "epoch": 1.15, + "learning_rate": 1.6282977756180146e-05, + "loss": 0.2038, + "step": 24725 + }, + { + "epoch": 1.15, + "learning_rate": 1.628219397112536e-05, + "loss": 0.3092, + "step": 24730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6281410186070574e-05, + "loss": 0.4175, + "step": 24735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6280626401015788e-05, + "loss": 0.0201, + "step": 24740 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279842615961002e-05, + "loss": 0.1052, + "step": 24745 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279058830906212e-05, + "loss": 0.1185, + "step": 24750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6278275045851426e-05, + "loss": 0.1012, + "step": 24755 + }, + { + "epoch": 1.16, + "learning_rate": 1.627749126079664e-05, + "loss": 0.2098, + "step": 24760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6276707475741854e-05, + "loss": 0.1507, + "step": 24765 + }, + { + "epoch": 1.16, + "learning_rate": 1.6275923690687068e-05, + "loss": 0.1518, + "step": 24770 + }, + { + "epoch": 1.16, + "learning_rate": 1.627513990563228e-05, + "loss": 0.2168, + "step": 24775 + }, + { + "epoch": 1.16, + "learning_rate": 1.6274356120577496e-05, + "loss": 0.3516, + "step": 24780 + }, + { + "epoch": 1.16, + "learning_rate": 1.6273572335522706e-05, + "loss": 0.3077, + "step": 24785 + }, + { + "epoch": 1.16, + "learning_rate": 1.627278855046792e-05, + "loss": 0.0342, + "step": 24790 + }, + { + "epoch": 1.16, + "learning_rate": 1.6272004765413134e-05, + "loss": 0.083, + "step": 24795 + }, + { + "epoch": 1.16, + "learning_rate": 1.6271220980358348e-05, + "loss": 0.0801, + "step": 24800 + }, + { + "epoch": 1.16, + "learning_rate": 1.6270437195303562e-05, + "loss": 0.1694, + "step": 24805 + }, + { + "epoch": 1.16, + "learning_rate": 1.6269653410248776e-05, + "loss": 0.1143, + "step": 24810 + }, + { + "epoch": 1.16, + "learning_rate": 1.626886962519399e-05, + "loss": 0.0924, + "step": 24815 + }, + { + "epoch": 1.16, + "learning_rate": 1.62680858401392e-05, + "loss": 0.237, + "step": 24820 + }, + { + "epoch": 1.16, + "learning_rate": 1.6267302055084414e-05, + "loss": 0.1925, + "step": 24825 + }, + { + "epoch": 1.16, + "learning_rate": 1.6266518270029628e-05, + "loss": 0.3285, + "step": 24830 + }, + { + "epoch": 1.16, + "learning_rate": 1.6265734484974842e-05, + "loss": 0.4066, + "step": 24835 + }, + { + "epoch": 1.16, + "learning_rate": 1.6264950699920056e-05, + "loss": 0.0913, + "step": 24840 + }, + { + "epoch": 1.16, + "learning_rate": 1.626416691486527e-05, + "loss": 0.09, + "step": 24845 + }, + { + "epoch": 1.16, + "learning_rate": 1.626338312981048e-05, + "loss": 0.1174, + "step": 24850 + }, + { + "epoch": 1.16, + "learning_rate": 1.6262599344755697e-05, + "loss": 0.0969, + "step": 24855 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261815559700908e-05, + "loss": 0.1173, + "step": 24860 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261031774646122e-05, + "loss": 0.222, + "step": 24865 + }, + { + "epoch": 1.16, + "learning_rate": 1.6260247989591336e-05, + "loss": 0.2767, + "step": 24870 + }, + { + "epoch": 1.16, + "learning_rate": 1.625946420453655e-05, + "loss": 0.1782, + "step": 24875 + }, + { + "epoch": 1.16, + "learning_rate": 1.6258680419481764e-05, + "loss": 0.2277, + "step": 24880 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257896634426974e-05, + "loss": 0.2343, + "step": 24885 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257112849372188e-05, + "loss": 0.3339, + "step": 24890 + }, + { + "epoch": 1.16, + "learning_rate": 1.6256329064317402e-05, + "loss": 0.0648, + "step": 24895 + }, + { + "epoch": 1.16, + "learning_rate": 1.6255545279262616e-05, + "loss": 0.1105, + "step": 24900 + }, + { + "epoch": 1.16, + "learning_rate": 1.625476149420783e-05, + "loss": 0.1184, + "step": 24905 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253977709153044e-05, + "loss": 0.166, + "step": 24910 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253193924098258e-05, + "loss": 0.098, + "step": 24915 + }, + { + "epoch": 1.16, + "learning_rate": 1.625241013904347e-05, + "loss": 0.2253, + "step": 24920 + }, + { + "epoch": 1.16, + "learning_rate": 1.6251626353988682e-05, + "loss": 0.2517, + "step": 24925 + }, + { + "epoch": 1.16, + "learning_rate": 1.62508425689339e-05, + "loss": 0.287, + "step": 24930 + }, + { + "epoch": 1.16, + "learning_rate": 1.625005878387911e-05, + "loss": 0.3426, + "step": 24935 + }, + { + "epoch": 1.16, + "learning_rate": 1.6249274998824324e-05, + "loss": 0.0747, + "step": 24940 + }, + { + "epoch": 1.16, + "learning_rate": 1.6248491213769538e-05, + "loss": 0.0584, + "step": 24945 + }, + { + "epoch": 1.16, + "learning_rate": 1.6247707428714748e-05, + "loss": 0.1144, + "step": 24950 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246923643659965e-05, + "loss": 0.1062, + "step": 24955 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246139858605176e-05, + "loss": 0.1777, + "step": 24960 + }, + { + "epoch": 1.16, + "learning_rate": 1.624535607355039e-05, + "loss": 0.1773, + "step": 24965 + }, + { + "epoch": 1.17, + "learning_rate": 1.6244572288495604e-05, + "loss": 0.2313, + "step": 24970 + }, + { + "epoch": 1.17, + "learning_rate": 1.6243788503440818e-05, + "loss": 0.2795, + "step": 24975 + }, + { + "epoch": 1.17, + "learning_rate": 1.624300471838603e-05, + "loss": 0.3885, + "step": 24980 + }, + { + "epoch": 1.17, + "learning_rate": 1.6242220933331245e-05, + "loss": 0.3908, + "step": 24985 + }, + { + "epoch": 1.17, + "learning_rate": 1.6241437148276456e-05, + "loss": 0.0605, + "step": 24990 + }, + { + "epoch": 1.17, + "learning_rate": 1.6240653363221673e-05, + "loss": 0.085, + "step": 24995 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239869578166884e-05, + "loss": 0.1085, + "step": 25000 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239085793112098e-05, + "loss": 0.0463, + "step": 25005 + }, + { + "epoch": 1.17, + "learning_rate": 1.623830200805731e-05, + "loss": 0.1531, + "step": 25010 + }, + { + "epoch": 1.17, + "learning_rate": 1.6237518223002526e-05, + "loss": 0.2199, + "step": 25015 + }, + { + "epoch": 1.17, + "learning_rate": 1.623673443794774e-05, + "loss": 0.2393, + "step": 25020 + }, + { + "epoch": 1.17, + "learning_rate": 1.623595065289295e-05, + "loss": 0.3733, + "step": 25025 + }, + { + "epoch": 1.17, + "learning_rate": 1.6235166867838167e-05, + "loss": 0.3225, + "step": 25030 + }, + { + "epoch": 1.17, + "learning_rate": 1.6234383082783378e-05, + "loss": 0.2117, + "step": 25035 + }, + { + "epoch": 1.17, + "learning_rate": 1.623359929772859e-05, + "loss": 0.0419, + "step": 25040 + }, + { + "epoch": 1.17, + "learning_rate": 1.6232815512673806e-05, + "loss": 0.1021, + "step": 25045 + }, + { + "epoch": 1.17, + "learning_rate": 1.623203172761902e-05, + "loss": 0.1235, + "step": 25050 + }, + { + "epoch": 1.17, + "learning_rate": 1.6231247942564233e-05, + "loss": 0.1249, + "step": 25055 + }, + { + "epoch": 1.17, + "learning_rate": 1.6230464157509447e-05, + "loss": 0.192, + "step": 25060 + }, + { + "epoch": 1.17, + "learning_rate": 1.6229680372454658e-05, + "loss": 0.1072, + "step": 25065 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228896587399875e-05, + "loss": 0.1035, + "step": 25070 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228112802345086e-05, + "loss": 0.2934, + "step": 25075 + }, + { + "epoch": 1.17, + "learning_rate": 1.62273290172903e-05, + "loss": 0.4639, + "step": 25080 + }, + { + "epoch": 1.17, + "learning_rate": 1.6226545232235513e-05, + "loss": 0.235, + "step": 25085 + }, + { + "epoch": 1.17, + "learning_rate": 1.6225761447180727e-05, + "loss": 0.0493, + "step": 25090 + }, + { + "epoch": 1.17, + "learning_rate": 1.622497766212594e-05, + "loss": 0.0572, + "step": 25095 + }, + { + "epoch": 1.17, + "learning_rate": 1.6224193877071152e-05, + "loss": 0.0987, + "step": 25100 + }, + { + "epoch": 1.17, + "learning_rate": 1.6223410092016366e-05, + "loss": 0.1056, + "step": 25105 + }, + { + "epoch": 1.17, + "learning_rate": 1.622262630696158e-05, + "loss": 0.1613, + "step": 25110 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221842521906793e-05, + "loss": 0.1629, + "step": 25115 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221058736852007e-05, + "loss": 0.2078, + "step": 25120 + }, + { + "epoch": 1.17, + "learning_rate": 1.622027495179722e-05, + "loss": 0.1947, + "step": 25125 + }, + { + "epoch": 1.17, + "learning_rate": 1.6219491166742435e-05, + "loss": 0.3467, + "step": 25130 + }, + { + "epoch": 1.17, + "learning_rate": 1.621870738168765e-05, + "loss": 0.2284, + "step": 25135 + }, + { + "epoch": 1.17, + "learning_rate": 1.621792359663286e-05, + "loss": 0.0624, + "step": 25140 + }, + { + "epoch": 1.17, + "learning_rate": 1.6217139811578074e-05, + "loss": 0.0692, + "step": 25145 + }, + { + "epoch": 1.17, + "learning_rate": 1.6216356026523287e-05, + "loss": 0.1222, + "step": 25150 + }, + { + "epoch": 1.17, + "learning_rate": 1.62155722414685e-05, + "loss": 0.1692, + "step": 25155 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214788456413715e-05, + "loss": 0.1629, + "step": 25160 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214004671358926e-05, + "loss": 0.1671, + "step": 25165 + }, + { + "epoch": 1.17, + "learning_rate": 1.6213220886304143e-05, + "loss": 0.2507, + "step": 25170 + }, + { + "epoch": 1.17, + "learning_rate": 1.6212437101249354e-05, + "loss": 0.2868, + "step": 25175 + }, + { + "epoch": 1.17, + "learning_rate": 1.6211653316194567e-05, + "loss": 0.3525, + "step": 25180 + }, + { + "epoch": 1.18, + "learning_rate": 1.621086953113978e-05, + "loss": 0.3272, + "step": 25185 + }, + { + "epoch": 1.18, + "learning_rate": 1.6210085746084995e-05, + "loss": 0.0452, + "step": 25190 + }, + { + "epoch": 1.18, + "learning_rate": 1.620930196103021e-05, + "loss": 0.0713, + "step": 25195 + }, + { + "epoch": 1.18, + "learning_rate": 1.6208518175975423e-05, + "loss": 0.0912, + "step": 25200 + }, + { + "epoch": 1.18, + "learning_rate": 1.6207734390920634e-05, + "loss": 0.1543, + "step": 25205 + }, + { + "epoch": 1.18, + "learning_rate": 1.6206950605865848e-05, + "loss": 0.1041, + "step": 25210 + }, + { + "epoch": 1.18, + "learning_rate": 1.620616682081106e-05, + "loss": 0.141, + "step": 25215 + }, + { + "epoch": 1.18, + "learning_rate": 1.6205383035756275e-05, + "loss": 0.1966, + "step": 25220 + }, + { + "epoch": 1.18, + "learning_rate": 1.620459925070149e-05, + "loss": 0.3014, + "step": 25225 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203815465646703e-05, + "loss": 0.4792, + "step": 25230 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203031680591917e-05, + "loss": 0.2327, + "step": 25235 + }, + { + "epoch": 1.18, + "learning_rate": 1.6202247895537128e-05, + "loss": 0.0362, + "step": 25240 + }, + { + "epoch": 1.18, + "learning_rate": 1.6201464110482345e-05, + "loss": 0.1217, + "step": 25245 + }, + { + "epoch": 1.18, + "learning_rate": 1.6200680325427555e-05, + "loss": 0.0556, + "step": 25250 + }, + { + "epoch": 1.18, + "learning_rate": 1.619989654037277e-05, + "loss": 0.1454, + "step": 25255 + }, + { + "epoch": 1.18, + "learning_rate": 1.6199112755317983e-05, + "loss": 0.1041, + "step": 25260 + }, + { + "epoch": 1.18, + "learning_rate": 1.6198328970263197e-05, + "loss": 0.1451, + "step": 25265 + }, + { + "epoch": 1.18, + "learning_rate": 1.619754518520841e-05, + "loss": 0.2237, + "step": 25270 + }, + { + "epoch": 1.18, + "learning_rate": 1.619691815716458e-05, + "loss": 0.2745, + "step": 25275 + }, + { + "epoch": 1.18, + "learning_rate": 1.6196134372109792e-05, + "loss": 0.4215, + "step": 25280 + }, + { + "epoch": 1.18, + "learning_rate": 1.619535058705501e-05, + "loss": 0.481, + "step": 25285 + }, + { + "epoch": 1.18, + "learning_rate": 1.619456680200022e-05, + "loss": 0.0337, + "step": 25290 + }, + { + "epoch": 1.18, + "learning_rate": 1.6193783016945434e-05, + "loss": 0.102, + "step": 25295 + }, + { + "epoch": 1.18, + "learning_rate": 1.6192999231890648e-05, + "loss": 0.0853, + "step": 25300 + }, + { + "epoch": 1.18, + "learning_rate": 1.619221544683586e-05, + "loss": 0.1298, + "step": 25305 + }, + { + "epoch": 1.18, + "learning_rate": 1.6191431661781075e-05, + "loss": 0.151, + "step": 25310 + }, + { + "epoch": 1.18, + "learning_rate": 1.619064787672629e-05, + "loss": 0.1743, + "step": 25315 + }, + { + "epoch": 1.18, + "learning_rate": 1.61898640916715e-05, + "loss": 0.247, + "step": 25320 + }, + { + "epoch": 1.18, + "learning_rate": 1.6189080306616717e-05, + "loss": 0.2403, + "step": 25325 + }, + { + "epoch": 1.18, + "learning_rate": 1.6188296521561928e-05, + "loss": 0.4411, + "step": 25330 + }, + { + "epoch": 1.18, + "learning_rate": 1.618751273650714e-05, + "loss": 0.2575, + "step": 25335 + }, + { + "epoch": 1.18, + "learning_rate": 1.6186728951452355e-05, + "loss": 0.0465, + "step": 25340 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185945166397566e-05, + "loss": 0.0688, + "step": 25345 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185161381342783e-05, + "loss": 0.1129, + "step": 25350 + }, + { + "epoch": 1.18, + "learning_rate": 1.6184377596287994e-05, + "loss": 0.1276, + "step": 25355 + }, + { + "epoch": 1.18, + "learning_rate": 1.6183593811233208e-05, + "loss": 0.0864, + "step": 25360 + }, + { + "epoch": 1.18, + "learning_rate": 1.618281002617842e-05, + "loss": 0.1639, + "step": 25365 + }, + { + "epoch": 1.18, + "learning_rate": 1.6182026241123636e-05, + "loss": 0.1556, + "step": 25370 + }, + { + "epoch": 1.18, + "learning_rate": 1.618124245606885e-05, + "loss": 0.3036, + "step": 25375 + }, + { + "epoch": 1.18, + "learning_rate": 1.6180458671014063e-05, + "loss": 0.3818, + "step": 25380 + }, + { + "epoch": 1.18, + "learning_rate": 1.6179674885959277e-05, + "loss": 0.2293, + "step": 25385 + }, + { + "epoch": 1.18, + "learning_rate": 1.617889110090449e-05, + "loss": 0.0748, + "step": 25390 + }, + { + "epoch": 1.18, + "learning_rate": 1.61781073158497e-05, + "loss": 0.1107, + "step": 25395 + }, + { + "epoch": 1.19, + "learning_rate": 1.6177323530794916e-05, + "loss": 0.166, + "step": 25400 + }, + { + "epoch": 1.19, + "learning_rate": 1.617653974574013e-05, + "loss": 0.0938, + "step": 25405 + }, + { + "epoch": 1.19, + "learning_rate": 1.6175755960685343e-05, + "loss": 0.1765, + "step": 25410 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174972175630557e-05, + "loss": 0.1421, + "step": 25415 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174188390575768e-05, + "loss": 0.169, + "step": 25420 + }, + { + "epoch": 1.19, + "learning_rate": 1.6173404605520985e-05, + "loss": 0.2901, + "step": 25425 + }, + { + "epoch": 1.19, + "learning_rate": 1.6172620820466196e-05, + "loss": 0.4629, + "step": 25430 + }, + { + "epoch": 1.19, + "learning_rate": 1.617183703541141e-05, + "loss": 0.4004, + "step": 25435 + }, + { + "epoch": 1.19, + "learning_rate": 1.6171053250356623e-05, + "loss": 0.0532, + "step": 25440 + }, + { + "epoch": 1.19, + "learning_rate": 1.6170269465301837e-05, + "loss": 0.0672, + "step": 25445 + }, + { + "epoch": 1.19, + "learning_rate": 1.616948568024705e-05, + "loss": 0.1277, + "step": 25450 + }, + { + "epoch": 1.19, + "learning_rate": 1.6168701895192265e-05, + "loss": 0.0931, + "step": 25455 + }, + { + "epoch": 1.19, + "learning_rate": 1.6167918110137476e-05, + "loss": 0.1628, + "step": 25460 + }, + { + "epoch": 1.19, + "learning_rate": 1.616713432508269e-05, + "loss": 0.1642, + "step": 25465 + }, + { + "epoch": 1.19, + "learning_rate": 1.6166350540027903e-05, + "loss": 0.1649, + "step": 25470 + }, + { + "epoch": 1.19, + "learning_rate": 1.6165566754973117e-05, + "loss": 0.2531, + "step": 25475 + }, + { + "epoch": 1.19, + "learning_rate": 1.616478296991833e-05, + "loss": 0.4141, + "step": 25480 + }, + { + "epoch": 1.19, + "learning_rate": 1.6163999184863545e-05, + "loss": 0.3355, + "step": 25485 + }, + { + "epoch": 1.19, + "learning_rate": 1.616321539980876e-05, + "loss": 0.0631, + "step": 25490 + }, + { + "epoch": 1.19, + "learning_rate": 1.616243161475397e-05, + "loss": 0.0915, + "step": 25495 + }, + { + "epoch": 1.19, + "learning_rate": 1.6161647829699187e-05, + "loss": 0.1369, + "step": 25500 + }, + { + "epoch": 1.19, + "learning_rate": 1.6160864044644397e-05, + "loss": 0.1394, + "step": 25505 + }, + { + "epoch": 1.19, + "learning_rate": 1.616008025958961e-05, + "loss": 0.096, + "step": 25510 + }, + { + "epoch": 1.19, + "learning_rate": 1.6159296474534825e-05, + "loss": 0.1094, + "step": 25515 + }, + { + "epoch": 1.19, + "learning_rate": 1.615851268948004e-05, + "loss": 0.1217, + "step": 25520 + }, + { + "epoch": 1.19, + "learning_rate": 1.6157728904425253e-05, + "loss": 0.2451, + "step": 25525 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156945119370464e-05, + "loss": 0.2088, + "step": 25530 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156161334315677e-05, + "loss": 0.2696, + "step": 25535 + }, + { + "epoch": 1.19, + "learning_rate": 1.615537754926089e-05, + "loss": 0.0968, + "step": 25540 + }, + { + "epoch": 1.19, + "learning_rate": 1.6154593764206105e-05, + "loss": 0.0515, + "step": 25545 + }, + { + "epoch": 1.19, + "learning_rate": 1.615380997915132e-05, + "loss": 0.0912, + "step": 25550 + }, + { + "epoch": 1.19, + "learning_rate": 1.6153026194096533e-05, + "loss": 0.1158, + "step": 25555 + }, + { + "epoch": 1.19, + "learning_rate": 1.6152242409041744e-05, + "loss": 0.1445, + "step": 25560 + }, + { + "epoch": 1.19, + "learning_rate": 1.615145862398696e-05, + "loss": 0.1769, + "step": 25565 + }, + { + "epoch": 1.19, + "learning_rate": 1.615067483893217e-05, + "loss": 0.1704, + "step": 25570 + }, + { + "epoch": 1.19, + "learning_rate": 1.6149891053877385e-05, + "loss": 0.2812, + "step": 25575 + }, + { + "epoch": 1.19, + "learning_rate": 1.61491072688226e-05, + "loss": 0.3944, + "step": 25580 + }, + { + "epoch": 1.19, + "learning_rate": 1.6148323483767813e-05, + "loss": 0.2521, + "step": 25585 + }, + { + "epoch": 1.19, + "learning_rate": 1.6147539698713027e-05, + "loss": 0.0945, + "step": 25590 + }, + { + "epoch": 1.19, + "learning_rate": 1.6146755913658238e-05, + "loss": 0.1083, + "step": 25595 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145972128603455e-05, + "loss": 0.0934, + "step": 25600 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145188343548665e-05, + "loss": 0.1016, + "step": 25605 + }, + { + "epoch": 1.19, + "learning_rate": 1.614440455849388e-05, + "loss": 0.1465, + "step": 25610 + }, + { + "epoch": 1.2, + "learning_rate": 1.6143620773439093e-05, + "loss": 0.1531, + "step": 25615 + }, + { + "epoch": 1.2, + "learning_rate": 1.6142836988384307e-05, + "loss": 0.1764, + "step": 25620 + }, + { + "epoch": 1.2, + "learning_rate": 1.614205320332952e-05, + "loss": 0.2381, + "step": 25625 + }, + { + "epoch": 1.2, + "learning_rate": 1.6141269418274735e-05, + "loss": 0.4556, + "step": 25630 + }, + { + "epoch": 1.2, + "learning_rate": 1.6140485633219945e-05, + "loss": 0.3126, + "step": 25635 + }, + { + "epoch": 1.2, + "learning_rate": 1.6139701848165163e-05, + "loss": 0.0332, + "step": 25640 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138918063110373e-05, + "loss": 0.0406, + "step": 25645 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138134278055587e-05, + "loss": 0.1147, + "step": 25650 + }, + { + "epoch": 1.2, + "learning_rate": 1.61373504930008e-05, + "loss": 0.1279, + "step": 25655 + }, + { + "epoch": 1.2, + "learning_rate": 1.613656670794601e-05, + "loss": 0.1488, + "step": 25660 + }, + { + "epoch": 1.2, + "learning_rate": 1.613578292289123e-05, + "loss": 0.0947, + "step": 25665 + }, + { + "epoch": 1.2, + "learning_rate": 1.613499913783644e-05, + "loss": 0.147, + "step": 25670 + }, + { + "epoch": 1.2, + "learning_rate": 1.6134215352781653e-05, + "loss": 0.2196, + "step": 25675 + }, + { + "epoch": 1.2, + "learning_rate": 1.6133431567726867e-05, + "loss": 0.3325, + "step": 25680 + }, + { + "epoch": 1.2, + "learning_rate": 1.613264778267208e-05, + "loss": 0.3548, + "step": 25685 + }, + { + "epoch": 1.2, + "learning_rate": 1.6131863997617295e-05, + "loss": 0.0497, + "step": 25690 + }, + { + "epoch": 1.2, + "learning_rate": 1.613108021256251e-05, + "loss": 0.0752, + "step": 25695 + }, + { + "epoch": 1.2, + "learning_rate": 1.6130296427507723e-05, + "loss": 0.0716, + "step": 25700 + }, + { + "epoch": 1.2, + "learning_rate": 1.6129512642452937e-05, + "loss": 0.1445, + "step": 25705 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128728857398147e-05, + "loss": 0.1371, + "step": 25710 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128101829354318e-05, + "loss": 0.1388, + "step": 25715 + }, + { + "epoch": 1.2, + "learning_rate": 1.6127318044299535e-05, + "loss": 0.1818, + "step": 25720 + }, + { + "epoch": 1.2, + "learning_rate": 1.6126534259244745e-05, + "loss": 0.1747, + "step": 25725 + }, + { + "epoch": 1.2, + "learning_rate": 1.612575047418996e-05, + "loss": 0.2925, + "step": 25730 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124966689135173e-05, + "loss": 0.212, + "step": 25735 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124182904080384e-05, + "loss": 0.07, + "step": 25740 + }, + { + "epoch": 1.2, + "learning_rate": 1.61233991190256e-05, + "loss": 0.0871, + "step": 25745 + }, + { + "epoch": 1.2, + "learning_rate": 1.612261533397081e-05, + "loss": 0.1572, + "step": 25750 + }, + { + "epoch": 1.2, + "learning_rate": 1.6121831548916026e-05, + "loss": 0.1053, + "step": 25755 + }, + { + "epoch": 1.2, + "learning_rate": 1.612104776386124e-05, + "loss": 0.1303, + "step": 25760 + }, + { + "epoch": 1.2, + "learning_rate": 1.6120263978806453e-05, + "loss": 0.2018, + "step": 25765 + }, + { + "epoch": 1.2, + "learning_rate": 1.6119480193751667e-05, + "loss": 0.2664, + "step": 25770 + }, + { + "epoch": 1.2, + "learning_rate": 1.611869640869688e-05, + "loss": 0.2745, + "step": 25775 + }, + { + "epoch": 1.2, + "learning_rate": 1.6117912623642095e-05, + "loss": 0.4014, + "step": 25780 + }, + { + "epoch": 1.2, + "learning_rate": 1.611712883858731e-05, + "loss": 0.2742, + "step": 25785 + }, + { + "epoch": 1.2, + "learning_rate": 1.611634505353252e-05, + "loss": 0.094, + "step": 25790 + }, + { + "epoch": 1.2, + "learning_rate": 1.6115561268477733e-05, + "loss": 0.1958, + "step": 25795 + }, + { + "epoch": 1.2, + "learning_rate": 1.6114777483422947e-05, + "loss": 0.0788, + "step": 25800 + }, + { + "epoch": 1.2, + "learning_rate": 1.611399369836816e-05, + "loss": 0.106, + "step": 25805 + }, + { + "epoch": 1.2, + "learning_rate": 1.6113209913313375e-05, + "loss": 0.1569, + "step": 25810 + }, + { + "epoch": 1.2, + "learning_rate": 1.6112426128258586e-05, + "loss": 0.126, + "step": 25815 + }, + { + "epoch": 1.2, + "learning_rate": 1.6111642343203803e-05, + "loss": 0.1446, + "step": 25820 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110858558149013e-05, + "loss": 0.3583, + "step": 25825 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110074773094227e-05, + "loss": 0.3009, + "step": 25830 + }, + { + "epoch": 1.21, + "learning_rate": 1.610929098803944e-05, + "loss": 0.3158, + "step": 25835 + }, + { + "epoch": 1.21, + "learning_rate": 1.6108507202984655e-05, + "loss": 0.1105, + "step": 25840 + }, + { + "epoch": 1.21, + "learning_rate": 1.610772341792987e-05, + "loss": 0.0567, + "step": 25845 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106939632875083e-05, + "loss": 0.0254, + "step": 25850 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106155847820293e-05, + "loss": 0.1127, + "step": 25855 + }, + { + "epoch": 1.21, + "learning_rate": 1.6105372062765507e-05, + "loss": 0.165, + "step": 25860 + }, + { + "epoch": 1.21, + "learning_rate": 1.610458827771072e-05, + "loss": 0.1691, + "step": 25865 + }, + { + "epoch": 1.21, + "learning_rate": 1.6103804492655935e-05, + "loss": 0.1748, + "step": 25870 + }, + { + "epoch": 1.21, + "learning_rate": 1.610302070760115e-05, + "loss": 0.3087, + "step": 25875 + }, + { + "epoch": 1.21, + "learning_rate": 1.6102236922546363e-05, + "loss": 0.3935, + "step": 25880 + }, + { + "epoch": 1.21, + "learning_rate": 1.6101453137491577e-05, + "loss": 0.3608, + "step": 25885 + }, + { + "epoch": 1.21, + "learning_rate": 1.6100669352436787e-05, + "loss": 0.0722, + "step": 25890 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099885567382005e-05, + "loss": 0.1239, + "step": 25895 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099101782327215e-05, + "loss": 0.0837, + "step": 25900 + }, + { + "epoch": 1.21, + "learning_rate": 1.609831799727243e-05, + "loss": 0.1534, + "step": 25905 + }, + { + "epoch": 1.21, + "learning_rate": 1.6097534212217643e-05, + "loss": 0.1598, + "step": 25910 + }, + { + "epoch": 1.21, + "learning_rate": 1.6096750427162857e-05, + "loss": 0.1712, + "step": 25915 + }, + { + "epoch": 1.21, + "learning_rate": 1.609596664210807e-05, + "loss": 0.1853, + "step": 25920 + }, + { + "epoch": 1.21, + "learning_rate": 1.609518285705328e-05, + "loss": 0.2454, + "step": 25925 + }, + { + "epoch": 1.21, + "learning_rate": 1.6094399071998495e-05, + "loss": 0.303, + "step": 25930 + }, + { + "epoch": 1.21, + "learning_rate": 1.609361528694371e-05, + "loss": 0.3649, + "step": 25935 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092831501888923e-05, + "loss": 0.0425, + "step": 25940 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092047716834137e-05, + "loss": 0.1058, + "step": 25945 + }, + { + "epoch": 1.21, + "learning_rate": 1.609126393177935e-05, + "loss": 0.0823, + "step": 25950 + }, + { + "epoch": 1.21, + "learning_rate": 1.609048014672456e-05, + "loss": 0.0905, + "step": 25955 + }, + { + "epoch": 1.21, + "learning_rate": 1.608969636166978e-05, + "loss": 0.1479, + "step": 25960 + }, + { + "epoch": 1.21, + "learning_rate": 1.608891257661499e-05, + "loss": 0.1194, + "step": 25965 + }, + { + "epoch": 1.21, + "learning_rate": 1.6088128791560203e-05, + "loss": 0.2551, + "step": 25970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6087345006505417e-05, + "loss": 0.2567, + "step": 25975 + }, + { + "epoch": 1.21, + "learning_rate": 1.608656122145063e-05, + "loss": 0.4998, + "step": 25980 + }, + { + "epoch": 1.21, + "learning_rate": 1.6085777436395845e-05, + "loss": 0.3416, + "step": 25985 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084993651341055e-05, + "loss": 0.0866, + "step": 25990 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084209866286273e-05, + "loss": 0.1128, + "step": 25995 + }, + { + "epoch": 1.21, + "learning_rate": 1.6083426081231483e-05, + "loss": 0.1394, + "step": 26000 + }, + { + "epoch": 1.21, + "learning_rate": 1.6082642296176697e-05, + "loss": 0.163, + "step": 26005 + }, + { + "epoch": 1.21, + "learning_rate": 1.608185851112191e-05, + "loss": 0.1127, + "step": 26010 + }, + { + "epoch": 1.21, + "learning_rate": 1.6081074726067125e-05, + "loss": 0.1923, + "step": 26015 + }, + { + "epoch": 1.21, + "learning_rate": 1.608029094101234e-05, + "loss": 0.1738, + "step": 26020 + }, + { + "epoch": 1.21, + "learning_rate": 1.6079507155957553e-05, + "loss": 0.2733, + "step": 26025 + }, + { + "epoch": 1.21, + "learning_rate": 1.6078723370902763e-05, + "loss": 0.3812, + "step": 26030 + }, + { + "epoch": 1.21, + "learning_rate": 1.607793958584798e-05, + "loss": 0.2911, + "step": 26035 + }, + { + "epoch": 1.22, + "learning_rate": 1.607715580079319e-05, + "loss": 0.0426, + "step": 26040 + }, + { + "epoch": 1.22, + "learning_rate": 1.6076372015738405e-05, + "loss": 0.1156, + "step": 26045 + }, + { + "epoch": 1.22, + "learning_rate": 1.607558823068362e-05, + "loss": 0.1092, + "step": 26050 + }, + { + "epoch": 1.22, + "learning_rate": 1.607480444562883e-05, + "loss": 0.1071, + "step": 26055 + }, + { + "epoch": 1.22, + "learning_rate": 1.6074020660574047e-05, + "loss": 0.1825, + "step": 26060 + }, + { + "epoch": 1.22, + "learning_rate": 1.6073236875519257e-05, + "loss": 0.2019, + "step": 26065 + }, + { + "epoch": 1.22, + "learning_rate": 1.607245309046447e-05, + "loss": 0.2312, + "step": 26070 + }, + { + "epoch": 1.22, + "learning_rate": 1.6071669305409685e-05, + "loss": 0.1766, + "step": 26075 + }, + { + "epoch": 1.22, + "learning_rate": 1.60708855203549e-05, + "loss": 0.3091, + "step": 26080 + }, + { + "epoch": 1.22, + "learning_rate": 1.6070101735300113e-05, + "loss": 0.35, + "step": 26085 + }, + { + "epoch": 1.22, + "learning_rate": 1.6069317950245327e-05, + "loss": 0.0862, + "step": 26090 + }, + { + "epoch": 1.22, + "learning_rate": 1.606853416519054e-05, + "loss": 0.0783, + "step": 26095 + }, + { + "epoch": 1.22, + "learning_rate": 1.6067750380135755e-05, + "loss": 0.0974, + "step": 26100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066966595080965e-05, + "loss": 0.0428, + "step": 26105 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066182810026182e-05, + "loss": 0.1272, + "step": 26110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6065399024971393e-05, + "loss": 0.1728, + "step": 26115 + }, + { + "epoch": 1.22, + "learning_rate": 1.6064615239916607e-05, + "loss": 0.2236, + "step": 26120 + }, + { + "epoch": 1.22, + "learning_rate": 1.606383145486182e-05, + "loss": 0.3677, + "step": 26125 + }, + { + "epoch": 1.22, + "learning_rate": 1.606304766980703e-05, + "loss": 0.4215, + "step": 26130 + }, + { + "epoch": 1.22, + "learning_rate": 1.606226388475225e-05, + "loss": 0.3085, + "step": 26135 + }, + { + "epoch": 1.22, + "learning_rate": 1.606148009969746e-05, + "loss": 0.0464, + "step": 26140 + }, + { + "epoch": 1.22, + "learning_rate": 1.6060696314642673e-05, + "loss": 0.0697, + "step": 26145 + }, + { + "epoch": 1.22, + "learning_rate": 1.6059912529587887e-05, + "loss": 0.1091, + "step": 26150 + }, + { + "epoch": 1.22, + "learning_rate": 1.60591287445331e-05, + "loss": 0.1073, + "step": 26155 + }, + { + "epoch": 1.22, + "learning_rate": 1.6058344959478315e-05, + "loss": 0.1585, + "step": 26160 + }, + { + "epoch": 1.22, + "learning_rate": 1.605756117442353e-05, + "loss": 0.1376, + "step": 26165 + }, + { + "epoch": 1.22, + "learning_rate": 1.605677738936874e-05, + "loss": 0.2462, + "step": 26170 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055993604313956e-05, + "loss": 0.1889, + "step": 26175 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055209819259167e-05, + "loss": 0.3838, + "step": 26180 + }, + { + "epoch": 1.22, + "learning_rate": 1.605442603420438e-05, + "loss": 0.3044, + "step": 26185 + }, + { + "epoch": 1.22, + "learning_rate": 1.6053642249149595e-05, + "loss": 0.0562, + "step": 26190 + }, + { + "epoch": 1.22, + "learning_rate": 1.605285846409481e-05, + "loss": 0.0978, + "step": 26195 + }, + { + "epoch": 1.22, + "learning_rate": 1.6052074679040022e-05, + "loss": 0.1215, + "step": 26200 + }, + { + "epoch": 1.22, + "learning_rate": 1.6051290893985233e-05, + "loss": 0.1945, + "step": 26205 + }, + { + "epoch": 1.22, + "learning_rate": 1.605050710893045e-05, + "loss": 0.1485, + "step": 26210 + }, + { + "epoch": 1.22, + "learning_rate": 1.604972332387566e-05, + "loss": 0.0891, + "step": 26215 + }, + { + "epoch": 1.22, + "learning_rate": 1.6048939538820875e-05, + "loss": 0.1399, + "step": 26220 + }, + { + "epoch": 1.22, + "learning_rate": 1.604815575376609e-05, + "loss": 0.205, + "step": 26225 + }, + { + "epoch": 1.22, + "learning_rate": 1.6047371968711303e-05, + "loss": 0.3367, + "step": 26230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6046588183656516e-05, + "loss": 0.2997, + "step": 26235 + }, + { + "epoch": 1.22, + "learning_rate": 1.604580439860173e-05, + "loss": 0.0592, + "step": 26240 + }, + { + "epoch": 1.22, + "learning_rate": 1.604502061354694e-05, + "loss": 0.0551, + "step": 26245 + }, + { + "epoch": 1.22, + "learning_rate": 1.6044236828492155e-05, + "loss": 0.0715, + "step": 26250 + }, + { + "epoch": 1.23, + "learning_rate": 1.604345304343737e-05, + "loss": 0.1212, + "step": 26255 + }, + { + "epoch": 1.23, + "learning_rate": 1.6042669258382583e-05, + "loss": 0.1241, + "step": 26260 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041885473327796e-05, + "loss": 0.1764, + "step": 26265 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041101688273007e-05, + "loss": 0.1995, + "step": 26270 + }, + { + "epoch": 1.23, + "learning_rate": 1.6040317903218224e-05, + "loss": 0.2772, + "step": 26275 + }, + { + "epoch": 1.23, + "learning_rate": 1.6039534118163435e-05, + "loss": 0.3426, + "step": 26280 + }, + { + "epoch": 1.23, + "learning_rate": 1.603875033310865e-05, + "loss": 0.2971, + "step": 26285 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037966548053863e-05, + "loss": 0.065, + "step": 26290 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037182762999077e-05, + "loss": 0.0287, + "step": 26295 + }, + { + "epoch": 1.23, + "learning_rate": 1.603639897794429e-05, + "loss": 0.1137, + "step": 26300 + }, + { + "epoch": 1.23, + "learning_rate": 1.6035615192889504e-05, + "loss": 0.1516, + "step": 26305 + }, + { + "epoch": 1.23, + "learning_rate": 1.6034831407834718e-05, + "loss": 0.0944, + "step": 26310 + }, + { + "epoch": 1.23, + "learning_rate": 1.603404762277993e-05, + "loss": 0.1105, + "step": 26315 + }, + { + "epoch": 1.23, + "learning_rate": 1.6033263837725143e-05, + "loss": 0.1635, + "step": 26320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6032480052670357e-05, + "loss": 0.2127, + "step": 26325 + }, + { + "epoch": 1.23, + "learning_rate": 1.603169626761557e-05, + "loss": 0.4454, + "step": 26330 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030912482560784e-05, + "loss": 0.3167, + "step": 26335 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030128697505998e-05, + "loss": 0.0731, + "step": 26340 + }, + { + "epoch": 1.23, + "learning_rate": 1.602934491245121e-05, + "loss": 0.0499, + "step": 26345 + }, + { + "epoch": 1.23, + "learning_rate": 1.6028561127396426e-05, + "loss": 0.097, + "step": 26350 + }, + { + "epoch": 1.23, + "learning_rate": 1.6027777342341637e-05, + "loss": 0.0544, + "step": 26355 + }, + { + "epoch": 1.23, + "learning_rate": 1.602699355728685e-05, + "loss": 0.1498, + "step": 26360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6026209772232064e-05, + "loss": 0.1206, + "step": 26365 + }, + { + "epoch": 1.23, + "learning_rate": 1.602542598717728e-05, + "loss": 0.2232, + "step": 26370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6024642202122492e-05, + "loss": 0.3615, + "step": 26375 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023858417067703e-05, + "loss": 0.3271, + "step": 26380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023074632012917e-05, + "loss": 0.3493, + "step": 26385 + }, + { + "epoch": 1.23, + "learning_rate": 1.602229084695813e-05, + "loss": 0.0708, + "step": 26390 + }, + { + "epoch": 1.23, + "learning_rate": 1.6021507061903344e-05, + "loss": 0.124, + "step": 26395 + }, + { + "epoch": 1.23, + "learning_rate": 1.602072327684856e-05, + "loss": 0.0928, + "step": 26400 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019939491793772e-05, + "loss": 0.0961, + "step": 26405 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019155706738986e-05, + "loss": 0.1516, + "step": 26410 + }, + { + "epoch": 1.23, + "learning_rate": 1.60183719216842e-05, + "loss": 0.1803, + "step": 26415 + }, + { + "epoch": 1.23, + "learning_rate": 1.601758813662941e-05, + "loss": 0.1668, + "step": 26420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6016804351574628e-05, + "loss": 0.2315, + "step": 26425 + }, + { + "epoch": 1.23, + "learning_rate": 1.601602056651984e-05, + "loss": 0.353, + "step": 26430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6015236781465052e-05, + "loss": 0.319, + "step": 26435 + }, + { + "epoch": 1.23, + "learning_rate": 1.6014452996410266e-05, + "loss": 0.0558, + "step": 26440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6013669211355477e-05, + "loss": 0.1412, + "step": 26445 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012885426300694e-05, + "loss": 0.0911, + "step": 26450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012101641245905e-05, + "loss": 0.0855, + "step": 26455 + }, + { + "epoch": 1.23, + "learning_rate": 1.601131785619112e-05, + "loss": 0.1342, + "step": 26460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010534071136332e-05, + "loss": 0.1502, + "step": 26465 + }, + { + "epoch": 1.24, + "learning_rate": 1.6009750286081546e-05, + "loss": 0.2149, + "step": 26470 + }, + { + "epoch": 1.24, + "learning_rate": 1.600896650102676e-05, + "loss": 0.1451, + "step": 26475 + }, + { + "epoch": 1.24, + "learning_rate": 1.6008182715971974e-05, + "loss": 0.38, + "step": 26480 + }, + { + "epoch": 1.24, + "learning_rate": 1.6007398930917185e-05, + "loss": 0.5451, + "step": 26485 + }, + { + "epoch": 1.24, + "learning_rate": 1.6006615145862402e-05, + "loss": 0.0726, + "step": 26490 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005831360807612e-05, + "loss": 0.0716, + "step": 26495 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005047575752826e-05, + "loss": 0.084, + "step": 26500 + }, + { + "epoch": 1.24, + "learning_rate": 1.600426379069804e-05, + "loss": 0.1345, + "step": 26505 + }, + { + "epoch": 1.24, + "learning_rate": 1.6003480005643254e-05, + "loss": 0.2791, + "step": 26510 + }, + { + "epoch": 1.24, + "learning_rate": 1.6002696220588468e-05, + "loss": 0.1281, + "step": 26515 + }, + { + "epoch": 1.24, + "learning_rate": 1.600191243553368e-05, + "loss": 0.2016, + "step": 26520 + }, + { + "epoch": 1.24, + "learning_rate": 1.6001128650478896e-05, + "loss": 0.3165, + "step": 26525 + }, + { + "epoch": 1.24, + "learning_rate": 1.6000344865424106e-05, + "loss": 0.336, + "step": 26530 + }, + { + "epoch": 1.24, + "learning_rate": 1.599956108036932e-05, + "loss": 0.3234, + "step": 26535 + }, + { + "epoch": 1.24, + "learning_rate": 1.5998777295314534e-05, + "loss": 0.1082, + "step": 26540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997993510259748e-05, + "loss": 0.0455, + "step": 26545 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997209725204962e-05, + "loss": 0.1116, + "step": 26550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5996425940150176e-05, + "loss": 0.0805, + "step": 26555 + }, + { + "epoch": 1.24, + "learning_rate": 1.5995642155095386e-05, + "loss": 0.1438, + "step": 26560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994858370040604e-05, + "loss": 0.1412, + "step": 26565 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994074584985814e-05, + "loss": 0.2316, + "step": 26570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5993290799931028e-05, + "loss": 0.2124, + "step": 26575 + }, + { + "epoch": 1.24, + "learning_rate": 1.5992507014876242e-05, + "loss": 0.3504, + "step": 26580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5991723229821453e-05, + "loss": 0.3047, + "step": 26585 + }, + { + "epoch": 1.24, + "learning_rate": 1.599093944476667e-05, + "loss": 0.0622, + "step": 26590 + }, + { + "epoch": 1.24, + "learning_rate": 1.599015565971188e-05, + "loss": 0.0547, + "step": 26595 + }, + { + "epoch": 1.24, + "learning_rate": 1.5989371874657094e-05, + "loss": 0.0788, + "step": 26600 + }, + { + "epoch": 1.24, + "learning_rate": 1.5988588089602308e-05, + "loss": 0.1388, + "step": 26605 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987804304547522e-05, + "loss": 0.1345, + "step": 26610 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987020519492736e-05, + "loss": 0.1711, + "step": 26615 + }, + { + "epoch": 1.24, + "learning_rate": 1.598623673443795e-05, + "loss": 0.209, + "step": 26620 + }, + { + "epoch": 1.24, + "learning_rate": 1.5985452949383164e-05, + "loss": 0.24, + "step": 26625 + }, + { + "epoch": 1.24, + "learning_rate": 1.5984669164328378e-05, + "loss": 0.2429, + "step": 26630 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983885379273588e-05, + "loss": 0.2346, + "step": 26635 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983101594218802e-05, + "loss": 0.052, + "step": 26640 + }, + { + "epoch": 1.24, + "learning_rate": 1.5982317809164016e-05, + "loss": 0.1405, + "step": 26645 + }, + { + "epoch": 1.24, + "learning_rate": 1.598153402410923e-05, + "loss": 0.0957, + "step": 26650 + }, + { + "epoch": 1.24, + "learning_rate": 1.5980750239054444e-05, + "loss": 0.0875, + "step": 26655 + }, + { + "epoch": 1.24, + "learning_rate": 1.5979966453999654e-05, + "loss": 0.0848, + "step": 26660 + }, + { + "epoch": 1.24, + "learning_rate": 1.597918266894487e-05, + "loss": 0.1606, + "step": 26665 + }, + { + "epoch": 1.24, + "learning_rate": 1.5978398883890082e-05, + "loss": 0.2839, + "step": 26670 + }, + { + "epoch": 1.24, + "learning_rate": 1.5977615098835296e-05, + "loss": 0.2471, + "step": 26675 + }, + { + "epoch": 1.24, + "learning_rate": 1.597683131378051e-05, + "loss": 0.3593, + "step": 26680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5976047528725724e-05, + "loss": 0.3423, + "step": 26685 + }, + { + "epoch": 1.25, + "learning_rate": 1.5975263743670938e-05, + "loss": 0.0586, + "step": 26690 + }, + { + "epoch": 1.25, + "learning_rate": 1.5974479958616152e-05, + "loss": 0.0374, + "step": 26695 + }, + { + "epoch": 1.25, + "learning_rate": 1.5973696173561362e-05, + "loss": 0.1045, + "step": 26700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5972912388506576e-05, + "loss": 0.1342, + "step": 26705 + }, + { + "epoch": 1.25, + "learning_rate": 1.597212860345179e-05, + "loss": 0.2293, + "step": 26710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5971344818397004e-05, + "loss": 0.1674, + "step": 26715 + }, + { + "epoch": 1.25, + "learning_rate": 1.5970561033342218e-05, + "loss": 0.1991, + "step": 26720 + }, + { + "epoch": 1.25, + "learning_rate": 1.5969777248287432e-05, + "loss": 0.2317, + "step": 26725 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968993463232646e-05, + "loss": 0.4123, + "step": 26730 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968209678177856e-05, + "loss": 0.2453, + "step": 26735 + }, + { + "epoch": 1.25, + "learning_rate": 1.5967425893123073e-05, + "loss": 0.083, + "step": 26740 + }, + { + "epoch": 1.25, + "learning_rate": 1.5966642108068284e-05, + "loss": 0.0648, + "step": 26745 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965858323013498e-05, + "loss": 0.1362, + "step": 26750 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965074537958712e-05, + "loss": 0.1406, + "step": 26755 + }, + { + "epoch": 1.25, + "learning_rate": 1.5964290752903926e-05, + "loss": 0.1994, + "step": 26760 + }, + { + "epoch": 1.25, + "learning_rate": 1.596350696784914e-05, + "loss": 0.1565, + "step": 26765 + }, + { + "epoch": 1.25, + "learning_rate": 1.596272318279435e-05, + "loss": 0.2318, + "step": 26770 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961939397739564e-05, + "loss": 0.2744, + "step": 26775 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961155612684778e-05, + "loss": 0.4521, + "step": 26780 + }, + { + "epoch": 1.25, + "learning_rate": 1.5960371827629992e-05, + "loss": 0.2907, + "step": 26785 + }, + { + "epoch": 1.25, + "learning_rate": 1.5959588042575206e-05, + "loss": 0.086, + "step": 26790 + }, + { + "epoch": 1.25, + "learning_rate": 1.595880425752042e-05, + "loss": 0.0616, + "step": 26795 + }, + { + "epoch": 1.25, + "learning_rate": 1.595802047246563e-05, + "loss": 0.0885, + "step": 26800 + }, + { + "epoch": 1.25, + "learning_rate": 1.5957236687410847e-05, + "loss": 0.1038, + "step": 26805 + }, + { + "epoch": 1.25, + "learning_rate": 1.5956452902356058e-05, + "loss": 0.1311, + "step": 26810 + }, + { + "epoch": 1.25, + "learning_rate": 1.5955669117301272e-05, + "loss": 0.2754, + "step": 26815 + }, + { + "epoch": 1.25, + "learning_rate": 1.5954885332246486e-05, + "loss": 0.2622, + "step": 26820 + }, + { + "epoch": 1.25, + "learning_rate": 1.59541015471917e-05, + "loss": 0.3481, + "step": 26825 + }, + { + "epoch": 1.25, + "learning_rate": 1.5953317762136914e-05, + "loss": 0.3888, + "step": 26830 + }, + { + "epoch": 1.25, + "learning_rate": 1.5952533977082124e-05, + "loss": 0.3201, + "step": 26835 + }, + { + "epoch": 1.25, + "learning_rate": 1.595175019202734e-05, + "loss": 0.0431, + "step": 26840 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950966406972552e-05, + "loss": 0.0601, + "step": 26845 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950182621917766e-05, + "loss": 0.0401, + "step": 26850 + }, + { + "epoch": 1.25, + "learning_rate": 1.594939883686298e-05, + "loss": 0.0848, + "step": 26855 + }, + { + "epoch": 1.25, + "learning_rate": 1.5948615051808194e-05, + "loss": 0.1519, + "step": 26860 + }, + { + "epoch": 1.25, + "learning_rate": 1.5947831266753408e-05, + "loss": 0.1502, + "step": 26865 + }, + { + "epoch": 1.25, + "learning_rate": 1.594704748169862e-05, + "loss": 0.1738, + "step": 26870 + }, + { + "epoch": 1.25, + "learning_rate": 1.5946263696643832e-05, + "loss": 0.1806, + "step": 26875 + }, + { + "epoch": 1.25, + "learning_rate": 1.594547991158905e-05, + "loss": 0.359, + "step": 26880 + }, + { + "epoch": 1.25, + "learning_rate": 1.594469612653426e-05, + "loss": 0.3319, + "step": 26885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943912341479474e-05, + "loss": 0.0602, + "step": 26890 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943128556424688e-05, + "loss": 0.0851, + "step": 26895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5942344771369898e-05, + "loss": 0.1058, + "step": 26900 + }, + { + "epoch": 1.26, + "learning_rate": 1.5941560986315115e-05, + "loss": 0.1035, + "step": 26905 + }, + { + "epoch": 1.26, + "learning_rate": 1.5940777201260326e-05, + "loss": 0.1427, + "step": 26910 + }, + { + "epoch": 1.26, + "learning_rate": 1.593999341620554e-05, + "loss": 0.1269, + "step": 26915 + }, + { + "epoch": 1.26, + "learning_rate": 1.5939209631150754e-05, + "loss": 0.1434, + "step": 26920 + }, + { + "epoch": 1.26, + "learning_rate": 1.5938425846095968e-05, + "loss": 0.2233, + "step": 26925 + }, + { + "epoch": 1.26, + "learning_rate": 1.593764206104118e-05, + "loss": 0.4562, + "step": 26930 + }, + { + "epoch": 1.26, + "learning_rate": 1.5936858275986395e-05, + "loss": 0.3848, + "step": 26935 + }, + { + "epoch": 1.26, + "learning_rate": 1.593607449093161e-05, + "loss": 0.0614, + "step": 26940 + }, + { + "epoch": 1.26, + "learning_rate": 1.5935290705876823e-05, + "loss": 0.0845, + "step": 26945 + }, + { + "epoch": 1.26, + "learning_rate": 1.5934506920822034e-05, + "loss": 0.0953, + "step": 26950 + }, + { + "epoch": 1.26, + "learning_rate": 1.593372313576725e-05, + "loss": 0.1694, + "step": 26955 + }, + { + "epoch": 1.26, + "learning_rate": 1.593293935071246e-05, + "loss": 0.1371, + "step": 26960 + }, + { + "epoch": 1.26, + "learning_rate": 1.5932155565657676e-05, + "loss": 0.2797, + "step": 26965 + }, + { + "epoch": 1.26, + "learning_rate": 1.593137178060289e-05, + "loss": 0.2314, + "step": 26970 + }, + { + "epoch": 1.26, + "learning_rate": 1.59305879955481e-05, + "loss": 0.1904, + "step": 26975 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929804210493317e-05, + "loss": 0.3626, + "step": 26980 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929020425438528e-05, + "loss": 0.3654, + "step": 26985 + }, + { + "epoch": 1.26, + "learning_rate": 1.592823664038374e-05, + "loss": 0.0897, + "step": 26990 + }, + { + "epoch": 1.26, + "learning_rate": 1.5927452855328956e-05, + "loss": 0.1229, + "step": 26995 + }, + { + "epoch": 1.26, + "learning_rate": 1.592666907027417e-05, + "loss": 0.1352, + "step": 27000 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925885285219383e-05, + "loss": 0.1724, + "step": 27005 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925101500164597e-05, + "loss": 0.1872, + "step": 27010 + }, + { + "epoch": 1.26, + "learning_rate": 1.5924317715109808e-05, + "loss": 0.211, + "step": 27015 + }, + { + "epoch": 1.26, + "learning_rate": 1.5923533930055025e-05, + "loss": 0.2077, + "step": 27020 + }, + { + "epoch": 1.26, + "learning_rate": 1.5922750145000236e-05, + "loss": 0.2314, + "step": 27025 + }, + { + "epoch": 1.26, + "learning_rate": 1.592196635994545e-05, + "loss": 0.2645, + "step": 27030 + }, + { + "epoch": 1.26, + "learning_rate": 1.5921182574890663e-05, + "loss": 0.3657, + "step": 27035 + }, + { + "epoch": 1.26, + "learning_rate": 1.5920398789835877e-05, + "loss": 0.0485, + "step": 27040 + }, + { + "epoch": 1.26, + "learning_rate": 1.591961500478109e-05, + "loss": 0.0873, + "step": 27045 + }, + { + "epoch": 1.26, + "learning_rate": 1.5918831219726302e-05, + "loss": 0.0935, + "step": 27050 + }, + { + "epoch": 1.26, + "learning_rate": 1.591804743467152e-05, + "loss": 0.0481, + "step": 27055 + }, + { + "epoch": 1.26, + "learning_rate": 1.591726364961673e-05, + "loss": 0.1495, + "step": 27060 + }, + { + "epoch": 1.26, + "learning_rate": 1.5916479864561943e-05, + "loss": 0.2669, + "step": 27065 + }, + { + "epoch": 1.26, + "learning_rate": 1.5915696079507157e-05, + "loss": 0.2449, + "step": 27070 + }, + { + "epoch": 1.26, + "learning_rate": 1.591491229445237e-05, + "loss": 0.2292, + "step": 27075 + }, + { + "epoch": 1.26, + "learning_rate": 1.5914128509397585e-05, + "loss": 0.3765, + "step": 27080 + }, + { + "epoch": 1.26, + "learning_rate": 1.59133447243428e-05, + "loss": 0.4043, + "step": 27085 + }, + { + "epoch": 1.26, + "learning_rate": 1.591256093928801e-05, + "loss": 0.0565, + "step": 27090 + }, + { + "epoch": 1.26, + "learning_rate": 1.5911777154233224e-05, + "loss": 0.1073, + "step": 27095 + }, + { + "epoch": 1.26, + "learning_rate": 1.5910993369178437e-05, + "loss": 0.1031, + "step": 27100 + }, + { + "epoch": 1.26, + "learning_rate": 1.591020958412365e-05, + "loss": 0.1669, + "step": 27105 + }, + { + "epoch": 1.26, + "learning_rate": 1.5909425799068865e-05, + "loss": 0.1159, + "step": 27110 + }, + { + "epoch": 1.27, + "learning_rate": 1.5908642014014076e-05, + "loss": 0.1482, + "step": 27115 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907858228959293e-05, + "loss": 0.2052, + "step": 27120 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907074443904504e-05, + "loss": 0.1452, + "step": 27125 + }, + { + "epoch": 1.27, + "learning_rate": 1.5906290658849717e-05, + "loss": 0.3528, + "step": 27130 + }, + { + "epoch": 1.27, + "learning_rate": 1.590550687379493e-05, + "loss": 0.2911, + "step": 27135 + }, + { + "epoch": 1.27, + "learning_rate": 1.5904723088740145e-05, + "loss": 0.0498, + "step": 27140 + }, + { + "epoch": 1.27, + "learning_rate": 1.590393930368536e-05, + "loss": 0.0742, + "step": 27145 + }, + { + "epoch": 1.27, + "learning_rate": 1.5903155518630573e-05, + "loss": 0.0673, + "step": 27150 + }, + { + "epoch": 1.27, + "learning_rate": 1.5902371733575787e-05, + "loss": 0.1554, + "step": 27155 + }, + { + "epoch": 1.27, + "learning_rate": 1.5901587948520998e-05, + "loss": 0.1423, + "step": 27160 + }, + { + "epoch": 1.27, + "learning_rate": 1.590080416346621e-05, + "loss": 0.2888, + "step": 27165 + }, + { + "epoch": 1.27, + "learning_rate": 1.5900020378411425e-05, + "loss": 0.206, + "step": 27170 + }, + { + "epoch": 1.27, + "learning_rate": 1.589923659335664e-05, + "loss": 0.2769, + "step": 27175 + }, + { + "epoch": 1.27, + "learning_rate": 1.5898452808301853e-05, + "loss": 0.3929, + "step": 27180 + }, + { + "epoch": 1.27, + "learning_rate": 1.5897669023247067e-05, + "loss": 0.2299, + "step": 27185 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896885238192278e-05, + "loss": 0.0616, + "step": 27190 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896101453137495e-05, + "loss": 0.0768, + "step": 27195 + }, + { + "epoch": 1.27, + "learning_rate": 1.5895317668082705e-05, + "loss": 0.066, + "step": 27200 + }, + { + "epoch": 1.27, + "learning_rate": 1.589453388302792e-05, + "loss": 0.1199, + "step": 27205 + }, + { + "epoch": 1.27, + "learning_rate": 1.5893750097973133e-05, + "loss": 0.1225, + "step": 27210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5892966312918347e-05, + "loss": 0.1322, + "step": 27215 + }, + { + "epoch": 1.27, + "learning_rate": 1.589218252786356e-05, + "loss": 0.1938, + "step": 27220 + }, + { + "epoch": 1.27, + "learning_rate": 1.589139874280877e-05, + "loss": 0.1989, + "step": 27225 + }, + { + "epoch": 1.27, + "learning_rate": 1.5890614957753985e-05, + "loss": 0.3783, + "step": 27230 + }, + { + "epoch": 1.27, + "learning_rate": 1.58898311726992e-05, + "loss": 0.2278, + "step": 27235 + }, + { + "epoch": 1.27, + "learning_rate": 1.5889047387644413e-05, + "loss": 0.0426, + "step": 27240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5888263602589627e-05, + "loss": 0.0671, + "step": 27245 + }, + { + "epoch": 1.27, + "learning_rate": 1.588747981753484e-05, + "loss": 0.1248, + "step": 27250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5886696032480055e-05, + "loss": 0.1308, + "step": 27255 + }, + { + "epoch": 1.27, + "learning_rate": 1.588591224742527e-05, + "loss": 0.1626, + "step": 27260 + }, + { + "epoch": 1.27, + "learning_rate": 1.588512846237048e-05, + "loss": 0.0983, + "step": 27265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5884344677315697e-05, + "loss": 0.1998, + "step": 27270 + }, + { + "epoch": 1.27, + "learning_rate": 1.5883560892260907e-05, + "loss": 0.2857, + "step": 27275 + }, + { + "epoch": 1.27, + "learning_rate": 1.588277710720612e-05, + "loss": 0.4716, + "step": 27280 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881993322151335e-05, + "loss": 0.3464, + "step": 27285 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881209537096546e-05, + "loss": 0.0157, + "step": 27290 + }, + { + "epoch": 1.27, + "learning_rate": 1.5880425752041763e-05, + "loss": 0.0292, + "step": 27295 + }, + { + "epoch": 1.27, + "learning_rate": 1.5879641966986973e-05, + "loss": 0.1048, + "step": 27300 + }, + { + "epoch": 1.27, + "learning_rate": 1.5878858181932187e-05, + "loss": 0.1233, + "step": 27305 + }, + { + "epoch": 1.27, + "learning_rate": 1.58780743968774e-05, + "loss": 0.1633, + "step": 27310 + }, + { + "epoch": 1.27, + "learning_rate": 1.5877290611822615e-05, + "loss": 0.213, + "step": 27315 + }, + { + "epoch": 1.27, + "learning_rate": 1.587650682676783e-05, + "loss": 0.1694, + "step": 27320 + }, + { + "epoch": 1.28, + "learning_rate": 1.5875723041713043e-05, + "loss": 0.2286, + "step": 27325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874939256658253e-05, + "loss": 0.4041, + "step": 27330 + }, + { + "epoch": 1.28, + "learning_rate": 1.587415547160347e-05, + "loss": 0.3315, + "step": 27335 + }, + { + "epoch": 1.28, + "learning_rate": 1.587337168654868e-05, + "loss": 0.0725, + "step": 27340 + }, + { + "epoch": 1.28, + "learning_rate": 1.5872587901493895e-05, + "loss": 0.092, + "step": 27345 + }, + { + "epoch": 1.28, + "learning_rate": 1.587180411643911e-05, + "loss": 0.0829, + "step": 27350 + }, + { + "epoch": 1.28, + "learning_rate": 1.5871020331384323e-05, + "loss": 0.1871, + "step": 27355 + }, + { + "epoch": 1.28, + "learning_rate": 1.5870236546329537e-05, + "loss": 0.1217, + "step": 27360 + }, + { + "epoch": 1.28, + "learning_rate": 1.5869452761274747e-05, + "loss": 0.1402, + "step": 27365 + }, + { + "epoch": 1.28, + "learning_rate": 1.5868668976219965e-05, + "loss": 0.2723, + "step": 27370 + }, + { + "epoch": 1.28, + "learning_rate": 1.5867885191165175e-05, + "loss": 0.2456, + "step": 27375 + }, + { + "epoch": 1.28, + "learning_rate": 1.586710140611039e-05, + "loss": 0.3611, + "step": 27380 + }, + { + "epoch": 1.28, + "learning_rate": 1.5866317621055603e-05, + "loss": 0.2447, + "step": 27385 + }, + { + "epoch": 1.28, + "learning_rate": 1.5865533836000817e-05, + "loss": 0.0605, + "step": 27390 + }, + { + "epoch": 1.28, + "learning_rate": 1.586475005094603e-05, + "loss": 0.1235, + "step": 27395 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863966265891245e-05, + "loss": 0.1074, + "step": 27400 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863182480836455e-05, + "loss": 0.1034, + "step": 27405 + }, + { + "epoch": 1.28, + "learning_rate": 1.5862398695781672e-05, + "loss": 0.1398, + "step": 27410 + }, + { + "epoch": 1.28, + "learning_rate": 1.5861614910726883e-05, + "loss": 0.1485, + "step": 27415 + }, + { + "epoch": 1.28, + "learning_rate": 1.5860831125672097e-05, + "loss": 0.184, + "step": 27420 + }, + { + "epoch": 1.28, + "learning_rate": 1.586004734061731e-05, + "loss": 0.301, + "step": 27425 + }, + { + "epoch": 1.28, + "learning_rate": 1.585926355556252e-05, + "loss": 0.3592, + "step": 27430 + }, + { + "epoch": 1.28, + "learning_rate": 1.585847977050774e-05, + "loss": 0.4242, + "step": 27435 + }, + { + "epoch": 1.28, + "learning_rate": 1.585769598545295e-05, + "loss": 0.033, + "step": 27440 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856912200398163e-05, + "loss": 0.077, + "step": 27445 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856128415343377e-05, + "loss": 0.0681, + "step": 27450 + }, + { + "epoch": 1.28, + "learning_rate": 1.585534463028859e-05, + "loss": 0.0868, + "step": 27455 + }, + { + "epoch": 1.28, + "learning_rate": 1.5854560845233805e-05, + "loss": 0.1456, + "step": 27460 + }, + { + "epoch": 1.28, + "learning_rate": 1.585377706017902e-05, + "loss": 0.1694, + "step": 27465 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852993275124233e-05, + "loss": 0.2192, + "step": 27470 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852209490069446e-05, + "loss": 0.2615, + "step": 27475 + }, + { + "epoch": 1.28, + "learning_rate": 1.5851425705014657e-05, + "loss": 0.3738, + "step": 27480 + }, + { + "epoch": 1.28, + "learning_rate": 1.585064191995987e-05, + "loss": 0.4018, + "step": 27485 + }, + { + "epoch": 1.28, + "learning_rate": 1.5849858134905085e-05, + "loss": 0.0784, + "step": 27490 + }, + { + "epoch": 1.28, + "learning_rate": 1.58490743498503e-05, + "loss": 0.0999, + "step": 27495 + }, + { + "epoch": 1.28, + "learning_rate": 1.5848290564795513e-05, + "loss": 0.094, + "step": 27500 + }, + { + "epoch": 1.28, + "learning_rate": 1.5847506779740723e-05, + "loss": 0.1918, + "step": 27505 + }, + { + "epoch": 1.28, + "learning_rate": 1.584672299468594e-05, + "loss": 0.0998, + "step": 27510 + }, + { + "epoch": 1.28, + "learning_rate": 1.584593920963115e-05, + "loss": 0.1329, + "step": 27515 + }, + { + "epoch": 1.28, + "learning_rate": 1.5845155424576365e-05, + "loss": 0.2364, + "step": 27520 + }, + { + "epoch": 1.28, + "learning_rate": 1.584437163952158e-05, + "loss": 0.2244, + "step": 27525 + }, + { + "epoch": 1.28, + "learning_rate": 1.5843587854466793e-05, + "loss": 0.2506, + "step": 27530 + }, + { + "epoch": 1.28, + "learning_rate": 1.5842804069412007e-05, + "loss": 0.2574, + "step": 27535 + }, + { + "epoch": 1.29, + "learning_rate": 1.584202028435722e-05, + "loss": 0.0459, + "step": 27540 + }, + { + "epoch": 1.29, + "learning_rate": 1.584123649930243e-05, + "loss": 0.169, + "step": 27545 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840452714247645e-05, + "loss": 0.0555, + "step": 27550 + }, + { + "epoch": 1.29, + "learning_rate": 1.583966892919286e-05, + "loss": 0.1168, + "step": 27555 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838885144138073e-05, + "loss": 0.1272, + "step": 27560 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838101359083287e-05, + "loss": 0.1271, + "step": 27565 + }, + { + "epoch": 1.29, + "learning_rate": 1.58373175740285e-05, + "loss": 0.2171, + "step": 27570 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836533788973714e-05, + "loss": 0.2344, + "step": 27575 + }, + { + "epoch": 1.29, + "learning_rate": 1.5835750003918925e-05, + "loss": 0.441, + "step": 27580 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834966218864142e-05, + "loss": 0.344, + "step": 27585 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834182433809353e-05, + "loss": 0.0275, + "step": 27590 + }, + { + "epoch": 1.29, + "learning_rate": 1.5833398648754567e-05, + "loss": 0.0879, + "step": 27595 + }, + { + "epoch": 1.29, + "learning_rate": 1.583261486369978e-05, + "loss": 0.0835, + "step": 27600 + }, + { + "epoch": 1.29, + "learning_rate": 1.5831831078644994e-05, + "loss": 0.0898, + "step": 27605 + }, + { + "epoch": 1.29, + "learning_rate": 1.583104729359021e-05, + "loss": 0.1473, + "step": 27610 + }, + { + "epoch": 1.29, + "learning_rate": 1.583026350853542e-05, + "loss": 0.1409, + "step": 27615 + }, + { + "epoch": 1.29, + "learning_rate": 1.5829479723480633e-05, + "loss": 0.1624, + "step": 27620 + }, + { + "epoch": 1.29, + "learning_rate": 1.5828695938425847e-05, + "loss": 0.2715, + "step": 27625 + }, + { + "epoch": 1.29, + "learning_rate": 1.582791215337106e-05, + "loss": 0.24, + "step": 27630 + }, + { + "epoch": 1.29, + "learning_rate": 1.5827128368316275e-05, + "loss": 0.3, + "step": 27635 + }, + { + "epoch": 1.29, + "learning_rate": 1.582634458326149e-05, + "loss": 0.1383, + "step": 27640 + }, + { + "epoch": 1.29, + "learning_rate": 1.58255607982067e-05, + "loss": 0.0718, + "step": 27645 + }, + { + "epoch": 1.29, + "learning_rate": 1.5824777013151916e-05, + "loss": 0.1329, + "step": 27650 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823993228097127e-05, + "loss": 0.0816, + "step": 27655 + }, + { + "epoch": 1.29, + "learning_rate": 1.582320944304234e-05, + "loss": 0.1626, + "step": 27660 + }, + { + "epoch": 1.29, + "learning_rate": 1.5822425657987555e-05, + "loss": 0.1631, + "step": 27665 + }, + { + "epoch": 1.29, + "learning_rate": 1.582164187293277e-05, + "loss": 0.176, + "step": 27670 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820858087877982e-05, + "loss": 0.242, + "step": 27675 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820074302823193e-05, + "loss": 0.3799, + "step": 27680 + }, + { + "epoch": 1.29, + "learning_rate": 1.581929051776841e-05, + "loss": 0.3226, + "step": 27685 + }, + { + "epoch": 1.29, + "learning_rate": 1.581850673271362e-05, + "loss": 0.0577, + "step": 27690 + }, + { + "epoch": 1.29, + "learning_rate": 1.5817722947658835e-05, + "loss": 0.0607, + "step": 27695 + }, + { + "epoch": 1.29, + "learning_rate": 1.581693916260405e-05, + "loss": 0.0799, + "step": 27700 + }, + { + "epoch": 1.29, + "learning_rate": 1.5816155377549262e-05, + "loss": 0.1294, + "step": 27705 + }, + { + "epoch": 1.29, + "learning_rate": 1.5815371592494476e-05, + "loss": 0.1614, + "step": 27710 + }, + { + "epoch": 1.29, + "learning_rate": 1.581458780743969e-05, + "loss": 0.2011, + "step": 27715 + }, + { + "epoch": 1.29, + "learning_rate": 1.58138040223849e-05, + "loss": 0.1317, + "step": 27720 + }, + { + "epoch": 1.29, + "learning_rate": 1.5813020237330118e-05, + "loss": 0.138, + "step": 27725 + }, + { + "epoch": 1.29, + "learning_rate": 1.581239320928629e-05, + "loss": 0.3351, + "step": 27730 + }, + { + "epoch": 1.29, + "learning_rate": 1.58116094242315e-05, + "loss": 0.2869, + "step": 27735 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810825639176713e-05, + "loss": 0.0514, + "step": 27740 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810041854121927e-05, + "loss": 0.0941, + "step": 27745 + }, + { + "epoch": 1.29, + "learning_rate": 1.580925806906714e-05, + "loss": 0.0642, + "step": 27750 + }, + { + "epoch": 1.3, + "learning_rate": 1.5808474284012355e-05, + "loss": 0.1529, + "step": 27755 + }, + { + "epoch": 1.3, + "learning_rate": 1.5807690498957565e-05, + "loss": 0.1682, + "step": 27760 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806906713902782e-05, + "loss": 0.1137, + "step": 27765 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806279685858953e-05, + "loss": 0.2363, + "step": 27770 + }, + { + "epoch": 1.3, + "learning_rate": 1.5805495900804163e-05, + "loss": 0.2647, + "step": 27775 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804712115749377e-05, + "loss": 0.432, + "step": 27780 + }, + { + "epoch": 1.3, + "learning_rate": 1.580392833069459e-05, + "loss": 0.2727, + "step": 27785 + }, + { + "epoch": 1.3, + "learning_rate": 1.5803144545639805e-05, + "loss": 0.0257, + "step": 27790 + }, + { + "epoch": 1.3, + "learning_rate": 1.580236076058502e-05, + "loss": 0.056, + "step": 27795 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801576975530233e-05, + "loss": 0.1739, + "step": 27800 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800793190475447e-05, + "loss": 0.1359, + "step": 27805 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800009405420657e-05, + "loss": 0.1377, + "step": 27810 + }, + { + "epoch": 1.3, + "learning_rate": 1.579922562036587e-05, + "loss": 0.2141, + "step": 27815 + }, + { + "epoch": 1.3, + "learning_rate": 1.5798441835311085e-05, + "loss": 0.2105, + "step": 27820 + }, + { + "epoch": 1.3, + "learning_rate": 1.57976580502563e-05, + "loss": 0.2329, + "step": 27825 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796874265201513e-05, + "loss": 0.2853, + "step": 27830 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796090480146727e-05, + "loss": 0.2378, + "step": 27835 + }, + { + "epoch": 1.3, + "learning_rate": 1.5795306695091937e-05, + "loss": 0.0456, + "step": 27840 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794522910037155e-05, + "loss": 0.0577, + "step": 27845 + }, + { + "epoch": 1.3, + "learning_rate": 1.5793739124982365e-05, + "loss": 0.0971, + "step": 27850 + }, + { + "epoch": 1.3, + "learning_rate": 1.579295533992758e-05, + "loss": 0.1172, + "step": 27855 + }, + { + "epoch": 1.3, + "learning_rate": 1.5792171554872793e-05, + "loss": 0.0872, + "step": 27860 + }, + { + "epoch": 1.3, + "learning_rate": 1.5791387769818007e-05, + "loss": 0.1082, + "step": 27865 + }, + { + "epoch": 1.3, + "learning_rate": 1.579060398476322e-05, + "loss": 0.1545, + "step": 27870 + }, + { + "epoch": 1.3, + "learning_rate": 1.578982019970843e-05, + "loss": 0.2331, + "step": 27875 + }, + { + "epoch": 1.3, + "learning_rate": 1.5789036414653645e-05, + "loss": 0.3144, + "step": 27880 + }, + { + "epoch": 1.3, + "learning_rate": 1.578825262959886e-05, + "loss": 0.2813, + "step": 27885 + }, + { + "epoch": 1.3, + "learning_rate": 1.5787468844544073e-05, + "loss": 0.0546, + "step": 27890 + }, + { + "epoch": 1.3, + "learning_rate": 1.5786685059489287e-05, + "loss": 0.0614, + "step": 27895 + }, + { + "epoch": 1.3, + "learning_rate": 1.57859012744345e-05, + "loss": 0.0767, + "step": 27900 + }, + { + "epoch": 1.3, + "learning_rate": 1.5785117489379715e-05, + "loss": 0.1322, + "step": 27905 + }, + { + "epoch": 1.3, + "learning_rate": 1.578433370432493e-05, + "loss": 0.134, + "step": 27910 + }, + { + "epoch": 1.3, + "learning_rate": 1.578354991927014e-05, + "loss": 0.1844, + "step": 27915 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782766134215357e-05, + "loss": 0.1431, + "step": 27920 + }, + { + "epoch": 1.3, + "learning_rate": 1.5781982349160567e-05, + "loss": 0.2983, + "step": 27925 + }, + { + "epoch": 1.3, + "learning_rate": 1.578119856410578e-05, + "loss": 0.381, + "step": 27930 + }, + { + "epoch": 1.3, + "learning_rate": 1.5780414779050995e-05, + "loss": 0.3145, + "step": 27935 + }, + { + "epoch": 1.3, + "learning_rate": 1.5779630993996205e-05, + "loss": 0.0599, + "step": 27940 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778847208941423e-05, + "loss": 0.1142, + "step": 27945 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778063423886633e-05, + "loss": 0.0946, + "step": 27950 + }, + { + "epoch": 1.3, + "learning_rate": 1.5777279638831847e-05, + "loss": 0.1403, + "step": 27955 + }, + { + "epoch": 1.3, + "learning_rate": 1.577649585377706e-05, + "loss": 0.1777, + "step": 27960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5775712068722275e-05, + "loss": 0.2978, + "step": 27965 + }, + { + "epoch": 1.31, + "learning_rate": 1.577492828366749e-05, + "loss": 0.1597, + "step": 27970 + }, + { + "epoch": 1.31, + "learning_rate": 1.5774144498612703e-05, + "loss": 0.195, + "step": 27975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5773360713557913e-05, + "loss": 0.356, + "step": 27980 + }, + { + "epoch": 1.31, + "learning_rate": 1.577257692850313e-05, + "loss": 0.3127, + "step": 27985 + }, + { + "epoch": 1.31, + "learning_rate": 1.577179314344834e-05, + "loss": 0.0726, + "step": 27990 + }, + { + "epoch": 1.31, + "learning_rate": 1.5771009358393555e-05, + "loss": 0.0738, + "step": 27995 + }, + { + "epoch": 1.31, + "learning_rate": 1.577022557333877e-05, + "loss": 0.0923, + "step": 28000 + }, + { + "epoch": 1.31, + "learning_rate": 1.5769441788283983e-05, + "loss": 0.0928, + "step": 28005 + }, + { + "epoch": 1.31, + "learning_rate": 1.5768658003229197e-05, + "loss": 0.1215, + "step": 28010 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767874218174407e-05, + "loss": 0.161, + "step": 28015 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767090433119624e-05, + "loss": 0.2235, + "step": 28020 + }, + { + "epoch": 1.31, + "learning_rate": 1.5766306648064835e-05, + "loss": 0.2186, + "step": 28025 + }, + { + "epoch": 1.31, + "learning_rate": 1.576552286301005e-05, + "loss": 0.3355, + "step": 28030 + }, + { + "epoch": 1.31, + "learning_rate": 1.5764739077955263e-05, + "loss": 0.3447, + "step": 28035 + }, + { + "epoch": 1.31, + "learning_rate": 1.5763955292900477e-05, + "loss": 0.0388, + "step": 28040 + }, + { + "epoch": 1.31, + "learning_rate": 1.576317150784569e-05, + "loss": 0.074, + "step": 28045 + }, + { + "epoch": 1.31, + "learning_rate": 1.5762387722790905e-05, + "loss": 0.058, + "step": 28050 + }, + { + "epoch": 1.31, + "learning_rate": 1.5761603937736115e-05, + "loss": 0.1904, + "step": 28055 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760820152681332e-05, + "loss": 0.0815, + "step": 28060 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760036367626543e-05, + "loss": 0.1802, + "step": 28065 + }, + { + "epoch": 1.31, + "learning_rate": 1.5759252582571757e-05, + "loss": 0.1224, + "step": 28070 + }, + { + "epoch": 1.31, + "learning_rate": 1.575846879751697e-05, + "loss": 0.2543, + "step": 28075 + }, + { + "epoch": 1.31, + "learning_rate": 1.575768501246218e-05, + "loss": 0.248, + "step": 28080 + }, + { + "epoch": 1.31, + "learning_rate": 1.57569012274074e-05, + "loss": 0.3462, + "step": 28085 + }, + { + "epoch": 1.31, + "learning_rate": 1.575611744235261e-05, + "loss": 0.0377, + "step": 28090 + }, + { + "epoch": 1.31, + "learning_rate": 1.5755333657297823e-05, + "loss": 0.0877, + "step": 28095 + }, + { + "epoch": 1.31, + "learning_rate": 1.5754549872243037e-05, + "loss": 0.1138, + "step": 28100 + }, + { + "epoch": 1.31, + "learning_rate": 1.575376608718825e-05, + "loss": 0.1132, + "step": 28105 + }, + { + "epoch": 1.31, + "learning_rate": 1.5752982302133465e-05, + "loss": 0.1068, + "step": 28110 + }, + { + "epoch": 1.31, + "learning_rate": 1.575219851707868e-05, + "loss": 0.1146, + "step": 28115 + }, + { + "epoch": 1.31, + "learning_rate": 1.5751414732023892e-05, + "loss": 0.1925, + "step": 28120 + }, + { + "epoch": 1.31, + "learning_rate": 1.5750630946969106e-05, + "loss": 0.2804, + "step": 28125 + }, + { + "epoch": 1.31, + "learning_rate": 1.5749847161914317e-05, + "loss": 0.4683, + "step": 28130 + }, + { + "epoch": 1.31, + "learning_rate": 1.574906337685953e-05, + "loss": 0.3226, + "step": 28135 + }, + { + "epoch": 1.31, + "learning_rate": 1.5748279591804745e-05, + "loss": 0.063, + "step": 28140 + }, + { + "epoch": 1.31, + "learning_rate": 1.574749580674996e-05, + "loss": 0.0538, + "step": 28145 + }, + { + "epoch": 1.31, + "learning_rate": 1.5746712021695172e-05, + "loss": 0.0772, + "step": 28150 + }, + { + "epoch": 1.31, + "learning_rate": 1.5745928236640383e-05, + "loss": 0.1166, + "step": 28155 + }, + { + "epoch": 1.31, + "learning_rate": 1.57451444515856e-05, + "loss": 0.1212, + "step": 28160 + }, + { + "epoch": 1.31, + "learning_rate": 1.574436066653081e-05, + "loss": 0.1134, + "step": 28165 + }, + { + "epoch": 1.31, + "learning_rate": 1.5743576881476025e-05, + "loss": 0.2492, + "step": 28170 + }, + { + "epoch": 1.31, + "learning_rate": 1.574279309642124e-05, + "loss": 0.3189, + "step": 28175 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742009311366453e-05, + "loss": 0.2254, + "step": 28180 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741225526311666e-05, + "loss": 0.2818, + "step": 28185 + }, + { + "epoch": 1.32, + "learning_rate": 1.574044174125688e-05, + "loss": 0.0439, + "step": 28190 + }, + { + "epoch": 1.32, + "learning_rate": 1.573965795620209e-05, + "loss": 0.1292, + "step": 28195 + }, + { + "epoch": 1.32, + "learning_rate": 1.5738874171147305e-05, + "loss": 0.1012, + "step": 28200 + }, + { + "epoch": 1.32, + "learning_rate": 1.573809038609252e-05, + "loss": 0.08, + "step": 28205 + }, + { + "epoch": 1.32, + "learning_rate": 1.5737306601037733e-05, + "loss": 0.1199, + "step": 28210 + }, + { + "epoch": 1.32, + "learning_rate": 1.5736522815982946e-05, + "loss": 0.2438, + "step": 28215 + }, + { + "epoch": 1.32, + "learning_rate": 1.573573903092816e-05, + "loss": 0.0924, + "step": 28220 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734955245873374e-05, + "loss": 0.2114, + "step": 28225 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734171460818585e-05, + "loss": 0.323, + "step": 28230 + }, + { + "epoch": 1.32, + "learning_rate": 1.5733387675763802e-05, + "loss": 0.2703, + "step": 28235 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732603890709013e-05, + "loss": 0.0825, + "step": 28240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5731820105654227e-05, + "loss": 0.0734, + "step": 28245 + }, + { + "epoch": 1.32, + "learning_rate": 1.573103632059944e-05, + "loss": 0.0782, + "step": 28250 + }, + { + "epoch": 1.32, + "learning_rate": 1.5730252535544654e-05, + "loss": 0.1712, + "step": 28255 + }, + { + "epoch": 1.32, + "learning_rate": 1.5729468750489868e-05, + "loss": 0.1776, + "step": 28260 + }, + { + "epoch": 1.32, + "learning_rate": 1.572868496543508e-05, + "loss": 0.1425, + "step": 28265 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727901180380293e-05, + "loss": 0.2537, + "step": 28270 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727117395325507e-05, + "loss": 0.2227, + "step": 28275 + }, + { + "epoch": 1.32, + "learning_rate": 1.572633361027072e-05, + "loss": 0.4145, + "step": 28280 + }, + { + "epoch": 1.32, + "learning_rate": 1.5725549825215934e-05, + "loss": 0.4865, + "step": 28285 + }, + { + "epoch": 1.32, + "learning_rate": 1.5724766040161148e-05, + "loss": 0.0633, + "step": 28290 + }, + { + "epoch": 1.32, + "learning_rate": 1.572398225510636e-05, + "loss": 0.0838, + "step": 28295 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723198470051576e-05, + "loss": 0.1071, + "step": 28300 + }, + { + "epoch": 1.32, + "learning_rate": 1.5722414684996787e-05, + "loss": 0.1015, + "step": 28305 + }, + { + "epoch": 1.32, + "learning_rate": 1.5721630899942e-05, + "loss": 0.0966, + "step": 28310 + }, + { + "epoch": 1.32, + "learning_rate": 1.5720847114887214e-05, + "loss": 0.2073, + "step": 28315 + }, + { + "epoch": 1.32, + "learning_rate": 1.572006332983243e-05, + "loss": 0.1782, + "step": 28320 + }, + { + "epoch": 1.32, + "learning_rate": 1.5719279544777642e-05, + "loss": 0.2457, + "step": 28325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5718495759722853e-05, + "loss": 0.3423, + "step": 28330 + }, + { + "epoch": 1.32, + "learning_rate": 1.571771197466807e-05, + "loss": 0.3075, + "step": 28335 + }, + { + "epoch": 1.32, + "learning_rate": 1.571692818961328e-05, + "loss": 0.0612, + "step": 28340 + }, + { + "epoch": 1.32, + "learning_rate": 1.5716144404558494e-05, + "loss": 0.087, + "step": 28345 + }, + { + "epoch": 1.32, + "learning_rate": 1.571536061950371e-05, + "loss": 0.0356, + "step": 28350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5714576834448922e-05, + "loss": 0.1484, + "step": 28355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5713793049394136e-05, + "loss": 0.1245, + "step": 28360 + }, + { + "epoch": 1.32, + "learning_rate": 1.571300926433935e-05, + "loss": 0.117, + "step": 28365 + }, + { + "epoch": 1.32, + "learning_rate": 1.571222547928456e-05, + "loss": 0.187, + "step": 28370 + }, + { + "epoch": 1.32, + "learning_rate": 1.5711441694229778e-05, + "loss": 0.3141, + "step": 28375 + }, + { + "epoch": 1.32, + "learning_rate": 1.571065790917499e-05, + "loss": 0.3338, + "step": 28380 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709874124120202e-05, + "loss": 0.3198, + "step": 28385 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709090339065416e-05, + "loss": 0.0383, + "step": 28390 + }, + { + "epoch": 1.32, + "learning_rate": 1.5708306554010627e-05, + "loss": 0.1249, + "step": 28395 + }, + { + "epoch": 1.33, + "learning_rate": 1.5707522768955844e-05, + "loss": 0.0838, + "step": 28400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5706738983901055e-05, + "loss": 0.1157, + "step": 28405 + }, + { + "epoch": 1.33, + "learning_rate": 1.570595519884627e-05, + "loss": 0.1878, + "step": 28410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5705171413791482e-05, + "loss": 0.1487, + "step": 28415 + }, + { + "epoch": 1.33, + "learning_rate": 1.5704387628736696e-05, + "loss": 0.1422, + "step": 28420 + }, + { + "epoch": 1.33, + "learning_rate": 1.570360384368191e-05, + "loss": 0.1978, + "step": 28425 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702820058627124e-05, + "loss": 0.3651, + "step": 28430 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702036273572338e-05, + "loss": 0.2579, + "step": 28435 + }, + { + "epoch": 1.33, + "learning_rate": 1.5701252488517552e-05, + "loss": 0.0636, + "step": 28440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5700468703462762e-05, + "loss": 0.069, + "step": 28445 + }, + { + "epoch": 1.33, + "learning_rate": 1.569968491840798e-05, + "loss": 0.0931, + "step": 28450 + }, + { + "epoch": 1.33, + "learning_rate": 1.569890113335319e-05, + "loss": 0.1109, + "step": 28455 + }, + { + "epoch": 1.33, + "learning_rate": 1.5698117348298404e-05, + "loss": 0.0914, + "step": 28460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5697333563243618e-05, + "loss": 0.1079, + "step": 28465 + }, + { + "epoch": 1.33, + "learning_rate": 1.569654977818883e-05, + "loss": 0.2067, + "step": 28470 + }, + { + "epoch": 1.33, + "learning_rate": 1.5695765993134046e-05, + "loss": 0.2517, + "step": 28475 + }, + { + "epoch": 1.33, + "learning_rate": 1.5694982208079256e-05, + "loss": 0.3391, + "step": 28480 + }, + { + "epoch": 1.33, + "learning_rate": 1.569419842302447e-05, + "loss": 0.1997, + "step": 28485 + }, + { + "epoch": 1.33, + "learning_rate": 1.5693414637969684e-05, + "loss": 0.0238, + "step": 28490 + }, + { + "epoch": 1.33, + "learning_rate": 1.5692630852914898e-05, + "loss": 0.0778, + "step": 28495 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691847067860112e-05, + "loss": 0.0956, + "step": 28500 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691063282805326e-05, + "loss": 0.1695, + "step": 28505 + }, + { + "epoch": 1.33, + "learning_rate": 1.5690279497750536e-05, + "loss": 0.0927, + "step": 28510 + }, + { + "epoch": 1.33, + "learning_rate": 1.5689495712695754e-05, + "loss": 0.109, + "step": 28515 + }, + { + "epoch": 1.33, + "learning_rate": 1.5688711927640964e-05, + "loss": 0.2123, + "step": 28520 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687928142586178e-05, + "loss": 0.1514, + "step": 28525 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687144357531392e-05, + "loss": 0.4057, + "step": 28530 + }, + { + "epoch": 1.33, + "learning_rate": 1.5686360572476606e-05, + "loss": 0.3171, + "step": 28535 + }, + { + "epoch": 1.33, + "learning_rate": 1.568557678742182e-05, + "loss": 0.0878, + "step": 28540 + }, + { + "epoch": 1.33, + "learning_rate": 1.568479300236703e-05, + "loss": 0.0565, + "step": 28545 + }, + { + "epoch": 1.33, + "learning_rate": 1.5684009217312248e-05, + "loss": 0.1183, + "step": 28550 + }, + { + "epoch": 1.33, + "learning_rate": 1.5683225432257458e-05, + "loss": 0.1481, + "step": 28555 + }, + { + "epoch": 1.33, + "learning_rate": 1.5682441647202672e-05, + "loss": 0.1402, + "step": 28560 + }, + { + "epoch": 1.33, + "learning_rate": 1.5681657862147886e-05, + "loss": 0.1809, + "step": 28565 + }, + { + "epoch": 1.33, + "learning_rate": 1.56808740770931e-05, + "loss": 0.2217, + "step": 28570 + }, + { + "epoch": 1.33, + "learning_rate": 1.5680090292038314e-05, + "loss": 0.3258, + "step": 28575 + }, + { + "epoch": 1.33, + "learning_rate": 1.5679306506983528e-05, + "loss": 0.2286, + "step": 28580 + }, + { + "epoch": 1.33, + "learning_rate": 1.5678522721928738e-05, + "loss": 0.2848, + "step": 28585 + }, + { + "epoch": 1.33, + "learning_rate": 1.5677738936873952e-05, + "loss": 0.0547, + "step": 28590 + }, + { + "epoch": 1.33, + "learning_rate": 1.5676955151819166e-05, + "loss": 0.0618, + "step": 28595 + }, + { + "epoch": 1.33, + "learning_rate": 1.567617136676438e-05, + "loss": 0.0941, + "step": 28600 + }, + { + "epoch": 1.33, + "learning_rate": 1.5675387581709594e-05, + "loss": 0.1233, + "step": 28605 + }, + { + "epoch": 1.33, + "learning_rate": 1.5674603796654804e-05, + "loss": 0.1848, + "step": 28610 + }, + { + "epoch": 1.34, + "learning_rate": 1.567382001160002e-05, + "loss": 0.0776, + "step": 28615 + }, + { + "epoch": 1.34, + "learning_rate": 1.5673036226545232e-05, + "loss": 0.248, + "step": 28620 + }, + { + "epoch": 1.34, + "learning_rate": 1.5672252441490446e-05, + "loss": 0.232, + "step": 28625 + }, + { + "epoch": 1.34, + "learning_rate": 1.567146865643566e-05, + "loss": 0.4364, + "step": 28630 + }, + { + "epoch": 1.34, + "learning_rate": 1.5670684871380874e-05, + "loss": 0.2168, + "step": 28635 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669901086326088e-05, + "loss": 0.049, + "step": 28640 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669117301271302e-05, + "loss": 0.0206, + "step": 28645 + }, + { + "epoch": 1.34, + "learning_rate": 1.5668333516216516e-05, + "loss": 0.1098, + "step": 28650 + }, + { + "epoch": 1.34, + "learning_rate": 1.5667549731161726e-05, + "loss": 0.1082, + "step": 28655 + }, + { + "epoch": 1.34, + "learning_rate": 1.566676594610694e-05, + "loss": 0.1286, + "step": 28660 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665982161052154e-05, + "loss": 0.2045, + "step": 28665 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665198375997368e-05, + "loss": 0.1718, + "step": 28670 + }, + { + "epoch": 1.34, + "learning_rate": 1.5664414590942582e-05, + "loss": 0.3584, + "step": 28675 + }, + { + "epoch": 1.34, + "learning_rate": 1.5663630805887796e-05, + "loss": 0.4569, + "step": 28680 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662847020833006e-05, + "loss": 0.3119, + "step": 28685 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662063235778223e-05, + "loss": 0.0559, + "step": 28690 + }, + { + "epoch": 1.34, + "learning_rate": 1.5661279450723434e-05, + "loss": 0.0382, + "step": 28695 + }, + { + "epoch": 1.34, + "learning_rate": 1.5660495665668648e-05, + "loss": 0.1046, + "step": 28700 + }, + { + "epoch": 1.34, + "learning_rate": 1.5659711880613862e-05, + "loss": 0.1162, + "step": 28705 + }, + { + "epoch": 1.34, + "learning_rate": 1.5658928095559076e-05, + "loss": 0.1435, + "step": 28710 + }, + { + "epoch": 1.34, + "learning_rate": 1.565814431050429e-05, + "loss": 0.203, + "step": 28715 + }, + { + "epoch": 1.34, + "learning_rate": 1.56573605254495e-05, + "loss": 0.2438, + "step": 28720 + }, + { + "epoch": 1.34, + "learning_rate": 1.5656576740394714e-05, + "loss": 0.214, + "step": 28725 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655792955339928e-05, + "loss": 0.369, + "step": 28730 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655009170285142e-05, + "loss": 0.2569, + "step": 28735 + }, + { + "epoch": 1.34, + "learning_rate": 1.5654225385230356e-05, + "loss": 0.0718, + "step": 28740 + }, + { + "epoch": 1.34, + "learning_rate": 1.565344160017557e-05, + "loss": 0.1807, + "step": 28745 + }, + { + "epoch": 1.34, + "learning_rate": 1.5652657815120784e-05, + "loss": 0.0907, + "step": 28750 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651874030065997e-05, + "loss": 0.1447, + "step": 28755 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651090245011208e-05, + "loss": 0.1166, + "step": 28760 + }, + { + "epoch": 1.34, + "learning_rate": 1.5650306459956425e-05, + "loss": 0.1564, + "step": 28765 + }, + { + "epoch": 1.34, + "learning_rate": 1.5649522674901636e-05, + "loss": 0.2352, + "step": 28770 + }, + { + "epoch": 1.34, + "learning_rate": 1.564873888984685e-05, + "loss": 0.2784, + "step": 28775 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647955104792064e-05, + "loss": 0.3881, + "step": 28780 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647171319737274e-05, + "loss": 0.2386, + "step": 28785 + }, + { + "epoch": 1.34, + "learning_rate": 1.564638753468249e-05, + "loss": 0.0621, + "step": 28790 + }, + { + "epoch": 1.34, + "learning_rate": 1.5645603749627702e-05, + "loss": 0.0444, + "step": 28795 + }, + { + "epoch": 1.34, + "learning_rate": 1.5644819964572916e-05, + "loss": 0.0656, + "step": 28800 + }, + { + "epoch": 1.34, + "learning_rate": 1.564403617951813e-05, + "loss": 0.0765, + "step": 28805 + }, + { + "epoch": 1.34, + "learning_rate": 1.5643252394463344e-05, + "loss": 0.2032, + "step": 28810 + }, + { + "epoch": 1.34, + "learning_rate": 1.5642468609408558e-05, + "loss": 0.1976, + "step": 28815 + }, + { + "epoch": 1.34, + "learning_rate": 1.564168482435377e-05, + "loss": 0.1735, + "step": 28820 + }, + { + "epoch": 1.35, + "learning_rate": 1.5640901039298982e-05, + "loss": 0.2373, + "step": 28825 + }, + { + "epoch": 1.35, + "learning_rate": 1.56401172542442e-05, + "loss": 0.3012, + "step": 28830 + }, + { + "epoch": 1.35, + "learning_rate": 1.563933346918941e-05, + "loss": 0.3411, + "step": 28835 + }, + { + "epoch": 1.35, + "learning_rate": 1.5638549684134624e-05, + "loss": 0.0834, + "step": 28840 + }, + { + "epoch": 1.35, + "learning_rate": 1.5637765899079838e-05, + "loss": 0.0505, + "step": 28845 + }, + { + "epoch": 1.35, + "learning_rate": 1.563698211402505e-05, + "loss": 0.076, + "step": 28850 + }, + { + "epoch": 1.35, + "learning_rate": 1.5636198328970265e-05, + "loss": 0.0456, + "step": 28855 + }, + { + "epoch": 1.35, + "learning_rate": 1.5635414543915476e-05, + "loss": 0.0746, + "step": 28860 + }, + { + "epoch": 1.35, + "learning_rate": 1.5634630758860693e-05, + "loss": 0.1093, + "step": 28865 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633846973805904e-05, + "loss": 0.1907, + "step": 28870 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633063188751118e-05, + "loss": 0.1332, + "step": 28875 + }, + { + "epoch": 1.35, + "learning_rate": 1.563227940369633e-05, + "loss": 0.3031, + "step": 28880 + }, + { + "epoch": 1.35, + "learning_rate": 1.5631495618641545e-05, + "loss": 0.3297, + "step": 28885 + }, + { + "epoch": 1.35, + "learning_rate": 1.563071183358676e-05, + "loss": 0.0652, + "step": 28890 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629928048531973e-05, + "loss": 0.0844, + "step": 28895 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629144263477184e-05, + "loss": 0.1097, + "step": 28900 + }, + { + "epoch": 1.35, + "learning_rate": 1.56283604784224e-05, + "loss": 0.115, + "step": 28905 + }, + { + "epoch": 1.35, + "learning_rate": 1.562757669336761e-05, + "loss": 0.1077, + "step": 28910 + }, + { + "epoch": 1.35, + "learning_rate": 1.5626792908312826e-05, + "loss": 0.1605, + "step": 28915 + }, + { + "epoch": 1.35, + "learning_rate": 1.562600912325804e-05, + "loss": 0.2131, + "step": 28920 + }, + { + "epoch": 1.35, + "learning_rate": 1.562522533820325e-05, + "loss": 0.1889, + "step": 28925 + }, + { + "epoch": 1.35, + "learning_rate": 1.5624441553148467e-05, + "loss": 0.3687, + "step": 28930 + }, + { + "epoch": 1.35, + "learning_rate": 1.5623657768093678e-05, + "loss": 0.2025, + "step": 28935 + }, + { + "epoch": 1.35, + "learning_rate": 1.562287398303889e-05, + "loss": 0.0673, + "step": 28940 + }, + { + "epoch": 1.35, + "learning_rate": 1.5622090197984106e-05, + "loss": 0.0705, + "step": 28945 + }, + { + "epoch": 1.35, + "learning_rate": 1.562130641292932e-05, + "loss": 0.1058, + "step": 28950 + }, + { + "epoch": 1.35, + "learning_rate": 1.5620522627874533e-05, + "loss": 0.1207, + "step": 28955 + }, + { + "epoch": 1.35, + "learning_rate": 1.5619738842819747e-05, + "loss": 0.1275, + "step": 28960 + }, + { + "epoch": 1.35, + "learning_rate": 1.561895505776496e-05, + "loss": 0.2169, + "step": 28965 + }, + { + "epoch": 1.35, + "learning_rate": 1.5618171272710175e-05, + "loss": 0.1789, + "step": 28970 + }, + { + "epoch": 1.35, + "learning_rate": 1.5617387487655386e-05, + "loss": 0.2378, + "step": 28975 + }, + { + "epoch": 1.35, + "learning_rate": 1.56166037026006e-05, + "loss": 0.4098, + "step": 28980 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615819917545813e-05, + "loss": 0.2499, + "step": 28985 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615036132491027e-05, + "loss": 0.0793, + "step": 28990 + }, + { + "epoch": 1.35, + "learning_rate": 1.561425234743624e-05, + "loss": 0.067, + "step": 28995 + }, + { + "epoch": 1.35, + "learning_rate": 1.5613468562381452e-05, + "loss": 0.0746, + "step": 29000 + }, + { + "epoch": 1.35, + "learning_rate": 1.561268477732667e-05, + "loss": 0.2106, + "step": 29005 + }, + { + "epoch": 1.35, + "learning_rate": 1.561190099227188e-05, + "loss": 0.1199, + "step": 29010 + }, + { + "epoch": 1.35, + "learning_rate": 1.5611117207217093e-05, + "loss": 0.0969, + "step": 29015 + }, + { + "epoch": 1.35, + "learning_rate": 1.5610333422162307e-05, + "loss": 0.3277, + "step": 29020 + }, + { + "epoch": 1.35, + "learning_rate": 1.560954963710752e-05, + "loss": 0.2931, + "step": 29025 + }, + { + "epoch": 1.35, + "learning_rate": 1.5608765852052735e-05, + "loss": 0.4393, + "step": 29030 + }, + { + "epoch": 1.35, + "learning_rate": 1.560798206699795e-05, + "loss": 0.4498, + "step": 29035 + }, + { + "epoch": 1.36, + "learning_rate": 1.560719828194316e-05, + "loss": 0.0525, + "step": 29040 + }, + { + "epoch": 1.36, + "learning_rate": 1.5606414496888374e-05, + "loss": 0.0493, + "step": 29045 + }, + { + "epoch": 1.36, + "learning_rate": 1.5605630711833587e-05, + "loss": 0.0895, + "step": 29050 + }, + { + "epoch": 1.36, + "learning_rate": 1.56048469267788e-05, + "loss": 0.1352, + "step": 29055 + }, + { + "epoch": 1.36, + "learning_rate": 1.5604063141724015e-05, + "loss": 0.1055, + "step": 29060 + }, + { + "epoch": 1.36, + "learning_rate": 1.560327935666923e-05, + "loss": 0.2887, + "step": 29065 + }, + { + "epoch": 1.36, + "learning_rate": 1.5602495571614443e-05, + "loss": 0.1776, + "step": 29070 + }, + { + "epoch": 1.36, + "learning_rate": 1.5601711786559654e-05, + "loss": 0.1566, + "step": 29075 + }, + { + "epoch": 1.36, + "learning_rate": 1.560092800150487e-05, + "loss": 0.2249, + "step": 29080 + }, + { + "epoch": 1.36, + "learning_rate": 1.560014421645008e-05, + "loss": 0.4125, + "step": 29085 + }, + { + "epoch": 1.36, + "learning_rate": 1.5599360431395295e-05, + "loss": 0.1064, + "step": 29090 + }, + { + "epoch": 1.36, + "learning_rate": 1.559857664634051e-05, + "loss": 0.1015, + "step": 29095 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597792861285723e-05, + "loss": 0.0806, + "step": 29100 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597009076230937e-05, + "loss": 0.1206, + "step": 29105 + }, + { + "epoch": 1.36, + "learning_rate": 1.5596225291176148e-05, + "loss": 0.1109, + "step": 29110 + }, + { + "epoch": 1.36, + "learning_rate": 1.559544150612136e-05, + "loss": 0.1305, + "step": 29115 + }, + { + "epoch": 1.36, + "learning_rate": 1.5594657721066575e-05, + "loss": 0.2071, + "step": 29120 + }, + { + "epoch": 1.36, + "learning_rate": 1.559387393601179e-05, + "loss": 0.1554, + "step": 29125 + }, + { + "epoch": 1.36, + "learning_rate": 1.5593090150957003e-05, + "loss": 0.5038, + "step": 29130 + }, + { + "epoch": 1.36, + "learning_rate": 1.5592306365902217e-05, + "loss": 0.3296, + "step": 29135 + }, + { + "epoch": 1.36, + "learning_rate": 1.5591522580847428e-05, + "loss": 0.1192, + "step": 29140 + }, + { + "epoch": 1.36, + "learning_rate": 1.5590738795792645e-05, + "loss": 0.1061, + "step": 29145 + }, + { + "epoch": 1.36, + "learning_rate": 1.5589955010737855e-05, + "loss": 0.0826, + "step": 29150 + }, + { + "epoch": 1.36, + "learning_rate": 1.558917122568307e-05, + "loss": 0.1131, + "step": 29155 + }, + { + "epoch": 1.36, + "learning_rate": 1.5588387440628283e-05, + "loss": 0.1495, + "step": 29160 + }, + { + "epoch": 1.36, + "learning_rate": 1.5587603655573497e-05, + "loss": 0.2157, + "step": 29165 + }, + { + "epoch": 1.36, + "learning_rate": 1.558681987051871e-05, + "loss": 0.2206, + "step": 29170 + }, + { + "epoch": 1.36, + "learning_rate": 1.558603608546392e-05, + "loss": 0.2772, + "step": 29175 + }, + { + "epoch": 1.36, + "learning_rate": 1.558525230040914e-05, + "loss": 0.5025, + "step": 29180 + }, + { + "epoch": 1.36, + "learning_rate": 1.558446851535435e-05, + "loss": 0.3455, + "step": 29185 + }, + { + "epoch": 1.36, + "learning_rate": 1.5583684730299563e-05, + "loss": 0.0408, + "step": 29190 + }, + { + "epoch": 1.36, + "learning_rate": 1.5582900945244777e-05, + "loss": 0.0804, + "step": 29195 + }, + { + "epoch": 1.36, + "learning_rate": 1.558211716018999e-05, + "loss": 0.1028, + "step": 29200 + }, + { + "epoch": 1.36, + "learning_rate": 1.5581333375135205e-05, + "loss": 0.0895, + "step": 29205 + }, + { + "epoch": 1.36, + "learning_rate": 1.558054959008042e-05, + "loss": 0.151, + "step": 29210 + }, + { + "epoch": 1.36, + "learning_rate": 1.557976580502563e-05, + "loss": 0.1078, + "step": 29215 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578982019970847e-05, + "loss": 0.287, + "step": 29220 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578198234916057e-05, + "loss": 0.2512, + "step": 29225 + }, + { + "epoch": 1.36, + "learning_rate": 1.557741444986127e-05, + "loss": 0.3908, + "step": 29230 + }, + { + "epoch": 1.36, + "learning_rate": 1.5576630664806485e-05, + "loss": 0.3252, + "step": 29235 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575846879751695e-05, + "loss": 0.0572, + "step": 29240 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575063094696913e-05, + "loss": 0.0728, + "step": 29245 + }, + { + "epoch": 1.36, + "learning_rate": 1.5574279309642123e-05, + "loss": 0.1044, + "step": 29250 + }, + { + "epoch": 1.37, + "learning_rate": 1.5573495524587337e-05, + "loss": 0.1306, + "step": 29255 + }, + { + "epoch": 1.37, + "learning_rate": 1.557271173953255e-05, + "loss": 0.1469, + "step": 29260 + }, + { + "epoch": 1.37, + "learning_rate": 1.5571927954477765e-05, + "loss": 0.1307, + "step": 29265 + }, + { + "epoch": 1.37, + "learning_rate": 1.557114416942298e-05, + "loss": 0.2949, + "step": 29270 + }, + { + "epoch": 1.37, + "learning_rate": 1.5570360384368193e-05, + "loss": 0.1483, + "step": 29275 + }, + { + "epoch": 1.37, + "learning_rate": 1.5569576599313407e-05, + "loss": 0.3853, + "step": 29280 + }, + { + "epoch": 1.37, + "learning_rate": 1.556879281425862e-05, + "loss": 0.2308, + "step": 29285 + }, + { + "epoch": 1.37, + "learning_rate": 1.556800902920383e-05, + "loss": 0.0336, + "step": 29290 + }, + { + "epoch": 1.37, + "learning_rate": 1.556722524414905e-05, + "loss": 0.0368, + "step": 29295 + }, + { + "epoch": 1.37, + "learning_rate": 1.556644145909426e-05, + "loss": 0.0633, + "step": 29300 + }, + { + "epoch": 1.37, + "learning_rate": 1.5565657674039473e-05, + "loss": 0.1041, + "step": 29305 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564873888984687e-05, + "loss": 0.2455, + "step": 29310 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564090103929897e-05, + "loss": 0.128, + "step": 29315 + }, + { + "epoch": 1.37, + "learning_rate": 1.5563306318875115e-05, + "loss": 0.1876, + "step": 29320 + }, + { + "epoch": 1.37, + "learning_rate": 1.5562522533820325e-05, + "loss": 0.1958, + "step": 29325 + }, + { + "epoch": 1.37, + "learning_rate": 1.556173874876554e-05, + "loss": 0.3136, + "step": 29330 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560954963710753e-05, + "loss": 0.3116, + "step": 29335 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560171178655967e-05, + "loss": 0.0619, + "step": 29340 + }, + { + "epoch": 1.37, + "learning_rate": 1.555938739360118e-05, + "loss": 0.0497, + "step": 29345 + }, + { + "epoch": 1.37, + "learning_rate": 1.5558603608546395e-05, + "loss": 0.1101, + "step": 29350 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557819823491605e-05, + "loss": 0.1347, + "step": 29355 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557036038436822e-05, + "loss": 0.14, + "step": 29360 + }, + { + "epoch": 1.37, + "learning_rate": 1.5556252253382033e-05, + "loss": 0.2094, + "step": 29365 + }, + { + "epoch": 1.37, + "learning_rate": 1.5555468468327247e-05, + "loss": 0.2115, + "step": 29370 + }, + { + "epoch": 1.37, + "learning_rate": 1.555468468327246e-05, + "loss": 0.23, + "step": 29375 + }, + { + "epoch": 1.37, + "learning_rate": 1.5553900898217675e-05, + "loss": 0.409, + "step": 29380 + }, + { + "epoch": 1.37, + "learning_rate": 1.555311711316289e-05, + "loss": 0.2575, + "step": 29385 + }, + { + "epoch": 1.37, + "learning_rate": 1.55523333281081e-05, + "loss": 0.0701, + "step": 29390 + }, + { + "epoch": 1.37, + "learning_rate": 1.5551549543053316e-05, + "loss": 0.0587, + "step": 29395 + }, + { + "epoch": 1.37, + "learning_rate": 1.5550765757998527e-05, + "loss": 0.1614, + "step": 29400 + }, + { + "epoch": 1.37, + "learning_rate": 1.554998197294374e-05, + "loss": 0.1066, + "step": 29405 + }, + { + "epoch": 1.37, + "learning_rate": 1.5549198187888955e-05, + "loss": 0.1333, + "step": 29410 + }, + { + "epoch": 1.37, + "learning_rate": 1.554841440283417e-05, + "loss": 0.181, + "step": 29415 + }, + { + "epoch": 1.37, + "learning_rate": 1.5547630617779383e-05, + "loss": 0.183, + "step": 29420 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546846832724596e-05, + "loss": 0.2203, + "step": 29425 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546063047669807e-05, + "loss": 0.4626, + "step": 29430 + }, + { + "epoch": 1.37, + "learning_rate": 1.554527926261502e-05, + "loss": 0.2009, + "step": 29435 + }, + { + "epoch": 1.37, + "learning_rate": 1.5544495477560235e-05, + "loss": 0.0562, + "step": 29440 + }, + { + "epoch": 1.37, + "learning_rate": 1.554371169250545e-05, + "loss": 0.1155, + "step": 29445 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542927907450663e-05, + "loss": 0.0612, + "step": 29450 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542144122395873e-05, + "loss": 0.1157, + "step": 29455 + }, + { + "epoch": 1.37, + "learning_rate": 1.554136033734109e-05, + "loss": 0.1621, + "step": 29460 + }, + { + "epoch": 1.37, + "learning_rate": 1.55405765522863e-05, + "loss": 0.0789, + "step": 29465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5539792767231515e-05, + "loss": 0.2351, + "step": 29470 + }, + { + "epoch": 1.38, + "learning_rate": 1.553900898217673e-05, + "loss": 0.2513, + "step": 29475 + }, + { + "epoch": 1.38, + "learning_rate": 1.5538225197121943e-05, + "loss": 0.3753, + "step": 29480 + }, + { + "epoch": 1.38, + "learning_rate": 1.5537441412067157e-05, + "loss": 0.2661, + "step": 29485 + }, + { + "epoch": 1.38, + "learning_rate": 1.553665762701237e-05, + "loss": 0.1098, + "step": 29490 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535873841957584e-05, + "loss": 0.0493, + "step": 29495 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535090056902795e-05, + "loss": 0.1339, + "step": 29500 + }, + { + "epoch": 1.38, + "learning_rate": 1.553430627184801e-05, + "loss": 0.1432, + "step": 29505 + }, + { + "epoch": 1.38, + "learning_rate": 1.5533522486793223e-05, + "loss": 0.107, + "step": 29510 + }, + { + "epoch": 1.38, + "learning_rate": 1.5532738701738437e-05, + "loss": 0.1993, + "step": 29515 + }, + { + "epoch": 1.38, + "learning_rate": 1.553195491668365e-05, + "loss": 0.3002, + "step": 29520 + }, + { + "epoch": 1.38, + "learning_rate": 1.5531171131628864e-05, + "loss": 0.2679, + "step": 29525 + }, + { + "epoch": 1.38, + "learning_rate": 1.5530387346574075e-05, + "loss": 0.3519, + "step": 29530 + }, + { + "epoch": 1.38, + "learning_rate": 1.5529603561519292e-05, + "loss": 0.2758, + "step": 29535 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528819776464503e-05, + "loss": 0.1208, + "step": 29540 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528035991409717e-05, + "loss": 0.0767, + "step": 29545 + }, + { + "epoch": 1.38, + "learning_rate": 1.552725220635493e-05, + "loss": 0.0603, + "step": 29550 + }, + { + "epoch": 1.38, + "learning_rate": 1.5526468421300144e-05, + "loss": 0.0691, + "step": 29555 + }, + { + "epoch": 1.38, + "learning_rate": 1.552568463624536e-05, + "loss": 0.1479, + "step": 29560 + }, + { + "epoch": 1.38, + "learning_rate": 1.552490085119057e-05, + "loss": 0.0891, + "step": 29565 + }, + { + "epoch": 1.38, + "learning_rate": 1.5524117066135783e-05, + "loss": 0.2104, + "step": 29570 + }, + { + "epoch": 1.38, + "learning_rate": 1.5523333281080997e-05, + "loss": 0.2748, + "step": 29575 + }, + { + "epoch": 1.38, + "learning_rate": 1.552254949602621e-05, + "loss": 0.3622, + "step": 29580 + }, + { + "epoch": 1.38, + "learning_rate": 1.5521765710971425e-05, + "loss": 0.252, + "step": 29585 + }, + { + "epoch": 1.38, + "learning_rate": 1.552098192591664e-05, + "loss": 0.079, + "step": 29590 + }, + { + "epoch": 1.38, + "learning_rate": 1.5520198140861852e-05, + "loss": 0.0913, + "step": 29595 + }, + { + "epoch": 1.38, + "learning_rate": 1.5519414355807066e-05, + "loss": 0.1742, + "step": 29600 + }, + { + "epoch": 1.38, + "learning_rate": 1.5518630570752277e-05, + "loss": 0.073, + "step": 29605 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517846785697494e-05, + "loss": 0.091, + "step": 29610 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517063000642705e-05, + "loss": 0.1321, + "step": 29615 + }, + { + "epoch": 1.38, + "learning_rate": 1.551627921558792e-05, + "loss": 0.1131, + "step": 29620 + }, + { + "epoch": 1.38, + "learning_rate": 1.5515495430533132e-05, + "loss": 0.3363, + "step": 29625 + }, + { + "epoch": 1.38, + "learning_rate": 1.5514711645478343e-05, + "loss": 0.3416, + "step": 29630 + }, + { + "epoch": 1.38, + "learning_rate": 1.551392786042356e-05, + "loss": 0.3418, + "step": 29635 + }, + { + "epoch": 1.38, + "learning_rate": 1.551314407536877e-05, + "loss": 0.0653, + "step": 29640 + }, + { + "epoch": 1.38, + "learning_rate": 1.5512360290313985e-05, + "loss": 0.0418, + "step": 29645 + }, + { + "epoch": 1.38, + "learning_rate": 1.55115765052592e-05, + "loss": 0.0941, + "step": 29650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510792720204412e-05, + "loss": 0.1759, + "step": 29655 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510008935149626e-05, + "loss": 0.111, + "step": 29660 + }, + { + "epoch": 1.38, + "learning_rate": 1.550922515009484e-05, + "loss": 0.1351, + "step": 29665 + }, + { + "epoch": 1.38, + "learning_rate": 1.550844136504005e-05, + "loss": 0.1784, + "step": 29670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5507657579985268e-05, + "loss": 0.2896, + "step": 29675 + }, + { + "epoch": 1.38, + "learning_rate": 1.550687379493048e-05, + "loss": 0.3232, + "step": 29680 + }, + { + "epoch": 1.39, + "learning_rate": 1.5506090009875692e-05, + "loss": 0.5113, + "step": 29685 + }, + { + "epoch": 1.39, + "learning_rate": 1.5505306224820906e-05, + "loss": 0.0731, + "step": 29690 + }, + { + "epoch": 1.39, + "learning_rate": 1.550452243976612e-05, + "loss": 0.0491, + "step": 29695 + }, + { + "epoch": 1.39, + "learning_rate": 1.5503738654711334e-05, + "loss": 0.1016, + "step": 29700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502954869656545e-05, + "loss": 0.0946, + "step": 29705 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502171084601762e-05, + "loss": 0.1709, + "step": 29710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5501387299546973e-05, + "loss": 0.2002, + "step": 29715 + }, + { + "epoch": 1.39, + "learning_rate": 1.5500603514492186e-05, + "loss": 0.2524, + "step": 29720 + }, + { + "epoch": 1.39, + "learning_rate": 1.54998197294374e-05, + "loss": 0.2481, + "step": 29725 + }, + { + "epoch": 1.39, + "learning_rate": 1.5499035944382614e-05, + "loss": 0.4631, + "step": 29730 + }, + { + "epoch": 1.39, + "learning_rate": 1.5498252159327828e-05, + "loss": 0.273, + "step": 29735 + }, + { + "epoch": 1.39, + "learning_rate": 1.5497468374273042e-05, + "loss": 0.0508, + "step": 29740 + }, + { + "epoch": 1.39, + "learning_rate": 1.5496684589218253e-05, + "loss": 0.1125, + "step": 29745 + }, + { + "epoch": 1.39, + "learning_rate": 1.549590080416347e-05, + "loss": 0.0327, + "step": 29750 + }, + { + "epoch": 1.39, + "learning_rate": 1.549511701910868e-05, + "loss": 0.1278, + "step": 29755 + }, + { + "epoch": 1.39, + "learning_rate": 1.5494333234053894e-05, + "loss": 0.0829, + "step": 29760 + }, + { + "epoch": 1.39, + "learning_rate": 1.5493549448999108e-05, + "loss": 0.1396, + "step": 29765 + }, + { + "epoch": 1.39, + "learning_rate": 1.549276566394432e-05, + "loss": 0.1894, + "step": 29770 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491981878889536e-05, + "loss": 0.2046, + "step": 29775 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491198093834747e-05, + "loss": 0.2948, + "step": 29780 + }, + { + "epoch": 1.39, + "learning_rate": 1.549041430877996e-05, + "loss": 0.3256, + "step": 29785 + }, + { + "epoch": 1.39, + "learning_rate": 1.5489630523725174e-05, + "loss": 0.09, + "step": 29790 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488846738670388e-05, + "loss": 0.0646, + "step": 29795 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488062953615602e-05, + "loss": 0.1158, + "step": 29800 + }, + { + "epoch": 1.39, + "learning_rate": 1.5487279168560816e-05, + "loss": 0.1228, + "step": 29805 + }, + { + "epoch": 1.39, + "learning_rate": 1.548649538350603e-05, + "loss": 0.1435, + "step": 29810 + }, + { + "epoch": 1.39, + "learning_rate": 1.5485711598451244e-05, + "loss": 0.1631, + "step": 29815 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484927813396454e-05, + "loss": 0.0708, + "step": 29820 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484144028341668e-05, + "loss": 0.4099, + "step": 29825 + }, + { + "epoch": 1.39, + "learning_rate": 1.5483360243286882e-05, + "loss": 0.4474, + "step": 29830 + }, + { + "epoch": 1.39, + "learning_rate": 1.5482576458232096e-05, + "loss": 0.3448, + "step": 29835 + }, + { + "epoch": 1.39, + "learning_rate": 1.548179267317731e-05, + "loss": 0.0606, + "step": 29840 + }, + { + "epoch": 1.39, + "learning_rate": 1.548100888812252e-05, + "loss": 0.1094, + "step": 29845 + }, + { + "epoch": 1.39, + "learning_rate": 1.5480225103067738e-05, + "loss": 0.1081, + "step": 29850 + }, + { + "epoch": 1.39, + "learning_rate": 1.547944131801295e-05, + "loss": 0.1361, + "step": 29855 + }, + { + "epoch": 1.39, + "learning_rate": 1.5478657532958162e-05, + "loss": 0.1611, + "step": 29860 + }, + { + "epoch": 1.39, + "learning_rate": 1.5477873747903376e-05, + "loss": 0.1986, + "step": 29865 + }, + { + "epoch": 1.39, + "learning_rate": 1.547708996284859e-05, + "loss": 0.1848, + "step": 29870 + }, + { + "epoch": 1.39, + "learning_rate": 1.5476306177793804e-05, + "loss": 0.3482, + "step": 29875 + }, + { + "epoch": 1.39, + "learning_rate": 1.5475522392739018e-05, + "loss": 0.4131, + "step": 29880 + }, + { + "epoch": 1.39, + "learning_rate": 1.547473860768423e-05, + "loss": 0.2946, + "step": 29885 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473954822629442e-05, + "loss": 0.0356, + "step": 29890 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473171037574656e-05, + "loss": 0.0547, + "step": 29895 + }, + { + "epoch": 1.4, + "learning_rate": 1.547238725251987e-05, + "loss": 0.1375, + "step": 29900 + }, + { + "epoch": 1.4, + "learning_rate": 1.5471603467465084e-05, + "loss": 0.119, + "step": 29905 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470819682410298e-05, + "loss": 0.107, + "step": 29910 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470035897355512e-05, + "loss": 0.1367, + "step": 29915 + }, + { + "epoch": 1.4, + "learning_rate": 1.5469252112300722e-05, + "loss": 0.147, + "step": 29920 + }, + { + "epoch": 1.4, + "learning_rate": 1.546846832724594e-05, + "loss": 0.2123, + "step": 29925 + }, + { + "epoch": 1.4, + "learning_rate": 1.546768454219115e-05, + "loss": 0.3934, + "step": 29930 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466900757136364e-05, + "loss": 0.2301, + "step": 29935 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466116972081578e-05, + "loss": 0.045, + "step": 29940 + }, + { + "epoch": 1.4, + "learning_rate": 1.5465333187026792e-05, + "loss": 0.092, + "step": 29945 + }, + { + "epoch": 1.4, + "learning_rate": 1.5464549401972006e-05, + "loss": 0.0908, + "step": 29950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5463765616917216e-05, + "loss": 0.1098, + "step": 29955 + }, + { + "epoch": 1.4, + "learning_rate": 1.546298183186243e-05, + "loss": 0.0832, + "step": 29960 + }, + { + "epoch": 1.4, + "learning_rate": 1.5462198046807644e-05, + "loss": 0.1968, + "step": 29965 + }, + { + "epoch": 1.4, + "learning_rate": 1.5461414261752858e-05, + "loss": 0.2016, + "step": 29970 + }, + { + "epoch": 1.4, + "learning_rate": 1.5460630476698072e-05, + "loss": 0.2575, + "step": 29975 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459846691643286e-05, + "loss": 0.4952, + "step": 29980 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459062906588496e-05, + "loss": 0.364, + "step": 29985 + }, + { + "epoch": 1.4, + "learning_rate": 1.5458279121533714e-05, + "loss": 0.0521, + "step": 29990 + }, + { + "epoch": 1.4, + "learning_rate": 1.5457495336478924e-05, + "loss": 0.0457, + "step": 29995 + }, + { + "epoch": 1.4, + "learning_rate": 1.5456711551424138e-05, + "loss": 0.0946, + "step": 30000 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455927766369352e-05, + "loss": 0.1691, + "step": 30005 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455143981314566e-05, + "loss": 0.1704, + "step": 30010 + }, + { + "epoch": 1.4, + "learning_rate": 1.545436019625978e-05, + "loss": 0.1653, + "step": 30015 + }, + { + "epoch": 1.4, + "learning_rate": 1.545357641120499e-05, + "loss": 0.1592, + "step": 30020 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452792626150208e-05, + "loss": 0.2251, + "step": 30025 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452008841095418e-05, + "loss": 0.2223, + "step": 30030 + }, + { + "epoch": 1.4, + "learning_rate": 1.5451225056040632e-05, + "loss": 0.3359, + "step": 30035 + }, + { + "epoch": 1.4, + "learning_rate": 1.5450441270985846e-05, + "loss": 0.0442, + "step": 30040 + }, + { + "epoch": 1.4, + "learning_rate": 1.544965748593106e-05, + "loss": 0.1268, + "step": 30045 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448873700876274e-05, + "loss": 0.1322, + "step": 30050 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448089915821488e-05, + "loss": 0.0535, + "step": 30055 + }, + { + "epoch": 1.4, + "learning_rate": 1.5447306130766698e-05, + "loss": 0.1431, + "step": 30060 + }, + { + "epoch": 1.4, + "learning_rate": 1.5446522345711915e-05, + "loss": 0.1551, + "step": 30065 + }, + { + "epoch": 1.4, + "learning_rate": 1.5445738560657126e-05, + "loss": 0.1445, + "step": 30070 + }, + { + "epoch": 1.4, + "learning_rate": 1.544495477560234e-05, + "loss": 0.2025, + "step": 30075 + }, + { + "epoch": 1.4, + "learning_rate": 1.5444170990547554e-05, + "loss": 0.3375, + "step": 30080 + }, + { + "epoch": 1.4, + "learning_rate": 1.5443387205492764e-05, + "loss": 0.2115, + "step": 30085 + }, + { + "epoch": 1.4, + "learning_rate": 1.544260342043798e-05, + "loss": 0.0324, + "step": 30090 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441819635383192e-05, + "loss": 0.0582, + "step": 30095 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441035850328406e-05, + "loss": 0.0829, + "step": 30100 + }, + { + "epoch": 1.4, + "learning_rate": 1.544025206527362e-05, + "loss": 0.0776, + "step": 30105 + }, + { + "epoch": 1.4, + "learning_rate": 1.5439468280218834e-05, + "loss": 0.1564, + "step": 30110 + }, + { + "epoch": 1.41, + "learning_rate": 1.5438684495164048e-05, + "loss": 0.1717, + "step": 30115 + }, + { + "epoch": 1.41, + "learning_rate": 1.543790071010926e-05, + "loss": 0.217, + "step": 30120 + }, + { + "epoch": 1.41, + "learning_rate": 1.5437116925054476e-05, + "loss": 0.2191, + "step": 30125 + }, + { + "epoch": 1.41, + "learning_rate": 1.543633313999969e-05, + "loss": 0.3155, + "step": 30130 + }, + { + "epoch": 1.41, + "learning_rate": 1.54355493549449e-05, + "loss": 0.3595, + "step": 30135 + }, + { + "epoch": 1.41, + "learning_rate": 1.5434765569890117e-05, + "loss": 0.0571, + "step": 30140 + }, + { + "epoch": 1.41, + "learning_rate": 1.5433981784835328e-05, + "loss": 0.0568, + "step": 30145 + }, + { + "epoch": 1.41, + "learning_rate": 1.543319799978054e-05, + "loss": 0.077, + "step": 30150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5432414214725756e-05, + "loss": 0.1147, + "step": 30155 + }, + { + "epoch": 1.41, + "learning_rate": 1.5431630429670966e-05, + "loss": 0.0826, + "step": 30160 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430846644616183e-05, + "loss": 0.1165, + "step": 30165 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430062859561394e-05, + "loss": 0.1987, + "step": 30170 + }, + { + "epoch": 1.41, + "learning_rate": 1.5429279074506608e-05, + "loss": 0.2379, + "step": 30175 + }, + { + "epoch": 1.41, + "learning_rate": 1.542849528945182e-05, + "loss": 0.5167, + "step": 30180 + }, + { + "epoch": 1.41, + "learning_rate": 1.5427711504397036e-05, + "loss": 0.2545, + "step": 30185 + }, + { + "epoch": 1.41, + "learning_rate": 1.542692771934225e-05, + "loss": 0.0899, + "step": 30190 + }, + { + "epoch": 1.41, + "learning_rate": 1.5426143934287463e-05, + "loss": 0.0671, + "step": 30195 + }, + { + "epoch": 1.41, + "learning_rate": 1.5425360149232674e-05, + "loss": 0.1145, + "step": 30200 + }, + { + "epoch": 1.41, + "learning_rate": 1.542457636417789e-05, + "loss": 0.0848, + "step": 30205 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423792579123102e-05, + "loss": 0.208, + "step": 30210 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423008794068316e-05, + "loss": 0.1277, + "step": 30215 + }, + { + "epoch": 1.41, + "learning_rate": 1.542222500901353e-05, + "loss": 0.182, + "step": 30220 + }, + { + "epoch": 1.41, + "learning_rate": 1.5421441223958743e-05, + "loss": 0.2234, + "step": 30225 + }, + { + "epoch": 1.41, + "learning_rate": 1.5420657438903957e-05, + "loss": 0.3056, + "step": 30230 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419873653849168e-05, + "loss": 0.4144, + "step": 30235 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419089868794385e-05, + "loss": 0.0343, + "step": 30240 + }, + { + "epoch": 1.41, + "learning_rate": 1.5418306083739596e-05, + "loss": 0.0805, + "step": 30245 + }, + { + "epoch": 1.41, + "learning_rate": 1.541752229868481e-05, + "loss": 0.1667, + "step": 30250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5416738513630024e-05, + "loss": 0.1355, + "step": 30255 + }, + { + "epoch": 1.41, + "learning_rate": 1.5415954728575237e-05, + "loss": 0.1269, + "step": 30260 + }, + { + "epoch": 1.41, + "learning_rate": 1.541517094352045e-05, + "loss": 0.246, + "step": 30265 + }, + { + "epoch": 1.41, + "learning_rate": 1.5414387158465665e-05, + "loss": 0.1213, + "step": 30270 + }, + { + "epoch": 1.41, + "learning_rate": 1.5413603373410876e-05, + "loss": 0.169, + "step": 30275 + }, + { + "epoch": 1.41, + "learning_rate": 1.541281958835609e-05, + "loss": 0.3212, + "step": 30280 + }, + { + "epoch": 1.41, + "learning_rate": 1.5412035803301304e-05, + "loss": 0.1963, + "step": 30285 + }, + { + "epoch": 1.41, + "learning_rate": 1.5411252018246517e-05, + "loss": 0.0683, + "step": 30290 + }, + { + "epoch": 1.41, + "learning_rate": 1.541046823319173e-05, + "loss": 0.0797, + "step": 30295 + }, + { + "epoch": 1.41, + "learning_rate": 1.5409684448136942e-05, + "loss": 0.0972, + "step": 30300 + }, + { + "epoch": 1.41, + "learning_rate": 1.540890066308216e-05, + "loss": 0.1827, + "step": 30305 + }, + { + "epoch": 1.41, + "learning_rate": 1.540811687802737e-05, + "loss": 0.0909, + "step": 30310 + }, + { + "epoch": 1.41, + "learning_rate": 1.5407333092972584e-05, + "loss": 0.1743, + "step": 30315 + }, + { + "epoch": 1.41, + "learning_rate": 1.5406549307917798e-05, + "loss": 0.1385, + "step": 30320 + }, + { + "epoch": 1.42, + "learning_rate": 1.540576552286301e-05, + "loss": 0.2003, + "step": 30325 + }, + { + "epoch": 1.42, + "learning_rate": 1.5404981737808225e-05, + "loss": 0.4004, + "step": 30330 + }, + { + "epoch": 1.42, + "learning_rate": 1.540419795275344e-05, + "loss": 0.2143, + "step": 30335 + }, + { + "epoch": 1.42, + "learning_rate": 1.5403414167698653e-05, + "loss": 0.0175, + "step": 30340 + }, + { + "epoch": 1.42, + "learning_rate": 1.5402630382643864e-05, + "loss": 0.1199, + "step": 30345 + }, + { + "epoch": 1.42, + "learning_rate": 1.5401846597589078e-05, + "loss": 0.0549, + "step": 30350 + }, + { + "epoch": 1.42, + "learning_rate": 1.540106281253429e-05, + "loss": 0.1376, + "step": 30355 + }, + { + "epoch": 1.42, + "learning_rate": 1.5400279027479505e-05, + "loss": 0.1743, + "step": 30360 + }, + { + "epoch": 1.42, + "learning_rate": 1.539949524242472e-05, + "loss": 0.2547, + "step": 30365 + }, + { + "epoch": 1.42, + "learning_rate": 1.5398711457369933e-05, + "loss": 0.1769, + "step": 30370 + }, + { + "epoch": 1.42, + "learning_rate": 1.5397927672315144e-05, + "loss": 0.3749, + "step": 30375 + }, + { + "epoch": 1.42, + "learning_rate": 1.539714388726036e-05, + "loss": 0.5307, + "step": 30380 + }, + { + "epoch": 1.42, + "learning_rate": 1.539636010220557e-05, + "loss": 0.3359, + "step": 30385 + }, + { + "epoch": 1.42, + "learning_rate": 1.5395576317150785e-05, + "loss": 0.1245, + "step": 30390 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394792532096e-05, + "loss": 0.0377, + "step": 30395 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394008747041213e-05, + "loss": 0.1, + "step": 30400 + }, + { + "epoch": 1.42, + "learning_rate": 1.5393224961986427e-05, + "loss": 0.146, + "step": 30405 + }, + { + "epoch": 1.42, + "learning_rate": 1.5392441176931638e-05, + "loss": 0.1585, + "step": 30410 + }, + { + "epoch": 1.42, + "learning_rate": 1.539165739187685e-05, + "loss": 0.2002, + "step": 30415 + }, + { + "epoch": 1.42, + "learning_rate": 1.5390873606822065e-05, + "loss": 0.2221, + "step": 30420 + }, + { + "epoch": 1.42, + "learning_rate": 1.539008982176728e-05, + "loss": 0.2473, + "step": 30425 + }, + { + "epoch": 1.42, + "learning_rate": 1.5389306036712493e-05, + "loss": 0.3531, + "step": 30430 + }, + { + "epoch": 1.42, + "learning_rate": 1.5388522251657707e-05, + "loss": 0.3376, + "step": 30435 + }, + { + "epoch": 1.42, + "learning_rate": 1.538773846660292e-05, + "loss": 0.1212, + "step": 30440 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386954681548135e-05, + "loss": 0.0599, + "step": 30445 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386170896493345e-05, + "loss": 0.0923, + "step": 30450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5385387111438563e-05, + "loss": 0.0752, + "step": 30455 + }, + { + "epoch": 1.42, + "learning_rate": 1.5384603326383773e-05, + "loss": 0.074, + "step": 30460 + }, + { + "epoch": 1.42, + "learning_rate": 1.5383819541328987e-05, + "loss": 0.0945, + "step": 30465 + }, + { + "epoch": 1.42, + "learning_rate": 1.53830357562742e-05, + "loss": 0.2006, + "step": 30470 + }, + { + "epoch": 1.42, + "learning_rate": 1.538225197121941e-05, + "loss": 0.2685, + "step": 30475 + }, + { + "epoch": 1.42, + "learning_rate": 1.538146818616463e-05, + "loss": 0.2831, + "step": 30480 + }, + { + "epoch": 1.42, + "learning_rate": 1.538068440110984e-05, + "loss": 0.1803, + "step": 30485 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379900616055053e-05, + "loss": 0.0256, + "step": 30490 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379116831000267e-05, + "loss": 0.0892, + "step": 30495 + }, + { + "epoch": 1.42, + "learning_rate": 1.537833304594548e-05, + "loss": 0.1554, + "step": 30500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5377549260890695e-05, + "loss": 0.1182, + "step": 30505 + }, + { + "epoch": 1.42, + "learning_rate": 1.537676547583591e-05, + "loss": 0.1168, + "step": 30510 + }, + { + "epoch": 1.42, + "learning_rate": 1.537598169078112e-05, + "loss": 0.1539, + "step": 30515 + }, + { + "epoch": 1.42, + "learning_rate": 1.5375197905726337e-05, + "loss": 0.229, + "step": 30520 + }, + { + "epoch": 1.42, + "learning_rate": 1.5374414120671547e-05, + "loss": 0.2612, + "step": 30525 + }, + { + "epoch": 1.42, + "learning_rate": 1.537363033561676e-05, + "loss": 0.4199, + "step": 30530 + }, + { + "epoch": 1.42, + "learning_rate": 1.5372846550561975e-05, + "loss": 0.3006, + "step": 30535 + }, + { + "epoch": 1.43, + "learning_rate": 1.537206276550719e-05, + "loss": 0.0404, + "step": 30540 + }, + { + "epoch": 1.43, + "learning_rate": 1.5371278980452403e-05, + "loss": 0.0872, + "step": 30545 + }, + { + "epoch": 1.43, + "learning_rate": 1.5370495195397613e-05, + "loss": 0.1091, + "step": 30550 + }, + { + "epoch": 1.43, + "learning_rate": 1.536971141034283e-05, + "loss": 0.0654, + "step": 30555 + }, + { + "epoch": 1.43, + "learning_rate": 1.536892762528804e-05, + "loss": 0.1402, + "step": 30560 + }, + { + "epoch": 1.43, + "learning_rate": 1.5368143840233255e-05, + "loss": 0.1676, + "step": 30565 + }, + { + "epoch": 1.43, + "learning_rate": 1.536736005517847e-05, + "loss": 0.1912, + "step": 30570 + }, + { + "epoch": 1.43, + "learning_rate": 1.5366576270123683e-05, + "loss": 0.3351, + "step": 30575 + }, + { + "epoch": 1.43, + "learning_rate": 1.5365792485068897e-05, + "loss": 0.367, + "step": 30580 + }, + { + "epoch": 1.43, + "learning_rate": 1.536500870001411e-05, + "loss": 0.2385, + "step": 30585 + }, + { + "epoch": 1.43, + "learning_rate": 1.536422491495932e-05, + "loss": 0.0754, + "step": 30590 + }, + { + "epoch": 1.43, + "learning_rate": 1.536344112990454e-05, + "loss": 0.0664, + "step": 30595 + }, + { + "epoch": 1.43, + "learning_rate": 1.536265734484975e-05, + "loss": 0.1467, + "step": 30600 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361873559794963e-05, + "loss": 0.0865, + "step": 30605 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361089774740177e-05, + "loss": 0.1208, + "step": 30610 + }, + { + "epoch": 1.43, + "learning_rate": 1.5360305989685387e-05, + "loss": 0.1351, + "step": 30615 + }, + { + "epoch": 1.43, + "learning_rate": 1.5359522204630605e-05, + "loss": 0.1539, + "step": 30620 + }, + { + "epoch": 1.43, + "learning_rate": 1.5358738419575815e-05, + "loss": 0.2201, + "step": 30625 + }, + { + "epoch": 1.43, + "learning_rate": 1.535795463452103e-05, + "loss": 0.3883, + "step": 30630 + }, + { + "epoch": 1.43, + "learning_rate": 1.5357170849466243e-05, + "loss": 0.2628, + "step": 30635 + }, + { + "epoch": 1.43, + "learning_rate": 1.5356387064411457e-05, + "loss": 0.0461, + "step": 30640 + }, + { + "epoch": 1.43, + "learning_rate": 1.535560327935667e-05, + "loss": 0.0915, + "step": 30645 + }, + { + "epoch": 1.43, + "learning_rate": 1.5354819494301885e-05, + "loss": 0.0698, + "step": 30650 + }, + { + "epoch": 1.43, + "learning_rate": 1.53540357092471e-05, + "loss": 0.1448, + "step": 30655 + }, + { + "epoch": 1.43, + "learning_rate": 1.5353251924192313e-05, + "loss": 0.1386, + "step": 30660 + }, + { + "epoch": 1.43, + "learning_rate": 1.5352468139137523e-05, + "loss": 0.169, + "step": 30665 + }, + { + "epoch": 1.43, + "learning_rate": 1.5351684354082737e-05, + "loss": 0.1716, + "step": 30670 + }, + { + "epoch": 1.43, + "learning_rate": 1.535090056902795e-05, + "loss": 0.1982, + "step": 30675 + }, + { + "epoch": 1.43, + "learning_rate": 1.5350116783973165e-05, + "loss": 0.3828, + "step": 30680 + }, + { + "epoch": 1.43, + "learning_rate": 1.534933299891838e-05, + "loss": 0.2975, + "step": 30685 + }, + { + "epoch": 1.43, + "learning_rate": 1.534854921386359e-05, + "loss": 0.0999, + "step": 30690 + }, + { + "epoch": 1.43, + "learning_rate": 1.5347765428808807e-05, + "loss": 0.0746, + "step": 30695 + }, + { + "epoch": 1.43, + "learning_rate": 1.5346981643754017e-05, + "loss": 0.0579, + "step": 30700 + }, + { + "epoch": 1.43, + "learning_rate": 1.534619785869923e-05, + "loss": 0.1278, + "step": 30705 + }, + { + "epoch": 1.43, + "learning_rate": 1.5345414073644445e-05, + "loss": 0.1335, + "step": 30710 + }, + { + "epoch": 1.43, + "learning_rate": 1.534463028858966e-05, + "loss": 0.1177, + "step": 30715 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343846503534873e-05, + "loss": 0.1861, + "step": 30720 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343062718480087e-05, + "loss": 0.2215, + "step": 30725 + }, + { + "epoch": 1.43, + "learning_rate": 1.5342278933425297e-05, + "loss": 0.3913, + "step": 30730 + }, + { + "epoch": 1.43, + "learning_rate": 1.5341495148370514e-05, + "loss": 0.332, + "step": 30735 + }, + { + "epoch": 1.43, + "learning_rate": 1.5340711363315725e-05, + "loss": 0.0479, + "step": 30740 + }, + { + "epoch": 1.43, + "learning_rate": 1.533992757826094e-05, + "loss": 0.0902, + "step": 30745 + }, + { + "epoch": 1.43, + "learning_rate": 1.5339143793206153e-05, + "loss": 0.0383, + "step": 30750 + }, + { + "epoch": 1.44, + "learning_rate": 1.5338360008151367e-05, + "loss": 0.0873, + "step": 30755 + }, + { + "epoch": 1.44, + "learning_rate": 1.533757622309658e-05, + "loss": 0.058, + "step": 30760 + }, + { + "epoch": 1.44, + "learning_rate": 1.533679243804179e-05, + "loss": 0.1621, + "step": 30765 + }, + { + "epoch": 1.44, + "learning_rate": 1.533600865298701e-05, + "loss": 0.1878, + "step": 30770 + }, + { + "epoch": 1.44, + "learning_rate": 1.533522486793222e-05, + "loss": 0.3271, + "step": 30775 + }, + { + "epoch": 1.44, + "learning_rate": 1.5334441082877433e-05, + "loss": 0.2911, + "step": 30780 + }, + { + "epoch": 1.44, + "learning_rate": 1.5333657297822647e-05, + "loss": 0.2349, + "step": 30785 + }, + { + "epoch": 1.44, + "learning_rate": 1.533287351276786e-05, + "loss": 0.1201, + "step": 30790 + }, + { + "epoch": 1.44, + "learning_rate": 1.5332089727713075e-05, + "loss": 0.0789, + "step": 30795 + }, + { + "epoch": 1.44, + "learning_rate": 1.533130594265829e-05, + "loss": 0.0823, + "step": 30800 + }, + { + "epoch": 1.44, + "learning_rate": 1.53305221576035e-05, + "loss": 0.1153, + "step": 30805 + }, + { + "epoch": 1.44, + "learning_rate": 1.5329738372548713e-05, + "loss": 0.1054, + "step": 30810 + }, + { + "epoch": 1.44, + "learning_rate": 1.5328954587493927e-05, + "loss": 0.1376, + "step": 30815 + }, + { + "epoch": 1.44, + "learning_rate": 1.532817080243914e-05, + "loss": 0.1513, + "step": 30820 + }, + { + "epoch": 1.44, + "learning_rate": 1.5327387017384355e-05, + "loss": 0.2324, + "step": 30825 + }, + { + "epoch": 1.44, + "learning_rate": 1.5326603232329565e-05, + "loss": 0.3203, + "step": 30830 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325819447274782e-05, + "loss": 0.4077, + "step": 30835 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325035662219993e-05, + "loss": 0.0783, + "step": 30840 + }, + { + "epoch": 1.44, + "learning_rate": 1.5324251877165207e-05, + "loss": 0.0441, + "step": 30845 + }, + { + "epoch": 1.44, + "learning_rate": 1.532346809211042e-05, + "loss": 0.0616, + "step": 30850 + }, + { + "epoch": 1.44, + "learning_rate": 1.5322684307055635e-05, + "loss": 0.1068, + "step": 30855 + }, + { + "epoch": 1.44, + "learning_rate": 1.532190052200085e-05, + "loss": 0.1137, + "step": 30860 + }, + { + "epoch": 1.44, + "learning_rate": 1.5321116736946062e-05, + "loss": 0.2083, + "step": 30865 + }, + { + "epoch": 1.44, + "learning_rate": 1.5320332951891276e-05, + "loss": 0.1234, + "step": 30870 + }, + { + "epoch": 1.44, + "learning_rate": 1.5319549166836487e-05, + "loss": 0.2183, + "step": 30875 + }, + { + "epoch": 1.44, + "learning_rate": 1.53187653817817e-05, + "loss": 0.3248, + "step": 30880 + }, + { + "epoch": 1.44, + "learning_rate": 1.5317981596726915e-05, + "loss": 0.2879, + "step": 30885 + }, + { + "epoch": 1.44, + "learning_rate": 1.531719781167213e-05, + "loss": 0.0633, + "step": 30890 + }, + { + "epoch": 1.44, + "learning_rate": 1.5316414026617342e-05, + "loss": 0.0492, + "step": 30895 + }, + { + "epoch": 1.44, + "learning_rate": 1.5315630241562556e-05, + "loss": 0.0931, + "step": 30900 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314846456507767e-05, + "loss": 0.0612, + "step": 30905 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314062671452984e-05, + "loss": 0.0813, + "step": 30910 + }, + { + "epoch": 1.44, + "learning_rate": 1.5313278886398195e-05, + "loss": 0.1622, + "step": 30915 + }, + { + "epoch": 1.44, + "learning_rate": 1.531249510134341e-05, + "loss": 0.1378, + "step": 30920 + }, + { + "epoch": 1.44, + "learning_rate": 1.5311711316288623e-05, + "loss": 0.1609, + "step": 30925 + }, + { + "epoch": 1.44, + "learning_rate": 1.5310927531233836e-05, + "loss": 0.5023, + "step": 30930 + }, + { + "epoch": 1.44, + "learning_rate": 1.531014374617905e-05, + "loss": 0.3593, + "step": 30935 + }, + { + "epoch": 1.44, + "learning_rate": 1.530935996112426e-05, + "loss": 0.1545, + "step": 30940 + }, + { + "epoch": 1.44, + "learning_rate": 1.5308576176069475e-05, + "loss": 0.0696, + "step": 30945 + }, + { + "epoch": 1.44, + "learning_rate": 1.530779239101469e-05, + "loss": 0.0694, + "step": 30950 + }, + { + "epoch": 1.44, + "learning_rate": 1.5307008605959903e-05, + "loss": 0.1546, + "step": 30955 + }, + { + "epoch": 1.44, + "learning_rate": 1.5306224820905116e-05, + "loss": 0.1018, + "step": 30960 + }, + { + "epoch": 1.44, + "learning_rate": 1.530544103585033e-05, + "loss": 0.1363, + "step": 30965 + }, + { + "epoch": 1.45, + "learning_rate": 1.5304657250795544e-05, + "loss": 0.3256, + "step": 30970 + }, + { + "epoch": 1.45, + "learning_rate": 1.5303873465740758e-05, + "loss": 0.3606, + "step": 30975 + }, + { + "epoch": 1.45, + "learning_rate": 1.530308968068597e-05, + "loss": 0.331, + "step": 30980 + }, + { + "epoch": 1.45, + "learning_rate": 1.5302305895631186e-05, + "loss": 0.2471, + "step": 30985 + }, + { + "epoch": 1.45, + "learning_rate": 1.5301522110576397e-05, + "loss": 0.042, + "step": 30990 + }, + { + "epoch": 1.45, + "learning_rate": 1.530073832552161e-05, + "loss": 0.0757, + "step": 30995 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299954540466824e-05, + "loss": 0.0695, + "step": 31000 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299170755412035e-05, + "loss": 0.1101, + "step": 31005 + }, + { + "epoch": 1.45, + "learning_rate": 1.5298386970357252e-05, + "loss": 0.1784, + "step": 31010 + }, + { + "epoch": 1.45, + "learning_rate": 1.5297603185302463e-05, + "loss": 0.1233, + "step": 31015 + }, + { + "epoch": 1.45, + "learning_rate": 1.5296819400247677e-05, + "loss": 0.2507, + "step": 31020 + }, + { + "epoch": 1.45, + "learning_rate": 1.529603561519289e-05, + "loss": 0.248, + "step": 31025 + }, + { + "epoch": 1.45, + "learning_rate": 1.5295251830138104e-05, + "loss": 0.4157, + "step": 31030 + }, + { + "epoch": 1.45, + "learning_rate": 1.5294468045083318e-05, + "loss": 0.2699, + "step": 31035 + }, + { + "epoch": 1.45, + "learning_rate": 1.5293684260028532e-05, + "loss": 0.1, + "step": 31040 + }, + { + "epoch": 1.45, + "learning_rate": 1.5292900474973743e-05, + "loss": 0.0886, + "step": 31045 + }, + { + "epoch": 1.45, + "learning_rate": 1.529211668991896e-05, + "loss": 0.0945, + "step": 31050 + }, + { + "epoch": 1.45, + "learning_rate": 1.529133290486417e-05, + "loss": 0.1045, + "step": 31055 + }, + { + "epoch": 1.45, + "learning_rate": 1.5290549119809384e-05, + "loss": 0.1411, + "step": 31060 + }, + { + "epoch": 1.45, + "learning_rate": 1.52897653347546e-05, + "loss": 0.167, + "step": 31065 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288981549699812e-05, + "loss": 0.2069, + "step": 31070 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288197764645026e-05, + "loss": 0.1847, + "step": 31075 + }, + { + "epoch": 1.45, + "learning_rate": 1.5287413979590237e-05, + "loss": 0.2041, + "step": 31080 + }, + { + "epoch": 1.45, + "learning_rate": 1.5286630194535454e-05, + "loss": 0.2017, + "step": 31085 + }, + { + "epoch": 1.45, + "learning_rate": 1.5285846409480664e-05, + "loss": 0.0352, + "step": 31090 + }, + { + "epoch": 1.45, + "learning_rate": 1.528506262442588e-05, + "loss": 0.0676, + "step": 31095 + }, + { + "epoch": 1.45, + "learning_rate": 1.5284278839371092e-05, + "loss": 0.1651, + "step": 31100 + }, + { + "epoch": 1.45, + "learning_rate": 1.5283495054316306e-05, + "loss": 0.095, + "step": 31105 + }, + { + "epoch": 1.45, + "learning_rate": 1.528271126926152e-05, + "loss": 0.1385, + "step": 31110 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281927484206734e-05, + "loss": 0.16, + "step": 31115 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281143699151944e-05, + "loss": 0.2099, + "step": 31120 + }, + { + "epoch": 1.45, + "learning_rate": 1.5280359914097162e-05, + "loss": 0.3167, + "step": 31125 + }, + { + "epoch": 1.45, + "learning_rate": 1.5279576129042372e-05, + "loss": 0.2805, + "step": 31130 + }, + { + "epoch": 1.45, + "learning_rate": 1.5278792343987586e-05, + "loss": 0.2409, + "step": 31135 + }, + { + "epoch": 1.45, + "learning_rate": 1.52780085589328e-05, + "loss": 0.0747, + "step": 31140 + }, + { + "epoch": 1.45, + "learning_rate": 1.527722477387801e-05, + "loss": 0.0464, + "step": 31145 + }, + { + "epoch": 1.45, + "learning_rate": 1.5276440988823228e-05, + "loss": 0.0867, + "step": 31150 + }, + { + "epoch": 1.45, + "learning_rate": 1.527565720376844e-05, + "loss": 0.1238, + "step": 31155 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274873418713652e-05, + "loss": 0.1296, + "step": 31160 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274089633658866e-05, + "loss": 0.1621, + "step": 31165 + }, + { + "epoch": 1.45, + "learning_rate": 1.527330584860408e-05, + "loss": 0.1919, + "step": 31170 + }, + { + "epoch": 1.45, + "learning_rate": 1.5272522063549294e-05, + "loss": 0.3081, + "step": 31175 + }, + { + "epoch": 1.45, + "learning_rate": 1.5271738278494508e-05, + "loss": 0.3428, + "step": 31180 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270954493439722e-05, + "loss": 0.273, + "step": 31185 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270170708384936e-05, + "loss": 0.1131, + "step": 31190 + }, + { + "epoch": 1.46, + "learning_rate": 1.5269386923330146e-05, + "loss": 0.0283, + "step": 31195 + }, + { + "epoch": 1.46, + "learning_rate": 1.526860313827536e-05, + "loss": 0.0582, + "step": 31200 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267819353220574e-05, + "loss": 0.0585, + "step": 31205 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267035568165788e-05, + "loss": 0.0735, + "step": 31210 + }, + { + "epoch": 1.46, + "learning_rate": 1.5266251783111002e-05, + "loss": 0.272, + "step": 31215 + }, + { + "epoch": 1.46, + "learning_rate": 1.5265467998056212e-05, + "loss": 0.1506, + "step": 31220 + }, + { + "epoch": 1.46, + "learning_rate": 1.526468421300143e-05, + "loss": 0.2312, + "step": 31225 + }, + { + "epoch": 1.46, + "learning_rate": 1.526390042794664e-05, + "loss": 0.2993, + "step": 31230 + }, + { + "epoch": 1.46, + "learning_rate": 1.5263116642891854e-05, + "loss": 0.3461, + "step": 31235 + }, + { + "epoch": 1.46, + "learning_rate": 1.5262332857837068e-05, + "loss": 0.0388, + "step": 31240 + }, + { + "epoch": 1.46, + "learning_rate": 1.5261549072782282e-05, + "loss": 0.0356, + "step": 31245 + }, + { + "epoch": 1.46, + "learning_rate": 1.5260765287727496e-05, + "loss": 0.1398, + "step": 31250 + }, + { + "epoch": 1.46, + "learning_rate": 1.525998150267271e-05, + "loss": 0.0815, + "step": 31255 + }, + { + "epoch": 1.46, + "learning_rate": 1.525919771761792e-05, + "loss": 0.1347, + "step": 31260 + }, + { + "epoch": 1.46, + "learning_rate": 1.5258413932563134e-05, + "loss": 0.1489, + "step": 31265 + }, + { + "epoch": 1.46, + "learning_rate": 1.5257630147508348e-05, + "loss": 0.1687, + "step": 31270 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256846362453562e-05, + "loss": 0.1338, + "step": 31275 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256062577398776e-05, + "loss": 0.3636, + "step": 31280 + }, + { + "epoch": 1.46, + "learning_rate": 1.5255278792343988e-05, + "loss": 0.2142, + "step": 31285 + }, + { + "epoch": 1.46, + "learning_rate": 1.5254495007289202e-05, + "loss": 0.0323, + "step": 31290 + }, + { + "epoch": 1.46, + "learning_rate": 1.5253711222234416e-05, + "loss": 0.0875, + "step": 31295 + }, + { + "epoch": 1.46, + "learning_rate": 1.525292743717963e-05, + "loss": 0.0987, + "step": 31300 + }, + { + "epoch": 1.46, + "learning_rate": 1.5252143652124842e-05, + "loss": 0.1468, + "step": 31305 + }, + { + "epoch": 1.46, + "learning_rate": 1.5251359867070058e-05, + "loss": 0.0654, + "step": 31310 + }, + { + "epoch": 1.46, + "learning_rate": 1.525057608201527e-05, + "loss": 0.1286, + "step": 31315 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249792296960484e-05, + "loss": 0.1423, + "step": 31320 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249008511905696e-05, + "loss": 0.2072, + "step": 31325 + }, + { + "epoch": 1.46, + "learning_rate": 1.5248224726850908e-05, + "loss": 0.3649, + "step": 31330 + }, + { + "epoch": 1.46, + "learning_rate": 1.5247440941796124e-05, + "loss": 0.366, + "step": 31335 + }, + { + "epoch": 1.46, + "learning_rate": 1.5246657156741336e-05, + "loss": 0.0274, + "step": 31340 + }, + { + "epoch": 1.46, + "learning_rate": 1.524587337168655e-05, + "loss": 0.0756, + "step": 31345 + }, + { + "epoch": 1.46, + "learning_rate": 1.5245089586631762e-05, + "loss": 0.1292, + "step": 31350 + }, + { + "epoch": 1.46, + "learning_rate": 1.5244305801576978e-05, + "loss": 0.1784, + "step": 31355 + }, + { + "epoch": 1.46, + "learning_rate": 1.524352201652219e-05, + "loss": 0.1282, + "step": 31360 + }, + { + "epoch": 1.46, + "learning_rate": 1.5242738231467404e-05, + "loss": 0.2259, + "step": 31365 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241954446412616e-05, + "loss": 0.2356, + "step": 31370 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241170661357832e-05, + "loss": 0.2162, + "step": 31375 + }, + { + "epoch": 1.46, + "learning_rate": 1.5240386876303044e-05, + "loss": 0.2779, + "step": 31380 + }, + { + "epoch": 1.46, + "learning_rate": 1.5239603091248258e-05, + "loss": 0.3447, + "step": 31385 + }, + { + "epoch": 1.46, + "learning_rate": 1.523881930619347e-05, + "loss": 0.0535, + "step": 31390 + }, + { + "epoch": 1.46, + "learning_rate": 1.5238035521138684e-05, + "loss": 0.1216, + "step": 31395 + }, + { + "epoch": 1.47, + "learning_rate": 1.5237251736083898e-05, + "loss": 0.0795, + "step": 31400 + }, + { + "epoch": 1.47, + "learning_rate": 1.523646795102911e-05, + "loss": 0.1059, + "step": 31405 + }, + { + "epoch": 1.47, + "learning_rate": 1.5235684165974326e-05, + "loss": 0.079, + "step": 31410 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234900380919538e-05, + "loss": 0.1216, + "step": 31415 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234116595864752e-05, + "loss": 0.1635, + "step": 31420 + }, + { + "epoch": 1.47, + "learning_rate": 1.5233332810809964e-05, + "loss": 0.1558, + "step": 31425 + }, + { + "epoch": 1.47, + "learning_rate": 1.523254902575518e-05, + "loss": 0.405, + "step": 31430 + }, + { + "epoch": 1.47, + "learning_rate": 1.5231765240700392e-05, + "loss": 0.4419, + "step": 31435 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230981455645606e-05, + "loss": 0.0325, + "step": 31440 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230197670590818e-05, + "loss": 0.1278, + "step": 31445 + }, + { + "epoch": 1.47, + "learning_rate": 1.5229413885536033e-05, + "loss": 0.1036, + "step": 31450 + }, + { + "epoch": 1.47, + "learning_rate": 1.5228630100481246e-05, + "loss": 0.1107, + "step": 31455 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227846315426458e-05, + "loss": 0.0893, + "step": 31460 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227062530371672e-05, + "loss": 0.1751, + "step": 31465 + }, + { + "epoch": 1.47, + "learning_rate": 1.5226278745316884e-05, + "loss": 0.1661, + "step": 31470 + }, + { + "epoch": 1.47, + "learning_rate": 1.52254949602621e-05, + "loss": 0.1933, + "step": 31475 + }, + { + "epoch": 1.47, + "learning_rate": 1.5224711175207312e-05, + "loss": 0.2263, + "step": 31480 + }, + { + "epoch": 1.47, + "learning_rate": 1.5223927390152526e-05, + "loss": 0.3486, + "step": 31485 + }, + { + "epoch": 1.47, + "learning_rate": 1.522314360509774e-05, + "loss": 0.0391, + "step": 31490 + }, + { + "epoch": 1.47, + "learning_rate": 1.5222359820042954e-05, + "loss": 0.0702, + "step": 31495 + }, + { + "epoch": 1.47, + "learning_rate": 1.5221576034988166e-05, + "loss": 0.0595, + "step": 31500 + }, + { + "epoch": 1.47, + "learning_rate": 1.522079224993338e-05, + "loss": 0.0184, + "step": 31505 + }, + { + "epoch": 1.47, + "learning_rate": 1.5220008464878594e-05, + "loss": 0.1383, + "step": 31510 + }, + { + "epoch": 1.47, + "learning_rate": 1.5219224679823807e-05, + "loss": 0.1519, + "step": 31515 + }, + { + "epoch": 1.47, + "learning_rate": 1.521844089476902e-05, + "loss": 0.1777, + "step": 31520 + }, + { + "epoch": 1.47, + "learning_rate": 1.5217657109714232e-05, + "loss": 0.2411, + "step": 31525 + }, + { + "epoch": 1.47, + "learning_rate": 1.5216873324659448e-05, + "loss": 0.3028, + "step": 31530 + }, + { + "epoch": 1.47, + "learning_rate": 1.521608953960466e-05, + "loss": 0.379, + "step": 31535 + }, + { + "epoch": 1.47, + "learning_rate": 1.5215305754549874e-05, + "loss": 0.0478, + "step": 31540 + }, + { + "epoch": 1.47, + "learning_rate": 1.5214521969495086e-05, + "loss": 0.0623, + "step": 31545 + }, + { + "epoch": 1.47, + "learning_rate": 1.5213738184440301e-05, + "loss": 0.0523, + "step": 31550 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212954399385514e-05, + "loss": 0.1509, + "step": 31555 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212170614330728e-05, + "loss": 0.1149, + "step": 31560 + }, + { + "epoch": 1.47, + "learning_rate": 1.521138682927594e-05, + "loss": 0.1829, + "step": 31565 + }, + { + "epoch": 1.47, + "learning_rate": 1.5210603044221155e-05, + "loss": 0.1609, + "step": 31570 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209819259166368e-05, + "loss": 0.2295, + "step": 31575 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209035474111581e-05, + "loss": 0.486, + "step": 31580 + }, + { + "epoch": 1.47, + "learning_rate": 1.5208251689056794e-05, + "loss": 0.2702, + "step": 31585 + }, + { + "epoch": 1.47, + "learning_rate": 1.5207467904002008e-05, + "loss": 0.0594, + "step": 31590 + }, + { + "epoch": 1.47, + "learning_rate": 1.5206684118947222e-05, + "loss": 0.1273, + "step": 31595 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205900333892434e-05, + "loss": 0.0883, + "step": 31600 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205116548837648e-05, + "loss": 0.0647, + "step": 31605 + }, + { + "epoch": 1.47, + "learning_rate": 1.5204332763782862e-05, + "loss": 0.0872, + "step": 31610 + }, + { + "epoch": 1.48, + "learning_rate": 1.5203548978728075e-05, + "loss": 0.1148, + "step": 31615 + }, + { + "epoch": 1.48, + "learning_rate": 1.5202765193673288e-05, + "loss": 0.1226, + "step": 31620 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201981408618503e-05, + "loss": 0.2385, + "step": 31625 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201197623563715e-05, + "loss": 0.3862, + "step": 31630 + }, + { + "epoch": 1.48, + "learning_rate": 1.520041383850893e-05, + "loss": 0.3206, + "step": 31635 + }, + { + "epoch": 1.48, + "learning_rate": 1.5199630053454142e-05, + "loss": 0.0743, + "step": 31640 + }, + { + "epoch": 1.48, + "learning_rate": 1.5198846268399357e-05, + "loss": 0.0545, + "step": 31645 + }, + { + "epoch": 1.48, + "learning_rate": 1.519806248334457e-05, + "loss": 0.0978, + "step": 31650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5197278698289782e-05, + "loss": 0.0675, + "step": 31655 + }, + { + "epoch": 1.48, + "learning_rate": 1.5196494913234995e-05, + "loss": 0.1666, + "step": 31660 + }, + { + "epoch": 1.48, + "learning_rate": 1.5195711128180208e-05, + "loss": 0.2834, + "step": 31665 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194927343125423e-05, + "loss": 0.1719, + "step": 31670 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194143558070636e-05, + "loss": 0.2468, + "step": 31675 + }, + { + "epoch": 1.48, + "learning_rate": 1.519335977301585e-05, + "loss": 0.3821, + "step": 31680 + }, + { + "epoch": 1.48, + "learning_rate": 1.5192575987961062e-05, + "loss": 0.3466, + "step": 31685 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191792202906277e-05, + "loss": 0.0438, + "step": 31690 + }, + { + "epoch": 1.48, + "learning_rate": 1.519100841785149e-05, + "loss": 0.066, + "step": 31695 + }, + { + "epoch": 1.48, + "learning_rate": 1.5190224632796703e-05, + "loss": 0.083, + "step": 31700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5189440847741916e-05, + "loss": 0.0671, + "step": 31705 + }, + { + "epoch": 1.48, + "learning_rate": 1.5188657062687131e-05, + "loss": 0.1106, + "step": 31710 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187873277632343e-05, + "loss": 0.1968, + "step": 31715 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187089492577556e-05, + "loss": 0.209, + "step": 31720 + }, + { + "epoch": 1.48, + "learning_rate": 1.5186305707522771e-05, + "loss": 0.1886, + "step": 31725 + }, + { + "epoch": 1.48, + "learning_rate": 1.5185521922467983e-05, + "loss": 0.3375, + "step": 31730 + }, + { + "epoch": 1.48, + "learning_rate": 1.5184738137413197e-05, + "loss": 0.2246, + "step": 31735 + }, + { + "epoch": 1.48, + "learning_rate": 1.518395435235841e-05, + "loss": 0.0597, + "step": 31740 + }, + { + "epoch": 1.48, + "learning_rate": 1.5183170567303625e-05, + "loss": 0.0915, + "step": 31745 + }, + { + "epoch": 1.48, + "learning_rate": 1.5182386782248837e-05, + "loss": 0.1168, + "step": 31750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5181602997194051e-05, + "loss": 0.0906, + "step": 31755 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180819212139263e-05, + "loss": 0.0936, + "step": 31760 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180035427084479e-05, + "loss": 0.1309, + "step": 31765 + }, + { + "epoch": 1.48, + "learning_rate": 1.5179251642029691e-05, + "loss": 0.149, + "step": 31770 + }, + { + "epoch": 1.48, + "learning_rate": 1.5178467856974905e-05, + "loss": 0.2683, + "step": 31775 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177684071920117e-05, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 1.48, + "learning_rate": 1.517690028686533e-05, + "loss": 0.2512, + "step": 31785 + }, + { + "epoch": 1.48, + "learning_rate": 1.5176116501810545e-05, + "loss": 0.059, + "step": 31790 + }, + { + "epoch": 1.48, + "learning_rate": 1.5175332716755757e-05, + "loss": 0.0595, + "step": 31795 + }, + { + "epoch": 1.48, + "learning_rate": 1.5174548931700971e-05, + "loss": 0.0786, + "step": 31800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5173765146646185e-05, + "loss": 0.1279, + "step": 31805 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172981361591399e-05, + "loss": 0.2461, + "step": 31810 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172197576536611e-05, + "loss": 0.1358, + "step": 31815 + }, + { + "epoch": 1.48, + "learning_rate": 1.5171413791481825e-05, + "loss": 0.1626, + "step": 31820 + }, + { + "epoch": 1.48, + "learning_rate": 1.5170630006427039e-05, + "loss": 0.1351, + "step": 31825 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169846221372253e-05, + "loss": 0.3434, + "step": 31830 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169062436317465e-05, + "loss": 0.2988, + "step": 31835 + }, + { + "epoch": 1.49, + "learning_rate": 1.5168278651262681e-05, + "loss": 0.0382, + "step": 31840 + }, + { + "epoch": 1.49, + "learning_rate": 1.5167494866207893e-05, + "loss": 0.0455, + "step": 31845 + }, + { + "epoch": 1.49, + "learning_rate": 1.5166711081153105e-05, + "loss": 0.0726, + "step": 31850 + }, + { + "epoch": 1.49, + "learning_rate": 1.516592729609832e-05, + "loss": 0.0954, + "step": 31855 + }, + { + "epoch": 1.49, + "learning_rate": 1.5165143511043531e-05, + "loss": 0.0936, + "step": 31860 + }, + { + "epoch": 1.49, + "learning_rate": 1.5164359725988747e-05, + "loss": 0.1024, + "step": 31865 + }, + { + "epoch": 1.49, + "learning_rate": 1.516357594093396e-05, + "loss": 0.1589, + "step": 31870 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162792155879173e-05, + "loss": 0.2689, + "step": 31875 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162008370824385e-05, + "loss": 0.3105, + "step": 31880 + }, + { + "epoch": 1.49, + "learning_rate": 1.5161224585769601e-05, + "loss": 0.295, + "step": 31885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5160440800714813e-05, + "loss": 0.0471, + "step": 31890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5159657015660027e-05, + "loss": 0.0563, + "step": 31895 + }, + { + "epoch": 1.49, + "learning_rate": 1.515887323060524e-05, + "loss": 0.0521, + "step": 31900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5158089445550455e-05, + "loss": 0.1391, + "step": 31905 + }, + { + "epoch": 1.49, + "learning_rate": 1.5157305660495667e-05, + "loss": 0.0926, + "step": 31910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515652187544088e-05, + "loss": 0.1678, + "step": 31915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5155738090386093e-05, + "loss": 0.1874, + "step": 31920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154954305331307e-05, + "loss": 0.1741, + "step": 31925 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154170520276521e-05, + "loss": 0.362, + "step": 31930 + }, + { + "epoch": 1.49, + "learning_rate": 1.5153386735221733e-05, + "loss": 0.3195, + "step": 31935 + }, + { + "epoch": 1.49, + "learning_rate": 1.5152602950166949e-05, + "loss": 0.0607, + "step": 31940 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151819165112161e-05, + "loss": 0.0734, + "step": 31945 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151035380057375e-05, + "loss": 0.096, + "step": 31950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5150251595002587e-05, + "loss": 0.1064, + "step": 31955 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149467809947803e-05, + "loss": 0.1528, + "step": 31960 + }, + { + "epoch": 1.49, + "learning_rate": 1.5148684024893015e-05, + "loss": 0.2397, + "step": 31965 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147900239838229e-05, + "loss": 0.2079, + "step": 31970 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147116454783441e-05, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.49, + "learning_rate": 1.5146332669728653e-05, + "loss": 0.4375, + "step": 31980 + }, + { + "epoch": 1.49, + "learning_rate": 1.5145548884673869e-05, + "loss": 0.348, + "step": 31985 + }, + { + "epoch": 1.49, + "learning_rate": 1.5144765099619081e-05, + "loss": 0.0462, + "step": 31990 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143981314564295e-05, + "loss": 0.078, + "step": 31995 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143197529509507e-05, + "loss": 0.0692, + "step": 32000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5142413744454723e-05, + "loss": 0.0825, + "step": 32005 + }, + { + "epoch": 1.49, + "learning_rate": 1.5141629959399935e-05, + "loss": 0.138, + "step": 32010 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140846174345149e-05, + "loss": 0.1539, + "step": 32015 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140062389290361e-05, + "loss": 0.2308, + "step": 32020 + }, + { + "epoch": 1.49, + "learning_rate": 1.5139278604235577e-05, + "loss": 0.2213, + "step": 32025 + }, + { + "epoch": 1.49, + "learning_rate": 1.5138494819180789e-05, + "loss": 0.2618, + "step": 32030 + }, + { + "epoch": 1.49, + "learning_rate": 1.5137711034126003e-05, + "loss": 0.3362, + "step": 32035 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136927249071217e-05, + "loss": 0.0699, + "step": 32040 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136143464016429e-05, + "loss": 0.0228, + "step": 32045 + }, + { + "epoch": 1.5, + "learning_rate": 1.5135359678961643e-05, + "loss": 0.046, + "step": 32050 + }, + { + "epoch": 1.5, + "learning_rate": 1.5134575893906855e-05, + "loss": 0.0907, + "step": 32055 + }, + { + "epoch": 1.5, + "learning_rate": 1.513379210885207e-05, + "loss": 0.1112, + "step": 32060 + }, + { + "epoch": 1.5, + "learning_rate": 1.5133008323797283e-05, + "loss": 0.1013, + "step": 32065 + }, + { + "epoch": 1.5, + "learning_rate": 1.5132224538742497e-05, + "loss": 0.1556, + "step": 32070 + }, + { + "epoch": 1.5, + "learning_rate": 1.5131440753687709e-05, + "loss": 0.24, + "step": 32075 + }, + { + "epoch": 1.5, + "learning_rate": 1.5130656968632925e-05, + "loss": 0.5122, + "step": 32080 + }, + { + "epoch": 1.5, + "learning_rate": 1.5129873183578137e-05, + "loss": 0.2973, + "step": 32085 + }, + { + "epoch": 1.5, + "learning_rate": 1.512908939852335e-05, + "loss": 0.0492, + "step": 32090 + }, + { + "epoch": 1.5, + "learning_rate": 1.5128305613468563e-05, + "loss": 0.0999, + "step": 32095 + }, + { + "epoch": 1.5, + "learning_rate": 1.5127521828413779e-05, + "loss": 0.0568, + "step": 32100 + }, + { + "epoch": 1.5, + "learning_rate": 1.512673804335899e-05, + "loss": 0.0834, + "step": 32105 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125954258304203e-05, + "loss": 0.0824, + "step": 32110 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125170473249417e-05, + "loss": 0.2455, + "step": 32115 + }, + { + "epoch": 1.5, + "learning_rate": 1.512438668819463e-05, + "loss": 0.1645, + "step": 32120 + }, + { + "epoch": 1.5, + "learning_rate": 1.5123602903139845e-05, + "loss": 0.2583, + "step": 32125 + }, + { + "epoch": 1.5, + "learning_rate": 1.5122819118085057e-05, + "loss": 0.3227, + "step": 32130 + }, + { + "epoch": 1.5, + "learning_rate": 1.512203533303027e-05, + "loss": 0.3561, + "step": 32135 + }, + { + "epoch": 1.5, + "learning_rate": 1.5121251547975485e-05, + "loss": 0.0648, + "step": 32140 + }, + { + "epoch": 1.5, + "learning_rate": 1.5120467762920699e-05, + "loss": 0.016, + "step": 32145 + }, + { + "epoch": 1.5, + "learning_rate": 1.511968397786591e-05, + "loss": 0.1238, + "step": 32150 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118900192811126e-05, + "loss": 0.0738, + "step": 32155 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118116407756339e-05, + "loss": 0.113, + "step": 32160 + }, + { + "epoch": 1.5, + "learning_rate": 1.5117332622701553e-05, + "loss": 0.0999, + "step": 32165 + }, + { + "epoch": 1.5, + "learning_rate": 1.5116548837646765e-05, + "loss": 0.2245, + "step": 32170 + }, + { + "epoch": 1.5, + "learning_rate": 1.5115765052591977e-05, + "loss": 0.3073, + "step": 32175 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114981267537193e-05, + "loss": 0.2557, + "step": 32180 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114197482482405e-05, + "loss": 0.2607, + "step": 32185 + }, + { + "epoch": 1.5, + "learning_rate": 1.5113413697427619e-05, + "loss": 0.064, + "step": 32190 + }, + { + "epoch": 1.5, + "learning_rate": 1.5112629912372831e-05, + "loss": 0.076, + "step": 32195 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111846127318046e-05, + "loss": 0.1157, + "step": 32200 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111062342263259e-05, + "loss": 0.075, + "step": 32205 + }, + { + "epoch": 1.5, + "learning_rate": 1.5110278557208473e-05, + "loss": 0.1556, + "step": 32210 + }, + { + "epoch": 1.5, + "learning_rate": 1.5109494772153685e-05, + "loss": 0.1983, + "step": 32215 + }, + { + "epoch": 1.5, + "learning_rate": 1.51087109870989e-05, + "loss": 0.1951, + "step": 32220 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107927202044113e-05, + "loss": 0.2283, + "step": 32225 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107143416989327e-05, + "loss": 0.3268, + "step": 32230 + }, + { + "epoch": 1.5, + "learning_rate": 1.5106359631934539e-05, + "loss": 0.2575, + "step": 32235 + }, + { + "epoch": 1.5, + "learning_rate": 1.5105575846879753e-05, + "loss": 0.046, + "step": 32240 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104792061824967e-05, + "loss": 0.0554, + "step": 32245 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104008276770179e-05, + "loss": 0.0833, + "step": 32250 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103224491715394e-05, + "loss": 0.0969, + "step": 32255 + }, + { + "epoch": 1.51, + "learning_rate": 1.5102440706660607e-05, + "loss": 0.1418, + "step": 32260 + }, + { + "epoch": 1.51, + "learning_rate": 1.510165692160582e-05, + "loss": 0.1293, + "step": 32265 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100873136551033e-05, + "loss": 0.0968, + "step": 32270 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100089351496248e-05, + "loss": 0.3261, + "step": 32275 + }, + { + "epoch": 1.51, + "learning_rate": 1.509930556644146e-05, + "loss": 0.283, + "step": 32280 + }, + { + "epoch": 1.51, + "learning_rate": 1.5098521781386674e-05, + "loss": 0.3071, + "step": 32285 + }, + { + "epoch": 1.51, + "learning_rate": 1.5097737996331887e-05, + "loss": 0.072, + "step": 32290 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096954211277102e-05, + "loss": 0.1006, + "step": 32295 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096170426222314e-05, + "loss": 0.1003, + "step": 32300 + }, + { + "epoch": 1.51, + "learning_rate": 1.5095386641167527e-05, + "loss": 0.1151, + "step": 32305 + }, + { + "epoch": 1.51, + "learning_rate": 1.509460285611274e-05, + "loss": 0.1184, + "step": 32310 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093819071057953e-05, + "loss": 0.1667, + "step": 32315 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093035286003168e-05, + "loss": 0.1405, + "step": 32320 + }, + { + "epoch": 1.51, + "learning_rate": 1.509225150094838e-05, + "loss": 0.2462, + "step": 32325 + }, + { + "epoch": 1.51, + "learning_rate": 1.5091467715893594e-05, + "loss": 0.2843, + "step": 32330 + }, + { + "epoch": 1.51, + "learning_rate": 1.5090683930838808e-05, + "loss": 0.2082, + "step": 32335 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089900145784022e-05, + "loss": 0.0373, + "step": 32340 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089116360729235e-05, + "loss": 0.0384, + "step": 32345 + }, + { + "epoch": 1.51, + "learning_rate": 1.5088332575674448e-05, + "loss": 0.0444, + "step": 32350 + }, + { + "epoch": 1.51, + "learning_rate": 1.5087548790619662e-05, + "loss": 0.0799, + "step": 32355 + }, + { + "epoch": 1.51, + "learning_rate": 1.5086765005564876e-05, + "loss": 0.1469, + "step": 32360 + }, + { + "epoch": 1.51, + "learning_rate": 1.5085981220510088e-05, + "loss": 0.1759, + "step": 32365 + }, + { + "epoch": 1.51, + "learning_rate": 1.50851974354553e-05, + "loss": 0.1966, + "step": 32370 + }, + { + "epoch": 1.51, + "learning_rate": 1.5084413650400516e-05, + "loss": 0.2456, + "step": 32375 + }, + { + "epoch": 1.51, + "learning_rate": 1.5083629865345728e-05, + "loss": 0.3569, + "step": 32380 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082846080290942e-05, + "loss": 0.2519, + "step": 32385 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082062295236155e-05, + "loss": 0.0463, + "step": 32390 + }, + { + "epoch": 1.51, + "learning_rate": 1.508127851018137e-05, + "loss": 0.0796, + "step": 32395 + }, + { + "epoch": 1.51, + "learning_rate": 1.5080494725126582e-05, + "loss": 0.0918, + "step": 32400 + }, + { + "epoch": 1.51, + "learning_rate": 1.5079710940071796e-05, + "loss": 0.0762, + "step": 32405 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078927155017009e-05, + "loss": 0.0586, + "step": 32410 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078143369962224e-05, + "loss": 0.1551, + "step": 32415 + }, + { + "epoch": 1.51, + "learning_rate": 1.5077359584907436e-05, + "loss": 0.2989, + "step": 32420 + }, + { + "epoch": 1.51, + "learning_rate": 1.507657579985265e-05, + "loss": 0.2065, + "step": 32425 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075792014797862e-05, + "loss": 0.3864, + "step": 32430 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075008229743076e-05, + "loss": 0.2009, + "step": 32435 + }, + { + "epoch": 1.51, + "learning_rate": 1.507422444468829e-05, + "loss": 0.0476, + "step": 32440 + }, + { + "epoch": 1.51, + "learning_rate": 1.5073440659633502e-05, + "loss": 0.1002, + "step": 32445 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072656874578716e-05, + "loss": 0.1062, + "step": 32450 + }, + { + "epoch": 1.51, + "learning_rate": 1.507187308952393e-05, + "loss": 0.0847, + "step": 32455 + }, + { + "epoch": 1.51, + "learning_rate": 1.5071089304469144e-05, + "loss": 0.0809, + "step": 32460 + }, + { + "epoch": 1.51, + "learning_rate": 1.5070305519414356e-05, + "loss": 0.1638, + "step": 32465 + }, + { + "epoch": 1.52, + "learning_rate": 1.5069521734359572e-05, + "loss": 0.2693, + "step": 32470 + }, + { + "epoch": 1.52, + "learning_rate": 1.5068737949304784e-05, + "loss": 0.2582, + "step": 32475 + }, + { + "epoch": 1.52, + "learning_rate": 1.5067954164249998e-05, + "loss": 0.2734, + "step": 32480 + }, + { + "epoch": 1.52, + "learning_rate": 1.506717037919521e-05, + "loss": 0.2822, + "step": 32485 + }, + { + "epoch": 1.52, + "learning_rate": 1.5066386594140426e-05, + "loss": 0.0517, + "step": 32490 + }, + { + "epoch": 1.52, + "learning_rate": 1.5065602809085638e-05, + "loss": 0.0486, + "step": 32495 + }, + { + "epoch": 1.52, + "learning_rate": 1.506481902403085e-05, + "loss": 0.0478, + "step": 32500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5064035238976064e-05, + "loss": 0.1418, + "step": 32505 + }, + { + "epoch": 1.52, + "learning_rate": 1.5063251453921276e-05, + "loss": 0.1221, + "step": 32510 + }, + { + "epoch": 1.52, + "learning_rate": 1.5062467668866492e-05, + "loss": 0.168, + "step": 32515 + }, + { + "epoch": 1.52, + "learning_rate": 1.5061683883811704e-05, + "loss": 0.196, + "step": 32520 + }, + { + "epoch": 1.52, + "learning_rate": 1.5060900098756918e-05, + "loss": 0.2721, + "step": 32525 + }, + { + "epoch": 1.52, + "learning_rate": 1.506011631370213e-05, + "loss": 0.3576, + "step": 32530 + }, + { + "epoch": 1.52, + "learning_rate": 1.5059332528647346e-05, + "loss": 0.3454, + "step": 32535 + }, + { + "epoch": 1.52, + "learning_rate": 1.5058548743592558e-05, + "loss": 0.0558, + "step": 32540 + }, + { + "epoch": 1.52, + "learning_rate": 1.5057764958537772e-05, + "loss": 0.0572, + "step": 32545 + }, + { + "epoch": 1.52, + "learning_rate": 1.5056981173482984e-05, + "loss": 0.0613, + "step": 32550 + }, + { + "epoch": 1.52, + "learning_rate": 1.50561973884282e-05, + "loss": 0.1226, + "step": 32555 + }, + { + "epoch": 1.52, + "learning_rate": 1.5055413603373412e-05, + "loss": 0.1035, + "step": 32560 + }, + { + "epoch": 1.52, + "learning_rate": 1.5054629818318624e-05, + "loss": 0.0922, + "step": 32565 + }, + { + "epoch": 1.52, + "learning_rate": 1.505384603326384e-05, + "loss": 0.2358, + "step": 32570 + }, + { + "epoch": 1.52, + "learning_rate": 1.5053062248209052e-05, + "loss": 0.1497, + "step": 32575 + }, + { + "epoch": 1.52, + "learning_rate": 1.5052278463154266e-05, + "loss": 0.3272, + "step": 32580 + }, + { + "epoch": 1.52, + "learning_rate": 1.5051494678099478e-05, + "loss": 0.3985, + "step": 32585 + }, + { + "epoch": 1.52, + "learning_rate": 1.5050710893044694e-05, + "loss": 0.044, + "step": 32590 + }, + { + "epoch": 1.52, + "learning_rate": 1.5049927107989906e-05, + "loss": 0.1202, + "step": 32595 + }, + { + "epoch": 1.52, + "learning_rate": 1.504914332293512e-05, + "loss": 0.1429, + "step": 32600 + }, + { + "epoch": 1.52, + "learning_rate": 1.5048359537880332e-05, + "loss": 0.0826, + "step": 32605 + }, + { + "epoch": 1.52, + "learning_rate": 1.5047575752825548e-05, + "loss": 0.1068, + "step": 32610 + }, + { + "epoch": 1.52, + "learning_rate": 1.504679196777076e-05, + "loss": 0.2536, + "step": 32615 + }, + { + "epoch": 1.52, + "learning_rate": 1.5046008182715974e-05, + "loss": 0.2464, + "step": 32620 + }, + { + "epoch": 1.52, + "learning_rate": 1.5045224397661186e-05, + "loss": 0.4023, + "step": 32625 + }, + { + "epoch": 1.52, + "learning_rate": 1.5044440612606398e-05, + "loss": 0.4528, + "step": 32630 + }, + { + "epoch": 1.52, + "learning_rate": 1.5043656827551614e-05, + "loss": 0.2417, + "step": 32635 + }, + { + "epoch": 1.52, + "learning_rate": 1.5042873042496826e-05, + "loss": 0.0292, + "step": 32640 + }, + { + "epoch": 1.52, + "learning_rate": 1.504208925744204e-05, + "loss": 0.0988, + "step": 32645 + }, + { + "epoch": 1.52, + "learning_rate": 1.5041305472387254e-05, + "loss": 0.067, + "step": 32650 + }, + { + "epoch": 1.52, + "learning_rate": 1.5040521687332468e-05, + "loss": 0.1081, + "step": 32655 + }, + { + "epoch": 1.52, + "learning_rate": 1.503973790227768e-05, + "loss": 0.1431, + "step": 32660 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038954117222894e-05, + "loss": 0.1415, + "step": 32665 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038170332168108e-05, + "loss": 0.1489, + "step": 32670 + }, + { + "epoch": 1.52, + "learning_rate": 1.5037386547113322e-05, + "loss": 0.187, + "step": 32675 + }, + { + "epoch": 1.52, + "learning_rate": 1.5036602762058534e-05, + "loss": 0.3941, + "step": 32680 + }, + { + "epoch": 1.53, + "learning_rate": 1.503581897700375e-05, + "loss": 0.267, + "step": 32685 + }, + { + "epoch": 1.53, + "learning_rate": 1.5035035191948962e-05, + "loss": 0.0578, + "step": 32690 + }, + { + "epoch": 1.53, + "learning_rate": 1.5034251406894174e-05, + "loss": 0.0391, + "step": 32695 + }, + { + "epoch": 1.53, + "learning_rate": 1.5033467621839388e-05, + "loss": 0.0686, + "step": 32700 + }, + { + "epoch": 1.53, + "learning_rate": 1.50326838367846e-05, + "loss": 0.1086, + "step": 32705 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031900051729816e-05, + "loss": 0.1141, + "step": 32710 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031116266675028e-05, + "loss": 0.1051, + "step": 32715 + }, + { + "epoch": 1.53, + "learning_rate": 1.5030332481620242e-05, + "loss": 0.2131, + "step": 32720 + }, + { + "epoch": 1.53, + "learning_rate": 1.5029548696565454e-05, + "loss": 0.2887, + "step": 32725 + }, + { + "epoch": 1.53, + "learning_rate": 1.502876491151067e-05, + "loss": 0.4061, + "step": 32730 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027981126455882e-05, + "loss": 0.3098, + "step": 32735 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027197341401096e-05, + "loss": 0.0542, + "step": 32740 + }, + { + "epoch": 1.53, + "learning_rate": 1.5026413556346308e-05, + "loss": 0.0951, + "step": 32745 + }, + { + "epoch": 1.53, + "learning_rate": 1.5025629771291524e-05, + "loss": 0.0702, + "step": 32750 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024845986236736e-05, + "loss": 0.0914, + "step": 32755 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024062201181948e-05, + "loss": 0.1323, + "step": 32760 + }, + { + "epoch": 1.53, + "learning_rate": 1.5023278416127162e-05, + "loss": 0.1518, + "step": 32765 + }, + { + "epoch": 1.53, + "learning_rate": 1.5022494631072376e-05, + "loss": 0.2011, + "step": 32770 + }, + { + "epoch": 1.53, + "learning_rate": 1.502171084601759e-05, + "loss": 0.1865, + "step": 32775 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020927060962802e-05, + "loss": 0.328, + "step": 32780 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020143275908018e-05, + "loss": 0.3728, + "step": 32785 + }, + { + "epoch": 1.53, + "learning_rate": 1.501935949085323e-05, + "loss": 0.0544, + "step": 32790 + }, + { + "epoch": 1.53, + "learning_rate": 1.5018575705798444e-05, + "loss": 0.0543, + "step": 32795 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017791920743656e-05, + "loss": 0.0897, + "step": 32800 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017008135688871e-05, + "loss": 0.1329, + "step": 32805 + }, + { + "epoch": 1.53, + "learning_rate": 1.5016224350634084e-05, + "loss": 0.0871, + "step": 32810 + }, + { + "epoch": 1.53, + "learning_rate": 1.5015440565579298e-05, + "loss": 0.1539, + "step": 32815 + }, + { + "epoch": 1.53, + "learning_rate": 1.501465678052451e-05, + "loss": 0.2018, + "step": 32820 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013872995469722e-05, + "loss": 0.1788, + "step": 32825 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013089210414938e-05, + "loss": 0.4134, + "step": 32830 + }, + { + "epoch": 1.53, + "learning_rate": 1.501230542536015e-05, + "loss": 0.3125, + "step": 32835 + }, + { + "epoch": 1.53, + "learning_rate": 1.5011521640305364e-05, + "loss": 0.0423, + "step": 32840 + }, + { + "epoch": 1.53, + "learning_rate": 1.5010737855250576e-05, + "loss": 0.0694, + "step": 32845 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009954070195792e-05, + "loss": 0.0747, + "step": 32850 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009170285141004e-05, + "loss": 0.1646, + "step": 32855 + }, + { + "epoch": 1.53, + "learning_rate": 1.5008386500086218e-05, + "loss": 0.1413, + "step": 32860 + }, + { + "epoch": 1.53, + "learning_rate": 1.500760271503143e-05, + "loss": 0.1363, + "step": 32865 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006818929976645e-05, + "loss": 0.1389, + "step": 32870 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006035144921858e-05, + "loss": 0.313, + "step": 32875 + }, + { + "epoch": 1.53, + "learning_rate": 1.5005251359867072e-05, + "loss": 0.4249, + "step": 32880 + }, + { + "epoch": 1.53, + "learning_rate": 1.5004467574812286e-05, + "loss": 0.2864, + "step": 32885 + }, + { + "epoch": 1.53, + "learning_rate": 1.5003683789757498e-05, + "loss": 0.0551, + "step": 32890 + }, + { + "epoch": 1.53, + "learning_rate": 1.5002900004702712e-05, + "loss": 0.0842, + "step": 32895 + }, + { + "epoch": 1.54, + "learning_rate": 1.5002116219647924e-05, + "loss": 0.0557, + "step": 32900 + }, + { + "epoch": 1.54, + "learning_rate": 1.500133243459314e-05, + "loss": 0.1035, + "step": 32905 + }, + { + "epoch": 1.54, + "learning_rate": 1.5000548649538352e-05, + "loss": 0.0518, + "step": 32910 + }, + { + "epoch": 1.54, + "learning_rate": 1.4999764864483566e-05, + "loss": 0.1531, + "step": 32915 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998981079428778e-05, + "loss": 0.216, + "step": 32920 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998197294373993e-05, + "loss": 0.1716, + "step": 32925 + }, + { + "epoch": 1.54, + "learning_rate": 1.4997413509319206e-05, + "loss": 0.39, + "step": 32930 + }, + { + "epoch": 1.54, + "learning_rate": 1.499662972426442e-05, + "loss": 0.2803, + "step": 32935 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995845939209632e-05, + "loss": 0.1181, + "step": 32940 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995062154154847e-05, + "loss": 0.0665, + "step": 32945 + }, + { + "epoch": 1.54, + "learning_rate": 1.499427836910006e-05, + "loss": 0.0879, + "step": 32950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4993494584045272e-05, + "loss": 0.111, + "step": 32955 + }, + { + "epoch": 1.54, + "learning_rate": 1.4992710798990486e-05, + "loss": 0.1512, + "step": 32960 + }, + { + "epoch": 1.54, + "learning_rate": 1.49919270139357e-05, + "loss": 0.1847, + "step": 32965 + }, + { + "epoch": 1.54, + "learning_rate": 1.4991143228880913e-05, + "loss": 0.1602, + "step": 32970 + }, + { + "epoch": 1.54, + "learning_rate": 1.4990359443826126e-05, + "loss": 0.3279, + "step": 32975 + }, + { + "epoch": 1.54, + "learning_rate": 1.498957565877134e-05, + "loss": 0.2164, + "step": 32980 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988791873716553e-05, + "loss": 0.2688, + "step": 32985 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988008088661767e-05, + "loss": 0.0506, + "step": 32990 + }, + { + "epoch": 1.54, + "learning_rate": 1.498722430360698e-05, + "loss": 0.0692, + "step": 32995 + }, + { + "epoch": 1.54, + "learning_rate": 1.4986440518552195e-05, + "loss": 0.1243, + "step": 33000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4985656733497407e-05, + "loss": 0.1195, + "step": 33005 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984872948442621e-05, + "loss": 0.1462, + "step": 33010 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984089163387834e-05, + "loss": 0.121, + "step": 33015 + }, + { + "epoch": 1.54, + "learning_rate": 1.4983305378333046e-05, + "loss": 0.1771, + "step": 33020 + }, + { + "epoch": 1.54, + "learning_rate": 1.4982521593278261e-05, + "loss": 0.1746, + "step": 33025 + }, + { + "epoch": 1.54, + "learning_rate": 1.4981737808223474e-05, + "loss": 0.3095, + "step": 33030 + }, + { + "epoch": 1.54, + "learning_rate": 1.4980954023168687e-05, + "loss": 0.2838, + "step": 33035 + }, + { + "epoch": 1.54, + "learning_rate": 1.49801702381139e-05, + "loss": 0.0275, + "step": 33040 + }, + { + "epoch": 1.54, + "learning_rate": 1.4979386453059115e-05, + "loss": 0.0868, + "step": 33045 + }, + { + "epoch": 1.54, + "learning_rate": 1.4978602668004327e-05, + "loss": 0.0809, + "step": 33050 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977818882949541e-05, + "loss": 0.1187, + "step": 33055 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977035097894754e-05, + "loss": 0.1628, + "step": 33060 + }, + { + "epoch": 1.54, + "learning_rate": 1.497625131283997e-05, + "loss": 0.1992, + "step": 33065 + }, + { + "epoch": 1.54, + "learning_rate": 1.4975467527785181e-05, + "loss": 0.2054, + "step": 33070 + }, + { + "epoch": 1.54, + "learning_rate": 1.4974683742730395e-05, + "loss": 0.2641, + "step": 33075 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973899957675608e-05, + "loss": 0.4842, + "step": 33080 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973116172620821e-05, + "loss": 0.4444, + "step": 33085 + }, + { + "epoch": 1.54, + "learning_rate": 1.4972332387566035e-05, + "loss": 0.0517, + "step": 33090 + }, + { + "epoch": 1.54, + "learning_rate": 1.4971548602511248e-05, + "loss": 0.0611, + "step": 33095 + }, + { + "epoch": 1.54, + "learning_rate": 1.4970764817456463e-05, + "loss": 0.0702, + "step": 33100 + }, + { + "epoch": 1.54, + "learning_rate": 1.4969981032401675e-05, + "loss": 0.143, + "step": 33105 + }, + { + "epoch": 1.54, + "learning_rate": 1.496919724734689e-05, + "loss": 0.1304, + "step": 33110 + }, + { + "epoch": 1.55, + "learning_rate": 1.4968413462292101e-05, + "loss": 0.1834, + "step": 33115 + }, + { + "epoch": 1.55, + "learning_rate": 1.4967629677237317e-05, + "loss": 0.1337, + "step": 33120 + }, + { + "epoch": 1.55, + "learning_rate": 1.496684589218253e-05, + "loss": 0.213, + "step": 33125 + }, + { + "epoch": 1.55, + "learning_rate": 1.49662188641387e-05, + "loss": 0.3345, + "step": 33130 + }, + { + "epoch": 1.55, + "learning_rate": 1.4965435079083914e-05, + "loss": 0.307, + "step": 33135 + }, + { + "epoch": 1.55, + "learning_rate": 1.4964651294029126e-05, + "loss": 0.063, + "step": 33140 + }, + { + "epoch": 1.55, + "learning_rate": 1.496386750897434e-05, + "loss": 0.0567, + "step": 33145 + }, + { + "epoch": 1.55, + "learning_rate": 1.4963083723919554e-05, + "loss": 0.0812, + "step": 33150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4962299938864766e-05, + "loss": 0.0735, + "step": 33155 + }, + { + "epoch": 1.55, + "learning_rate": 1.4961516153809981e-05, + "loss": 0.1804, + "step": 33160 + }, + { + "epoch": 1.55, + "learning_rate": 1.4960732368755194e-05, + "loss": 0.125, + "step": 33165 + }, + { + "epoch": 1.55, + "learning_rate": 1.4959948583700408e-05, + "loss": 0.2112, + "step": 33170 + }, + { + "epoch": 1.55, + "learning_rate": 1.495916479864562e-05, + "loss": 0.2541, + "step": 33175 + }, + { + "epoch": 1.55, + "learning_rate": 1.4958381013590835e-05, + "loss": 0.2793, + "step": 33180 + }, + { + "epoch": 1.55, + "learning_rate": 1.4957597228536048e-05, + "loss": 0.242, + "step": 33185 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956813443481262e-05, + "loss": 0.0286, + "step": 33190 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956029658426474e-05, + "loss": 0.057, + "step": 33195 + }, + { + "epoch": 1.55, + "learning_rate": 1.495524587337169e-05, + "loss": 0.087, + "step": 33200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4954462088316902e-05, + "loss": 0.0883, + "step": 33205 + }, + { + "epoch": 1.55, + "learning_rate": 1.4953678303262114e-05, + "loss": 0.1055, + "step": 33210 + }, + { + "epoch": 1.55, + "learning_rate": 1.4952894518207328e-05, + "loss": 0.4157, + "step": 33215 + }, + { + "epoch": 1.55, + "learning_rate": 1.495211073315254e-05, + "loss": 0.1814, + "step": 33220 + }, + { + "epoch": 1.55, + "learning_rate": 1.4951326948097755e-05, + "loss": 0.301, + "step": 33225 + }, + { + "epoch": 1.55, + "learning_rate": 1.4950543163042968e-05, + "loss": 0.3661, + "step": 33230 + }, + { + "epoch": 1.55, + "learning_rate": 1.4949759377988182e-05, + "loss": 0.2502, + "step": 33235 + }, + { + "epoch": 1.55, + "learning_rate": 1.4948975592933394e-05, + "loss": 0.0987, + "step": 33240 + }, + { + "epoch": 1.55, + "learning_rate": 1.494819180787861e-05, + "loss": 0.0505, + "step": 33245 + }, + { + "epoch": 1.55, + "learning_rate": 1.4947408022823822e-05, + "loss": 0.1345, + "step": 33250 + }, + { + "epoch": 1.55, + "learning_rate": 1.4946624237769036e-05, + "loss": 0.1235, + "step": 33255 + }, + { + "epoch": 1.55, + "learning_rate": 1.494584045271425e-05, + "loss": 0.1194, + "step": 33260 + }, + { + "epoch": 1.55, + "learning_rate": 1.4945056667659463e-05, + "loss": 0.1738, + "step": 33265 + }, + { + "epoch": 1.55, + "learning_rate": 1.4944272882604676e-05, + "loss": 0.2238, + "step": 33270 + }, + { + "epoch": 1.55, + "learning_rate": 1.4943489097549888e-05, + "loss": 0.295, + "step": 33275 + }, + { + "epoch": 1.55, + "learning_rate": 1.4942705312495103e-05, + "loss": 0.488, + "step": 33280 + }, + { + "epoch": 1.55, + "learning_rate": 1.4941921527440316e-05, + "loss": 0.392, + "step": 33285 + }, + { + "epoch": 1.55, + "learning_rate": 1.494113774238553e-05, + "loss": 0.0366, + "step": 33290 + }, + { + "epoch": 1.55, + "learning_rate": 1.4940353957330742e-05, + "loss": 0.0348, + "step": 33295 + }, + { + "epoch": 1.55, + "learning_rate": 1.4939570172275957e-05, + "loss": 0.0887, + "step": 33300 + }, + { + "epoch": 1.55, + "learning_rate": 1.493878638722117e-05, + "loss": 0.1132, + "step": 33305 + }, + { + "epoch": 1.55, + "learning_rate": 1.4938002602166383e-05, + "loss": 0.1445, + "step": 33310 + }, + { + "epoch": 1.55, + "learning_rate": 1.4937218817111596e-05, + "loss": 0.0744, + "step": 33315 + }, + { + "epoch": 1.55, + "learning_rate": 1.4936435032056811e-05, + "loss": 0.2349, + "step": 33320 + }, + { + "epoch": 1.55, + "learning_rate": 1.4935651247002023e-05, + "loss": 0.3013, + "step": 33325 + }, + { + "epoch": 1.56, + "learning_rate": 1.4934867461947237e-05, + "loss": 0.3091, + "step": 33330 + }, + { + "epoch": 1.56, + "learning_rate": 1.493408367689245e-05, + "loss": 0.3184, + "step": 33335 + }, + { + "epoch": 1.56, + "learning_rate": 1.4933299891837663e-05, + "loss": 0.0297, + "step": 33340 + }, + { + "epoch": 1.56, + "learning_rate": 1.4932516106782877e-05, + "loss": 0.0657, + "step": 33345 + }, + { + "epoch": 1.56, + "learning_rate": 1.493173232172809e-05, + "loss": 0.0838, + "step": 33350 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930948536673303e-05, + "loss": 0.0415, + "step": 33355 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930164751618517e-05, + "loss": 0.1043, + "step": 33360 + }, + { + "epoch": 1.56, + "learning_rate": 1.4929380966563731e-05, + "loss": 0.1528, + "step": 33365 + }, + { + "epoch": 1.56, + "learning_rate": 1.4928597181508943e-05, + "loss": 0.1359, + "step": 33370 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927813396454159e-05, + "loss": 0.2899, + "step": 33375 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927029611399371e-05, + "loss": 0.3585, + "step": 33380 + }, + { + "epoch": 1.56, + "learning_rate": 1.4926245826344585e-05, + "loss": 0.3395, + "step": 33385 + }, + { + "epoch": 1.56, + "learning_rate": 1.4925462041289797e-05, + "loss": 0.0484, + "step": 33390 + }, + { + "epoch": 1.56, + "learning_rate": 1.4924678256235013e-05, + "loss": 0.0426, + "step": 33395 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923894471180225e-05, + "loss": 0.0988, + "step": 33400 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923110686125437e-05, + "loss": 0.1479, + "step": 33405 + }, + { + "epoch": 1.56, + "learning_rate": 1.4922326901070651e-05, + "loss": 0.1827, + "step": 33410 + }, + { + "epoch": 1.56, + "learning_rate": 1.4921543116015864e-05, + "loss": 0.1848, + "step": 33415 + }, + { + "epoch": 1.56, + "learning_rate": 1.492075933096108e-05, + "loss": 0.1389, + "step": 33420 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919975545906291e-05, + "loss": 0.2672, + "step": 33425 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919191760851505e-05, + "loss": 0.3146, + "step": 33430 + }, + { + "epoch": 1.56, + "learning_rate": 1.4918407975796717e-05, + "loss": 0.206, + "step": 33435 + }, + { + "epoch": 1.56, + "learning_rate": 1.4917624190741933e-05, + "loss": 0.0536, + "step": 33440 + }, + { + "epoch": 1.56, + "learning_rate": 1.4916840405687145e-05, + "loss": 0.0521, + "step": 33445 + }, + { + "epoch": 1.56, + "learning_rate": 1.491605662063236e-05, + "loss": 0.053, + "step": 33450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4915272835577571e-05, + "loss": 0.1253, + "step": 33455 + }, + { + "epoch": 1.56, + "learning_rate": 1.4914489050522787e-05, + "loss": 0.1643, + "step": 33460 + }, + { + "epoch": 1.56, + "learning_rate": 1.4913705265468e-05, + "loss": 0.1678, + "step": 33465 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912921480413211e-05, + "loss": 0.1316, + "step": 33470 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912137695358427e-05, + "loss": 0.3028, + "step": 33475 + }, + { + "epoch": 1.56, + "learning_rate": 1.491135391030364e-05, + "loss": 0.4472, + "step": 33480 + }, + { + "epoch": 1.56, + "learning_rate": 1.4910570125248853e-05, + "loss": 0.2954, + "step": 33485 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909786340194065e-05, + "loss": 0.0151, + "step": 33490 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909002555139281e-05, + "loss": 0.0831, + "step": 33495 + }, + { + "epoch": 1.56, + "learning_rate": 1.4908218770084493e-05, + "loss": 0.0586, + "step": 33500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4907434985029707e-05, + "loss": 0.1487, + "step": 33505 + }, + { + "epoch": 1.56, + "learning_rate": 1.490665119997492e-05, + "loss": 0.0689, + "step": 33510 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905867414920135e-05, + "loss": 0.1569, + "step": 33515 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905083629865347e-05, + "loss": 0.1794, + "step": 33520 + }, + { + "epoch": 1.56, + "learning_rate": 1.4904299844810561e-05, + "loss": 0.2747, + "step": 33525 + }, + { + "epoch": 1.56, + "learning_rate": 1.4903516059755773e-05, + "loss": 0.3703, + "step": 33530 + }, + { + "epoch": 1.56, + "learning_rate": 1.4902732274700985e-05, + "loss": 0.336, + "step": 33535 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901948489646201e-05, + "loss": 0.0809, + "step": 33540 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901164704591413e-05, + "loss": 0.0574, + "step": 33545 + }, + { + "epoch": 1.57, + "learning_rate": 1.4900380919536627e-05, + "loss": 0.0474, + "step": 33550 + }, + { + "epoch": 1.57, + "learning_rate": 1.489959713448184e-05, + "loss": 0.1294, + "step": 33555 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898813349427055e-05, + "loss": 0.1134, + "step": 33560 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898029564372267e-05, + "loss": 0.1304, + "step": 33565 + }, + { + "epoch": 1.57, + "learning_rate": 1.4897245779317481e-05, + "loss": 0.1349, + "step": 33570 + }, + { + "epoch": 1.57, + "learning_rate": 1.4896461994262695e-05, + "loss": 0.1339, + "step": 33575 + }, + { + "epoch": 1.57, + "learning_rate": 1.4895678209207909e-05, + "loss": 0.3101, + "step": 33580 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894894424153121e-05, + "loss": 0.2441, + "step": 33585 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894110639098337e-05, + "loss": 0.0574, + "step": 33590 + }, + { + "epoch": 1.57, + "learning_rate": 1.4893326854043549e-05, + "loss": 0.1494, + "step": 33595 + }, + { + "epoch": 1.57, + "learning_rate": 1.4892543068988761e-05, + "loss": 0.0818, + "step": 33600 + }, + { + "epoch": 1.57, + "learning_rate": 1.4891759283933975e-05, + "loss": 0.0343, + "step": 33605 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890975498879187e-05, + "loss": 0.0497, + "step": 33610 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890191713824403e-05, + "loss": 0.1251, + "step": 33615 + }, + { + "epoch": 1.57, + "learning_rate": 1.4889407928769615e-05, + "loss": 0.1593, + "step": 33620 + }, + { + "epoch": 1.57, + "learning_rate": 1.4888624143714829e-05, + "loss": 0.1829, + "step": 33625 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887840358660041e-05, + "loss": 0.4039, + "step": 33630 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887056573605257e-05, + "loss": 0.2516, + "step": 33635 + }, + { + "epoch": 1.57, + "learning_rate": 1.4886272788550469e-05, + "loss": 0.0328, + "step": 33640 + }, + { + "epoch": 1.57, + "learning_rate": 1.4885489003495683e-05, + "loss": 0.0959, + "step": 33645 + }, + { + "epoch": 1.57, + "learning_rate": 1.4884705218440895e-05, + "loss": 0.0733, + "step": 33650 + }, + { + "epoch": 1.57, + "learning_rate": 1.488392143338611e-05, + "loss": 0.0875, + "step": 33655 + }, + { + "epoch": 1.57, + "learning_rate": 1.4883137648331323e-05, + "loss": 0.0994, + "step": 33660 + }, + { + "epoch": 1.57, + "learning_rate": 1.4882353863276535e-05, + "loss": 0.1776, + "step": 33665 + }, + { + "epoch": 1.57, + "learning_rate": 1.4881570078221749e-05, + "loss": 0.1794, + "step": 33670 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880786293166963e-05, + "loss": 0.2112, + "step": 33675 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880002508112177e-05, + "loss": 0.2123, + "step": 33680 + }, + { + "epoch": 1.57, + "learning_rate": 1.4879218723057389e-05, + "loss": 0.4273, + "step": 33685 + }, + { + "epoch": 1.57, + "learning_rate": 1.4878434938002605e-05, + "loss": 0.0487, + "step": 33690 + }, + { + "epoch": 1.57, + "learning_rate": 1.4877651152947817e-05, + "loss": 0.0653, + "step": 33695 + }, + { + "epoch": 1.57, + "learning_rate": 1.487686736789303e-05, + "loss": 0.0647, + "step": 33700 + }, + { + "epoch": 1.57, + "learning_rate": 1.4876083582838243e-05, + "loss": 0.0774, + "step": 33705 + }, + { + "epoch": 1.57, + "learning_rate": 1.4875299797783459e-05, + "loss": 0.1084, + "step": 33710 + }, + { + "epoch": 1.57, + "learning_rate": 1.487451601272867e-05, + "loss": 0.1575, + "step": 33715 + }, + { + "epoch": 1.57, + "learning_rate": 1.4873732227673885e-05, + "loss": 0.1553, + "step": 33720 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872948442619097e-05, + "loss": 0.2492, + "step": 33725 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872164657564309e-05, + "loss": 0.2886, + "step": 33730 + }, + { + "epoch": 1.57, + "learning_rate": 1.4871380872509525e-05, + "loss": 0.2957, + "step": 33735 + }, + { + "epoch": 1.57, + "learning_rate": 1.4870597087454737e-05, + "loss": 0.0997, + "step": 33740 + }, + { + "epoch": 1.57, + "learning_rate": 1.486981330239995e-05, + "loss": 0.0764, + "step": 33745 + }, + { + "epoch": 1.57, + "learning_rate": 1.4869029517345163e-05, + "loss": 0.0875, + "step": 33750 + }, + { + "epoch": 1.58, + "learning_rate": 1.4868245732290379e-05, + "loss": 0.1655, + "step": 33755 + }, + { + "epoch": 1.58, + "learning_rate": 1.4867461947235591e-05, + "loss": 0.0998, + "step": 33760 + }, + { + "epoch": 1.58, + "learning_rate": 1.4866678162180805e-05, + "loss": 0.1561, + "step": 33765 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865894377126017e-05, + "loss": 0.1504, + "step": 33770 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865110592071233e-05, + "loss": 0.2526, + "step": 33775 + }, + { + "epoch": 1.58, + "learning_rate": 1.4864326807016445e-05, + "loss": 0.3547, + "step": 33780 + }, + { + "epoch": 1.58, + "learning_rate": 1.4863543021961659e-05, + "loss": 0.2778, + "step": 33785 + }, + { + "epoch": 1.58, + "learning_rate": 1.4862759236906873e-05, + "loss": 0.0651, + "step": 33790 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861975451852085e-05, + "loss": 0.0651, + "step": 33795 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861191666797299e-05, + "loss": 0.1012, + "step": 33800 + }, + { + "epoch": 1.58, + "learning_rate": 1.4860407881742511e-05, + "loss": 0.1282, + "step": 33805 + }, + { + "epoch": 1.58, + "learning_rate": 1.4859624096687727e-05, + "loss": 0.163, + "step": 33810 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858840311632939e-05, + "loss": 0.221, + "step": 33815 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858056526578153e-05, + "loss": 0.1405, + "step": 33820 + }, + { + "epoch": 1.58, + "learning_rate": 1.4857272741523365e-05, + "loss": 0.3016, + "step": 33825 + }, + { + "epoch": 1.58, + "learning_rate": 1.485648895646858e-05, + "loss": 0.4393, + "step": 33830 + }, + { + "epoch": 1.58, + "learning_rate": 1.4855705171413793e-05, + "loss": 0.2873, + "step": 33835 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854921386359007e-05, + "loss": 0.0466, + "step": 33840 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854137601304219e-05, + "loss": 0.0364, + "step": 33845 + }, + { + "epoch": 1.58, + "learning_rate": 1.4853353816249434e-05, + "loss": 0.1092, + "step": 33850 + }, + { + "epoch": 1.58, + "learning_rate": 1.4852570031194647e-05, + "loss": 0.1333, + "step": 33855 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851786246139859e-05, + "loss": 0.1323, + "step": 33860 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851002461085073e-05, + "loss": 0.1203, + "step": 33865 + }, + { + "epoch": 1.58, + "learning_rate": 1.4850218676030285e-05, + "loss": 0.1091, + "step": 33870 + }, + { + "epoch": 1.58, + "learning_rate": 1.48494348909755e-05, + "loss": 0.2064, + "step": 33875 + }, + { + "epoch": 1.58, + "learning_rate": 1.4848651105920713e-05, + "loss": 0.3023, + "step": 33880 + }, + { + "epoch": 1.58, + "learning_rate": 1.4847867320865927e-05, + "loss": 0.3641, + "step": 33885 + }, + { + "epoch": 1.58, + "learning_rate": 1.484708353581114e-05, + "loss": 0.0508, + "step": 33890 + }, + { + "epoch": 1.58, + "learning_rate": 1.4846299750756354e-05, + "loss": 0.0378, + "step": 33895 + }, + { + "epoch": 1.58, + "learning_rate": 1.4845515965701567e-05, + "loss": 0.1599, + "step": 33900 + }, + { + "epoch": 1.58, + "learning_rate": 1.4844732180646782e-05, + "loss": 0.0987, + "step": 33905 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843948395591994e-05, + "loss": 0.0775, + "step": 33910 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843164610537208e-05, + "loss": 0.1141, + "step": 33915 + }, + { + "epoch": 1.58, + "learning_rate": 1.484238082548242e-05, + "loss": 0.1908, + "step": 33920 + }, + { + "epoch": 1.58, + "learning_rate": 1.4841597040427633e-05, + "loss": 0.2086, + "step": 33925 + }, + { + "epoch": 1.58, + "learning_rate": 1.4840813255372848e-05, + "loss": 0.3953, + "step": 33930 + }, + { + "epoch": 1.58, + "learning_rate": 1.484002947031806e-05, + "loss": 0.3082, + "step": 33935 + }, + { + "epoch": 1.58, + "learning_rate": 1.4839245685263275e-05, + "loss": 0.0498, + "step": 33940 + }, + { + "epoch": 1.58, + "learning_rate": 1.4838461900208487e-05, + "loss": 0.0308, + "step": 33945 + }, + { + "epoch": 1.58, + "learning_rate": 1.4837678115153702e-05, + "loss": 0.0608, + "step": 33950 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836894330098915e-05, + "loss": 0.1048, + "step": 33955 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836110545044128e-05, + "loss": 0.1423, + "step": 33960 + }, + { + "epoch": 1.58, + "learning_rate": 1.483532675998934e-05, + "loss": 0.175, + "step": 33965 + }, + { + "epoch": 1.59, + "learning_rate": 1.4834542974934556e-05, + "loss": 0.1559, + "step": 33970 + }, + { + "epoch": 1.59, + "learning_rate": 1.4833759189879768e-05, + "loss": 0.2204, + "step": 33975 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832975404824982e-05, + "loss": 0.3428, + "step": 33980 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832191619770195e-05, + "loss": 0.2474, + "step": 33985 + }, + { + "epoch": 1.59, + "learning_rate": 1.4831407834715409e-05, + "loss": 0.0378, + "step": 33990 + }, + { + "epoch": 1.59, + "learning_rate": 1.4830624049660622e-05, + "loss": 0.0739, + "step": 33995 + }, + { + "epoch": 1.59, + "learning_rate": 1.4829840264605835e-05, + "loss": 0.0936, + "step": 34000 + }, + { + "epoch": 1.59, + "learning_rate": 1.482905647955105e-05, + "loss": 0.0705, + "step": 34005 + }, + { + "epoch": 1.59, + "learning_rate": 1.4828272694496262e-05, + "loss": 0.0965, + "step": 34010 + }, + { + "epoch": 1.59, + "learning_rate": 1.4827488909441476e-05, + "loss": 0.0939, + "step": 34015 + }, + { + "epoch": 1.59, + "learning_rate": 1.4826705124386689e-05, + "loss": 0.1264, + "step": 34020 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825921339331904e-05, + "loss": 0.1568, + "step": 34025 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825137554277116e-05, + "loss": 0.2927, + "step": 34030 + }, + { + "epoch": 1.59, + "learning_rate": 1.482435376922233e-05, + "loss": 0.3461, + "step": 34035 + }, + { + "epoch": 1.59, + "learning_rate": 1.4823569984167542e-05, + "loss": 0.0535, + "step": 34040 + }, + { + "epoch": 1.59, + "learning_rate": 1.4822786199112758e-05, + "loss": 0.0372, + "step": 34045 + }, + { + "epoch": 1.59, + "learning_rate": 1.482200241405797e-05, + "loss": 0.0823, + "step": 34050 + }, + { + "epoch": 1.59, + "learning_rate": 1.4821218629003183e-05, + "loss": 0.0993, + "step": 34055 + }, + { + "epoch": 1.59, + "learning_rate": 1.4820434843948396e-05, + "loss": 0.1109, + "step": 34060 + }, + { + "epoch": 1.59, + "learning_rate": 1.4819651058893609e-05, + "loss": 0.1505, + "step": 34065 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818867273838824e-05, + "loss": 0.2167, + "step": 34070 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818083488784036e-05, + "loss": 0.1975, + "step": 34075 + }, + { + "epoch": 1.59, + "learning_rate": 1.481729970372925e-05, + "loss": 0.3396, + "step": 34080 + }, + { + "epoch": 1.59, + "learning_rate": 1.4816515918674463e-05, + "loss": 0.2306, + "step": 34085 + }, + { + "epoch": 1.59, + "learning_rate": 1.4815732133619678e-05, + "loss": 0.0387, + "step": 34090 + }, + { + "epoch": 1.59, + "learning_rate": 1.481494834856489e-05, + "loss": 0.0714, + "step": 34095 + }, + { + "epoch": 1.59, + "learning_rate": 1.4814164563510104e-05, + "loss": 0.1295, + "step": 34100 + }, + { + "epoch": 1.59, + "learning_rate": 1.4813380778455318e-05, + "loss": 0.1515, + "step": 34105 + }, + { + "epoch": 1.59, + "learning_rate": 1.4812596993400532e-05, + "loss": 0.1432, + "step": 34110 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811813208345744e-05, + "loss": 0.1063, + "step": 34115 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811029423290957e-05, + "loss": 0.175, + "step": 34120 + }, + { + "epoch": 1.59, + "learning_rate": 1.4810245638236172e-05, + "loss": 0.2832, + "step": 34125 + }, + { + "epoch": 1.59, + "learning_rate": 1.4809461853181384e-05, + "loss": 0.4135, + "step": 34130 + }, + { + "epoch": 1.59, + "learning_rate": 1.4808678068126598e-05, + "loss": 0.2442, + "step": 34135 + }, + { + "epoch": 1.59, + "learning_rate": 1.480789428307181e-05, + "loss": 0.0886, + "step": 34140 + }, + { + "epoch": 1.59, + "learning_rate": 1.4807110498017026e-05, + "loss": 0.0458, + "step": 34145 + }, + { + "epoch": 1.59, + "learning_rate": 1.4806326712962238e-05, + "loss": 0.1056, + "step": 34150 + }, + { + "epoch": 1.59, + "learning_rate": 1.4805542927907452e-05, + "loss": 0.0775, + "step": 34155 + }, + { + "epoch": 1.59, + "learning_rate": 1.4804759142852664e-05, + "loss": 0.1509, + "step": 34160 + }, + { + "epoch": 1.59, + "learning_rate": 1.480397535779788e-05, + "loss": 0.1604, + "step": 34165 + }, + { + "epoch": 1.59, + "learning_rate": 1.4803191572743092e-05, + "loss": 0.1765, + "step": 34170 + }, + { + "epoch": 1.59, + "learning_rate": 1.4802407787688306e-05, + "loss": 0.1314, + "step": 34175 + }, + { + "epoch": 1.59, + "learning_rate": 1.4801624002633518e-05, + "loss": 0.428, + "step": 34180 + }, + { + "epoch": 1.6, + "learning_rate": 1.480084021757873e-05, + "loss": 0.2183, + "step": 34185 + }, + { + "epoch": 1.6, + "learning_rate": 1.4800056432523946e-05, + "loss": 0.0748, + "step": 34190 + }, + { + "epoch": 1.6, + "learning_rate": 1.4799272647469158e-05, + "loss": 0.0489, + "step": 34195 + }, + { + "epoch": 1.6, + "learning_rate": 1.4798488862414372e-05, + "loss": 0.0639, + "step": 34200 + }, + { + "epoch": 1.6, + "learning_rate": 1.4797705077359586e-05, + "loss": 0.0929, + "step": 34205 + }, + { + "epoch": 1.6, + "learning_rate": 1.47969212923048e-05, + "loss": 0.187, + "step": 34210 + }, + { + "epoch": 1.6, + "learning_rate": 1.4796137507250012e-05, + "loss": 0.1703, + "step": 34215 + }, + { + "epoch": 1.6, + "learning_rate": 1.4795353722195228e-05, + "loss": 0.1433, + "step": 34220 + }, + { + "epoch": 1.6, + "learning_rate": 1.479456993714044e-05, + "loss": 0.2344, + "step": 34225 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793786152085654e-05, + "loss": 0.3602, + "step": 34230 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793002367030866e-05, + "loss": 0.3009, + "step": 34235 + }, + { + "epoch": 1.6, + "learning_rate": 1.4792218581976082e-05, + "loss": 0.107, + "step": 34240 + }, + { + "epoch": 1.6, + "learning_rate": 1.4791434796921294e-05, + "loss": 0.0895, + "step": 34245 + }, + { + "epoch": 1.6, + "learning_rate": 1.4790651011866506e-05, + "loss": 0.0729, + "step": 34250 + }, + { + "epoch": 1.6, + "learning_rate": 1.478986722681172e-05, + "loss": 0.0705, + "step": 34255 + }, + { + "epoch": 1.6, + "learning_rate": 1.4789083441756932e-05, + "loss": 0.1037, + "step": 34260 + }, + { + "epoch": 1.6, + "learning_rate": 1.4788299656702148e-05, + "loss": 0.1372, + "step": 34265 + }, + { + "epoch": 1.6, + "learning_rate": 1.478751587164736e-05, + "loss": 0.2172, + "step": 34270 + }, + { + "epoch": 1.6, + "learning_rate": 1.4786732086592574e-05, + "loss": 0.2641, + "step": 34275 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785948301537786e-05, + "loss": 0.3562, + "step": 34280 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785164516483002e-05, + "loss": 0.2552, + "step": 34285 + }, + { + "epoch": 1.6, + "learning_rate": 1.4784380731428214e-05, + "loss": 0.0604, + "step": 34290 + }, + { + "epoch": 1.6, + "learning_rate": 1.4783596946373428e-05, + "loss": 0.0537, + "step": 34295 + }, + { + "epoch": 1.6, + "learning_rate": 1.478281316131864e-05, + "loss": 0.1428, + "step": 34300 + }, + { + "epoch": 1.6, + "learning_rate": 1.4782029376263856e-05, + "loss": 0.1001, + "step": 34305 + }, + { + "epoch": 1.6, + "learning_rate": 1.4781245591209068e-05, + "loss": 0.0837, + "step": 34310 + }, + { + "epoch": 1.6, + "learning_rate": 1.478046180615428e-05, + "loss": 0.125, + "step": 34315 + }, + { + "epoch": 1.6, + "learning_rate": 1.4779678021099496e-05, + "loss": 0.1296, + "step": 34320 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778894236044708e-05, + "loss": 0.1319, + "step": 34325 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778110450989922e-05, + "loss": 0.2868, + "step": 34330 + }, + { + "epoch": 1.6, + "learning_rate": 1.4777326665935134e-05, + "loss": 0.3055, + "step": 34335 + }, + { + "epoch": 1.6, + "learning_rate": 1.477654288088035e-05, + "loss": 0.0613, + "step": 34340 + }, + { + "epoch": 1.6, + "learning_rate": 1.4775759095825562e-05, + "loss": 0.1232, + "step": 34345 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774975310770776e-05, + "loss": 0.0745, + "step": 34350 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774191525715988e-05, + "loss": 0.128, + "step": 34355 + }, + { + "epoch": 1.6, + "learning_rate": 1.4773407740661204e-05, + "loss": 0.0547, + "step": 34360 + }, + { + "epoch": 1.6, + "learning_rate": 1.4772623955606416e-05, + "loss": 0.1655, + "step": 34365 + }, + { + "epoch": 1.6, + "learning_rate": 1.477184017055163e-05, + "loss": 0.47, + "step": 34370 + }, + { + "epoch": 1.6, + "learning_rate": 1.4771056385496842e-05, + "loss": 0.2424, + "step": 34375 + }, + { + "epoch": 1.6, + "learning_rate": 1.4770272600442054e-05, + "loss": 0.3795, + "step": 34380 + }, + { + "epoch": 1.6, + "learning_rate": 1.476948881538727e-05, + "loss": 0.292, + "step": 34385 + }, + { + "epoch": 1.6, + "learning_rate": 1.4768705030332482e-05, + "loss": 0.0394, + "step": 34390 + }, + { + "epoch": 1.6, + "learning_rate": 1.4767921245277696e-05, + "loss": 0.0294, + "step": 34395 + }, + { + "epoch": 1.61, + "learning_rate": 1.4767137460222908e-05, + "loss": 0.0748, + "step": 34400 + }, + { + "epoch": 1.61, + "learning_rate": 1.4766353675168124e-05, + "loss": 0.1135, + "step": 34405 + }, + { + "epoch": 1.61, + "learning_rate": 1.4765569890113336e-05, + "loss": 0.1248, + "step": 34410 + }, + { + "epoch": 1.61, + "learning_rate": 1.476478610505855e-05, + "loss": 0.1699, + "step": 34415 + }, + { + "epoch": 1.61, + "learning_rate": 1.4764002320003764e-05, + "loss": 0.1724, + "step": 34420 + }, + { + "epoch": 1.61, + "learning_rate": 1.4763218534948978e-05, + "loss": 0.2934, + "step": 34425 + }, + { + "epoch": 1.61, + "learning_rate": 1.476243474989419e-05, + "loss": 0.3888, + "step": 34430 + }, + { + "epoch": 1.61, + "learning_rate": 1.4761650964839405e-05, + "loss": 0.3509, + "step": 34435 + }, + { + "epoch": 1.61, + "learning_rate": 1.4760867179784618e-05, + "loss": 0.0337, + "step": 34440 + }, + { + "epoch": 1.61, + "learning_rate": 1.476008339472983e-05, + "loss": 0.0399, + "step": 34445 + }, + { + "epoch": 1.61, + "learning_rate": 1.4759299609675044e-05, + "loss": 0.071, + "step": 34450 + }, + { + "epoch": 1.61, + "learning_rate": 1.4758515824620256e-05, + "loss": 0.1536, + "step": 34455 + }, + { + "epoch": 1.61, + "learning_rate": 1.4757732039565472e-05, + "loss": 0.0879, + "step": 34460 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756948254510684e-05, + "loss": 0.256, + "step": 34465 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756164469455898e-05, + "loss": 0.1642, + "step": 34470 + }, + { + "epoch": 1.61, + "learning_rate": 1.475538068440111e-05, + "loss": 0.3665, + "step": 34475 + }, + { + "epoch": 1.61, + "learning_rate": 1.4754596899346326e-05, + "loss": 0.5219, + "step": 34480 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753813114291538e-05, + "loss": 0.24, + "step": 34485 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753029329236752e-05, + "loss": 0.0458, + "step": 34490 + }, + { + "epoch": 1.61, + "learning_rate": 1.4752245544181964e-05, + "loss": 0.0644, + "step": 34495 + }, + { + "epoch": 1.61, + "learning_rate": 1.475146175912718e-05, + "loss": 0.0412, + "step": 34500 + }, + { + "epoch": 1.61, + "learning_rate": 1.4750677974072392e-05, + "loss": 0.0782, + "step": 34505 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749894189017604e-05, + "loss": 0.102, + "step": 34510 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749110403962818e-05, + "loss": 0.1028, + "step": 34515 + }, + { + "epoch": 1.61, + "learning_rate": 1.4748326618908032e-05, + "loss": 0.1788, + "step": 34520 + }, + { + "epoch": 1.61, + "learning_rate": 1.4747542833853246e-05, + "loss": 0.1417, + "step": 34525 + }, + { + "epoch": 1.61, + "learning_rate": 1.4746759048798458e-05, + "loss": 0.2783, + "step": 34530 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745975263743673e-05, + "loss": 0.2968, + "step": 34535 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745191478688886e-05, + "loss": 0.0946, + "step": 34540 + }, + { + "epoch": 1.61, + "learning_rate": 1.47444076936341e-05, + "loss": 0.0437, + "step": 34545 + }, + { + "epoch": 1.61, + "learning_rate": 1.4743623908579312e-05, + "loss": 0.0868, + "step": 34550 + }, + { + "epoch": 1.61, + "learning_rate": 1.4742840123524527e-05, + "loss": 0.105, + "step": 34555 + }, + { + "epoch": 1.61, + "learning_rate": 1.474205633846974e-05, + "loss": 0.1074, + "step": 34560 + }, + { + "epoch": 1.61, + "learning_rate": 1.4741272553414953e-05, + "loss": 0.1464, + "step": 34565 + }, + { + "epoch": 1.61, + "learning_rate": 1.4740488768360166e-05, + "loss": 0.1298, + "step": 34570 + }, + { + "epoch": 1.61, + "learning_rate": 1.4739704983305378e-05, + "loss": 0.2425, + "step": 34575 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738921198250593e-05, + "loss": 0.3058, + "step": 34580 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738137413195806e-05, + "loss": 0.2462, + "step": 34585 + }, + { + "epoch": 1.61, + "learning_rate": 1.473735362814102e-05, + "loss": 0.0695, + "step": 34590 + }, + { + "epoch": 1.61, + "learning_rate": 1.4736569843086232e-05, + "loss": 0.0971, + "step": 34595 + }, + { + "epoch": 1.61, + "learning_rate": 1.4735786058031447e-05, + "loss": 0.1426, + "step": 34600 + }, + { + "epoch": 1.61, + "learning_rate": 1.473500227297666e-05, + "loss": 0.0843, + "step": 34605 + }, + { + "epoch": 1.61, + "learning_rate": 1.4734218487921874e-05, + "loss": 0.1106, + "step": 34610 + }, + { + "epoch": 1.62, + "learning_rate": 1.4733434702867086e-05, + "loss": 0.0959, + "step": 34615 + }, + { + "epoch": 1.62, + "learning_rate": 1.4732650917812301e-05, + "loss": 0.1614, + "step": 34620 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731867132757514e-05, + "loss": 0.2626, + "step": 34625 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731083347702727e-05, + "loss": 0.2758, + "step": 34630 + }, + { + "epoch": 1.62, + "learning_rate": 1.4730299562647941e-05, + "loss": 0.3072, + "step": 34635 + }, + { + "epoch": 1.62, + "learning_rate": 1.4729515777593154e-05, + "loss": 0.0358, + "step": 34640 + }, + { + "epoch": 1.62, + "learning_rate": 1.4728731992538367e-05, + "loss": 0.0405, + "step": 34645 + }, + { + "epoch": 1.62, + "learning_rate": 1.472794820748358e-05, + "loss": 0.0772, + "step": 34650 + }, + { + "epoch": 1.62, + "learning_rate": 1.4727164422428795e-05, + "loss": 0.1179, + "step": 34655 + }, + { + "epoch": 1.62, + "learning_rate": 1.4726380637374008e-05, + "loss": 0.0595, + "step": 34660 + }, + { + "epoch": 1.62, + "learning_rate": 1.4725596852319221e-05, + "loss": 0.2746, + "step": 34665 + }, + { + "epoch": 1.62, + "learning_rate": 1.4724813067264434e-05, + "loss": 0.1401, + "step": 34670 + }, + { + "epoch": 1.62, + "learning_rate": 1.472402928220965e-05, + "loss": 0.1435, + "step": 34675 + }, + { + "epoch": 1.62, + "learning_rate": 1.4723245497154861e-05, + "loss": 0.2242, + "step": 34680 + }, + { + "epoch": 1.62, + "learning_rate": 1.4722461712100075e-05, + "loss": 0.3268, + "step": 34685 + }, + { + "epoch": 1.62, + "learning_rate": 1.4721677927045288e-05, + "loss": 0.034, + "step": 34690 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720894141990503e-05, + "loss": 0.1351, + "step": 34695 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720110356935715e-05, + "loss": 0.0795, + "step": 34700 + }, + { + "epoch": 1.62, + "learning_rate": 1.4719326571880928e-05, + "loss": 0.0898, + "step": 34705 + }, + { + "epoch": 1.62, + "learning_rate": 1.4718542786826141e-05, + "loss": 0.096, + "step": 34710 + }, + { + "epoch": 1.62, + "learning_rate": 1.4717759001771354e-05, + "loss": 0.1264, + "step": 34715 + }, + { + "epoch": 1.62, + "learning_rate": 1.471697521671657e-05, + "loss": 0.1767, + "step": 34720 + }, + { + "epoch": 1.62, + "learning_rate": 1.4716191431661782e-05, + "loss": 0.1141, + "step": 34725 + }, + { + "epoch": 1.62, + "learning_rate": 1.4715407646606995e-05, + "loss": 0.3326, + "step": 34730 + }, + { + "epoch": 1.62, + "learning_rate": 1.471462386155221e-05, + "loss": 0.3716, + "step": 34735 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713840076497423e-05, + "loss": 0.0574, + "step": 34740 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713056291442635e-05, + "loss": 0.0426, + "step": 34745 + }, + { + "epoch": 1.62, + "learning_rate": 1.4712272506387851e-05, + "loss": 0.0909, + "step": 34750 + }, + { + "epoch": 1.62, + "learning_rate": 1.4711488721333063e-05, + "loss": 0.1078, + "step": 34755 + }, + { + "epoch": 1.62, + "learning_rate": 1.4710704936278277e-05, + "loss": 0.1061, + "step": 34760 + }, + { + "epoch": 1.62, + "learning_rate": 1.470992115122349e-05, + "loss": 0.1885, + "step": 34765 + }, + { + "epoch": 1.62, + "learning_rate": 1.4709137366168702e-05, + "loss": 0.176, + "step": 34770 + }, + { + "epoch": 1.62, + "learning_rate": 1.4708353581113917e-05, + "loss": 0.1519, + "step": 34775 + }, + { + "epoch": 1.62, + "learning_rate": 1.470756979605913e-05, + "loss": 0.2766, + "step": 34780 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706786011004343e-05, + "loss": 0.2345, + "step": 34785 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706002225949556e-05, + "loss": 0.0734, + "step": 34790 + }, + { + "epoch": 1.62, + "learning_rate": 1.4705218440894771e-05, + "loss": 0.0545, + "step": 34795 + }, + { + "epoch": 1.62, + "learning_rate": 1.4704434655839983e-05, + "loss": 0.0359, + "step": 34800 + }, + { + "epoch": 1.62, + "learning_rate": 1.4703650870785197e-05, + "loss": 0.08, + "step": 34805 + }, + { + "epoch": 1.62, + "learning_rate": 1.470286708573041e-05, + "loss": 0.1442, + "step": 34810 + }, + { + "epoch": 1.62, + "learning_rate": 1.4702083300675625e-05, + "loss": 0.1107, + "step": 34815 + }, + { + "epoch": 1.62, + "learning_rate": 1.4701299515620837e-05, + "loss": 0.1809, + "step": 34820 + }, + { + "epoch": 1.62, + "learning_rate": 1.4700515730566051e-05, + "loss": 0.1639, + "step": 34825 + }, + { + "epoch": 1.63, + "learning_rate": 1.4699731945511263e-05, + "loss": 0.2368, + "step": 34830 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698948160456477e-05, + "loss": 0.2754, + "step": 34835 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698164375401691e-05, + "loss": 0.1172, + "step": 34840 + }, + { + "epoch": 1.63, + "learning_rate": 1.4697380590346903e-05, + "loss": 0.0575, + "step": 34845 + }, + { + "epoch": 1.63, + "learning_rate": 1.4696596805292119e-05, + "loss": 0.1116, + "step": 34850 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695813020237331e-05, + "loss": 0.1418, + "step": 34855 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695029235182545e-05, + "loss": 0.1506, + "step": 34860 + }, + { + "epoch": 1.63, + "learning_rate": 1.4694245450127757e-05, + "loss": 0.1702, + "step": 34865 + }, + { + "epoch": 1.63, + "learning_rate": 1.4693461665072973e-05, + "loss": 0.2012, + "step": 34870 + }, + { + "epoch": 1.63, + "learning_rate": 1.4692677880018185e-05, + "loss": 0.28, + "step": 34875 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691894094963399e-05, + "loss": 0.2179, + "step": 34880 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691110309908611e-05, + "loss": 0.2453, + "step": 34885 + }, + { + "epoch": 1.63, + "learning_rate": 1.4690326524853827e-05, + "loss": 0.0277, + "step": 34890 + }, + { + "epoch": 1.63, + "learning_rate": 1.4689542739799039e-05, + "loss": 0.1151, + "step": 34895 + }, + { + "epoch": 1.63, + "learning_rate": 1.4688758954744251e-05, + "loss": 0.091, + "step": 34900 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687975169689465e-05, + "loss": 0.1309, + "step": 34905 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687191384634677e-05, + "loss": 0.0805, + "step": 34910 + }, + { + "epoch": 1.63, + "learning_rate": 1.4686407599579893e-05, + "loss": 0.1674, + "step": 34915 + }, + { + "epoch": 1.63, + "learning_rate": 1.4685623814525105e-05, + "loss": 0.1859, + "step": 34920 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684840029470319e-05, + "loss": 0.234, + "step": 34925 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684056244415531e-05, + "loss": 0.3353, + "step": 34930 + }, + { + "epoch": 1.63, + "learning_rate": 1.4683272459360747e-05, + "loss": 0.3759, + "step": 34935 + }, + { + "epoch": 1.63, + "learning_rate": 1.4682488674305959e-05, + "loss": 0.0299, + "step": 34940 + }, + { + "epoch": 1.63, + "learning_rate": 1.4681704889251173e-05, + "loss": 0.0724, + "step": 34945 + }, + { + "epoch": 1.63, + "learning_rate": 1.4680921104196387e-05, + "loss": 0.1143, + "step": 34950 + }, + { + "epoch": 1.63, + "learning_rate": 1.46801373191416e-05, + "loss": 0.1031, + "step": 34955 + }, + { + "epoch": 1.63, + "learning_rate": 1.4679353534086813e-05, + "loss": 0.1564, + "step": 34960 + }, + { + "epoch": 1.63, + "learning_rate": 1.4678569749032025e-05, + "loss": 0.1843, + "step": 34965 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677785963977241e-05, + "loss": 0.1421, + "step": 34970 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677002178922453e-05, + "loss": 0.1495, + "step": 34975 + }, + { + "epoch": 1.63, + "learning_rate": 1.4676218393867667e-05, + "loss": 0.3336, + "step": 34980 + }, + { + "epoch": 1.63, + "learning_rate": 1.467543460881288e-05, + "loss": 0.3336, + "step": 34985 + }, + { + "epoch": 1.63, + "learning_rate": 1.4674650823758095e-05, + "loss": 0.0764, + "step": 34990 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673867038703307e-05, + "loss": 0.0414, + "step": 34995 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673083253648521e-05, + "loss": 0.0867, + "step": 35000 + }, + { + "epoch": 1.63, + "learning_rate": 1.4672299468593733e-05, + "loss": 0.0791, + "step": 35005 + }, + { + "epoch": 1.63, + "learning_rate": 1.4671515683538949e-05, + "loss": 0.1432, + "step": 35010 + }, + { + "epoch": 1.63, + "learning_rate": 1.4670731898484161e-05, + "loss": 0.1849, + "step": 35015 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669948113429375e-05, + "loss": 0.1386, + "step": 35020 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669164328374587e-05, + "loss": 0.1576, + "step": 35025 + }, + { + "epoch": 1.63, + "learning_rate": 1.46683805433198e-05, + "loss": 0.3647, + "step": 35030 + }, + { + "epoch": 1.63, + "learning_rate": 1.4667596758265015e-05, + "loss": 0.3103, + "step": 35035 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666812973210227e-05, + "loss": 0.0494, + "step": 35040 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666029188155441e-05, + "loss": 0.0672, + "step": 35045 + }, + { + "epoch": 1.64, + "learning_rate": 1.4665245403100655e-05, + "loss": 0.0995, + "step": 35050 + }, + { + "epoch": 1.64, + "learning_rate": 1.4664461618045869e-05, + "loss": 0.0589, + "step": 35055 + }, + { + "epoch": 1.64, + "learning_rate": 1.4663677832991081e-05, + "loss": 0.1083, + "step": 35060 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662894047936297e-05, + "loss": 0.1836, + "step": 35065 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662110262881509e-05, + "loss": 0.216, + "step": 35070 + }, + { + "epoch": 1.64, + "learning_rate": 1.4661326477826723e-05, + "loss": 0.2673, + "step": 35075 + }, + { + "epoch": 1.64, + "learning_rate": 1.4660542692771935e-05, + "loss": 0.3442, + "step": 35080 + }, + { + "epoch": 1.64, + "learning_rate": 1.465975890771715e-05, + "loss": 0.3217, + "step": 35085 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658975122662363e-05, + "loss": 0.0211, + "step": 35090 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658191337607575e-05, + "loss": 0.0388, + "step": 35095 + }, + { + "epoch": 1.64, + "learning_rate": 1.4657407552552789e-05, + "loss": 0.044, + "step": 35100 + }, + { + "epoch": 1.64, + "learning_rate": 1.4656623767498001e-05, + "loss": 0.1086, + "step": 35105 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655839982443217e-05, + "loss": 0.1599, + "step": 35110 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655056197388429e-05, + "loss": 0.129, + "step": 35115 + }, + { + "epoch": 1.64, + "learning_rate": 1.4654272412333643e-05, + "loss": 0.1483, + "step": 35120 + }, + { + "epoch": 1.64, + "learning_rate": 1.4653488627278855e-05, + "loss": 0.2187, + "step": 35125 + }, + { + "epoch": 1.64, + "learning_rate": 1.465270484222407e-05, + "loss": 0.4357, + "step": 35130 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651921057169283e-05, + "loss": 0.2683, + "step": 35135 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651137272114497e-05, + "loss": 0.0366, + "step": 35140 + }, + { + "epoch": 1.64, + "learning_rate": 1.4650353487059709e-05, + "loss": 0.0448, + "step": 35145 + }, + { + "epoch": 1.64, + "learning_rate": 1.4649569702004925e-05, + "loss": 0.0861, + "step": 35150 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648785916950137e-05, + "loss": 0.0554, + "step": 35155 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648002131895349e-05, + "loss": 0.143, + "step": 35160 + }, + { + "epoch": 1.64, + "learning_rate": 1.4647218346840565e-05, + "loss": 0.1561, + "step": 35165 + }, + { + "epoch": 1.64, + "learning_rate": 1.4646434561785777e-05, + "loss": 0.1867, + "step": 35170 + }, + { + "epoch": 1.64, + "learning_rate": 1.464565077673099e-05, + "loss": 0.1855, + "step": 35175 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644866991676203e-05, + "loss": 0.4092, + "step": 35180 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644083206621418e-05, + "loss": 0.3768, + "step": 35185 + }, + { + "epoch": 1.64, + "learning_rate": 1.464329942156663e-05, + "loss": 0.1151, + "step": 35190 + }, + { + "epoch": 1.64, + "learning_rate": 1.4642515636511845e-05, + "loss": 0.0626, + "step": 35195 + }, + { + "epoch": 1.64, + "learning_rate": 1.4641731851457057e-05, + "loss": 0.0333, + "step": 35200 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640948066402272e-05, + "loss": 0.0781, + "step": 35205 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640164281347485e-05, + "loss": 0.1316, + "step": 35210 + }, + { + "epoch": 1.64, + "learning_rate": 1.4639380496292699e-05, + "loss": 0.0988, + "step": 35215 + }, + { + "epoch": 1.64, + "learning_rate": 1.463859671123791e-05, + "loss": 0.1637, + "step": 35220 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637812926183123e-05, + "loss": 0.2726, + "step": 35225 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637029141128339e-05, + "loss": 0.336, + "step": 35230 + }, + { + "epoch": 1.64, + "learning_rate": 1.463624535607355e-05, + "loss": 0.1921, + "step": 35235 + }, + { + "epoch": 1.64, + "learning_rate": 1.4635461571018765e-05, + "loss": 0.026, + "step": 35240 + }, + { + "epoch": 1.64, + "learning_rate": 1.4634677785963977e-05, + "loss": 0.0511, + "step": 35245 + }, + { + "epoch": 1.64, + "learning_rate": 1.4633894000909192e-05, + "loss": 0.0613, + "step": 35250 + }, + { + "epoch": 1.65, + "learning_rate": 1.4633110215854405e-05, + "loss": 0.116, + "step": 35255 + }, + { + "epoch": 1.65, + "learning_rate": 1.4632326430799619e-05, + "loss": 0.1376, + "step": 35260 + }, + { + "epoch": 1.65, + "learning_rate": 1.4631542645744833e-05, + "loss": 0.1423, + "step": 35265 + }, + { + "epoch": 1.65, + "learning_rate": 1.4630758860690046e-05, + "loss": 0.0715, + "step": 35270 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629975075635259e-05, + "loss": 0.2406, + "step": 35275 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629191290580474e-05, + "loss": 0.2576, + "step": 35280 + }, + { + "epoch": 1.65, + "learning_rate": 1.4628407505525686e-05, + "loss": 0.1546, + "step": 35285 + }, + { + "epoch": 1.65, + "learning_rate": 1.4627623720470899e-05, + "loss": 0.0603, + "step": 35290 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626839935416113e-05, + "loss": 0.0637, + "step": 35295 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626056150361325e-05, + "loss": 0.0723, + "step": 35300 + }, + { + "epoch": 1.65, + "learning_rate": 1.462527236530654e-05, + "loss": 0.0931, + "step": 35305 + }, + { + "epoch": 1.65, + "learning_rate": 1.4624488580251753e-05, + "loss": 0.163, + "step": 35310 + }, + { + "epoch": 1.65, + "learning_rate": 1.4623704795196966e-05, + "loss": 0.2184, + "step": 35315 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622921010142179e-05, + "loss": 0.0844, + "step": 35320 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622137225087394e-05, + "loss": 0.2475, + "step": 35325 + }, + { + "epoch": 1.65, + "learning_rate": 1.4621353440032607e-05, + "loss": 0.3015, + "step": 35330 + }, + { + "epoch": 1.65, + "learning_rate": 1.462056965497782e-05, + "loss": 0.26, + "step": 35335 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619785869923033e-05, + "loss": 0.0676, + "step": 35340 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619002084868248e-05, + "loss": 0.0765, + "step": 35345 + }, + { + "epoch": 1.65, + "learning_rate": 1.461821829981346e-05, + "loss": 0.0856, + "step": 35350 + }, + { + "epoch": 1.65, + "learning_rate": 1.4617434514758673e-05, + "loss": 0.0801, + "step": 35355 + }, + { + "epoch": 1.65, + "learning_rate": 1.4616650729703887e-05, + "loss": 0.1053, + "step": 35360 + }, + { + "epoch": 1.65, + "learning_rate": 1.46158669446491e-05, + "loss": 0.1531, + "step": 35365 + }, + { + "epoch": 1.65, + "learning_rate": 1.4615083159594314e-05, + "loss": 0.2514, + "step": 35370 + }, + { + "epoch": 1.65, + "learning_rate": 1.4614299374539527e-05, + "loss": 0.1658, + "step": 35375 + }, + { + "epoch": 1.65, + "learning_rate": 1.4613515589484742e-05, + "loss": 0.3044, + "step": 35380 + }, + { + "epoch": 1.65, + "learning_rate": 1.4612731804429954e-05, + "loss": 0.2306, + "step": 35385 + }, + { + "epoch": 1.65, + "learning_rate": 1.4611948019375168e-05, + "loss": 0.0903, + "step": 35390 + }, + { + "epoch": 1.65, + "learning_rate": 1.461116423432038e-05, + "loss": 0.0519, + "step": 35395 + }, + { + "epoch": 1.65, + "learning_rate": 1.4610380449265596e-05, + "loss": 0.0563, + "step": 35400 + }, + { + "epoch": 1.65, + "learning_rate": 1.4609596664210808e-05, + "loss": 0.0959, + "step": 35405 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608812879156022e-05, + "loss": 0.0844, + "step": 35410 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608029094101234e-05, + "loss": 0.1355, + "step": 35415 + }, + { + "epoch": 1.65, + "learning_rate": 1.4607245309046447e-05, + "loss": 0.1931, + "step": 35420 + }, + { + "epoch": 1.65, + "learning_rate": 1.4606461523991662e-05, + "loss": 0.257, + "step": 35425 + }, + { + "epoch": 1.65, + "learning_rate": 1.4605677738936874e-05, + "loss": 0.5352, + "step": 35430 + }, + { + "epoch": 1.65, + "learning_rate": 1.4604893953882088e-05, + "loss": 0.3642, + "step": 35435 + }, + { + "epoch": 1.65, + "learning_rate": 1.46041101688273e-05, + "loss": 0.0393, + "step": 35440 + }, + { + "epoch": 1.65, + "learning_rate": 1.4603326383772516e-05, + "loss": 0.106, + "step": 35445 + }, + { + "epoch": 1.65, + "learning_rate": 1.4602542598717728e-05, + "loss": 0.1245, + "step": 35450 + }, + { + "epoch": 1.65, + "learning_rate": 1.4601758813662942e-05, + "loss": 0.1576, + "step": 35455 + }, + { + "epoch": 1.65, + "learning_rate": 1.4600975028608155e-05, + "loss": 0.1734, + "step": 35460 + }, + { + "epoch": 1.65, + "learning_rate": 1.460019124355337e-05, + "loss": 0.0837, + "step": 35465 + }, + { + "epoch": 1.66, + "learning_rate": 1.4599407458498582e-05, + "loss": 0.1933, + "step": 35470 + }, + { + "epoch": 1.66, + "learning_rate": 1.4598623673443796e-05, + "loss": 0.1611, + "step": 35475 + }, + { + "epoch": 1.66, + "learning_rate": 1.459783988838901e-05, + "loss": 0.2965, + "step": 35480 + }, + { + "epoch": 1.66, + "learning_rate": 1.4597056103334222e-05, + "loss": 0.314, + "step": 35485 + }, + { + "epoch": 1.66, + "learning_rate": 1.4596272318279436e-05, + "loss": 0.0355, + "step": 35490 + }, + { + "epoch": 1.66, + "learning_rate": 1.4595488533224648e-05, + "loss": 0.0476, + "step": 35495 + }, + { + "epoch": 1.66, + "learning_rate": 1.4594704748169864e-05, + "loss": 0.0686, + "step": 35500 + }, + { + "epoch": 1.66, + "learning_rate": 1.4593920963115076e-05, + "loss": 0.1746, + "step": 35505 + }, + { + "epoch": 1.66, + "learning_rate": 1.459313717806029e-05, + "loss": 0.1403, + "step": 35510 + }, + { + "epoch": 1.66, + "learning_rate": 1.4592353393005502e-05, + "loss": 0.1285, + "step": 35515 + }, + { + "epoch": 1.66, + "learning_rate": 1.4591569607950718e-05, + "loss": 0.252, + "step": 35520 + }, + { + "epoch": 1.66, + "learning_rate": 1.459078582289593e-05, + "loss": 0.2358, + "step": 35525 + }, + { + "epoch": 1.66, + "learning_rate": 1.4590002037841144e-05, + "loss": 0.2364, + "step": 35530 + }, + { + "epoch": 1.66, + "learning_rate": 1.4589218252786356e-05, + "loss": 0.2942, + "step": 35535 + }, + { + "epoch": 1.66, + "learning_rate": 1.4588434467731572e-05, + "loss": 0.0387, + "step": 35540 + }, + { + "epoch": 1.66, + "learning_rate": 1.4587650682676784e-05, + "loss": 0.1152, + "step": 35545 + }, + { + "epoch": 1.66, + "learning_rate": 1.4586866897621996e-05, + "loss": 0.0824, + "step": 35550 + }, + { + "epoch": 1.66, + "learning_rate": 1.458608311256721e-05, + "loss": 0.1106, + "step": 35555 + }, + { + "epoch": 1.66, + "learning_rate": 1.4585299327512422e-05, + "loss": 0.1017, + "step": 35560 + }, + { + "epoch": 1.66, + "learning_rate": 1.4584515542457638e-05, + "loss": 0.2116, + "step": 35565 + }, + { + "epoch": 1.66, + "learning_rate": 1.458373175740285e-05, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582947972348064e-05, + "loss": 0.2595, + "step": 35575 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582164187293278e-05, + "loss": 0.3131, + "step": 35580 + }, + { + "epoch": 1.66, + "learning_rate": 1.4581380402238492e-05, + "loss": 0.2581, + "step": 35585 + }, + { + "epoch": 1.66, + "learning_rate": 1.4580596617183704e-05, + "loss": 0.0496, + "step": 35590 + }, + { + "epoch": 1.66, + "learning_rate": 1.457981283212892e-05, + "loss": 0.0473, + "step": 35595 + }, + { + "epoch": 1.66, + "learning_rate": 1.4579029047074132e-05, + "loss": 0.1001, + "step": 35600 + }, + { + "epoch": 1.66, + "learning_rate": 1.4578245262019346e-05, + "loss": 0.053, + "step": 35605 + }, + { + "epoch": 1.66, + "learning_rate": 1.4577461476964558e-05, + "loss": 0.1636, + "step": 35610 + }, + { + "epoch": 1.66, + "learning_rate": 1.457667769190977e-05, + "loss": 0.1338, + "step": 35615 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575893906854986e-05, + "loss": 0.2236, + "step": 35620 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575110121800198e-05, + "loss": 0.17, + "step": 35625 + }, + { + "epoch": 1.66, + "learning_rate": 1.4574326336745412e-05, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4573542551690624e-05, + "loss": 0.1904, + "step": 35635 + }, + { + "epoch": 1.66, + "learning_rate": 1.457275876663584e-05, + "loss": 0.0506, + "step": 35640 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571974981581052e-05, + "loss": 0.0974, + "step": 35645 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571191196526266e-05, + "loss": 0.036, + "step": 35650 + }, + { + "epoch": 1.66, + "learning_rate": 1.4570407411471478e-05, + "loss": 0.1061, + "step": 35655 + }, + { + "epoch": 1.66, + "learning_rate": 1.4569623626416694e-05, + "loss": 0.1198, + "step": 35660 + }, + { + "epoch": 1.66, + "learning_rate": 1.4568839841361906e-05, + "loss": 0.244, + "step": 35665 + }, + { + "epoch": 1.66, + "learning_rate": 1.456805605630712e-05, + "loss": 0.1738, + "step": 35670 + }, + { + "epoch": 1.66, + "learning_rate": 1.4567272271252332e-05, + "loss": 0.1297, + "step": 35675 + }, + { + "epoch": 1.66, + "learning_rate": 1.4566488486197546e-05, + "loss": 0.3234, + "step": 35680 + }, + { + "epoch": 1.67, + "learning_rate": 1.456570470114276e-05, + "loss": 0.3038, + "step": 35685 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564920916087972e-05, + "loss": 0.0216, + "step": 35690 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564137131033188e-05, + "loss": 0.0839, + "step": 35695 + }, + { + "epoch": 1.67, + "learning_rate": 1.45633533459784e-05, + "loss": 0.0911, + "step": 35700 + }, + { + "epoch": 1.67, + "learning_rate": 1.4562569560923614e-05, + "loss": 0.1222, + "step": 35705 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561785775868826e-05, + "loss": 0.0914, + "step": 35710 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561001990814042e-05, + "loss": 0.1321, + "step": 35715 + }, + { + "epoch": 1.67, + "learning_rate": 1.4560218205759254e-05, + "loss": 0.1351, + "step": 35720 + }, + { + "epoch": 1.67, + "learning_rate": 1.4559434420704468e-05, + "loss": 0.2113, + "step": 35725 + }, + { + "epoch": 1.67, + "learning_rate": 1.455865063564968e-05, + "loss": 0.3217, + "step": 35730 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557866850594896e-05, + "loss": 0.3432, + "step": 35735 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557083065540108e-05, + "loss": 0.0595, + "step": 35740 + }, + { + "epoch": 1.67, + "learning_rate": 1.455629928048532e-05, + "loss": 0.0583, + "step": 35745 + }, + { + "epoch": 1.67, + "learning_rate": 1.4555515495430534e-05, + "loss": 0.0955, + "step": 35750 + }, + { + "epoch": 1.67, + "learning_rate": 1.4554731710375746e-05, + "loss": 0.0763, + "step": 35755 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553947925320962e-05, + "loss": 0.159, + "step": 35760 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553164140266174e-05, + "loss": 0.1699, + "step": 35765 + }, + { + "epoch": 1.67, + "learning_rate": 1.4552380355211388e-05, + "loss": 0.1226, + "step": 35770 + }, + { + "epoch": 1.67, + "learning_rate": 1.45515965701566e-05, + "loss": 0.233, + "step": 35775 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550812785101816e-05, + "loss": 0.3687, + "step": 35780 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550029000047028e-05, + "loss": 0.2873, + "step": 35785 + }, + { + "epoch": 1.67, + "learning_rate": 1.4549245214992242e-05, + "loss": 0.0251, + "step": 35790 + }, + { + "epoch": 1.67, + "learning_rate": 1.4548461429937456e-05, + "loss": 0.0768, + "step": 35795 + }, + { + "epoch": 1.67, + "learning_rate": 1.454767764488267e-05, + "loss": 0.1486, + "step": 35800 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546893859827882e-05, + "loss": 0.1071, + "step": 35805 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546110074773094e-05, + "loss": 0.115, + "step": 35810 + }, + { + "epoch": 1.67, + "learning_rate": 1.454532628971831e-05, + "loss": 0.1783, + "step": 35815 + }, + { + "epoch": 1.67, + "learning_rate": 1.4544542504663522e-05, + "loss": 0.1416, + "step": 35820 + }, + { + "epoch": 1.67, + "learning_rate": 1.4543758719608736e-05, + "loss": 0.2712, + "step": 35825 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542974934553948e-05, + "loss": 0.3227, + "step": 35830 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542191149499164e-05, + "loss": 0.3159, + "step": 35835 + }, + { + "epoch": 1.67, + "learning_rate": 1.4541407364444376e-05, + "loss": 0.0305, + "step": 35840 + }, + { + "epoch": 1.67, + "learning_rate": 1.454062357938959e-05, + "loss": 0.0921, + "step": 35845 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539839794334802e-05, + "loss": 0.0471, + "step": 35850 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539056009280017e-05, + "loss": 0.1261, + "step": 35855 + }, + { + "epoch": 1.67, + "learning_rate": 1.453827222422523e-05, + "loss": 0.1288, + "step": 35860 + }, + { + "epoch": 1.67, + "learning_rate": 1.4537488439170444e-05, + "loss": 0.1459, + "step": 35865 + }, + { + "epoch": 1.67, + "learning_rate": 1.4536704654115656e-05, + "loss": 0.141, + "step": 35870 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535920869060868e-05, + "loss": 0.2848, + "step": 35875 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535137084006084e-05, + "loss": 0.3458, + "step": 35880 + }, + { + "epoch": 1.67, + "learning_rate": 1.4534353298951296e-05, + "loss": 0.4117, + "step": 35885 + }, + { + "epoch": 1.67, + "learning_rate": 1.453356951389651e-05, + "loss": 0.0384, + "step": 35890 + }, + { + "epoch": 1.67, + "learning_rate": 1.4532785728841724e-05, + "loss": 0.1041, + "step": 35895 + }, + { + "epoch": 1.68, + "learning_rate": 1.4532001943786938e-05, + "loss": 0.1218, + "step": 35900 + }, + { + "epoch": 1.68, + "learning_rate": 1.453121815873215e-05, + "loss": 0.1158, + "step": 35905 + }, + { + "epoch": 1.68, + "learning_rate": 1.4530434373677365e-05, + "loss": 0.0852, + "step": 35910 + }, + { + "epoch": 1.68, + "learning_rate": 1.4529650588622578e-05, + "loss": 0.081, + "step": 35915 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528866803567791e-05, + "loss": 0.198, + "step": 35920 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528083018513004e-05, + "loss": 0.2399, + "step": 35925 + }, + { + "epoch": 1.68, + "learning_rate": 1.452729923345822e-05, + "loss": 0.2144, + "step": 35930 + }, + { + "epoch": 1.68, + "learning_rate": 1.4526515448403432e-05, + "loss": 0.2149, + "step": 35935 + }, + { + "epoch": 1.68, + "learning_rate": 1.4525731663348644e-05, + "loss": 0.0402, + "step": 35940 + }, + { + "epoch": 1.68, + "learning_rate": 1.4524947878293858e-05, + "loss": 0.0718, + "step": 35945 + }, + { + "epoch": 1.68, + "learning_rate": 1.452416409323907e-05, + "loss": 0.0818, + "step": 35950 + }, + { + "epoch": 1.68, + "learning_rate": 1.4523380308184285e-05, + "loss": 0.0695, + "step": 35955 + }, + { + "epoch": 1.68, + "learning_rate": 1.4522596523129498e-05, + "loss": 0.0995, + "step": 35960 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521812738074712e-05, + "loss": 0.1538, + "step": 35965 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521028953019924e-05, + "loss": 0.2184, + "step": 35970 + }, + { + "epoch": 1.68, + "learning_rate": 1.452024516796514e-05, + "loss": 0.1701, + "step": 35975 + }, + { + "epoch": 1.68, + "learning_rate": 1.4519461382910352e-05, + "loss": 0.401, + "step": 35980 + }, + { + "epoch": 1.68, + "learning_rate": 1.4518677597855565e-05, + "loss": 0.3014, + "step": 35985 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517893812800778e-05, + "loss": 0.047, + "step": 35990 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517110027745993e-05, + "loss": 0.0691, + "step": 35995 + }, + { + "epoch": 1.68, + "learning_rate": 1.4516326242691206e-05, + "loss": 0.1155, + "step": 36000 + }, + { + "epoch": 1.68, + "learning_rate": 1.4515542457636418e-05, + "loss": 0.1304, + "step": 36005 + }, + { + "epoch": 1.68, + "learning_rate": 1.4514758672581633e-05, + "loss": 0.1563, + "step": 36010 + }, + { + "epoch": 1.68, + "learning_rate": 1.4513974887526846e-05, + "loss": 0.1016, + "step": 36015 + }, + { + "epoch": 1.68, + "learning_rate": 1.451319110247206e-05, + "loss": 0.186, + "step": 36020 + }, + { + "epoch": 1.68, + "learning_rate": 1.4512407317417272e-05, + "loss": 0.203, + "step": 36025 + }, + { + "epoch": 1.68, + "learning_rate": 1.4511623532362487e-05, + "loss": 0.3696, + "step": 36030 + }, + { + "epoch": 1.68, + "learning_rate": 1.45108397473077e-05, + "loss": 0.2746, + "step": 36035 + }, + { + "epoch": 1.68, + "learning_rate": 1.4510055962252913e-05, + "loss": 0.0431, + "step": 36040 + }, + { + "epoch": 1.68, + "learning_rate": 1.4509272177198126e-05, + "loss": 0.0458, + "step": 36045 + }, + { + "epoch": 1.68, + "learning_rate": 1.4508488392143341e-05, + "loss": 0.068, + "step": 36050 + }, + { + "epoch": 1.68, + "learning_rate": 1.4507704607088553e-05, + "loss": 0.156, + "step": 36055 + }, + { + "epoch": 1.68, + "learning_rate": 1.4506920822033767e-05, + "loss": 0.0859, + "step": 36060 + }, + { + "epoch": 1.68, + "learning_rate": 1.450613703697898e-05, + "loss": 0.1969, + "step": 36065 + }, + { + "epoch": 1.68, + "learning_rate": 1.4505353251924192e-05, + "loss": 0.1335, + "step": 36070 + }, + { + "epoch": 1.68, + "learning_rate": 1.4504569466869407e-05, + "loss": 0.2158, + "step": 36075 + }, + { + "epoch": 1.68, + "learning_rate": 1.450378568181462e-05, + "loss": 0.2714, + "step": 36080 + }, + { + "epoch": 1.68, + "learning_rate": 1.4503001896759833e-05, + "loss": 0.2969, + "step": 36085 + }, + { + "epoch": 1.68, + "learning_rate": 1.4502218111705046e-05, + "loss": 0.0216, + "step": 36090 + }, + { + "epoch": 1.68, + "learning_rate": 1.4501434326650261e-05, + "loss": 0.0438, + "step": 36095 + }, + { + "epoch": 1.68, + "learning_rate": 1.4500650541595473e-05, + "loss": 0.0988, + "step": 36100 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499866756540687e-05, + "loss": 0.0888, + "step": 36105 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499082971485901e-05, + "loss": 0.0665, + "step": 36110 + }, + { + "epoch": 1.69, + "learning_rate": 1.4498299186431115e-05, + "loss": 0.1506, + "step": 36115 + }, + { + "epoch": 1.69, + "learning_rate": 1.4497515401376327e-05, + "loss": 0.1244, + "step": 36120 + }, + { + "epoch": 1.69, + "learning_rate": 1.4496731616321541e-05, + "loss": 0.1407, + "step": 36125 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495947831266755e-05, + "loss": 0.389, + "step": 36130 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495164046211967e-05, + "loss": 0.2841, + "step": 36135 + }, + { + "epoch": 1.69, + "learning_rate": 1.4494380261157181e-05, + "loss": 0.0355, + "step": 36140 + }, + { + "epoch": 1.69, + "learning_rate": 1.4493596476102394e-05, + "loss": 0.1308, + "step": 36145 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492812691047609e-05, + "loss": 0.0888, + "step": 36150 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492028905992821e-05, + "loss": 0.1226, + "step": 36155 + }, + { + "epoch": 1.69, + "learning_rate": 1.4491245120938035e-05, + "loss": 0.1033, + "step": 36160 + }, + { + "epoch": 1.69, + "learning_rate": 1.4490461335883247e-05, + "loss": 0.1986, + "step": 36165 + }, + { + "epoch": 1.69, + "learning_rate": 1.4489677550828463e-05, + "loss": 0.1476, + "step": 36170 + }, + { + "epoch": 1.69, + "learning_rate": 1.4488893765773675e-05, + "loss": 0.3752, + "step": 36175 + }, + { + "epoch": 1.69, + "learning_rate": 1.448810998071889e-05, + "loss": 0.3593, + "step": 36180 + }, + { + "epoch": 1.69, + "learning_rate": 1.4487326195664101e-05, + "loss": 0.3019, + "step": 36185 + }, + { + "epoch": 1.69, + "learning_rate": 1.4486542410609317e-05, + "loss": 0.0502, + "step": 36190 + }, + { + "epoch": 1.69, + "learning_rate": 1.448575862555453e-05, + "loss": 0.0869, + "step": 36195 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484974840499741e-05, + "loss": 0.0769, + "step": 36200 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484191055444955e-05, + "loss": 0.0761, + "step": 36205 + }, + { + "epoch": 1.69, + "learning_rate": 1.448340727039017e-05, + "loss": 0.1277, + "step": 36210 + }, + { + "epoch": 1.69, + "learning_rate": 1.4482623485335383e-05, + "loss": 0.1541, + "step": 36215 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481839700280595e-05, + "loss": 0.1727, + "step": 36220 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481055915225811e-05, + "loss": 0.2071, + "step": 36225 + }, + { + "epoch": 1.69, + "learning_rate": 1.4480272130171023e-05, + "loss": 0.2472, + "step": 36230 + }, + { + "epoch": 1.69, + "learning_rate": 1.4479488345116237e-05, + "loss": 0.1617, + "step": 36235 + }, + { + "epoch": 1.69, + "learning_rate": 1.447870456006145e-05, + "loss": 0.0486, + "step": 36240 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477920775006665e-05, + "loss": 0.029, + "step": 36245 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477136989951877e-05, + "loss": 0.0703, + "step": 36250 + }, + { + "epoch": 1.69, + "learning_rate": 1.4476353204897091e-05, + "loss": 0.0847, + "step": 36255 + }, + { + "epoch": 1.69, + "learning_rate": 1.4475569419842303e-05, + "loss": 0.1484, + "step": 36260 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474785634787515e-05, + "loss": 0.1272, + "step": 36265 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474001849732731e-05, + "loss": 0.1129, + "step": 36270 + }, + { + "epoch": 1.69, + "learning_rate": 1.4473218064677943e-05, + "loss": 0.1698, + "step": 36275 + }, + { + "epoch": 1.69, + "learning_rate": 1.4472434279623157e-05, + "loss": 0.2721, + "step": 36280 + }, + { + "epoch": 1.69, + "learning_rate": 1.447165049456837e-05, + "loss": 0.2507, + "step": 36285 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470866709513585e-05, + "loss": 0.1443, + "step": 36290 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470082924458797e-05, + "loss": 0.0702, + "step": 36295 + }, + { + "epoch": 1.69, + "learning_rate": 1.4469299139404011e-05, + "loss": 0.0486, + "step": 36300 + }, + { + "epoch": 1.69, + "learning_rate": 1.4468515354349223e-05, + "loss": 0.0962, + "step": 36305 + }, + { + "epoch": 1.69, + "learning_rate": 1.4467731569294439e-05, + "loss": 0.1491, + "step": 36310 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466947784239651e-05, + "loss": 0.1473, + "step": 36315 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466163999184865e-05, + "loss": 0.1861, + "step": 36320 + }, + { + "epoch": 1.69, + "learning_rate": 1.4465380214130079e-05, + "loss": 0.2508, + "step": 36325 + }, + { + "epoch": 1.7, + "learning_rate": 1.4464596429075291e-05, + "loss": 0.1919, + "step": 36330 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463812644020505e-05, + "loss": 0.329, + "step": 36335 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463028858965717e-05, + "loss": 0.0241, + "step": 36340 + }, + { + "epoch": 1.7, + "learning_rate": 1.4462245073910933e-05, + "loss": 0.0493, + "step": 36345 + }, + { + "epoch": 1.7, + "learning_rate": 1.4461461288856145e-05, + "loss": 0.0885, + "step": 36350 + }, + { + "epoch": 1.7, + "learning_rate": 1.4460677503801359e-05, + "loss": 0.0676, + "step": 36355 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459893718746571e-05, + "loss": 0.138, + "step": 36360 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459109933691787e-05, + "loss": 0.1602, + "step": 36365 + }, + { + "epoch": 1.7, + "learning_rate": 1.4458326148636999e-05, + "loss": 0.1771, + "step": 36370 + }, + { + "epoch": 1.7, + "learning_rate": 1.4457542363582213e-05, + "loss": 0.207, + "step": 36375 + }, + { + "epoch": 1.7, + "learning_rate": 1.4456758578527425e-05, + "loss": 0.3872, + "step": 36380 + }, + { + "epoch": 1.7, + "learning_rate": 1.445597479347264e-05, + "loss": 0.4241, + "step": 36385 + }, + { + "epoch": 1.7, + "learning_rate": 1.4455191008417853e-05, + "loss": 0.0311, + "step": 36390 + }, + { + "epoch": 1.7, + "learning_rate": 1.4454407223363065e-05, + "loss": 0.0573, + "step": 36395 + }, + { + "epoch": 1.7, + "learning_rate": 1.4453623438308279e-05, + "loss": 0.0677, + "step": 36400 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452839653253491e-05, + "loss": 0.1657, + "step": 36405 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452055868198707e-05, + "loss": 0.0953, + "step": 36410 + }, + { + "epoch": 1.7, + "learning_rate": 1.4451272083143919e-05, + "loss": 0.1371, + "step": 36415 + }, + { + "epoch": 1.7, + "learning_rate": 1.4450488298089133e-05, + "loss": 0.1549, + "step": 36420 + }, + { + "epoch": 1.7, + "learning_rate": 1.4449704513034347e-05, + "loss": 0.1376, + "step": 36425 + }, + { + "epoch": 1.7, + "learning_rate": 1.444892072797956e-05, + "loss": 0.3851, + "step": 36430 + }, + { + "epoch": 1.7, + "learning_rate": 1.4448136942924773e-05, + "loss": 0.2807, + "step": 36435 + }, + { + "epoch": 1.7, + "learning_rate": 1.4447353157869989e-05, + "loss": 0.0163, + "step": 36440 + }, + { + "epoch": 1.7, + "learning_rate": 1.44465693728152e-05, + "loss": 0.0581, + "step": 36445 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445785587760415e-05, + "loss": 0.056, + "step": 36450 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445001802705627e-05, + "loss": 0.1045, + "step": 36455 + }, + { + "epoch": 1.7, + "learning_rate": 1.4444218017650839e-05, + "loss": 0.0543, + "step": 36460 + }, + { + "epoch": 1.7, + "learning_rate": 1.4443434232596055e-05, + "loss": 0.1655, + "step": 36465 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442807204552225e-05, + "loss": 0.1582, + "step": 36470 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442023419497437e-05, + "loss": 0.2369, + "step": 36475 + }, + { + "epoch": 1.7, + "learning_rate": 1.4441239634442651e-05, + "loss": 0.2963, + "step": 36480 + }, + { + "epoch": 1.7, + "learning_rate": 1.4440455849387865e-05, + "loss": 0.2817, + "step": 36485 + }, + { + "epoch": 1.7, + "learning_rate": 1.4439672064333079e-05, + "loss": 0.0662, + "step": 36490 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438888279278291e-05, + "loss": 0.0917, + "step": 36495 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438104494223505e-05, + "loss": 0.0448, + "step": 36500 + }, + { + "epoch": 1.7, + "learning_rate": 1.4437320709168719e-05, + "loss": 0.1574, + "step": 36505 + }, + { + "epoch": 1.7, + "learning_rate": 1.4436536924113933e-05, + "loss": 0.1734, + "step": 36510 + }, + { + "epoch": 1.7, + "learning_rate": 1.4435753139059145e-05, + "loss": 0.1669, + "step": 36515 + }, + { + "epoch": 1.7, + "learning_rate": 1.443496935400436e-05, + "loss": 0.1831, + "step": 36520 + }, + { + "epoch": 1.7, + "learning_rate": 1.4434185568949573e-05, + "loss": 0.1776, + "step": 36525 + }, + { + "epoch": 1.7, + "learning_rate": 1.4433401783894785e-05, + "loss": 0.3592, + "step": 36530 + }, + { + "epoch": 1.7, + "learning_rate": 1.4432617998839999e-05, + "loss": 0.2814, + "step": 36535 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431834213785211e-05, + "loss": 0.0311, + "step": 36540 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431050428730427e-05, + "loss": 0.091, + "step": 36545 + }, + { + "epoch": 1.71, + "learning_rate": 1.443026664367564e-05, + "loss": 0.0954, + "step": 36550 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429482858620853e-05, + "loss": 0.102, + "step": 36555 + }, + { + "epoch": 1.71, + "learning_rate": 1.4428699073566065e-05, + "loss": 0.1249, + "step": 36560 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427915288511281e-05, + "loss": 0.1781, + "step": 36565 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427131503456493e-05, + "loss": 0.122, + "step": 36570 + }, + { + "epoch": 1.71, + "learning_rate": 1.4426347718401707e-05, + "loss": 0.2721, + "step": 36575 + }, + { + "epoch": 1.71, + "learning_rate": 1.442556393334692e-05, + "loss": 0.3878, + "step": 36580 + }, + { + "epoch": 1.71, + "learning_rate": 1.4424780148292135e-05, + "loss": 0.2806, + "step": 36585 + }, + { + "epoch": 1.71, + "learning_rate": 1.4423996363237347e-05, + "loss": 0.0162, + "step": 36590 + }, + { + "epoch": 1.71, + "learning_rate": 1.442321257818256e-05, + "loss": 0.1438, + "step": 36595 + }, + { + "epoch": 1.71, + "learning_rate": 1.4422428793127775e-05, + "loss": 0.0547, + "step": 36600 + }, + { + "epoch": 1.71, + "learning_rate": 1.4421645008072987e-05, + "loss": 0.0748, + "step": 36605 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420861223018201e-05, + "loss": 0.071, + "step": 36610 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420077437963413e-05, + "loss": 0.0877, + "step": 36615 + }, + { + "epoch": 1.71, + "learning_rate": 1.4419293652908629e-05, + "loss": 0.0916, + "step": 36620 + }, + { + "epoch": 1.71, + "learning_rate": 1.4418509867853841e-05, + "loss": 0.29, + "step": 36625 + }, + { + "epoch": 1.71, + "learning_rate": 1.4417726082799055e-05, + "loss": 0.3652, + "step": 36630 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416942297744267e-05, + "loss": 0.3349, + "step": 36635 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416158512689483e-05, + "loss": 0.0536, + "step": 36640 + }, + { + "epoch": 1.71, + "learning_rate": 1.4415374727634695e-05, + "loss": 0.0458, + "step": 36645 + }, + { + "epoch": 1.71, + "learning_rate": 1.4414590942579909e-05, + "loss": 0.085, + "step": 36650 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413807157525121e-05, + "loss": 0.0993, + "step": 36655 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413023372470333e-05, + "loss": 0.1099, + "step": 36660 + }, + { + "epoch": 1.71, + "learning_rate": 1.4412239587415549e-05, + "loss": 0.112, + "step": 36665 + }, + { + "epoch": 1.71, + "learning_rate": 1.4411455802360761e-05, + "loss": 0.2057, + "step": 36670 + }, + { + "epoch": 1.71, + "learning_rate": 1.4410672017305975e-05, + "loss": 0.3171, + "step": 36675 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409888232251187e-05, + "loss": 0.296, + "step": 36680 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409104447196403e-05, + "loss": 0.2658, + "step": 36685 + }, + { + "epoch": 1.71, + "learning_rate": 1.4408320662141615e-05, + "loss": 0.0218, + "step": 36690 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407536877086829e-05, + "loss": 0.0514, + "step": 36695 + }, + { + "epoch": 1.71, + "learning_rate": 1.4406753092032043e-05, + "loss": 0.081, + "step": 36700 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405969306977257e-05, + "loss": 0.0679, + "step": 36705 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405185521922469e-05, + "loss": 0.1526, + "step": 36710 + }, + { + "epoch": 1.71, + "learning_rate": 1.4404401736867683e-05, + "loss": 0.1244, + "step": 36715 + }, + { + "epoch": 1.71, + "learning_rate": 1.4403617951812897e-05, + "loss": 0.1967, + "step": 36720 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402834166758109e-05, + "loss": 0.2807, + "step": 36725 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402050381703323e-05, + "loss": 0.3164, + "step": 36730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4401266596648535e-05, + "loss": 0.2543, + "step": 36735 + }, + { + "epoch": 1.71, + "learning_rate": 1.440048281159375e-05, + "loss": 0.0474, + "step": 36740 + }, + { + "epoch": 1.71, + "learning_rate": 1.4399699026538963e-05, + "loss": 0.0358, + "step": 36745 + }, + { + "epoch": 1.71, + "learning_rate": 1.4398915241484177e-05, + "loss": 0.0577, + "step": 36750 + }, + { + "epoch": 1.72, + "learning_rate": 1.4398131456429389e-05, + "loss": 0.1247, + "step": 36755 + }, + { + "epoch": 1.72, + "learning_rate": 1.4397347671374605e-05, + "loss": 0.0912, + "step": 36760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4396563886319817e-05, + "loss": 0.1512, + "step": 36765 + }, + { + "epoch": 1.72, + "learning_rate": 1.439578010126503e-05, + "loss": 0.187, + "step": 36770 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394996316210243e-05, + "loss": 0.2044, + "step": 36775 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394212531155459e-05, + "loss": 0.2219, + "step": 36780 + }, + { + "epoch": 1.72, + "learning_rate": 1.439342874610067e-05, + "loss": 0.3031, + "step": 36785 + }, + { + "epoch": 1.72, + "learning_rate": 1.4392644961045883e-05, + "loss": 0.0566, + "step": 36790 + }, + { + "epoch": 1.72, + "learning_rate": 1.4391861175991097e-05, + "loss": 0.05, + "step": 36795 + }, + { + "epoch": 1.72, + "learning_rate": 1.439107739093631e-05, + "loss": 0.1119, + "step": 36800 + }, + { + "epoch": 1.72, + "learning_rate": 1.4390293605881525e-05, + "loss": 0.1107, + "step": 36805 + }, + { + "epoch": 1.72, + "learning_rate": 1.4389509820826737e-05, + "loss": 0.198, + "step": 36810 + }, + { + "epoch": 1.72, + "learning_rate": 1.438872603577195e-05, + "loss": 0.1943, + "step": 36815 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387942250717165e-05, + "loss": 0.1967, + "step": 36820 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387158465662379e-05, + "loss": 0.1664, + "step": 36825 + }, + { + "epoch": 1.72, + "learning_rate": 1.438637468060759e-05, + "loss": 0.3422, + "step": 36830 + }, + { + "epoch": 1.72, + "learning_rate": 1.4385590895552806e-05, + "loss": 0.2327, + "step": 36835 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384807110498019e-05, + "loss": 0.0715, + "step": 36840 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384023325443233e-05, + "loss": 0.0976, + "step": 36845 + }, + { + "epoch": 1.72, + "learning_rate": 1.4383239540388445e-05, + "loss": 0.0848, + "step": 36850 + }, + { + "epoch": 1.72, + "learning_rate": 1.4382455755333657e-05, + "loss": 0.0524, + "step": 36855 + }, + { + "epoch": 1.72, + "learning_rate": 1.4381671970278873e-05, + "loss": 0.1459, + "step": 36860 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380888185224085e-05, + "loss": 0.1363, + "step": 36865 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380104400169299e-05, + "loss": 0.1704, + "step": 36870 + }, + { + "epoch": 1.72, + "learning_rate": 1.4379320615114511e-05, + "loss": 0.3359, + "step": 36875 + }, + { + "epoch": 1.72, + "learning_rate": 1.4378536830059726e-05, + "loss": 0.4377, + "step": 36880 + }, + { + "epoch": 1.72, + "learning_rate": 1.4377753045004939e-05, + "loss": 0.1847, + "step": 36885 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376969259950153e-05, + "loss": 0.0775, + "step": 36890 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376185474895365e-05, + "loss": 0.0722, + "step": 36895 + }, + { + "epoch": 1.72, + "learning_rate": 1.437540168984058e-05, + "loss": 0.106, + "step": 36900 + }, + { + "epoch": 1.72, + "learning_rate": 1.4374617904785793e-05, + "loss": 0.1095, + "step": 36905 + }, + { + "epoch": 1.72, + "learning_rate": 1.4373834119731007e-05, + "loss": 0.1045, + "step": 36910 + }, + { + "epoch": 1.72, + "learning_rate": 1.437305033467622e-05, + "loss": 0.1643, + "step": 36915 + }, + { + "epoch": 1.72, + "learning_rate": 1.4372266549621433e-05, + "loss": 0.1888, + "step": 36920 + }, + { + "epoch": 1.72, + "learning_rate": 1.4371482764566647e-05, + "loss": 0.1461, + "step": 36925 + }, + { + "epoch": 1.72, + "learning_rate": 1.4370698979511859e-05, + "loss": 0.36, + "step": 36930 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369915194457074e-05, + "loss": 0.1914, + "step": 36935 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369131409402287e-05, + "loss": 0.0292, + "step": 36940 + }, + { + "epoch": 1.72, + "learning_rate": 1.43683476243475e-05, + "loss": 0.0758, + "step": 36945 + }, + { + "epoch": 1.72, + "learning_rate": 1.4367563839292713e-05, + "loss": 0.074, + "step": 36950 + }, + { + "epoch": 1.72, + "learning_rate": 1.4366780054237928e-05, + "loss": 0.1499, + "step": 36955 + }, + { + "epoch": 1.72, + "learning_rate": 1.436599626918314e-05, + "loss": 0.1167, + "step": 36960 + }, + { + "epoch": 1.72, + "learning_rate": 1.4365212484128354e-05, + "loss": 0.0796, + "step": 36965 + }, + { + "epoch": 1.73, + "learning_rate": 1.4364428699073567e-05, + "loss": 0.0926, + "step": 36970 + }, + { + "epoch": 1.73, + "learning_rate": 1.4363644914018782e-05, + "loss": 0.2108, + "step": 36975 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362861128963994e-05, + "loss": 0.2106, + "step": 36980 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362077343909207e-05, + "loss": 0.2872, + "step": 36985 + }, + { + "epoch": 1.73, + "learning_rate": 1.436129355885442e-05, + "loss": 0.0477, + "step": 36990 + }, + { + "epoch": 1.73, + "learning_rate": 1.4360509773799633e-05, + "loss": 0.0257, + "step": 36995 + }, + { + "epoch": 1.73, + "learning_rate": 1.4359725988744848e-05, + "loss": 0.1252, + "step": 37000 + }, + { + "epoch": 1.73, + "learning_rate": 1.435894220369006e-05, + "loss": 0.1321, + "step": 37005 + }, + { + "epoch": 1.73, + "learning_rate": 1.4358158418635274e-05, + "loss": 0.1789, + "step": 37010 + }, + { + "epoch": 1.73, + "learning_rate": 1.4357374633580488e-05, + "loss": 0.1004, + "step": 37015 + }, + { + "epoch": 1.73, + "learning_rate": 1.4356590848525702e-05, + "loss": 0.1946, + "step": 37020 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355807063470914e-05, + "loss": 0.1881, + "step": 37025 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355023278416128e-05, + "loss": 0.3375, + "step": 37030 + }, + { + "epoch": 1.73, + "learning_rate": 1.4354239493361342e-05, + "loss": 0.2199, + "step": 37035 + }, + { + "epoch": 1.73, + "learning_rate": 1.4353455708306556e-05, + "loss": 0.0575, + "step": 37040 + }, + { + "epoch": 1.73, + "learning_rate": 1.4352671923251768e-05, + "loss": 0.0641, + "step": 37045 + }, + { + "epoch": 1.73, + "learning_rate": 1.435188813819698e-05, + "loss": 0.1354, + "step": 37050 + }, + { + "epoch": 1.73, + "learning_rate": 1.4351104353142196e-05, + "loss": 0.0793, + "step": 37055 + }, + { + "epoch": 1.73, + "learning_rate": 1.4350320568087408e-05, + "loss": 0.0634, + "step": 37060 + }, + { + "epoch": 1.73, + "learning_rate": 1.4349536783032622e-05, + "loss": 0.1773, + "step": 37065 + }, + { + "epoch": 1.73, + "learning_rate": 1.4348752997977835e-05, + "loss": 0.1752, + "step": 37070 + }, + { + "epoch": 1.73, + "learning_rate": 1.434796921292305e-05, + "loss": 0.1668, + "step": 37075 + }, + { + "epoch": 1.73, + "learning_rate": 1.4347185427868262e-05, + "loss": 0.3402, + "step": 37080 + }, + { + "epoch": 1.73, + "learning_rate": 1.4346401642813476e-05, + "loss": 0.2596, + "step": 37085 + }, + { + "epoch": 1.73, + "learning_rate": 1.4345617857758688e-05, + "loss": 0.0693, + "step": 37090 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344834072703904e-05, + "loss": 0.0821, + "step": 37095 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344050287649116e-05, + "loss": 0.0515, + "step": 37100 + }, + { + "epoch": 1.73, + "learning_rate": 1.434326650259433e-05, + "loss": 0.0693, + "step": 37105 + }, + { + "epoch": 1.73, + "learning_rate": 1.4342482717539542e-05, + "loss": 0.0675, + "step": 37110 + }, + { + "epoch": 1.73, + "learning_rate": 1.4341698932484756e-05, + "loss": 0.0614, + "step": 37115 + }, + { + "epoch": 1.73, + "learning_rate": 1.434091514742997e-05, + "loss": 0.1118, + "step": 37120 + }, + { + "epoch": 1.73, + "learning_rate": 1.4340131362375182e-05, + "loss": 0.1841, + "step": 37125 + }, + { + "epoch": 1.73, + "learning_rate": 1.4339347577320396e-05, + "loss": 0.4163, + "step": 37130 + }, + { + "epoch": 1.73, + "learning_rate": 1.433856379226561e-05, + "loss": 0.2525, + "step": 37135 + }, + { + "epoch": 1.73, + "learning_rate": 1.4337780007210824e-05, + "loss": 0.0489, + "step": 37140 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336996222156036e-05, + "loss": 0.0879, + "step": 37145 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336212437101252e-05, + "loss": 0.0851, + "step": 37150 + }, + { + "epoch": 1.73, + "learning_rate": 1.4335428652046464e-05, + "loss": 0.093, + "step": 37155 + }, + { + "epoch": 1.73, + "learning_rate": 1.4334644866991678e-05, + "loss": 0.161, + "step": 37160 + }, + { + "epoch": 1.73, + "learning_rate": 1.433386108193689e-05, + "loss": 0.1694, + "step": 37165 + }, + { + "epoch": 1.73, + "learning_rate": 1.4333077296882106e-05, + "loss": 0.17, + "step": 37170 + }, + { + "epoch": 1.73, + "learning_rate": 1.4332293511827318e-05, + "loss": 0.2063, + "step": 37175 + }, + { + "epoch": 1.73, + "learning_rate": 1.433150972677253e-05, + "loss": 0.2002, + "step": 37180 + }, + { + "epoch": 1.74, + "learning_rate": 1.4330725941717744e-05, + "loss": 0.2128, + "step": 37185 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329942156662956e-05, + "loss": 0.0525, + "step": 37190 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329158371608172e-05, + "loss": 0.1173, + "step": 37195 + }, + { + "epoch": 1.74, + "learning_rate": 1.4328374586553384e-05, + "loss": 0.1373, + "step": 37200 + }, + { + "epoch": 1.74, + "learning_rate": 1.4327590801498598e-05, + "loss": 0.0733, + "step": 37205 + }, + { + "epoch": 1.74, + "learning_rate": 1.432680701644381e-05, + "loss": 0.118, + "step": 37210 + }, + { + "epoch": 1.74, + "learning_rate": 1.4326023231389026e-05, + "loss": 0.1538, + "step": 37215 + }, + { + "epoch": 1.74, + "learning_rate": 1.4325239446334238e-05, + "loss": 0.1563, + "step": 37220 + }, + { + "epoch": 1.74, + "learning_rate": 1.4324455661279452e-05, + "loss": 0.2196, + "step": 37225 + }, + { + "epoch": 1.74, + "learning_rate": 1.4323671876224666e-05, + "loss": 0.3397, + "step": 37230 + }, + { + "epoch": 1.74, + "learning_rate": 1.432288809116988e-05, + "loss": 0.2154, + "step": 37235 + }, + { + "epoch": 1.74, + "learning_rate": 1.4322104306115092e-05, + "loss": 0.0617, + "step": 37240 + }, + { + "epoch": 1.74, + "learning_rate": 1.4321320521060304e-05, + "loss": 0.0488, + "step": 37245 + }, + { + "epoch": 1.74, + "learning_rate": 1.432053673600552e-05, + "loss": 0.1145, + "step": 37250 + }, + { + "epoch": 1.74, + "learning_rate": 1.4319752950950732e-05, + "loss": 0.1049, + "step": 37255 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318969165895946e-05, + "loss": 0.0923, + "step": 37260 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318185380841158e-05, + "loss": 0.1686, + "step": 37265 + }, + { + "epoch": 1.74, + "learning_rate": 1.4317401595786374e-05, + "loss": 0.1347, + "step": 37270 + }, + { + "epoch": 1.74, + "learning_rate": 1.4316617810731586e-05, + "loss": 0.161, + "step": 37275 + }, + { + "epoch": 1.74, + "learning_rate": 1.43158340256768e-05, + "loss": 0.2883, + "step": 37280 + }, + { + "epoch": 1.74, + "learning_rate": 1.4315050240622012e-05, + "loss": 0.2093, + "step": 37285 + }, + { + "epoch": 1.74, + "learning_rate": 1.4314266455567228e-05, + "loss": 0.0615, + "step": 37290 + }, + { + "epoch": 1.74, + "learning_rate": 1.431348267051244e-05, + "loss": 0.058, + "step": 37295 + }, + { + "epoch": 1.74, + "learning_rate": 1.4312698885457654e-05, + "loss": 0.0953, + "step": 37300 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311915100402866e-05, + "loss": 0.0668, + "step": 37305 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311131315348078e-05, + "loss": 0.1407, + "step": 37310 + }, + { + "epoch": 1.74, + "learning_rate": 1.4310347530293294e-05, + "loss": 0.1212, + "step": 37315 + }, + { + "epoch": 1.74, + "learning_rate": 1.4309563745238506e-05, + "loss": 0.1046, + "step": 37320 + }, + { + "epoch": 1.74, + "learning_rate": 1.430877996018372e-05, + "loss": 0.1611, + "step": 37325 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307996175128934e-05, + "loss": 0.3317, + "step": 37330 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307212390074148e-05, + "loss": 0.2498, + "step": 37335 + }, + { + "epoch": 1.74, + "learning_rate": 1.430642860501936e-05, + "loss": 0.0457, + "step": 37340 + }, + { + "epoch": 1.74, + "learning_rate": 1.4305644819964574e-05, + "loss": 0.0306, + "step": 37345 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304861034909788e-05, + "loss": 0.0676, + "step": 37350 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304077249855002e-05, + "loss": 0.0872, + "step": 37355 + }, + { + "epoch": 1.74, + "learning_rate": 1.4303293464800214e-05, + "loss": 0.1829, + "step": 37360 + }, + { + "epoch": 1.74, + "learning_rate": 1.430250967974543e-05, + "loss": 0.1626, + "step": 37365 + }, + { + "epoch": 1.74, + "learning_rate": 1.4301725894690642e-05, + "loss": 0.1964, + "step": 37370 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300942109635854e-05, + "loss": 0.2105, + "step": 37375 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300158324581068e-05, + "loss": 0.3942, + "step": 37380 + }, + { + "epoch": 1.74, + "learning_rate": 1.429937453952628e-05, + "loss": 0.2594, + "step": 37385 + }, + { + "epoch": 1.74, + "learning_rate": 1.4298590754471496e-05, + "loss": 0.0395, + "step": 37390 + }, + { + "epoch": 1.74, + "learning_rate": 1.4297806969416708e-05, + "loss": 0.0307, + "step": 37395 + }, + { + "epoch": 1.75, + "learning_rate": 1.4297023184361922e-05, + "loss": 0.0373, + "step": 37400 + }, + { + "epoch": 1.75, + "learning_rate": 1.4296239399307134e-05, + "loss": 0.06, + "step": 37405 + }, + { + "epoch": 1.75, + "learning_rate": 1.429545561425235e-05, + "loss": 0.1288, + "step": 37410 + }, + { + "epoch": 1.75, + "learning_rate": 1.4294671829197562e-05, + "loss": 0.1277, + "step": 37415 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293888044142776e-05, + "loss": 0.1357, + "step": 37420 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293104259087988e-05, + "loss": 0.1732, + "step": 37425 + }, + { + "epoch": 1.75, + "learning_rate": 1.4292320474033204e-05, + "loss": 0.2856, + "step": 37430 + }, + { + "epoch": 1.75, + "learning_rate": 1.4291536688978416e-05, + "loss": 0.2645, + "step": 37435 + }, + { + "epoch": 1.75, + "learning_rate": 1.4290752903923628e-05, + "loss": 0.0783, + "step": 37440 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289969118868842e-05, + "loss": 0.0332, + "step": 37445 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289185333814056e-05, + "loss": 0.0371, + "step": 37450 + }, + { + "epoch": 1.75, + "learning_rate": 1.428840154875927e-05, + "loss": 0.1575, + "step": 37455 + }, + { + "epoch": 1.75, + "learning_rate": 1.4287617763704482e-05, + "loss": 0.094, + "step": 37460 + }, + { + "epoch": 1.75, + "learning_rate": 1.4286833978649698e-05, + "loss": 0.0949, + "step": 37465 + }, + { + "epoch": 1.75, + "learning_rate": 1.428605019359491e-05, + "loss": 0.1676, + "step": 37470 + }, + { + "epoch": 1.75, + "learning_rate": 1.4285266408540124e-05, + "loss": 0.1947, + "step": 37475 + }, + { + "epoch": 1.75, + "learning_rate": 1.4284482623485336e-05, + "loss": 0.1995, + "step": 37480 + }, + { + "epoch": 1.75, + "learning_rate": 1.4283698838430551e-05, + "loss": 0.2297, + "step": 37485 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282915053375764e-05, + "loss": 0.0635, + "step": 37490 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282131268320978e-05, + "loss": 0.049, + "step": 37495 + }, + { + "epoch": 1.75, + "learning_rate": 1.428134748326619e-05, + "loss": 0.1047, + "step": 37500 + }, + { + "epoch": 1.75, + "learning_rate": 1.4280563698211402e-05, + "loss": 0.125, + "step": 37505 + }, + { + "epoch": 1.75, + "learning_rate": 1.4279779913156618e-05, + "loss": 0.1367, + "step": 37510 + }, + { + "epoch": 1.75, + "learning_rate": 1.427899612810183e-05, + "loss": 0.16, + "step": 37515 + }, + { + "epoch": 1.75, + "learning_rate": 1.4278212343047044e-05, + "loss": 0.2038, + "step": 37520 + }, + { + "epoch": 1.75, + "learning_rate": 1.4277428557992256e-05, + "loss": 0.2235, + "step": 37525 + }, + { + "epoch": 1.75, + "learning_rate": 1.4276644772937472e-05, + "loss": 0.428, + "step": 37530 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275860987882684e-05, + "loss": 0.3868, + "step": 37535 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275077202827898e-05, + "loss": 0.0283, + "step": 37540 + }, + { + "epoch": 1.75, + "learning_rate": 1.4274293417773112e-05, + "loss": 0.0262, + "step": 37545 + }, + { + "epoch": 1.75, + "learning_rate": 1.4273509632718325e-05, + "loss": 0.0667, + "step": 37550 + }, + { + "epoch": 1.75, + "learning_rate": 1.4272725847663538e-05, + "loss": 0.1034, + "step": 37555 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271942062608752e-05, + "loss": 0.1123, + "step": 37560 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271158277553965e-05, + "loss": 0.1533, + "step": 37565 + }, + { + "epoch": 1.75, + "learning_rate": 1.4270374492499178e-05, + "loss": 0.0854, + "step": 37570 + }, + { + "epoch": 1.75, + "learning_rate": 1.4269590707444392e-05, + "loss": 0.2158, + "step": 37575 + }, + { + "epoch": 1.75, + "learning_rate": 1.4268806922389604e-05, + "loss": 0.3784, + "step": 37580 + }, + { + "epoch": 1.75, + "learning_rate": 1.426802313733482e-05, + "loss": 0.2143, + "step": 37585 + }, + { + "epoch": 1.75, + "learning_rate": 1.4267239352280032e-05, + "loss": 0.0696, + "step": 37590 + }, + { + "epoch": 1.75, + "learning_rate": 1.4266455567225246e-05, + "loss": 0.0339, + "step": 37595 + }, + { + "epoch": 1.75, + "learning_rate": 1.4265671782170458e-05, + "loss": 0.0495, + "step": 37600 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264887997115673e-05, + "loss": 0.1684, + "step": 37605 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264104212060886e-05, + "loss": 0.184, + "step": 37610 + }, + { + "epoch": 1.76, + "learning_rate": 1.42633204270061e-05, + "loss": 0.1755, + "step": 37615 + }, + { + "epoch": 1.76, + "learning_rate": 1.4262536641951312e-05, + "loss": 0.1916, + "step": 37620 + }, + { + "epoch": 1.76, + "learning_rate": 1.4261752856896527e-05, + "loss": 0.178, + "step": 37625 + }, + { + "epoch": 1.76, + "learning_rate": 1.426096907184174e-05, + "loss": 0.2962, + "step": 37630 + }, + { + "epoch": 1.76, + "learning_rate": 1.4260185286786952e-05, + "loss": 0.1308, + "step": 37635 + }, + { + "epoch": 1.76, + "learning_rate": 1.4259401501732166e-05, + "loss": 0.0454, + "step": 37640 + }, + { + "epoch": 1.76, + "learning_rate": 1.425861771667738e-05, + "loss": 0.0986, + "step": 37645 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257833931622593e-05, + "loss": 0.0789, + "step": 37650 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257050146567806e-05, + "loss": 0.1297, + "step": 37655 + }, + { + "epoch": 1.76, + "learning_rate": 1.425626636151302e-05, + "loss": 0.0659, + "step": 37660 + }, + { + "epoch": 1.76, + "learning_rate": 1.4255482576458233e-05, + "loss": 0.1486, + "step": 37665 + }, + { + "epoch": 1.76, + "learning_rate": 1.4254698791403447e-05, + "loss": 0.1687, + "step": 37670 + }, + { + "epoch": 1.76, + "learning_rate": 1.425391500634866e-05, + "loss": 0.1958, + "step": 37675 + }, + { + "epoch": 1.76, + "learning_rate": 1.4253131221293875e-05, + "loss": 0.3865, + "step": 37680 + }, + { + "epoch": 1.76, + "learning_rate": 1.4252347436239087e-05, + "loss": 0.3559, + "step": 37685 + }, + { + "epoch": 1.76, + "learning_rate": 1.4251563651184301e-05, + "loss": 0.0624, + "step": 37690 + }, + { + "epoch": 1.76, + "learning_rate": 1.4250779866129513e-05, + "loss": 0.0917, + "step": 37695 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249996081074726e-05, + "loss": 0.0923, + "step": 37700 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249212296019941e-05, + "loss": 0.1211, + "step": 37705 + }, + { + "epoch": 1.76, + "learning_rate": 1.4248428510965153e-05, + "loss": 0.1117, + "step": 37710 + }, + { + "epoch": 1.76, + "learning_rate": 1.4247644725910367e-05, + "loss": 0.2351, + "step": 37715 + }, + { + "epoch": 1.76, + "learning_rate": 1.424686094085558e-05, + "loss": 0.1649, + "step": 37720 + }, + { + "epoch": 1.76, + "learning_rate": 1.4246077155800795e-05, + "loss": 0.1855, + "step": 37725 + }, + { + "epoch": 1.76, + "learning_rate": 1.4245293370746007e-05, + "loss": 0.3319, + "step": 37730 + }, + { + "epoch": 1.76, + "learning_rate": 1.4244509585691221e-05, + "loss": 0.2802, + "step": 37735 + }, + { + "epoch": 1.76, + "learning_rate": 1.4243725800636434e-05, + "loss": 0.0575, + "step": 37740 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242942015581649e-05, + "loss": 0.0562, + "step": 37745 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242158230526861e-05, + "loss": 0.0816, + "step": 37750 + }, + { + "epoch": 1.76, + "learning_rate": 1.4241374445472075e-05, + "loss": 0.0493, + "step": 37755 + }, + { + "epoch": 1.76, + "learning_rate": 1.424059066041729e-05, + "loss": 0.1505, + "step": 37760 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239806875362501e-05, + "loss": 0.1146, + "step": 37765 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239023090307715e-05, + "loss": 0.2125, + "step": 37770 + }, + { + "epoch": 1.76, + "learning_rate": 1.4238239305252927e-05, + "loss": 0.2498, + "step": 37775 + }, + { + "epoch": 1.76, + "learning_rate": 1.4237455520198143e-05, + "loss": 0.3085, + "step": 37780 + }, + { + "epoch": 1.76, + "learning_rate": 1.4236671735143355e-05, + "loss": 0.4248, + "step": 37785 + }, + { + "epoch": 1.76, + "learning_rate": 1.423588795008857e-05, + "loss": 0.0457, + "step": 37790 + }, + { + "epoch": 1.76, + "learning_rate": 1.4235104165033781e-05, + "loss": 0.0464, + "step": 37795 + }, + { + "epoch": 1.76, + "learning_rate": 1.4234320379978997e-05, + "loss": 0.0329, + "step": 37800 + }, + { + "epoch": 1.76, + "learning_rate": 1.423353659492421e-05, + "loss": 0.1443, + "step": 37805 + }, + { + "epoch": 1.76, + "learning_rate": 1.4232752809869423e-05, + "loss": 0.1504, + "step": 37810 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231969024814635e-05, + "loss": 0.1618, + "step": 37815 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231185239759851e-05, + "loss": 0.1992, + "step": 37820 + }, + { + "epoch": 1.76, + "learning_rate": 1.4230401454705063e-05, + "loss": 0.1979, + "step": 37825 + }, + { + "epoch": 1.77, + "learning_rate": 1.4229617669650275e-05, + "loss": 0.3638, + "step": 37830 + }, + { + "epoch": 1.77, + "learning_rate": 1.422883388459549e-05, + "loss": 0.2355, + "step": 37835 + }, + { + "epoch": 1.77, + "learning_rate": 1.4228050099540701e-05, + "loss": 0.0892, + "step": 37840 + }, + { + "epoch": 1.77, + "learning_rate": 1.4227266314485917e-05, + "loss": 0.0729, + "step": 37845 + }, + { + "epoch": 1.77, + "learning_rate": 1.422648252943113e-05, + "loss": 0.1274, + "step": 37850 + }, + { + "epoch": 1.77, + "learning_rate": 1.4225698744376343e-05, + "loss": 0.0896, + "step": 37855 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224914959321557e-05, + "loss": 0.0717, + "step": 37860 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224131174266771e-05, + "loss": 0.1184, + "step": 37865 + }, + { + "epoch": 1.77, + "learning_rate": 1.4223347389211983e-05, + "loss": 0.1207, + "step": 37870 + }, + { + "epoch": 1.77, + "learning_rate": 1.4222563604157197e-05, + "loss": 0.2518, + "step": 37875 + }, + { + "epoch": 1.77, + "learning_rate": 1.4221779819102411e-05, + "loss": 0.3498, + "step": 37880 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220996034047625e-05, + "loss": 0.3115, + "step": 37885 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220212248992837e-05, + "loss": 0.0583, + "step": 37890 + }, + { + "epoch": 1.77, + "learning_rate": 1.421942846393805e-05, + "loss": 0.0557, + "step": 37895 + }, + { + "epoch": 1.77, + "learning_rate": 1.4218644678883265e-05, + "loss": 0.0753, + "step": 37900 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217860893828477e-05, + "loss": 0.0799, + "step": 37905 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217077108773691e-05, + "loss": 0.0979, + "step": 37910 + }, + { + "epoch": 1.77, + "learning_rate": 1.4216293323718903e-05, + "loss": 0.0891, + "step": 37915 + }, + { + "epoch": 1.77, + "learning_rate": 1.4215509538664119e-05, + "loss": 0.1499, + "step": 37920 + }, + { + "epoch": 1.77, + "learning_rate": 1.4214725753609331e-05, + "loss": 0.1411, + "step": 37925 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213941968554545e-05, + "loss": 0.2998, + "step": 37930 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213158183499757e-05, + "loss": 0.2268, + "step": 37935 + }, + { + "epoch": 1.77, + "learning_rate": 1.4212374398444973e-05, + "loss": 0.0357, + "step": 37940 + }, + { + "epoch": 1.77, + "learning_rate": 1.4211590613390185e-05, + "loss": 0.0426, + "step": 37945 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210806828335399e-05, + "loss": 0.0794, + "step": 37950 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210023043280611e-05, + "loss": 0.1651, + "step": 37955 + }, + { + "epoch": 1.77, + "learning_rate": 1.4209239258225825e-05, + "loss": 0.1167, + "step": 37960 + }, + { + "epoch": 1.77, + "learning_rate": 1.4208455473171039e-05, + "loss": 0.1324, + "step": 37965 + }, + { + "epoch": 1.77, + "learning_rate": 1.4207671688116251e-05, + "loss": 0.1587, + "step": 37970 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206887903061465e-05, + "loss": 0.2, + "step": 37975 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206104118006679e-05, + "loss": 0.3415, + "step": 37980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4205320332951893e-05, + "loss": 0.24, + "step": 37985 + }, + { + "epoch": 1.77, + "learning_rate": 1.4204536547897105e-05, + "loss": 0.0173, + "step": 37990 + }, + { + "epoch": 1.77, + "learning_rate": 1.420375276284232e-05, + "loss": 0.0568, + "step": 37995 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202968977787533e-05, + "loss": 0.0666, + "step": 38000 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202185192732747e-05, + "loss": 0.1213, + "step": 38005 + }, + { + "epoch": 1.77, + "learning_rate": 1.4201401407677959e-05, + "loss": 0.1022, + "step": 38010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4200617622623175e-05, + "loss": 0.1468, + "step": 38015 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199833837568387e-05, + "loss": 0.2344, + "step": 38020 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199050052513599e-05, + "loss": 0.2398, + "step": 38025 + }, + { + "epoch": 1.77, + "learning_rate": 1.4198266267458813e-05, + "loss": 0.3248, + "step": 38030 + }, + { + "epoch": 1.77, + "learning_rate": 1.4197482482404025e-05, + "loss": 0.2467, + "step": 38035 + }, + { + "epoch": 1.77, + "learning_rate": 1.419669869734924e-05, + "loss": 0.0681, + "step": 38040 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195914912294453e-05, + "loss": 0.0525, + "step": 38045 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195131127239667e-05, + "loss": 0.0501, + "step": 38050 + }, + { + "epoch": 1.78, + "learning_rate": 1.4194347342184879e-05, + "loss": 0.0514, + "step": 38055 + }, + { + "epoch": 1.78, + "learning_rate": 1.4193563557130095e-05, + "loss": 0.1218, + "step": 38060 + }, + { + "epoch": 1.78, + "learning_rate": 1.4192779772075307e-05, + "loss": 0.1249, + "step": 38065 + }, + { + "epoch": 1.78, + "learning_rate": 1.419199598702052e-05, + "loss": 0.1505, + "step": 38070 + }, + { + "epoch": 1.78, + "learning_rate": 1.4191212201965735e-05, + "loss": 0.2301, + "step": 38075 + }, + { + "epoch": 1.78, + "learning_rate": 1.4190428416910949e-05, + "loss": 0.3129, + "step": 38080 + }, + { + "epoch": 1.78, + "learning_rate": 1.4189644631856161e-05, + "loss": 0.2717, + "step": 38085 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188860846801373e-05, + "loss": 0.0331, + "step": 38090 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188077061746589e-05, + "loss": 0.0624, + "step": 38095 + }, + { + "epoch": 1.78, + "learning_rate": 1.4187293276691801e-05, + "loss": 0.0583, + "step": 38100 + }, + { + "epoch": 1.78, + "learning_rate": 1.4186509491637015e-05, + "loss": 0.0417, + "step": 38105 + }, + { + "epoch": 1.78, + "learning_rate": 1.4185725706582227e-05, + "loss": 0.1927, + "step": 38110 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184941921527443e-05, + "loss": 0.0895, + "step": 38115 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184158136472655e-05, + "loss": 0.1494, + "step": 38120 + }, + { + "epoch": 1.78, + "learning_rate": 1.4183374351417869e-05, + "loss": 0.2075, + "step": 38125 + }, + { + "epoch": 1.78, + "learning_rate": 1.4182590566363081e-05, + "loss": 0.2841, + "step": 38130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181806781308297e-05, + "loss": 0.2523, + "step": 38135 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181022996253509e-05, + "loss": 0.051, + "step": 38140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4180239211198723e-05, + "loss": 0.0424, + "step": 38145 + }, + { + "epoch": 1.78, + "learning_rate": 1.4179455426143935e-05, + "loss": 0.0772, + "step": 38150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4178671641089147e-05, + "loss": 0.0753, + "step": 38155 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177887856034363e-05, + "loss": 0.1035, + "step": 38160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177104070979575e-05, + "loss": 0.226, + "step": 38165 + }, + { + "epoch": 1.78, + "learning_rate": 1.4176320285924789e-05, + "loss": 0.2246, + "step": 38170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4175536500870003e-05, + "loss": 0.1679, + "step": 38175 + }, + { + "epoch": 1.78, + "learning_rate": 1.4174752715815217e-05, + "loss": 0.3673, + "step": 38180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173968930760429e-05, + "loss": 0.2447, + "step": 38185 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173185145705643e-05, + "loss": 0.1178, + "step": 38190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4172401360650857e-05, + "loss": 0.0845, + "step": 38195 + }, + { + "epoch": 1.78, + "learning_rate": 1.417161757559607e-05, + "loss": 0.0427, + "step": 38200 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170833790541283e-05, + "loss": 0.1022, + "step": 38205 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170050005486498e-05, + "loss": 0.0848, + "step": 38210 + }, + { + "epoch": 1.78, + "learning_rate": 1.416926622043171e-05, + "loss": 0.1151, + "step": 38215 + }, + { + "epoch": 1.78, + "learning_rate": 1.4168482435376923e-05, + "loss": 0.1462, + "step": 38220 + }, + { + "epoch": 1.78, + "learning_rate": 1.4167698650322137e-05, + "loss": 0.2352, + "step": 38225 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166914865267349e-05, + "loss": 0.4185, + "step": 38230 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166131080212564e-05, + "loss": 0.2938, + "step": 38235 + }, + { + "epoch": 1.78, + "learning_rate": 1.4165347295157777e-05, + "loss": 0.0908, + "step": 38240 + }, + { + "epoch": 1.78, + "learning_rate": 1.416456351010299e-05, + "loss": 0.0728, + "step": 38245 + }, + { + "epoch": 1.78, + "learning_rate": 1.4163779725048203e-05, + "loss": 0.0581, + "step": 38250 + }, + { + "epoch": 1.79, + "learning_rate": 1.4162995939993418e-05, + "loss": 0.128, + "step": 38255 + }, + { + "epoch": 1.79, + "learning_rate": 1.416221215493863e-05, + "loss": 0.1307, + "step": 38260 + }, + { + "epoch": 1.79, + "learning_rate": 1.4161428369883845e-05, + "loss": 0.1847, + "step": 38265 + }, + { + "epoch": 1.79, + "learning_rate": 1.4160644584829057e-05, + "loss": 0.1701, + "step": 38270 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159860799774272e-05, + "loss": 0.2065, + "step": 38275 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159077014719485e-05, + "loss": 0.4186, + "step": 38280 + }, + { + "epoch": 1.79, + "learning_rate": 1.4158293229664697e-05, + "loss": 0.2782, + "step": 38285 + }, + { + "epoch": 1.79, + "learning_rate": 1.415750944460991e-05, + "loss": 0.0348, + "step": 38290 + }, + { + "epoch": 1.79, + "learning_rate": 1.4156725659555125e-05, + "loss": 0.023, + "step": 38295 + }, + { + "epoch": 1.79, + "learning_rate": 1.4155941874500338e-05, + "loss": 0.0195, + "step": 38300 + }, + { + "epoch": 1.79, + "learning_rate": 1.415515808944555e-05, + "loss": 0.1097, + "step": 38305 + }, + { + "epoch": 1.79, + "learning_rate": 1.4154374304390766e-05, + "loss": 0.1873, + "step": 38310 + }, + { + "epoch": 1.79, + "learning_rate": 1.4153590519335978e-05, + "loss": 0.1415, + "step": 38315 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152806734281192e-05, + "loss": 0.1651, + "step": 38320 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152022949226405e-05, + "loss": 0.2963, + "step": 38325 + }, + { + "epoch": 1.79, + "learning_rate": 1.415123916417162e-05, + "loss": 0.2719, + "step": 38330 + }, + { + "epoch": 1.79, + "learning_rate": 1.4150455379116832e-05, + "loss": 0.2542, + "step": 38335 + }, + { + "epoch": 1.79, + "learning_rate": 1.4149671594062046e-05, + "loss": 0.0304, + "step": 38340 + }, + { + "epoch": 1.79, + "learning_rate": 1.4148887809007259e-05, + "loss": 0.1187, + "step": 38345 + }, + { + "epoch": 1.79, + "learning_rate": 1.414810402395247e-05, + "loss": 0.1246, + "step": 38350 + }, + { + "epoch": 1.79, + "learning_rate": 1.4147320238897686e-05, + "loss": 0.0625, + "step": 38355 + }, + { + "epoch": 1.79, + "learning_rate": 1.4146536453842899e-05, + "loss": 0.182, + "step": 38360 + }, + { + "epoch": 1.79, + "learning_rate": 1.4145752668788112e-05, + "loss": 0.0816, + "step": 38365 + }, + { + "epoch": 1.79, + "learning_rate": 1.4144968883733325e-05, + "loss": 0.1735, + "step": 38370 + }, + { + "epoch": 1.79, + "learning_rate": 1.414418509867854e-05, + "loss": 0.1373, + "step": 38375 + }, + { + "epoch": 1.79, + "learning_rate": 1.4143401313623752e-05, + "loss": 0.4016, + "step": 38380 + }, + { + "epoch": 1.79, + "learning_rate": 1.4142617528568966e-05, + "loss": 0.2637, + "step": 38385 + }, + { + "epoch": 1.79, + "learning_rate": 1.414183374351418e-05, + "loss": 0.0964, + "step": 38390 + }, + { + "epoch": 1.79, + "learning_rate": 1.4141049958459394e-05, + "loss": 0.0635, + "step": 38395 + }, + { + "epoch": 1.79, + "learning_rate": 1.4140266173404606e-05, + "loss": 0.0482, + "step": 38400 + }, + { + "epoch": 1.79, + "learning_rate": 1.413948238834982e-05, + "loss": 0.0768, + "step": 38405 + }, + { + "epoch": 1.79, + "learning_rate": 1.4138698603295034e-05, + "loss": 0.0976, + "step": 38410 + }, + { + "epoch": 1.79, + "learning_rate": 1.4137914818240246e-05, + "loss": 0.2089, + "step": 38415 + }, + { + "epoch": 1.79, + "learning_rate": 1.413713103318546e-05, + "loss": 0.2171, + "step": 38420 + }, + { + "epoch": 1.79, + "learning_rate": 1.4136347248130673e-05, + "loss": 0.2064, + "step": 38425 + }, + { + "epoch": 1.79, + "learning_rate": 1.4135563463075888e-05, + "loss": 0.4382, + "step": 38430 + }, + { + "epoch": 1.79, + "learning_rate": 1.41347796780211e-05, + "loss": 0.2063, + "step": 38435 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133995892966314e-05, + "loss": 0.0299, + "step": 38440 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133212107911526e-05, + "loss": 0.08, + "step": 38445 + }, + { + "epoch": 1.79, + "learning_rate": 1.4132428322856742e-05, + "loss": 0.1102, + "step": 38450 + }, + { + "epoch": 1.79, + "learning_rate": 1.4131644537801954e-05, + "loss": 0.0615, + "step": 38455 + }, + { + "epoch": 1.79, + "learning_rate": 1.4130860752747168e-05, + "loss": 0.0704, + "step": 38460 + }, + { + "epoch": 1.79, + "learning_rate": 1.413007696769238e-05, + "loss": 0.0914, + "step": 38465 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129293182637596e-05, + "loss": 0.1401, + "step": 38470 + }, + { + "epoch": 1.8, + "learning_rate": 1.4128509397582808e-05, + "loss": 0.2028, + "step": 38475 + }, + { + "epoch": 1.8, + "learning_rate": 1.412772561252802e-05, + "loss": 0.2902, + "step": 38480 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126941827473234e-05, + "loss": 0.2409, + "step": 38485 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126158042418448e-05, + "loss": 0.0215, + "step": 38490 + }, + { + "epoch": 1.8, + "learning_rate": 1.4125374257363662e-05, + "loss": 0.0641, + "step": 38495 + }, + { + "epoch": 1.8, + "learning_rate": 1.4124590472308874e-05, + "loss": 0.1115, + "step": 38500 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123806687254088e-05, + "loss": 0.099, + "step": 38505 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123022902199302e-05, + "loss": 0.0935, + "step": 38510 + }, + { + "epoch": 1.8, + "learning_rate": 1.4122239117144516e-05, + "loss": 0.1694, + "step": 38515 + }, + { + "epoch": 1.8, + "learning_rate": 1.4121455332089728e-05, + "loss": 0.234, + "step": 38520 + }, + { + "epoch": 1.8, + "learning_rate": 1.4120671547034944e-05, + "loss": 0.237, + "step": 38525 + }, + { + "epoch": 1.8, + "learning_rate": 1.4119887761980156e-05, + "loss": 0.3852, + "step": 38530 + }, + { + "epoch": 1.8, + "learning_rate": 1.411910397692537e-05, + "loss": 0.2284, + "step": 38535 + }, + { + "epoch": 1.8, + "learning_rate": 1.4118320191870582e-05, + "loss": 0.0491, + "step": 38540 + }, + { + "epoch": 1.8, + "learning_rate": 1.4117536406815794e-05, + "loss": 0.0762, + "step": 38545 + }, + { + "epoch": 1.8, + "learning_rate": 1.411675262176101e-05, + "loss": 0.1074, + "step": 38550 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115968836706222e-05, + "loss": 0.1303, + "step": 38555 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115185051651436e-05, + "loss": 0.1395, + "step": 38560 + }, + { + "epoch": 1.8, + "learning_rate": 1.4114401266596648e-05, + "loss": 0.156, + "step": 38565 + }, + { + "epoch": 1.8, + "learning_rate": 1.4113617481541864e-05, + "loss": 0.1675, + "step": 38570 + }, + { + "epoch": 1.8, + "learning_rate": 1.4112833696487076e-05, + "loss": 0.1753, + "step": 38575 + }, + { + "epoch": 1.8, + "learning_rate": 1.411204991143229e-05, + "loss": 0.3437, + "step": 38580 + }, + { + "epoch": 1.8, + "learning_rate": 1.4111266126377502e-05, + "loss": 0.3223, + "step": 38585 + }, + { + "epoch": 1.8, + "learning_rate": 1.4110482341322718e-05, + "loss": 0.0702, + "step": 38590 + }, + { + "epoch": 1.8, + "learning_rate": 1.410969855626793e-05, + "loss": 0.0645, + "step": 38595 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108914771213144e-05, + "loss": 0.1382, + "step": 38600 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108130986158356e-05, + "loss": 0.1124, + "step": 38605 + }, + { + "epoch": 1.8, + "learning_rate": 1.410734720110357e-05, + "loss": 0.0886, + "step": 38610 + }, + { + "epoch": 1.8, + "learning_rate": 1.4106563416048784e-05, + "loss": 0.1948, + "step": 38615 + }, + { + "epoch": 1.8, + "learning_rate": 1.4105779630993996e-05, + "loss": 0.2204, + "step": 38620 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104995845939212e-05, + "loss": 0.1517, + "step": 38625 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104212060884424e-05, + "loss": 0.3109, + "step": 38630 + }, + { + "epoch": 1.8, + "learning_rate": 1.4103428275829638e-05, + "loss": 0.1802, + "step": 38635 + }, + { + "epoch": 1.8, + "learning_rate": 1.410264449077485e-05, + "loss": 0.028, + "step": 38640 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101860705720066e-05, + "loss": 0.1063, + "step": 38645 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101076920665278e-05, + "loss": 0.0521, + "step": 38650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4100293135610492e-05, + "loss": 0.091, + "step": 38655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099509350555704e-05, + "loss": 0.0967, + "step": 38660 + }, + { + "epoch": 1.8, + "learning_rate": 1.409872556550092e-05, + "loss": 0.117, + "step": 38665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097941780446132e-05, + "loss": 0.1346, + "step": 38670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097157995391344e-05, + "loss": 0.1494, + "step": 38675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4096374210336558e-05, + "loss": 0.2843, + "step": 38680 + }, + { + "epoch": 1.81, + "learning_rate": 1.409559042528177e-05, + "loss": 0.2728, + "step": 38685 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094806640226986e-05, + "loss": 0.0535, + "step": 38690 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094022855172198e-05, + "loss": 0.0669, + "step": 38695 + }, + { + "epoch": 1.81, + "learning_rate": 1.4093239070117412e-05, + "loss": 0.1214, + "step": 38700 + }, + { + "epoch": 1.81, + "learning_rate": 1.4092455285062626e-05, + "loss": 0.0523, + "step": 38705 + }, + { + "epoch": 1.81, + "learning_rate": 1.409167150000784e-05, + "loss": 0.1108, + "step": 38710 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090887714953052e-05, + "loss": 0.1358, + "step": 38715 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090103929898266e-05, + "loss": 0.1182, + "step": 38720 + }, + { + "epoch": 1.81, + "learning_rate": 1.408932014484348e-05, + "loss": 0.1505, + "step": 38725 + }, + { + "epoch": 1.81, + "learning_rate": 1.4088536359788694e-05, + "loss": 0.3261, + "step": 38730 + }, + { + "epoch": 1.81, + "learning_rate": 1.4087752574733906e-05, + "loss": 0.3337, + "step": 38735 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086968789679118e-05, + "loss": 0.0553, + "step": 38740 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086185004624334e-05, + "loss": 0.0685, + "step": 38745 + }, + { + "epoch": 1.81, + "learning_rate": 1.4085401219569546e-05, + "loss": 0.0481, + "step": 38750 + }, + { + "epoch": 1.81, + "learning_rate": 1.408461743451476e-05, + "loss": 0.095, + "step": 38755 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083833649459972e-05, + "loss": 0.1435, + "step": 38760 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083049864405188e-05, + "loss": 0.0965, + "step": 38765 + }, + { + "epoch": 1.81, + "learning_rate": 1.40822660793504e-05, + "loss": 0.1779, + "step": 38770 + }, + { + "epoch": 1.81, + "learning_rate": 1.4081482294295614e-05, + "loss": 0.2204, + "step": 38775 + }, + { + "epoch": 1.81, + "learning_rate": 1.4080698509240826e-05, + "loss": 0.3111, + "step": 38780 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079914724186042e-05, + "loss": 0.2863, + "step": 38785 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079130939131254e-05, + "loss": 0.0378, + "step": 38790 + }, + { + "epoch": 1.81, + "learning_rate": 1.4078347154076468e-05, + "loss": 0.1025, + "step": 38795 + }, + { + "epoch": 1.81, + "learning_rate": 1.407756336902168e-05, + "loss": 0.0496, + "step": 38800 + }, + { + "epoch": 1.81, + "learning_rate": 1.4076779583966894e-05, + "loss": 0.0819, + "step": 38805 + }, + { + "epoch": 1.81, + "learning_rate": 1.4075995798912108e-05, + "loss": 0.071, + "step": 38810 + }, + { + "epoch": 1.81, + "learning_rate": 1.407521201385732e-05, + "loss": 0.1571, + "step": 38815 + }, + { + "epoch": 1.81, + "learning_rate": 1.4074428228802534e-05, + "loss": 0.262, + "step": 38820 + }, + { + "epoch": 1.81, + "learning_rate": 1.4073644443747748e-05, + "loss": 0.2983, + "step": 38825 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072860658692962e-05, + "loss": 0.228, + "step": 38830 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072076873638174e-05, + "loss": 0.1873, + "step": 38835 + }, + { + "epoch": 1.81, + "learning_rate": 1.407129308858339e-05, + "loss": 0.018, + "step": 38840 + }, + { + "epoch": 1.81, + "learning_rate": 1.4070509303528602e-05, + "loss": 0.0565, + "step": 38845 + }, + { + "epoch": 1.81, + "learning_rate": 1.4069725518473816e-05, + "loss": 0.1004, + "step": 38850 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068941733419028e-05, + "loss": 0.0656, + "step": 38855 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068157948364243e-05, + "loss": 0.2027, + "step": 38860 + }, + { + "epoch": 1.81, + "learning_rate": 1.4067374163309456e-05, + "loss": 0.2106, + "step": 38865 + }, + { + "epoch": 1.81, + "learning_rate": 1.4066590378254668e-05, + "loss": 0.1221, + "step": 38870 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065806593199882e-05, + "loss": 0.2289, + "step": 38875 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065022808145094e-05, + "loss": 0.311, + "step": 38880 + }, + { + "epoch": 1.81, + "learning_rate": 1.406423902309031e-05, + "loss": 0.3, + "step": 38885 + }, + { + "epoch": 1.81, + "learning_rate": 1.4063455238035522e-05, + "loss": 0.0561, + "step": 38890 + }, + { + "epoch": 1.81, + "learning_rate": 1.4062671452980736e-05, + "loss": 0.0676, + "step": 38895 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061887667925948e-05, + "loss": 0.1541, + "step": 38900 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061103882871163e-05, + "loss": 0.0841, + "step": 38905 + }, + { + "epoch": 1.82, + "learning_rate": 1.4060320097816376e-05, + "loss": 0.1486, + "step": 38910 + }, + { + "epoch": 1.82, + "learning_rate": 1.405953631276159e-05, + "loss": 0.1742, + "step": 38915 + }, + { + "epoch": 1.82, + "learning_rate": 1.4058752527706803e-05, + "loss": 0.153, + "step": 38920 + }, + { + "epoch": 1.82, + "learning_rate": 1.4057968742652017e-05, + "loss": 0.201, + "step": 38925 + }, + { + "epoch": 1.82, + "learning_rate": 1.405718495759723e-05, + "loss": 0.3103, + "step": 38930 + }, + { + "epoch": 1.82, + "learning_rate": 1.4056401172542442e-05, + "loss": 0.1888, + "step": 38935 + }, + { + "epoch": 1.82, + "learning_rate": 1.4055617387487657e-05, + "loss": 0.035, + "step": 38940 + }, + { + "epoch": 1.82, + "learning_rate": 1.405483360243287e-05, + "loss": 0.02, + "step": 38945 + }, + { + "epoch": 1.82, + "learning_rate": 1.4054049817378084e-05, + "loss": 0.0971, + "step": 38950 + }, + { + "epoch": 1.82, + "learning_rate": 1.4053266032323296e-05, + "loss": 0.1084, + "step": 38955 + }, + { + "epoch": 1.82, + "learning_rate": 1.4052482247268511e-05, + "loss": 0.0948, + "step": 38960 + }, + { + "epoch": 1.82, + "learning_rate": 1.4051698462213724e-05, + "loss": 0.1653, + "step": 38965 + }, + { + "epoch": 1.82, + "learning_rate": 1.4050914677158937e-05, + "loss": 0.1645, + "step": 38970 + }, + { + "epoch": 1.82, + "learning_rate": 1.405013089210415e-05, + "loss": 0.1154, + "step": 38975 + }, + { + "epoch": 1.82, + "learning_rate": 1.4049347107049365e-05, + "loss": 0.4433, + "step": 38980 + }, + { + "epoch": 1.82, + "learning_rate": 1.4048563321994577e-05, + "loss": 0.2112, + "step": 38985 + }, + { + "epoch": 1.82, + "learning_rate": 1.4047779536939791e-05, + "loss": 0.0622, + "step": 38990 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046995751885004e-05, + "loss": 0.0382, + "step": 38995 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046211966830216e-05, + "loss": 0.1102, + "step": 39000 + }, + { + "epoch": 1.82, + "learning_rate": 1.4045428181775431e-05, + "loss": 0.1516, + "step": 39005 + }, + { + "epoch": 1.82, + "learning_rate": 1.4044644396720644e-05, + "loss": 0.1424, + "step": 39010 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043860611665858e-05, + "loss": 0.1871, + "step": 39015 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043076826611071e-05, + "loss": 0.2085, + "step": 39020 + }, + { + "epoch": 1.82, + "learning_rate": 1.4042293041556285e-05, + "loss": 0.2028, + "step": 39025 + }, + { + "epoch": 1.82, + "learning_rate": 1.4041509256501498e-05, + "loss": 0.3885, + "step": 39030 + }, + { + "epoch": 1.82, + "learning_rate": 1.4040725471446711e-05, + "loss": 0.3049, + "step": 39035 + }, + { + "epoch": 1.82, + "learning_rate": 1.4039941686391925e-05, + "loss": 0.0252, + "step": 39040 + }, + { + "epoch": 1.82, + "learning_rate": 1.403915790133714e-05, + "loss": 0.0376, + "step": 39045 + }, + { + "epoch": 1.82, + "learning_rate": 1.4038374116282351e-05, + "loss": 0.029, + "step": 39050 + }, + { + "epoch": 1.82, + "learning_rate": 1.4037590331227567e-05, + "loss": 0.1269, + "step": 39055 + }, + { + "epoch": 1.82, + "learning_rate": 1.403680654617278e-05, + "loss": 0.1043, + "step": 39060 + }, + { + "epoch": 1.82, + "learning_rate": 1.4036022761117992e-05, + "loss": 0.1261, + "step": 39065 + }, + { + "epoch": 1.82, + "learning_rate": 1.4035238976063205e-05, + "loss": 0.2339, + "step": 39070 + }, + { + "epoch": 1.82, + "learning_rate": 1.4034455191008418e-05, + "loss": 0.1286, + "step": 39075 + }, + { + "epoch": 1.82, + "learning_rate": 1.4033671405953633e-05, + "loss": 0.2755, + "step": 39080 + }, + { + "epoch": 1.82, + "learning_rate": 1.4032887620898845e-05, + "loss": 0.3312, + "step": 39085 + }, + { + "epoch": 1.82, + "learning_rate": 1.403210383584406e-05, + "loss": 0.0087, + "step": 39090 + }, + { + "epoch": 1.82, + "learning_rate": 1.4031320050789272e-05, + "loss": 0.0794, + "step": 39095 + }, + { + "epoch": 1.82, + "learning_rate": 1.4030536265734487e-05, + "loss": 0.0557, + "step": 39100 + }, + { + "epoch": 1.82, + "learning_rate": 1.40297524806797e-05, + "loss": 0.136, + "step": 39105 + }, + { + "epoch": 1.82, + "learning_rate": 1.4028968695624913e-05, + "loss": 0.1002, + "step": 39110 + }, + { + "epoch": 1.83, + "learning_rate": 1.4028184910570125e-05, + "loss": 0.1967, + "step": 39115 + }, + { + "epoch": 1.83, + "learning_rate": 1.4027401125515341e-05, + "loss": 0.1752, + "step": 39120 + }, + { + "epoch": 1.83, + "learning_rate": 1.4026617340460553e-05, + "loss": 0.1226, + "step": 39125 + }, + { + "epoch": 1.83, + "learning_rate": 1.4025833555405766e-05, + "loss": 0.2543, + "step": 39130 + }, + { + "epoch": 1.83, + "learning_rate": 1.402504977035098e-05, + "loss": 0.3523, + "step": 39135 + }, + { + "epoch": 1.83, + "learning_rate": 1.4024265985296193e-05, + "loss": 0.086, + "step": 39140 + }, + { + "epoch": 1.83, + "learning_rate": 1.4023482200241407e-05, + "loss": 0.1465, + "step": 39145 + }, + { + "epoch": 1.83, + "learning_rate": 1.402269841518662e-05, + "loss": 0.1037, + "step": 39150 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021914630131835e-05, + "loss": 0.0747, + "step": 39155 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021130845077047e-05, + "loss": 0.1403, + "step": 39160 + }, + { + "epoch": 1.83, + "learning_rate": 1.4020347060022261e-05, + "loss": 0.1251, + "step": 39165 + }, + { + "epoch": 1.83, + "learning_rate": 1.4019563274967473e-05, + "loss": 0.1805, + "step": 39170 + }, + { + "epoch": 1.83, + "learning_rate": 1.4018779489912689e-05, + "loss": 0.1899, + "step": 39175 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017995704857901e-05, + "loss": 0.3973, + "step": 39180 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017368676814072e-05, + "loss": 0.2354, + "step": 39185 + }, + { + "epoch": 1.83, + "learning_rate": 1.4016584891759286e-05, + "loss": 0.0266, + "step": 39190 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015801106704498e-05, + "loss": 0.0359, + "step": 39195 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015017321649712e-05, + "loss": 0.0703, + "step": 39200 + }, + { + "epoch": 1.83, + "learning_rate": 1.4014233536594926e-05, + "loss": 0.1166, + "step": 39205 + }, + { + "epoch": 1.83, + "learning_rate": 1.4013449751540138e-05, + "loss": 0.1409, + "step": 39210 + }, + { + "epoch": 1.83, + "learning_rate": 1.4012665966485353e-05, + "loss": 0.129, + "step": 39215 + }, + { + "epoch": 1.83, + "learning_rate": 1.4011882181430566e-05, + "loss": 0.1237, + "step": 39220 + }, + { + "epoch": 1.83, + "learning_rate": 1.401109839637578e-05, + "loss": 0.1638, + "step": 39225 + }, + { + "epoch": 1.83, + "learning_rate": 1.4010314611320992e-05, + "loss": 0.2936, + "step": 39230 + }, + { + "epoch": 1.83, + "learning_rate": 1.4009530826266207e-05, + "loss": 0.2029, + "step": 39235 + }, + { + "epoch": 1.83, + "learning_rate": 1.400874704121142e-05, + "loss": 0.0323, + "step": 39240 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007963256156633e-05, + "loss": 0.1204, + "step": 39245 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007179471101846e-05, + "loss": 0.0603, + "step": 39250 + }, + { + "epoch": 1.83, + "learning_rate": 1.4006395686047061e-05, + "loss": 0.1161, + "step": 39255 + }, + { + "epoch": 1.83, + "learning_rate": 1.4005611900992273e-05, + "loss": 0.1051, + "step": 39260 + }, + { + "epoch": 1.83, + "learning_rate": 1.4004828115937486e-05, + "loss": 0.1097, + "step": 39265 + }, + { + "epoch": 1.83, + "learning_rate": 1.40040443308827e-05, + "loss": 0.1569, + "step": 39270 + }, + { + "epoch": 1.83, + "learning_rate": 1.4003260545827912e-05, + "loss": 0.2331, + "step": 39275 + }, + { + "epoch": 1.83, + "learning_rate": 1.4002476760773127e-05, + "loss": 0.4034, + "step": 39280 + }, + { + "epoch": 1.83, + "learning_rate": 1.400169297571834e-05, + "loss": 0.2077, + "step": 39285 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000909190663553e-05, + "loss": 0.0452, + "step": 39290 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000125405608766e-05, + "loss": 0.0357, + "step": 39295 + }, + { + "epoch": 1.83, + "learning_rate": 1.3999341620553981e-05, + "loss": 0.0787, + "step": 39300 + }, + { + "epoch": 1.83, + "learning_rate": 1.3998557835499194e-05, + "loss": 0.1583, + "step": 39305 + }, + { + "epoch": 1.83, + "learning_rate": 1.3997774050444407e-05, + "loss": 0.1058, + "step": 39310 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996990265389621e-05, + "loss": 0.0733, + "step": 39315 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996206480334835e-05, + "loss": 0.1355, + "step": 39320 + }, + { + "epoch": 1.83, + "learning_rate": 1.3995422695280047e-05, + "loss": 0.1527, + "step": 39325 + }, + { + "epoch": 1.84, + "learning_rate": 1.399463891022526e-05, + "loss": 0.2326, + "step": 39330 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993855125170475e-05, + "loss": 0.2507, + "step": 39335 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993071340115687e-05, + "loss": 0.0268, + "step": 39340 + }, + { + "epoch": 1.84, + "learning_rate": 1.3992287555060901e-05, + "loss": 0.0515, + "step": 39345 + }, + { + "epoch": 1.84, + "learning_rate": 1.3991503770006114e-05, + "loss": 0.0863, + "step": 39350 + }, + { + "epoch": 1.84, + "learning_rate": 1.399071998495133e-05, + "loss": 0.1121, + "step": 39355 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989936199896541e-05, + "loss": 0.1218, + "step": 39360 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989152414841755e-05, + "loss": 0.0561, + "step": 39365 + }, + { + "epoch": 1.84, + "learning_rate": 1.3988368629786968e-05, + "loss": 0.1979, + "step": 39370 + }, + { + "epoch": 1.84, + "learning_rate": 1.3987584844732183e-05, + "loss": 0.3349, + "step": 39375 + }, + { + "epoch": 1.84, + "learning_rate": 1.3986801059677395e-05, + "loss": 0.2712, + "step": 39380 + }, + { + "epoch": 1.84, + "learning_rate": 1.398601727462261e-05, + "loss": 0.21, + "step": 39385 + }, + { + "epoch": 1.84, + "learning_rate": 1.3985233489567821e-05, + "loss": 0.0712, + "step": 39390 + }, + { + "epoch": 1.84, + "learning_rate": 1.3984449704513035e-05, + "loss": 0.1168, + "step": 39395 + }, + { + "epoch": 1.84, + "learning_rate": 1.398366591945825e-05, + "loss": 0.0892, + "step": 39400 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982882134403461e-05, + "loss": 0.1336, + "step": 39405 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982098349348675e-05, + "loss": 0.172, + "step": 39410 + }, + { + "epoch": 1.84, + "learning_rate": 1.398131456429389e-05, + "loss": 0.1663, + "step": 39415 + }, + { + "epoch": 1.84, + "learning_rate": 1.3980530779239103e-05, + "loss": 0.1677, + "step": 39420 + }, + { + "epoch": 1.84, + "learning_rate": 1.3979746994184315e-05, + "loss": 0.1407, + "step": 39425 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978963209129531e-05, + "loss": 0.4581, + "step": 39430 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978179424074743e-05, + "loss": 0.4789, + "step": 39435 + }, + { + "epoch": 1.84, + "learning_rate": 1.3977395639019957e-05, + "loss": 0.0543, + "step": 39440 + }, + { + "epoch": 1.84, + "learning_rate": 1.397661185396517e-05, + "loss": 0.0469, + "step": 39445 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975828068910385e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975044283855597e-05, + "loss": 0.0445, + "step": 39455 + }, + { + "epoch": 1.84, + "learning_rate": 1.397426049880081e-05, + "loss": 0.1107, + "step": 39460 + }, + { + "epoch": 1.84, + "learning_rate": 1.3973476713746023e-05, + "loss": 0.0865, + "step": 39465 + }, + { + "epoch": 1.84, + "learning_rate": 1.3972692928691235e-05, + "loss": 0.1738, + "step": 39470 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971909143636451e-05, + "loss": 0.3184, + "step": 39475 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971125358581663e-05, + "loss": 0.2718, + "step": 39480 + }, + { + "epoch": 1.84, + "learning_rate": 1.3970341573526877e-05, + "loss": 0.3997, + "step": 39485 + }, + { + "epoch": 1.84, + "learning_rate": 1.396955778847209e-05, + "loss": 0.0962, + "step": 39490 + }, + { + "epoch": 1.84, + "learning_rate": 1.3968774003417305e-05, + "loss": 0.0429, + "step": 39495 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967990218362517e-05, + "loss": 0.119, + "step": 39500 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967206433307731e-05, + "loss": 0.065, + "step": 39505 + }, + { + "epoch": 1.84, + "learning_rate": 1.3966422648252943e-05, + "loss": 0.2045, + "step": 39510 + }, + { + "epoch": 1.84, + "learning_rate": 1.3965638863198159e-05, + "loss": 0.1923, + "step": 39515 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964855078143371e-05, + "loss": 0.1101, + "step": 39520 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964071293088583e-05, + "loss": 0.201, + "step": 39525 + }, + { + "epoch": 1.84, + "learning_rate": 1.3963287508033799e-05, + "loss": 0.2879, + "step": 39530 + }, + { + "epoch": 1.84, + "learning_rate": 1.3962503722979011e-05, + "loss": 0.1518, + "step": 39535 + }, + { + "epoch": 1.84, + "learning_rate": 1.3961719937924225e-05, + "loss": 0.0882, + "step": 39540 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960936152869437e-05, + "loss": 0.0576, + "step": 39545 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960152367814653e-05, + "loss": 0.0711, + "step": 39550 + }, + { + "epoch": 1.85, + "learning_rate": 1.3959368582759865e-05, + "loss": 0.1663, + "step": 39555 + }, + { + "epoch": 1.85, + "learning_rate": 1.3958584797705079e-05, + "loss": 0.1107, + "step": 39560 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957801012650291e-05, + "loss": 0.1376, + "step": 39565 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957017227595507e-05, + "loss": 0.2258, + "step": 39570 + }, + { + "epoch": 1.85, + "learning_rate": 1.3956233442540719e-05, + "loss": 0.1872, + "step": 39575 + }, + { + "epoch": 1.85, + "learning_rate": 1.3955449657485933e-05, + "loss": 0.3751, + "step": 39580 + }, + { + "epoch": 1.85, + "learning_rate": 1.3954665872431145e-05, + "loss": 0.3203, + "step": 39585 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953882087376357e-05, + "loss": 0.0157, + "step": 39590 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953098302321573e-05, + "loss": 0.0469, + "step": 39595 + }, + { + "epoch": 1.85, + "learning_rate": 1.3952314517266785e-05, + "loss": 0.0631, + "step": 39600 + }, + { + "epoch": 1.85, + "learning_rate": 1.3951530732211999e-05, + "loss": 0.0862, + "step": 39605 + }, + { + "epoch": 1.85, + "learning_rate": 1.3950746947157211e-05, + "loss": 0.1194, + "step": 39610 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949963162102427e-05, + "loss": 0.1458, + "step": 39615 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949179377047639e-05, + "loss": 0.2322, + "step": 39620 + }, + { + "epoch": 1.85, + "learning_rate": 1.3948395591992853e-05, + "loss": 0.363, + "step": 39625 + }, + { + "epoch": 1.85, + "learning_rate": 1.3947611806938067e-05, + "loss": 0.2981, + "step": 39630 + }, + { + "epoch": 1.85, + "learning_rate": 1.394682802188328e-05, + "loss": 0.2648, + "step": 39635 + }, + { + "epoch": 1.85, + "learning_rate": 1.3946044236828493e-05, + "loss": 0.0346, + "step": 39640 + }, + { + "epoch": 1.85, + "learning_rate": 1.3945260451773709e-05, + "loss": 0.0576, + "step": 39645 + }, + { + "epoch": 1.85, + "learning_rate": 1.394447666671892e-05, + "loss": 0.1158, + "step": 39650 + }, + { + "epoch": 1.85, + "learning_rate": 1.3943692881664133e-05, + "loss": 0.0324, + "step": 39655 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942909096609347e-05, + "loss": 0.1207, + "step": 39660 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942125311554559e-05, + "loss": 0.1835, + "step": 39665 + }, + { + "epoch": 1.85, + "learning_rate": 1.3941341526499775e-05, + "loss": 0.1337, + "step": 39670 + }, + { + "epoch": 1.85, + "learning_rate": 1.3940557741444987e-05, + "loss": 0.1672, + "step": 39675 + }, + { + "epoch": 1.85, + "learning_rate": 1.3939773956390201e-05, + "loss": 0.2736, + "step": 39680 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938990171335413e-05, + "loss": 0.2847, + "step": 39685 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938206386280629e-05, + "loss": 0.044, + "step": 39690 + }, + { + "epoch": 1.85, + "learning_rate": 1.3937422601225841e-05, + "loss": 0.0497, + "step": 39695 + }, + { + "epoch": 1.85, + "learning_rate": 1.3936638816171055e-05, + "loss": 0.1422, + "step": 39700 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935855031116267e-05, + "loss": 0.0669, + "step": 39705 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935071246061483e-05, + "loss": 0.0804, + "step": 39710 + }, + { + "epoch": 1.85, + "learning_rate": 1.3934287461006695e-05, + "loss": 0.0735, + "step": 39715 + }, + { + "epoch": 1.85, + "learning_rate": 1.3933503675951907e-05, + "loss": 0.1105, + "step": 39720 + }, + { + "epoch": 1.85, + "learning_rate": 1.3932719890897121e-05, + "loss": 0.1518, + "step": 39725 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931936105842335e-05, + "loss": 0.3293, + "step": 39730 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931152320787549e-05, + "loss": 0.2068, + "step": 39735 + }, + { + "epoch": 1.85, + "learning_rate": 1.3930368535732761e-05, + "loss": 0.0597, + "step": 39740 + }, + { + "epoch": 1.85, + "learning_rate": 1.3929584750677977e-05, + "loss": 0.0848, + "step": 39745 + }, + { + "epoch": 1.85, + "learning_rate": 1.3928800965623189e-05, + "loss": 0.0794, + "step": 39750 + }, + { + "epoch": 1.86, + "learning_rate": 1.3928017180568403e-05, + "loss": 0.0735, + "step": 39755 + }, + { + "epoch": 1.86, + "learning_rate": 1.3927233395513615e-05, + "loss": 0.1662, + "step": 39760 + }, + { + "epoch": 1.86, + "learning_rate": 1.392644961045883e-05, + "loss": 0.1268, + "step": 39765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3925665825404043e-05, + "loss": 0.1259, + "step": 39770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924882040349257e-05, + "loss": 0.2228, + "step": 39775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924098255294469e-05, + "loss": 0.2452, + "step": 39780 + }, + { + "epoch": 1.86, + "learning_rate": 1.3923314470239681e-05, + "loss": 0.3537, + "step": 39785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3922530685184897e-05, + "loss": 0.0579, + "step": 39790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3921746900130109e-05, + "loss": 0.0819, + "step": 39795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920963115075323e-05, + "loss": 0.117, + "step": 39800 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920179330020535e-05, + "loss": 0.0466, + "step": 39805 + }, + { + "epoch": 1.86, + "learning_rate": 1.391939554496575e-05, + "loss": 0.1153, + "step": 39810 + }, + { + "epoch": 1.86, + "learning_rate": 1.3918611759910963e-05, + "loss": 0.1227, + "step": 39815 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917827974856177e-05, + "loss": 0.1416, + "step": 39820 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917044189801389e-05, + "loss": 0.1812, + "step": 39825 + }, + { + "epoch": 1.86, + "learning_rate": 1.3916260404746604e-05, + "loss": 0.3345, + "step": 39830 + }, + { + "epoch": 1.86, + "learning_rate": 1.3915476619691817e-05, + "loss": 0.359, + "step": 39835 + }, + { + "epoch": 1.86, + "learning_rate": 1.391469283463703e-05, + "loss": 0.019, + "step": 39840 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913909049582245e-05, + "loss": 0.037, + "step": 39845 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913125264527457e-05, + "loss": 0.0897, + "step": 39850 + }, + { + "epoch": 1.86, + "learning_rate": 1.391234147947267e-05, + "loss": 0.0943, + "step": 39855 + }, + { + "epoch": 1.86, + "learning_rate": 1.3911557694417883e-05, + "loss": 0.1399, + "step": 39860 + }, + { + "epoch": 1.86, + "learning_rate": 1.3910773909363098e-05, + "loss": 0.1403, + "step": 39865 + }, + { + "epoch": 1.86, + "learning_rate": 1.390999012430831e-05, + "loss": 0.2049, + "step": 39870 + }, + { + "epoch": 1.86, + "learning_rate": 1.3909206339253525e-05, + "loss": 0.2028, + "step": 39875 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908422554198737e-05, + "loss": 0.281, + "step": 39880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3907638769143952e-05, + "loss": 0.2856, + "step": 39885 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906854984089165e-05, + "loss": 0.0383, + "step": 39890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906071199034378e-05, + "loss": 0.0449, + "step": 39895 + }, + { + "epoch": 1.86, + "learning_rate": 1.390528741397959e-05, + "loss": 0.0715, + "step": 39900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3904503628924806e-05, + "loss": 0.0983, + "step": 39905 + }, + { + "epoch": 1.86, + "learning_rate": 1.3903719843870019e-05, + "loss": 0.1431, + "step": 39910 + }, + { + "epoch": 1.86, + "learning_rate": 1.390293605881523e-05, + "loss": 0.1682, + "step": 39915 + }, + { + "epoch": 1.86, + "learning_rate": 1.3902152273760445e-05, + "loss": 0.1312, + "step": 39920 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901368488705659e-05, + "loss": 0.1199, + "step": 39925 + }, + { + "epoch": 1.86, + "learning_rate": 1.3900584703650872e-05, + "loss": 0.1829, + "step": 39930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899800918596085e-05, + "loss": 0.2109, + "step": 39935 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899017133541299e-05, + "loss": 0.0469, + "step": 39940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3898233348486512e-05, + "loss": 0.056, + "step": 39945 + }, + { + "epoch": 1.86, + "learning_rate": 1.3897449563431726e-05, + "loss": 0.0796, + "step": 39950 + }, + { + "epoch": 1.86, + "learning_rate": 1.3896665778376939e-05, + "loss": 0.0767, + "step": 39955 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895881993322154e-05, + "loss": 0.0807, + "step": 39960 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895098208267366e-05, + "loss": 0.1694, + "step": 39965 + }, + { + "epoch": 1.87, + "learning_rate": 1.389431442321258e-05, + "loss": 0.1686, + "step": 39970 + }, + { + "epoch": 1.87, + "learning_rate": 1.3893530638157793e-05, + "loss": 0.1735, + "step": 39975 + }, + { + "epoch": 1.87, + "learning_rate": 1.3892746853103005e-05, + "loss": 0.3052, + "step": 39980 + }, + { + "epoch": 1.87, + "learning_rate": 1.389196306804822e-05, + "loss": 0.315, + "step": 39985 + }, + { + "epoch": 1.87, + "learning_rate": 1.3891179282993433e-05, + "loss": 0.0689, + "step": 39990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3890395497938646e-05, + "loss": 0.0481, + "step": 39995 + }, + { + "epoch": 1.87, + "learning_rate": 1.3889611712883859e-05, + "loss": 0.0813, + "step": 40000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888827927829074e-05, + "loss": 0.0883, + "step": 40005 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888044142774286e-05, + "loss": 0.0955, + "step": 40010 + }, + { + "epoch": 1.87, + "learning_rate": 1.38872603577195e-05, + "loss": 0.1675, + "step": 40015 + }, + { + "epoch": 1.87, + "learning_rate": 1.3886476572664713e-05, + "loss": 0.1361, + "step": 40020 + }, + { + "epoch": 1.87, + "learning_rate": 1.3885692787609928e-05, + "loss": 0.1712, + "step": 40025 + }, + { + "epoch": 1.87, + "learning_rate": 1.388490900255514e-05, + "loss": 0.3516, + "step": 40030 + }, + { + "epoch": 1.87, + "learning_rate": 1.3884125217500354e-05, + "loss": 0.233, + "step": 40035 + }, + { + "epoch": 1.87, + "learning_rate": 1.3883341432445567e-05, + "loss": 0.0227, + "step": 40040 + }, + { + "epoch": 1.87, + "learning_rate": 1.388255764739078e-05, + "loss": 0.0551, + "step": 40045 + }, + { + "epoch": 1.87, + "learning_rate": 1.3881773862335994e-05, + "loss": 0.0955, + "step": 40050 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880990077281207e-05, + "loss": 0.1336, + "step": 40055 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880206292226422e-05, + "loss": 0.0766, + "step": 40060 + }, + { + "epoch": 1.87, + "learning_rate": 1.3879422507171634e-05, + "loss": 0.0847, + "step": 40065 + }, + { + "epoch": 1.87, + "learning_rate": 1.3878638722116848e-05, + "loss": 0.143, + "step": 40070 + }, + { + "epoch": 1.87, + "learning_rate": 1.387785493706206e-05, + "loss": 0.1564, + "step": 40075 + }, + { + "epoch": 1.87, + "learning_rate": 1.3877071152007276e-05, + "loss": 0.3278, + "step": 40080 + }, + { + "epoch": 1.87, + "learning_rate": 1.3876287366952488e-05, + "loss": 0.2336, + "step": 40085 + }, + { + "epoch": 1.87, + "learning_rate": 1.3875503581897702e-05, + "loss": 0.0916, + "step": 40090 + }, + { + "epoch": 1.87, + "learning_rate": 1.3874719796842914e-05, + "loss": 0.11, + "step": 40095 + }, + { + "epoch": 1.87, + "learning_rate": 1.387393601178813e-05, + "loss": 0.0472, + "step": 40100 + }, + { + "epoch": 1.87, + "learning_rate": 1.3873152226733342e-05, + "loss": 0.0719, + "step": 40105 + }, + { + "epoch": 1.87, + "learning_rate": 1.3872368441678554e-05, + "loss": 0.1545, + "step": 40110 + }, + { + "epoch": 1.87, + "learning_rate": 1.3871584656623768e-05, + "loss": 0.1102, + "step": 40115 + }, + { + "epoch": 1.87, + "learning_rate": 1.387080087156898e-05, + "loss": 0.2047, + "step": 40120 + }, + { + "epoch": 1.87, + "learning_rate": 1.3870017086514196e-05, + "loss": 0.241, + "step": 40125 + }, + { + "epoch": 1.87, + "learning_rate": 1.3869233301459408e-05, + "loss": 0.3618, + "step": 40130 + }, + { + "epoch": 1.87, + "learning_rate": 1.3868449516404622e-05, + "loss": 0.3678, + "step": 40135 + }, + { + "epoch": 1.87, + "learning_rate": 1.3867665731349834e-05, + "loss": 0.0394, + "step": 40140 + }, + { + "epoch": 1.87, + "learning_rate": 1.386688194629505e-05, + "loss": 0.0337, + "step": 40145 + }, + { + "epoch": 1.87, + "learning_rate": 1.3866098161240262e-05, + "loss": 0.0898, + "step": 40150 + }, + { + "epoch": 1.87, + "learning_rate": 1.3865314376185476e-05, + "loss": 0.074, + "step": 40155 + }, + { + "epoch": 1.87, + "learning_rate": 1.386453059113069e-05, + "loss": 0.1356, + "step": 40160 + }, + { + "epoch": 1.87, + "learning_rate": 1.3863746806075904e-05, + "loss": 0.1607, + "step": 40165 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862963021021116e-05, + "loss": 0.1426, + "step": 40170 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862179235966328e-05, + "loss": 0.2663, + "step": 40175 + }, + { + "epoch": 1.87, + "learning_rate": 1.3861395450911544e-05, + "loss": 0.3673, + "step": 40180 + }, + { + "epoch": 1.88, + "learning_rate": 1.3860611665856756e-05, + "loss": 0.3103, + "step": 40185 + }, + { + "epoch": 1.88, + "learning_rate": 1.385982788080197e-05, + "loss": 0.0458, + "step": 40190 + }, + { + "epoch": 1.88, + "learning_rate": 1.3859044095747182e-05, + "loss": 0.0491, + "step": 40195 + }, + { + "epoch": 1.88, + "learning_rate": 1.3858260310692398e-05, + "loss": 0.0837, + "step": 40200 + }, + { + "epoch": 1.88, + "learning_rate": 1.385747652563761e-05, + "loss": 0.1077, + "step": 40205 + }, + { + "epoch": 1.88, + "learning_rate": 1.3856692740582824e-05, + "loss": 0.1128, + "step": 40210 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855908955528036e-05, + "loss": 0.2105, + "step": 40215 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855125170473252e-05, + "loss": 0.169, + "step": 40220 + }, + { + "epoch": 1.88, + "learning_rate": 1.3854341385418464e-05, + "loss": 0.1373, + "step": 40225 + }, + { + "epoch": 1.88, + "learning_rate": 1.3853557600363678e-05, + "loss": 0.2803, + "step": 40230 + }, + { + "epoch": 1.88, + "learning_rate": 1.385277381530889e-05, + "loss": 0.2458, + "step": 40235 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851990030254104e-05, + "loss": 0.067, + "step": 40240 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851206245199318e-05, + "loss": 0.0656, + "step": 40245 + }, + { + "epoch": 1.88, + "learning_rate": 1.385042246014453e-05, + "loss": 0.0975, + "step": 40250 + }, + { + "epoch": 1.88, + "learning_rate": 1.3849638675089744e-05, + "loss": 0.0711, + "step": 40255 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848854890034958e-05, + "loss": 0.1479, + "step": 40260 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848071104980172e-05, + "loss": 0.0855, + "step": 40265 + }, + { + "epoch": 1.88, + "learning_rate": 1.3847287319925384e-05, + "loss": 0.2257, + "step": 40270 + }, + { + "epoch": 1.88, + "learning_rate": 1.38465035348706e-05, + "loss": 0.2097, + "step": 40275 + }, + { + "epoch": 1.88, + "learning_rate": 1.3845719749815812e-05, + "loss": 0.4084, + "step": 40280 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844935964761026e-05, + "loss": 0.2848, + "step": 40285 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844152179706238e-05, + "loss": 0.104, + "step": 40290 + }, + { + "epoch": 1.88, + "learning_rate": 1.3843368394651454e-05, + "loss": 0.0429, + "step": 40295 + }, + { + "epoch": 1.88, + "learning_rate": 1.3842584609596666e-05, + "loss": 0.086, + "step": 40300 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841800824541878e-05, + "loss": 0.0811, + "step": 40305 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841017039487092e-05, + "loss": 0.058, + "step": 40310 + }, + { + "epoch": 1.88, + "learning_rate": 1.3840233254432304e-05, + "loss": 0.1156, + "step": 40315 + }, + { + "epoch": 1.88, + "learning_rate": 1.383944946937752e-05, + "loss": 0.2159, + "step": 40320 + }, + { + "epoch": 1.88, + "learning_rate": 1.3838665684322732e-05, + "loss": 0.2478, + "step": 40325 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837881899267946e-05, + "loss": 0.2005, + "step": 40330 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837098114213158e-05, + "loss": 0.3357, + "step": 40335 + }, + { + "epoch": 1.88, + "learning_rate": 1.3836314329158374e-05, + "loss": 0.0394, + "step": 40340 + }, + { + "epoch": 1.88, + "learning_rate": 1.3835530544103586e-05, + "loss": 0.0648, + "step": 40345 + }, + { + "epoch": 1.88, + "learning_rate": 1.38347467590488e-05, + "loss": 0.0823, + "step": 40350 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833962973994012e-05, + "loss": 0.0483, + "step": 40355 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833179188939228e-05, + "loss": 0.193, + "step": 40360 + }, + { + "epoch": 1.88, + "learning_rate": 1.383239540388444e-05, + "loss": 0.1007, + "step": 40365 + }, + { + "epoch": 1.88, + "learning_rate": 1.3831611618829652e-05, + "loss": 0.1092, + "step": 40370 + }, + { + "epoch": 1.88, + "learning_rate": 1.3830827833774868e-05, + "loss": 0.2738, + "step": 40375 + }, + { + "epoch": 1.88, + "learning_rate": 1.383004404872008e-05, + "loss": 0.2871, + "step": 40380 + }, + { + "epoch": 1.88, + "learning_rate": 1.3829260263665294e-05, + "loss": 0.2501, + "step": 40385 + }, + { + "epoch": 1.88, + "learning_rate": 1.3828476478610506e-05, + "loss": 0.0473, + "step": 40390 + }, + { + "epoch": 1.88, + "learning_rate": 1.3827692693555722e-05, + "loss": 0.0597, + "step": 40395 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826908908500934e-05, + "loss": 0.0646, + "step": 40400 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826125123446148e-05, + "loss": 0.1314, + "step": 40405 + }, + { + "epoch": 1.89, + "learning_rate": 1.382534133839136e-05, + "loss": 0.1628, + "step": 40410 + }, + { + "epoch": 1.89, + "learning_rate": 1.3824557553336576e-05, + "loss": 0.1858, + "step": 40415 + }, + { + "epoch": 1.89, + "learning_rate": 1.3823773768281788e-05, + "loss": 0.2056, + "step": 40420 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822989983227002e-05, + "loss": 0.1649, + "step": 40425 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822206198172214e-05, + "loss": 0.1464, + "step": 40430 + }, + { + "epoch": 1.89, + "learning_rate": 1.3821422413117426e-05, + "loss": 0.3024, + "step": 40435 + }, + { + "epoch": 1.89, + "learning_rate": 1.3820638628062642e-05, + "loss": 0.0702, + "step": 40440 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819854843007854e-05, + "loss": 0.0468, + "step": 40445 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819071057953068e-05, + "loss": 0.0472, + "step": 40450 + }, + { + "epoch": 1.89, + "learning_rate": 1.381828727289828e-05, + "loss": 0.0957, + "step": 40455 + }, + { + "epoch": 1.89, + "learning_rate": 1.3817503487843496e-05, + "loss": 0.1398, + "step": 40460 + }, + { + "epoch": 1.89, + "learning_rate": 1.3816719702788708e-05, + "loss": 0.109, + "step": 40465 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815935917733922e-05, + "loss": 0.2193, + "step": 40470 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815152132679136e-05, + "loss": 0.2995, + "step": 40475 + }, + { + "epoch": 1.89, + "learning_rate": 1.381436834762435e-05, + "loss": 0.336, + "step": 40480 + }, + { + "epoch": 1.89, + "learning_rate": 1.3813584562569562e-05, + "loss": 0.2809, + "step": 40485 + }, + { + "epoch": 1.89, + "learning_rate": 1.3812800777514777e-05, + "loss": 0.0376, + "step": 40490 + }, + { + "epoch": 1.89, + "learning_rate": 1.381201699245999e-05, + "loss": 0.0265, + "step": 40495 + }, + { + "epoch": 1.89, + "learning_rate": 1.3811233207405202e-05, + "loss": 0.0517, + "step": 40500 + }, + { + "epoch": 1.89, + "learning_rate": 1.3810449422350416e-05, + "loss": 0.0746, + "step": 40505 + }, + { + "epoch": 1.89, + "learning_rate": 1.3809665637295628e-05, + "loss": 0.0954, + "step": 40510 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808881852240844e-05, + "loss": 0.1229, + "step": 40515 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808098067186056e-05, + "loss": 0.1508, + "step": 40520 + }, + { + "epoch": 1.89, + "learning_rate": 1.380731428213127e-05, + "loss": 0.2266, + "step": 40525 + }, + { + "epoch": 1.89, + "learning_rate": 1.3806530497076482e-05, + "loss": 0.3108, + "step": 40530 + }, + { + "epoch": 1.89, + "learning_rate": 1.3805746712021697e-05, + "loss": 0.2541, + "step": 40535 + }, + { + "epoch": 1.89, + "learning_rate": 1.380496292696691e-05, + "loss": 0.0478, + "step": 40540 + }, + { + "epoch": 1.89, + "learning_rate": 1.3804179141912124e-05, + "loss": 0.0496, + "step": 40545 + }, + { + "epoch": 1.89, + "learning_rate": 1.3803395356857336e-05, + "loss": 0.0953, + "step": 40550 + }, + { + "epoch": 1.89, + "learning_rate": 1.3802611571802551e-05, + "loss": 0.1024, + "step": 40555 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801827786747764e-05, + "loss": 0.0996, + "step": 40560 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801044001692976e-05, + "loss": 0.1564, + "step": 40565 + }, + { + "epoch": 1.89, + "learning_rate": 1.380026021663819e-05, + "loss": 0.2166, + "step": 40570 + }, + { + "epoch": 1.89, + "learning_rate": 1.3799476431583404e-05, + "loss": 0.226, + "step": 40575 + }, + { + "epoch": 1.89, + "learning_rate": 1.3798692646528618e-05, + "loss": 0.219, + "step": 40580 + }, + { + "epoch": 1.89, + "learning_rate": 1.379790886147383e-05, + "loss": 0.1555, + "step": 40585 + }, + { + "epoch": 1.89, + "learning_rate": 1.3797125076419045e-05, + "loss": 0.0567, + "step": 40590 + }, + { + "epoch": 1.89, + "learning_rate": 1.3796341291364258e-05, + "loss": 0.0816, + "step": 40595 + }, + { + "epoch": 1.89, + "learning_rate": 1.3795557506309471e-05, + "loss": 0.0672, + "step": 40600 + }, + { + "epoch": 1.89, + "learning_rate": 1.3794773721254684e-05, + "loss": 0.0293, + "step": 40605 + }, + { + "epoch": 1.89, + "learning_rate": 1.37939899361999e-05, + "loss": 0.1691, + "step": 40610 + }, + { + "epoch": 1.9, + "learning_rate": 1.3793206151145111e-05, + "loss": 0.1475, + "step": 40615 + }, + { + "epoch": 1.9, + "learning_rate": 1.3792422366090325e-05, + "loss": 0.1439, + "step": 40620 + }, + { + "epoch": 1.9, + "learning_rate": 1.3791638581035538e-05, + "loss": 0.2063, + "step": 40625 + }, + { + "epoch": 1.9, + "learning_rate": 1.379085479598075e-05, + "loss": 0.2501, + "step": 40630 + }, + { + "epoch": 1.9, + "learning_rate": 1.3790071010925965e-05, + "loss": 0.24, + "step": 40635 + }, + { + "epoch": 1.9, + "learning_rate": 1.3789287225871178e-05, + "loss": 0.0474, + "step": 40640 + }, + { + "epoch": 1.9, + "learning_rate": 1.3788503440816392e-05, + "loss": 0.0137, + "step": 40645 + }, + { + "epoch": 1.9, + "learning_rate": 1.3787719655761604e-05, + "loss": 0.0644, + "step": 40650 + }, + { + "epoch": 1.9, + "learning_rate": 1.378693587070682e-05, + "loss": 0.0847, + "step": 40655 + }, + { + "epoch": 1.9, + "learning_rate": 1.3786152085652032e-05, + "loss": 0.136, + "step": 40660 + }, + { + "epoch": 1.9, + "learning_rate": 1.3785368300597245e-05, + "loss": 0.1225, + "step": 40665 + }, + { + "epoch": 1.9, + "learning_rate": 1.3784584515542458e-05, + "loss": 0.1019, + "step": 40670 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783800730487673e-05, + "loss": 0.2475, + "step": 40675 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783016945432885e-05, + "loss": 0.2234, + "step": 40680 + }, + { + "epoch": 1.9, + "learning_rate": 1.37822331603781e-05, + "loss": 0.2204, + "step": 40685 + }, + { + "epoch": 1.9, + "learning_rate": 1.3781449375323313e-05, + "loss": 0.0354, + "step": 40690 + }, + { + "epoch": 1.9, + "learning_rate": 1.3780665590268525e-05, + "loss": 0.0904, + "step": 40695 + }, + { + "epoch": 1.9, + "learning_rate": 1.377988180521374e-05, + "loss": 0.0707, + "step": 40700 + }, + { + "epoch": 1.9, + "learning_rate": 1.3779098020158952e-05, + "loss": 0.0884, + "step": 40705 + }, + { + "epoch": 1.9, + "learning_rate": 1.3778314235104167e-05, + "loss": 0.1006, + "step": 40710 + }, + { + "epoch": 1.9, + "learning_rate": 1.377753045004938e-05, + "loss": 0.0914, + "step": 40715 + }, + { + "epoch": 1.9, + "learning_rate": 1.3776746664994593e-05, + "loss": 0.2008, + "step": 40720 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775962879939806e-05, + "loss": 0.1749, + "step": 40725 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775179094885021e-05, + "loss": 0.3868, + "step": 40730 + }, + { + "epoch": 1.9, + "learning_rate": 1.3774395309830233e-05, + "loss": 0.3199, + "step": 40735 + }, + { + "epoch": 1.9, + "learning_rate": 1.3773611524775447e-05, + "loss": 0.065, + "step": 40740 + }, + { + "epoch": 1.9, + "learning_rate": 1.377282773972066e-05, + "loss": 0.071, + "step": 40745 + }, + { + "epoch": 1.9, + "learning_rate": 1.3772043954665875e-05, + "loss": 0.0642, + "step": 40750 + }, + { + "epoch": 1.9, + "learning_rate": 1.3771260169611087e-05, + "loss": 0.1533, + "step": 40755 + }, + { + "epoch": 1.9, + "learning_rate": 1.37704763845563e-05, + "loss": 0.0883, + "step": 40760 + }, + { + "epoch": 1.9, + "learning_rate": 1.3769692599501513e-05, + "loss": 0.2085, + "step": 40765 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768908814446726e-05, + "loss": 0.1509, + "step": 40770 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768125029391941e-05, + "loss": 0.1984, + "step": 40775 + }, + { + "epoch": 1.9, + "learning_rate": 1.3767341244337153e-05, + "loss": 0.4016, + "step": 40780 + }, + { + "epoch": 1.9, + "learning_rate": 1.3766557459282367e-05, + "loss": 0.2388, + "step": 40785 + }, + { + "epoch": 1.9, + "learning_rate": 1.3765773674227581e-05, + "loss": 0.0885, + "step": 40790 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764989889172795e-05, + "loss": 0.0829, + "step": 40795 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764206104118007e-05, + "loss": 0.0609, + "step": 40800 + }, + { + "epoch": 1.9, + "learning_rate": 1.3763422319063223e-05, + "loss": 0.0371, + "step": 40805 + }, + { + "epoch": 1.9, + "learning_rate": 1.3762638534008435e-05, + "loss": 0.0782, + "step": 40810 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761854748953649e-05, + "loss": 0.1198, + "step": 40815 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761070963898861e-05, + "loss": 0.1064, + "step": 40820 + }, + { + "epoch": 1.9, + "learning_rate": 1.3760287178844073e-05, + "loss": 0.2126, + "step": 40825 + }, + { + "epoch": 1.91, + "learning_rate": 1.3759503393789289e-05, + "loss": 0.2644, + "step": 40830 + }, + { + "epoch": 1.91, + "learning_rate": 1.3758719608734501e-05, + "loss": 0.2017, + "step": 40835 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757935823679715e-05, + "loss": 0.0589, + "step": 40840 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757152038624927e-05, + "loss": 0.0665, + "step": 40845 + }, + { + "epoch": 1.91, + "learning_rate": 1.3756368253570143e-05, + "loss": 0.0637, + "step": 40850 + }, + { + "epoch": 1.91, + "learning_rate": 1.3755584468515355e-05, + "loss": 0.1055, + "step": 40855 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754800683460569e-05, + "loss": 0.0967, + "step": 40860 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754016898405781e-05, + "loss": 0.1057, + "step": 40865 + }, + { + "epoch": 1.91, + "learning_rate": 1.3753233113350997e-05, + "loss": 0.1226, + "step": 40870 + }, + { + "epoch": 1.91, + "learning_rate": 1.3752449328296209e-05, + "loss": 0.1455, + "step": 40875 + }, + { + "epoch": 1.91, + "learning_rate": 1.3751665543241423e-05, + "loss": 0.4124, + "step": 40880 + }, + { + "epoch": 1.91, + "learning_rate": 1.3750881758186635e-05, + "loss": 0.2321, + "step": 40885 + }, + { + "epoch": 1.91, + "learning_rate": 1.375009797313185e-05, + "loss": 0.0824, + "step": 40890 + }, + { + "epoch": 1.91, + "learning_rate": 1.3749314188077063e-05, + "loss": 0.0691, + "step": 40895 + }, + { + "epoch": 1.91, + "learning_rate": 1.3748530403022275e-05, + "loss": 0.0824, + "step": 40900 + }, + { + "epoch": 1.91, + "learning_rate": 1.3747746617967491e-05, + "loss": 0.1582, + "step": 40905 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746962832912703e-05, + "loss": 0.104, + "step": 40910 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746179047857917e-05, + "loss": 0.1756, + "step": 40915 + }, + { + "epoch": 1.91, + "learning_rate": 1.374539526280313e-05, + "loss": 0.1663, + "step": 40920 + }, + { + "epoch": 1.91, + "learning_rate": 1.3744611477748345e-05, + "loss": 0.2132, + "step": 40925 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743827692693557e-05, + "loss": 0.2881, + "step": 40930 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743043907638771e-05, + "loss": 0.1533, + "step": 40935 + }, + { + "epoch": 1.91, + "learning_rate": 1.3742260122583983e-05, + "loss": 0.034, + "step": 40940 + }, + { + "epoch": 1.91, + "learning_rate": 1.3741476337529199e-05, + "loss": 0.0692, + "step": 40945 + }, + { + "epoch": 1.91, + "learning_rate": 1.3740692552474411e-05, + "loss": 0.0526, + "step": 40950 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739908767419623e-05, + "loss": 0.1364, + "step": 40955 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739124982364837e-05, + "loss": 0.0757, + "step": 40960 + }, + { + "epoch": 1.91, + "learning_rate": 1.373834119731005e-05, + "loss": 0.1443, + "step": 40965 + }, + { + "epoch": 1.91, + "learning_rate": 1.3737557412255265e-05, + "loss": 0.2183, + "step": 40970 + }, + { + "epoch": 1.91, + "learning_rate": 1.3736773627200477e-05, + "loss": 0.2046, + "step": 40975 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735989842145691e-05, + "loss": 0.3461, + "step": 40980 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735206057090903e-05, + "loss": 0.268, + "step": 40985 + }, + { + "epoch": 1.91, + "learning_rate": 1.3734422272036119e-05, + "loss": 0.0231, + "step": 40990 + }, + { + "epoch": 1.91, + "learning_rate": 1.3733638486981331e-05, + "loss": 0.1039, + "step": 40995 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732854701926545e-05, + "loss": 0.0878, + "step": 41000 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732070916871759e-05, + "loss": 0.0818, + "step": 41005 + }, + { + "epoch": 1.91, + "learning_rate": 1.3731287131816973e-05, + "loss": 0.108, + "step": 41010 + }, + { + "epoch": 1.91, + "learning_rate": 1.3730503346762185e-05, + "loss": 0.1776, + "step": 41015 + }, + { + "epoch": 1.91, + "learning_rate": 1.3729719561707397e-05, + "loss": 0.0533, + "step": 41020 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728935776652613e-05, + "loss": 0.1883, + "step": 41025 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728151991597825e-05, + "loss": 0.3926, + "step": 41030 + }, + { + "epoch": 1.91, + "learning_rate": 1.3727368206543039e-05, + "loss": 0.2241, + "step": 41035 + }, + { + "epoch": 1.91, + "learning_rate": 1.3726584421488251e-05, + "loss": 0.0445, + "step": 41040 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725800636433467e-05, + "loss": 0.087, + "step": 41045 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725016851378679e-05, + "loss": 0.0322, + "step": 41050 + }, + { + "epoch": 1.92, + "learning_rate": 1.3724233066323893e-05, + "loss": 0.0478, + "step": 41055 + }, + { + "epoch": 1.92, + "learning_rate": 1.3723449281269105e-05, + "loss": 0.1294, + "step": 41060 + }, + { + "epoch": 1.92, + "learning_rate": 1.372266549621432e-05, + "loss": 0.1646, + "step": 41065 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721881711159533e-05, + "loss": 0.1664, + "step": 41070 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721097926104747e-05, + "loss": 0.2329, + "step": 41075 + }, + { + "epoch": 1.92, + "learning_rate": 1.3720314141049959e-05, + "loss": 0.2623, + "step": 41080 + }, + { + "epoch": 1.92, + "learning_rate": 1.3719530355995173e-05, + "loss": 0.2138, + "step": 41085 + }, + { + "epoch": 1.92, + "learning_rate": 1.3718746570940387e-05, + "loss": 0.0504, + "step": 41090 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717962785885599e-05, + "loss": 0.0839, + "step": 41095 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717179000830813e-05, + "loss": 0.0589, + "step": 41100 + }, + { + "epoch": 1.92, + "learning_rate": 1.3716395215776027e-05, + "loss": 0.1023, + "step": 41105 + }, + { + "epoch": 1.92, + "learning_rate": 1.371561143072124e-05, + "loss": 0.1489, + "step": 41110 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714827645666453e-05, + "loss": 0.1497, + "step": 41115 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714043860611669e-05, + "loss": 0.1494, + "step": 41120 + }, + { + "epoch": 1.92, + "learning_rate": 1.371326007555688e-05, + "loss": 0.2351, + "step": 41125 + }, + { + "epoch": 1.92, + "learning_rate": 1.3712476290502095e-05, + "loss": 0.3537, + "step": 41130 + }, + { + "epoch": 1.92, + "learning_rate": 1.3711692505447307e-05, + "loss": 0.3639, + "step": 41135 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710908720392522e-05, + "loss": 0.0352, + "step": 41140 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710124935337735e-05, + "loss": 0.0505, + "step": 41145 + }, + { + "epoch": 1.92, + "learning_rate": 1.3709341150282947e-05, + "loss": 0.0559, + "step": 41150 + }, + { + "epoch": 1.92, + "learning_rate": 1.370855736522816e-05, + "loss": 0.0432, + "step": 41155 + }, + { + "epoch": 1.92, + "learning_rate": 1.3707773580173373e-05, + "loss": 0.2254, + "step": 41160 + }, + { + "epoch": 1.92, + "learning_rate": 1.3706989795118589e-05, + "loss": 0.1569, + "step": 41165 + }, + { + "epoch": 1.92, + "learning_rate": 1.37062060100638e-05, + "loss": 0.0941, + "step": 41170 + }, + { + "epoch": 1.92, + "learning_rate": 1.3705422225009015e-05, + "loss": 0.2558, + "step": 41175 + }, + { + "epoch": 1.92, + "learning_rate": 1.3704638439954227e-05, + "loss": 0.2511, + "step": 41180 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703854654899443e-05, + "loss": 0.3308, + "step": 41185 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703070869844655e-05, + "loss": 0.0499, + "step": 41190 + }, + { + "epoch": 1.92, + "learning_rate": 1.3702287084789869e-05, + "loss": 0.0471, + "step": 41195 + }, + { + "epoch": 1.92, + "learning_rate": 1.370150329973508e-05, + "loss": 0.063, + "step": 41200 + }, + { + "epoch": 1.92, + "learning_rate": 1.3700719514680296e-05, + "loss": 0.0849, + "step": 41205 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699935729625509e-05, + "loss": 0.1009, + "step": 41210 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699151944570721e-05, + "loss": 0.1324, + "step": 41215 + }, + { + "epoch": 1.92, + "learning_rate": 1.3698368159515936e-05, + "loss": 0.145, + "step": 41220 + }, + { + "epoch": 1.92, + "learning_rate": 1.3697584374461149e-05, + "loss": 0.206, + "step": 41225 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696800589406363e-05, + "loss": 0.3881, + "step": 41230 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696016804351575e-05, + "loss": 0.3881, + "step": 41235 + }, + { + "epoch": 1.92, + "learning_rate": 1.369523301929679e-05, + "loss": 0.1123, + "step": 41240 + }, + { + "epoch": 1.92, + "learning_rate": 1.3694449234242003e-05, + "loss": 0.0195, + "step": 41245 + }, + { + "epoch": 1.92, + "learning_rate": 1.3693665449187217e-05, + "loss": 0.1247, + "step": 41250 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692881664132429e-05, + "loss": 0.0557, + "step": 41255 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692097879077644e-05, + "loss": 0.0885, + "step": 41260 + }, + { + "epoch": 1.93, + "learning_rate": 1.3691314094022857e-05, + "loss": 0.1122, + "step": 41265 + }, + { + "epoch": 1.93, + "learning_rate": 1.369053030896807e-05, + "loss": 0.2121, + "step": 41270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3689746523913283e-05, + "loss": 0.1881, + "step": 41275 + }, + { + "epoch": 1.93, + "learning_rate": 1.3688962738858495e-05, + "loss": 0.2472, + "step": 41280 + }, + { + "epoch": 1.93, + "learning_rate": 1.368817895380371e-05, + "loss": 0.1782, + "step": 41285 + }, + { + "epoch": 1.93, + "learning_rate": 1.3687395168748923e-05, + "loss": 0.0595, + "step": 41290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3686611383694137e-05, + "loss": 0.0678, + "step": 41295 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685827598639349e-05, + "loss": 0.1161, + "step": 41300 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685043813584564e-05, + "loss": 0.0635, + "step": 41305 + }, + { + "epoch": 1.93, + "learning_rate": 1.3684260028529777e-05, + "loss": 0.1394, + "step": 41310 + }, + { + "epoch": 1.93, + "learning_rate": 1.368347624347499e-05, + "loss": 0.1814, + "step": 41315 + }, + { + "epoch": 1.93, + "learning_rate": 1.3682692458420204e-05, + "loss": 0.3026, + "step": 41320 + }, + { + "epoch": 1.93, + "learning_rate": 1.3681908673365418e-05, + "loss": 0.2154, + "step": 41325 + }, + { + "epoch": 1.93, + "learning_rate": 1.368112488831063e-05, + "loss": 0.2504, + "step": 41330 + }, + { + "epoch": 1.93, + "learning_rate": 1.3680341103255846e-05, + "loss": 0.3073, + "step": 41335 + }, + { + "epoch": 1.93, + "learning_rate": 1.3679557318201058e-05, + "loss": 0.0638, + "step": 41340 + }, + { + "epoch": 1.93, + "learning_rate": 1.367877353314627e-05, + "loss": 0.0279, + "step": 41345 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677989748091484e-05, + "loss": 0.1001, + "step": 41350 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677205963036697e-05, + "loss": 0.0529, + "step": 41355 + }, + { + "epoch": 1.93, + "learning_rate": 1.3676422177981912e-05, + "loss": 0.1054, + "step": 41360 + }, + { + "epoch": 1.93, + "learning_rate": 1.3675638392927124e-05, + "loss": 0.111, + "step": 41365 + }, + { + "epoch": 1.93, + "learning_rate": 1.3674854607872338e-05, + "loss": 0.0676, + "step": 41370 + }, + { + "epoch": 1.93, + "learning_rate": 1.367407082281755e-05, + "loss": 0.243, + "step": 41375 + }, + { + "epoch": 1.93, + "learning_rate": 1.3673287037762766e-05, + "loss": 0.4077, + "step": 41380 + }, + { + "epoch": 1.93, + "learning_rate": 1.3672503252707978e-05, + "loss": 0.3552, + "step": 41385 + }, + { + "epoch": 1.93, + "learning_rate": 1.3671719467653192e-05, + "loss": 0.0411, + "step": 41390 + }, + { + "epoch": 1.93, + "learning_rate": 1.3670935682598405e-05, + "loss": 0.039, + "step": 41395 + }, + { + "epoch": 1.93, + "learning_rate": 1.367015189754362e-05, + "loss": 0.0688, + "step": 41400 + }, + { + "epoch": 1.93, + "learning_rate": 1.3669368112488832e-05, + "loss": 0.1222, + "step": 41405 + }, + { + "epoch": 1.93, + "learning_rate": 1.3668584327434045e-05, + "loss": 0.073, + "step": 41410 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667800542379258e-05, + "loss": 0.0939, + "step": 41415 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667016757324472e-05, + "loss": 0.1917, + "step": 41420 + }, + { + "epoch": 1.93, + "learning_rate": 1.3666232972269686e-05, + "loss": 0.1409, + "step": 41425 + }, + { + "epoch": 1.93, + "learning_rate": 1.3665449187214898e-05, + "loss": 0.3746, + "step": 41430 + }, + { + "epoch": 1.93, + "learning_rate": 1.3664665402160114e-05, + "loss": 0.3048, + "step": 41435 + }, + { + "epoch": 1.93, + "learning_rate": 1.3663881617105326e-05, + "loss": 0.0518, + "step": 41440 + }, + { + "epoch": 1.93, + "learning_rate": 1.366309783205054e-05, + "loss": 0.0805, + "step": 41445 + }, + { + "epoch": 1.93, + "learning_rate": 1.3662314046995752e-05, + "loss": 0.0876, + "step": 41450 + }, + { + "epoch": 1.93, + "learning_rate": 1.3661530261940968e-05, + "loss": 0.0906, + "step": 41455 + }, + { + "epoch": 1.93, + "learning_rate": 1.366074647688618e-05, + "loss": 0.0911, + "step": 41460 + }, + { + "epoch": 1.93, + "learning_rate": 1.3659962691831394e-05, + "loss": 0.1121, + "step": 41465 + }, + { + "epoch": 1.94, + "learning_rate": 1.3659178906776606e-05, + "loss": 0.1549, + "step": 41470 + }, + { + "epoch": 1.94, + "learning_rate": 1.3658395121721819e-05, + "loss": 0.2158, + "step": 41475 + }, + { + "epoch": 1.94, + "learning_rate": 1.3657611336667034e-05, + "loss": 0.3177, + "step": 41480 + }, + { + "epoch": 1.94, + "learning_rate": 1.3656827551612246e-05, + "loss": 0.2616, + "step": 41485 + }, + { + "epoch": 1.94, + "learning_rate": 1.365604376655746e-05, + "loss": 0.0809, + "step": 41490 + }, + { + "epoch": 1.94, + "learning_rate": 1.3655259981502672e-05, + "loss": 0.0507, + "step": 41495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3654476196447888e-05, + "loss": 0.0408, + "step": 41500 + }, + { + "epoch": 1.94, + "learning_rate": 1.36536924113931e-05, + "loss": 0.079, + "step": 41505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652908626338314e-05, + "loss": 0.1017, + "step": 41510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652124841283526e-05, + "loss": 0.0745, + "step": 41515 + }, + { + "epoch": 1.94, + "learning_rate": 1.3651341056228742e-05, + "loss": 0.1234, + "step": 41520 + }, + { + "epoch": 1.94, + "learning_rate": 1.3650557271173954e-05, + "loss": 0.23, + "step": 41525 + }, + { + "epoch": 1.94, + "learning_rate": 1.3649773486119168e-05, + "loss": 0.2216, + "step": 41530 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648989701064382e-05, + "loss": 0.2246, + "step": 41535 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648205916009594e-05, + "loss": 0.0395, + "step": 41540 + }, + { + "epoch": 1.94, + "learning_rate": 1.3647422130954808e-05, + "loss": 0.0559, + "step": 41545 + }, + { + "epoch": 1.94, + "learning_rate": 1.364663834590002e-05, + "loss": 0.0517, + "step": 41550 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645854560845236e-05, + "loss": 0.1465, + "step": 41555 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645070775790448e-05, + "loss": 0.2362, + "step": 41560 + }, + { + "epoch": 1.94, + "learning_rate": 1.3644286990735662e-05, + "loss": 0.1821, + "step": 41565 + }, + { + "epoch": 1.94, + "learning_rate": 1.3643503205680874e-05, + "loss": 0.1984, + "step": 41570 + }, + { + "epoch": 1.94, + "learning_rate": 1.364271942062609e-05, + "loss": 0.2761, + "step": 41575 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641935635571302e-05, + "loss": 0.4789, + "step": 41580 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641151850516516e-05, + "loss": 0.2259, + "step": 41585 + }, + { + "epoch": 1.94, + "learning_rate": 1.3640368065461728e-05, + "loss": 0.0482, + "step": 41590 + }, + { + "epoch": 1.94, + "learning_rate": 1.3639584280406944e-05, + "loss": 0.0146, + "step": 41595 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638800495352156e-05, + "loss": 0.1254, + "step": 41600 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638016710297368e-05, + "loss": 0.0778, + "step": 41605 + }, + { + "epoch": 1.94, + "learning_rate": 1.3637232925242582e-05, + "loss": 0.0651, + "step": 41610 + }, + { + "epoch": 1.94, + "learning_rate": 1.3636449140187794e-05, + "loss": 0.1359, + "step": 41615 + }, + { + "epoch": 1.94, + "learning_rate": 1.363566535513301e-05, + "loss": 0.2115, + "step": 41620 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634881570078222e-05, + "loss": 0.1439, + "step": 41625 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634097785023436e-05, + "loss": 0.3997, + "step": 41630 + }, + { + "epoch": 1.94, + "learning_rate": 1.363331399996865e-05, + "loss": 0.2421, + "step": 41635 + }, + { + "epoch": 1.94, + "learning_rate": 1.3632530214913864e-05, + "loss": 0.0331, + "step": 41640 + }, + { + "epoch": 1.94, + "learning_rate": 1.3631746429859076e-05, + "loss": 0.1243, + "step": 41645 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630962644804292e-05, + "loss": 0.147, + "step": 41650 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630178859749504e-05, + "loss": 0.0483, + "step": 41655 + }, + { + "epoch": 1.94, + "learning_rate": 1.3629395074694718e-05, + "loss": 0.0896, + "step": 41660 + }, + { + "epoch": 1.94, + "learning_rate": 1.362861128963993e-05, + "loss": 0.1532, + "step": 41665 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627827504585142e-05, + "loss": 0.1593, + "step": 41670 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627043719530358e-05, + "loss": 0.1917, + "step": 41675 + }, + { + "epoch": 1.94, + "learning_rate": 1.362625993447557e-05, + "loss": 0.318, + "step": 41680 + }, + { + "epoch": 1.95, + "learning_rate": 1.3625476149420784e-05, + "loss": 0.3726, + "step": 41685 + }, + { + "epoch": 1.95, + "learning_rate": 1.3624692364365996e-05, + "loss": 0.0432, + "step": 41690 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623908579311212e-05, + "loss": 0.0398, + "step": 41695 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623124794256424e-05, + "loss": 0.0579, + "step": 41700 + }, + { + "epoch": 1.95, + "learning_rate": 1.3622341009201638e-05, + "loss": 0.0813, + "step": 41705 + }, + { + "epoch": 1.95, + "learning_rate": 1.362155722414685e-05, + "loss": 0.0798, + "step": 41710 + }, + { + "epoch": 1.95, + "learning_rate": 1.3620773439092066e-05, + "loss": 0.0768, + "step": 41715 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619989654037278e-05, + "loss": 0.1886, + "step": 41720 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619205868982492e-05, + "loss": 0.1185, + "step": 41725 + }, + { + "epoch": 1.95, + "learning_rate": 1.3618422083927704e-05, + "loss": 0.2939, + "step": 41730 + }, + { + "epoch": 1.95, + "learning_rate": 1.3617638298872918e-05, + "loss": 0.295, + "step": 41735 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616854513818132e-05, + "loss": 0.028, + "step": 41740 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616070728763344e-05, + "loss": 0.0461, + "step": 41745 + }, + { + "epoch": 1.95, + "learning_rate": 1.361528694370856e-05, + "loss": 0.0892, + "step": 41750 + }, + { + "epoch": 1.95, + "learning_rate": 1.3614503158653772e-05, + "loss": 0.0907, + "step": 41755 + }, + { + "epoch": 1.95, + "learning_rate": 1.3613719373598986e-05, + "loss": 0.0892, + "step": 41760 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612935588544198e-05, + "loss": 0.1427, + "step": 41765 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612151803489414e-05, + "loss": 0.108, + "step": 41770 + }, + { + "epoch": 1.95, + "learning_rate": 1.3611368018434626e-05, + "loss": 0.206, + "step": 41775 + }, + { + "epoch": 1.95, + "learning_rate": 1.361058423337984e-05, + "loss": 0.2341, + "step": 41780 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609800448325052e-05, + "loss": 0.2855, + "step": 41785 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609016663270268e-05, + "loss": 0.0509, + "step": 41790 + }, + { + "epoch": 1.95, + "learning_rate": 1.360823287821548e-05, + "loss": 0.0388, + "step": 41795 + }, + { + "epoch": 1.95, + "learning_rate": 1.3607449093160692e-05, + "loss": 0.0668, + "step": 41800 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606665308105906e-05, + "loss": 0.067, + "step": 41805 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605881523051118e-05, + "loss": 0.1153, + "step": 41810 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605097737996334e-05, + "loss": 0.183, + "step": 41815 + }, + { + "epoch": 1.95, + "learning_rate": 1.3604313952941546e-05, + "loss": 0.2191, + "step": 41820 + }, + { + "epoch": 1.95, + "learning_rate": 1.360353016788676e-05, + "loss": 0.1736, + "step": 41825 + }, + { + "epoch": 1.95, + "learning_rate": 1.3602746382831972e-05, + "loss": 0.3504, + "step": 41830 + }, + { + "epoch": 1.95, + "learning_rate": 1.3601962597777188e-05, + "loss": 0.2775, + "step": 41835 + }, + { + "epoch": 1.95, + "learning_rate": 1.36011788127224e-05, + "loss": 0.0406, + "step": 41840 + }, + { + "epoch": 1.95, + "learning_rate": 1.3600395027667614e-05, + "loss": 0.0676, + "step": 41845 + }, + { + "epoch": 1.95, + "learning_rate": 1.3599611242612828e-05, + "loss": 0.0566, + "step": 41850 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598827457558042e-05, + "loss": 0.0765, + "step": 41855 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598043672503254e-05, + "loss": 0.1108, + "step": 41860 + }, + { + "epoch": 1.95, + "learning_rate": 1.3597259887448466e-05, + "loss": 0.1186, + "step": 41865 + }, + { + "epoch": 1.95, + "learning_rate": 1.3596476102393682e-05, + "loss": 0.1556, + "step": 41870 + }, + { + "epoch": 1.95, + "learning_rate": 1.3595692317338894e-05, + "loss": 0.16, + "step": 41875 + }, + { + "epoch": 1.95, + "learning_rate": 1.3594908532284108e-05, + "loss": 0.3715, + "step": 41880 + }, + { + "epoch": 1.95, + "learning_rate": 1.359412474722932e-05, + "loss": 0.272, + "step": 41885 + }, + { + "epoch": 1.95, + "learning_rate": 1.3593340962174535e-05, + "loss": 0.0219, + "step": 41890 + }, + { + "epoch": 1.95, + "learning_rate": 1.3592557177119748e-05, + "loss": 0.0154, + "step": 41895 + }, + { + "epoch": 1.96, + "learning_rate": 1.3591773392064962e-05, + "loss": 0.1363, + "step": 41900 + }, + { + "epoch": 1.96, + "learning_rate": 1.3590989607010174e-05, + "loss": 0.074, + "step": 41905 + }, + { + "epoch": 1.96, + "learning_rate": 1.359020582195539e-05, + "loss": 0.1535, + "step": 41910 + }, + { + "epoch": 1.96, + "learning_rate": 1.3589422036900602e-05, + "loss": 0.0951, + "step": 41915 + }, + { + "epoch": 1.96, + "learning_rate": 1.3588638251845816e-05, + "loss": 0.1372, + "step": 41920 + }, + { + "epoch": 1.96, + "learning_rate": 1.3587854466791028e-05, + "loss": 0.2103, + "step": 41925 + }, + { + "epoch": 1.96, + "learning_rate": 1.358707068173624e-05, + "loss": 0.1851, + "step": 41930 + }, + { + "epoch": 1.96, + "learning_rate": 1.3586286896681456e-05, + "loss": 0.3416, + "step": 41935 + }, + { + "epoch": 1.96, + "learning_rate": 1.3585503111626668e-05, + "loss": 0.0271, + "step": 41940 + }, + { + "epoch": 1.96, + "learning_rate": 1.3584719326571882e-05, + "loss": 0.0605, + "step": 41945 + }, + { + "epoch": 1.96, + "learning_rate": 1.3583935541517096e-05, + "loss": 0.0437, + "step": 41950 + }, + { + "epoch": 1.96, + "learning_rate": 1.358315175646231e-05, + "loss": 0.0778, + "step": 41955 + }, + { + "epoch": 1.96, + "learning_rate": 1.3582367971407522e-05, + "loss": 0.0917, + "step": 41960 + }, + { + "epoch": 1.96, + "learning_rate": 1.3581584186352737e-05, + "loss": 0.1232, + "step": 41965 + }, + { + "epoch": 1.96, + "learning_rate": 1.358080040129795e-05, + "loss": 0.236, + "step": 41970 + }, + { + "epoch": 1.96, + "learning_rate": 1.3580016616243163e-05, + "loss": 0.2185, + "step": 41975 + }, + { + "epoch": 1.96, + "learning_rate": 1.3579232831188376e-05, + "loss": 0.393, + "step": 41980 + }, + { + "epoch": 1.96, + "learning_rate": 1.3578449046133591e-05, + "loss": 0.3043, + "step": 41985 + }, + { + "epoch": 1.96, + "learning_rate": 1.3577665261078803e-05, + "loss": 0.0506, + "step": 41990 + }, + { + "epoch": 1.96, + "learning_rate": 1.3576881476024016e-05, + "loss": 0.0796, + "step": 41995 + }, + { + "epoch": 1.96, + "learning_rate": 1.357609769096923e-05, + "loss": 0.0657, + "step": 42000 + }, + { + "epoch": 1.96, + "learning_rate": 1.3575313905914442e-05, + "loss": 0.0712, + "step": 42005 + }, + { + "epoch": 1.96, + "learning_rate": 1.3574530120859657e-05, + "loss": 0.1189, + "step": 42010 + }, + { + "epoch": 1.96, + "learning_rate": 1.357374633580487e-05, + "loss": 0.157, + "step": 42015 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572962550750083e-05, + "loss": 0.1085, + "step": 42020 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572178765695296e-05, + "loss": 0.2305, + "step": 42025 + }, + { + "epoch": 1.96, + "learning_rate": 1.3571394980640511e-05, + "loss": 0.3907, + "step": 42030 + }, + { + "epoch": 1.96, + "learning_rate": 1.3570611195585723e-05, + "loss": 0.2425, + "step": 42035 + }, + { + "epoch": 1.96, + "learning_rate": 1.3569827410530937e-05, + "loss": 0.039, + "step": 42040 + }, + { + "epoch": 1.96, + "learning_rate": 1.356904362547615e-05, + "loss": 0.0644, + "step": 42045 + }, + { + "epoch": 1.96, + "learning_rate": 1.3568259840421365e-05, + "loss": 0.0367, + "step": 42050 + }, + { + "epoch": 1.96, + "learning_rate": 1.3567476055366577e-05, + "loss": 0.0325, + "step": 42055 + }, + { + "epoch": 1.96, + "learning_rate": 1.356669227031179e-05, + "loss": 0.1135, + "step": 42060 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565908485257005e-05, + "loss": 0.1177, + "step": 42065 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565124700202217e-05, + "loss": 0.1744, + "step": 42070 + }, + { + "epoch": 1.96, + "learning_rate": 1.3564340915147431e-05, + "loss": 0.267, + "step": 42075 + }, + { + "epoch": 1.96, + "learning_rate": 1.3563557130092644e-05, + "loss": 0.2601, + "step": 42080 + }, + { + "epoch": 1.96, + "learning_rate": 1.3562773345037859e-05, + "loss": 0.3815, + "step": 42085 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561989559983071e-05, + "loss": 0.0435, + "step": 42090 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561205774928285e-05, + "loss": 0.0452, + "step": 42095 + }, + { + "epoch": 1.96, + "learning_rate": 1.3560421989873497e-05, + "loss": 0.1247, + "step": 42100 + }, + { + "epoch": 1.96, + "learning_rate": 1.3559638204818713e-05, + "loss": 0.0745, + "step": 42105 + }, + { + "epoch": 1.96, + "learning_rate": 1.3558854419763925e-05, + "loss": 0.1184, + "step": 42110 + }, + { + "epoch": 1.97, + "learning_rate": 1.355807063470914e-05, + "loss": 0.1522, + "step": 42115 + }, + { + "epoch": 1.97, + "learning_rate": 1.3557286849654351e-05, + "loss": 0.1299, + "step": 42120 + }, + { + "epoch": 1.97, + "learning_rate": 1.3556503064599564e-05, + "loss": 0.2057, + "step": 42125 + }, + { + "epoch": 1.97, + "learning_rate": 1.355571927954478e-05, + "loss": 0.2441, + "step": 42130 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554935494489991e-05, + "loss": 0.2413, + "step": 42135 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554151709435205e-05, + "loss": 0.063, + "step": 42140 + }, + { + "epoch": 1.97, + "learning_rate": 1.3553367924380418e-05, + "loss": 0.0286, + "step": 42145 + }, + { + "epoch": 1.97, + "learning_rate": 1.3552584139325633e-05, + "loss": 0.0437, + "step": 42150 + }, + { + "epoch": 1.97, + "learning_rate": 1.3551800354270845e-05, + "loss": 0.0742, + "step": 42155 + }, + { + "epoch": 1.97, + "learning_rate": 1.355101656921606e-05, + "loss": 0.1081, + "step": 42160 + }, + { + "epoch": 1.97, + "learning_rate": 1.3550232784161273e-05, + "loss": 0.1142, + "step": 42165 + }, + { + "epoch": 1.97, + "learning_rate": 1.3549448999106487e-05, + "loss": 0.1755, + "step": 42170 + }, + { + "epoch": 1.97, + "learning_rate": 1.35486652140517e-05, + "loss": 0.2252, + "step": 42175 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547881428996915e-05, + "loss": 0.2728, + "step": 42180 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547097643942127e-05, + "loss": 0.3548, + "step": 42185 + }, + { + "epoch": 1.97, + "learning_rate": 1.354631385888734e-05, + "loss": 0.1084, + "step": 42190 + }, + { + "epoch": 1.97, + "learning_rate": 1.3545530073832553e-05, + "loss": 0.0751, + "step": 42195 + }, + { + "epoch": 1.97, + "learning_rate": 1.3544746288777765e-05, + "loss": 0.1289, + "step": 42200 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543962503722981e-05, + "loss": 0.1643, + "step": 42205 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543178718668193e-05, + "loss": 0.1434, + "step": 42210 + }, + { + "epoch": 1.97, + "learning_rate": 1.3542394933613407e-05, + "loss": 0.1457, + "step": 42215 + }, + { + "epoch": 1.97, + "learning_rate": 1.354161114855862e-05, + "loss": 0.1197, + "step": 42220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540827363503835e-05, + "loss": 0.3008, + "step": 42225 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540043578449047e-05, + "loss": 0.2925, + "step": 42230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3539259793394261e-05, + "loss": 0.3225, + "step": 42235 + }, + { + "epoch": 1.97, + "learning_rate": 1.3538476008339473e-05, + "loss": 0.0174, + "step": 42240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3537692223284689e-05, + "loss": 0.0662, + "step": 42245 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536908438229901e-05, + "loss": 0.0883, + "step": 42250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536124653175113e-05, + "loss": 0.1134, + "step": 42255 + }, + { + "epoch": 1.97, + "learning_rate": 1.3535340868120327e-05, + "loss": 0.0576, + "step": 42260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3534557083065541e-05, + "loss": 0.1248, + "step": 42265 + }, + { + "epoch": 1.97, + "learning_rate": 1.3533773298010755e-05, + "loss": 0.1739, + "step": 42270 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532989512955967e-05, + "loss": 0.1615, + "step": 42275 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532205727901183e-05, + "loss": 0.2108, + "step": 42280 + }, + { + "epoch": 1.97, + "learning_rate": 1.3531421942846395e-05, + "loss": 0.1906, + "step": 42285 + }, + { + "epoch": 1.97, + "learning_rate": 1.3530638157791609e-05, + "loss": 0.0301, + "step": 42290 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529854372736821e-05, + "loss": 0.0248, + "step": 42295 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529070587682037e-05, + "loss": 0.0653, + "step": 42300 + }, + { + "epoch": 1.97, + "learning_rate": 1.3528286802627249e-05, + "loss": 0.1097, + "step": 42305 + }, + { + "epoch": 1.97, + "learning_rate": 1.3527503017572463e-05, + "loss": 0.1149, + "step": 42310 + }, + { + "epoch": 1.97, + "learning_rate": 1.3526719232517675e-05, + "loss": 0.1353, + "step": 42315 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525935447462887e-05, + "loss": 0.1043, + "step": 42320 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525151662408103e-05, + "loss": 0.3242, + "step": 42325 + }, + { + "epoch": 1.98, + "learning_rate": 1.3524367877353315e-05, + "loss": 0.2849, + "step": 42330 + }, + { + "epoch": 1.98, + "learning_rate": 1.3523584092298529e-05, + "loss": 0.3416, + "step": 42335 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522800307243741e-05, + "loss": 0.0547, + "step": 42340 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522016522188957e-05, + "loss": 0.0568, + "step": 42345 + }, + { + "epoch": 1.98, + "learning_rate": 1.3521232737134169e-05, + "loss": 0.0978, + "step": 42350 + }, + { + "epoch": 1.98, + "learning_rate": 1.3520448952079383e-05, + "loss": 0.1457, + "step": 42355 + }, + { + "epoch": 1.98, + "learning_rate": 1.3519665167024595e-05, + "loss": 0.0819, + "step": 42360 + }, + { + "epoch": 1.98, + "learning_rate": 1.351888138196981e-05, + "loss": 0.1787, + "step": 42365 + }, + { + "epoch": 1.98, + "learning_rate": 1.3518097596915023e-05, + "loss": 0.1941, + "step": 42370 + }, + { + "epoch": 1.98, + "learning_rate": 1.3517313811860237e-05, + "loss": 0.2267, + "step": 42375 + }, + { + "epoch": 1.98, + "learning_rate": 1.351653002680545e-05, + "loss": 0.377, + "step": 42380 + }, + { + "epoch": 1.98, + "learning_rate": 1.3515746241750663e-05, + "loss": 0.2612, + "step": 42385 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514962456695877e-05, + "loss": 0.0406, + "step": 42390 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514178671641089e-05, + "loss": 0.0252, + "step": 42395 + }, + { + "epoch": 1.98, + "learning_rate": 1.3513394886586305e-05, + "loss": 0.0102, + "step": 42400 + }, + { + "epoch": 1.98, + "learning_rate": 1.3512611101531517e-05, + "loss": 0.1246, + "step": 42405 + }, + { + "epoch": 1.98, + "learning_rate": 1.351182731647673e-05, + "loss": 0.1436, + "step": 42410 + }, + { + "epoch": 1.98, + "learning_rate": 1.3511043531421943e-05, + "loss": 0.1284, + "step": 42415 + }, + { + "epoch": 1.98, + "learning_rate": 1.3510259746367159e-05, + "loss": 0.1715, + "step": 42420 + }, + { + "epoch": 1.98, + "learning_rate": 1.3509475961312371e-05, + "loss": 0.2565, + "step": 42425 + }, + { + "epoch": 1.98, + "learning_rate": 1.3508692176257585e-05, + "loss": 0.2379, + "step": 42430 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507908391202797e-05, + "loss": 0.1811, + "step": 42435 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507124606148013e-05, + "loss": 0.0796, + "step": 42440 + }, + { + "epoch": 1.98, + "learning_rate": 1.3506340821093225e-05, + "loss": 0.0504, + "step": 42445 + }, + { + "epoch": 1.98, + "learning_rate": 1.3505557036038437e-05, + "loss": 0.1026, + "step": 42450 + }, + { + "epoch": 1.98, + "learning_rate": 1.3504773250983651e-05, + "loss": 0.0687, + "step": 42455 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503989465928863e-05, + "loss": 0.0637, + "step": 42460 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503205680874079e-05, + "loss": 0.0864, + "step": 42465 + }, + { + "epoch": 1.98, + "learning_rate": 1.3502421895819291e-05, + "loss": 0.1525, + "step": 42470 + }, + { + "epoch": 1.98, + "learning_rate": 1.3501638110764505e-05, + "loss": 0.1946, + "step": 42475 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500854325709719e-05, + "loss": 0.4185, + "step": 42480 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500070540654933e-05, + "loss": 0.2679, + "step": 42485 + }, + { + "epoch": 1.98, + "learning_rate": 1.3499286755600145e-05, + "loss": 0.0422, + "step": 42490 + }, + { + "epoch": 1.98, + "learning_rate": 1.349850297054536e-05, + "loss": 0.1082, + "step": 42495 + }, + { + "epoch": 1.98, + "learning_rate": 1.3497719185490573e-05, + "loss": 0.0823, + "step": 42500 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496935400435787e-05, + "loss": 0.0491, + "step": 42505 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496151615380999e-05, + "loss": 0.1773, + "step": 42510 + }, + { + "epoch": 1.98, + "learning_rate": 1.3495367830326211e-05, + "loss": 0.1344, + "step": 42515 + }, + { + "epoch": 1.98, + "learning_rate": 1.3494584045271427e-05, + "loss": 0.0834, + "step": 42520 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493800260216639e-05, + "loss": 0.2854, + "step": 42525 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493016475161853e-05, + "loss": 0.3165, + "step": 42530 + }, + { + "epoch": 1.98, + "learning_rate": 1.3492232690107065e-05, + "loss": 0.2865, + "step": 42535 + }, + { + "epoch": 1.98, + "learning_rate": 1.349144890505228e-05, + "loss": 0.0551, + "step": 42540 + }, + { + "epoch": 1.99, + "learning_rate": 1.3490665119997493e-05, + "loss": 0.0634, + "step": 42545 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489881334942707e-05, + "loss": 0.0897, + "step": 42550 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489097549887919e-05, + "loss": 0.1488, + "step": 42555 + }, + { + "epoch": 1.99, + "learning_rate": 1.3488313764833134e-05, + "loss": 0.1682, + "step": 42560 + }, + { + "epoch": 1.99, + "learning_rate": 1.3487529979778347e-05, + "loss": 0.1566, + "step": 42565 + }, + { + "epoch": 1.99, + "learning_rate": 1.348674619472356e-05, + "loss": 0.1352, + "step": 42570 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485962409668773e-05, + "loss": 0.2381, + "step": 42575 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485178624613987e-05, + "loss": 0.3136, + "step": 42580 + }, + { + "epoch": 1.99, + "learning_rate": 1.34843948395592e-05, + "loss": 0.2361, + "step": 42585 + }, + { + "epoch": 1.99, + "learning_rate": 1.3483611054504413e-05, + "loss": 0.0656, + "step": 42590 + }, + { + "epoch": 1.99, + "learning_rate": 1.3482827269449628e-05, + "loss": 0.0353, + "step": 42595 + }, + { + "epoch": 1.99, + "learning_rate": 1.348204348439484e-05, + "loss": 0.0433, + "step": 42600 + }, + { + "epoch": 1.99, + "learning_rate": 1.3481259699340055e-05, + "loss": 0.0876, + "step": 42605 + }, + { + "epoch": 1.99, + "learning_rate": 1.3480475914285267e-05, + "loss": 0.1082, + "step": 42610 + }, + { + "epoch": 1.99, + "learning_rate": 1.3479692129230482e-05, + "loss": 0.1904, + "step": 42615 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478908344175695e-05, + "loss": 0.2017, + "step": 42620 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478124559120908e-05, + "loss": 0.1698, + "step": 42625 + }, + { + "epoch": 1.99, + "learning_rate": 1.347734077406612e-05, + "loss": 0.4722, + "step": 42630 + }, + { + "epoch": 1.99, + "learning_rate": 1.3476556989011336e-05, + "loss": 0.2895, + "step": 42635 + }, + { + "epoch": 1.99, + "learning_rate": 1.3475773203956548e-05, + "loss": 0.0633, + "step": 42640 + }, + { + "epoch": 1.99, + "learning_rate": 1.347498941890176e-05, + "loss": 0.0354, + "step": 42645 + }, + { + "epoch": 1.99, + "learning_rate": 1.3474205633846975e-05, + "loss": 0.0921, + "step": 42650 + }, + { + "epoch": 1.99, + "learning_rate": 1.3473421848792187e-05, + "loss": 0.0774, + "step": 42655 + }, + { + "epoch": 1.99, + "learning_rate": 1.3472638063737402e-05, + "loss": 0.1163, + "step": 42660 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471854278682615e-05, + "loss": 0.1109, + "step": 42665 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471070493627829e-05, + "loss": 0.1748, + "step": 42670 + }, + { + "epoch": 1.99, + "learning_rate": 1.347028670857304e-05, + "loss": 0.1627, + "step": 42675 + }, + { + "epoch": 1.99, + "learning_rate": 1.3469502923518256e-05, + "loss": 0.3621, + "step": 42680 + }, + { + "epoch": 1.99, + "learning_rate": 1.3468719138463469e-05, + "loss": 0.193, + "step": 42685 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467935353408682e-05, + "loss": 0.0512, + "step": 42690 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467151568353896e-05, + "loss": 0.0666, + "step": 42695 + }, + { + "epoch": 1.99, + "learning_rate": 1.346636778329911e-05, + "loss": 0.1495, + "step": 42700 + }, + { + "epoch": 1.99, + "learning_rate": 1.3465583998244322e-05, + "loss": 0.13, + "step": 42705 + }, + { + "epoch": 1.99, + "learning_rate": 1.3464800213189535e-05, + "loss": 0.0941, + "step": 42710 + }, + { + "epoch": 1.99, + "learning_rate": 1.346401642813475e-05, + "loss": 0.1118, + "step": 42715 + }, + { + "epoch": 1.99, + "learning_rate": 1.3463232643079963e-05, + "loss": 0.1672, + "step": 42720 + }, + { + "epoch": 1.99, + "learning_rate": 1.3462448858025176e-05, + "loss": 0.2903, + "step": 42725 + }, + { + "epoch": 1.99, + "learning_rate": 1.3461665072970389e-05, + "loss": 0.5016, + "step": 42730 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460881287915604e-05, + "loss": 0.3474, + "step": 42735 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460097502860816e-05, + "loss": 0.0593, + "step": 42740 + }, + { + "epoch": 1.99, + "learning_rate": 1.345931371780603e-05, + "loss": 0.0545, + "step": 42745 + }, + { + "epoch": 1.99, + "learning_rate": 1.3458529932751243e-05, + "loss": 0.0924, + "step": 42750 + }, + { + "epoch": 2.0, + "learning_rate": 1.3457746147696458e-05, + "loss": 0.0801, + "step": 42755 + }, + { + "epoch": 2.0, + "learning_rate": 1.345696236264167e-05, + "loss": 0.0866, + "step": 42760 + }, + { + "epoch": 2.0, + "learning_rate": 1.3456178577586884e-05, + "loss": 0.1346, + "step": 42765 + }, + { + "epoch": 2.0, + "learning_rate": 1.3455394792532096e-05, + "loss": 0.1853, + "step": 42770 + }, + { + "epoch": 2.0, + "learning_rate": 1.3454611007477309e-05, + "loss": 0.1966, + "step": 42775 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453827222422524e-05, + "loss": 0.238, + "step": 42780 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453043437367737e-05, + "loss": 0.2965, + "step": 42785 + }, + { + "epoch": 2.0, + "learning_rate": 1.345225965231295e-05, + "loss": 0.1616, + "step": 42790 + }, + { + "epoch": 2.0, + "learning_rate": 1.3451475867258164e-05, + "loss": 0.0177, + "step": 42795 + }, + { + "epoch": 2.0, + "learning_rate": 1.3450692082203378e-05, + "loss": 0.0763, + "step": 42800 + }, + { + "epoch": 2.0, + "learning_rate": 1.344990829714859e-05, + "loss": 0.108, + "step": 42805 + }, + { + "epoch": 2.0, + "learning_rate": 1.3449124512093806e-05, + "loss": 0.0828, + "step": 42810 + }, + { + "epoch": 2.0, + "learning_rate": 1.3448340727039018e-05, + "loss": 0.0873, + "step": 42815 + }, + { + "epoch": 2.0, + "learning_rate": 1.3447556941984232e-05, + "loss": 0.1054, + "step": 42820 + }, + { + "epoch": 2.0, + "learning_rate": 1.3446773156929444e-05, + "loss": 0.2872, + "step": 42825 + }, + { + "epoch": 2.0, + "learning_rate": 1.344598937187466e-05, + "loss": 0.2569, + "step": 42830 + }, + { + "epoch": 2.0, + "learning_rate": 1.3445205586819872e-05, + "loss": 0.2157, + "step": 42835 + }, + { + "epoch": 2.0, + "learning_rate": 1.3444421801765084e-05, + "loss": 0.074, + "step": 42840 + }, + { + "epoch": 2.0, + "learning_rate": 1.3443638016710298e-05, + "loss": 0.1238, + "step": 42845 + }, + { + "epoch": 2.0, + "learning_rate": 1.344285423165551e-05, + "loss": 0.1821, + "step": 42850 + }, + { + "epoch": 2.0, + "learning_rate": 1.3442070446600726e-05, + "loss": 0.0898, + "step": 42855 + }, + { + "epoch": 2.0, + "learning_rate": 1.3441286661545938e-05, + "loss": 0.269, + "step": 42860 + }, + { + "epoch": 2.0, + "eval_cer": 0.015201216458283512, + "eval_loss": 0.34811559319496155, + "eval_runtime": 472.4357, + "eval_samples_per_second": 40.323, + "eval_steps_per_second": 5.042, + "eval_wer": 0.12934863064396743, + "step": 42862 + }, + { + "epoch": 2.0, + "learning_rate": 1.3440502876491152e-05, + "loss": 0.3587, + "step": 42865 + }, + { + "epoch": 2.0, + "learning_rate": 1.3439719091436364e-05, + "loss": 0.1501, + "step": 42870 + }, + { + "epoch": 2.0, + "learning_rate": 1.343893530638158e-05, + "loss": 0.0647, + "step": 42875 + }, + { + "epoch": 2.0, + "learning_rate": 1.3438151521326792e-05, + "loss": 0.049, + "step": 42880 + }, + { + "epoch": 2.0, + "learning_rate": 1.3437367736272006e-05, + "loss": 0.0998, + "step": 42885 + }, + { + "epoch": 2.0, + "learning_rate": 1.3436583951217218e-05, + "loss": 0.0791, + "step": 42890 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435800166162434e-05, + "loss": 0.2884, + "step": 42895 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435016381107646e-05, + "loss": 0.1765, + "step": 42900 + }, + { + "epoch": 2.0, + "learning_rate": 1.3434232596052858e-05, + "loss": 0.1953, + "step": 42905 + }, + { + "epoch": 2.0, + "learning_rate": 1.3433448810998074e-05, + "loss": 0.2493, + "step": 42910 + }, + { + "epoch": 2.0, + "learning_rate": 1.3432665025943286e-05, + "loss": 0.2425, + "step": 42915 + }, + { + "epoch": 2.0, + "learning_rate": 1.34318812408885e-05, + "loss": 0.0484, + "step": 42920 + }, + { + "epoch": 2.0, + "learning_rate": 1.3431097455833712e-05, + "loss": 0.0687, + "step": 42925 + }, + { + "epoch": 2.0, + "learning_rate": 1.3430313670778928e-05, + "loss": 0.0949, + "step": 42930 + }, + { + "epoch": 2.0, + "learning_rate": 1.342952988572414e-05, + "loss": 0.0677, + "step": 42935 + }, + { + "epoch": 2.0, + "learning_rate": 1.3428746100669354e-05, + "loss": 0.1255, + "step": 42940 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427962315614566e-05, + "loss": 0.1747, + "step": 42945 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427178530559782e-05, + "loss": 0.1615, + "step": 42950 + }, + { + "epoch": 2.0, + "learning_rate": 1.3426394745504994e-05, + "loss": 0.2272, + "step": 42955 + }, + { + "epoch": 2.0, + "learning_rate": 1.3425610960450208e-05, + "loss": 0.4022, + "step": 42960 + }, + { + "epoch": 2.0, + "learning_rate": 1.342482717539542e-05, + "loss": 0.3077, + "step": 42965 + }, + { + "epoch": 2.01, + "learning_rate": 1.3424043390340632e-05, + "loss": 0.0336, + "step": 42970 + }, + { + "epoch": 2.01, + "learning_rate": 1.3423259605285848e-05, + "loss": 0.0245, + "step": 42975 + }, + { + "epoch": 2.01, + "learning_rate": 1.342247582023106e-05, + "loss": 0.0466, + "step": 42980 + }, + { + "epoch": 2.01, + "learning_rate": 1.3421692035176274e-05, + "loss": 0.049, + "step": 42985 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420908250121486e-05, + "loss": 0.081, + "step": 42990 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420124465066702e-05, + "loss": 0.2433, + "step": 42995 + }, + { + "epoch": 2.01, + "learning_rate": 1.3419340680011914e-05, + "loss": 0.0825, + "step": 43000 + }, + { + "epoch": 2.01, + "learning_rate": 1.3418556894957128e-05, + "loss": 0.176, + "step": 43005 + }, + { + "epoch": 2.01, + "learning_rate": 1.3417773109902342e-05, + "loss": 0.3964, + "step": 43010 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416989324847556e-05, + "loss": 0.2797, + "step": 43015 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416205539792768e-05, + "loss": 0.0541, + "step": 43020 + }, + { + "epoch": 2.01, + "learning_rate": 1.3415421754737984e-05, + "loss": 0.1003, + "step": 43025 + }, + { + "epoch": 2.01, + "learning_rate": 1.3414637969683196e-05, + "loss": 0.0925, + "step": 43030 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413854184628408e-05, + "loss": 0.0643, + "step": 43035 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413070399573622e-05, + "loss": 0.0862, + "step": 43040 + }, + { + "epoch": 2.01, + "learning_rate": 1.3412286614518834e-05, + "loss": 0.1016, + "step": 43045 + }, + { + "epoch": 2.01, + "learning_rate": 1.341150282946405e-05, + "loss": 0.1259, + "step": 43050 + }, + { + "epoch": 2.01, + "learning_rate": 1.3410719044409262e-05, + "loss": 0.1814, + "step": 43055 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409935259354476e-05, + "loss": 0.2568, + "step": 43060 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409151474299688e-05, + "loss": 0.3472, + "step": 43065 + }, + { + "epoch": 2.01, + "learning_rate": 1.3408367689244904e-05, + "loss": 0.0966, + "step": 43070 + }, + { + "epoch": 2.01, + "learning_rate": 1.3407583904190116e-05, + "loss": 0.0378, + "step": 43075 + }, + { + "epoch": 2.01, + "learning_rate": 1.340680011913533e-05, + "loss": 0.066, + "step": 43080 + }, + { + "epoch": 2.01, + "learning_rate": 1.3406016334080542e-05, + "loss": 0.1326, + "step": 43085 + }, + { + "epoch": 2.01, + "learning_rate": 1.3405232549025758e-05, + "loss": 0.0976, + "step": 43090 + }, + { + "epoch": 2.01, + "learning_rate": 1.340444876397097e-05, + "loss": 0.1264, + "step": 43095 + }, + { + "epoch": 2.01, + "learning_rate": 1.3403664978916182e-05, + "loss": 0.1619, + "step": 43100 + }, + { + "epoch": 2.01, + "learning_rate": 1.3402881193861396e-05, + "loss": 0.2066, + "step": 43105 + }, + { + "epoch": 2.01, + "learning_rate": 1.340209740880661e-05, + "loss": 0.2903, + "step": 43110 + }, + { + "epoch": 2.01, + "learning_rate": 1.3401313623751824e-05, + "loss": 0.2423, + "step": 43115 + }, + { + "epoch": 2.01, + "learning_rate": 1.3400529838697036e-05, + "loss": 0.0823, + "step": 43120 + }, + { + "epoch": 2.01, + "learning_rate": 1.3399746053642252e-05, + "loss": 0.0537, + "step": 43125 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398962268587464e-05, + "loss": 0.061, + "step": 43130 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398178483532678e-05, + "loss": 0.1014, + "step": 43135 + }, + { + "epoch": 2.01, + "learning_rate": 1.339739469847789e-05, + "loss": 0.1187, + "step": 43140 + }, + { + "epoch": 2.01, + "learning_rate": 1.3396610913423106e-05, + "loss": 0.0802, + "step": 43145 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395827128368318e-05, + "loss": 0.1449, + "step": 43150 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395043343313532e-05, + "loss": 0.2398, + "step": 43155 + }, + { + "epoch": 2.01, + "learning_rate": 1.3394259558258744e-05, + "loss": 0.1875, + "step": 43160 + }, + { + "epoch": 2.01, + "learning_rate": 1.3393475773203956e-05, + "loss": 0.3293, + "step": 43165 + }, + { + "epoch": 2.01, + "learning_rate": 1.3392691988149172e-05, + "loss": 0.0328, + "step": 43170 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391908203094384e-05, + "loss": 0.0352, + "step": 43175 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391124418039598e-05, + "loss": 0.0326, + "step": 43180 + }, + { + "epoch": 2.02, + "learning_rate": 1.339034063298481e-05, + "loss": 0.1078, + "step": 43185 + }, + { + "epoch": 2.02, + "learning_rate": 1.3389556847930026e-05, + "loss": 0.0485, + "step": 43190 + }, + { + "epoch": 2.02, + "learning_rate": 1.3388773062875238e-05, + "loss": 0.1721, + "step": 43195 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387989277820452e-05, + "loss": 0.2037, + "step": 43200 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387205492765664e-05, + "loss": 0.1506, + "step": 43205 + }, + { + "epoch": 2.02, + "learning_rate": 1.338642170771088e-05, + "loss": 0.2595, + "step": 43210 + }, + { + "epoch": 2.02, + "learning_rate": 1.3385637922656092e-05, + "loss": 0.2567, + "step": 43215 + }, + { + "epoch": 2.02, + "learning_rate": 1.3384854137601306e-05, + "loss": 0.0354, + "step": 43220 + }, + { + "epoch": 2.02, + "learning_rate": 1.338407035254652e-05, + "loss": 0.0445, + "step": 43225 + }, + { + "epoch": 2.02, + "learning_rate": 1.3383286567491732e-05, + "loss": 0.1386, + "step": 43230 + }, + { + "epoch": 2.02, + "learning_rate": 1.3382502782436946e-05, + "loss": 0.1286, + "step": 43235 + }, + { + "epoch": 2.02, + "learning_rate": 1.3381718997382158e-05, + "loss": 0.1383, + "step": 43240 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380935212327373e-05, + "loss": 0.164, + "step": 43245 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380151427272586e-05, + "loss": 0.1469, + "step": 43250 + }, + { + "epoch": 2.02, + "learning_rate": 1.33793676422178e-05, + "loss": 0.1615, + "step": 43255 + }, + { + "epoch": 2.02, + "learning_rate": 1.3378583857163012e-05, + "loss": 0.2817, + "step": 43260 + }, + { + "epoch": 2.02, + "learning_rate": 1.3377800072108227e-05, + "loss": 0.3087, + "step": 43265 + }, + { + "epoch": 2.02, + "learning_rate": 1.337701628705344e-05, + "loss": 0.0857, + "step": 43270 + }, + { + "epoch": 2.02, + "learning_rate": 1.3376232501998654e-05, + "loss": 0.0569, + "step": 43275 + }, + { + "epoch": 2.02, + "learning_rate": 1.3375448716943866e-05, + "loss": 0.0329, + "step": 43280 + }, + { + "epoch": 2.02, + "learning_rate": 1.3374664931889081e-05, + "loss": 0.0912, + "step": 43285 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373881146834294e-05, + "loss": 0.0769, + "step": 43290 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373097361779506e-05, + "loss": 0.0714, + "step": 43295 + }, + { + "epoch": 2.02, + "learning_rate": 1.337231357672472e-05, + "loss": 0.1705, + "step": 43300 + }, + { + "epoch": 2.02, + "learning_rate": 1.3371529791669932e-05, + "loss": 0.3426, + "step": 43305 + }, + { + "epoch": 2.02, + "learning_rate": 1.3370746006615147e-05, + "loss": 0.3034, + "step": 43310 + }, + { + "epoch": 2.02, + "learning_rate": 1.336996222156036e-05, + "loss": 0.2828, + "step": 43315 + }, + { + "epoch": 2.02, + "learning_rate": 1.3369178436505574e-05, + "loss": 0.0257, + "step": 43320 + }, + { + "epoch": 2.02, + "learning_rate": 1.3368394651450788e-05, + "loss": 0.0729, + "step": 43325 + }, + { + "epoch": 2.02, + "learning_rate": 1.3367610866396001e-05, + "loss": 0.132, + "step": 43330 + }, + { + "epoch": 2.02, + "learning_rate": 1.3366827081341214e-05, + "loss": 0.1212, + "step": 43335 + }, + { + "epoch": 2.02, + "learning_rate": 1.336604329628643e-05, + "loss": 0.1476, + "step": 43340 + }, + { + "epoch": 2.02, + "learning_rate": 1.3365259511231641e-05, + "loss": 0.1749, + "step": 43345 + }, + { + "epoch": 2.02, + "learning_rate": 1.3364475726176855e-05, + "loss": 0.2094, + "step": 43350 + }, + { + "epoch": 2.02, + "learning_rate": 1.3363691941122068e-05, + "loss": 0.1269, + "step": 43355 + }, + { + "epoch": 2.02, + "learning_rate": 1.336290815606728e-05, + "loss": 0.397, + "step": 43360 + }, + { + "epoch": 2.02, + "learning_rate": 1.3362124371012495e-05, + "loss": 0.3699, + "step": 43365 + }, + { + "epoch": 2.02, + "learning_rate": 1.3361340585957708e-05, + "loss": 0.0477, + "step": 43370 + }, + { + "epoch": 2.02, + "learning_rate": 1.3360556800902921e-05, + "loss": 0.1156, + "step": 43375 + }, + { + "epoch": 2.02, + "learning_rate": 1.3359773015848134e-05, + "loss": 0.0311, + "step": 43380 + }, + { + "epoch": 2.02, + "learning_rate": 1.335898923079335e-05, + "loss": 0.3633, + "step": 43385 + }, + { + "epoch": 2.02, + "learning_rate": 1.3358205445738562e-05, + "loss": 0.1132, + "step": 43390 + }, + { + "epoch": 2.02, + "learning_rate": 1.3357421660683775e-05, + "loss": 0.0839, + "step": 43395 + }, + { + "epoch": 2.03, + "learning_rate": 1.3356637875628988e-05, + "loss": 0.1844, + "step": 43400 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355854090574203e-05, + "loss": 0.2091, + "step": 43405 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355070305519415e-05, + "loss": 0.2798, + "step": 43410 + }, + { + "epoch": 2.03, + "learning_rate": 1.335428652046463e-05, + "loss": 0.2263, + "step": 43415 + }, + { + "epoch": 2.03, + "learning_rate": 1.3353502735409842e-05, + "loss": 0.0277, + "step": 43420 + }, + { + "epoch": 2.03, + "learning_rate": 1.3352718950355055e-05, + "loss": 0.0383, + "step": 43425 + }, + { + "epoch": 2.03, + "learning_rate": 1.335193516530027e-05, + "loss": 0.0466, + "step": 43430 + }, + { + "epoch": 2.03, + "learning_rate": 1.3351151380245482e-05, + "loss": 0.0454, + "step": 43435 + }, + { + "epoch": 2.03, + "learning_rate": 1.3350367595190697e-05, + "loss": 0.1619, + "step": 43440 + }, + { + "epoch": 2.03, + "learning_rate": 1.3349740567146868e-05, + "loss": 0.0808, + "step": 43445 + }, + { + "epoch": 2.03, + "learning_rate": 1.334895678209208e-05, + "loss": 0.1816, + "step": 43450 + }, + { + "epoch": 2.03, + "learning_rate": 1.3348172997037294e-05, + "loss": 0.2489, + "step": 43455 + }, + { + "epoch": 2.03, + "learning_rate": 1.3347389211982506e-05, + "loss": 0.2021, + "step": 43460 + }, + { + "epoch": 2.03, + "learning_rate": 1.3346762183938678e-05, + "loss": 0.3291, + "step": 43465 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345978398883892e-05, + "loss": 0.0624, + "step": 43470 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345194613829104e-05, + "loss": 0.0649, + "step": 43475 + }, + { + "epoch": 2.03, + "learning_rate": 1.334441082877432e-05, + "loss": 0.093, + "step": 43480 + }, + { + "epoch": 2.03, + "learning_rate": 1.3343627043719532e-05, + "loss": 0.124, + "step": 43485 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342843258664744e-05, + "loss": 0.0791, + "step": 43490 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342059473609958e-05, + "loss": 0.1554, + "step": 43495 + }, + { + "epoch": 2.03, + "learning_rate": 1.334127568855517e-05, + "loss": 0.1531, + "step": 43500 + }, + { + "epoch": 2.03, + "learning_rate": 1.3340491903500386e-05, + "loss": 0.1726, + "step": 43505 + }, + { + "epoch": 2.03, + "learning_rate": 1.3339708118445598e-05, + "loss": 0.3568, + "step": 43510 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338924333390812e-05, + "loss": 0.3171, + "step": 43515 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338140548336024e-05, + "loss": 0.0196, + "step": 43520 + }, + { + "epoch": 2.03, + "learning_rate": 1.333735676328124e-05, + "loss": 0.0832, + "step": 43525 + }, + { + "epoch": 2.03, + "learning_rate": 1.3336572978226452e-05, + "loss": 0.0747, + "step": 43530 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335789193171666e-05, + "loss": 0.0519, + "step": 43535 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335005408116878e-05, + "loss": 0.1477, + "step": 43540 + }, + { + "epoch": 2.03, + "learning_rate": 1.3334221623062094e-05, + "loss": 0.231, + "step": 43545 + }, + { + "epoch": 2.03, + "learning_rate": 1.3333437838007306e-05, + "loss": 0.2418, + "step": 43550 + }, + { + "epoch": 2.03, + "learning_rate": 1.3332654052952518e-05, + "loss": 0.2191, + "step": 43555 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331870267897732e-05, + "loss": 0.396, + "step": 43560 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331086482842946e-05, + "loss": 0.3743, + "step": 43565 + }, + { + "epoch": 2.03, + "learning_rate": 1.333030269778816e-05, + "loss": 0.0503, + "step": 43570 + }, + { + "epoch": 2.03, + "learning_rate": 1.3329518912733372e-05, + "loss": 0.0244, + "step": 43575 + }, + { + "epoch": 2.03, + "learning_rate": 1.3328735127678588e-05, + "loss": 0.0543, + "step": 43580 + }, + { + "epoch": 2.03, + "learning_rate": 1.33279513426238e-05, + "loss": 0.0519, + "step": 43585 + }, + { + "epoch": 2.03, + "learning_rate": 1.3327167557569014e-05, + "loss": 0.0916, + "step": 43590 + }, + { + "epoch": 2.03, + "learning_rate": 1.3326383772514226e-05, + "loss": 0.167, + "step": 43595 + }, + { + "epoch": 2.03, + "learning_rate": 1.3325599987459442e-05, + "loss": 0.1703, + "step": 43600 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324816202404654e-05, + "loss": 0.1012, + "step": 43605 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324032417349868e-05, + "loss": 0.1885, + "step": 43610 + }, + { + "epoch": 2.04, + "learning_rate": 1.332324863229508e-05, + "loss": 0.2534, + "step": 43615 + }, + { + "epoch": 2.04, + "learning_rate": 1.3322464847240292e-05, + "loss": 0.0711, + "step": 43620 + }, + { + "epoch": 2.04, + "learning_rate": 1.3321681062185508e-05, + "loss": 0.0885, + "step": 43625 + }, + { + "epoch": 2.04, + "learning_rate": 1.332089727713072e-05, + "loss": 0.0919, + "step": 43630 + }, + { + "epoch": 2.04, + "learning_rate": 1.3320113492075934e-05, + "loss": 0.0972, + "step": 43635 + }, + { + "epoch": 2.04, + "learning_rate": 1.3319329707021146e-05, + "loss": 0.0946, + "step": 43640 + }, + { + "epoch": 2.04, + "learning_rate": 1.3318545921966362e-05, + "loss": 0.0836, + "step": 43645 + }, + { + "epoch": 2.04, + "learning_rate": 1.3317762136911574e-05, + "loss": 0.1282, + "step": 43650 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316978351856788e-05, + "loss": 0.1321, + "step": 43655 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316194566802002e-05, + "loss": 0.266, + "step": 43660 + }, + { + "epoch": 2.04, + "learning_rate": 1.3315410781747216e-05, + "loss": 0.3727, + "step": 43665 + }, + { + "epoch": 2.04, + "learning_rate": 1.3314626996692428e-05, + "loss": 0.0841, + "step": 43670 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313843211637642e-05, + "loss": 0.0567, + "step": 43675 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313059426582856e-05, + "loss": 0.0792, + "step": 43680 + }, + { + "epoch": 2.04, + "learning_rate": 1.3312275641528068e-05, + "loss": 0.0857, + "step": 43685 + }, + { + "epoch": 2.04, + "learning_rate": 1.3311491856473282e-05, + "loss": 0.0897, + "step": 43690 + }, + { + "epoch": 2.04, + "learning_rate": 1.3310708071418494e-05, + "loss": 0.1323, + "step": 43695 + }, + { + "epoch": 2.04, + "learning_rate": 1.330992428636371e-05, + "loss": 0.3455, + "step": 43700 + }, + { + "epoch": 2.04, + "learning_rate": 1.3309140501308922e-05, + "loss": 0.1129, + "step": 43705 + }, + { + "epoch": 2.04, + "learning_rate": 1.3308356716254136e-05, + "loss": 0.2136, + "step": 43710 + }, + { + "epoch": 2.04, + "learning_rate": 1.3307572931199348e-05, + "loss": 0.3236, + "step": 43715 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306789146144564e-05, + "loss": 0.0699, + "step": 43720 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306005361089776e-05, + "loss": 0.063, + "step": 43725 + }, + { + "epoch": 2.04, + "learning_rate": 1.330522157603499e-05, + "loss": 0.0609, + "step": 43730 + }, + { + "epoch": 2.04, + "learning_rate": 1.3304437790980202e-05, + "loss": 0.0902, + "step": 43735 + }, + { + "epoch": 2.04, + "learning_rate": 1.3303654005925417e-05, + "loss": 0.0967, + "step": 43740 + }, + { + "epoch": 2.04, + "learning_rate": 1.330287022087063e-05, + "loss": 0.1716, + "step": 43745 + }, + { + "epoch": 2.04, + "learning_rate": 1.3302086435815842e-05, + "loss": 0.117, + "step": 43750 + }, + { + "epoch": 2.04, + "learning_rate": 1.3301302650761056e-05, + "loss": 0.2584, + "step": 43755 + }, + { + "epoch": 2.04, + "learning_rate": 1.330051886570627e-05, + "loss": 0.3534, + "step": 43760 + }, + { + "epoch": 2.04, + "learning_rate": 1.3299735080651484e-05, + "loss": 0.2967, + "step": 43765 + }, + { + "epoch": 2.04, + "learning_rate": 1.3298951295596696e-05, + "loss": 0.0437, + "step": 43770 + }, + { + "epoch": 2.04, + "learning_rate": 1.329816751054191e-05, + "loss": 0.0362, + "step": 43775 + }, + { + "epoch": 2.04, + "learning_rate": 1.3297383725487124e-05, + "loss": 0.0681, + "step": 43780 + }, + { + "epoch": 2.04, + "learning_rate": 1.3296599940432338e-05, + "loss": 0.0636, + "step": 43785 + }, + { + "epoch": 2.04, + "learning_rate": 1.329581615537755e-05, + "loss": 0.1009, + "step": 43790 + }, + { + "epoch": 2.04, + "learning_rate": 1.3295032370322765e-05, + "loss": 0.1697, + "step": 43795 + }, + { + "epoch": 2.04, + "learning_rate": 1.3294248585267978e-05, + "loss": 0.1545, + "step": 43800 + }, + { + "epoch": 2.04, + "learning_rate": 1.3293464800213191e-05, + "loss": 0.1749, + "step": 43805 + }, + { + "epoch": 2.04, + "learning_rate": 1.3292681015158404e-05, + "loss": 0.2246, + "step": 43810 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291897230103616e-05, + "loss": 0.2266, + "step": 43815 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291113445048832e-05, + "loss": 0.0738, + "step": 43820 + }, + { + "epoch": 2.04, + "learning_rate": 1.3290329659994044e-05, + "loss": 0.0333, + "step": 43825 + }, + { + "epoch": 2.05, + "learning_rate": 1.3289545874939258e-05, + "loss": 0.0439, + "step": 43830 + }, + { + "epoch": 2.05, + "learning_rate": 1.328876208988447e-05, + "loss": 0.0997, + "step": 43835 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287978304829685e-05, + "loss": 0.0818, + "step": 43840 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287194519774898e-05, + "loss": 0.0876, + "step": 43845 + }, + { + "epoch": 2.05, + "learning_rate": 1.3286410734720112e-05, + "loss": 0.1397, + "step": 43850 + }, + { + "epoch": 2.05, + "learning_rate": 1.3285626949665324e-05, + "loss": 0.1035, + "step": 43855 + }, + { + "epoch": 2.05, + "learning_rate": 1.328484316461054e-05, + "loss": 0.3954, + "step": 43860 + }, + { + "epoch": 2.05, + "learning_rate": 1.3284059379555752e-05, + "loss": 0.4025, + "step": 43865 + }, + { + "epoch": 2.05, + "learning_rate": 1.3283275594500965e-05, + "loss": 0.0292, + "step": 43870 + }, + { + "epoch": 2.05, + "learning_rate": 1.328249180944618e-05, + "loss": 0.091, + "step": 43875 + }, + { + "epoch": 2.05, + "learning_rate": 1.3281708024391392e-05, + "loss": 0.0692, + "step": 43880 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280924239336606e-05, + "loss": 0.0826, + "step": 43885 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280140454281818e-05, + "loss": 0.1142, + "step": 43890 + }, + { + "epoch": 2.05, + "learning_rate": 1.3279356669227033e-05, + "loss": 0.1234, + "step": 43895 + }, + { + "epoch": 2.05, + "learning_rate": 1.3278572884172246e-05, + "loss": 0.1834, + "step": 43900 + }, + { + "epoch": 2.05, + "learning_rate": 1.327778909911746e-05, + "loss": 0.227, + "step": 43905 + }, + { + "epoch": 2.05, + "learning_rate": 1.3277005314062672e-05, + "loss": 0.336, + "step": 43910 + }, + { + "epoch": 2.05, + "learning_rate": 1.3276221529007887e-05, + "loss": 0.2137, + "step": 43915 + }, + { + "epoch": 2.05, + "learning_rate": 1.32754377439531e-05, + "loss": 0.08, + "step": 43920 + }, + { + "epoch": 2.05, + "learning_rate": 1.3274653958898313e-05, + "loss": 0.0813, + "step": 43925 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273870173843526e-05, + "loss": 0.0463, + "step": 43930 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273086388788741e-05, + "loss": 0.0811, + "step": 43935 + }, + { + "epoch": 2.05, + "learning_rate": 1.3272302603733953e-05, + "loss": 0.0861, + "step": 43940 + }, + { + "epoch": 2.05, + "learning_rate": 1.3271518818679166e-05, + "loss": 0.1037, + "step": 43945 + }, + { + "epoch": 2.05, + "learning_rate": 1.327073503362438e-05, + "loss": 0.2025, + "step": 43950 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269951248569592e-05, + "loss": 0.2942, + "step": 43955 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269167463514807e-05, + "loss": 0.1693, + "step": 43960 + }, + { + "epoch": 2.05, + "learning_rate": 1.326838367846002e-05, + "loss": 0.269, + "step": 43965 + }, + { + "epoch": 2.05, + "learning_rate": 1.3267599893405233e-05, + "loss": 0.1356, + "step": 43970 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266816108350447e-05, + "loss": 0.016, + "step": 43975 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266032323295661e-05, + "loss": 0.0608, + "step": 43980 + }, + { + "epoch": 2.05, + "learning_rate": 1.3265248538240873e-05, + "loss": 0.0614, + "step": 43985 + }, + { + "epoch": 2.05, + "learning_rate": 1.3264464753186087e-05, + "loss": 0.0645, + "step": 43990 + }, + { + "epoch": 2.05, + "learning_rate": 1.3263680968131301e-05, + "loss": 0.1768, + "step": 43995 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262897183076515e-05, + "loss": 0.0816, + "step": 44000 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262113398021727e-05, + "loss": 0.121, + "step": 44005 + }, + { + "epoch": 2.05, + "learning_rate": 1.326132961296694e-05, + "loss": 0.1856, + "step": 44010 + }, + { + "epoch": 2.05, + "learning_rate": 1.3260545827912155e-05, + "loss": 0.2582, + "step": 44015 + }, + { + "epoch": 2.05, + "learning_rate": 1.3259762042857367e-05, + "loss": 0.0342, + "step": 44020 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258978257802581e-05, + "loss": 0.0482, + "step": 44025 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258194472747794e-05, + "loss": 0.066, + "step": 44030 + }, + { + "epoch": 2.05, + "learning_rate": 1.3257410687693009e-05, + "loss": 0.1002, + "step": 44035 + }, + { + "epoch": 2.05, + "learning_rate": 1.3256626902638221e-05, + "loss": 0.0774, + "step": 44040 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255843117583435e-05, + "loss": 0.2158, + "step": 44045 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255059332528647e-05, + "loss": 0.1574, + "step": 44050 + }, + { + "epoch": 2.06, + "learning_rate": 1.3254275547473863e-05, + "loss": 0.2037, + "step": 44055 + }, + { + "epoch": 2.06, + "learning_rate": 1.3253491762419075e-05, + "loss": 0.3914, + "step": 44060 + }, + { + "epoch": 2.06, + "learning_rate": 1.325270797736429e-05, + "loss": 0.299, + "step": 44065 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251924192309501e-05, + "loss": 0.0783, + "step": 44070 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251140407254715e-05, + "loss": 0.0358, + "step": 44075 + }, + { + "epoch": 2.06, + "learning_rate": 1.325035662219993e-05, + "loss": 0.0511, + "step": 44080 + }, + { + "epoch": 2.06, + "learning_rate": 1.3249572837145141e-05, + "loss": 0.0737, + "step": 44085 + }, + { + "epoch": 2.06, + "learning_rate": 1.3248789052090355e-05, + "loss": 0.0319, + "step": 44090 + }, + { + "epoch": 2.06, + "learning_rate": 1.324800526703557e-05, + "loss": 0.0438, + "step": 44095 + }, + { + "epoch": 2.06, + "learning_rate": 1.3247221481980783e-05, + "loss": 0.1289, + "step": 44100 + }, + { + "epoch": 2.06, + "learning_rate": 1.3246437696925995e-05, + "loss": 0.2054, + "step": 44105 + }, + { + "epoch": 2.06, + "learning_rate": 1.3245653911871211e-05, + "loss": 0.2511, + "step": 44110 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244870126816423e-05, + "loss": 0.3578, + "step": 44115 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244086341761637e-05, + "loss": 0.0513, + "step": 44120 + }, + { + "epoch": 2.06, + "learning_rate": 1.324330255670685e-05, + "loss": 0.0485, + "step": 44125 + }, + { + "epoch": 2.06, + "learning_rate": 1.3242518771652065e-05, + "loss": 0.0457, + "step": 44130 + }, + { + "epoch": 2.06, + "learning_rate": 1.3241734986597277e-05, + "loss": 0.0965, + "step": 44135 + }, + { + "epoch": 2.06, + "learning_rate": 1.324095120154249e-05, + "loss": 0.0919, + "step": 44140 + }, + { + "epoch": 2.06, + "learning_rate": 1.3240167416487703e-05, + "loss": 0.1236, + "step": 44145 + }, + { + "epoch": 2.06, + "learning_rate": 1.3239383631432915e-05, + "loss": 0.1037, + "step": 44150 + }, + { + "epoch": 2.06, + "learning_rate": 1.3238599846378131e-05, + "loss": 0.1805, + "step": 44155 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237816061323343e-05, + "loss": 0.2902, + "step": 44160 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237032276268557e-05, + "loss": 0.2851, + "step": 44165 + }, + { + "epoch": 2.06, + "learning_rate": 1.323624849121377e-05, + "loss": 0.0588, + "step": 44170 + }, + { + "epoch": 2.06, + "learning_rate": 1.3235464706158985e-05, + "loss": 0.1677, + "step": 44175 + }, + { + "epoch": 2.06, + "learning_rate": 1.3234680921104197e-05, + "loss": 0.0701, + "step": 44180 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233897136049411e-05, + "loss": 0.0714, + "step": 44185 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233113350994625e-05, + "loss": 0.0526, + "step": 44190 + }, + { + "epoch": 2.06, + "learning_rate": 1.3232329565939839e-05, + "loss": 0.0875, + "step": 44195 + }, + { + "epoch": 2.06, + "learning_rate": 1.3231545780885051e-05, + "loss": 0.1067, + "step": 44200 + }, + { + "epoch": 2.06, + "learning_rate": 1.3230761995830263e-05, + "loss": 0.1859, + "step": 44205 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229978210775479e-05, + "loss": 0.2862, + "step": 44210 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229194425720691e-05, + "loss": 0.3919, + "step": 44215 + }, + { + "epoch": 2.06, + "learning_rate": 1.3228410640665905e-05, + "loss": 0.0165, + "step": 44220 + }, + { + "epoch": 2.06, + "learning_rate": 1.3227626855611117e-05, + "loss": 0.0282, + "step": 44225 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226843070556333e-05, + "loss": 0.1177, + "step": 44230 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226059285501545e-05, + "loss": 0.0937, + "step": 44235 + }, + { + "epoch": 2.06, + "learning_rate": 1.3225275500446759e-05, + "loss": 0.1239, + "step": 44240 + }, + { + "epoch": 2.06, + "learning_rate": 1.3224491715391971e-05, + "loss": 0.1821, + "step": 44245 + }, + { + "epoch": 2.06, + "learning_rate": 1.3223707930337187e-05, + "loss": 0.205, + "step": 44250 + }, + { + "epoch": 2.06, + "learning_rate": 1.3222924145282399e-05, + "loss": 0.1652, + "step": 44255 + }, + { + "epoch": 2.07, + "learning_rate": 1.3222140360227613e-05, + "loss": 0.1785, + "step": 44260 + }, + { + "epoch": 2.07, + "learning_rate": 1.3221356575172825e-05, + "loss": 0.2491, + "step": 44265 + }, + { + "epoch": 2.07, + "learning_rate": 1.3220572790118037e-05, + "loss": 0.026, + "step": 44270 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219789005063253e-05, + "loss": 0.0235, + "step": 44275 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219005220008465e-05, + "loss": 0.0774, + "step": 44280 + }, + { + "epoch": 2.07, + "learning_rate": 1.3218221434953679e-05, + "loss": 0.08, + "step": 44285 + }, + { + "epoch": 2.07, + "learning_rate": 1.3217437649898893e-05, + "loss": 0.0742, + "step": 44290 + }, + { + "epoch": 2.07, + "learning_rate": 1.3216653864844107e-05, + "loss": 0.1891, + "step": 44295 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215870079789319e-05, + "loss": 0.1566, + "step": 44300 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215086294734533e-05, + "loss": 0.2266, + "step": 44305 + }, + { + "epoch": 2.07, + "learning_rate": 1.3214302509679747e-05, + "loss": 0.2815, + "step": 44310 + }, + { + "epoch": 2.07, + "learning_rate": 1.321351872462496e-05, + "loss": 0.2934, + "step": 44315 + }, + { + "epoch": 2.07, + "learning_rate": 1.3212734939570173e-05, + "loss": 0.0386, + "step": 44320 + }, + { + "epoch": 2.07, + "learning_rate": 1.3211951154515389e-05, + "loss": 0.079, + "step": 44325 + }, + { + "epoch": 2.07, + "learning_rate": 1.32111673694606e-05, + "loss": 0.0483, + "step": 44330 + }, + { + "epoch": 2.07, + "learning_rate": 1.3210383584405813e-05, + "loss": 0.12, + "step": 44335 + }, + { + "epoch": 2.07, + "learning_rate": 1.3209599799351027e-05, + "loss": 0.1331, + "step": 44340 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208816014296239e-05, + "loss": 0.1102, + "step": 44345 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208032229241455e-05, + "loss": 0.116, + "step": 44350 + }, + { + "epoch": 2.07, + "learning_rate": 1.3207248444186667e-05, + "loss": 0.1937, + "step": 44355 + }, + { + "epoch": 2.07, + "learning_rate": 1.320646465913188e-05, + "loss": 0.2692, + "step": 44360 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205837631088051e-05, + "loss": 0.3699, + "step": 44365 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205053846033265e-05, + "loss": 0.0153, + "step": 44370 + }, + { + "epoch": 2.07, + "learning_rate": 1.3204270060978479e-05, + "loss": 0.0688, + "step": 44375 + }, + { + "epoch": 2.07, + "learning_rate": 1.3203486275923691e-05, + "loss": 0.0807, + "step": 44380 + }, + { + "epoch": 2.07, + "learning_rate": 1.3202702490868907e-05, + "loss": 0.0753, + "step": 44385 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201918705814119e-05, + "loss": 0.083, + "step": 44390 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201134920759333e-05, + "loss": 0.165, + "step": 44395 + }, + { + "epoch": 2.07, + "learning_rate": 1.3200351135704545e-05, + "loss": 0.169, + "step": 44400 + }, + { + "epoch": 2.07, + "learning_rate": 1.3199567350649757e-05, + "loss": 0.2277, + "step": 44405 + }, + { + "epoch": 2.07, + "learning_rate": 1.3198783565594973e-05, + "loss": 0.3473, + "step": 44410 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197999780540185e-05, + "loss": 0.2985, + "step": 44415 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197215995485399e-05, + "loss": 0.0606, + "step": 44420 + }, + { + "epoch": 2.07, + "learning_rate": 1.3196432210430611e-05, + "loss": 0.0483, + "step": 44425 + }, + { + "epoch": 2.07, + "learning_rate": 1.3195648425375827e-05, + "loss": 0.0787, + "step": 44430 + }, + { + "epoch": 2.07, + "learning_rate": 1.319486464032104e-05, + "loss": 0.1182, + "step": 44435 + }, + { + "epoch": 2.07, + "learning_rate": 1.3194080855266253e-05, + "loss": 0.152, + "step": 44440 + }, + { + "epoch": 2.07, + "learning_rate": 1.3193297070211465e-05, + "loss": 0.1204, + "step": 44445 + }, + { + "epoch": 2.07, + "learning_rate": 1.3192513285156681e-05, + "loss": 0.0873, + "step": 44450 + }, + { + "epoch": 2.07, + "learning_rate": 1.3191729500101893e-05, + "loss": 0.1337, + "step": 44455 + }, + { + "epoch": 2.07, + "learning_rate": 1.3190945715047107e-05, + "loss": 0.2727, + "step": 44460 + }, + { + "epoch": 2.07, + "learning_rate": 1.319016192999232e-05, + "loss": 0.2811, + "step": 44465 + }, + { + "epoch": 2.08, + "learning_rate": 1.3189378144937533e-05, + "loss": 0.0753, + "step": 44470 + }, + { + "epoch": 2.08, + "learning_rate": 1.3188594359882747e-05, + "loss": 0.0589, + "step": 44475 + }, + { + "epoch": 2.08, + "learning_rate": 1.318781057482796e-05, + "loss": 0.1141, + "step": 44480 + }, + { + "epoch": 2.08, + "learning_rate": 1.3187026789773175e-05, + "loss": 0.0495, + "step": 44485 + }, + { + "epoch": 2.08, + "learning_rate": 1.3186243004718387e-05, + "loss": 0.0716, + "step": 44490 + }, + { + "epoch": 2.08, + "learning_rate": 1.3185459219663601e-05, + "loss": 0.1369, + "step": 44495 + }, + { + "epoch": 2.08, + "learning_rate": 1.3184675434608813e-05, + "loss": 0.1567, + "step": 44500 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183891649554029e-05, + "loss": 0.1988, + "step": 44505 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183107864499241e-05, + "loss": 0.3542, + "step": 44510 + }, + { + "epoch": 2.08, + "learning_rate": 1.3182324079444455e-05, + "loss": 0.3402, + "step": 44515 + }, + { + "epoch": 2.08, + "learning_rate": 1.3181540294389667e-05, + "loss": 0.0784, + "step": 44520 + }, + { + "epoch": 2.08, + "learning_rate": 1.3180756509334883e-05, + "loss": 0.0768, + "step": 44525 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179972724280095e-05, + "loss": 0.0386, + "step": 44530 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179188939225307e-05, + "loss": 0.0284, + "step": 44535 + }, + { + "epoch": 2.08, + "learning_rate": 1.3178405154170521e-05, + "loss": 0.0668, + "step": 44540 + }, + { + "epoch": 2.08, + "learning_rate": 1.3177621369115733e-05, + "loss": 0.1517, + "step": 44545 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176837584060949e-05, + "loss": 0.1934, + "step": 44550 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176053799006161e-05, + "loss": 0.321, + "step": 44555 + }, + { + "epoch": 2.08, + "learning_rate": 1.3175270013951375e-05, + "loss": 0.3407, + "step": 44560 + }, + { + "epoch": 2.08, + "learning_rate": 1.3174486228896587e-05, + "loss": 0.2514, + "step": 44565 + }, + { + "epoch": 2.08, + "learning_rate": 1.3173702443841803e-05, + "loss": 0.041, + "step": 44570 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172918658787015e-05, + "loss": 0.0766, + "step": 44575 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172134873732229e-05, + "loss": 0.0465, + "step": 44580 + }, + { + "epoch": 2.08, + "learning_rate": 1.3171351088677443e-05, + "loss": 0.1087, + "step": 44585 + }, + { + "epoch": 2.08, + "learning_rate": 1.3170567303622657e-05, + "loss": 0.1005, + "step": 44590 + }, + { + "epoch": 2.08, + "learning_rate": 1.3169783518567869e-05, + "loss": 0.1296, + "step": 44595 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168999733513081e-05, + "loss": 0.1509, + "step": 44600 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168215948458297e-05, + "loss": 0.1615, + "step": 44605 + }, + { + "epoch": 2.08, + "learning_rate": 1.3167432163403509e-05, + "loss": 0.1695, + "step": 44610 + }, + { + "epoch": 2.08, + "learning_rate": 1.3166648378348723e-05, + "loss": 0.2675, + "step": 44615 + }, + { + "epoch": 2.08, + "learning_rate": 1.3165864593293935e-05, + "loss": 0.0326, + "step": 44620 + }, + { + "epoch": 2.08, + "learning_rate": 1.316508080823915e-05, + "loss": 0.065, + "step": 44625 + }, + { + "epoch": 2.08, + "learning_rate": 1.3164297023184363e-05, + "loss": 0.0504, + "step": 44630 + }, + { + "epoch": 2.08, + "learning_rate": 1.3163513238129577e-05, + "loss": 0.0747, + "step": 44635 + }, + { + "epoch": 2.08, + "learning_rate": 1.3162729453074789e-05, + "loss": 0.064, + "step": 44640 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161945668020005e-05, + "loss": 0.1414, + "step": 44645 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161161882965217e-05, + "loss": 0.2267, + "step": 44650 + }, + { + "epoch": 2.08, + "learning_rate": 1.316037809791043e-05, + "loss": 0.1493, + "step": 44655 + }, + { + "epoch": 2.08, + "learning_rate": 1.3159594312855643e-05, + "loss": 0.3172, + "step": 44660 + }, + { + "epoch": 2.08, + "learning_rate": 1.3158810527800857e-05, + "loss": 0.2474, + "step": 44665 + }, + { + "epoch": 2.08, + "learning_rate": 1.315802674274607e-05, + "loss": 0.0665, + "step": 44670 + }, + { + "epoch": 2.08, + "learning_rate": 1.3157242957691283e-05, + "loss": 0.0464, + "step": 44675 + }, + { + "epoch": 2.08, + "learning_rate": 1.3156459172636497e-05, + "loss": 0.0733, + "step": 44680 + }, + { + "epoch": 2.09, + "learning_rate": 1.315567538758171e-05, + "loss": 0.1709, + "step": 44685 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154891602526925e-05, + "loss": 0.0666, + "step": 44690 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154107817472137e-05, + "loss": 0.0916, + "step": 44695 + }, + { + "epoch": 2.09, + "learning_rate": 1.3153324032417352e-05, + "loss": 0.0939, + "step": 44700 + }, + { + "epoch": 2.09, + "learning_rate": 1.3152540247362565e-05, + "loss": 0.155, + "step": 44705 + }, + { + "epoch": 2.09, + "learning_rate": 1.3151756462307779e-05, + "loss": 0.2603, + "step": 44710 + }, + { + "epoch": 2.09, + "learning_rate": 1.315097267725299e-05, + "loss": 0.2802, + "step": 44715 + }, + { + "epoch": 2.09, + "learning_rate": 1.3150188892198206e-05, + "loss": 0.0517, + "step": 44720 + }, + { + "epoch": 2.09, + "learning_rate": 1.3149405107143419e-05, + "loss": 0.0623, + "step": 44725 + }, + { + "epoch": 2.09, + "learning_rate": 1.314862132208863e-05, + "loss": 0.0714, + "step": 44730 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147837537033845e-05, + "loss": 0.0585, + "step": 44735 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147053751979057e-05, + "loss": 0.1013, + "step": 44740 + }, + { + "epoch": 2.09, + "learning_rate": 1.3146269966924273e-05, + "loss": 0.0922, + "step": 44745 + }, + { + "epoch": 2.09, + "learning_rate": 1.3145486181869485e-05, + "loss": 0.1085, + "step": 44750 + }, + { + "epoch": 2.09, + "learning_rate": 1.3144702396814699e-05, + "loss": 0.1422, + "step": 44755 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143918611759911e-05, + "loss": 0.2372, + "step": 44760 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143134826705126e-05, + "loss": 0.3727, + "step": 44765 + }, + { + "epoch": 2.09, + "learning_rate": 1.3142351041650339e-05, + "loss": 0.1013, + "step": 44770 + }, + { + "epoch": 2.09, + "learning_rate": 1.3141567256595553e-05, + "loss": 0.0361, + "step": 44775 + }, + { + "epoch": 2.09, + "learning_rate": 1.3140783471540765e-05, + "loss": 0.088, + "step": 44780 + }, + { + "epoch": 2.09, + "learning_rate": 1.313999968648598e-05, + "loss": 0.0731, + "step": 44785 + }, + { + "epoch": 2.09, + "learning_rate": 1.3139215901431193e-05, + "loss": 0.0661, + "step": 44790 + }, + { + "epoch": 2.09, + "learning_rate": 1.3138432116376405e-05, + "loss": 0.1273, + "step": 44795 + }, + { + "epoch": 2.09, + "learning_rate": 1.313764833132162e-05, + "loss": 0.1032, + "step": 44800 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136864546266833e-05, + "loss": 0.2088, + "step": 44805 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136080761212047e-05, + "loss": 0.3149, + "step": 44810 + }, + { + "epoch": 2.09, + "learning_rate": 1.3135296976157259e-05, + "loss": 0.3955, + "step": 44815 + }, + { + "epoch": 2.09, + "learning_rate": 1.3134513191102474e-05, + "loss": 0.0629, + "step": 44820 + }, + { + "epoch": 2.09, + "learning_rate": 1.3133729406047687e-05, + "loss": 0.0434, + "step": 44825 + }, + { + "epoch": 2.09, + "learning_rate": 1.31329456209929e-05, + "loss": 0.1228, + "step": 44830 + }, + { + "epoch": 2.09, + "learning_rate": 1.3132161835938113e-05, + "loss": 0.0403, + "step": 44835 + }, + { + "epoch": 2.09, + "learning_rate": 1.3131378050883328e-05, + "loss": 0.0699, + "step": 44840 + }, + { + "epoch": 2.09, + "learning_rate": 1.313059426582854e-05, + "loss": 0.1441, + "step": 44845 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129810480773754e-05, + "loss": 0.1262, + "step": 44850 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129026695718967e-05, + "loss": 0.1689, + "step": 44855 + }, + { + "epoch": 2.09, + "learning_rate": 1.3128242910664179e-05, + "loss": 0.2219, + "step": 44860 + }, + { + "epoch": 2.09, + "learning_rate": 1.3127459125609394e-05, + "loss": 0.3262, + "step": 44865 + }, + { + "epoch": 2.09, + "learning_rate": 1.3126675340554607e-05, + "loss": 0.0307, + "step": 44870 + }, + { + "epoch": 2.09, + "learning_rate": 1.312589155549982e-05, + "loss": 0.0441, + "step": 44875 + }, + { + "epoch": 2.09, + "learning_rate": 1.3125107770445034e-05, + "loss": 0.0356, + "step": 44880 + }, + { + "epoch": 2.09, + "learning_rate": 1.3124323985390248e-05, + "loss": 0.0829, + "step": 44885 + }, + { + "epoch": 2.09, + "learning_rate": 1.312354020033546e-05, + "loss": 0.0608, + "step": 44890 + }, + { + "epoch": 2.09, + "learning_rate": 1.3122756415280674e-05, + "loss": 0.1429, + "step": 44895 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121972630225888e-05, + "loss": 0.125, + "step": 44900 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121188845171102e-05, + "loss": 0.1752, + "step": 44905 + }, + { + "epoch": 2.1, + "learning_rate": 1.3120405060116314e-05, + "loss": 0.1865, + "step": 44910 + }, + { + "epoch": 2.1, + "learning_rate": 1.311962127506153e-05, + "loss": 0.358, + "step": 44915 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118837490006742e-05, + "loss": 0.0342, + "step": 44920 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118053704951955e-05, + "loss": 0.0759, + "step": 44925 + }, + { + "epoch": 2.1, + "learning_rate": 1.3117269919897168e-05, + "loss": 0.0653, + "step": 44930 + }, + { + "epoch": 2.1, + "learning_rate": 1.311648613484238e-05, + "loss": 0.0713, + "step": 44935 + }, + { + "epoch": 2.1, + "learning_rate": 1.3115702349787596e-05, + "loss": 0.0867, + "step": 44940 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114918564732808e-05, + "loss": 0.0849, + "step": 44945 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114134779678022e-05, + "loss": 0.175, + "step": 44950 + }, + { + "epoch": 2.1, + "learning_rate": 1.3113350994623235e-05, + "loss": 0.1791, + "step": 44955 + }, + { + "epoch": 2.1, + "learning_rate": 1.311256720956845e-05, + "loss": 0.1874, + "step": 44960 + }, + { + "epoch": 2.1, + "learning_rate": 1.3111783424513662e-05, + "loss": 0.2178, + "step": 44965 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110999639458876e-05, + "loss": 0.0445, + "step": 44970 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110215854404088e-05, + "loss": 0.0422, + "step": 44975 + }, + { + "epoch": 2.1, + "learning_rate": 1.3109432069349304e-05, + "loss": 0.0326, + "step": 44980 + }, + { + "epoch": 2.1, + "learning_rate": 1.3108648284294516e-05, + "loss": 0.0743, + "step": 44985 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107864499239729e-05, + "loss": 0.1022, + "step": 44990 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107080714184942e-05, + "loss": 0.1278, + "step": 44995 + }, + { + "epoch": 2.1, + "learning_rate": 1.3106296929130156e-05, + "loss": 0.1631, + "step": 45000 + }, + { + "epoch": 2.1, + "learning_rate": 1.310551314407537e-05, + "loss": 0.1475, + "step": 45005 + }, + { + "epoch": 2.1, + "learning_rate": 1.3104729359020582e-05, + "loss": 0.2213, + "step": 45010 + }, + { + "epoch": 2.1, + "learning_rate": 1.3103945573965798e-05, + "loss": 0.3505, + "step": 45015 + }, + { + "epoch": 2.1, + "learning_rate": 1.310316178891101e-05, + "loss": 0.0454, + "step": 45020 + }, + { + "epoch": 2.1, + "learning_rate": 1.3102378003856224e-05, + "loss": 0.0718, + "step": 45025 + }, + { + "epoch": 2.1, + "learning_rate": 1.3101594218801436e-05, + "loss": 0.0849, + "step": 45030 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100810433746652e-05, + "loss": 0.0966, + "step": 45035 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100026648691864e-05, + "loss": 0.1208, + "step": 45040 + }, + { + "epoch": 2.1, + "learning_rate": 1.3099242863637078e-05, + "loss": 0.1753, + "step": 45045 + }, + { + "epoch": 2.1, + "learning_rate": 1.309845907858229e-05, + "loss": 0.2323, + "step": 45050 + }, + { + "epoch": 2.1, + "learning_rate": 1.3097675293527503e-05, + "loss": 0.2094, + "step": 45055 + }, + { + "epoch": 2.1, + "learning_rate": 1.3096891508472718e-05, + "loss": 0.2613, + "step": 45060 + }, + { + "epoch": 2.1, + "learning_rate": 1.309610772341793e-05, + "loss": 0.3646, + "step": 45065 + }, + { + "epoch": 2.1, + "learning_rate": 1.3095323938363144e-05, + "loss": 0.0468, + "step": 45070 + }, + { + "epoch": 2.1, + "learning_rate": 1.3094540153308356e-05, + "loss": 0.0398, + "step": 45075 + }, + { + "epoch": 2.1, + "learning_rate": 1.3093756368253572e-05, + "loss": 0.0819, + "step": 45080 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092972583198784e-05, + "loss": 0.0739, + "step": 45085 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092188798143998e-05, + "loss": 0.1802, + "step": 45090 + }, + { + "epoch": 2.1, + "learning_rate": 1.309140501308921e-05, + "loss": 0.1384, + "step": 45095 + }, + { + "epoch": 2.1, + "learning_rate": 1.3090621228034426e-05, + "loss": 0.1445, + "step": 45100 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089837442979638e-05, + "loss": 0.1752, + "step": 45105 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089053657924852e-05, + "loss": 0.4362, + "step": 45110 + }, + { + "epoch": 2.11, + "learning_rate": 1.3088269872870066e-05, + "loss": 0.3223, + "step": 45115 + }, + { + "epoch": 2.11, + "learning_rate": 1.3087486087815278e-05, + "loss": 0.0303, + "step": 45120 + }, + { + "epoch": 2.11, + "learning_rate": 1.3086702302760492e-05, + "loss": 0.047, + "step": 45125 + }, + { + "epoch": 2.11, + "learning_rate": 1.3085918517705704e-05, + "loss": 0.0458, + "step": 45130 + }, + { + "epoch": 2.11, + "learning_rate": 1.308513473265092e-05, + "loss": 0.097, + "step": 45135 + }, + { + "epoch": 2.11, + "learning_rate": 1.3084350947596132e-05, + "loss": 0.1045, + "step": 45140 + }, + { + "epoch": 2.11, + "learning_rate": 1.3083567162541346e-05, + "loss": 0.1566, + "step": 45145 + }, + { + "epoch": 2.11, + "learning_rate": 1.3082783377486558e-05, + "loss": 0.098, + "step": 45150 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081999592431774e-05, + "loss": 0.1235, + "step": 45155 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081215807376986e-05, + "loss": 0.2617, + "step": 45160 + }, + { + "epoch": 2.11, + "learning_rate": 1.30804320223222e-05, + "loss": 0.2604, + "step": 45165 + }, + { + "epoch": 2.11, + "learning_rate": 1.3079648237267412e-05, + "loss": 0.0638, + "step": 45170 + }, + { + "epoch": 2.11, + "learning_rate": 1.3078864452212628e-05, + "loss": 0.0437, + "step": 45175 + }, + { + "epoch": 2.11, + "learning_rate": 1.307808066715784e-05, + "loss": 0.125, + "step": 45180 + }, + { + "epoch": 2.11, + "learning_rate": 1.3077296882103052e-05, + "loss": 0.0794, + "step": 45185 + }, + { + "epoch": 2.11, + "learning_rate": 1.3076513097048266e-05, + "loss": 0.0686, + "step": 45190 + }, + { + "epoch": 2.11, + "learning_rate": 1.307572931199348e-05, + "loss": 0.1249, + "step": 45195 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074945526938694e-05, + "loss": 0.1045, + "step": 45200 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074161741883906e-05, + "loss": 0.151, + "step": 45205 + }, + { + "epoch": 2.11, + "learning_rate": 1.307337795682912e-05, + "loss": 0.2116, + "step": 45210 + }, + { + "epoch": 2.11, + "learning_rate": 1.3072594171774334e-05, + "loss": 0.3223, + "step": 45215 + }, + { + "epoch": 2.11, + "learning_rate": 1.3071810386719548e-05, + "loss": 0.1557, + "step": 45220 + }, + { + "epoch": 2.11, + "learning_rate": 1.307102660166476e-05, + "loss": 0.0382, + "step": 45225 + }, + { + "epoch": 2.11, + "learning_rate": 1.3070242816609976e-05, + "loss": 0.0262, + "step": 45230 + }, + { + "epoch": 2.11, + "learning_rate": 1.3069459031555188e-05, + "loss": 0.1138, + "step": 45235 + }, + { + "epoch": 2.11, + "learning_rate": 1.3068675246500402e-05, + "loss": 0.0946, + "step": 45240 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067891461445614e-05, + "loss": 0.0577, + "step": 45245 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067107676390826e-05, + "loss": 0.125, + "step": 45250 + }, + { + "epoch": 2.11, + "learning_rate": 1.3066323891336042e-05, + "loss": 0.0984, + "step": 45255 + }, + { + "epoch": 2.11, + "learning_rate": 1.3065540106281254e-05, + "loss": 0.3112, + "step": 45260 + }, + { + "epoch": 2.11, + "learning_rate": 1.3064756321226468e-05, + "loss": 0.4456, + "step": 45265 + }, + { + "epoch": 2.11, + "learning_rate": 1.306397253617168e-05, + "loss": 0.1024, + "step": 45270 + }, + { + "epoch": 2.11, + "learning_rate": 1.3063188751116896e-05, + "loss": 0.0654, + "step": 45275 + }, + { + "epoch": 2.11, + "learning_rate": 1.3062404966062108e-05, + "loss": 0.063, + "step": 45280 + }, + { + "epoch": 2.11, + "learning_rate": 1.3061621181007322e-05, + "loss": 0.073, + "step": 45285 + }, + { + "epoch": 2.11, + "learning_rate": 1.3060837395952534e-05, + "loss": 0.0433, + "step": 45290 + }, + { + "epoch": 2.11, + "learning_rate": 1.306005361089775e-05, + "loss": 0.0807, + "step": 45295 + }, + { + "epoch": 2.11, + "learning_rate": 1.3059269825842962e-05, + "loss": 0.1388, + "step": 45300 + }, + { + "epoch": 2.11, + "learning_rate": 1.3058486040788176e-05, + "loss": 0.1288, + "step": 45305 + }, + { + "epoch": 2.11, + "learning_rate": 1.3057702255733388e-05, + "loss": 0.1973, + "step": 45310 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056918470678602e-05, + "loss": 0.2249, + "step": 45315 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056134685623816e-05, + "loss": 0.0417, + "step": 45320 + }, + { + "epoch": 2.11, + "learning_rate": 1.3055350900569028e-05, + "loss": 0.0328, + "step": 45325 + }, + { + "epoch": 2.12, + "learning_rate": 1.3054567115514244e-05, + "loss": 0.092, + "step": 45330 + }, + { + "epoch": 2.12, + "learning_rate": 1.3053783330459456e-05, + "loss": 0.1243, + "step": 45335 + }, + { + "epoch": 2.12, + "learning_rate": 1.305299954540467e-05, + "loss": 0.056, + "step": 45340 + }, + { + "epoch": 2.12, + "learning_rate": 1.3052215760349882e-05, + "loss": 0.1449, + "step": 45345 + }, + { + "epoch": 2.12, + "learning_rate": 1.3051431975295098e-05, + "loss": 0.2336, + "step": 45350 + }, + { + "epoch": 2.12, + "learning_rate": 1.305064819024031e-05, + "loss": 0.1807, + "step": 45355 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049864405185524e-05, + "loss": 0.373, + "step": 45360 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049080620130736e-05, + "loss": 0.3324, + "step": 45365 + }, + { + "epoch": 2.12, + "learning_rate": 1.3048296835075951e-05, + "loss": 0.0863, + "step": 45370 + }, + { + "epoch": 2.12, + "learning_rate": 1.3047513050021164e-05, + "loss": 0.0827, + "step": 45375 + }, + { + "epoch": 2.12, + "learning_rate": 1.3046729264966376e-05, + "loss": 0.053, + "step": 45380 + }, + { + "epoch": 2.12, + "learning_rate": 1.304594547991159e-05, + "loss": 0.0841, + "step": 45385 + }, + { + "epoch": 2.12, + "learning_rate": 1.3045161694856802e-05, + "loss": 0.1131, + "step": 45390 + }, + { + "epoch": 2.12, + "learning_rate": 1.3044377909802018e-05, + "loss": 0.2027, + "step": 45395 + }, + { + "epoch": 2.12, + "learning_rate": 1.304359412474723e-05, + "loss": 0.1405, + "step": 45400 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042810339692444e-05, + "loss": 0.1859, + "step": 45405 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042026554637656e-05, + "loss": 0.2386, + "step": 45410 + }, + { + "epoch": 2.12, + "learning_rate": 1.3041242769582872e-05, + "loss": 0.3155, + "step": 45415 + }, + { + "epoch": 2.12, + "learning_rate": 1.3040458984528084e-05, + "loss": 0.0233, + "step": 45420 + }, + { + "epoch": 2.12, + "learning_rate": 1.3039675199473298e-05, + "loss": 0.0834, + "step": 45425 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038891414418512e-05, + "loss": 0.0897, + "step": 45430 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038107629363725e-05, + "loss": 0.0855, + "step": 45435 + }, + { + "epoch": 2.12, + "learning_rate": 1.3037323844308938e-05, + "loss": 0.0508, + "step": 45440 + }, + { + "epoch": 2.12, + "learning_rate": 1.303654005925415e-05, + "loss": 0.0879, + "step": 45445 + }, + { + "epoch": 2.12, + "learning_rate": 1.3035756274199365e-05, + "loss": 0.1952, + "step": 45450 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034972489144578e-05, + "loss": 0.1725, + "step": 45455 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034188704089792e-05, + "loss": 0.3064, + "step": 45460 + }, + { + "epoch": 2.12, + "learning_rate": 1.3033404919035004e-05, + "loss": 0.2897, + "step": 45465 + }, + { + "epoch": 2.12, + "learning_rate": 1.303262113398022e-05, + "loss": 0.0456, + "step": 45470 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031837348925432e-05, + "loss": 0.0907, + "step": 45475 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031053563870646e-05, + "loss": 0.0953, + "step": 45480 + }, + { + "epoch": 2.12, + "learning_rate": 1.3030269778815858e-05, + "loss": 0.0623, + "step": 45485 + }, + { + "epoch": 2.12, + "learning_rate": 1.3029485993761073e-05, + "loss": 0.0422, + "step": 45490 + }, + { + "epoch": 2.12, + "learning_rate": 1.3028702208706286e-05, + "loss": 0.1251, + "step": 45495 + }, + { + "epoch": 2.12, + "learning_rate": 1.30279184236515e-05, + "loss": 0.2241, + "step": 45500 + }, + { + "epoch": 2.12, + "learning_rate": 1.3027134638596712e-05, + "loss": 0.206, + "step": 45505 + }, + { + "epoch": 2.12, + "learning_rate": 1.3026350853541926e-05, + "loss": 0.1835, + "step": 45510 + }, + { + "epoch": 2.12, + "learning_rate": 1.302556706848714e-05, + "loss": 0.2396, + "step": 45515 + }, + { + "epoch": 2.12, + "learning_rate": 1.3024783283432352e-05, + "loss": 0.0602, + "step": 45520 + }, + { + "epoch": 2.12, + "learning_rate": 1.3023999498377566e-05, + "loss": 0.0384, + "step": 45525 + }, + { + "epoch": 2.12, + "learning_rate": 1.302321571332278e-05, + "loss": 0.0227, + "step": 45530 + }, + { + "epoch": 2.12, + "learning_rate": 1.3022431928267993e-05, + "loss": 0.0598, + "step": 45535 + }, + { + "epoch": 2.12, + "learning_rate": 1.3021648143213206e-05, + "loss": 0.1411, + "step": 45540 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020864358158421e-05, + "loss": 0.1612, + "step": 45545 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020080573103633e-05, + "loss": 0.218, + "step": 45550 + }, + { + "epoch": 2.13, + "learning_rate": 1.3019296788048847e-05, + "loss": 0.2523, + "step": 45555 + }, + { + "epoch": 2.13, + "learning_rate": 1.301851300299406e-05, + "loss": 0.1978, + "step": 45560 + }, + { + "epoch": 2.13, + "learning_rate": 1.3017729217939275e-05, + "loss": 0.3117, + "step": 45565 + }, + { + "epoch": 2.13, + "learning_rate": 1.3016945432884487e-05, + "loss": 0.1062, + "step": 45570 + }, + { + "epoch": 2.13, + "learning_rate": 1.30161616478297e-05, + "loss": 0.1145, + "step": 45575 + }, + { + "epoch": 2.13, + "learning_rate": 1.3015377862774913e-05, + "loss": 0.1031, + "step": 45580 + }, + { + "epoch": 2.13, + "learning_rate": 1.3014594077720126e-05, + "loss": 0.0727, + "step": 45585 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013810292665341e-05, + "loss": 0.0766, + "step": 45590 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013026507610554e-05, + "loss": 0.118, + "step": 45595 + }, + { + "epoch": 2.13, + "learning_rate": 1.3012242722555767e-05, + "loss": 0.0794, + "step": 45600 + }, + { + "epoch": 2.13, + "learning_rate": 1.301145893750098e-05, + "loss": 0.2502, + "step": 45605 + }, + { + "epoch": 2.13, + "learning_rate": 1.3010675152446195e-05, + "loss": 0.2556, + "step": 45610 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009891367391407e-05, + "loss": 0.2407, + "step": 45615 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009107582336621e-05, + "loss": 0.0709, + "step": 45620 + }, + { + "epoch": 2.13, + "learning_rate": 1.3008323797281834e-05, + "loss": 0.0743, + "step": 45625 + }, + { + "epoch": 2.13, + "learning_rate": 1.3007540012227049e-05, + "loss": 0.0364, + "step": 45630 + }, + { + "epoch": 2.13, + "learning_rate": 1.3006756227172261e-05, + "loss": 0.0777, + "step": 45635 + }, + { + "epoch": 2.13, + "learning_rate": 1.3005972442117474e-05, + "loss": 0.0727, + "step": 45640 + }, + { + "epoch": 2.13, + "learning_rate": 1.300518865706269e-05, + "loss": 0.1977, + "step": 45645 + }, + { + "epoch": 2.13, + "learning_rate": 1.3004404872007901e-05, + "loss": 0.1681, + "step": 45650 + }, + { + "epoch": 2.13, + "learning_rate": 1.3003621086953115e-05, + "loss": 0.2623, + "step": 45655 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002837301898328e-05, + "loss": 0.3067, + "step": 45660 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002053516843543e-05, + "loss": 0.3166, + "step": 45665 + }, + { + "epoch": 2.13, + "learning_rate": 1.3001269731788755e-05, + "loss": 0.1028, + "step": 45670 + }, + { + "epoch": 2.13, + "learning_rate": 1.300048594673397e-05, + "loss": 0.0572, + "step": 45675 + }, + { + "epoch": 2.13, + "learning_rate": 1.2999702161679181e-05, + "loss": 0.0974, + "step": 45680 + }, + { + "epoch": 2.13, + "learning_rate": 1.2998918376624397e-05, + "loss": 0.1084, + "step": 45685 + }, + { + "epoch": 2.13, + "learning_rate": 1.299813459156961e-05, + "loss": 0.1394, + "step": 45690 + }, + { + "epoch": 2.13, + "learning_rate": 1.2997350806514823e-05, + "loss": 0.1306, + "step": 45695 + }, + { + "epoch": 2.13, + "learning_rate": 1.2996567021460035e-05, + "loss": 0.1705, + "step": 45700 + }, + { + "epoch": 2.13, + "learning_rate": 1.2995783236405248e-05, + "loss": 0.1305, + "step": 45705 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994999451350463e-05, + "loss": 0.3169, + "step": 45710 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994215666295675e-05, + "loss": 0.2909, + "step": 45715 + }, + { + "epoch": 2.13, + "learning_rate": 1.299343188124089e-05, + "loss": 0.0292, + "step": 45720 + }, + { + "epoch": 2.13, + "learning_rate": 1.2992648096186102e-05, + "loss": 0.0243, + "step": 45725 + }, + { + "epoch": 2.13, + "learning_rate": 1.2991864311131317e-05, + "loss": 0.0665, + "step": 45730 + }, + { + "epoch": 2.13, + "learning_rate": 1.299108052607653e-05, + "loss": 0.0384, + "step": 45735 + }, + { + "epoch": 2.13, + "learning_rate": 1.2990296741021743e-05, + "loss": 0.0984, + "step": 45740 + }, + { + "epoch": 2.13, + "learning_rate": 1.2989512955966957e-05, + "loss": 0.0985, + "step": 45745 + }, + { + "epoch": 2.13, + "learning_rate": 1.2988729170912171e-05, + "loss": 0.1137, + "step": 45750 + }, + { + "epoch": 2.13, + "learning_rate": 1.2987945385857383e-05, + "loss": 0.2239, + "step": 45755 + }, + { + "epoch": 2.14, + "learning_rate": 1.2987161600802599e-05, + "loss": 0.1643, + "step": 45760 + }, + { + "epoch": 2.14, + "learning_rate": 1.2986377815747811e-05, + "loss": 0.4238, + "step": 45765 + }, + { + "epoch": 2.14, + "learning_rate": 1.2985594030693023e-05, + "loss": 0.0549, + "step": 45770 + }, + { + "epoch": 2.14, + "learning_rate": 1.2984810245638237e-05, + "loss": 0.0319, + "step": 45775 + }, + { + "epoch": 2.14, + "learning_rate": 1.298402646058345e-05, + "loss": 0.0423, + "step": 45780 + }, + { + "epoch": 2.14, + "learning_rate": 1.2983242675528665e-05, + "loss": 0.0657, + "step": 45785 + }, + { + "epoch": 2.14, + "learning_rate": 1.2982458890473877e-05, + "loss": 0.065, + "step": 45790 + }, + { + "epoch": 2.14, + "learning_rate": 1.2981675105419091e-05, + "loss": 0.1418, + "step": 45795 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980891320364303e-05, + "loss": 0.1648, + "step": 45800 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980107535309519e-05, + "loss": 0.1982, + "step": 45805 + }, + { + "epoch": 2.14, + "learning_rate": 1.2979323750254731e-05, + "loss": 0.196, + "step": 45810 + }, + { + "epoch": 2.14, + "learning_rate": 1.2978539965199945e-05, + "loss": 0.3101, + "step": 45815 + }, + { + "epoch": 2.14, + "learning_rate": 1.2977756180145157e-05, + "loss": 0.0599, + "step": 45820 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976972395090373e-05, + "loss": 0.0809, + "step": 45825 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976188610035585e-05, + "loss": 0.1309, + "step": 45830 + }, + { + "epoch": 2.14, + "learning_rate": 1.2975404824980797e-05, + "loss": 0.0561, + "step": 45835 + }, + { + "epoch": 2.14, + "learning_rate": 1.2974621039926011e-05, + "loss": 0.0666, + "step": 45840 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973837254871225e-05, + "loss": 0.1195, + "step": 45845 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973053469816439e-05, + "loss": 0.1992, + "step": 45850 + }, + { + "epoch": 2.14, + "learning_rate": 1.2972269684761651e-05, + "loss": 0.1405, + "step": 45855 + }, + { + "epoch": 2.14, + "learning_rate": 1.2971485899706867e-05, + "loss": 0.2539, + "step": 45860 + }, + { + "epoch": 2.14, + "learning_rate": 1.2970702114652079e-05, + "loss": 0.3003, + "step": 45865 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969918329597293e-05, + "loss": 0.0268, + "step": 45870 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969134544542505e-05, + "loss": 0.029, + "step": 45875 + }, + { + "epoch": 2.14, + "learning_rate": 1.296835075948772e-05, + "loss": 0.0899, + "step": 45880 + }, + { + "epoch": 2.14, + "learning_rate": 1.2967566974432933e-05, + "loss": 0.0464, + "step": 45885 + }, + { + "epoch": 2.14, + "learning_rate": 1.2966783189378147e-05, + "loss": 0.1088, + "step": 45890 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965999404323359e-05, + "loss": 0.1264, + "step": 45895 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965215619268571e-05, + "loss": 0.1743, + "step": 45900 + }, + { + "epoch": 2.14, + "learning_rate": 1.2964431834213787e-05, + "loss": 0.2256, + "step": 45905 + }, + { + "epoch": 2.14, + "learning_rate": 1.2963648049158999e-05, + "loss": 0.235, + "step": 45910 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962864264104213e-05, + "loss": 0.4299, + "step": 45915 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962080479049425e-05, + "loss": 0.0647, + "step": 45920 + }, + { + "epoch": 2.14, + "learning_rate": 1.296129669399464e-05, + "loss": 0.0445, + "step": 45925 + }, + { + "epoch": 2.14, + "learning_rate": 1.2960512908939853e-05, + "loss": 0.0616, + "step": 45930 + }, + { + "epoch": 2.14, + "learning_rate": 1.2959729123885067e-05, + "loss": 0.0822, + "step": 45935 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958945338830279e-05, + "loss": 0.1084, + "step": 45940 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958161553775495e-05, + "loss": 0.1383, + "step": 45945 + }, + { + "epoch": 2.14, + "learning_rate": 1.2957377768720707e-05, + "loss": 0.309, + "step": 45950 + }, + { + "epoch": 2.14, + "learning_rate": 1.295659398366592e-05, + "loss": 0.1302, + "step": 45955 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955810198611135e-05, + "loss": 0.4292, + "step": 45960 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955026413556347e-05, + "loss": 0.3065, + "step": 45965 + }, + { + "epoch": 2.15, + "learning_rate": 1.2954242628501561e-05, + "loss": 0.0508, + "step": 45970 + }, + { + "epoch": 2.15, + "learning_rate": 1.2953458843446773e-05, + "loss": 0.0602, + "step": 45975 + }, + { + "epoch": 2.15, + "learning_rate": 1.2952675058391989e-05, + "loss": 0.08, + "step": 45980 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951891273337201e-05, + "loss": 0.0903, + "step": 45985 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951107488282415e-05, + "loss": 0.0842, + "step": 45990 + }, + { + "epoch": 2.15, + "learning_rate": 1.2950323703227627e-05, + "loss": 0.115, + "step": 45995 + }, + { + "epoch": 2.15, + "learning_rate": 1.2949539918172843e-05, + "loss": 0.0675, + "step": 46000 + }, + { + "epoch": 2.15, + "learning_rate": 1.2948756133118055e-05, + "loss": 0.1959, + "step": 46005 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947972348063269e-05, + "loss": 0.2618, + "step": 46010 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947188563008481e-05, + "loss": 0.291, + "step": 46015 + }, + { + "epoch": 2.15, + "learning_rate": 1.2946404777953697e-05, + "loss": 0.0569, + "step": 46020 + }, + { + "epoch": 2.15, + "learning_rate": 1.2945620992898909e-05, + "loss": 0.0237, + "step": 46025 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944837207844121e-05, + "loss": 0.0394, + "step": 46030 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944053422789335e-05, + "loss": 0.029, + "step": 46035 + }, + { + "epoch": 2.15, + "learning_rate": 1.2943269637734549e-05, + "loss": 0.069, + "step": 46040 + }, + { + "epoch": 2.15, + "learning_rate": 1.2942485852679763e-05, + "loss": 0.0995, + "step": 46045 + }, + { + "epoch": 2.15, + "learning_rate": 1.2941702067624975e-05, + "loss": 0.0975, + "step": 46050 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940918282570189e-05, + "loss": 0.1855, + "step": 46055 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940134497515403e-05, + "loss": 0.2035, + "step": 46060 + }, + { + "epoch": 2.15, + "learning_rate": 1.2939350712460617e-05, + "loss": 0.3937, + "step": 46065 + }, + { + "epoch": 2.15, + "learning_rate": 1.2938566927405829e-05, + "loss": 0.1026, + "step": 46070 + }, + { + "epoch": 2.15, + "learning_rate": 1.2937783142351044e-05, + "loss": 0.0271, + "step": 46075 + }, + { + "epoch": 2.15, + "learning_rate": 1.2936999357296257e-05, + "loss": 0.0701, + "step": 46080 + }, + { + "epoch": 2.15, + "learning_rate": 1.293621557224147e-05, + "loss": 0.0962, + "step": 46085 + }, + { + "epoch": 2.15, + "learning_rate": 1.2935431787186683e-05, + "loss": 0.0695, + "step": 46090 + }, + { + "epoch": 2.15, + "learning_rate": 1.2934648002131895e-05, + "loss": 0.0941, + "step": 46095 + }, + { + "epoch": 2.15, + "learning_rate": 1.293386421707711e-05, + "loss": 0.1758, + "step": 46100 + }, + { + "epoch": 2.15, + "learning_rate": 1.2933080432022323e-05, + "loss": 0.2329, + "step": 46105 + }, + { + "epoch": 2.15, + "learning_rate": 1.2932296646967537e-05, + "loss": 0.257, + "step": 46110 + }, + { + "epoch": 2.15, + "learning_rate": 1.2931512861912749e-05, + "loss": 0.1914, + "step": 46115 + }, + { + "epoch": 2.15, + "learning_rate": 1.2930729076857964e-05, + "loss": 0.0743, + "step": 46120 + }, + { + "epoch": 2.15, + "learning_rate": 1.2929945291803177e-05, + "loss": 0.0692, + "step": 46125 + }, + { + "epoch": 2.15, + "learning_rate": 1.292916150674839e-05, + "loss": 0.0397, + "step": 46130 + }, + { + "epoch": 2.15, + "learning_rate": 1.2928377721693603e-05, + "loss": 0.0593, + "step": 46135 + }, + { + "epoch": 2.15, + "learning_rate": 1.2927593936638818e-05, + "loss": 0.1479, + "step": 46140 + }, + { + "epoch": 2.15, + "learning_rate": 1.292681015158403e-05, + "loss": 0.0926, + "step": 46145 + }, + { + "epoch": 2.15, + "learning_rate": 1.2926026366529245e-05, + "loss": 0.1446, + "step": 46150 + }, + { + "epoch": 2.15, + "learning_rate": 1.2925242581474457e-05, + "loss": 0.1745, + "step": 46155 + }, + { + "epoch": 2.15, + "learning_rate": 1.292445879641967e-05, + "loss": 0.2465, + "step": 46160 + }, + { + "epoch": 2.15, + "learning_rate": 1.2923675011364885e-05, + "loss": 0.2131, + "step": 46165 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922891226310097e-05, + "loss": 0.0662, + "step": 46170 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922107441255312e-05, + "loss": 0.0392, + "step": 46175 + }, + { + "epoch": 2.15, + "learning_rate": 1.2921323656200525e-05, + "loss": 0.1003, + "step": 46180 + }, + { + "epoch": 2.16, + "learning_rate": 1.2920539871145738e-05, + "loss": 0.1232, + "step": 46185 + }, + { + "epoch": 2.16, + "learning_rate": 1.291975608609095e-05, + "loss": 0.1044, + "step": 46190 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918972301036166e-05, + "loss": 0.0763, + "step": 46195 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918188515981379e-05, + "loss": 0.1681, + "step": 46200 + }, + { + "epoch": 2.16, + "learning_rate": 1.2917404730926592e-05, + "loss": 0.1915, + "step": 46205 + }, + { + "epoch": 2.16, + "learning_rate": 1.2916620945871805e-05, + "loss": 0.2404, + "step": 46210 + }, + { + "epoch": 2.16, + "learning_rate": 1.291583716081702e-05, + "loss": 0.2095, + "step": 46215 + }, + { + "epoch": 2.16, + "learning_rate": 1.2915053375762232e-05, + "loss": 0.0508, + "step": 46220 + }, + { + "epoch": 2.16, + "learning_rate": 1.2914269590707445e-05, + "loss": 0.0292, + "step": 46225 + }, + { + "epoch": 2.16, + "learning_rate": 1.2913485805652659e-05, + "loss": 0.0615, + "step": 46230 + }, + { + "epoch": 2.16, + "learning_rate": 1.291270202059787e-05, + "loss": 0.0614, + "step": 46235 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911918235543086e-05, + "loss": 0.1748, + "step": 46240 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911134450488299e-05, + "loss": 0.0778, + "step": 46245 + }, + { + "epoch": 2.16, + "learning_rate": 1.2910350665433512e-05, + "loss": 0.1938, + "step": 46250 + }, + { + "epoch": 2.16, + "learning_rate": 1.2909566880378725e-05, + "loss": 0.22, + "step": 46255 + }, + { + "epoch": 2.16, + "learning_rate": 1.290878309532394e-05, + "loss": 0.2227, + "step": 46260 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907999310269153e-05, + "loss": 0.2721, + "step": 46265 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907215525214366e-05, + "loss": 0.0752, + "step": 46270 + }, + { + "epoch": 2.16, + "learning_rate": 1.290643174015958e-05, + "loss": 0.0638, + "step": 46275 + }, + { + "epoch": 2.16, + "learning_rate": 1.2905647955104794e-05, + "loss": 0.0867, + "step": 46280 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904864170050006e-05, + "loss": 0.0472, + "step": 46285 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904080384995219e-05, + "loss": 0.0876, + "step": 46290 + }, + { + "epoch": 2.16, + "learning_rate": 1.2903296599940434e-05, + "loss": 0.1803, + "step": 46295 + }, + { + "epoch": 2.16, + "learning_rate": 1.2902512814885646e-05, + "loss": 0.1082, + "step": 46300 + }, + { + "epoch": 2.16, + "learning_rate": 1.290172902983086e-05, + "loss": 0.203, + "step": 46305 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900945244776073e-05, + "loss": 0.3482, + "step": 46310 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900161459721288e-05, + "loss": 0.31, + "step": 46315 + }, + { + "epoch": 2.16, + "learning_rate": 1.28993776746665e-05, + "loss": 0.0936, + "step": 46320 + }, + { + "epoch": 2.16, + "learning_rate": 1.2898593889611714e-05, + "loss": 0.0706, + "step": 46325 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897810104556927e-05, + "loss": 0.0416, + "step": 46330 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897026319502142e-05, + "loss": 0.1093, + "step": 46335 + }, + { + "epoch": 2.16, + "learning_rate": 1.2896242534447354e-05, + "loss": 0.1354, + "step": 46340 + }, + { + "epoch": 2.16, + "learning_rate": 1.2895458749392568e-05, + "loss": 0.0767, + "step": 46345 + }, + { + "epoch": 2.16, + "learning_rate": 1.289467496433778e-05, + "loss": 0.1904, + "step": 46350 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893891179282994e-05, + "loss": 0.1699, + "step": 46355 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893107394228208e-05, + "loss": 0.1509, + "step": 46360 + }, + { + "epoch": 2.16, + "learning_rate": 1.289232360917342e-05, + "loss": 0.3889, + "step": 46365 + }, + { + "epoch": 2.16, + "learning_rate": 1.2891539824118634e-05, + "loss": 0.0727, + "step": 46370 + }, + { + "epoch": 2.16, + "learning_rate": 1.2890756039063848e-05, + "loss": 0.0465, + "step": 46375 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889972254009062e-05, + "loss": 0.0655, + "step": 46380 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889188468954274e-05, + "loss": 0.0616, + "step": 46385 + }, + { + "epoch": 2.16, + "learning_rate": 1.288840468389949e-05, + "loss": 0.1318, + "step": 46390 + }, + { + "epoch": 2.16, + "learning_rate": 1.2887620898844702e-05, + "loss": 0.1106, + "step": 46395 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886837113789916e-05, + "loss": 0.175, + "step": 46400 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886053328735128e-05, + "loss": 0.2635, + "step": 46405 + }, + { + "epoch": 2.17, + "learning_rate": 1.2885269543680344e-05, + "loss": 0.2759, + "step": 46410 + }, + { + "epoch": 2.17, + "learning_rate": 1.2884485758625556e-05, + "loss": 0.2409, + "step": 46415 + }, + { + "epoch": 2.17, + "learning_rate": 1.2883701973570768e-05, + "loss": 0.0379, + "step": 46420 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882918188515982e-05, + "loss": 0.0526, + "step": 46425 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882134403461194e-05, + "loss": 0.0494, + "step": 46430 + }, + { + "epoch": 2.17, + "learning_rate": 1.288135061840641e-05, + "loss": 0.0765, + "step": 46435 + }, + { + "epoch": 2.17, + "learning_rate": 1.2880566833351622e-05, + "loss": 0.1189, + "step": 46440 + }, + { + "epoch": 2.17, + "learning_rate": 1.2879783048296836e-05, + "loss": 0.1203, + "step": 46445 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878999263242048e-05, + "loss": 0.1393, + "step": 46450 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878215478187264e-05, + "loss": 0.1572, + "step": 46455 + }, + { + "epoch": 2.17, + "learning_rate": 1.2877431693132476e-05, + "loss": 0.3012, + "step": 46460 + }, + { + "epoch": 2.17, + "learning_rate": 1.287664790807769e-05, + "loss": 0.2512, + "step": 46465 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875864123022902e-05, + "loss": 0.0924, + "step": 46470 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875080337968118e-05, + "loss": 0.0359, + "step": 46475 + }, + { + "epoch": 2.17, + "learning_rate": 1.287429655291333e-05, + "loss": 0.079, + "step": 46480 + }, + { + "epoch": 2.17, + "learning_rate": 1.2873512767858542e-05, + "loss": 0.0759, + "step": 46485 + }, + { + "epoch": 2.17, + "learning_rate": 1.2872728982803758e-05, + "loss": 0.103, + "step": 46490 + }, + { + "epoch": 2.17, + "learning_rate": 1.287194519774897e-05, + "loss": 0.0971, + "step": 46495 + }, + { + "epoch": 2.17, + "learning_rate": 1.2871161412694184e-05, + "loss": 0.1746, + "step": 46500 + }, + { + "epoch": 2.17, + "learning_rate": 1.2870377627639396e-05, + "loss": 0.1998, + "step": 46505 + }, + { + "epoch": 2.17, + "learning_rate": 1.2869593842584612e-05, + "loss": 0.2048, + "step": 46510 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868810057529824e-05, + "loss": 0.3011, + "step": 46515 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868026272475038e-05, + "loss": 0.0584, + "step": 46520 + }, + { + "epoch": 2.17, + "learning_rate": 1.286724248742025e-05, + "loss": 0.064, + "step": 46525 + }, + { + "epoch": 2.17, + "learning_rate": 1.2866458702365466e-05, + "loss": 0.0913, + "step": 46530 + }, + { + "epoch": 2.17, + "learning_rate": 1.2865674917310678e-05, + "loss": 0.1153, + "step": 46535 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864891132255892e-05, + "loss": 0.069, + "step": 46540 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864107347201104e-05, + "loss": 0.068, + "step": 46545 + }, + { + "epoch": 2.17, + "learning_rate": 1.2863323562146316e-05, + "loss": 0.1295, + "step": 46550 + }, + { + "epoch": 2.17, + "learning_rate": 1.2862539777091532e-05, + "loss": 0.229, + "step": 46555 + }, + { + "epoch": 2.17, + "learning_rate": 1.2861755992036744e-05, + "loss": 0.2234, + "step": 46560 + }, + { + "epoch": 2.17, + "learning_rate": 1.2860972206981958e-05, + "loss": 0.2418, + "step": 46565 + }, + { + "epoch": 2.17, + "learning_rate": 1.286018842192717e-05, + "loss": 0.0448, + "step": 46570 + }, + { + "epoch": 2.17, + "learning_rate": 1.2859404636872386e-05, + "loss": 0.0333, + "step": 46575 + }, + { + "epoch": 2.17, + "learning_rate": 1.2858620851817598e-05, + "loss": 0.0544, + "step": 46580 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857837066762812e-05, + "loss": 0.126, + "step": 46585 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857053281708026e-05, + "loss": 0.0373, + "step": 46590 + }, + { + "epoch": 2.17, + "learning_rate": 1.285626949665324e-05, + "loss": 0.0796, + "step": 46595 + }, + { + "epoch": 2.17, + "learning_rate": 1.2855485711598452e-05, + "loss": 0.1411, + "step": 46600 + }, + { + "epoch": 2.17, + "learning_rate": 1.2854701926543668e-05, + "loss": 0.1499, + "step": 46605 + }, + { + "epoch": 2.17, + "learning_rate": 1.285391814148888e-05, + "loss": 0.259, + "step": 46610 + }, + { + "epoch": 2.18, + "learning_rate": 1.2853134356434092e-05, + "loss": 0.2371, + "step": 46615 + }, + { + "epoch": 2.18, + "learning_rate": 1.2852350571379306e-05, + "loss": 0.0754, + "step": 46620 + }, + { + "epoch": 2.18, + "learning_rate": 1.2851566786324518e-05, + "loss": 0.0368, + "step": 46625 + }, + { + "epoch": 2.18, + "learning_rate": 1.2850783001269734e-05, + "loss": 0.0324, + "step": 46630 + }, + { + "epoch": 2.18, + "learning_rate": 1.2849999216214946e-05, + "loss": 0.0577, + "step": 46635 + }, + { + "epoch": 2.18, + "learning_rate": 1.284921543116016e-05, + "loss": 0.0888, + "step": 46640 + }, + { + "epoch": 2.18, + "learning_rate": 1.2848431646105372e-05, + "loss": 0.1034, + "step": 46645 + }, + { + "epoch": 2.18, + "learning_rate": 1.2847647861050588e-05, + "loss": 0.1378, + "step": 46650 + }, + { + "epoch": 2.18, + "learning_rate": 1.28468640759958e-05, + "loss": 0.2142, + "step": 46655 + }, + { + "epoch": 2.18, + "learning_rate": 1.2846080290941014e-05, + "loss": 0.3083, + "step": 46660 + }, + { + "epoch": 2.18, + "learning_rate": 1.2845296505886226e-05, + "loss": 0.193, + "step": 46665 + }, + { + "epoch": 2.18, + "learning_rate": 1.2844512720831442e-05, + "loss": 0.0664, + "step": 46670 + }, + { + "epoch": 2.18, + "learning_rate": 1.2843728935776654e-05, + "loss": 0.04, + "step": 46675 + }, + { + "epoch": 2.18, + "learning_rate": 1.2842945150721866e-05, + "loss": 0.0385, + "step": 46680 + }, + { + "epoch": 2.18, + "learning_rate": 1.284216136566708e-05, + "loss": 0.0412, + "step": 46685 + }, + { + "epoch": 2.18, + "learning_rate": 1.2841377580612294e-05, + "loss": 0.0962, + "step": 46690 + }, + { + "epoch": 2.18, + "learning_rate": 1.2840593795557508e-05, + "loss": 0.1109, + "step": 46695 + }, + { + "epoch": 2.18, + "learning_rate": 1.283981001050272e-05, + "loss": 0.1748, + "step": 46700 + }, + { + "epoch": 2.18, + "learning_rate": 1.2839026225447936e-05, + "loss": 0.1363, + "step": 46705 + }, + { + "epoch": 2.18, + "learning_rate": 1.2838242440393148e-05, + "loss": 0.3242, + "step": 46710 + }, + { + "epoch": 2.18, + "learning_rate": 1.2837458655338362e-05, + "loss": 0.2394, + "step": 46715 + }, + { + "epoch": 2.18, + "learning_rate": 1.2836674870283574e-05, + "loss": 0.0867, + "step": 46720 + }, + { + "epoch": 2.18, + "learning_rate": 1.283589108522879e-05, + "loss": 0.053, + "step": 46725 + }, + { + "epoch": 2.18, + "learning_rate": 1.2835107300174002e-05, + "loss": 0.0816, + "step": 46730 + }, + { + "epoch": 2.18, + "learning_rate": 1.2834323515119216e-05, + "loss": 0.0848, + "step": 46735 + }, + { + "epoch": 2.18, + "learning_rate": 1.2833539730064428e-05, + "loss": 0.1253, + "step": 46740 + }, + { + "epoch": 2.18, + "learning_rate": 1.283275594500964e-05, + "loss": 0.1279, + "step": 46745 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831972159954856e-05, + "loss": 0.1643, + "step": 46750 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831188374900068e-05, + "loss": 0.1052, + "step": 46755 + }, + { + "epoch": 2.18, + "learning_rate": 1.2830404589845282e-05, + "loss": 0.414, + "step": 46760 + }, + { + "epoch": 2.18, + "learning_rate": 1.2829620804790494e-05, + "loss": 0.3846, + "step": 46765 + }, + { + "epoch": 2.18, + "learning_rate": 1.282883701973571e-05, + "loss": 0.0487, + "step": 46770 + }, + { + "epoch": 2.18, + "learning_rate": 1.2828053234680922e-05, + "loss": 0.049, + "step": 46775 + }, + { + "epoch": 2.18, + "learning_rate": 1.2827269449626136e-05, + "loss": 0.0539, + "step": 46780 + }, + { + "epoch": 2.18, + "learning_rate": 1.2826485664571348e-05, + "loss": 0.0454, + "step": 46785 + }, + { + "epoch": 2.18, + "learning_rate": 1.2825701879516563e-05, + "loss": 0.1597, + "step": 46790 + }, + { + "epoch": 2.18, + "learning_rate": 1.2824918094461776e-05, + "loss": 0.1002, + "step": 46795 + }, + { + "epoch": 2.18, + "learning_rate": 1.282413430940699e-05, + "loss": 0.1373, + "step": 46800 + }, + { + "epoch": 2.18, + "learning_rate": 1.2823350524352204e-05, + "loss": 0.2601, + "step": 46805 + }, + { + "epoch": 2.18, + "learning_rate": 1.2822566739297416e-05, + "loss": 0.2318, + "step": 46810 + }, + { + "epoch": 2.18, + "learning_rate": 1.282178295424263e-05, + "loss": 0.3056, + "step": 46815 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820999169187842e-05, + "loss": 0.026, + "step": 46820 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820215384133057e-05, + "loss": 0.103, + "step": 46825 + }, + { + "epoch": 2.19, + "learning_rate": 1.281943159907827e-05, + "loss": 0.0383, + "step": 46830 + }, + { + "epoch": 2.19, + "learning_rate": 1.2818647814023484e-05, + "loss": 0.0978, + "step": 46835 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817864028968696e-05, + "loss": 0.0945, + "step": 46840 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817080243913911e-05, + "loss": 0.1017, + "step": 46845 + }, + { + "epoch": 2.19, + "learning_rate": 1.2816296458859124e-05, + "loss": 0.0924, + "step": 46850 + }, + { + "epoch": 2.19, + "learning_rate": 1.2815512673804337e-05, + "loss": 0.156, + "step": 46855 + }, + { + "epoch": 2.19, + "learning_rate": 1.281472888874955e-05, + "loss": 0.2734, + "step": 46860 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813945103694765e-05, + "loss": 0.3109, + "step": 46865 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813161318639978e-05, + "loss": 0.0688, + "step": 46870 + }, + { + "epoch": 2.19, + "learning_rate": 1.281237753358519e-05, + "loss": 0.0376, + "step": 46875 + }, + { + "epoch": 2.19, + "learning_rate": 1.2811593748530404e-05, + "loss": 0.0537, + "step": 46880 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810809963475616e-05, + "loss": 0.084, + "step": 46885 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810026178420831e-05, + "loss": 0.1507, + "step": 46890 + }, + { + "epoch": 2.19, + "learning_rate": 1.2809242393366044e-05, + "loss": 0.144, + "step": 46895 + }, + { + "epoch": 2.19, + "learning_rate": 1.2808458608311258e-05, + "loss": 0.1558, + "step": 46900 + }, + { + "epoch": 2.19, + "learning_rate": 1.2807674823256471e-05, + "loss": 0.1787, + "step": 46905 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806891038201685e-05, + "loss": 0.2776, + "step": 46910 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806107253146898e-05, + "loss": 0.3282, + "step": 46915 + }, + { + "epoch": 2.19, + "learning_rate": 1.2805323468092113e-05, + "loss": 0.0975, + "step": 46920 + }, + { + "epoch": 2.19, + "learning_rate": 1.2804539683037325e-05, + "loss": 0.0326, + "step": 46925 + }, + { + "epoch": 2.19, + "learning_rate": 1.280375589798254e-05, + "loss": 0.0807, + "step": 46930 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802972112927752e-05, + "loss": 0.1076, + "step": 46935 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802188327872964e-05, + "loss": 0.123, + "step": 46940 + }, + { + "epoch": 2.19, + "learning_rate": 1.280140454281818e-05, + "loss": 0.1297, + "step": 46945 + }, + { + "epoch": 2.19, + "learning_rate": 1.2800620757763392e-05, + "loss": 0.13, + "step": 46950 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799836972708605e-05, + "loss": 0.1437, + "step": 46955 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799053187653818e-05, + "loss": 0.1865, + "step": 46960 + }, + { + "epoch": 2.19, + "learning_rate": 1.2798269402599033e-05, + "loss": 0.2441, + "step": 46965 + }, + { + "epoch": 2.19, + "learning_rate": 1.2797485617544245e-05, + "loss": 0.0758, + "step": 46970 + }, + { + "epoch": 2.19, + "learning_rate": 1.279670183248946e-05, + "loss": 0.0861, + "step": 46975 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795918047434672e-05, + "loss": 0.0704, + "step": 46980 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795134262379887e-05, + "loss": 0.0482, + "step": 46985 + }, + { + "epoch": 2.19, + "learning_rate": 1.27943504773251e-05, + "loss": 0.0407, + "step": 46990 + }, + { + "epoch": 2.19, + "learning_rate": 1.2793566692270313e-05, + "loss": 0.1309, + "step": 46995 + }, + { + "epoch": 2.19, + "learning_rate": 1.2792782907215526e-05, + "loss": 0.1658, + "step": 47000 + }, + { + "epoch": 2.19, + "learning_rate": 1.279199912216074e-05, + "loss": 0.1571, + "step": 47005 + }, + { + "epoch": 2.19, + "learning_rate": 1.2791215337105953e-05, + "loss": 0.2098, + "step": 47010 + }, + { + "epoch": 2.19, + "learning_rate": 1.2790431552051166e-05, + "loss": 0.2532, + "step": 47015 + }, + { + "epoch": 2.19, + "learning_rate": 1.2789647766996381e-05, + "loss": 0.0152, + "step": 47020 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788863981941593e-05, + "loss": 0.0505, + "step": 47025 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788080196886807e-05, + "loss": 0.0752, + "step": 47030 + }, + { + "epoch": 2.19, + "learning_rate": 1.278729641183202e-05, + "loss": 0.0727, + "step": 47035 + }, + { + "epoch": 2.19, + "learning_rate": 1.2786512626777235e-05, + "loss": 0.1866, + "step": 47040 + }, + { + "epoch": 2.2, + "learning_rate": 1.2785728841722447e-05, + "loss": 0.0927, + "step": 47045 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784945056667661e-05, + "loss": 0.0914, + "step": 47050 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784161271612873e-05, + "loss": 0.2837, + "step": 47055 + }, + { + "epoch": 2.2, + "learning_rate": 1.2783377486558089e-05, + "loss": 0.3236, + "step": 47060 + }, + { + "epoch": 2.2, + "learning_rate": 1.2782593701503301e-05, + "loss": 0.3262, + "step": 47065 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781809916448513e-05, + "loss": 0.0784, + "step": 47070 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781026131393727e-05, + "loss": 0.0751, + "step": 47075 + }, + { + "epoch": 2.2, + "learning_rate": 1.278024234633894e-05, + "loss": 0.0486, + "step": 47080 + }, + { + "epoch": 2.2, + "learning_rate": 1.2779458561284155e-05, + "loss": 0.1281, + "step": 47085 + }, + { + "epoch": 2.2, + "learning_rate": 1.2778674776229367e-05, + "loss": 0.1138, + "step": 47090 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777890991174581e-05, + "loss": 0.1101, + "step": 47095 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777107206119793e-05, + "loss": 0.1547, + "step": 47100 + }, + { + "epoch": 2.2, + "learning_rate": 1.2776323421065009e-05, + "loss": 0.1095, + "step": 47105 + }, + { + "epoch": 2.2, + "learning_rate": 1.2775539636010221e-05, + "loss": 0.3924, + "step": 47110 + }, + { + "epoch": 2.2, + "learning_rate": 1.2774755850955435e-05, + "loss": 0.314, + "step": 47115 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773972065900649e-05, + "loss": 0.0539, + "step": 47120 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773188280845863e-05, + "loss": 0.039, + "step": 47125 + }, + { + "epoch": 2.2, + "learning_rate": 1.2772404495791075e-05, + "loss": 0.0658, + "step": 47130 + }, + { + "epoch": 2.2, + "learning_rate": 1.2771620710736287e-05, + "loss": 0.0625, + "step": 47135 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770836925681503e-05, + "loss": 0.115, + "step": 47140 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770053140626715e-05, + "loss": 0.1128, + "step": 47145 + }, + { + "epoch": 2.2, + "learning_rate": 1.2769269355571929e-05, + "loss": 0.1195, + "step": 47150 + }, + { + "epoch": 2.2, + "learning_rate": 1.2768485570517141e-05, + "loss": 0.1494, + "step": 47155 + }, + { + "epoch": 2.2, + "learning_rate": 1.2767701785462357e-05, + "loss": 0.1993, + "step": 47160 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766918000407569e-05, + "loss": 0.2361, + "step": 47165 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766134215352783e-05, + "loss": 0.0823, + "step": 47170 + }, + { + "epoch": 2.2, + "learning_rate": 1.2765350430297995e-05, + "loss": 0.0557, + "step": 47175 + }, + { + "epoch": 2.2, + "learning_rate": 1.2764566645243211e-05, + "loss": 0.0295, + "step": 47180 + }, + { + "epoch": 2.2, + "learning_rate": 1.2763782860188423e-05, + "loss": 0.0804, + "step": 47185 + }, + { + "epoch": 2.2, + "learning_rate": 1.2762999075133637e-05, + "loss": 0.0986, + "step": 47190 + }, + { + "epoch": 2.2, + "learning_rate": 1.276221529007885e-05, + "loss": 0.1192, + "step": 47195 + }, + { + "epoch": 2.2, + "learning_rate": 1.2761431505024063e-05, + "loss": 0.1397, + "step": 47200 + }, + { + "epoch": 2.2, + "learning_rate": 1.2760647719969277e-05, + "loss": 0.2456, + "step": 47205 + }, + { + "epoch": 2.2, + "learning_rate": 1.275986393491449e-05, + "loss": 0.267, + "step": 47210 + }, + { + "epoch": 2.2, + "learning_rate": 1.2759080149859703e-05, + "loss": 0.3058, + "step": 47215 + }, + { + "epoch": 2.2, + "learning_rate": 1.2758296364804917e-05, + "loss": 0.0419, + "step": 47220 + }, + { + "epoch": 2.2, + "learning_rate": 1.2757512579750131e-05, + "loss": 0.0535, + "step": 47225 + }, + { + "epoch": 2.2, + "learning_rate": 1.2756728794695343e-05, + "loss": 0.0522, + "step": 47230 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755945009640559e-05, + "loss": 0.0544, + "step": 47235 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755161224585771e-05, + "loss": 0.1465, + "step": 47240 + }, + { + "epoch": 2.2, + "learning_rate": 1.2754377439530985e-05, + "loss": 0.1576, + "step": 47245 + }, + { + "epoch": 2.2, + "learning_rate": 1.2753593654476197e-05, + "loss": 0.1764, + "step": 47250 + }, + { + "epoch": 2.2, + "learning_rate": 1.2752809869421413e-05, + "loss": 0.2072, + "step": 47255 + }, + { + "epoch": 2.21, + "learning_rate": 1.2752026084366625e-05, + "loss": 0.2698, + "step": 47260 + }, + { + "epoch": 2.21, + "learning_rate": 1.2751242299311837e-05, + "loss": 0.2404, + "step": 47265 + }, + { + "epoch": 2.21, + "learning_rate": 1.2750458514257051e-05, + "loss": 0.1002, + "step": 47270 + }, + { + "epoch": 2.21, + "learning_rate": 1.2749674729202263e-05, + "loss": 0.0465, + "step": 47275 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748890944147479e-05, + "loss": 0.0432, + "step": 47280 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748107159092691e-05, + "loss": 0.0784, + "step": 47285 + }, + { + "epoch": 2.21, + "learning_rate": 1.2747323374037905e-05, + "loss": 0.0798, + "step": 47290 + }, + { + "epoch": 2.21, + "learning_rate": 1.2746539588983117e-05, + "loss": 0.0903, + "step": 47295 + }, + { + "epoch": 2.21, + "learning_rate": 1.2745755803928333e-05, + "loss": 0.197, + "step": 47300 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744972018873545e-05, + "loss": 0.2127, + "step": 47305 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744188233818759e-05, + "loss": 0.2387, + "step": 47310 + }, + { + "epoch": 2.21, + "learning_rate": 1.2743404448763971e-05, + "loss": 0.2397, + "step": 47315 + }, + { + "epoch": 2.21, + "learning_rate": 1.2742620663709187e-05, + "loss": 0.0321, + "step": 47320 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741836878654399e-05, + "loss": 0.0406, + "step": 47325 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741053093599611e-05, + "loss": 0.0561, + "step": 47330 + }, + { + "epoch": 2.21, + "learning_rate": 1.2740269308544827e-05, + "loss": 0.1, + "step": 47335 + }, + { + "epoch": 2.21, + "learning_rate": 1.2739485523490039e-05, + "loss": 0.0816, + "step": 47340 + }, + { + "epoch": 2.21, + "learning_rate": 1.2738701738435253e-05, + "loss": 0.1594, + "step": 47345 + }, + { + "epoch": 2.21, + "learning_rate": 1.2737917953380465e-05, + "loss": 0.1174, + "step": 47350 + }, + { + "epoch": 2.21, + "learning_rate": 1.273713416832568e-05, + "loss": 0.2226, + "step": 47355 + }, + { + "epoch": 2.21, + "learning_rate": 1.2736350383270893e-05, + "loss": 0.2778, + "step": 47360 + }, + { + "epoch": 2.21, + "learning_rate": 1.2735566598216107e-05, + "loss": 0.3245, + "step": 47365 + }, + { + "epoch": 2.21, + "learning_rate": 1.2734782813161319e-05, + "loss": 0.0526, + "step": 47370 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733999028106535e-05, + "loss": 0.0418, + "step": 47375 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733215243051747e-05, + "loss": 0.1414, + "step": 47380 + }, + { + "epoch": 2.21, + "learning_rate": 1.273243145799696e-05, + "loss": 0.0651, + "step": 47385 + }, + { + "epoch": 2.21, + "learning_rate": 1.2731647672942173e-05, + "loss": 0.1081, + "step": 47390 + }, + { + "epoch": 2.21, + "learning_rate": 1.2730863887887385e-05, + "loss": 0.1513, + "step": 47395 + }, + { + "epoch": 2.21, + "learning_rate": 1.27300801028326e-05, + "loss": 0.1419, + "step": 47400 + }, + { + "epoch": 2.21, + "learning_rate": 1.2729296317777813e-05, + "loss": 0.3379, + "step": 47405 + }, + { + "epoch": 2.21, + "learning_rate": 1.2728512532723027e-05, + "loss": 0.2274, + "step": 47410 + }, + { + "epoch": 2.21, + "learning_rate": 1.2727728747668239e-05, + "loss": 0.3456, + "step": 47415 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726944962613455e-05, + "loss": 0.0683, + "step": 47420 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726161177558667e-05, + "loss": 0.0611, + "step": 47425 + }, + { + "epoch": 2.21, + "learning_rate": 1.272537739250388e-05, + "loss": 0.0673, + "step": 47430 + }, + { + "epoch": 2.21, + "learning_rate": 1.2724593607449095e-05, + "loss": 0.1342, + "step": 47435 + }, + { + "epoch": 2.21, + "learning_rate": 1.2723809822394309e-05, + "loss": 0.0678, + "step": 47440 + }, + { + "epoch": 2.21, + "learning_rate": 1.272302603733952e-05, + "loss": 0.254, + "step": 47445 + }, + { + "epoch": 2.21, + "learning_rate": 1.2722242252284736e-05, + "loss": 0.1418, + "step": 47450 + }, + { + "epoch": 2.21, + "learning_rate": 1.2721458467229949e-05, + "loss": 0.1771, + "step": 47455 + }, + { + "epoch": 2.21, + "learning_rate": 1.272067468217516e-05, + "loss": 0.2067, + "step": 47460 + }, + { + "epoch": 2.21, + "learning_rate": 1.2719890897120375e-05, + "loss": 0.302, + "step": 47465 + }, + { + "epoch": 2.22, + "learning_rate": 1.2719107112065587e-05, + "loss": 0.0339, + "step": 47470 + }, + { + "epoch": 2.22, + "learning_rate": 1.2718323327010803e-05, + "loss": 0.068, + "step": 47475 + }, + { + "epoch": 2.22, + "learning_rate": 1.2717539541956015e-05, + "loss": 0.101, + "step": 47480 + }, + { + "epoch": 2.22, + "learning_rate": 1.2716755756901229e-05, + "loss": 0.0721, + "step": 47485 + }, + { + "epoch": 2.22, + "learning_rate": 1.271597197184644e-05, + "loss": 0.0679, + "step": 47490 + }, + { + "epoch": 2.22, + "learning_rate": 1.2715188186791656e-05, + "loss": 0.155, + "step": 47495 + }, + { + "epoch": 2.22, + "learning_rate": 1.2714404401736869e-05, + "loss": 0.0995, + "step": 47500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2713620616682083e-05, + "loss": 0.2467, + "step": 47505 + }, + { + "epoch": 2.22, + "learning_rate": 1.2712836831627295e-05, + "loss": 0.2413, + "step": 47510 + }, + { + "epoch": 2.22, + "learning_rate": 1.271205304657251e-05, + "loss": 0.2587, + "step": 47515 + }, + { + "epoch": 2.22, + "learning_rate": 1.2711269261517723e-05, + "loss": 0.0649, + "step": 47520 + }, + { + "epoch": 2.22, + "learning_rate": 1.2710485476462935e-05, + "loss": 0.0333, + "step": 47525 + }, + { + "epoch": 2.22, + "learning_rate": 1.2709701691408149e-05, + "loss": 0.0575, + "step": 47530 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708917906353363e-05, + "loss": 0.1551, + "step": 47535 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708134121298577e-05, + "loss": 0.2555, + "step": 47540 + }, + { + "epoch": 2.22, + "learning_rate": 1.2707350336243789e-05, + "loss": 0.0763, + "step": 47545 + }, + { + "epoch": 2.22, + "learning_rate": 1.2706566551189004e-05, + "loss": 0.1238, + "step": 47550 + }, + { + "epoch": 2.22, + "learning_rate": 1.2705782766134217e-05, + "loss": 0.1277, + "step": 47555 + }, + { + "epoch": 2.22, + "learning_rate": 1.270499898107943e-05, + "loss": 0.2487, + "step": 47560 + }, + { + "epoch": 2.22, + "learning_rate": 1.2704215196024643e-05, + "loss": 0.2534, + "step": 47565 + }, + { + "epoch": 2.22, + "learning_rate": 1.2703431410969858e-05, + "loss": 0.0228, + "step": 47570 + }, + { + "epoch": 2.22, + "learning_rate": 1.270264762591507e-05, + "loss": 0.0238, + "step": 47575 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701863840860284e-05, + "loss": 0.0532, + "step": 47580 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701080055805497e-05, + "loss": 0.1343, + "step": 47585 + }, + { + "epoch": 2.22, + "learning_rate": 1.2700296270750709e-05, + "loss": 0.1061, + "step": 47590 + }, + { + "epoch": 2.22, + "learning_rate": 1.2699512485695924e-05, + "loss": 0.1548, + "step": 47595 + }, + { + "epoch": 2.22, + "learning_rate": 1.2698728700641137e-05, + "loss": 0.2443, + "step": 47600 + }, + { + "epoch": 2.22, + "learning_rate": 1.269794491558635e-05, + "loss": 0.1995, + "step": 47605 + }, + { + "epoch": 2.22, + "learning_rate": 1.2697161130531563e-05, + "loss": 0.4085, + "step": 47610 + }, + { + "epoch": 2.22, + "learning_rate": 1.2696377345476778e-05, + "loss": 0.2198, + "step": 47615 + }, + { + "epoch": 2.22, + "learning_rate": 1.269559356042199e-05, + "loss": 0.0253, + "step": 47620 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694809775367204e-05, + "loss": 0.0511, + "step": 47625 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694025990312417e-05, + "loss": 0.0841, + "step": 47630 + }, + { + "epoch": 2.22, + "learning_rate": 1.2693242205257632e-05, + "loss": 0.0448, + "step": 47635 + }, + { + "epoch": 2.22, + "learning_rate": 1.2692458420202844e-05, + "loss": 0.0793, + "step": 47640 + }, + { + "epoch": 2.22, + "learning_rate": 1.2691674635148058e-05, + "loss": 0.0846, + "step": 47645 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690890850093272e-05, + "loss": 0.1674, + "step": 47650 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690107065038484e-05, + "loss": 0.129, + "step": 47655 + }, + { + "epoch": 2.22, + "learning_rate": 1.2689323279983698e-05, + "loss": 0.2383, + "step": 47660 + }, + { + "epoch": 2.22, + "learning_rate": 1.268853949492891e-05, + "loss": 0.3317, + "step": 47665 + }, + { + "epoch": 2.22, + "learning_rate": 1.2687755709874126e-05, + "loss": 0.0892, + "step": 47670 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686971924819338e-05, + "loss": 0.0223, + "step": 47675 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686188139764552e-05, + "loss": 0.077, + "step": 47680 + }, + { + "epoch": 2.23, + "learning_rate": 1.2685404354709765e-05, + "loss": 0.0558, + "step": 47685 + }, + { + "epoch": 2.23, + "learning_rate": 1.268462056965498e-05, + "loss": 0.0593, + "step": 47690 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683836784600192e-05, + "loss": 0.0725, + "step": 47695 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683052999545406e-05, + "loss": 0.2018, + "step": 47700 + }, + { + "epoch": 2.23, + "learning_rate": 1.2682269214490618e-05, + "loss": 0.1803, + "step": 47705 + }, + { + "epoch": 2.23, + "learning_rate": 1.2681485429435834e-05, + "loss": 0.2223, + "step": 47710 + }, + { + "epoch": 2.23, + "learning_rate": 1.2680701644381046e-05, + "loss": 0.4095, + "step": 47715 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679917859326258e-05, + "loss": 0.0528, + "step": 47720 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679134074271472e-05, + "loss": 0.0447, + "step": 47725 + }, + { + "epoch": 2.23, + "learning_rate": 1.2678350289216685e-05, + "loss": 0.0256, + "step": 47730 + }, + { + "epoch": 2.23, + "learning_rate": 1.26775665041619e-05, + "loss": 0.0814, + "step": 47735 + }, + { + "epoch": 2.23, + "learning_rate": 1.2676782719107112e-05, + "loss": 0.1283, + "step": 47740 + }, + { + "epoch": 2.23, + "learning_rate": 1.2675998934052326e-05, + "loss": 0.0823, + "step": 47745 + }, + { + "epoch": 2.23, + "learning_rate": 1.267521514899754e-05, + "loss": 0.0804, + "step": 47750 + }, + { + "epoch": 2.23, + "learning_rate": 1.2674431363942754e-05, + "loss": 0.1365, + "step": 47755 + }, + { + "epoch": 2.23, + "learning_rate": 1.2673647578887966e-05, + "loss": 0.2962, + "step": 47760 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672863793833182e-05, + "loss": 0.3267, + "step": 47765 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672080008778394e-05, + "loss": 0.0293, + "step": 47770 + }, + { + "epoch": 2.23, + "learning_rate": 1.2671296223723608e-05, + "loss": 0.0746, + "step": 47775 + }, + { + "epoch": 2.23, + "learning_rate": 1.267051243866882e-05, + "loss": 0.0665, + "step": 47780 + }, + { + "epoch": 2.23, + "learning_rate": 1.2669728653614032e-05, + "loss": 0.0971, + "step": 47785 + }, + { + "epoch": 2.23, + "learning_rate": 1.2668944868559248e-05, + "loss": 0.0418, + "step": 47790 + }, + { + "epoch": 2.23, + "learning_rate": 1.266816108350446e-05, + "loss": 0.1123, + "step": 47795 + }, + { + "epoch": 2.23, + "learning_rate": 1.2667377298449674e-05, + "loss": 0.0995, + "step": 47800 + }, + { + "epoch": 2.23, + "learning_rate": 1.2666593513394886e-05, + "loss": 0.1129, + "step": 47805 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665809728340102e-05, + "loss": 0.2582, + "step": 47810 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665025943285314e-05, + "loss": 0.3506, + "step": 47815 + }, + { + "epoch": 2.23, + "learning_rate": 1.2664242158230528e-05, + "loss": 0.0258, + "step": 47820 + }, + { + "epoch": 2.23, + "learning_rate": 1.266345837317574e-05, + "loss": 0.0343, + "step": 47825 + }, + { + "epoch": 2.23, + "learning_rate": 1.2662674588120956e-05, + "loss": 0.0366, + "step": 47830 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661890803066168e-05, + "loss": 0.1132, + "step": 47835 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661107018011382e-05, + "loss": 0.0819, + "step": 47840 + }, + { + "epoch": 2.23, + "learning_rate": 1.2660323232956594e-05, + "loss": 0.1235, + "step": 47845 + }, + { + "epoch": 2.23, + "learning_rate": 1.2659539447901808e-05, + "loss": 0.1482, + "step": 47850 + }, + { + "epoch": 2.23, + "learning_rate": 1.2658755662847022e-05, + "loss": 0.1346, + "step": 47855 + }, + { + "epoch": 2.23, + "learning_rate": 1.2657971877792234e-05, + "loss": 0.1981, + "step": 47860 + }, + { + "epoch": 2.23, + "learning_rate": 1.265718809273745e-05, + "loss": 0.2022, + "step": 47865 + }, + { + "epoch": 2.23, + "learning_rate": 1.2656404307682662e-05, + "loss": 0.0667, + "step": 47870 + }, + { + "epoch": 2.23, + "learning_rate": 1.2655620522627876e-05, + "loss": 0.0178, + "step": 47875 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654836737573088e-05, + "loss": 0.1099, + "step": 47880 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654052952518304e-05, + "loss": 0.0823, + "step": 47885 + }, + { + "epoch": 2.23, + "learning_rate": 1.2653269167463516e-05, + "loss": 0.0406, + "step": 47890 + }, + { + "epoch": 2.23, + "learning_rate": 1.265248538240873e-05, + "loss": 0.1551, + "step": 47895 + }, + { + "epoch": 2.24, + "learning_rate": 1.2651701597353942e-05, + "loss": 0.1111, + "step": 47900 + }, + { + "epoch": 2.24, + "learning_rate": 1.2650917812299158e-05, + "loss": 0.1663, + "step": 47905 + }, + { + "epoch": 2.24, + "learning_rate": 1.265013402724437e-05, + "loss": 0.2386, + "step": 47910 + }, + { + "epoch": 2.24, + "learning_rate": 1.2649350242189582e-05, + "loss": 0.2496, + "step": 47915 + }, + { + "epoch": 2.24, + "learning_rate": 1.2648566457134796e-05, + "loss": 0.0195, + "step": 47920 + }, + { + "epoch": 2.24, + "learning_rate": 1.2647782672080008e-05, + "loss": 0.0671, + "step": 47925 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646998887025224e-05, + "loss": 0.0558, + "step": 47930 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646215101970436e-05, + "loss": 0.0917, + "step": 47935 + }, + { + "epoch": 2.24, + "learning_rate": 1.264543131691565e-05, + "loss": 0.1752, + "step": 47940 + }, + { + "epoch": 2.24, + "learning_rate": 1.2644647531860862e-05, + "loss": 0.1103, + "step": 47945 + }, + { + "epoch": 2.24, + "learning_rate": 1.2643863746806078e-05, + "loss": 0.1002, + "step": 47950 + }, + { + "epoch": 2.24, + "learning_rate": 1.264307996175129e-05, + "loss": 0.1335, + "step": 47955 + }, + { + "epoch": 2.24, + "learning_rate": 1.2642296176696504e-05, + "loss": 0.3335, + "step": 47960 + }, + { + "epoch": 2.24, + "learning_rate": 1.2641512391641718e-05, + "loss": 0.3093, + "step": 47965 + }, + { + "epoch": 2.24, + "learning_rate": 1.2640728606586932e-05, + "loss": 0.0686, + "step": 47970 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639944821532144e-05, + "loss": 0.0387, + "step": 47975 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639161036477356e-05, + "loss": 0.0357, + "step": 47980 + }, + { + "epoch": 2.24, + "learning_rate": 1.2638377251422572e-05, + "loss": 0.0849, + "step": 47985 + }, + { + "epoch": 2.24, + "learning_rate": 1.2637593466367784e-05, + "loss": 0.1276, + "step": 47990 + }, + { + "epoch": 2.24, + "learning_rate": 1.2636809681312998e-05, + "loss": 0.1694, + "step": 47995 + }, + { + "epoch": 2.24, + "learning_rate": 1.263602589625821e-05, + "loss": 0.1722, + "step": 48000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2635242111203426e-05, + "loss": 0.1639, + "step": 48005 + }, + { + "epoch": 2.24, + "learning_rate": 1.2634458326148638e-05, + "loss": 0.326, + "step": 48010 + }, + { + "epoch": 2.24, + "learning_rate": 1.2633674541093852e-05, + "loss": 0.3992, + "step": 48015 + }, + { + "epoch": 2.24, + "learning_rate": 1.2632890756039064e-05, + "loss": 0.0436, + "step": 48020 + }, + { + "epoch": 2.24, + "learning_rate": 1.263210697098428e-05, + "loss": 0.0531, + "step": 48025 + }, + { + "epoch": 2.24, + "learning_rate": 1.2631323185929492e-05, + "loss": 0.0581, + "step": 48030 + }, + { + "epoch": 2.24, + "learning_rate": 1.2630539400874706e-05, + "loss": 0.0561, + "step": 48035 + }, + { + "epoch": 2.24, + "learning_rate": 1.2629755615819918e-05, + "loss": 0.1014, + "step": 48040 + }, + { + "epoch": 2.24, + "learning_rate": 1.262897183076513e-05, + "loss": 0.1809, + "step": 48045 + }, + { + "epoch": 2.24, + "learning_rate": 1.2628188045710346e-05, + "loss": 0.1019, + "step": 48050 + }, + { + "epoch": 2.24, + "learning_rate": 1.2627404260655558e-05, + "loss": 0.1508, + "step": 48055 + }, + { + "epoch": 2.24, + "learning_rate": 1.2626620475600772e-05, + "loss": 0.309, + "step": 48060 + }, + { + "epoch": 2.24, + "learning_rate": 1.2625836690545986e-05, + "loss": 0.2341, + "step": 48065 + }, + { + "epoch": 2.24, + "learning_rate": 1.26250529054912e-05, + "loss": 0.0517, + "step": 48070 + }, + { + "epoch": 2.24, + "learning_rate": 1.2624269120436412e-05, + "loss": 0.039, + "step": 48075 + }, + { + "epoch": 2.24, + "learning_rate": 1.2623485335381628e-05, + "loss": 0.0392, + "step": 48080 + }, + { + "epoch": 2.24, + "learning_rate": 1.262270155032684e-05, + "loss": 0.0337, + "step": 48085 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621917765272054e-05, + "loss": 0.0574, + "step": 48090 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621133980217266e-05, + "loss": 0.0952, + "step": 48095 + }, + { + "epoch": 2.24, + "learning_rate": 1.2620350195162481e-05, + "loss": 0.1344, + "step": 48100 + }, + { + "epoch": 2.24, + "learning_rate": 1.2619566410107694e-05, + "loss": 0.235, + "step": 48105 + }, + { + "epoch": 2.24, + "learning_rate": 1.2618782625052906e-05, + "loss": 0.2454, + "step": 48110 + }, + { + "epoch": 2.25, + "learning_rate": 1.261799883999812e-05, + "loss": 0.26, + "step": 48115 + }, + { + "epoch": 2.25, + "learning_rate": 1.2617215054943332e-05, + "loss": 0.0369, + "step": 48120 + }, + { + "epoch": 2.25, + "learning_rate": 1.2616431269888548e-05, + "loss": 0.0334, + "step": 48125 + }, + { + "epoch": 2.25, + "learning_rate": 1.261564748483376e-05, + "loss": 0.0459, + "step": 48130 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614863699778974e-05, + "loss": 0.0905, + "step": 48135 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614079914724186e-05, + "loss": 0.1017, + "step": 48140 + }, + { + "epoch": 2.25, + "learning_rate": 1.2613296129669402e-05, + "loss": 0.1289, + "step": 48145 + }, + { + "epoch": 2.25, + "learning_rate": 1.2612512344614614e-05, + "loss": 0.1654, + "step": 48150 + }, + { + "epoch": 2.25, + "learning_rate": 1.2611728559559828e-05, + "loss": 0.1671, + "step": 48155 + }, + { + "epoch": 2.25, + "learning_rate": 1.261094477450504e-05, + "loss": 0.2027, + "step": 48160 + }, + { + "epoch": 2.25, + "learning_rate": 1.2610160989450255e-05, + "loss": 0.2967, + "step": 48165 + }, + { + "epoch": 2.25, + "learning_rate": 1.2609377204395468e-05, + "loss": 0.0546, + "step": 48170 + }, + { + "epoch": 2.25, + "learning_rate": 1.260859341934068e-05, + "loss": 0.0423, + "step": 48175 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607809634285895e-05, + "loss": 0.0787, + "step": 48180 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607025849231108e-05, + "loss": 0.1554, + "step": 48185 + }, + { + "epoch": 2.25, + "learning_rate": 1.2606242064176322e-05, + "loss": 0.0672, + "step": 48190 + }, + { + "epoch": 2.25, + "learning_rate": 1.2605458279121534e-05, + "loss": 0.1474, + "step": 48195 + }, + { + "epoch": 2.25, + "learning_rate": 1.260467449406675e-05, + "loss": 0.0994, + "step": 48200 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603890709011962e-05, + "loss": 0.1914, + "step": 48205 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603106923957176e-05, + "loss": 0.2273, + "step": 48210 + }, + { + "epoch": 2.25, + "learning_rate": 1.2602323138902388e-05, + "loss": 0.3731, + "step": 48215 + }, + { + "epoch": 2.25, + "learning_rate": 1.2601539353847603e-05, + "loss": 0.0643, + "step": 48220 + }, + { + "epoch": 2.25, + "learning_rate": 1.2600755568792816e-05, + "loss": 0.0238, + "step": 48225 + }, + { + "epoch": 2.25, + "learning_rate": 1.259997178373803e-05, + "loss": 0.0542, + "step": 48230 + }, + { + "epoch": 2.25, + "learning_rate": 1.2599187998683242e-05, + "loss": 0.0509, + "step": 48235 + }, + { + "epoch": 2.25, + "learning_rate": 1.2598404213628454e-05, + "loss": 0.1615, + "step": 48240 + }, + { + "epoch": 2.25, + "learning_rate": 1.259762042857367e-05, + "loss": 0.0994, + "step": 48245 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596836643518882e-05, + "loss": 0.1109, + "step": 48250 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596052858464096e-05, + "loss": 0.1553, + "step": 48255 + }, + { + "epoch": 2.25, + "learning_rate": 1.2595269073409308e-05, + "loss": 0.3124, + "step": 48260 + }, + { + "epoch": 2.25, + "learning_rate": 1.2594485288354523e-05, + "loss": 0.2899, + "step": 48265 + }, + { + "epoch": 2.25, + "learning_rate": 1.2593701503299736e-05, + "loss": 0.0501, + "step": 48270 + }, + { + "epoch": 2.25, + "learning_rate": 1.259291771824495e-05, + "loss": 0.0855, + "step": 48275 + }, + { + "epoch": 2.25, + "learning_rate": 1.2592133933190163e-05, + "loss": 0.0823, + "step": 48280 + }, + { + "epoch": 2.25, + "learning_rate": 1.2591350148135377e-05, + "loss": 0.145, + "step": 48285 + }, + { + "epoch": 2.25, + "learning_rate": 1.259056636308059e-05, + "loss": 0.0703, + "step": 48290 + }, + { + "epoch": 2.25, + "learning_rate": 1.2589782578025805e-05, + "loss": 0.143, + "step": 48295 + }, + { + "epoch": 2.25, + "learning_rate": 1.2588998792971017e-05, + "loss": 0.1223, + "step": 48300 + }, + { + "epoch": 2.25, + "learning_rate": 1.258821500791623e-05, + "loss": 0.1272, + "step": 48305 + }, + { + "epoch": 2.25, + "learning_rate": 1.2587431222861443e-05, + "loss": 0.1794, + "step": 48310 + }, + { + "epoch": 2.25, + "learning_rate": 1.2586647437806656e-05, + "loss": 0.2009, + "step": 48315 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585863652751871e-05, + "loss": 0.0941, + "step": 48320 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585079867697083e-05, + "loss": 0.0456, + "step": 48325 + }, + { + "epoch": 2.26, + "learning_rate": 1.2584296082642297e-05, + "loss": 0.0458, + "step": 48330 + }, + { + "epoch": 2.26, + "learning_rate": 1.258351229758751e-05, + "loss": 0.0858, + "step": 48335 + }, + { + "epoch": 2.26, + "learning_rate": 1.2582728512532725e-05, + "loss": 0.1233, + "step": 48340 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581944727477937e-05, + "loss": 0.0849, + "step": 48345 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581160942423151e-05, + "loss": 0.1166, + "step": 48350 + }, + { + "epoch": 2.26, + "learning_rate": 1.2580377157368364e-05, + "loss": 0.2759, + "step": 48355 + }, + { + "epoch": 2.26, + "learning_rate": 1.2579593372313579e-05, + "loss": 0.2935, + "step": 48360 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578809587258791e-05, + "loss": 0.3687, + "step": 48365 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578025802204004e-05, + "loss": 0.0169, + "step": 48370 + }, + { + "epoch": 2.26, + "learning_rate": 1.2577242017149217e-05, + "loss": 0.0272, + "step": 48375 + }, + { + "epoch": 2.26, + "learning_rate": 1.2576458232094431e-05, + "loss": 0.0452, + "step": 48380 + }, + { + "epoch": 2.26, + "learning_rate": 1.2575674447039645e-05, + "loss": 0.0606, + "step": 48385 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574890661984857e-05, + "loss": 0.1621, + "step": 48390 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574106876930073e-05, + "loss": 0.0805, + "step": 48395 + }, + { + "epoch": 2.26, + "learning_rate": 1.2573323091875285e-05, + "loss": 0.1328, + "step": 48400 + }, + { + "epoch": 2.26, + "learning_rate": 1.25725393068205e-05, + "loss": 0.1362, + "step": 48405 + }, + { + "epoch": 2.26, + "learning_rate": 1.2571755521765711e-05, + "loss": 0.1753, + "step": 48410 + }, + { + "epoch": 2.26, + "learning_rate": 1.2570971736710927e-05, + "loss": 0.2823, + "step": 48415 + }, + { + "epoch": 2.26, + "learning_rate": 1.257018795165614e-05, + "loss": 0.0633, + "step": 48420 + }, + { + "epoch": 2.26, + "learning_rate": 1.2569404166601353e-05, + "loss": 0.0347, + "step": 48425 + }, + { + "epoch": 2.26, + "learning_rate": 1.2568620381546565e-05, + "loss": 0.0578, + "step": 48430 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567836596491778e-05, + "loss": 0.0631, + "step": 48435 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567052811436993e-05, + "loss": 0.0828, + "step": 48440 + }, + { + "epoch": 2.26, + "learning_rate": 1.2566269026382205e-05, + "loss": 0.1166, + "step": 48445 + }, + { + "epoch": 2.26, + "learning_rate": 1.256548524132742e-05, + "loss": 0.1153, + "step": 48450 + }, + { + "epoch": 2.26, + "learning_rate": 1.2564701456272631e-05, + "loss": 0.1634, + "step": 48455 + }, + { + "epoch": 2.26, + "learning_rate": 1.2563917671217847e-05, + "loss": 0.2301, + "step": 48460 + }, + { + "epoch": 2.26, + "learning_rate": 1.256313388616306e-05, + "loss": 0.2951, + "step": 48465 + }, + { + "epoch": 2.26, + "learning_rate": 1.2562350101108273e-05, + "loss": 0.046, + "step": 48470 + }, + { + "epoch": 2.26, + "learning_rate": 1.2561566316053485e-05, + "loss": 0.0414, + "step": 48475 + }, + { + "epoch": 2.26, + "learning_rate": 1.2560782530998701e-05, + "loss": 0.0748, + "step": 48480 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559998745943913e-05, + "loss": 0.0706, + "step": 48485 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559214960889127e-05, + "loss": 0.1279, + "step": 48490 + }, + { + "epoch": 2.26, + "learning_rate": 1.2558431175834341e-05, + "loss": 0.0743, + "step": 48495 + }, + { + "epoch": 2.26, + "learning_rate": 1.2557647390779553e-05, + "loss": 0.1205, + "step": 48500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2556863605724767e-05, + "loss": 0.1911, + "step": 48505 + }, + { + "epoch": 2.26, + "learning_rate": 1.255607982066998e-05, + "loss": 0.1648, + "step": 48510 + }, + { + "epoch": 2.26, + "learning_rate": 1.2555296035615195e-05, + "loss": 0.2687, + "step": 48515 + }, + { + "epoch": 2.26, + "learning_rate": 1.2554512250560407e-05, + "loss": 0.0847, + "step": 48520 + }, + { + "epoch": 2.26, + "learning_rate": 1.2553728465505621e-05, + "loss": 0.1152, + "step": 48525 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552944680450833e-05, + "loss": 0.0977, + "step": 48530 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552160895396049e-05, + "loss": 0.0585, + "step": 48535 + }, + { + "epoch": 2.26, + "learning_rate": 1.2551377110341261e-05, + "loss": 0.1386, + "step": 48540 + }, + { + "epoch": 2.27, + "learning_rate": 1.2550593325286475e-05, + "loss": 0.1517, + "step": 48545 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549809540231687e-05, + "loss": 0.0852, + "step": 48550 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549025755176903e-05, + "loss": 0.1604, + "step": 48555 + }, + { + "epoch": 2.27, + "learning_rate": 1.2548241970122115e-05, + "loss": 0.3075, + "step": 48560 + }, + { + "epoch": 2.27, + "learning_rate": 1.2547458185067327e-05, + "loss": 0.3901, + "step": 48565 + }, + { + "epoch": 2.27, + "learning_rate": 1.2546674400012541e-05, + "loss": 0.0896, + "step": 48570 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545890614957753e-05, + "loss": 0.0526, + "step": 48575 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545106829902969e-05, + "loss": 0.0586, + "step": 48580 + }, + { + "epoch": 2.27, + "learning_rate": 1.2544323044848181e-05, + "loss": 0.0928, + "step": 48585 + }, + { + "epoch": 2.27, + "learning_rate": 1.2543539259793395e-05, + "loss": 0.1047, + "step": 48590 + }, + { + "epoch": 2.27, + "learning_rate": 1.2542755474738609e-05, + "loss": 0.144, + "step": 48595 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541971689683823e-05, + "loss": 0.1413, + "step": 48600 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541187904629035e-05, + "loss": 0.1345, + "step": 48605 + }, + { + "epoch": 2.27, + "learning_rate": 1.254040411957425e-05, + "loss": 0.2603, + "step": 48610 + }, + { + "epoch": 2.27, + "learning_rate": 1.2539620334519463e-05, + "loss": 0.3291, + "step": 48615 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538836549464677e-05, + "loss": 0.0751, + "step": 48620 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538052764409889e-05, + "loss": 0.0066, + "step": 48625 + }, + { + "epoch": 2.27, + "learning_rate": 1.2537268979355101e-05, + "loss": 0.076, + "step": 48630 + }, + { + "epoch": 2.27, + "learning_rate": 1.2536485194300317e-05, + "loss": 0.131, + "step": 48635 + }, + { + "epoch": 2.27, + "learning_rate": 1.2535701409245529e-05, + "loss": 0.0862, + "step": 48640 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534917624190743e-05, + "loss": 0.0589, + "step": 48645 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534133839135955e-05, + "loss": 0.1927, + "step": 48650 + }, + { + "epoch": 2.27, + "learning_rate": 1.253335005408117e-05, + "loss": 0.103, + "step": 48655 + }, + { + "epoch": 2.27, + "learning_rate": 1.2532566269026383e-05, + "loss": 0.305, + "step": 48660 + }, + { + "epoch": 2.27, + "learning_rate": 1.2531782483971597e-05, + "loss": 0.3637, + "step": 48665 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530998698916809e-05, + "loss": 0.0596, + "step": 48670 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530214913862025e-05, + "loss": 0.0414, + "step": 48675 + }, + { + "epoch": 2.27, + "learning_rate": 1.2529431128807237e-05, + "loss": 0.0615, + "step": 48680 + }, + { + "epoch": 2.27, + "learning_rate": 1.252864734375245e-05, + "loss": 0.1236, + "step": 48685 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527863558697663e-05, + "loss": 0.1301, + "step": 48690 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527079773642877e-05, + "loss": 0.0993, + "step": 48695 + }, + { + "epoch": 2.27, + "learning_rate": 1.252629598858809e-05, + "loss": 0.1616, + "step": 48700 + }, + { + "epoch": 2.27, + "learning_rate": 1.2525512203533303e-05, + "loss": 0.1623, + "step": 48705 + }, + { + "epoch": 2.27, + "learning_rate": 1.2524728418478519e-05, + "loss": 0.3241, + "step": 48710 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523944633423731e-05, + "loss": 0.2883, + "step": 48715 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523160848368945e-05, + "loss": 0.1131, + "step": 48720 + }, + { + "epoch": 2.27, + "learning_rate": 1.2522377063314157e-05, + "loss": 0.0587, + "step": 48725 + }, + { + "epoch": 2.27, + "learning_rate": 1.2521593278259373e-05, + "loss": 0.0835, + "step": 48730 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520809493204585e-05, + "loss": 0.0798, + "step": 48735 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520025708149799e-05, + "loss": 0.112, + "step": 48740 + }, + { + "epoch": 2.27, + "learning_rate": 1.2519241923095011e-05, + "loss": 0.0651, + "step": 48745 + }, + { + "epoch": 2.27, + "learning_rate": 1.2518458138040227e-05, + "loss": 0.248, + "step": 48750 + }, + { + "epoch": 2.27, + "learning_rate": 1.2517674352985439e-05, + "loss": 0.2368, + "step": 48755 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516890567930651e-05, + "loss": 0.1879, + "step": 48760 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516106782875865e-05, + "loss": 0.267, + "step": 48765 + }, + { + "epoch": 2.28, + "learning_rate": 1.2515322997821077e-05, + "loss": 0.0323, + "step": 48770 + }, + { + "epoch": 2.28, + "learning_rate": 1.2514539212766293e-05, + "loss": 0.0736, + "step": 48775 + }, + { + "epoch": 2.28, + "learning_rate": 1.2513755427711505e-05, + "loss": 0.0625, + "step": 48780 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512971642656719e-05, + "loss": 0.089, + "step": 48785 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512187857601931e-05, + "loss": 0.0695, + "step": 48790 + }, + { + "epoch": 2.28, + "learning_rate": 1.2511404072547147e-05, + "loss": 0.1172, + "step": 48795 + }, + { + "epoch": 2.28, + "learning_rate": 1.2510620287492359e-05, + "loss": 0.2187, + "step": 48800 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509836502437573e-05, + "loss": 0.1755, + "step": 48805 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509052717382787e-05, + "loss": 0.3494, + "step": 48810 + }, + { + "epoch": 2.28, + "learning_rate": 1.2508268932328e-05, + "loss": 0.3253, + "step": 48815 + }, + { + "epoch": 2.28, + "learning_rate": 1.2507485147273213e-05, + "loss": 0.1197, + "step": 48820 + }, + { + "epoch": 2.28, + "learning_rate": 1.2506701362218425e-05, + "loss": 0.0672, + "step": 48825 + }, + { + "epoch": 2.28, + "learning_rate": 1.250591757716364e-05, + "loss": 0.0997, + "step": 48830 + }, + { + "epoch": 2.28, + "learning_rate": 1.2505133792108853e-05, + "loss": 0.0682, + "step": 48835 + }, + { + "epoch": 2.28, + "learning_rate": 1.2504350007054067e-05, + "loss": 0.0378, + "step": 48840 + }, + { + "epoch": 2.28, + "learning_rate": 1.2503566221999279e-05, + "loss": 0.0569, + "step": 48845 + }, + { + "epoch": 2.28, + "learning_rate": 1.2502782436944494e-05, + "loss": 0.0817, + "step": 48850 + }, + { + "epoch": 2.28, + "learning_rate": 1.2501998651889707e-05, + "loss": 0.1632, + "step": 48855 + }, + { + "epoch": 2.28, + "learning_rate": 1.250121486683492e-05, + "loss": 0.2643, + "step": 48860 + }, + { + "epoch": 2.28, + "learning_rate": 1.2500431081780133e-05, + "loss": 0.419, + "step": 48865 + }, + { + "epoch": 2.28, + "learning_rate": 1.2499647296725348e-05, + "loss": 0.0458, + "step": 48870 + }, + { + "epoch": 2.28, + "learning_rate": 1.249886351167056e-05, + "loss": 0.0664, + "step": 48875 + }, + { + "epoch": 2.28, + "learning_rate": 1.2498079726615774e-05, + "loss": 0.0878, + "step": 48880 + }, + { + "epoch": 2.28, + "learning_rate": 1.2497295941560987e-05, + "loss": 0.069, + "step": 48885 + }, + { + "epoch": 2.28, + "learning_rate": 1.2496512156506199e-05, + "loss": 0.1751, + "step": 48890 + }, + { + "epoch": 2.28, + "learning_rate": 1.2495728371451415e-05, + "loss": 0.1207, + "step": 48895 + }, + { + "epoch": 2.28, + "learning_rate": 1.2494944586396627e-05, + "loss": 0.1929, + "step": 48900 + }, + { + "epoch": 2.28, + "learning_rate": 1.249416080134184e-05, + "loss": 0.2412, + "step": 48905 + }, + { + "epoch": 2.28, + "learning_rate": 1.2493377016287055e-05, + "loss": 0.2499, + "step": 48910 + }, + { + "epoch": 2.28, + "learning_rate": 1.2492593231232268e-05, + "loss": 0.3758, + "step": 48915 + }, + { + "epoch": 2.28, + "learning_rate": 1.249180944617748e-05, + "loss": 0.0861, + "step": 48920 + }, + { + "epoch": 2.28, + "learning_rate": 1.2491025661122696e-05, + "loss": 0.051, + "step": 48925 + }, + { + "epoch": 2.28, + "learning_rate": 1.2490241876067908e-05, + "loss": 0.0319, + "step": 48930 + }, + { + "epoch": 2.28, + "learning_rate": 1.2489458091013122e-05, + "loss": 0.0455, + "step": 48935 + }, + { + "epoch": 2.28, + "learning_rate": 1.2488674305958335e-05, + "loss": 0.0583, + "step": 48940 + }, + { + "epoch": 2.28, + "learning_rate": 1.248789052090355e-05, + "loss": 0.0818, + "step": 48945 + }, + { + "epoch": 2.28, + "learning_rate": 1.2487106735848762e-05, + "loss": 0.1859, + "step": 48950 + }, + { + "epoch": 2.28, + "learning_rate": 1.2486322950793975e-05, + "loss": 0.1899, + "step": 48955 + }, + { + "epoch": 2.28, + "learning_rate": 1.2485539165739189e-05, + "loss": 0.4047, + "step": 48960 + }, + { + "epoch": 2.28, + "learning_rate": 1.24847553806844e-05, + "loss": 0.3151, + "step": 48965 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483971595629616e-05, + "loss": 0.0545, + "step": 48970 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483187810574829e-05, + "loss": 0.0155, + "step": 48975 + }, + { + "epoch": 2.29, + "learning_rate": 1.2482404025520042e-05, + "loss": 0.0754, + "step": 48980 + }, + { + "epoch": 2.29, + "learning_rate": 1.2481620240465255e-05, + "loss": 0.039, + "step": 48985 + }, + { + "epoch": 2.29, + "learning_rate": 1.248083645541047e-05, + "loss": 0.08, + "step": 48990 + }, + { + "epoch": 2.29, + "learning_rate": 1.2480052670355682e-05, + "loss": 0.1233, + "step": 48995 + }, + { + "epoch": 2.29, + "learning_rate": 1.2479268885300896e-05, + "loss": 0.1165, + "step": 49000 + }, + { + "epoch": 2.29, + "learning_rate": 1.2478485100246109e-05, + "loss": 0.1813, + "step": 49005 + }, + { + "epoch": 2.29, + "learning_rate": 1.2477701315191324e-05, + "loss": 0.2639, + "step": 49010 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476917530136536e-05, + "loss": 0.3061, + "step": 49015 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476133745081749e-05, + "loss": 0.0448, + "step": 49020 + }, + { + "epoch": 2.29, + "learning_rate": 1.2475349960026964e-05, + "loss": 0.0651, + "step": 49025 + }, + { + "epoch": 2.29, + "learning_rate": 1.2474566174972176e-05, + "loss": 0.0556, + "step": 49030 + }, + { + "epoch": 2.29, + "learning_rate": 1.247378238991739e-05, + "loss": 0.0711, + "step": 49035 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472998604862603e-05, + "loss": 0.0944, + "step": 49040 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472214819807818e-05, + "loss": 0.1723, + "step": 49045 + }, + { + "epoch": 2.29, + "learning_rate": 1.247143103475303e-05, + "loss": 0.1437, + "step": 49050 + }, + { + "epoch": 2.29, + "learning_rate": 1.2470647249698244e-05, + "loss": 0.1611, + "step": 49055 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469863464643456e-05, + "loss": 0.2474, + "step": 49060 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469079679588672e-05, + "loss": 0.3262, + "step": 49065 + }, + { + "epoch": 2.29, + "learning_rate": 1.2468295894533884e-05, + "loss": 0.0379, + "step": 49070 + }, + { + "epoch": 2.29, + "learning_rate": 1.2467512109479098e-05, + "loss": 0.0395, + "step": 49075 + }, + { + "epoch": 2.29, + "learning_rate": 1.246672832442431e-05, + "loss": 0.0432, + "step": 49080 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465944539369523e-05, + "loss": 0.075, + "step": 49085 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465160754314738e-05, + "loss": 0.051, + "step": 49090 + }, + { + "epoch": 2.29, + "learning_rate": 1.246437696925995e-05, + "loss": 0.1434, + "step": 49095 + }, + { + "epoch": 2.29, + "learning_rate": 1.2463593184205164e-05, + "loss": 0.1986, + "step": 49100 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462809399150377e-05, + "loss": 0.1596, + "step": 49105 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462025614095592e-05, + "loss": 0.3813, + "step": 49110 + }, + { + "epoch": 2.29, + "learning_rate": 1.2461241829040804e-05, + "loss": 0.2625, + "step": 49115 + }, + { + "epoch": 2.29, + "learning_rate": 1.2460458043986018e-05, + "loss": 0.0344, + "step": 49120 + }, + { + "epoch": 2.29, + "learning_rate": 1.2459674258931232e-05, + "loss": 0.034, + "step": 49125 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458890473876446e-05, + "loss": 0.0638, + "step": 49130 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458106688821658e-05, + "loss": 0.045, + "step": 49135 + }, + { + "epoch": 2.29, + "learning_rate": 1.2457322903766872e-05, + "loss": 0.0743, + "step": 49140 + }, + { + "epoch": 2.29, + "learning_rate": 1.2456539118712086e-05, + "loss": 0.1166, + "step": 49145 + }, + { + "epoch": 2.29, + "learning_rate": 1.2455755333657298e-05, + "loss": 0.1078, + "step": 49150 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454971548602512e-05, + "loss": 0.1802, + "step": 49155 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454187763547724e-05, + "loss": 0.3078, + "step": 49160 + }, + { + "epoch": 2.29, + "learning_rate": 1.245340397849294e-05, + "loss": 0.3489, + "step": 49165 + }, + { + "epoch": 2.29, + "learning_rate": 1.2452620193438152e-05, + "loss": 0.0332, + "step": 49170 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451836408383366e-05, + "loss": 0.0592, + "step": 49175 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451052623328578e-05, + "loss": 0.0291, + "step": 49180 + }, + { + "epoch": 2.3, + "learning_rate": 1.2450268838273794e-05, + "loss": 0.0695, + "step": 49185 + }, + { + "epoch": 2.3, + "learning_rate": 1.2449485053219006e-05, + "loss": 0.081, + "step": 49190 + }, + { + "epoch": 2.3, + "learning_rate": 1.244870126816422e-05, + "loss": 0.1311, + "step": 49195 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447917483109432e-05, + "loss": 0.1058, + "step": 49200 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447133698054648e-05, + "loss": 0.1388, + "step": 49205 + }, + { + "epoch": 2.3, + "learning_rate": 1.244634991299986e-05, + "loss": 0.3073, + "step": 49210 + }, + { + "epoch": 2.3, + "learning_rate": 1.2445566127945072e-05, + "loss": 0.2426, + "step": 49215 + }, + { + "epoch": 2.3, + "learning_rate": 1.2444782342890286e-05, + "loss": 0.0957, + "step": 49220 + }, + { + "epoch": 2.3, + "learning_rate": 1.24439985578355e-05, + "loss": 0.055, + "step": 49225 + }, + { + "epoch": 2.3, + "learning_rate": 1.2443214772780714e-05, + "loss": 0.0305, + "step": 49230 + }, + { + "epoch": 2.3, + "learning_rate": 1.2442430987725926e-05, + "loss": 0.1408, + "step": 49235 + }, + { + "epoch": 2.3, + "learning_rate": 1.2441647202671142e-05, + "loss": 0.0908, + "step": 49240 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440863417616354e-05, + "loss": 0.0978, + "step": 49245 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440079632561568e-05, + "loss": 0.1482, + "step": 49250 + }, + { + "epoch": 2.3, + "learning_rate": 1.243929584750678e-05, + "loss": 0.2039, + "step": 49255 + }, + { + "epoch": 2.3, + "learning_rate": 1.2438512062451996e-05, + "loss": 0.3077, + "step": 49260 + }, + { + "epoch": 2.3, + "learning_rate": 1.2437728277397208e-05, + "loss": 0.1668, + "step": 49265 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436944492342422e-05, + "loss": 0.0675, + "step": 49270 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436160707287634e-05, + "loss": 0.0325, + "step": 49275 + }, + { + "epoch": 2.3, + "learning_rate": 1.2435376922232846e-05, + "loss": 0.0352, + "step": 49280 + }, + { + "epoch": 2.3, + "learning_rate": 1.2434593137178062e-05, + "loss": 0.0273, + "step": 49285 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433809352123274e-05, + "loss": 0.1357, + "step": 49290 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433025567068488e-05, + "loss": 0.1806, + "step": 49295 + }, + { + "epoch": 2.3, + "learning_rate": 1.24322417820137e-05, + "loss": 0.1102, + "step": 49300 + }, + { + "epoch": 2.3, + "learning_rate": 1.2431457996958916e-05, + "loss": 0.2882, + "step": 49305 + }, + { + "epoch": 2.3, + "learning_rate": 1.2430674211904128e-05, + "loss": 0.301, + "step": 49310 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429890426849342e-05, + "loss": 0.2907, + "step": 49315 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429106641794554e-05, + "loss": 0.0655, + "step": 49320 + }, + { + "epoch": 2.3, + "learning_rate": 1.242832285673977e-05, + "loss": 0.0572, + "step": 49325 + }, + { + "epoch": 2.3, + "learning_rate": 1.2427539071684982e-05, + "loss": 0.0607, + "step": 49330 + }, + { + "epoch": 2.3, + "learning_rate": 1.2426755286630196e-05, + "loss": 0.0779, + "step": 49335 + }, + { + "epoch": 2.3, + "learning_rate": 1.242597150157541e-05, + "loss": 0.0683, + "step": 49340 + }, + { + "epoch": 2.3, + "learning_rate": 1.2425187716520622e-05, + "loss": 0.1186, + "step": 49345 + }, + { + "epoch": 2.3, + "learning_rate": 1.2424403931465836e-05, + "loss": 0.1542, + "step": 49350 + }, + { + "epoch": 2.3, + "learning_rate": 1.2423620146411048e-05, + "loss": 0.11, + "step": 49355 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422836361356264e-05, + "loss": 0.2608, + "step": 49360 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422052576301476e-05, + "loss": 0.3212, + "step": 49365 + }, + { + "epoch": 2.3, + "learning_rate": 1.242126879124669e-05, + "loss": 0.031, + "step": 49370 + }, + { + "epoch": 2.3, + "learning_rate": 1.2420485006191902e-05, + "loss": 0.0297, + "step": 49375 + }, + { + "epoch": 2.3, + "learning_rate": 1.2419701221137118e-05, + "loss": 0.1169, + "step": 49380 + }, + { + "epoch": 2.3, + "learning_rate": 1.241891743608233e-05, + "loss": 0.0557, + "step": 49385 + }, + { + "epoch": 2.3, + "learning_rate": 1.2418133651027544e-05, + "loss": 0.1283, + "step": 49390 + }, + { + "epoch": 2.3, + "learning_rate": 1.2417349865972756e-05, + "loss": 0.114, + "step": 49395 + }, + { + "epoch": 2.31, + "learning_rate": 1.2416566080917972e-05, + "loss": 0.0548, + "step": 49400 + }, + { + "epoch": 2.31, + "learning_rate": 1.2415782295863184e-05, + "loss": 0.1379, + "step": 49405 + }, + { + "epoch": 2.31, + "learning_rate": 1.2414998510808396e-05, + "loss": 0.2058, + "step": 49410 + }, + { + "epoch": 2.31, + "learning_rate": 1.241421472575361e-05, + "loss": 0.2836, + "step": 49415 + }, + { + "epoch": 2.31, + "learning_rate": 1.2413430940698822e-05, + "loss": 0.0669, + "step": 49420 + }, + { + "epoch": 2.31, + "learning_rate": 1.2412647155644038e-05, + "loss": 0.0242, + "step": 49425 + }, + { + "epoch": 2.31, + "learning_rate": 1.241186337058925e-05, + "loss": 0.0977, + "step": 49430 + }, + { + "epoch": 2.31, + "learning_rate": 1.2411079585534464e-05, + "loss": 0.0507, + "step": 49435 + }, + { + "epoch": 2.31, + "learning_rate": 1.2410295800479678e-05, + "loss": 0.063, + "step": 49440 + }, + { + "epoch": 2.31, + "learning_rate": 1.2409512015424892e-05, + "loss": 0.1527, + "step": 49445 + }, + { + "epoch": 2.31, + "learning_rate": 1.2408728230370104e-05, + "loss": 0.0736, + "step": 49450 + }, + { + "epoch": 2.31, + "learning_rate": 1.240794444531532e-05, + "loss": 0.2219, + "step": 49455 + }, + { + "epoch": 2.31, + "learning_rate": 1.2407160660260532e-05, + "loss": 0.2611, + "step": 49460 + }, + { + "epoch": 2.31, + "learning_rate": 1.2406376875205746e-05, + "loss": 0.3693, + "step": 49465 + }, + { + "epoch": 2.31, + "learning_rate": 1.2405593090150958e-05, + "loss": 0.1247, + "step": 49470 + }, + { + "epoch": 2.31, + "learning_rate": 1.240480930509617e-05, + "loss": 0.0177, + "step": 49475 + }, + { + "epoch": 2.31, + "learning_rate": 1.2404025520041386e-05, + "loss": 0.0873, + "step": 49480 + }, + { + "epoch": 2.31, + "learning_rate": 1.2403241734986598e-05, + "loss": 0.0888, + "step": 49485 + }, + { + "epoch": 2.31, + "learning_rate": 1.2402457949931812e-05, + "loss": 0.0582, + "step": 49490 + }, + { + "epoch": 2.31, + "learning_rate": 1.2401674164877024e-05, + "loss": 0.1075, + "step": 49495 + }, + { + "epoch": 2.31, + "learning_rate": 1.240089037982224e-05, + "loss": 0.1859, + "step": 49500 + }, + { + "epoch": 2.31, + "learning_rate": 1.2400106594767452e-05, + "loss": 0.1537, + "step": 49505 + }, + { + "epoch": 2.31, + "learning_rate": 1.2399322809712666e-05, + "loss": 0.2297, + "step": 49510 + }, + { + "epoch": 2.31, + "learning_rate": 1.2398695781668836e-05, + "loss": 0.2882, + "step": 49515 + }, + { + "epoch": 2.31, + "learning_rate": 1.239791199661405e-05, + "loss": 0.0205, + "step": 49520 + }, + { + "epoch": 2.31, + "learning_rate": 1.2397128211559264e-05, + "loss": 0.0498, + "step": 49525 + }, + { + "epoch": 2.31, + "learning_rate": 1.2396344426504476e-05, + "loss": 0.0858, + "step": 49530 + }, + { + "epoch": 2.31, + "learning_rate": 1.2395560641449692e-05, + "loss": 0.1835, + "step": 49535 + }, + { + "epoch": 2.31, + "learning_rate": 1.2394776856394904e-05, + "loss": 0.1171, + "step": 49540 + }, + { + "epoch": 2.31, + "learning_rate": 1.2393993071340116e-05, + "loss": 0.0876, + "step": 49545 + }, + { + "epoch": 2.31, + "learning_rate": 1.239320928628533e-05, + "loss": 0.0962, + "step": 49550 + }, + { + "epoch": 2.31, + "learning_rate": 1.2392425501230542e-05, + "loss": 0.0861, + "step": 49555 + }, + { + "epoch": 2.31, + "learning_rate": 1.2391641716175758e-05, + "loss": 0.2315, + "step": 49560 + }, + { + "epoch": 2.31, + "learning_rate": 1.239085793112097e-05, + "loss": 0.278, + "step": 49565 + }, + { + "epoch": 2.31, + "learning_rate": 1.2390074146066184e-05, + "loss": 0.0506, + "step": 49570 + }, + { + "epoch": 2.31, + "learning_rate": 1.2389290361011396e-05, + "loss": 0.0443, + "step": 49575 + }, + { + "epoch": 2.31, + "learning_rate": 1.2388506575956612e-05, + "loss": 0.1289, + "step": 49580 + }, + { + "epoch": 2.31, + "learning_rate": 1.2387722790901824e-05, + "loss": 0.0433, + "step": 49585 + }, + { + "epoch": 2.31, + "learning_rate": 1.2386939005847038e-05, + "loss": 0.1379, + "step": 49590 + }, + { + "epoch": 2.31, + "learning_rate": 1.238615522079225e-05, + "loss": 0.108, + "step": 49595 + }, + { + "epoch": 2.31, + "learning_rate": 1.2385371435737466e-05, + "loss": 0.1276, + "step": 49600 + }, + { + "epoch": 2.31, + "learning_rate": 1.2384587650682678e-05, + "loss": 0.1717, + "step": 49605 + }, + { + "epoch": 2.31, + "learning_rate": 1.238380386562789e-05, + "loss": 0.2584, + "step": 49610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2383020080573106e-05, + "loss": 0.2265, + "step": 49615 + }, + { + "epoch": 2.32, + "learning_rate": 1.2382236295518318e-05, + "loss": 0.1069, + "step": 49620 + }, + { + "epoch": 2.32, + "learning_rate": 1.2381452510463532e-05, + "loss": 0.0404, + "step": 49625 + }, + { + "epoch": 2.32, + "learning_rate": 1.2380668725408744e-05, + "loss": 0.0704, + "step": 49630 + }, + { + "epoch": 2.32, + "learning_rate": 1.237988494035396e-05, + "loss": 0.0985, + "step": 49635 + }, + { + "epoch": 2.32, + "learning_rate": 1.2379101155299172e-05, + "loss": 0.084, + "step": 49640 + }, + { + "epoch": 2.32, + "learning_rate": 1.2378317370244386e-05, + "loss": 0.2373, + "step": 49645 + }, + { + "epoch": 2.32, + "learning_rate": 1.2377533585189598e-05, + "loss": 0.0689, + "step": 49650 + }, + { + "epoch": 2.32, + "learning_rate": 1.2376749800134814e-05, + "loss": 0.1609, + "step": 49655 + }, + { + "epoch": 2.32, + "learning_rate": 1.2375966015080026e-05, + "loss": 0.2887, + "step": 49660 + }, + { + "epoch": 2.32, + "learning_rate": 1.237518223002524e-05, + "loss": 0.2715, + "step": 49665 + }, + { + "epoch": 2.32, + "learning_rate": 1.2374398444970452e-05, + "loss": 0.0749, + "step": 49670 + }, + { + "epoch": 2.32, + "learning_rate": 1.2373614659915664e-05, + "loss": 0.0717, + "step": 49675 + }, + { + "epoch": 2.32, + "learning_rate": 1.237283087486088e-05, + "loss": 0.0777, + "step": 49680 + }, + { + "epoch": 2.32, + "learning_rate": 1.2372047089806092e-05, + "loss": 0.0669, + "step": 49685 + }, + { + "epoch": 2.32, + "learning_rate": 1.2371263304751306e-05, + "loss": 0.0836, + "step": 49690 + }, + { + "epoch": 2.32, + "learning_rate": 1.2370479519696518e-05, + "loss": 0.2553, + "step": 49695 + }, + { + "epoch": 2.32, + "learning_rate": 1.2369695734641734e-05, + "loss": 0.1672, + "step": 49700 + }, + { + "epoch": 2.32, + "learning_rate": 1.2368911949586946e-05, + "loss": 0.1128, + "step": 49705 + }, + { + "epoch": 2.32, + "learning_rate": 1.236812816453216e-05, + "loss": 0.2423, + "step": 49710 + }, + { + "epoch": 2.32, + "learning_rate": 1.2367344379477374e-05, + "loss": 0.3124, + "step": 49715 + }, + { + "epoch": 2.32, + "learning_rate": 1.2366560594422588e-05, + "loss": 0.0414, + "step": 49720 + }, + { + "epoch": 2.32, + "learning_rate": 1.23657768093678e-05, + "loss": 0.0981, + "step": 49725 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364993024313014e-05, + "loss": 0.1131, + "step": 49730 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364209239258228e-05, + "loss": 0.0668, + "step": 49735 + }, + { + "epoch": 2.32, + "learning_rate": 1.236342545420344e-05, + "loss": 0.1405, + "step": 49740 + }, + { + "epoch": 2.32, + "learning_rate": 1.2362641669148654e-05, + "loss": 0.0965, + "step": 49745 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361857884093866e-05, + "loss": 0.1463, + "step": 49750 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361074099039082e-05, + "loss": 0.2175, + "step": 49755 + }, + { + "epoch": 2.32, + "learning_rate": 1.2360290313984294e-05, + "loss": 0.1888, + "step": 49760 + }, + { + "epoch": 2.32, + "learning_rate": 1.2359506528929508e-05, + "loss": 0.3132, + "step": 49765 + }, + { + "epoch": 2.32, + "learning_rate": 1.235872274387472e-05, + "loss": 0.1664, + "step": 49770 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357938958819935e-05, + "loss": 0.0261, + "step": 49775 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357155173765148e-05, + "loss": 0.015, + "step": 49780 + }, + { + "epoch": 2.32, + "learning_rate": 1.2356371388710362e-05, + "loss": 0.0923, + "step": 49785 + }, + { + "epoch": 2.32, + "learning_rate": 1.2355587603655574e-05, + "loss": 0.0871, + "step": 49790 + }, + { + "epoch": 2.32, + "learning_rate": 1.235480381860079e-05, + "loss": 0.1367, + "step": 49795 + }, + { + "epoch": 2.32, + "learning_rate": 1.2354020033546002e-05, + "loss": 0.1528, + "step": 49800 + }, + { + "epoch": 2.32, + "learning_rate": 1.2353236248491214e-05, + "loss": 0.1766, + "step": 49805 + }, + { + "epoch": 2.32, + "learning_rate": 1.2352452463436428e-05, + "loss": 0.2554, + "step": 49810 + }, + { + "epoch": 2.32, + "learning_rate": 1.2351668678381642e-05, + "loss": 0.3166, + "step": 49815 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350884893326856e-05, + "loss": 0.0502, + "step": 49820 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350101108272068e-05, + "loss": 0.0437, + "step": 49825 + }, + { + "epoch": 2.33, + "learning_rate": 1.2349317323217282e-05, + "loss": 0.0936, + "step": 49830 + }, + { + "epoch": 2.33, + "learning_rate": 1.2348533538162496e-05, + "loss": 0.0694, + "step": 49835 + }, + { + "epoch": 2.33, + "learning_rate": 1.234774975310771e-05, + "loss": 0.0366, + "step": 49840 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346965968052922e-05, + "loss": 0.0825, + "step": 49845 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346182182998137e-05, + "loss": 0.1329, + "step": 49850 + }, + { + "epoch": 2.33, + "learning_rate": 1.234539839794335e-05, + "loss": 0.0959, + "step": 49855 + }, + { + "epoch": 2.33, + "learning_rate": 1.2344614612888563e-05, + "loss": 0.1717, + "step": 49860 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343830827833776e-05, + "loss": 0.2998, + "step": 49865 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343047042778988e-05, + "loss": 0.123, + "step": 49870 + }, + { + "epoch": 2.33, + "learning_rate": 1.2342263257724203e-05, + "loss": 0.0147, + "step": 49875 + }, + { + "epoch": 2.33, + "learning_rate": 1.2341479472669416e-05, + "loss": 0.0361, + "step": 49880 + }, + { + "epoch": 2.33, + "learning_rate": 1.234069568761463e-05, + "loss": 0.089, + "step": 49885 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339911902559842e-05, + "loss": 0.057, + "step": 49890 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339128117505057e-05, + "loss": 0.066, + "step": 49895 + }, + { + "epoch": 2.33, + "learning_rate": 1.233834433245027e-05, + "loss": 0.2305, + "step": 49900 + }, + { + "epoch": 2.33, + "learning_rate": 1.2337560547395483e-05, + "loss": 0.2467, + "step": 49905 + }, + { + "epoch": 2.33, + "learning_rate": 1.2336776762340696e-05, + "loss": 0.226, + "step": 49910 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335992977285911e-05, + "loss": 0.2258, + "step": 49915 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335209192231123e-05, + "loss": 0.0611, + "step": 49920 + }, + { + "epoch": 2.33, + "learning_rate": 1.2334425407176337e-05, + "loss": 0.0755, + "step": 49925 + }, + { + "epoch": 2.33, + "learning_rate": 1.2333641622121551e-05, + "loss": 0.0437, + "step": 49930 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332857837066764e-05, + "loss": 0.1033, + "step": 49935 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332074052011977e-05, + "loss": 0.0961, + "step": 49940 + }, + { + "epoch": 2.33, + "learning_rate": 1.233129026695719e-05, + "loss": 0.0845, + "step": 49945 + }, + { + "epoch": 2.33, + "learning_rate": 1.2330506481902405e-05, + "loss": 0.1404, + "step": 49950 + }, + { + "epoch": 2.33, + "learning_rate": 1.2329722696847617e-05, + "loss": 0.2093, + "step": 49955 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328938911792831e-05, + "loss": 0.1984, + "step": 49960 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328155126738044e-05, + "loss": 0.2746, + "step": 49965 + }, + { + "epoch": 2.33, + "learning_rate": 1.232737134168326e-05, + "loss": 0.0631, + "step": 49970 + }, + { + "epoch": 2.33, + "learning_rate": 1.2326587556628471e-05, + "loss": 0.0268, + "step": 49975 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325803771573685e-05, + "loss": 0.0344, + "step": 49980 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325019986518897e-05, + "loss": 0.0861, + "step": 49985 + }, + { + "epoch": 2.33, + "learning_rate": 1.2324236201464113e-05, + "loss": 0.1821, + "step": 49990 + }, + { + "epoch": 2.33, + "learning_rate": 1.2323452416409325e-05, + "loss": 0.1076, + "step": 49995 + }, + { + "epoch": 2.33, + "learning_rate": 1.2322668631354538e-05, + "loss": 0.1184, + "step": 50000 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321884846299751e-05, + "loss": 0.0798, + "step": 50005 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321101061244964e-05, + "loss": 0.2843, + "step": 50010 + }, + { + "epoch": 2.33, + "learning_rate": 1.232031727619018e-05, + "loss": 0.1617, + "step": 50015 + }, + { + "epoch": 2.33, + "learning_rate": 1.2319533491135391e-05, + "loss": 0.0382, + "step": 50020 + }, + { + "epoch": 2.33, + "learning_rate": 1.2318749706080605e-05, + "loss": 0.0547, + "step": 50025 + }, + { + "epoch": 2.33, + "learning_rate": 1.231796592102582e-05, + "loss": 0.0349, + "step": 50030 + }, + { + "epoch": 2.33, + "learning_rate": 1.2317182135971033e-05, + "loss": 0.0498, + "step": 50035 + }, + { + "epoch": 2.33, + "learning_rate": 1.2316398350916245e-05, + "loss": 0.0729, + "step": 50040 + }, + { + "epoch": 2.34, + "learning_rate": 1.231561456586146e-05, + "loss": 0.1601, + "step": 50045 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314830780806673e-05, + "loss": 0.1798, + "step": 50050 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314046995751887e-05, + "loss": 0.1981, + "step": 50055 + }, + { + "epoch": 2.34, + "learning_rate": 1.23132632106971e-05, + "loss": 0.3657, + "step": 50060 + }, + { + "epoch": 2.34, + "learning_rate": 1.2312479425642312e-05, + "loss": 0.3816, + "step": 50065 + }, + { + "epoch": 2.34, + "learning_rate": 1.2311695640587527e-05, + "loss": 0.0421, + "step": 50070 + }, + { + "epoch": 2.34, + "learning_rate": 1.231091185553274e-05, + "loss": 0.0836, + "step": 50075 + }, + { + "epoch": 2.34, + "learning_rate": 1.2310128070477953e-05, + "loss": 0.0849, + "step": 50080 + }, + { + "epoch": 2.34, + "learning_rate": 1.2309344285423165e-05, + "loss": 0.1037, + "step": 50085 + }, + { + "epoch": 2.34, + "learning_rate": 1.2308560500368381e-05, + "loss": 0.1266, + "step": 50090 + }, + { + "epoch": 2.34, + "learning_rate": 1.2307776715313593e-05, + "loss": 0.1167, + "step": 50095 + }, + { + "epoch": 2.34, + "learning_rate": 1.2306992930258807e-05, + "loss": 0.1929, + "step": 50100 + }, + { + "epoch": 2.34, + "learning_rate": 1.230620914520402e-05, + "loss": 0.2002, + "step": 50105 + }, + { + "epoch": 2.34, + "learning_rate": 1.2305425360149235e-05, + "loss": 0.3381, + "step": 50110 + }, + { + "epoch": 2.34, + "learning_rate": 1.2304641575094447e-05, + "loss": 0.3687, + "step": 50115 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303857790039661e-05, + "loss": 0.0699, + "step": 50120 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303074004984873e-05, + "loss": 0.0607, + "step": 50125 + }, + { + "epoch": 2.34, + "learning_rate": 1.2302290219930087e-05, + "loss": 0.0523, + "step": 50130 + }, + { + "epoch": 2.34, + "learning_rate": 1.2301506434875301e-05, + "loss": 0.0631, + "step": 50135 + }, + { + "epoch": 2.34, + "learning_rate": 1.2300722649820513e-05, + "loss": 0.1482, + "step": 50140 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299938864765727e-05, + "loss": 0.1021, + "step": 50145 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299155079710941e-05, + "loss": 0.1413, + "step": 50150 + }, + { + "epoch": 2.34, + "learning_rate": 1.2298371294656155e-05, + "loss": 0.1853, + "step": 50155 + }, + { + "epoch": 2.34, + "learning_rate": 1.2297587509601367e-05, + "loss": 0.2019, + "step": 50160 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296803724546583e-05, + "loss": 0.3643, + "step": 50165 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296019939491795e-05, + "loss": 0.0704, + "step": 50170 + }, + { + "epoch": 2.34, + "learning_rate": 1.2295236154437009e-05, + "loss": 0.0761, + "step": 50175 + }, + { + "epoch": 2.34, + "learning_rate": 1.2294452369382221e-05, + "loss": 0.0657, + "step": 50180 + }, + { + "epoch": 2.34, + "learning_rate": 1.2293668584327437e-05, + "loss": 0.0956, + "step": 50185 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292884799272649e-05, + "loss": 0.057, + "step": 50190 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292101014217861e-05, + "loss": 0.0977, + "step": 50195 + }, + { + "epoch": 2.34, + "learning_rate": 1.2291317229163075e-05, + "loss": 0.1418, + "step": 50200 + }, + { + "epoch": 2.34, + "learning_rate": 1.2290533444108287e-05, + "loss": 0.2145, + "step": 50205 + }, + { + "epoch": 2.34, + "learning_rate": 1.2289749659053503e-05, + "loss": 0.1492, + "step": 50210 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288965873998715e-05, + "loss": 0.3733, + "step": 50215 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288182088943929e-05, + "loss": 0.0474, + "step": 50220 + }, + { + "epoch": 2.34, + "learning_rate": 1.2287398303889141e-05, + "loss": 0.0178, + "step": 50225 + }, + { + "epoch": 2.34, + "learning_rate": 1.2286614518834357e-05, + "loss": 0.1194, + "step": 50230 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285830733779569e-05, + "loss": 0.0799, + "step": 50235 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285046948724783e-05, + "loss": 0.1206, + "step": 50240 + }, + { + "epoch": 2.34, + "learning_rate": 1.2284263163669997e-05, + "loss": 0.1841, + "step": 50245 + }, + { + "epoch": 2.34, + "learning_rate": 1.228347937861521e-05, + "loss": 0.1276, + "step": 50250 + }, + { + "epoch": 2.34, + "learning_rate": 1.2282695593560423e-05, + "loss": 0.2027, + "step": 50255 + }, + { + "epoch": 2.35, + "learning_rate": 1.2281911808505635e-05, + "loss": 0.1799, + "step": 50260 + }, + { + "epoch": 2.35, + "learning_rate": 1.228112802345085e-05, + "loss": 0.375, + "step": 50265 + }, + { + "epoch": 2.35, + "learning_rate": 1.2280344238396063e-05, + "loss": 0.0396, + "step": 50270 + }, + { + "epoch": 2.35, + "learning_rate": 1.2279560453341277e-05, + "loss": 0.0545, + "step": 50275 + }, + { + "epoch": 2.35, + "learning_rate": 1.2278776668286489e-05, + "loss": 0.0606, + "step": 50280 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277992883231705e-05, + "loss": 0.0914, + "step": 50285 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277209098176917e-05, + "loss": 0.0412, + "step": 50290 + }, + { + "epoch": 2.35, + "learning_rate": 1.2276425313122131e-05, + "loss": 0.1434, + "step": 50295 + }, + { + "epoch": 2.35, + "learning_rate": 1.2275641528067343e-05, + "loss": 0.2036, + "step": 50300 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274857743012559e-05, + "loss": 0.219, + "step": 50305 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274073957957771e-05, + "loss": 0.263, + "step": 50310 + }, + { + "epoch": 2.35, + "learning_rate": 1.2273290172902985e-05, + "loss": 0.2972, + "step": 50315 + }, + { + "epoch": 2.35, + "learning_rate": 1.2272506387848197e-05, + "loss": 0.0396, + "step": 50320 + }, + { + "epoch": 2.35, + "learning_rate": 1.227172260279341e-05, + "loss": 0.0763, + "step": 50325 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270938817738625e-05, + "loss": 0.0389, + "step": 50330 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270155032683837e-05, + "loss": 0.0813, + "step": 50335 + }, + { + "epoch": 2.35, + "learning_rate": 1.2269371247629051e-05, + "loss": 0.1306, + "step": 50340 + }, + { + "epoch": 2.35, + "learning_rate": 1.2268587462574265e-05, + "loss": 0.0779, + "step": 50345 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267803677519479e-05, + "loss": 0.187, + "step": 50350 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267019892464691e-05, + "loss": 0.1718, + "step": 50355 + }, + { + "epoch": 2.35, + "learning_rate": 1.2266236107409905e-05, + "loss": 0.3207, + "step": 50360 + }, + { + "epoch": 2.35, + "learning_rate": 1.2265452322355119e-05, + "loss": 0.2648, + "step": 50365 + }, + { + "epoch": 2.35, + "learning_rate": 1.2264668537300333e-05, + "loss": 0.0264, + "step": 50370 + }, + { + "epoch": 2.35, + "learning_rate": 1.2263884752245545e-05, + "loss": 0.0418, + "step": 50375 + }, + { + "epoch": 2.35, + "learning_rate": 1.226310096719076e-05, + "loss": 0.0412, + "step": 50380 + }, + { + "epoch": 2.35, + "learning_rate": 1.2262317182135973e-05, + "loss": 0.0641, + "step": 50385 + }, + { + "epoch": 2.35, + "learning_rate": 1.2261533397081185e-05, + "loss": 0.1354, + "step": 50390 + }, + { + "epoch": 2.35, + "learning_rate": 1.2260749612026399e-05, + "loss": 0.0955, + "step": 50395 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259965826971611e-05, + "loss": 0.1032, + "step": 50400 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259182041916827e-05, + "loss": 0.2132, + "step": 50405 + }, + { + "epoch": 2.35, + "learning_rate": 1.2258398256862039e-05, + "loss": 0.2025, + "step": 50410 + }, + { + "epoch": 2.35, + "learning_rate": 1.2257614471807253e-05, + "loss": 0.2278, + "step": 50415 + }, + { + "epoch": 2.35, + "learning_rate": 1.2256830686752465e-05, + "loss": 0.0977, + "step": 50420 + }, + { + "epoch": 2.35, + "learning_rate": 1.225604690169768e-05, + "loss": 0.0473, + "step": 50425 + }, + { + "epoch": 2.35, + "learning_rate": 1.2255263116642893e-05, + "loss": 0.0606, + "step": 50430 + }, + { + "epoch": 2.35, + "learning_rate": 1.2254479331588107e-05, + "loss": 0.0981, + "step": 50435 + }, + { + "epoch": 2.35, + "learning_rate": 1.2253695546533319e-05, + "loss": 0.0818, + "step": 50440 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252911761478534e-05, + "loss": 0.1135, + "step": 50445 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252127976423747e-05, + "loss": 0.1628, + "step": 50450 + }, + { + "epoch": 2.35, + "learning_rate": 1.2251344191368959e-05, + "loss": 0.1521, + "step": 50455 + }, + { + "epoch": 2.35, + "learning_rate": 1.2250560406314174e-05, + "loss": 0.3401, + "step": 50460 + }, + { + "epoch": 2.35, + "learning_rate": 1.2249776621259387e-05, + "loss": 0.2756, + "step": 50465 + }, + { + "epoch": 2.35, + "learning_rate": 1.22489928362046e-05, + "loss": 0.0554, + "step": 50470 + }, + { + "epoch": 2.36, + "learning_rate": 1.2248209051149813e-05, + "loss": 0.0253, + "step": 50475 + }, + { + "epoch": 2.36, + "learning_rate": 1.2247425266095028e-05, + "loss": 0.1059, + "step": 50480 + }, + { + "epoch": 2.36, + "learning_rate": 1.224664148104024e-05, + "loss": 0.1775, + "step": 50485 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245857695985455e-05, + "loss": 0.0703, + "step": 50490 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245073910930667e-05, + "loss": 0.0463, + "step": 50495 + }, + { + "epoch": 2.36, + "learning_rate": 1.2244290125875882e-05, + "loss": 0.1483, + "step": 50500 + }, + { + "epoch": 2.36, + "learning_rate": 1.2243506340821095e-05, + "loss": 0.2113, + "step": 50505 + }, + { + "epoch": 2.36, + "learning_rate": 1.2242722555766308e-05, + "loss": 0.2143, + "step": 50510 + }, + { + "epoch": 2.36, + "learning_rate": 1.224193877071152e-05, + "loss": 0.3792, + "step": 50515 + }, + { + "epoch": 2.36, + "learning_rate": 1.2241154985656733e-05, + "loss": 0.0368, + "step": 50520 + }, + { + "epoch": 2.36, + "learning_rate": 1.2240371200601948e-05, + "loss": 0.0232, + "step": 50525 + }, + { + "epoch": 2.36, + "learning_rate": 1.223958741554716e-05, + "loss": 0.0197, + "step": 50530 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238803630492375e-05, + "loss": 0.1343, + "step": 50535 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238019845437587e-05, + "loss": 0.1147, + "step": 50540 + }, + { + "epoch": 2.36, + "learning_rate": 1.2237236060382802e-05, + "loss": 0.135, + "step": 50545 + }, + { + "epoch": 2.36, + "learning_rate": 1.2236452275328015e-05, + "loss": 0.1731, + "step": 50550 + }, + { + "epoch": 2.36, + "learning_rate": 1.2235668490273229e-05, + "loss": 0.1098, + "step": 50555 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234884705218442e-05, + "loss": 0.2273, + "step": 50560 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234100920163656e-05, + "loss": 0.2623, + "step": 50565 + }, + { + "epoch": 2.36, + "learning_rate": 1.2233317135108869e-05, + "loss": 0.0419, + "step": 50570 + }, + { + "epoch": 2.36, + "learning_rate": 1.2232533350054082e-05, + "loss": 0.0451, + "step": 50575 + }, + { + "epoch": 2.36, + "learning_rate": 1.2231749564999296e-05, + "loss": 0.0982, + "step": 50580 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230965779944509e-05, + "loss": 0.0518, + "step": 50585 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230181994889722e-05, + "loss": 0.0991, + "step": 50590 + }, + { + "epoch": 2.36, + "learning_rate": 1.2229398209834935e-05, + "loss": 0.0705, + "step": 50595 + }, + { + "epoch": 2.36, + "learning_rate": 1.222861442478015e-05, + "loss": 0.1053, + "step": 50600 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227830639725363e-05, + "loss": 0.1624, + "step": 50605 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227046854670576e-05, + "loss": 0.2793, + "step": 50610 + }, + { + "epoch": 2.36, + "learning_rate": 1.2226263069615789e-05, + "loss": 0.2738, + "step": 50615 + }, + { + "epoch": 2.36, + "learning_rate": 1.2225479284561004e-05, + "loss": 0.0397, + "step": 50620 + }, + { + "epoch": 2.36, + "learning_rate": 1.2224695499506216e-05, + "loss": 0.0552, + "step": 50625 + }, + { + "epoch": 2.36, + "learning_rate": 1.222391171445143e-05, + "loss": 0.1016, + "step": 50630 + }, + { + "epoch": 2.36, + "learning_rate": 1.2223127929396643e-05, + "loss": 0.1722, + "step": 50635 + }, + { + "epoch": 2.36, + "learning_rate": 1.2222344144341858e-05, + "loss": 0.1134, + "step": 50640 + }, + { + "epoch": 2.36, + "learning_rate": 1.222156035928707e-05, + "loss": 0.1319, + "step": 50645 + }, + { + "epoch": 2.36, + "learning_rate": 1.2220776574232283e-05, + "loss": 0.1036, + "step": 50650 + }, + { + "epoch": 2.36, + "learning_rate": 1.2219992789177496e-05, + "loss": 0.1984, + "step": 50655 + }, + { + "epoch": 2.36, + "learning_rate": 1.221920900412271e-05, + "loss": 0.2497, + "step": 50660 + }, + { + "epoch": 2.36, + "learning_rate": 1.2218425219067924e-05, + "loss": 0.3259, + "step": 50665 + }, + { + "epoch": 2.36, + "learning_rate": 1.2217641434013137e-05, + "loss": 0.0318, + "step": 50670 + }, + { + "epoch": 2.36, + "learning_rate": 1.221685764895835e-05, + "loss": 0.0643, + "step": 50675 + }, + { + "epoch": 2.36, + "learning_rate": 1.2216073863903564e-05, + "loss": 0.1131, + "step": 50680 + }, + { + "epoch": 2.37, + "learning_rate": 1.2215290078848778e-05, + "loss": 0.073, + "step": 50685 + }, + { + "epoch": 2.37, + "learning_rate": 1.221450629379399e-05, + "loss": 0.1513, + "step": 50690 + }, + { + "epoch": 2.37, + "learning_rate": 1.2213722508739206e-05, + "loss": 0.1274, + "step": 50695 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212938723684418e-05, + "loss": 0.1174, + "step": 50700 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212154938629632e-05, + "loss": 0.196, + "step": 50705 + }, + { + "epoch": 2.37, + "learning_rate": 1.2211371153574844e-05, + "loss": 0.2789, + "step": 50710 + }, + { + "epoch": 2.37, + "learning_rate": 1.2210587368520057e-05, + "loss": 0.2517, + "step": 50715 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209803583465272e-05, + "loss": 0.0358, + "step": 50720 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209019798410484e-05, + "loss": 0.0353, + "step": 50725 + }, + { + "epoch": 2.37, + "learning_rate": 1.2208236013355698e-05, + "loss": 0.116, + "step": 50730 + }, + { + "epoch": 2.37, + "learning_rate": 1.220745222830091e-05, + "loss": 0.0738, + "step": 50735 + }, + { + "epoch": 2.37, + "learning_rate": 1.2206668443246126e-05, + "loss": 0.0956, + "step": 50740 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205884658191338e-05, + "loss": 0.0734, + "step": 50745 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205100873136552e-05, + "loss": 0.1592, + "step": 50750 + }, + { + "epoch": 2.37, + "learning_rate": 1.2204317088081764e-05, + "loss": 0.2333, + "step": 50755 + }, + { + "epoch": 2.37, + "learning_rate": 1.220353330302698e-05, + "loss": 0.1929, + "step": 50760 + }, + { + "epoch": 2.37, + "learning_rate": 1.2202749517972192e-05, + "loss": 0.5826, + "step": 50765 + }, + { + "epoch": 2.37, + "learning_rate": 1.2201965732917406e-05, + "loss": 0.0678, + "step": 50770 + }, + { + "epoch": 2.37, + "learning_rate": 1.220118194786262e-05, + "loss": 0.0588, + "step": 50775 + }, + { + "epoch": 2.37, + "learning_rate": 1.2200398162807832e-05, + "loss": 0.0301, + "step": 50780 + }, + { + "epoch": 2.37, + "learning_rate": 1.2199614377753046e-05, + "loss": 0.0743, + "step": 50785 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198830592698258e-05, + "loss": 0.0594, + "step": 50790 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198046807643474e-05, + "loss": 0.0979, + "step": 50795 + }, + { + "epoch": 2.37, + "learning_rate": 1.2197263022588686e-05, + "loss": 0.0678, + "step": 50800 + }, + { + "epoch": 2.37, + "learning_rate": 1.21964792375339e-05, + "loss": 0.1468, + "step": 50805 + }, + { + "epoch": 2.37, + "learning_rate": 1.2195695452479112e-05, + "loss": 0.238, + "step": 50810 + }, + { + "epoch": 2.37, + "learning_rate": 1.2194911667424328e-05, + "loss": 0.185, + "step": 50815 + }, + { + "epoch": 2.37, + "learning_rate": 1.219412788236954e-05, + "loss": 0.0528, + "step": 50820 + }, + { + "epoch": 2.37, + "learning_rate": 1.2193344097314754e-05, + "loss": 0.0391, + "step": 50825 + }, + { + "epoch": 2.37, + "learning_rate": 1.2192560312259966e-05, + "loss": 0.0602, + "step": 50830 + }, + { + "epoch": 2.37, + "learning_rate": 1.2191776527205182e-05, + "loss": 0.0745, + "step": 50835 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190992742150394e-05, + "loss": 0.0926, + "step": 50840 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190208957095606e-05, + "loss": 0.1335, + "step": 50845 + }, + { + "epoch": 2.37, + "learning_rate": 1.218942517204082e-05, + "loss": 0.1373, + "step": 50850 + }, + { + "epoch": 2.37, + "learning_rate": 1.2188641386986032e-05, + "loss": 0.204, + "step": 50855 + }, + { + "epoch": 2.37, + "learning_rate": 1.2187857601931248e-05, + "loss": 0.2939, + "step": 50860 + }, + { + "epoch": 2.37, + "learning_rate": 1.218707381687646e-05, + "loss": 0.2599, + "step": 50865 + }, + { + "epoch": 2.37, + "learning_rate": 1.2186290031821674e-05, + "loss": 0.1391, + "step": 50870 + }, + { + "epoch": 2.37, + "learning_rate": 1.2185506246766888e-05, + "loss": 0.0675, + "step": 50875 + }, + { + "epoch": 2.37, + "learning_rate": 1.2184722461712102e-05, + "loss": 0.0415, + "step": 50880 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183938676657314e-05, + "loss": 0.0662, + "step": 50885 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183154891602528e-05, + "loss": 0.0667, + "step": 50890 + }, + { + "epoch": 2.37, + "learning_rate": 1.2182371106547742e-05, + "loss": 0.0808, + "step": 50895 + }, + { + "epoch": 2.38, + "learning_rate": 1.2181587321492956e-05, + "loss": 0.1179, + "step": 50900 + }, + { + "epoch": 2.38, + "learning_rate": 1.2180803536438168e-05, + "loss": 0.1444, + "step": 50905 + }, + { + "epoch": 2.38, + "learning_rate": 1.218001975138338e-05, + "loss": 0.3121, + "step": 50910 + }, + { + "epoch": 2.38, + "learning_rate": 1.2179235966328596e-05, + "loss": 0.1573, + "step": 50915 + }, + { + "epoch": 2.38, + "learning_rate": 1.2178452181273808e-05, + "loss": 0.1081, + "step": 50920 + }, + { + "epoch": 2.38, + "learning_rate": 1.2177668396219022e-05, + "loss": 0.0486, + "step": 50925 + }, + { + "epoch": 2.38, + "learning_rate": 1.2176884611164234e-05, + "loss": 0.0354, + "step": 50930 + }, + { + "epoch": 2.38, + "learning_rate": 1.217610082610945e-05, + "loss": 0.1187, + "step": 50935 + }, + { + "epoch": 2.38, + "learning_rate": 1.2175317041054662e-05, + "loss": 0.0868, + "step": 50940 + }, + { + "epoch": 2.38, + "learning_rate": 1.2174533255999876e-05, + "loss": 0.1052, + "step": 50945 + }, + { + "epoch": 2.38, + "learning_rate": 1.2173749470945088e-05, + "loss": 0.1715, + "step": 50950 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172965685890304e-05, + "loss": 0.2318, + "step": 50955 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172181900835516e-05, + "loss": 0.2057, + "step": 50960 + }, + { + "epoch": 2.38, + "learning_rate": 1.217139811578073e-05, + "loss": 0.2589, + "step": 50965 + }, + { + "epoch": 2.38, + "learning_rate": 1.2170614330725942e-05, + "loss": 0.0572, + "step": 50970 + }, + { + "epoch": 2.38, + "learning_rate": 1.2169830545671156e-05, + "loss": 0.0639, + "step": 50975 + }, + { + "epoch": 2.38, + "learning_rate": 1.216904676061637e-05, + "loss": 0.0619, + "step": 50980 + }, + { + "epoch": 2.38, + "learning_rate": 1.2168262975561582e-05, + "loss": 0.0854, + "step": 50985 + }, + { + "epoch": 2.38, + "learning_rate": 1.2167479190506796e-05, + "loss": 0.0625, + "step": 50990 + }, + { + "epoch": 2.38, + "learning_rate": 1.216669540545201e-05, + "loss": 0.0908, + "step": 50995 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165911620397224e-05, + "loss": 0.1432, + "step": 51000 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165127835342436e-05, + "loss": 0.1444, + "step": 51005 + }, + { + "epoch": 2.38, + "learning_rate": 1.2164344050287652e-05, + "loss": 0.1782, + "step": 51010 + }, + { + "epoch": 2.38, + "learning_rate": 1.2163560265232864e-05, + "loss": 0.2457, + "step": 51015 + }, + { + "epoch": 2.38, + "learning_rate": 1.2162776480178078e-05, + "loss": 0.0601, + "step": 51020 + }, + { + "epoch": 2.38, + "learning_rate": 1.216199269512329e-05, + "loss": 0.1218, + "step": 51025 + }, + { + "epoch": 2.38, + "learning_rate": 1.2161208910068506e-05, + "loss": 0.059, + "step": 51030 + }, + { + "epoch": 2.38, + "learning_rate": 1.2160425125013718e-05, + "loss": 0.0545, + "step": 51035 + }, + { + "epoch": 2.38, + "learning_rate": 1.215964133995893e-05, + "loss": 0.1509, + "step": 51040 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158857554904144e-05, + "loss": 0.0776, + "step": 51045 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158073769849356e-05, + "loss": 0.2111, + "step": 51050 + }, + { + "epoch": 2.38, + "learning_rate": 1.2157289984794572e-05, + "loss": 0.1845, + "step": 51055 + }, + { + "epoch": 2.38, + "learning_rate": 1.2156506199739784e-05, + "loss": 0.295, + "step": 51060 + }, + { + "epoch": 2.38, + "learning_rate": 1.2155722414684998e-05, + "loss": 0.3386, + "step": 51065 + }, + { + "epoch": 2.38, + "learning_rate": 1.215493862963021e-05, + "loss": 0.0308, + "step": 51070 + }, + { + "epoch": 2.38, + "learning_rate": 1.2154154844575426e-05, + "loss": 0.0535, + "step": 51075 + }, + { + "epoch": 2.38, + "learning_rate": 1.2153371059520638e-05, + "loss": 0.0747, + "step": 51080 + }, + { + "epoch": 2.38, + "learning_rate": 1.2152587274465852e-05, + "loss": 0.0781, + "step": 51085 + }, + { + "epoch": 2.38, + "learning_rate": 1.2151803489411066e-05, + "loss": 0.074, + "step": 51090 + }, + { + "epoch": 2.38, + "learning_rate": 1.215101970435628e-05, + "loss": 0.0889, + "step": 51095 + }, + { + "epoch": 2.38, + "learning_rate": 1.2150235919301492e-05, + "loss": 0.0867, + "step": 51100 + }, + { + "epoch": 2.38, + "learning_rate": 1.2149452134246704e-05, + "loss": 0.1321, + "step": 51105 + }, + { + "epoch": 2.38, + "learning_rate": 1.214866834919192e-05, + "loss": 0.1562, + "step": 51110 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147884564137132e-05, + "loss": 0.632, + "step": 51115 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147100779082346e-05, + "loss": 0.0545, + "step": 51120 + }, + { + "epoch": 2.39, + "learning_rate": 1.2146316994027558e-05, + "loss": 0.0554, + "step": 51125 + }, + { + "epoch": 2.39, + "learning_rate": 1.2145533208972773e-05, + "loss": 0.0916, + "step": 51130 + }, + { + "epoch": 2.39, + "learning_rate": 1.2144749423917986e-05, + "loss": 0.0778, + "step": 51135 + }, + { + "epoch": 2.39, + "learning_rate": 1.21439656388632e-05, + "loss": 0.1447, + "step": 51140 + }, + { + "epoch": 2.39, + "learning_rate": 1.2143181853808412e-05, + "loss": 0.062, + "step": 51145 + }, + { + "epoch": 2.39, + "learning_rate": 1.2142398068753627e-05, + "loss": 0.1337, + "step": 51150 + }, + { + "epoch": 2.39, + "learning_rate": 1.214161428369884e-05, + "loss": 0.1622, + "step": 51155 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140830498644054e-05, + "loss": 0.2186, + "step": 51160 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140046713589266e-05, + "loss": 0.2537, + "step": 51165 + }, + { + "epoch": 2.39, + "learning_rate": 1.2139262928534478e-05, + "loss": 0.0788, + "step": 51170 + }, + { + "epoch": 2.39, + "learning_rate": 1.2138479143479694e-05, + "loss": 0.0364, + "step": 51175 + }, + { + "epoch": 2.39, + "learning_rate": 1.2137695358424906e-05, + "loss": 0.1155, + "step": 51180 + }, + { + "epoch": 2.39, + "learning_rate": 1.213691157337012e-05, + "loss": 0.0683, + "step": 51185 + }, + { + "epoch": 2.39, + "learning_rate": 1.2136127788315334e-05, + "loss": 0.1416, + "step": 51190 + }, + { + "epoch": 2.39, + "learning_rate": 1.2135344003260547e-05, + "loss": 0.0975, + "step": 51195 + }, + { + "epoch": 2.39, + "learning_rate": 1.213456021820576e-05, + "loss": 0.0998, + "step": 51200 + }, + { + "epoch": 2.39, + "learning_rate": 1.2133776433150974e-05, + "loss": 0.2315, + "step": 51205 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132992648096188e-05, + "loss": 0.2483, + "step": 51210 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132208863041401e-05, + "loss": 0.2255, + "step": 51215 + }, + { + "epoch": 2.39, + "learning_rate": 1.2131425077986614e-05, + "loss": 0.1239, + "step": 51220 + }, + { + "epoch": 2.39, + "learning_rate": 1.213064129293183e-05, + "loss": 0.0421, + "step": 51225 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129857507877041e-05, + "loss": 0.0431, + "step": 51230 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129073722822254e-05, + "loss": 0.0604, + "step": 51235 + }, + { + "epoch": 2.39, + "learning_rate": 1.2128289937767468e-05, + "loss": 0.0692, + "step": 51240 + }, + { + "epoch": 2.39, + "learning_rate": 1.212750615271268e-05, + "loss": 0.0819, + "step": 51245 + }, + { + "epoch": 2.39, + "learning_rate": 1.2126722367657895e-05, + "loss": 0.0985, + "step": 51250 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125938582603108e-05, + "loss": 0.1348, + "step": 51255 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125154797548321e-05, + "loss": 0.184, + "step": 51260 + }, + { + "epoch": 2.39, + "learning_rate": 1.2124371012493534e-05, + "loss": 0.3057, + "step": 51265 + }, + { + "epoch": 2.39, + "learning_rate": 1.212358722743875e-05, + "loss": 0.088, + "step": 51270 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122803442383962e-05, + "loss": 0.0724, + "step": 51275 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122019657329175e-05, + "loss": 0.0378, + "step": 51280 + }, + { + "epoch": 2.39, + "learning_rate": 1.2121235872274388e-05, + "loss": 0.0781, + "step": 51285 + }, + { + "epoch": 2.39, + "learning_rate": 1.2120452087219603e-05, + "loss": 0.1908, + "step": 51290 + }, + { + "epoch": 2.39, + "learning_rate": 1.2119668302164815e-05, + "loss": 0.1843, + "step": 51295 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118884517110028e-05, + "loss": 0.085, + "step": 51300 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118100732055242e-05, + "loss": 0.1861, + "step": 51305 + }, + { + "epoch": 2.39, + "learning_rate": 1.2117316947000455e-05, + "loss": 0.252, + "step": 51310 + }, + { + "epoch": 2.39, + "learning_rate": 1.211653316194567e-05, + "loss": 0.341, + "step": 51315 + }, + { + "epoch": 2.39, + "learning_rate": 1.2115749376890882e-05, + "loss": 0.054, + "step": 51320 + }, + { + "epoch": 2.39, + "learning_rate": 1.2114965591836097e-05, + "loss": 0.0834, + "step": 51325 + }, + { + "epoch": 2.4, + "learning_rate": 1.211418180678131e-05, + "loss": 0.0253, + "step": 51330 + }, + { + "epoch": 2.4, + "learning_rate": 1.2113398021726523e-05, + "loss": 0.0387, + "step": 51335 + }, + { + "epoch": 2.4, + "learning_rate": 1.2112614236671736e-05, + "loss": 0.058, + "step": 51340 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111830451616951e-05, + "loss": 0.0874, + "step": 51345 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111046666562163e-05, + "loss": 0.0858, + "step": 51350 + }, + { + "epoch": 2.4, + "learning_rate": 1.2110262881507377e-05, + "loss": 0.1828, + "step": 51355 + }, + { + "epoch": 2.4, + "learning_rate": 1.210947909645259e-05, + "loss": 0.2306, + "step": 51360 + }, + { + "epoch": 2.4, + "learning_rate": 1.2108695311397802e-05, + "loss": 0.3866, + "step": 51365 + }, + { + "epoch": 2.4, + "learning_rate": 1.2107911526343017e-05, + "loss": 0.0208, + "step": 51370 + }, + { + "epoch": 2.4, + "learning_rate": 1.210712774128823e-05, + "loss": 0.0327, + "step": 51375 + }, + { + "epoch": 2.4, + "learning_rate": 1.2106343956233443e-05, + "loss": 0.0503, + "step": 51380 + }, + { + "epoch": 2.4, + "learning_rate": 1.2105560171178656e-05, + "loss": 0.1202, + "step": 51385 + }, + { + "epoch": 2.4, + "learning_rate": 1.2104776386123871e-05, + "loss": 0.1188, + "step": 51390 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103992601069083e-05, + "loss": 0.0703, + "step": 51395 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103208816014297e-05, + "loss": 0.1258, + "step": 51400 + }, + { + "epoch": 2.4, + "learning_rate": 1.2102425030959511e-05, + "loss": 0.2325, + "step": 51405 + }, + { + "epoch": 2.4, + "learning_rate": 1.2101641245904725e-05, + "loss": 0.1977, + "step": 51410 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100857460849937e-05, + "loss": 0.3474, + "step": 51415 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100073675795151e-05, + "loss": 0.077, + "step": 51420 + }, + { + "epoch": 2.4, + "learning_rate": 1.2099289890740365e-05, + "loss": 0.0395, + "step": 51425 + }, + { + "epoch": 2.4, + "learning_rate": 1.2098506105685577e-05, + "loss": 0.0626, + "step": 51430 + }, + { + "epoch": 2.4, + "learning_rate": 1.2097722320630791e-05, + "loss": 0.0527, + "step": 51435 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096938535576003e-05, + "loss": 0.1469, + "step": 51440 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096154750521219e-05, + "loss": 0.1648, + "step": 51445 + }, + { + "epoch": 2.4, + "learning_rate": 1.2095370965466431e-05, + "loss": 0.1252, + "step": 51450 + }, + { + "epoch": 2.4, + "learning_rate": 1.2094587180411645e-05, + "loss": 0.199, + "step": 51455 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093803395356857e-05, + "loss": 0.252, + "step": 51460 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093019610302073e-05, + "loss": 0.2616, + "step": 51465 + }, + { + "epoch": 2.4, + "learning_rate": 1.2092235825247285e-05, + "loss": 0.0359, + "step": 51470 + }, + { + "epoch": 2.4, + "learning_rate": 1.2091452040192499e-05, + "loss": 0.0569, + "step": 51475 + }, + { + "epoch": 2.4, + "learning_rate": 1.2090668255137711e-05, + "loss": 0.0824, + "step": 51480 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089884470082927e-05, + "loss": 0.065, + "step": 51485 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089100685028139e-05, + "loss": 0.0876, + "step": 51490 + }, + { + "epoch": 2.4, + "learning_rate": 1.2088316899973351e-05, + "loss": 0.1345, + "step": 51495 + }, + { + "epoch": 2.4, + "learning_rate": 1.2087533114918565e-05, + "loss": 0.0907, + "step": 51500 + }, + { + "epoch": 2.4, + "learning_rate": 1.2086749329863779e-05, + "loss": 0.3917, + "step": 51505 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085965544808993e-05, + "loss": 0.3124, + "step": 51510 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085181759754205e-05, + "loss": 0.3424, + "step": 51515 + }, + { + "epoch": 2.4, + "learning_rate": 1.208439797469942e-05, + "loss": 0.0339, + "step": 51520 + }, + { + "epoch": 2.4, + "learning_rate": 1.2083614189644633e-05, + "loss": 0.0568, + "step": 51525 + }, + { + "epoch": 2.4, + "learning_rate": 1.2082830404589847e-05, + "loss": 0.1319, + "step": 51530 + }, + { + "epoch": 2.4, + "learning_rate": 1.208204661953506e-05, + "loss": 0.0855, + "step": 51535 + }, + { + "epoch": 2.4, + "learning_rate": 1.2081262834480275e-05, + "loss": 0.0986, + "step": 51540 + }, + { + "epoch": 2.41, + "learning_rate": 1.2080479049425487e-05, + "loss": 0.1848, + "step": 51545 + }, + { + "epoch": 2.41, + "learning_rate": 1.2079695264370701e-05, + "loss": 0.1471, + "step": 51550 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078911479315913e-05, + "loss": 0.1345, + "step": 51555 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078127694261125e-05, + "loss": 0.1926, + "step": 51560 + }, + { + "epoch": 2.41, + "learning_rate": 1.2077343909206341e-05, + "loss": 0.3924, + "step": 51565 + }, + { + "epoch": 2.41, + "learning_rate": 1.2076560124151553e-05, + "loss": 0.0675, + "step": 51570 + }, + { + "epoch": 2.41, + "learning_rate": 1.2075776339096767e-05, + "loss": 0.0411, + "step": 51575 + }, + { + "epoch": 2.41, + "learning_rate": 1.207499255404198e-05, + "loss": 0.0543, + "step": 51580 + }, + { + "epoch": 2.41, + "learning_rate": 1.2074208768987195e-05, + "loss": 0.148, + "step": 51585 + }, + { + "epoch": 2.41, + "learning_rate": 1.2073424983932407e-05, + "loss": 0.0893, + "step": 51590 + }, + { + "epoch": 2.41, + "learning_rate": 1.2072641198877621e-05, + "loss": 0.1574, + "step": 51595 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071857413822833e-05, + "loss": 0.1502, + "step": 51600 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071073628768049e-05, + "loss": 0.2346, + "step": 51605 + }, + { + "epoch": 2.41, + "learning_rate": 1.2070289843713261e-05, + "loss": 0.2326, + "step": 51610 + }, + { + "epoch": 2.41, + "learning_rate": 1.2069506058658475e-05, + "loss": 0.3411, + "step": 51615 + }, + { + "epoch": 2.41, + "learning_rate": 1.2068722273603689e-05, + "loss": 0.0766, + "step": 51620 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067938488548901e-05, + "loss": 0.022, + "step": 51625 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067154703494115e-05, + "loss": 0.0682, + "step": 51630 + }, + { + "epoch": 2.41, + "learning_rate": 1.2066370918439327e-05, + "loss": 0.0439, + "step": 51635 + }, + { + "epoch": 2.41, + "learning_rate": 1.2065587133384543e-05, + "loss": 0.0751, + "step": 51640 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064803348329755e-05, + "loss": 0.0661, + "step": 51645 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064019563274969e-05, + "loss": 0.215, + "step": 51650 + }, + { + "epoch": 2.41, + "learning_rate": 1.2063235778220181e-05, + "loss": 0.2804, + "step": 51655 + }, + { + "epoch": 2.41, + "learning_rate": 1.2062451993165397e-05, + "loss": 0.3444, + "step": 51660 + }, + { + "epoch": 2.41, + "learning_rate": 1.2061668208110609e-05, + "loss": 0.2795, + "step": 51665 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060884423055823e-05, + "loss": 0.0677, + "step": 51670 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060100638001035e-05, + "loss": 0.0437, + "step": 51675 + }, + { + "epoch": 2.41, + "learning_rate": 1.205931685294625e-05, + "loss": 0.0713, + "step": 51680 + }, + { + "epoch": 2.41, + "learning_rate": 1.2058533067891463e-05, + "loss": 0.032, + "step": 51685 + }, + { + "epoch": 2.41, + "learning_rate": 1.2057749282836675e-05, + "loss": 0.0629, + "step": 51690 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056965497781889e-05, + "loss": 0.1758, + "step": 51695 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056181712727101e-05, + "loss": 0.0785, + "step": 51700 + }, + { + "epoch": 2.41, + "learning_rate": 1.2055397927672317e-05, + "loss": 0.1533, + "step": 51705 + }, + { + "epoch": 2.41, + "learning_rate": 1.2054614142617529e-05, + "loss": 0.249, + "step": 51710 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053830357562743e-05, + "loss": 0.186, + "step": 51715 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053046572507957e-05, + "loss": 0.01, + "step": 51720 + }, + { + "epoch": 2.41, + "learning_rate": 1.205226278745317e-05, + "loss": 0.0311, + "step": 51725 + }, + { + "epoch": 2.41, + "learning_rate": 1.2051479002398383e-05, + "loss": 0.0707, + "step": 51730 + }, + { + "epoch": 2.41, + "learning_rate": 1.2050695217343597e-05, + "loss": 0.0547, + "step": 51735 + }, + { + "epoch": 2.41, + "learning_rate": 1.204991143228881e-05, + "loss": 0.1202, + "step": 51740 + }, + { + "epoch": 2.41, + "learning_rate": 1.2049127647234025e-05, + "loss": 0.0915, + "step": 51745 + }, + { + "epoch": 2.41, + "learning_rate": 1.2048343862179237e-05, + "loss": 0.2305, + "step": 51750 + }, + { + "epoch": 2.41, + "learning_rate": 1.2047560077124449e-05, + "loss": 0.2337, + "step": 51755 + }, + { + "epoch": 2.42, + "learning_rate": 1.2046776292069665e-05, + "loss": 0.2104, + "step": 51760 + }, + { + "epoch": 2.42, + "learning_rate": 1.2045992507014877e-05, + "loss": 0.2833, + "step": 51765 + }, + { + "epoch": 2.42, + "learning_rate": 1.204520872196009e-05, + "loss": 0.022, + "step": 51770 + }, + { + "epoch": 2.42, + "learning_rate": 1.2044424936905303e-05, + "loss": 0.0689, + "step": 51775 + }, + { + "epoch": 2.42, + "learning_rate": 1.2043641151850519e-05, + "loss": 0.142, + "step": 51780 + }, + { + "epoch": 2.42, + "learning_rate": 1.204285736679573e-05, + "loss": 0.1489, + "step": 51785 + }, + { + "epoch": 2.42, + "learning_rate": 1.2042073581740945e-05, + "loss": 0.0738, + "step": 51790 + }, + { + "epoch": 2.42, + "learning_rate": 1.2041289796686157e-05, + "loss": 0.114, + "step": 51795 + }, + { + "epoch": 2.42, + "learning_rate": 1.2040506011631372e-05, + "loss": 0.1686, + "step": 51800 + }, + { + "epoch": 2.42, + "learning_rate": 1.2039722226576585e-05, + "loss": 0.1656, + "step": 51805 + }, + { + "epoch": 2.42, + "learning_rate": 1.2038938441521799e-05, + "loss": 0.1686, + "step": 51810 + }, + { + "epoch": 2.42, + "learning_rate": 1.203815465646701e-05, + "loss": 0.2587, + "step": 51815 + }, + { + "epoch": 2.42, + "learning_rate": 1.2037370871412225e-05, + "loss": 0.0502, + "step": 51820 + }, + { + "epoch": 2.42, + "learning_rate": 1.2036587086357439e-05, + "loss": 0.0239, + "step": 51825 + }, + { + "epoch": 2.42, + "learning_rate": 1.203580330130265e-05, + "loss": 0.0367, + "step": 51830 + }, + { + "epoch": 2.42, + "learning_rate": 1.2035019516247865e-05, + "loss": 0.0598, + "step": 51835 + }, + { + "epoch": 2.42, + "learning_rate": 1.2034235731193079e-05, + "loss": 0.1056, + "step": 51840 + }, + { + "epoch": 2.42, + "learning_rate": 1.2033451946138293e-05, + "loss": 0.1451, + "step": 51845 + }, + { + "epoch": 2.42, + "learning_rate": 1.2032668161083505e-05, + "loss": 0.174, + "step": 51850 + }, + { + "epoch": 2.42, + "learning_rate": 1.203188437602872e-05, + "loss": 0.199, + "step": 51855 + }, + { + "epoch": 2.42, + "learning_rate": 1.2031100590973933e-05, + "loss": 0.3578, + "step": 51860 + }, + { + "epoch": 2.42, + "learning_rate": 1.2030316805919146e-05, + "loss": 0.3888, + "step": 51865 + }, + { + "epoch": 2.42, + "learning_rate": 1.2029533020864359e-05, + "loss": 0.0432, + "step": 51870 + }, + { + "epoch": 2.42, + "learning_rate": 1.2028749235809574e-05, + "loss": 0.0301, + "step": 51875 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027965450754787e-05, + "loss": 0.0577, + "step": 51880 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027181665699999e-05, + "loss": 0.0879, + "step": 51885 + }, + { + "epoch": 2.42, + "learning_rate": 1.2026397880645213e-05, + "loss": 0.1147, + "step": 51890 + }, + { + "epoch": 2.42, + "learning_rate": 1.2025614095590425e-05, + "loss": 0.1604, + "step": 51895 + }, + { + "epoch": 2.42, + "learning_rate": 1.202483031053564e-05, + "loss": 0.0468, + "step": 51900 + }, + { + "epoch": 2.42, + "learning_rate": 1.2024046525480853e-05, + "loss": 0.1913, + "step": 51905 + }, + { + "epoch": 2.42, + "learning_rate": 1.2023262740426067e-05, + "loss": 0.2669, + "step": 51910 + }, + { + "epoch": 2.42, + "learning_rate": 1.2022478955371279e-05, + "loss": 0.17, + "step": 51915 + }, + { + "epoch": 2.42, + "learning_rate": 1.2021695170316494e-05, + "loss": 0.1219, + "step": 51920 + }, + { + "epoch": 2.42, + "learning_rate": 1.2020911385261707e-05, + "loss": 0.0375, + "step": 51925 + }, + { + "epoch": 2.42, + "learning_rate": 1.202012760020692e-05, + "loss": 0.0667, + "step": 51930 + }, + { + "epoch": 2.42, + "learning_rate": 1.2019343815152134e-05, + "loss": 0.0615, + "step": 51935 + }, + { + "epoch": 2.42, + "learning_rate": 1.2018560030097348e-05, + "loss": 0.0947, + "step": 51940 + }, + { + "epoch": 2.42, + "learning_rate": 1.201777624504256e-05, + "loss": 0.1589, + "step": 51945 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016992459987773e-05, + "loss": 0.0904, + "step": 51950 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016208674932988e-05, + "loss": 0.2414, + "step": 51955 + }, + { + "epoch": 2.42, + "learning_rate": 1.20154248898782e-05, + "loss": 0.3085, + "step": 51960 + }, + { + "epoch": 2.42, + "learning_rate": 1.2014641104823414e-05, + "loss": 0.3275, + "step": 51965 + }, + { + "epoch": 2.42, + "learning_rate": 1.2013857319768627e-05, + "loss": 0.0334, + "step": 51970 + }, + { + "epoch": 2.43, + "learning_rate": 1.2013073534713842e-05, + "loss": 0.0268, + "step": 51975 + }, + { + "epoch": 2.43, + "learning_rate": 1.2012289749659054e-05, + "loss": 0.1292, + "step": 51980 + }, + { + "epoch": 2.43, + "learning_rate": 1.2011505964604268e-05, + "loss": 0.0277, + "step": 51985 + }, + { + "epoch": 2.43, + "learning_rate": 1.201072217954948e-05, + "loss": 0.0855, + "step": 51990 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009938394494696e-05, + "loss": 0.0944, + "step": 51995 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009154609439908e-05, + "loss": 0.0794, + "step": 52000 + }, + { + "epoch": 2.43, + "learning_rate": 1.2008370824385122e-05, + "loss": 0.1485, + "step": 52005 + }, + { + "epoch": 2.43, + "learning_rate": 1.2007587039330335e-05, + "loss": 0.2271, + "step": 52010 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006803254275547e-05, + "loss": 0.2456, + "step": 52015 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006019469220762e-05, + "loss": 0.085, + "step": 52020 + }, + { + "epoch": 2.43, + "learning_rate": 1.2005235684165975e-05, + "loss": 0.0676, + "step": 52025 + }, + { + "epoch": 2.43, + "learning_rate": 1.2004451899111188e-05, + "loss": 0.0165, + "step": 52030 + }, + { + "epoch": 2.43, + "learning_rate": 1.2003668114056402e-05, + "loss": 0.0606, + "step": 52035 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002884329001616e-05, + "loss": 0.1106, + "step": 52040 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002100543946828e-05, + "loss": 0.1506, + "step": 52045 + }, + { + "epoch": 2.43, + "learning_rate": 1.2001316758892042e-05, + "loss": 0.0898, + "step": 52050 + }, + { + "epoch": 2.43, + "learning_rate": 1.2000532973837256e-05, + "loss": 0.1746, + "step": 52055 + }, + { + "epoch": 2.43, + "learning_rate": 1.199974918878247e-05, + "loss": 0.251, + "step": 52060 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998965403727682e-05, + "loss": 0.2887, + "step": 52065 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998181618672898e-05, + "loss": 0.0429, + "step": 52070 + }, + { + "epoch": 2.43, + "learning_rate": 1.199739783361811e-05, + "loss": 0.0352, + "step": 52075 + }, + { + "epoch": 2.43, + "learning_rate": 1.1996614048563322e-05, + "loss": 0.1168, + "step": 52080 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995830263508536e-05, + "loss": 0.0959, + "step": 52085 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995046478453749e-05, + "loss": 0.154, + "step": 52090 + }, + { + "epoch": 2.43, + "learning_rate": 1.1994262693398964e-05, + "loss": 0.0716, + "step": 52095 + }, + { + "epoch": 2.43, + "learning_rate": 1.1993478908344176e-05, + "loss": 0.1781, + "step": 52100 + }, + { + "epoch": 2.43, + "learning_rate": 1.199269512328939e-05, + "loss": 0.2264, + "step": 52105 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991911338234602e-05, + "loss": 0.2278, + "step": 52110 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991127553179818e-05, + "loss": 0.3195, + "step": 52115 + }, + { + "epoch": 2.43, + "learning_rate": 1.199034376812503e-05, + "loss": 0.0321, + "step": 52120 + }, + { + "epoch": 2.43, + "learning_rate": 1.1989559983070244e-05, + "loss": 0.0277, + "step": 52125 + }, + { + "epoch": 2.43, + "learning_rate": 1.1988776198015456e-05, + "loss": 0.0288, + "step": 52130 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987992412960672e-05, + "loss": 0.0316, + "step": 52135 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987208627905884e-05, + "loss": 0.0851, + "step": 52140 + }, + { + "epoch": 2.43, + "learning_rate": 1.1986424842851096e-05, + "loss": 0.1352, + "step": 52145 + }, + { + "epoch": 2.43, + "learning_rate": 1.198564105779631e-05, + "loss": 0.1331, + "step": 52150 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984857272741524e-05, + "loss": 0.1579, + "step": 52155 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984073487686738e-05, + "loss": 0.2474, + "step": 52160 + }, + { + "epoch": 2.43, + "learning_rate": 1.198328970263195e-05, + "loss": 0.2848, + "step": 52165 + }, + { + "epoch": 2.43, + "learning_rate": 1.1982505917577166e-05, + "loss": 0.0673, + "step": 52170 + }, + { + "epoch": 2.43, + "learning_rate": 1.1981722132522378e-05, + "loss": 0.0426, + "step": 52175 + }, + { + "epoch": 2.43, + "learning_rate": 1.1980938347467592e-05, + "loss": 0.0501, + "step": 52180 + }, + { + "epoch": 2.44, + "learning_rate": 1.1980154562412804e-05, + "loss": 0.0576, + "step": 52185 + }, + { + "epoch": 2.44, + "learning_rate": 1.197937077735802e-05, + "loss": 0.105, + "step": 52190 + }, + { + "epoch": 2.44, + "learning_rate": 1.1978586992303232e-05, + "loss": 0.1487, + "step": 52195 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977803207248446e-05, + "loss": 0.063, + "step": 52200 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977019422193658e-05, + "loss": 0.1358, + "step": 52205 + }, + { + "epoch": 2.44, + "learning_rate": 1.197623563713887e-05, + "loss": 0.4102, + "step": 52210 + }, + { + "epoch": 2.44, + "learning_rate": 1.1975451852084086e-05, + "loss": 0.2808, + "step": 52215 + }, + { + "epoch": 2.44, + "learning_rate": 1.1974668067029298e-05, + "loss": 0.1002, + "step": 52220 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973884281974512e-05, + "loss": 0.0695, + "step": 52225 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973100496919724e-05, + "loss": 0.0389, + "step": 52230 + }, + { + "epoch": 2.44, + "learning_rate": 1.197231671186494e-05, + "loss": 0.0591, + "step": 52235 + }, + { + "epoch": 2.44, + "learning_rate": 1.1971532926810152e-05, + "loss": 0.1046, + "step": 52240 + }, + { + "epoch": 2.44, + "learning_rate": 1.1970749141755366e-05, + "loss": 0.0821, + "step": 52245 + }, + { + "epoch": 2.44, + "learning_rate": 1.196996535670058e-05, + "loss": 0.1596, + "step": 52250 + }, + { + "epoch": 2.44, + "learning_rate": 1.1969181571645794e-05, + "loss": 0.0936, + "step": 52255 + }, + { + "epoch": 2.44, + "learning_rate": 1.1968397786591006e-05, + "loss": 0.1988, + "step": 52260 + }, + { + "epoch": 2.44, + "learning_rate": 1.196761400153622e-05, + "loss": 0.3196, + "step": 52265 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966830216481434e-05, + "loss": 0.0698, + "step": 52270 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966046431426646e-05, + "loss": 0.0393, + "step": 52275 + }, + { + "epoch": 2.44, + "learning_rate": 1.196526264637186e-05, + "loss": 0.0488, + "step": 52280 + }, + { + "epoch": 2.44, + "learning_rate": 1.1964478861317072e-05, + "loss": 0.096, + "step": 52285 + }, + { + "epoch": 2.44, + "learning_rate": 1.1963695076262288e-05, + "loss": 0.0349, + "step": 52290 + }, + { + "epoch": 2.44, + "learning_rate": 1.19629112912075e-05, + "loss": 0.2553, + "step": 52295 + }, + { + "epoch": 2.44, + "learning_rate": 1.1962127506152714e-05, + "loss": 0.1327, + "step": 52300 + }, + { + "epoch": 2.44, + "learning_rate": 1.1961343721097926e-05, + "loss": 0.1643, + "step": 52305 + }, + { + "epoch": 2.44, + "learning_rate": 1.1960559936043142e-05, + "loss": 0.2194, + "step": 52310 + }, + { + "epoch": 2.44, + "learning_rate": 1.1959776150988354e-05, + "loss": 0.3497, + "step": 52315 + }, + { + "epoch": 2.44, + "learning_rate": 1.1958992365933568e-05, + "loss": 0.0641, + "step": 52320 + }, + { + "epoch": 2.44, + "learning_rate": 1.195820858087878e-05, + "loss": 0.0347, + "step": 52325 + }, + { + "epoch": 2.44, + "learning_rate": 1.1957424795823996e-05, + "loss": 0.0945, + "step": 52330 + }, + { + "epoch": 2.44, + "learning_rate": 1.1956641010769208e-05, + "loss": 0.0739, + "step": 52335 + }, + { + "epoch": 2.44, + "learning_rate": 1.195585722571442e-05, + "loss": 0.1243, + "step": 52340 + }, + { + "epoch": 2.44, + "learning_rate": 1.1955073440659634e-05, + "loss": 0.1303, + "step": 52345 + }, + { + "epoch": 2.44, + "learning_rate": 1.1954289655604848e-05, + "loss": 0.1913, + "step": 52350 + }, + { + "epoch": 2.44, + "learning_rate": 1.1953505870550062e-05, + "loss": 0.1298, + "step": 52355 + }, + { + "epoch": 2.44, + "learning_rate": 1.1952722085495274e-05, + "loss": 0.2293, + "step": 52360 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951938300440488e-05, + "loss": 0.2134, + "step": 52365 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951154515385702e-05, + "loss": 0.0677, + "step": 52370 + }, + { + "epoch": 2.44, + "learning_rate": 1.1950370730330916e-05, + "loss": 0.0779, + "step": 52375 + }, + { + "epoch": 2.44, + "learning_rate": 1.1949586945276128e-05, + "loss": 0.0791, + "step": 52380 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948803160221344e-05, + "loss": 0.086, + "step": 52385 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948019375166556e-05, + "loss": 0.1275, + "step": 52390 + }, + { + "epoch": 2.44, + "learning_rate": 1.194723559011177e-05, + "loss": 0.0879, + "step": 52395 + }, + { + "epoch": 2.45, + "learning_rate": 1.1946451805056982e-05, + "loss": 0.133, + "step": 52400 + }, + { + "epoch": 2.45, + "learning_rate": 1.1945668020002194e-05, + "loss": 0.2451, + "step": 52405 + }, + { + "epoch": 2.45, + "learning_rate": 1.194488423494741e-05, + "loss": 0.2477, + "step": 52410 + }, + { + "epoch": 2.45, + "learning_rate": 1.1944100449892622e-05, + "loss": 0.4061, + "step": 52415 + }, + { + "epoch": 2.45, + "learning_rate": 1.1943316664837836e-05, + "loss": 0.0401, + "step": 52420 + }, + { + "epoch": 2.45, + "learning_rate": 1.1942532879783048e-05, + "loss": 0.0241, + "step": 52425 + }, + { + "epoch": 2.45, + "learning_rate": 1.1941749094728264e-05, + "loss": 0.0777, + "step": 52430 + }, + { + "epoch": 2.45, + "learning_rate": 1.1940965309673476e-05, + "loss": 0.065, + "step": 52435 + }, + { + "epoch": 2.45, + "learning_rate": 1.194018152461869e-05, + "loss": 0.1047, + "step": 52440 + }, + { + "epoch": 2.45, + "learning_rate": 1.1939397739563902e-05, + "loss": 0.0401, + "step": 52445 + }, + { + "epoch": 2.45, + "learning_rate": 1.1938613954509118e-05, + "loss": 0.0681, + "step": 52450 + }, + { + "epoch": 2.45, + "learning_rate": 1.193783016945433e-05, + "loss": 0.1758, + "step": 52455 + }, + { + "epoch": 2.45, + "learning_rate": 1.1937046384399544e-05, + "loss": 0.1733, + "step": 52460 + }, + { + "epoch": 2.45, + "learning_rate": 1.1936262599344756e-05, + "loss": 0.3287, + "step": 52465 + }, + { + "epoch": 2.45, + "learning_rate": 1.193547881428997e-05, + "loss": 0.049, + "step": 52470 + }, + { + "epoch": 2.45, + "learning_rate": 1.1934695029235184e-05, + "loss": 0.0322, + "step": 52475 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933911244180396e-05, + "loss": 0.069, + "step": 52480 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933127459125612e-05, + "loss": 0.0482, + "step": 52485 + }, + { + "epoch": 2.45, + "learning_rate": 1.1932343674070824e-05, + "loss": 0.1658, + "step": 52490 + }, + { + "epoch": 2.45, + "learning_rate": 1.1931559889016038e-05, + "loss": 0.1137, + "step": 52495 + }, + { + "epoch": 2.45, + "learning_rate": 1.193077610396125e-05, + "loss": 0.2187, + "step": 52500 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929992318906465e-05, + "loss": 0.1515, + "step": 52505 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929208533851678e-05, + "loss": 0.307, + "step": 52510 + }, + { + "epoch": 2.45, + "learning_rate": 1.1928424748796892e-05, + "loss": 0.3527, + "step": 52515 + }, + { + "epoch": 2.45, + "learning_rate": 1.1927640963742104e-05, + "loss": 0.0675, + "step": 52520 + }, + { + "epoch": 2.45, + "learning_rate": 1.192685717868732e-05, + "loss": 0.0585, + "step": 52525 + }, + { + "epoch": 2.45, + "learning_rate": 1.1926073393632532e-05, + "loss": 0.0977, + "step": 52530 + }, + { + "epoch": 2.45, + "learning_rate": 1.1925289608577744e-05, + "loss": 0.0608, + "step": 52535 + }, + { + "epoch": 2.45, + "learning_rate": 1.1924505823522958e-05, + "loss": 0.061, + "step": 52540 + }, + { + "epoch": 2.45, + "learning_rate": 1.192372203846817e-05, + "loss": 0.0563, + "step": 52545 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922938253413386e-05, + "loss": 0.1041, + "step": 52550 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922154468358598e-05, + "loss": 0.1776, + "step": 52555 + }, + { + "epoch": 2.45, + "learning_rate": 1.1921370683303812e-05, + "loss": 0.2721, + "step": 52560 + }, + { + "epoch": 2.45, + "learning_rate": 1.1920586898249026e-05, + "loss": 0.2447, + "step": 52565 + }, + { + "epoch": 2.45, + "learning_rate": 1.191980311319424e-05, + "loss": 0.0822, + "step": 52570 + }, + { + "epoch": 2.45, + "learning_rate": 1.1919019328139452e-05, + "loss": 0.0929, + "step": 52575 + }, + { + "epoch": 2.45, + "learning_rate": 1.1918235543084666e-05, + "loss": 0.0734, + "step": 52580 + }, + { + "epoch": 2.45, + "learning_rate": 1.191745175802988e-05, + "loss": 0.0762, + "step": 52585 + }, + { + "epoch": 2.45, + "learning_rate": 1.1916667972975093e-05, + "loss": 0.1376, + "step": 52590 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915884187920306e-05, + "loss": 0.1607, + "step": 52595 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915100402865518e-05, + "loss": 0.2222, + "step": 52600 + }, + { + "epoch": 2.45, + "learning_rate": 1.1914316617810733e-05, + "loss": 0.2216, + "step": 52605 + }, + { + "epoch": 2.45, + "learning_rate": 1.1913532832755946e-05, + "loss": 0.2515, + "step": 52610 + }, + { + "epoch": 2.46, + "learning_rate": 1.191274904770116e-05, + "loss": 0.3711, + "step": 52615 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911965262646372e-05, + "loss": 0.0333, + "step": 52620 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911181477591587e-05, + "loss": 0.0515, + "step": 52625 + }, + { + "epoch": 2.46, + "learning_rate": 1.19103976925368e-05, + "loss": 0.0941, + "step": 52630 + }, + { + "epoch": 2.46, + "learning_rate": 1.1909613907482013e-05, + "loss": 0.1234, + "step": 52635 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908830122427226e-05, + "loss": 0.0329, + "step": 52640 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908046337372441e-05, + "loss": 0.1261, + "step": 52645 + }, + { + "epoch": 2.46, + "learning_rate": 1.1907262552317653e-05, + "loss": 0.1531, + "step": 52650 + }, + { + "epoch": 2.46, + "learning_rate": 1.1906478767262867e-05, + "loss": 0.1486, + "step": 52655 + }, + { + "epoch": 2.46, + "learning_rate": 1.190569498220808e-05, + "loss": 0.2458, + "step": 52660 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904911197153293e-05, + "loss": 0.3166, + "step": 52665 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904127412098507e-05, + "loss": 0.0304, + "step": 52670 + }, + { + "epoch": 2.46, + "learning_rate": 1.190334362704372e-05, + "loss": 0.0736, + "step": 52675 + }, + { + "epoch": 2.46, + "learning_rate": 1.1902559841988934e-05, + "loss": 0.0754, + "step": 52680 + }, + { + "epoch": 2.46, + "learning_rate": 1.1901776056934147e-05, + "loss": 0.0731, + "step": 52685 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900992271879361e-05, + "loss": 0.0482, + "step": 52690 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900208486824574e-05, + "loss": 0.0744, + "step": 52695 + }, + { + "epoch": 2.46, + "learning_rate": 1.1899424701769789e-05, + "loss": 0.1076, + "step": 52700 + }, + { + "epoch": 2.46, + "learning_rate": 1.1898640916715001e-05, + "loss": 0.1539, + "step": 52705 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897857131660215e-05, + "loss": 0.2656, + "step": 52710 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897073346605427e-05, + "loss": 0.2979, + "step": 52715 + }, + { + "epoch": 2.46, + "learning_rate": 1.1896289561550643e-05, + "loss": 0.0254, + "step": 52720 + }, + { + "epoch": 2.46, + "learning_rate": 1.1895505776495855e-05, + "loss": 0.0355, + "step": 52725 + }, + { + "epoch": 2.46, + "learning_rate": 1.1894721991441067e-05, + "loss": 0.0842, + "step": 52730 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893938206386281e-05, + "loss": 0.075, + "step": 52735 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893154421331494e-05, + "loss": 0.122, + "step": 52740 + }, + { + "epoch": 2.46, + "learning_rate": 1.189237063627671e-05, + "loss": 0.1795, + "step": 52745 + }, + { + "epoch": 2.46, + "learning_rate": 1.1891586851221921e-05, + "loss": 0.1504, + "step": 52750 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890803066167135e-05, + "loss": 0.2475, + "step": 52755 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890019281112348e-05, + "loss": 0.1841, + "step": 52760 + }, + { + "epoch": 2.46, + "learning_rate": 1.1889235496057563e-05, + "loss": 0.3067, + "step": 52765 + }, + { + "epoch": 2.46, + "learning_rate": 1.1888451711002775e-05, + "loss": 0.0693, + "step": 52770 + }, + { + "epoch": 2.46, + "learning_rate": 1.188766792594799e-05, + "loss": 0.0174, + "step": 52775 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886884140893203e-05, + "loss": 0.0503, + "step": 52780 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886100355838417e-05, + "loss": 0.1001, + "step": 52785 + }, + { + "epoch": 2.46, + "learning_rate": 1.188531657078363e-05, + "loss": 0.1064, + "step": 52790 + }, + { + "epoch": 2.46, + "learning_rate": 1.1884532785728841e-05, + "loss": 0.0935, + "step": 52795 + }, + { + "epoch": 2.46, + "learning_rate": 1.1883749000674057e-05, + "loss": 0.1237, + "step": 52800 + }, + { + "epoch": 2.46, + "learning_rate": 1.188296521561927e-05, + "loss": 0.2529, + "step": 52805 + }, + { + "epoch": 2.46, + "learning_rate": 1.1882181430564483e-05, + "loss": 0.2904, + "step": 52810 + }, + { + "epoch": 2.46, + "learning_rate": 1.1881397645509695e-05, + "loss": 0.2634, + "step": 52815 + }, + { + "epoch": 2.46, + "learning_rate": 1.1880613860454911e-05, + "loss": 0.0563, + "step": 52820 + }, + { + "epoch": 2.46, + "learning_rate": 1.1879830075400123e-05, + "loss": 0.1206, + "step": 52825 + }, + { + "epoch": 2.47, + "learning_rate": 1.1879046290345337e-05, + "loss": 0.0276, + "step": 52830 + }, + { + "epoch": 2.47, + "learning_rate": 1.187826250529055e-05, + "loss": 0.0621, + "step": 52835 + }, + { + "epoch": 2.47, + "learning_rate": 1.1877478720235765e-05, + "loss": 0.1221, + "step": 52840 + }, + { + "epoch": 2.47, + "learning_rate": 1.1876694935180977e-05, + "loss": 0.1302, + "step": 52845 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875911150126191e-05, + "loss": 0.1104, + "step": 52850 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875127365071403e-05, + "loss": 0.2365, + "step": 52855 + }, + { + "epoch": 2.47, + "learning_rate": 1.1874343580016615e-05, + "loss": 0.2957, + "step": 52860 + }, + { + "epoch": 2.47, + "learning_rate": 1.1873559794961831e-05, + "loss": 0.1747, + "step": 52865 + }, + { + "epoch": 2.47, + "learning_rate": 1.1872776009907043e-05, + "loss": 0.0363, + "step": 52870 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871992224852257e-05, + "loss": 0.0558, + "step": 52875 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871208439797471e-05, + "loss": 0.1079, + "step": 52880 + }, + { + "epoch": 2.47, + "learning_rate": 1.1870424654742685e-05, + "loss": 0.0892, + "step": 52885 + }, + { + "epoch": 2.47, + "learning_rate": 1.1869640869687897e-05, + "loss": 0.0586, + "step": 52890 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868857084633111e-05, + "loss": 0.0733, + "step": 52895 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868073299578325e-05, + "loss": 0.1407, + "step": 52900 + }, + { + "epoch": 2.47, + "learning_rate": 1.1867289514523539e-05, + "loss": 0.1468, + "step": 52905 + }, + { + "epoch": 2.47, + "learning_rate": 1.1866505729468751e-05, + "loss": 0.312, + "step": 52910 + }, + { + "epoch": 2.47, + "learning_rate": 1.1865721944413967e-05, + "loss": 0.1312, + "step": 52915 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864938159359179e-05, + "loss": 0.074, + "step": 52920 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864154374304391e-05, + "loss": 0.0478, + "step": 52925 + }, + { + "epoch": 2.47, + "learning_rate": 1.1863370589249605e-05, + "loss": 0.0973, + "step": 52930 + }, + { + "epoch": 2.47, + "learning_rate": 1.1862586804194817e-05, + "loss": 0.0613, + "step": 52935 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861803019140033e-05, + "loss": 0.0627, + "step": 52940 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861019234085245e-05, + "loss": 0.0968, + "step": 52945 + }, + { + "epoch": 2.47, + "learning_rate": 1.1860235449030459e-05, + "loss": 0.0754, + "step": 52950 + }, + { + "epoch": 2.47, + "learning_rate": 1.1859451663975671e-05, + "loss": 0.1461, + "step": 52955 + }, + { + "epoch": 2.47, + "learning_rate": 1.1858667878920887e-05, + "loss": 0.2842, + "step": 52960 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857884093866099e-05, + "loss": 0.2588, + "step": 52965 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857100308811313e-05, + "loss": 0.044, + "step": 52970 + }, + { + "epoch": 2.47, + "learning_rate": 1.1856473280767485e-05, + "loss": 0.0901, + "step": 52975 + }, + { + "epoch": 2.47, + "learning_rate": 1.1855689495712697e-05, + "loss": 0.098, + "step": 52980 + }, + { + "epoch": 2.47, + "learning_rate": 1.185490571065791e-05, + "loss": 0.1027, + "step": 52985 + }, + { + "epoch": 2.47, + "learning_rate": 1.1854121925603123e-05, + "loss": 0.0727, + "step": 52990 + }, + { + "epoch": 2.47, + "learning_rate": 1.1853338140548336e-05, + "loss": 0.086, + "step": 52995 + }, + { + "epoch": 2.47, + "learning_rate": 1.1852554355493551e-05, + "loss": 0.1336, + "step": 53000 + }, + { + "epoch": 2.47, + "learning_rate": 1.1851770570438763e-05, + "loss": 0.2022, + "step": 53005 + }, + { + "epoch": 2.47, + "learning_rate": 1.1850986785383977e-05, + "loss": 0.2916, + "step": 53010 + }, + { + "epoch": 2.47, + "learning_rate": 1.185020300032919e-05, + "loss": 0.1668, + "step": 53015 + }, + { + "epoch": 2.47, + "learning_rate": 1.1849419215274405e-05, + "loss": 0.0279, + "step": 53020 + }, + { + "epoch": 2.47, + "learning_rate": 1.1848635430219617e-05, + "loss": 0.0569, + "step": 53025 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847851645164831e-05, + "loss": 0.0159, + "step": 53030 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847067860110043e-05, + "loss": 0.0532, + "step": 53035 + }, + { + "epoch": 2.47, + "learning_rate": 1.1846284075055259e-05, + "loss": 0.0931, + "step": 53040 + }, + { + "epoch": 2.48, + "learning_rate": 1.1845500290000471e-05, + "loss": 0.1704, + "step": 53045 + }, + { + "epoch": 2.48, + "learning_rate": 1.1844716504945683e-05, + "loss": 0.1275, + "step": 53050 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843932719890897e-05, + "loss": 0.204, + "step": 53055 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843148934836111e-05, + "loss": 0.2639, + "step": 53060 + }, + { + "epoch": 2.48, + "learning_rate": 1.1842365149781325e-05, + "loss": 0.3573, + "step": 53065 + }, + { + "epoch": 2.48, + "learning_rate": 1.1841581364726537e-05, + "loss": 0.0625, + "step": 53070 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840797579671753e-05, + "loss": 0.0354, + "step": 53075 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840013794616965e-05, + "loss": 0.0453, + "step": 53080 + }, + { + "epoch": 2.48, + "learning_rate": 1.1839230009562179e-05, + "loss": 0.0939, + "step": 53085 + }, + { + "epoch": 2.48, + "learning_rate": 1.1838446224507391e-05, + "loss": 0.0633, + "step": 53090 + }, + { + "epoch": 2.48, + "learning_rate": 1.1837662439452607e-05, + "loss": 0.1074, + "step": 53095 + }, + { + "epoch": 2.48, + "learning_rate": 1.183687865439782e-05, + "loss": 0.0843, + "step": 53100 + }, + { + "epoch": 2.48, + "learning_rate": 1.1836094869343033e-05, + "loss": 0.1816, + "step": 53105 + }, + { + "epoch": 2.48, + "learning_rate": 1.1835311084288245e-05, + "loss": 0.283, + "step": 53110 + }, + { + "epoch": 2.48, + "learning_rate": 1.1834527299233457e-05, + "loss": 0.4184, + "step": 53115 + }, + { + "epoch": 2.48, + "learning_rate": 1.1833743514178673e-05, + "loss": 0.0683, + "step": 53120 + }, + { + "epoch": 2.48, + "learning_rate": 1.1832959729123885e-05, + "loss": 0.0495, + "step": 53125 + }, + { + "epoch": 2.48, + "learning_rate": 1.18321759440691e-05, + "loss": 0.0605, + "step": 53130 + }, + { + "epoch": 2.48, + "learning_rate": 1.1831392159014311e-05, + "loss": 0.1192, + "step": 53135 + }, + { + "epoch": 2.48, + "learning_rate": 1.1830608373959527e-05, + "loss": 0.1119, + "step": 53140 + }, + { + "epoch": 2.48, + "learning_rate": 1.182982458890474e-05, + "loss": 0.1132, + "step": 53145 + }, + { + "epoch": 2.48, + "learning_rate": 1.1829040803849953e-05, + "loss": 0.1361, + "step": 53150 + }, + { + "epoch": 2.48, + "learning_rate": 1.1828257018795165e-05, + "loss": 0.2086, + "step": 53155 + }, + { + "epoch": 2.48, + "learning_rate": 1.1827473233740381e-05, + "loss": 0.192, + "step": 53160 + }, + { + "epoch": 2.48, + "learning_rate": 1.1826689448685593e-05, + "loss": 0.2593, + "step": 53165 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825905663630807e-05, + "loss": 0.0378, + "step": 53170 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825121878576021e-05, + "loss": 0.0689, + "step": 53175 + }, + { + "epoch": 2.48, + "learning_rate": 1.1824338093521233e-05, + "loss": 0.0995, + "step": 53180 + }, + { + "epoch": 2.48, + "learning_rate": 1.1823554308466447e-05, + "loss": 0.0625, + "step": 53185 + }, + { + "epoch": 2.48, + "learning_rate": 1.182277052341166e-05, + "loss": 0.0522, + "step": 53190 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821986738356875e-05, + "loss": 0.1119, + "step": 53195 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821202953302087e-05, + "loss": 0.2608, + "step": 53200 + }, + { + "epoch": 2.48, + "learning_rate": 1.1820419168247301e-05, + "loss": 0.1296, + "step": 53205 + }, + { + "epoch": 2.48, + "learning_rate": 1.1819635383192513e-05, + "loss": 0.2392, + "step": 53210 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818851598137729e-05, + "loss": 0.3229, + "step": 53215 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818067813082941e-05, + "loss": 0.0728, + "step": 53220 + }, + { + "epoch": 2.48, + "learning_rate": 1.1817284028028155e-05, + "loss": 0.0496, + "step": 53225 + }, + { + "epoch": 2.48, + "learning_rate": 1.1816500242973367e-05, + "loss": 0.1219, + "step": 53230 + }, + { + "epoch": 2.48, + "learning_rate": 1.1815716457918583e-05, + "loss": 0.0571, + "step": 53235 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814932672863795e-05, + "loss": 0.0712, + "step": 53240 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814148887809007e-05, + "loss": 0.1081, + "step": 53245 + }, + { + "epoch": 2.48, + "learning_rate": 1.1813365102754221e-05, + "loss": 0.1176, + "step": 53250 + }, + { + "epoch": 2.48, + "learning_rate": 1.1812581317699435e-05, + "loss": 0.1212, + "step": 53255 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811797532644649e-05, + "loss": 0.3415, + "step": 53260 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811013747589861e-05, + "loss": 0.2234, + "step": 53265 + }, + { + "epoch": 2.49, + "learning_rate": 1.1810229962535075e-05, + "loss": 0.0399, + "step": 53270 + }, + { + "epoch": 2.49, + "learning_rate": 1.1809446177480289e-05, + "loss": 0.0437, + "step": 53275 + }, + { + "epoch": 2.49, + "learning_rate": 1.1808662392425503e-05, + "loss": 0.0732, + "step": 53280 + }, + { + "epoch": 2.49, + "learning_rate": 1.1807878607370715e-05, + "loss": 0.068, + "step": 53285 + }, + { + "epoch": 2.49, + "learning_rate": 1.180709482231593e-05, + "loss": 0.1564, + "step": 53290 + }, + { + "epoch": 2.49, + "learning_rate": 1.1806311037261143e-05, + "loss": 0.1242, + "step": 53295 + }, + { + "epoch": 2.49, + "learning_rate": 1.1805527252206357e-05, + "loss": 0.0852, + "step": 53300 + }, + { + "epoch": 2.49, + "learning_rate": 1.1804743467151569e-05, + "loss": 0.1913, + "step": 53305 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803959682096781e-05, + "loss": 0.2867, + "step": 53310 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803175897041997e-05, + "loss": 0.2384, + "step": 53315 + }, + { + "epoch": 2.49, + "learning_rate": 1.1802392111987209e-05, + "loss": 0.0679, + "step": 53320 + }, + { + "epoch": 2.49, + "learning_rate": 1.1801608326932423e-05, + "loss": 0.015, + "step": 53325 + }, + { + "epoch": 2.49, + "learning_rate": 1.1800824541877635e-05, + "loss": 0.0466, + "step": 53330 + }, + { + "epoch": 2.49, + "learning_rate": 1.180004075682285e-05, + "loss": 0.0421, + "step": 53335 + }, + { + "epoch": 2.49, + "learning_rate": 1.1799256971768063e-05, + "loss": 0.0758, + "step": 53340 + }, + { + "epoch": 2.49, + "learning_rate": 1.1798473186713277e-05, + "loss": 0.062, + "step": 53345 + }, + { + "epoch": 2.49, + "learning_rate": 1.1797689401658489e-05, + "loss": 0.1198, + "step": 53350 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796905616603705e-05, + "loss": 0.2479, + "step": 53355 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796121831548917e-05, + "loss": 0.2566, + "step": 53360 + }, + { + "epoch": 2.49, + "learning_rate": 1.179533804649413e-05, + "loss": 0.5469, + "step": 53365 + }, + { + "epoch": 2.49, + "learning_rate": 1.1794554261439343e-05, + "loss": 0.0849, + "step": 53370 + }, + { + "epoch": 2.49, + "learning_rate": 1.1793770476384557e-05, + "loss": 0.0693, + "step": 53375 + }, + { + "epoch": 2.49, + "learning_rate": 1.179298669132977e-05, + "loss": 0.0148, + "step": 53380 + }, + { + "epoch": 2.49, + "learning_rate": 1.1792202906274983e-05, + "loss": 0.0683, + "step": 53385 + }, + { + "epoch": 2.49, + "learning_rate": 1.1791419121220199e-05, + "loss": 0.0983, + "step": 53390 + }, + { + "epoch": 2.49, + "learning_rate": 1.179063533616541e-05, + "loss": 0.1328, + "step": 53395 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789851551110625e-05, + "loss": 0.089, + "step": 53400 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789067766055837e-05, + "loss": 0.2294, + "step": 53405 + }, + { + "epoch": 2.49, + "learning_rate": 1.1788283981001053e-05, + "loss": 0.216, + "step": 53410 + }, + { + "epoch": 2.49, + "learning_rate": 1.1787500195946265e-05, + "loss": 0.3105, + "step": 53415 + }, + { + "epoch": 2.49, + "learning_rate": 1.1786716410891479e-05, + "loss": 0.0386, + "step": 53420 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785932625836691e-05, + "loss": 0.0221, + "step": 53425 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785148840781906e-05, + "loss": 0.0662, + "step": 53430 + }, + { + "epoch": 2.49, + "learning_rate": 1.1784365055727119e-05, + "loss": 0.1186, + "step": 53435 + }, + { + "epoch": 2.49, + "learning_rate": 1.1783581270672331e-05, + "loss": 0.1842, + "step": 53440 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782797485617545e-05, + "loss": 0.0644, + "step": 53445 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782013700562757e-05, + "loss": 0.1558, + "step": 53450 + }, + { + "epoch": 2.49, + "learning_rate": 1.1781229915507973e-05, + "loss": 0.1776, + "step": 53455 + }, + { + "epoch": 2.49, + "learning_rate": 1.1780446130453185e-05, + "loss": 0.1429, + "step": 53460 + }, + { + "epoch": 2.49, + "learning_rate": 1.1779662345398399e-05, + "loss": 0.2067, + "step": 53465 + }, + { + "epoch": 2.49, + "learning_rate": 1.1778878560343611e-05, + "loss": 0.0238, + "step": 53470 + }, + { + "epoch": 2.5, + "learning_rate": 1.1778094775288827e-05, + "loss": 0.0882, + "step": 53475 + }, + { + "epoch": 2.5, + "learning_rate": 1.1777310990234039e-05, + "loss": 0.0437, + "step": 53480 + }, + { + "epoch": 2.5, + "learning_rate": 1.1776527205179253e-05, + "loss": 0.1085, + "step": 53485 + }, + { + "epoch": 2.5, + "learning_rate": 1.1775743420124467e-05, + "loss": 0.082, + "step": 53490 + }, + { + "epoch": 2.5, + "learning_rate": 1.177495963506968e-05, + "loss": 0.0515, + "step": 53495 + }, + { + "epoch": 2.5, + "learning_rate": 1.1774175850014893e-05, + "loss": 0.1574, + "step": 53500 + }, + { + "epoch": 2.5, + "learning_rate": 1.1773392064960105e-05, + "loss": 0.176, + "step": 53505 + }, + { + "epoch": 2.5, + "learning_rate": 1.177260827990532e-05, + "loss": 0.3283, + "step": 53510 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771824494850533e-05, + "loss": 0.2003, + "step": 53515 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771040709795747e-05, + "loss": 0.0773, + "step": 53520 + }, + { + "epoch": 2.5, + "learning_rate": 1.1770256924740959e-05, + "loss": 0.0402, + "step": 53525 + }, + { + "epoch": 2.5, + "learning_rate": 1.1769473139686174e-05, + "loss": 0.0686, + "step": 53530 + }, + { + "epoch": 2.5, + "learning_rate": 1.1768689354631387e-05, + "loss": 0.1132, + "step": 53535 + }, + { + "epoch": 2.5, + "learning_rate": 1.17679055695766e-05, + "loss": 0.109, + "step": 53540 + }, + { + "epoch": 2.5, + "learning_rate": 1.1767121784521813e-05, + "loss": 0.1463, + "step": 53545 + }, + { + "epoch": 2.5, + "learning_rate": 1.1766337999467028e-05, + "loss": 0.1783, + "step": 53550 + }, + { + "epoch": 2.5, + "learning_rate": 1.176555421441224e-05, + "loss": 0.1049, + "step": 53555 + }, + { + "epoch": 2.5, + "learning_rate": 1.1764770429357454e-05, + "loss": 0.4038, + "step": 53560 + }, + { + "epoch": 2.5, + "learning_rate": 1.1763986644302667e-05, + "loss": 0.2925, + "step": 53565 + }, + { + "epoch": 2.5, + "learning_rate": 1.176320285924788e-05, + "loss": 0.0268, + "step": 53570 + }, + { + "epoch": 2.5, + "learning_rate": 1.1762419074193094e-05, + "loss": 0.0987, + "step": 53575 + }, + { + "epoch": 2.5, + "learning_rate": 1.1761635289138307e-05, + "loss": 0.0748, + "step": 53580 + }, + { + "epoch": 2.5, + "learning_rate": 1.176085150408352e-05, + "loss": 0.0521, + "step": 53585 + }, + { + "epoch": 2.5, + "learning_rate": 1.1760067719028734e-05, + "loss": 0.1107, + "step": 53590 + }, + { + "epoch": 2.5, + "learning_rate": 1.1759283933973948e-05, + "loss": 0.1081, + "step": 53595 + }, + { + "epoch": 2.5, + "learning_rate": 1.175850014891916e-05, + "loss": 0.0851, + "step": 53600 + }, + { + "epoch": 2.5, + "learning_rate": 1.1757716363864376e-05, + "loss": 0.1295, + "step": 53605 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756932578809588e-05, + "loss": 0.3218, + "step": 53610 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756148793754802e-05, + "loss": 0.3039, + "step": 53615 + }, + { + "epoch": 2.5, + "learning_rate": 1.1755365008700015e-05, + "loss": 0.0713, + "step": 53620 + }, + { + "epoch": 2.5, + "learning_rate": 1.175458122364523e-05, + "loss": 0.0448, + "step": 53625 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753797438590442e-05, + "loss": 0.0533, + "step": 53630 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753013653535655e-05, + "loss": 0.0662, + "step": 53635 + }, + { + "epoch": 2.5, + "learning_rate": 1.1752229868480868e-05, + "loss": 0.0825, + "step": 53640 + }, + { + "epoch": 2.5, + "learning_rate": 1.175144608342608e-05, + "loss": 0.1356, + "step": 53645 + }, + { + "epoch": 2.5, + "learning_rate": 1.1750662298371296e-05, + "loss": 0.119, + "step": 53650 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749878513316508e-05, + "loss": 0.177, + "step": 53655 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749094728261722e-05, + "loss": 0.2674, + "step": 53660 + }, + { + "epoch": 2.5, + "learning_rate": 1.1748310943206935e-05, + "loss": 0.2447, + "step": 53665 + }, + { + "epoch": 2.5, + "learning_rate": 1.174752715815215e-05, + "loss": 0.0238, + "step": 53670 + }, + { + "epoch": 2.5, + "learning_rate": 1.1746743373097362e-05, + "loss": 0.0325, + "step": 53675 + }, + { + "epoch": 2.5, + "learning_rate": 1.1745959588042576e-05, + "loss": 0.0444, + "step": 53680 + }, + { + "epoch": 2.51, + "learning_rate": 1.1745175802987789e-05, + "loss": 0.0893, + "step": 53685 + }, + { + "epoch": 2.51, + "learning_rate": 1.1744392017933004e-05, + "loss": 0.0845, + "step": 53690 + }, + { + "epoch": 2.51, + "learning_rate": 1.1743608232878216e-05, + "loss": 0.0906, + "step": 53695 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742824447823429e-05, + "loss": 0.1417, + "step": 53700 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742040662768644e-05, + "loss": 0.1703, + "step": 53705 + }, + { + "epoch": 2.51, + "learning_rate": 1.1741256877713856e-05, + "loss": 0.2382, + "step": 53710 + }, + { + "epoch": 2.51, + "learning_rate": 1.174047309265907e-05, + "loss": 0.3102, + "step": 53715 + }, + { + "epoch": 2.51, + "learning_rate": 1.1739689307604282e-05, + "loss": 0.0984, + "step": 53720 + }, + { + "epoch": 2.51, + "learning_rate": 1.1738905522549498e-05, + "loss": 0.0297, + "step": 53725 + }, + { + "epoch": 2.51, + "learning_rate": 1.173812173749471e-05, + "loss": 0.0989, + "step": 53730 + }, + { + "epoch": 2.51, + "learning_rate": 1.1737337952439924e-05, + "loss": 0.076, + "step": 53735 + }, + { + "epoch": 2.51, + "learning_rate": 1.1736554167385136e-05, + "loss": 0.1314, + "step": 53740 + }, + { + "epoch": 2.51, + "learning_rate": 1.1735770382330352e-05, + "loss": 0.1822, + "step": 53745 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734986597275564e-05, + "loss": 0.2006, + "step": 53750 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734202812220778e-05, + "loss": 0.1828, + "step": 53755 + }, + { + "epoch": 2.51, + "learning_rate": 1.173341902716599e-05, + "loss": 0.3078, + "step": 53760 + }, + { + "epoch": 2.51, + "learning_rate": 1.1732635242111203e-05, + "loss": 0.3112, + "step": 53765 + }, + { + "epoch": 2.51, + "learning_rate": 1.1731851457056418e-05, + "loss": 0.0143, + "step": 53770 + }, + { + "epoch": 2.51, + "learning_rate": 1.173106767200163e-05, + "loss": 0.0488, + "step": 53775 + }, + { + "epoch": 2.51, + "learning_rate": 1.1730283886946844e-05, + "loss": 0.0628, + "step": 53780 + }, + { + "epoch": 2.51, + "learning_rate": 1.1729500101892058e-05, + "loss": 0.162, + "step": 53785 + }, + { + "epoch": 2.51, + "learning_rate": 1.1728716316837272e-05, + "loss": 0.1037, + "step": 53790 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727932531782484e-05, + "loss": 0.0923, + "step": 53795 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727148746727698e-05, + "loss": 0.0755, + "step": 53800 + }, + { + "epoch": 2.51, + "learning_rate": 1.1726364961672912e-05, + "loss": 0.1833, + "step": 53805 + }, + { + "epoch": 2.51, + "learning_rate": 1.1725581176618126e-05, + "loss": 0.1893, + "step": 53810 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724797391563338e-05, + "loss": 0.3448, + "step": 53815 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724013606508554e-05, + "loss": 0.0322, + "step": 53820 + }, + { + "epoch": 2.51, + "learning_rate": 1.1723229821453766e-05, + "loss": 0.0322, + "step": 53825 + }, + { + "epoch": 2.51, + "learning_rate": 1.1722446036398978e-05, + "loss": 0.0736, + "step": 53830 + }, + { + "epoch": 2.51, + "learning_rate": 1.1721662251344192e-05, + "loss": 0.0983, + "step": 53835 + }, + { + "epoch": 2.51, + "learning_rate": 1.1720878466289404e-05, + "loss": 0.0753, + "step": 53840 + }, + { + "epoch": 2.51, + "learning_rate": 1.172009468123462e-05, + "loss": 0.0771, + "step": 53845 + }, + { + "epoch": 2.51, + "learning_rate": 1.1719310896179832e-05, + "loss": 0.1376, + "step": 53850 + }, + { + "epoch": 2.51, + "learning_rate": 1.1718527111125046e-05, + "loss": 0.1836, + "step": 53855 + }, + { + "epoch": 2.51, + "learning_rate": 1.1717743326070258e-05, + "loss": 0.2587, + "step": 53860 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716959541015474e-05, + "loss": 0.2983, + "step": 53865 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716175755960686e-05, + "loss": 0.0732, + "step": 53870 + }, + { + "epoch": 2.51, + "learning_rate": 1.17153919709059e-05, + "loss": 0.0571, + "step": 53875 + }, + { + "epoch": 2.51, + "learning_rate": 1.1714608185851112e-05, + "loss": 0.0792, + "step": 53880 + }, + { + "epoch": 2.51, + "learning_rate": 1.1713824400796328e-05, + "loss": 0.0731, + "step": 53885 + }, + { + "epoch": 2.51, + "learning_rate": 1.171304061574154e-05, + "loss": 0.0635, + "step": 53890 + }, + { + "epoch": 2.51, + "learning_rate": 1.1712256830686752e-05, + "loss": 0.1637, + "step": 53895 + }, + { + "epoch": 2.52, + "learning_rate": 1.1711473045631966e-05, + "loss": 0.2193, + "step": 53900 + }, + { + "epoch": 2.52, + "learning_rate": 1.171068926057718e-05, + "loss": 0.2715, + "step": 53905 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709905475522394e-05, + "loss": 0.3451, + "step": 53910 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709121690467606e-05, + "loss": 0.2351, + "step": 53915 + }, + { + "epoch": 2.52, + "learning_rate": 1.1708337905412822e-05, + "loss": 0.0587, + "step": 53920 + }, + { + "epoch": 2.52, + "learning_rate": 1.1707554120358034e-05, + "loss": 0.0571, + "step": 53925 + }, + { + "epoch": 2.52, + "learning_rate": 1.1706770335303248e-05, + "loss": 0.0329, + "step": 53930 + }, + { + "epoch": 2.52, + "learning_rate": 1.170598655024846e-05, + "loss": 0.0982, + "step": 53935 + }, + { + "epoch": 2.52, + "learning_rate": 1.1705202765193676e-05, + "loss": 0.056, + "step": 53940 + }, + { + "epoch": 2.52, + "learning_rate": 1.1704418980138888e-05, + "loss": 0.1438, + "step": 53945 + }, + { + "epoch": 2.52, + "learning_rate": 1.1703635195084102e-05, + "loss": 0.1759, + "step": 53950 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702851410029314e-05, + "loss": 0.2695, + "step": 53955 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702067624974526e-05, + "loss": 0.2936, + "step": 53960 + }, + { + "epoch": 2.52, + "learning_rate": 1.1701283839919742e-05, + "loss": 0.2662, + "step": 53965 + }, + { + "epoch": 2.52, + "learning_rate": 1.1700500054864954e-05, + "loss": 0.0413, + "step": 53970 + }, + { + "epoch": 2.52, + "learning_rate": 1.1699716269810168e-05, + "loss": 0.0348, + "step": 53975 + }, + { + "epoch": 2.52, + "learning_rate": 1.169893248475538e-05, + "loss": 0.0487, + "step": 53980 + }, + { + "epoch": 2.52, + "learning_rate": 1.1698148699700596e-05, + "loss": 0.0561, + "step": 53985 + }, + { + "epoch": 2.52, + "learning_rate": 1.1697364914645808e-05, + "loss": 0.1206, + "step": 53990 + }, + { + "epoch": 2.52, + "learning_rate": 1.1696581129591022e-05, + "loss": 0.1703, + "step": 53995 + }, + { + "epoch": 2.52, + "learning_rate": 1.1695797344536234e-05, + "loss": 0.1549, + "step": 54000 + }, + { + "epoch": 2.52, + "learning_rate": 1.169501355948145e-05, + "loss": 0.209, + "step": 54005 + }, + { + "epoch": 2.52, + "learning_rate": 1.1694229774426662e-05, + "loss": 0.4151, + "step": 54010 + }, + { + "epoch": 2.52, + "learning_rate": 1.1693445989371876e-05, + "loss": 0.1933, + "step": 54015 + }, + { + "epoch": 2.52, + "learning_rate": 1.169266220431709e-05, + "loss": 0.0544, + "step": 54020 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691878419262302e-05, + "loss": 0.0166, + "step": 54025 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691094634207516e-05, + "loss": 0.0557, + "step": 54030 + }, + { + "epoch": 2.52, + "learning_rate": 1.1690310849152728e-05, + "loss": 0.0669, + "step": 54035 + }, + { + "epoch": 2.52, + "learning_rate": 1.1689527064097944e-05, + "loss": 0.0985, + "step": 54040 + }, + { + "epoch": 2.52, + "learning_rate": 1.1688743279043156e-05, + "loss": 0.1685, + "step": 54045 + }, + { + "epoch": 2.52, + "learning_rate": 1.168795949398837e-05, + "loss": 0.1963, + "step": 54050 + }, + { + "epoch": 2.52, + "learning_rate": 1.1687175708933582e-05, + "loss": 0.192, + "step": 54055 + }, + { + "epoch": 2.52, + "learning_rate": 1.1686391923878798e-05, + "loss": 0.3119, + "step": 54060 + }, + { + "epoch": 2.52, + "learning_rate": 1.168560813882401e-05, + "loss": 0.3424, + "step": 54065 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684824353769224e-05, + "loss": 0.0345, + "step": 54070 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684040568714436e-05, + "loss": 0.0528, + "step": 54075 + }, + { + "epoch": 2.52, + "learning_rate": 1.1683256783659652e-05, + "loss": 0.053, + "step": 54080 + }, + { + "epoch": 2.52, + "learning_rate": 1.1682472998604864e-05, + "loss": 0.0567, + "step": 54085 + }, + { + "epoch": 2.52, + "learning_rate": 1.1681689213550076e-05, + "loss": 0.1253, + "step": 54090 + }, + { + "epoch": 2.52, + "learning_rate": 1.168090542849529e-05, + "loss": 0.1384, + "step": 54095 + }, + { + "epoch": 2.52, + "learning_rate": 1.1680121643440504e-05, + "loss": 0.1149, + "step": 54100 + }, + { + "epoch": 2.52, + "learning_rate": 1.1679337858385718e-05, + "loss": 0.1669, + "step": 54105 + }, + { + "epoch": 2.52, + "learning_rate": 1.1678710830341888e-05, + "loss": 0.1833, + "step": 54110 + }, + { + "epoch": 2.53, + "learning_rate": 1.16779270452871e-05, + "loss": 0.2386, + "step": 54115 + }, + { + "epoch": 2.53, + "learning_rate": 1.1677143260232316e-05, + "loss": 0.0729, + "step": 54120 + }, + { + "epoch": 2.53, + "learning_rate": 1.1676359475177528e-05, + "loss": 0.0523, + "step": 54125 + }, + { + "epoch": 2.53, + "learning_rate": 1.1675575690122742e-05, + "loss": 0.0632, + "step": 54130 + }, + { + "epoch": 2.53, + "learning_rate": 1.1674791905067954e-05, + "loss": 0.0702, + "step": 54135 + }, + { + "epoch": 2.53, + "learning_rate": 1.167400812001317e-05, + "loss": 0.0453, + "step": 54140 + }, + { + "epoch": 2.53, + "learning_rate": 1.1673224334958382e-05, + "loss": 0.1348, + "step": 54145 + }, + { + "epoch": 2.53, + "learning_rate": 1.1672440549903596e-05, + "loss": 0.156, + "step": 54150 + }, + { + "epoch": 2.53, + "learning_rate": 1.1671656764848808e-05, + "loss": 0.1055, + "step": 54155 + }, + { + "epoch": 2.53, + "learning_rate": 1.167087297979402e-05, + "loss": 0.1431, + "step": 54160 + }, + { + "epoch": 2.53, + "learning_rate": 1.1670089194739236e-05, + "loss": 0.3631, + "step": 54165 + }, + { + "epoch": 2.53, + "learning_rate": 1.1669305409684448e-05, + "loss": 0.0497, + "step": 54170 + }, + { + "epoch": 2.53, + "learning_rate": 1.1668521624629662e-05, + "loss": 0.0897, + "step": 54175 + }, + { + "epoch": 2.53, + "learning_rate": 1.1667737839574876e-05, + "loss": 0.0819, + "step": 54180 + }, + { + "epoch": 2.53, + "learning_rate": 1.166695405452009e-05, + "loss": 0.0764, + "step": 54185 + }, + { + "epoch": 2.53, + "learning_rate": 1.1666170269465302e-05, + "loss": 0.0894, + "step": 54190 + }, + { + "epoch": 2.53, + "learning_rate": 1.1665386484410518e-05, + "loss": 0.0866, + "step": 54195 + }, + { + "epoch": 2.53, + "learning_rate": 1.166460269935573e-05, + "loss": 0.1523, + "step": 54200 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663818914300944e-05, + "loss": 0.1929, + "step": 54205 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663035129246156e-05, + "loss": 0.2861, + "step": 54210 + }, + { + "epoch": 2.53, + "learning_rate": 1.1662251344191372e-05, + "loss": 0.2864, + "step": 54215 + }, + { + "epoch": 2.53, + "learning_rate": 1.1661467559136584e-05, + "loss": 0.0513, + "step": 54220 + }, + { + "epoch": 2.53, + "learning_rate": 1.1660683774081796e-05, + "loss": 0.0342, + "step": 54225 + }, + { + "epoch": 2.53, + "learning_rate": 1.165989998902701e-05, + "loss": 0.078, + "step": 54230 + }, + { + "epoch": 2.53, + "learning_rate": 1.1659116203972222e-05, + "loss": 0.0786, + "step": 54235 + }, + { + "epoch": 2.53, + "learning_rate": 1.1658332418917438e-05, + "loss": 0.1127, + "step": 54240 + }, + { + "epoch": 2.53, + "learning_rate": 1.165754863386265e-05, + "loss": 0.1296, + "step": 54245 + }, + { + "epoch": 2.53, + "learning_rate": 1.1656764848807864e-05, + "loss": 0.134, + "step": 54250 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655981063753076e-05, + "loss": 0.2205, + "step": 54255 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655197278698292e-05, + "loss": 0.1846, + "step": 54260 + }, + { + "epoch": 2.53, + "learning_rate": 1.1654413493643504e-05, + "loss": 0.3259, + "step": 54265 + }, + { + "epoch": 2.53, + "learning_rate": 1.1653629708588718e-05, + "loss": 0.0222, + "step": 54270 + }, + { + "epoch": 2.53, + "learning_rate": 1.165284592353393e-05, + "loss": 0.0677, + "step": 54275 + }, + { + "epoch": 2.53, + "learning_rate": 1.1652062138479146e-05, + "loss": 0.0542, + "step": 54280 + }, + { + "epoch": 2.53, + "learning_rate": 1.1651278353424358e-05, + "loss": 0.0699, + "step": 54285 + }, + { + "epoch": 2.53, + "learning_rate": 1.165049456836957e-05, + "loss": 0.0722, + "step": 54290 + }, + { + "epoch": 2.53, + "learning_rate": 1.1649710783314786e-05, + "loss": 0.1666, + "step": 54295 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648926998259998e-05, + "loss": 0.2113, + "step": 54300 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648143213205212e-05, + "loss": 0.1151, + "step": 54305 + }, + { + "epoch": 2.53, + "learning_rate": 1.1647359428150424e-05, + "loss": 0.2646, + "step": 54310 + }, + { + "epoch": 2.53, + "learning_rate": 1.164657564309564e-05, + "loss": 0.2441, + "step": 54315 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645791858040852e-05, + "loss": 0.0764, + "step": 54320 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645008072986066e-05, + "loss": 0.0424, + "step": 54325 + }, + { + "epoch": 2.54, + "learning_rate": 1.1644224287931278e-05, + "loss": 0.0538, + "step": 54330 + }, + { + "epoch": 2.54, + "learning_rate": 1.1643440502876494e-05, + "loss": 0.0319, + "step": 54335 + }, + { + "epoch": 2.54, + "learning_rate": 1.1642656717821706e-05, + "loss": 0.1016, + "step": 54340 + }, + { + "epoch": 2.54, + "learning_rate": 1.164187293276692e-05, + "loss": 0.0941, + "step": 54345 + }, + { + "epoch": 2.54, + "learning_rate": 1.1641089147712132e-05, + "loss": 0.0697, + "step": 54350 + }, + { + "epoch": 2.54, + "learning_rate": 1.1640305362657344e-05, + "loss": 0.1258, + "step": 54355 + }, + { + "epoch": 2.54, + "learning_rate": 1.163952157760256e-05, + "loss": 0.2775, + "step": 54360 + }, + { + "epoch": 2.54, + "learning_rate": 1.1638737792547772e-05, + "loss": 0.3991, + "step": 54365 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637954007492986e-05, + "loss": 0.098, + "step": 54370 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637170222438198e-05, + "loss": 0.0404, + "step": 54375 + }, + { + "epoch": 2.54, + "learning_rate": 1.1636386437383414e-05, + "loss": 0.0601, + "step": 54380 + }, + { + "epoch": 2.54, + "learning_rate": 1.1635602652328626e-05, + "loss": 0.0452, + "step": 54385 + }, + { + "epoch": 2.54, + "learning_rate": 1.163481886727384e-05, + "loss": 0.1061, + "step": 54390 + }, + { + "epoch": 2.54, + "learning_rate": 1.1634035082219054e-05, + "loss": 0.0819, + "step": 54395 + }, + { + "epoch": 2.54, + "learning_rate": 1.1633251297164268e-05, + "loss": 0.1277, + "step": 54400 + }, + { + "epoch": 2.54, + "learning_rate": 1.163246751210948e-05, + "loss": 0.2061, + "step": 54405 + }, + { + "epoch": 2.54, + "learning_rate": 1.1631683727054695e-05, + "loss": 0.3015, + "step": 54410 + }, + { + "epoch": 2.54, + "learning_rate": 1.1630899941999908e-05, + "loss": 0.4022, + "step": 54415 + }, + { + "epoch": 2.54, + "learning_rate": 1.163011615694512e-05, + "loss": 0.0536, + "step": 54420 + }, + { + "epoch": 2.54, + "learning_rate": 1.1629332371890334e-05, + "loss": 0.0426, + "step": 54425 + }, + { + "epoch": 2.54, + "learning_rate": 1.1628548586835546e-05, + "loss": 0.046, + "step": 54430 + }, + { + "epoch": 2.54, + "learning_rate": 1.1627764801780761e-05, + "loss": 0.0336, + "step": 54435 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626981016725974e-05, + "loss": 0.0846, + "step": 54440 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626197231671188e-05, + "loss": 0.0895, + "step": 54445 + }, + { + "epoch": 2.54, + "learning_rate": 1.16254134466164e-05, + "loss": 0.0969, + "step": 54450 + }, + { + "epoch": 2.54, + "learning_rate": 1.1624629661561615e-05, + "loss": 0.1187, + "step": 54455 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623845876506828e-05, + "loss": 0.2181, + "step": 54460 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623062091452042e-05, + "loss": 0.2948, + "step": 54465 + }, + { + "epoch": 2.54, + "learning_rate": 1.1622278306397254e-05, + "loss": 0.0419, + "step": 54470 + }, + { + "epoch": 2.54, + "learning_rate": 1.162149452134247e-05, + "loss": 0.0756, + "step": 54475 + }, + { + "epoch": 2.54, + "learning_rate": 1.1620710736287682e-05, + "loss": 0.0746, + "step": 54480 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619926951232894e-05, + "loss": 0.0578, + "step": 54485 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619143166178108e-05, + "loss": 0.047, + "step": 54490 + }, + { + "epoch": 2.54, + "learning_rate": 1.1618359381123322e-05, + "loss": 0.1197, + "step": 54495 + }, + { + "epoch": 2.54, + "learning_rate": 1.1617575596068535e-05, + "loss": 0.0707, + "step": 54500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616791811013748e-05, + "loss": 0.1846, + "step": 54505 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616008025958963e-05, + "loss": 0.3005, + "step": 54510 + }, + { + "epoch": 2.54, + "learning_rate": 1.1615224240904176e-05, + "loss": 0.3327, + "step": 54515 + }, + { + "epoch": 2.54, + "learning_rate": 1.161444045584939e-05, + "loss": 0.0754, + "step": 54520 + }, + { + "epoch": 2.54, + "learning_rate": 1.1613656670794602e-05, + "loss": 0.0423, + "step": 54525 + }, + { + "epoch": 2.54, + "learning_rate": 1.1612872885739817e-05, + "loss": 0.0361, + "step": 54530 + }, + { + "epoch": 2.54, + "learning_rate": 1.161208910068503e-05, + "loss": 0.0925, + "step": 54535 + }, + { + "epoch": 2.54, + "learning_rate": 1.1611305315630243e-05, + "loss": 0.0593, + "step": 54540 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610521530575456e-05, + "loss": 0.1092, + "step": 54545 + }, + { + "epoch": 2.55, + "learning_rate": 1.1609737745520668e-05, + "loss": 0.1291, + "step": 54550 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608953960465883e-05, + "loss": 0.2171, + "step": 54555 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608170175411096e-05, + "loss": 0.1797, + "step": 54560 + }, + { + "epoch": 2.55, + "learning_rate": 1.160738639035631e-05, + "loss": 0.3179, + "step": 54565 + }, + { + "epoch": 2.55, + "learning_rate": 1.1606602605301522e-05, + "loss": 0.0331, + "step": 54570 + }, + { + "epoch": 2.55, + "learning_rate": 1.1605818820246737e-05, + "loss": 0.0527, + "step": 54575 + }, + { + "epoch": 2.55, + "learning_rate": 1.160503503519195e-05, + "loss": 0.0389, + "step": 54580 + }, + { + "epoch": 2.55, + "learning_rate": 1.1604251250137163e-05, + "loss": 0.0399, + "step": 54585 + }, + { + "epoch": 2.55, + "learning_rate": 1.1603467465082376e-05, + "loss": 0.0983, + "step": 54590 + }, + { + "epoch": 2.55, + "learning_rate": 1.1602683680027591e-05, + "loss": 0.0581, + "step": 54595 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601899894972803e-05, + "loss": 0.1263, + "step": 54600 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601116109918017e-05, + "loss": 0.1091, + "step": 54605 + }, + { + "epoch": 2.55, + "learning_rate": 1.1600332324863231e-05, + "loss": 0.3447, + "step": 54610 + }, + { + "epoch": 2.55, + "learning_rate": 1.1599548539808443e-05, + "loss": 0.2702, + "step": 54615 + }, + { + "epoch": 2.55, + "learning_rate": 1.1598764754753657e-05, + "loss": 0.1182, + "step": 54620 + }, + { + "epoch": 2.55, + "learning_rate": 1.159798096969887e-05, + "loss": 0.0375, + "step": 54625 + }, + { + "epoch": 2.55, + "learning_rate": 1.1597197184644085e-05, + "loss": 0.0246, + "step": 54630 + }, + { + "epoch": 2.55, + "learning_rate": 1.1596413399589297e-05, + "loss": 0.0543, + "step": 54635 + }, + { + "epoch": 2.55, + "learning_rate": 1.1595629614534511e-05, + "loss": 0.0953, + "step": 54640 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594845829479724e-05, + "loss": 0.206, + "step": 54645 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594062044424939e-05, + "loss": 0.1144, + "step": 54650 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593278259370151e-05, + "loss": 0.1205, + "step": 54655 + }, + { + "epoch": 2.55, + "learning_rate": 1.1592494474315365e-05, + "loss": 0.2163, + "step": 54660 + }, + { + "epoch": 2.55, + "learning_rate": 1.1591710689260577e-05, + "loss": 0.34, + "step": 54665 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590926904205793e-05, + "loss": 0.0405, + "step": 54670 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590143119151005e-05, + "loss": 0.0285, + "step": 54675 + }, + { + "epoch": 2.55, + "learning_rate": 1.1589359334096217e-05, + "loss": 0.0436, + "step": 54680 + }, + { + "epoch": 2.55, + "learning_rate": 1.1588575549041431e-05, + "loss": 0.036, + "step": 54685 + }, + { + "epoch": 2.55, + "learning_rate": 1.1587791763986644e-05, + "loss": 0.0615, + "step": 54690 + }, + { + "epoch": 2.55, + "learning_rate": 1.158700797893186e-05, + "loss": 0.073, + "step": 54695 + }, + { + "epoch": 2.55, + "learning_rate": 1.1586224193877071e-05, + "loss": 0.0988, + "step": 54700 + }, + { + "epoch": 2.55, + "learning_rate": 1.1585440408822285e-05, + "loss": 0.1362, + "step": 54705 + }, + { + "epoch": 2.55, + "learning_rate": 1.15846566237675e-05, + "loss": 0.2456, + "step": 54710 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583872838712713e-05, + "loss": 0.2314, + "step": 54715 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583089053657925e-05, + "loss": 0.1112, + "step": 54720 + }, + { + "epoch": 2.55, + "learning_rate": 1.1582305268603141e-05, + "loss": 0.125, + "step": 54725 + }, + { + "epoch": 2.55, + "learning_rate": 1.1581521483548353e-05, + "loss": 0.0651, + "step": 54730 + }, + { + "epoch": 2.55, + "learning_rate": 1.1580737698493567e-05, + "loss": 0.1076, + "step": 54735 + }, + { + "epoch": 2.55, + "learning_rate": 1.157995391343878e-05, + "loss": 0.0838, + "step": 54740 + }, + { + "epoch": 2.55, + "learning_rate": 1.1579170128383991e-05, + "loss": 0.0998, + "step": 54745 + }, + { + "epoch": 2.55, + "learning_rate": 1.1578386343329207e-05, + "loss": 0.0908, + "step": 54750 + }, + { + "epoch": 2.55, + "learning_rate": 1.157760255827442e-05, + "loss": 0.1566, + "step": 54755 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576818773219633e-05, + "loss": 0.325, + "step": 54760 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576034988164845e-05, + "loss": 0.3982, + "step": 54765 + }, + { + "epoch": 2.56, + "learning_rate": 1.1575251203110061e-05, + "loss": 0.0304, + "step": 54770 + }, + { + "epoch": 2.56, + "learning_rate": 1.1574467418055273e-05, + "loss": 0.0603, + "step": 54775 + }, + { + "epoch": 2.56, + "learning_rate": 1.1573683633000487e-05, + "loss": 0.0578, + "step": 54780 + }, + { + "epoch": 2.56, + "learning_rate": 1.15728998479457e-05, + "loss": 0.0566, + "step": 54785 + }, + { + "epoch": 2.56, + "learning_rate": 1.1572116062890915e-05, + "loss": 0.0771, + "step": 54790 + }, + { + "epoch": 2.56, + "learning_rate": 1.1571332277836127e-05, + "loss": 0.1294, + "step": 54795 + }, + { + "epoch": 2.56, + "learning_rate": 1.1570548492781341e-05, + "loss": 0.1138, + "step": 54800 + }, + { + "epoch": 2.56, + "learning_rate": 1.1569764707726553e-05, + "loss": 0.1218, + "step": 54805 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568980922671767e-05, + "loss": 0.256, + "step": 54810 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568197137616981e-05, + "loss": 0.3657, + "step": 54815 + }, + { + "epoch": 2.56, + "learning_rate": 1.1567413352562193e-05, + "loss": 0.0775, + "step": 54820 + }, + { + "epoch": 2.56, + "learning_rate": 1.1566629567507409e-05, + "loss": 0.0329, + "step": 54825 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565845782452621e-05, + "loss": 0.02, + "step": 54830 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565061997397835e-05, + "loss": 0.0346, + "step": 54835 + }, + { + "epoch": 2.56, + "learning_rate": 1.1564278212343047e-05, + "loss": 0.0832, + "step": 54840 + }, + { + "epoch": 2.56, + "learning_rate": 1.1563494427288263e-05, + "loss": 0.0888, + "step": 54845 + }, + { + "epoch": 2.56, + "learning_rate": 1.1562710642233475e-05, + "loss": 0.1356, + "step": 54850 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561926857178689e-05, + "loss": 0.1574, + "step": 54855 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561143072123901e-05, + "loss": 0.2213, + "step": 54860 + }, + { + "epoch": 2.56, + "learning_rate": 1.1560359287069117e-05, + "loss": 0.2773, + "step": 54865 + }, + { + "epoch": 2.56, + "learning_rate": 1.1559575502014329e-05, + "loss": 0.073, + "step": 54870 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558791716959541e-05, + "loss": 0.0188, + "step": 54875 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558007931904755e-05, + "loss": 0.0895, + "step": 54880 + }, + { + "epoch": 2.56, + "learning_rate": 1.1557224146849967e-05, + "loss": 0.0839, + "step": 54885 + }, + { + "epoch": 2.56, + "learning_rate": 1.1556440361795183e-05, + "loss": 0.0635, + "step": 54890 + }, + { + "epoch": 2.56, + "learning_rate": 1.1555656576740395e-05, + "loss": 0.0701, + "step": 54895 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554872791685609e-05, + "loss": 0.1132, + "step": 54900 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554089006630821e-05, + "loss": 0.1373, + "step": 54905 + }, + { + "epoch": 2.56, + "learning_rate": 1.1553305221576037e-05, + "loss": 0.1604, + "step": 54910 + }, + { + "epoch": 2.56, + "learning_rate": 1.1552521436521249e-05, + "loss": 0.3499, + "step": 54915 + }, + { + "epoch": 2.56, + "learning_rate": 1.1551737651466463e-05, + "loss": 0.0524, + "step": 54920 + }, + { + "epoch": 2.56, + "learning_rate": 1.1550953866411677e-05, + "loss": 0.026, + "step": 54925 + }, + { + "epoch": 2.56, + "learning_rate": 1.155017008135689e-05, + "loss": 0.0591, + "step": 54930 + }, + { + "epoch": 2.56, + "learning_rate": 1.1549386296302103e-05, + "loss": 0.1202, + "step": 54935 + }, + { + "epoch": 2.56, + "learning_rate": 1.1548602511247315e-05, + "loss": 0.0974, + "step": 54940 + }, + { + "epoch": 2.56, + "learning_rate": 1.154781872619253e-05, + "loss": 0.1075, + "step": 54945 + }, + { + "epoch": 2.56, + "learning_rate": 1.1547034941137743e-05, + "loss": 0.588, + "step": 54950 + }, + { + "epoch": 2.56, + "learning_rate": 1.1546251156082957e-05, + "loss": 0.1737, + "step": 54955 + }, + { + "epoch": 2.56, + "learning_rate": 1.1545467371028169e-05, + "loss": 0.2966, + "step": 54960 + }, + { + "epoch": 2.56, + "learning_rate": 1.1544683585973385e-05, + "loss": 0.1925, + "step": 54965 + }, + { + "epoch": 2.56, + "learning_rate": 1.1543899800918597e-05, + "loss": 0.0461, + "step": 54970 + }, + { + "epoch": 2.57, + "learning_rate": 1.154311601586381e-05, + "loss": 0.0533, + "step": 54975 + }, + { + "epoch": 2.57, + "learning_rate": 1.1542332230809023e-05, + "loss": 0.0946, + "step": 54980 + }, + { + "epoch": 2.57, + "learning_rate": 1.1541548445754239e-05, + "loss": 0.0699, + "step": 54985 + }, + { + "epoch": 2.57, + "learning_rate": 1.154076466069945e-05, + "loss": 0.125, + "step": 54990 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539980875644665e-05, + "loss": 0.0972, + "step": 54995 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539197090589877e-05, + "loss": 0.2018, + "step": 55000 + }, + { + "epoch": 2.57, + "learning_rate": 1.153841330553509e-05, + "loss": 0.2578, + "step": 55005 + }, + { + "epoch": 2.57, + "learning_rate": 1.1537629520480305e-05, + "loss": 0.2174, + "step": 55010 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536845735425517e-05, + "loss": 0.2747, + "step": 55015 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536061950370731e-05, + "loss": 0.1615, + "step": 55020 + }, + { + "epoch": 2.57, + "learning_rate": 1.1535278165315945e-05, + "loss": 0.034, + "step": 55025 + }, + { + "epoch": 2.57, + "learning_rate": 1.1534494380261159e-05, + "loss": 0.101, + "step": 55030 + }, + { + "epoch": 2.57, + "learning_rate": 1.1533710595206371e-05, + "loss": 0.1131, + "step": 55035 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532926810151586e-05, + "loss": 0.142, + "step": 55040 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532143025096799e-05, + "loss": 0.0432, + "step": 55045 + }, + { + "epoch": 2.57, + "learning_rate": 1.1531359240042013e-05, + "loss": 0.1238, + "step": 55050 + }, + { + "epoch": 2.57, + "learning_rate": 1.1530575454987225e-05, + "loss": 0.1882, + "step": 55055 + }, + { + "epoch": 2.57, + "learning_rate": 1.152979166993244e-05, + "loss": 0.1863, + "step": 55060 + }, + { + "epoch": 2.57, + "learning_rate": 1.1529007884877653e-05, + "loss": 0.3653, + "step": 55065 + }, + { + "epoch": 2.57, + "learning_rate": 1.1528224099822865e-05, + "loss": 0.043, + "step": 55070 + }, + { + "epoch": 2.57, + "learning_rate": 1.1527440314768079e-05, + "loss": 0.0351, + "step": 55075 + }, + { + "epoch": 2.57, + "learning_rate": 1.1526656529713291e-05, + "loss": 0.0739, + "step": 55080 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525872744658507e-05, + "loss": 0.0644, + "step": 55085 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525088959603719e-05, + "loss": 0.1164, + "step": 55090 + }, + { + "epoch": 2.57, + "learning_rate": 1.1524305174548933e-05, + "loss": 0.1265, + "step": 55095 + }, + { + "epoch": 2.57, + "learning_rate": 1.1523521389494145e-05, + "loss": 0.0871, + "step": 55100 + }, + { + "epoch": 2.57, + "learning_rate": 1.152273760443936e-05, + "loss": 0.1264, + "step": 55105 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521953819384573e-05, + "loss": 0.3587, + "step": 55110 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521170034329787e-05, + "loss": 0.4438, + "step": 55115 + }, + { + "epoch": 2.57, + "learning_rate": 1.1520386249274999e-05, + "loss": 0.0607, + "step": 55120 + }, + { + "epoch": 2.57, + "learning_rate": 1.1519602464220214e-05, + "loss": 0.0342, + "step": 55125 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518818679165427e-05, + "loss": 0.022, + "step": 55130 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518034894110639e-05, + "loss": 0.0265, + "step": 55135 + }, + { + "epoch": 2.57, + "learning_rate": 1.1517251109055854e-05, + "loss": 0.0946, + "step": 55140 + }, + { + "epoch": 2.57, + "learning_rate": 1.1516467324001067e-05, + "loss": 0.1235, + "step": 55145 + }, + { + "epoch": 2.57, + "learning_rate": 1.151568353894628e-05, + "loss": 0.2051, + "step": 55150 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514899753891493e-05, + "loss": 0.2828, + "step": 55155 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514115968836708e-05, + "loss": 0.2089, + "step": 55160 + }, + { + "epoch": 2.57, + "learning_rate": 1.151333218378192e-05, + "loss": 0.2756, + "step": 55165 + }, + { + "epoch": 2.57, + "learning_rate": 1.1512548398727134e-05, + "loss": 0.0753, + "step": 55170 + }, + { + "epoch": 2.57, + "learning_rate": 1.1511764613672347e-05, + "loss": 0.0189, + "step": 55175 + }, + { + "epoch": 2.57, + "learning_rate": 1.1510980828617562e-05, + "loss": 0.0486, + "step": 55180 + }, + { + "epoch": 2.58, + "learning_rate": 1.1510197043562775e-05, + "loss": 0.0668, + "step": 55185 + }, + { + "epoch": 2.58, + "learning_rate": 1.1509413258507988e-05, + "loss": 0.0484, + "step": 55190 + }, + { + "epoch": 2.58, + "learning_rate": 1.15086294734532e-05, + "loss": 0.2344, + "step": 55195 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507845688398413e-05, + "loss": 0.1022, + "step": 55200 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507061903343628e-05, + "loss": 0.2039, + "step": 55205 + }, + { + "epoch": 2.58, + "learning_rate": 1.150627811828884e-05, + "loss": 0.3759, + "step": 55210 + }, + { + "epoch": 2.58, + "learning_rate": 1.1505494333234055e-05, + "loss": 0.3593, + "step": 55215 + }, + { + "epoch": 2.58, + "learning_rate": 1.1504710548179267e-05, + "loss": 0.0577, + "step": 55220 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503926763124482e-05, + "loss": 0.0329, + "step": 55225 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503142978069695e-05, + "loss": 0.0644, + "step": 55230 + }, + { + "epoch": 2.58, + "learning_rate": 1.1502359193014908e-05, + "loss": 0.0507, + "step": 55235 + }, + { + "epoch": 2.58, + "learning_rate": 1.1501575407960122e-05, + "loss": 0.0605, + "step": 55240 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500791622905336e-05, + "loss": 0.1677, + "step": 55245 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500007837850549e-05, + "loss": 0.1497, + "step": 55250 + }, + { + "epoch": 2.58, + "learning_rate": 1.1499224052795762e-05, + "loss": 0.1236, + "step": 55255 + }, + { + "epoch": 2.58, + "learning_rate": 1.1498440267740976e-05, + "loss": 0.2027, + "step": 55260 + }, + { + "epoch": 2.58, + "learning_rate": 1.1497656482686189e-05, + "loss": 0.3551, + "step": 55265 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496872697631402e-05, + "loss": 0.1194, + "step": 55270 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496088912576615e-05, + "loss": 0.0431, + "step": 55275 + }, + { + "epoch": 2.58, + "learning_rate": 1.149530512752183e-05, + "loss": 0.0716, + "step": 55280 + }, + { + "epoch": 2.58, + "learning_rate": 1.1494521342467042e-05, + "loss": 0.0789, + "step": 55285 + }, + { + "epoch": 2.58, + "learning_rate": 1.1493737557412256e-05, + "loss": 0.0781, + "step": 55290 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492953772357469e-05, + "loss": 0.0786, + "step": 55295 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492169987302684e-05, + "loss": 0.1363, + "step": 55300 + }, + { + "epoch": 2.58, + "learning_rate": 1.1491386202247896e-05, + "loss": 0.2028, + "step": 55305 + }, + { + "epoch": 2.58, + "learning_rate": 1.149060241719311e-05, + "loss": 0.1638, + "step": 55310 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489818632138323e-05, + "loss": 0.2609, + "step": 55315 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489034847083538e-05, + "loss": 0.0372, + "step": 55320 + }, + { + "epoch": 2.58, + "learning_rate": 1.148825106202875e-05, + "loss": 0.0139, + "step": 55325 + }, + { + "epoch": 2.58, + "learning_rate": 1.1487467276973963e-05, + "loss": 0.0547, + "step": 55330 + }, + { + "epoch": 2.58, + "learning_rate": 1.1486683491919176e-05, + "loss": 0.0491, + "step": 55335 + }, + { + "epoch": 2.58, + "learning_rate": 1.148589970686439e-05, + "loss": 0.0403, + "step": 55340 + }, + { + "epoch": 2.58, + "learning_rate": 1.1485115921809604e-05, + "loss": 0.1877, + "step": 55345 + }, + { + "epoch": 2.58, + "learning_rate": 1.1484332136754816e-05, + "loss": 0.1706, + "step": 55350 + }, + { + "epoch": 2.58, + "learning_rate": 1.1483548351700032e-05, + "loss": 0.1843, + "step": 55355 + }, + { + "epoch": 2.58, + "learning_rate": 1.1482764566645244e-05, + "loss": 0.2677, + "step": 55360 + }, + { + "epoch": 2.58, + "learning_rate": 1.1481980781590458e-05, + "loss": 0.1672, + "step": 55365 + }, + { + "epoch": 2.58, + "learning_rate": 1.148119699653567e-05, + "loss": 0.0489, + "step": 55370 + }, + { + "epoch": 2.58, + "learning_rate": 1.1480413211480886e-05, + "loss": 0.0291, + "step": 55375 + }, + { + "epoch": 2.58, + "learning_rate": 1.1479629426426098e-05, + "loss": 0.034, + "step": 55380 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478845641371312e-05, + "loss": 0.1155, + "step": 55385 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478061856316524e-05, + "loss": 0.1148, + "step": 55390 + }, + { + "epoch": 2.58, + "learning_rate": 1.1477278071261737e-05, + "loss": 0.1079, + "step": 55395 + }, + { + "epoch": 2.59, + "learning_rate": 1.1476494286206952e-05, + "loss": 0.1543, + "step": 55400 + }, + { + "epoch": 2.59, + "learning_rate": 1.1475710501152164e-05, + "loss": 0.1308, + "step": 55405 + }, + { + "epoch": 2.59, + "learning_rate": 1.1474926716097378e-05, + "loss": 0.3201, + "step": 55410 + }, + { + "epoch": 2.59, + "learning_rate": 1.147414293104259e-05, + "loss": 0.1922, + "step": 55415 + }, + { + "epoch": 2.59, + "learning_rate": 1.1473359145987806e-05, + "loss": 0.056, + "step": 55420 + }, + { + "epoch": 2.59, + "learning_rate": 1.1472575360933018e-05, + "loss": 0.0477, + "step": 55425 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471791575878232e-05, + "loss": 0.0765, + "step": 55430 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471007790823444e-05, + "loss": 0.0625, + "step": 55435 + }, + { + "epoch": 2.59, + "learning_rate": 1.147022400576866e-05, + "loss": 0.0535, + "step": 55440 + }, + { + "epoch": 2.59, + "learning_rate": 1.1469440220713872e-05, + "loss": 0.1265, + "step": 55445 + }, + { + "epoch": 2.59, + "learning_rate": 1.1468656435659086e-05, + "loss": 0.0814, + "step": 55450 + }, + { + "epoch": 2.59, + "learning_rate": 1.14678726506043e-05, + "loss": 0.1643, + "step": 55455 + }, + { + "epoch": 2.59, + "learning_rate": 1.1467088865549512e-05, + "loss": 0.3128, + "step": 55460 + }, + { + "epoch": 2.59, + "learning_rate": 1.1466305080494726e-05, + "loss": 0.2313, + "step": 55465 + }, + { + "epoch": 2.59, + "learning_rate": 1.1465521295439938e-05, + "loss": 0.0514, + "step": 55470 + }, + { + "epoch": 2.59, + "learning_rate": 1.1464737510385154e-05, + "loss": 0.0917, + "step": 55475 + }, + { + "epoch": 2.59, + "learning_rate": 1.1463953725330366e-05, + "loss": 0.0453, + "step": 55480 + }, + { + "epoch": 2.59, + "learning_rate": 1.146316994027558e-05, + "loss": 0.0833, + "step": 55485 + }, + { + "epoch": 2.59, + "learning_rate": 1.1462386155220792e-05, + "loss": 0.1032, + "step": 55490 + }, + { + "epoch": 2.59, + "learning_rate": 1.1461602370166008e-05, + "loss": 0.1346, + "step": 55495 + }, + { + "epoch": 2.59, + "learning_rate": 1.146081858511122e-05, + "loss": 0.1739, + "step": 55500 + }, + { + "epoch": 2.59, + "learning_rate": 1.1460034800056434e-05, + "loss": 0.2011, + "step": 55505 + }, + { + "epoch": 2.59, + "learning_rate": 1.1459251015001646e-05, + "loss": 0.1148, + "step": 55510 + }, + { + "epoch": 2.59, + "learning_rate": 1.1458467229946862e-05, + "loss": 0.2026, + "step": 55515 + }, + { + "epoch": 2.59, + "learning_rate": 1.1457683444892074e-05, + "loss": 0.0575, + "step": 55520 + }, + { + "epoch": 2.59, + "learning_rate": 1.1456899659837286e-05, + "loss": 0.0359, + "step": 55525 + }, + { + "epoch": 2.59, + "learning_rate": 1.14561158747825e-05, + "loss": 0.0556, + "step": 55530 + }, + { + "epoch": 2.59, + "learning_rate": 1.1455332089727712e-05, + "loss": 0.0612, + "step": 55535 + }, + { + "epoch": 2.59, + "learning_rate": 1.1454548304672928e-05, + "loss": 0.0526, + "step": 55540 + }, + { + "epoch": 2.59, + "learning_rate": 1.145376451961814e-05, + "loss": 0.0315, + "step": 55545 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452980734563354e-05, + "loss": 0.144, + "step": 55550 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452196949508568e-05, + "loss": 0.1281, + "step": 55555 + }, + { + "epoch": 2.59, + "learning_rate": 1.1451413164453782e-05, + "loss": 0.2331, + "step": 55560 + }, + { + "epoch": 2.59, + "learning_rate": 1.1450629379398994e-05, + "loss": 0.2845, + "step": 55565 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449845594344208e-05, + "loss": 0.0317, + "step": 55570 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449061809289422e-05, + "loss": 0.03, + "step": 55575 + }, + { + "epoch": 2.59, + "learning_rate": 1.1448278024234636e-05, + "loss": 0.0739, + "step": 55580 + }, + { + "epoch": 2.59, + "learning_rate": 1.1447494239179848e-05, + "loss": 0.1004, + "step": 55585 + }, + { + "epoch": 2.59, + "learning_rate": 1.144671045412506e-05, + "loss": 0.1453, + "step": 55590 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445926669070276e-05, + "loss": 0.2057, + "step": 55595 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445142884015488e-05, + "loss": 0.2203, + "step": 55600 + }, + { + "epoch": 2.59, + "learning_rate": 1.1444359098960702e-05, + "loss": 0.1848, + "step": 55605 + }, + { + "epoch": 2.59, + "learning_rate": 1.1443575313905914e-05, + "loss": 0.2693, + "step": 55610 + }, + { + "epoch": 2.6, + "learning_rate": 1.144279152885113e-05, + "loss": 0.2025, + "step": 55615 + }, + { + "epoch": 2.6, + "learning_rate": 1.1442007743796342e-05, + "loss": 0.0353, + "step": 55620 + }, + { + "epoch": 2.6, + "learning_rate": 1.1441223958741556e-05, + "loss": 0.0815, + "step": 55625 + }, + { + "epoch": 2.6, + "learning_rate": 1.1440440173686768e-05, + "loss": 0.0724, + "step": 55630 + }, + { + "epoch": 2.6, + "learning_rate": 1.1439656388631984e-05, + "loss": 0.0461, + "step": 55635 + }, + { + "epoch": 2.6, + "learning_rate": 1.1438872603577196e-05, + "loss": 0.1578, + "step": 55640 + }, + { + "epoch": 2.6, + "learning_rate": 1.143808881852241e-05, + "loss": 0.091, + "step": 55645 + }, + { + "epoch": 2.6, + "learning_rate": 1.1437305033467622e-05, + "loss": 0.1293, + "step": 55650 + }, + { + "epoch": 2.6, + "learning_rate": 1.1436521248412836e-05, + "loss": 0.2032, + "step": 55655 + }, + { + "epoch": 2.6, + "learning_rate": 1.143573746335805e-05, + "loss": 0.2329, + "step": 55660 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434953678303262e-05, + "loss": 0.246, + "step": 55665 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434169893248478e-05, + "loss": 0.0633, + "step": 55670 + }, + { + "epoch": 2.6, + "learning_rate": 1.143338610819369e-05, + "loss": 0.0702, + "step": 55675 + }, + { + "epoch": 2.6, + "learning_rate": 1.1432602323138904e-05, + "loss": 0.0746, + "step": 55680 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431818538084116e-05, + "loss": 0.0994, + "step": 55685 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431034753029332e-05, + "loss": 0.0673, + "step": 55690 + }, + { + "epoch": 2.6, + "learning_rate": 1.1430250967974544e-05, + "loss": 0.1014, + "step": 55695 + }, + { + "epoch": 2.6, + "learning_rate": 1.1429467182919758e-05, + "loss": 0.1598, + "step": 55700 + }, + { + "epoch": 2.6, + "learning_rate": 1.142868339786497e-05, + "loss": 0.1542, + "step": 55705 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427899612810185e-05, + "loss": 0.1683, + "step": 55710 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427115827755398e-05, + "loss": 0.3066, + "step": 55715 + }, + { + "epoch": 2.6, + "learning_rate": 1.142633204270061e-05, + "loss": 0.0337, + "step": 55720 + }, + { + "epoch": 2.6, + "learning_rate": 1.1425548257645824e-05, + "loss": 0.0113, + "step": 55725 + }, + { + "epoch": 2.6, + "learning_rate": 1.1424764472591036e-05, + "loss": 0.0901, + "step": 55730 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423980687536252e-05, + "loss": 0.0709, + "step": 55735 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423196902481464e-05, + "loss": 0.0917, + "step": 55740 + }, + { + "epoch": 2.6, + "learning_rate": 1.1422413117426678e-05, + "loss": 0.0923, + "step": 55745 + }, + { + "epoch": 2.6, + "learning_rate": 1.142162933237189e-05, + "loss": 0.1011, + "step": 55750 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420845547317106e-05, + "loss": 0.2065, + "step": 55755 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420061762262318e-05, + "loss": 0.2262, + "step": 55760 + }, + { + "epoch": 2.6, + "learning_rate": 1.1419277977207532e-05, + "loss": 0.3833, + "step": 55765 + }, + { + "epoch": 2.6, + "learning_rate": 1.1418494192152746e-05, + "loss": 0.0298, + "step": 55770 + }, + { + "epoch": 2.6, + "learning_rate": 1.141771040709796e-05, + "loss": 0.0199, + "step": 55775 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416926622043172e-05, + "loss": 0.0608, + "step": 55780 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416142836988384e-05, + "loss": 0.0674, + "step": 55785 + }, + { + "epoch": 2.6, + "learning_rate": 1.14153590519336e-05, + "loss": 0.05, + "step": 55790 + }, + { + "epoch": 2.6, + "learning_rate": 1.1414575266878812e-05, + "loss": 0.4276, + "step": 55795 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413791481824026e-05, + "loss": 0.1672, + "step": 55800 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413007696769238e-05, + "loss": 0.2332, + "step": 55805 + }, + { + "epoch": 2.6, + "learning_rate": 1.1412223911714453e-05, + "loss": 0.2378, + "step": 55810 + }, + { + "epoch": 2.6, + "learning_rate": 1.1411440126659666e-05, + "loss": 0.3594, + "step": 55815 + }, + { + "epoch": 2.6, + "learning_rate": 1.141065634160488e-05, + "loss": 0.0743, + "step": 55820 + }, + { + "epoch": 2.6, + "learning_rate": 1.1409872556550092e-05, + "loss": 0.0567, + "step": 55825 + }, + { + "epoch": 2.61, + "learning_rate": 1.1409088771495307e-05, + "loss": 0.0386, + "step": 55830 + }, + { + "epoch": 2.61, + "learning_rate": 1.140830498644052e-05, + "loss": 0.0708, + "step": 55835 + }, + { + "epoch": 2.61, + "learning_rate": 1.1407521201385733e-05, + "loss": 0.1147, + "step": 55840 + }, + { + "epoch": 2.61, + "learning_rate": 1.1406737416330946e-05, + "loss": 0.107, + "step": 55845 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405953631276158e-05, + "loss": 0.1496, + "step": 55850 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405169846221374e-05, + "loss": 0.1711, + "step": 55855 + }, + { + "epoch": 2.61, + "learning_rate": 1.1404386061166586e-05, + "loss": 0.1991, + "step": 55860 + }, + { + "epoch": 2.61, + "learning_rate": 1.14036022761118e-05, + "loss": 0.2534, + "step": 55865 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402818491057014e-05, + "loss": 0.0475, + "step": 55870 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402034706002227e-05, + "loss": 0.0586, + "step": 55875 + }, + { + "epoch": 2.61, + "learning_rate": 1.140125092094744e-05, + "loss": 0.0615, + "step": 55880 + }, + { + "epoch": 2.61, + "learning_rate": 1.1400467135892655e-05, + "loss": 0.0973, + "step": 55885 + }, + { + "epoch": 2.61, + "learning_rate": 1.1399683350837867e-05, + "loss": 0.0871, + "step": 55890 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398899565783081e-05, + "loss": 0.0739, + "step": 55895 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398115780728294e-05, + "loss": 0.1811, + "step": 55900 + }, + { + "epoch": 2.61, + "learning_rate": 1.139733199567351e-05, + "loss": 0.1679, + "step": 55905 + }, + { + "epoch": 2.61, + "learning_rate": 1.1396548210618721e-05, + "loss": 0.2377, + "step": 55910 + }, + { + "epoch": 2.61, + "learning_rate": 1.1395764425563934e-05, + "loss": 0.29, + "step": 55915 + }, + { + "epoch": 2.61, + "learning_rate": 1.1394980640509148e-05, + "loss": 0.0498, + "step": 55920 + }, + { + "epoch": 2.61, + "learning_rate": 1.139419685545436e-05, + "loss": 0.0492, + "step": 55925 + }, + { + "epoch": 2.61, + "learning_rate": 1.1393413070399575e-05, + "loss": 0.0697, + "step": 55930 + }, + { + "epoch": 2.61, + "learning_rate": 1.1392629285344788e-05, + "loss": 0.0466, + "step": 55935 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391845500290001e-05, + "loss": 0.0723, + "step": 55940 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391061715235214e-05, + "loss": 0.0772, + "step": 55945 + }, + { + "epoch": 2.61, + "learning_rate": 1.139027793018043e-05, + "loss": 0.1073, + "step": 55950 + }, + { + "epoch": 2.61, + "learning_rate": 1.1389494145125641e-05, + "loss": 0.1866, + "step": 55955 + }, + { + "epoch": 2.61, + "learning_rate": 1.1388710360070855e-05, + "loss": 0.3105, + "step": 55960 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387926575016068e-05, + "loss": 0.3204, + "step": 55965 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387142789961283e-05, + "loss": 0.0536, + "step": 55970 + }, + { + "epoch": 2.61, + "learning_rate": 1.1386359004906495e-05, + "loss": 0.0331, + "step": 55975 + }, + { + "epoch": 2.61, + "learning_rate": 1.1385575219851708e-05, + "loss": 0.0336, + "step": 55980 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384791434796923e-05, + "loss": 0.0781, + "step": 55985 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384007649742135e-05, + "loss": 0.0902, + "step": 55990 + }, + { + "epoch": 2.61, + "learning_rate": 1.138322386468735e-05, + "loss": 0.1057, + "step": 55995 + }, + { + "epoch": 2.61, + "learning_rate": 1.1382440079632562e-05, + "loss": 0.1152, + "step": 56000 + }, + { + "epoch": 2.61, + "learning_rate": 1.1381656294577777e-05, + "loss": 0.1095, + "step": 56005 + }, + { + "epoch": 2.61, + "learning_rate": 1.138087250952299e-05, + "loss": 0.2313, + "step": 56010 + }, + { + "epoch": 2.61, + "learning_rate": 1.1380088724468203e-05, + "loss": 0.3165, + "step": 56015 + }, + { + "epoch": 2.61, + "learning_rate": 1.1379304939413415e-05, + "loss": 0.0596, + "step": 56020 + }, + { + "epoch": 2.61, + "learning_rate": 1.1378521154358631e-05, + "loss": 0.0576, + "step": 56025 + }, + { + "epoch": 2.61, + "learning_rate": 1.1377737369303843e-05, + "loss": 0.0679, + "step": 56030 + }, + { + "epoch": 2.61, + "learning_rate": 1.1376953584249057e-05, + "loss": 0.0265, + "step": 56035 + }, + { + "epoch": 2.61, + "learning_rate": 1.137616979919427e-05, + "loss": 0.0645, + "step": 56040 + }, + { + "epoch": 2.62, + "learning_rate": 1.1375386014139482e-05, + "loss": 0.1304, + "step": 56045 + }, + { + "epoch": 2.62, + "learning_rate": 1.1374602229084697e-05, + "loss": 0.2927, + "step": 56050 + }, + { + "epoch": 2.62, + "learning_rate": 1.137381844402991e-05, + "loss": 0.1191, + "step": 56055 + }, + { + "epoch": 2.62, + "learning_rate": 1.1373034658975123e-05, + "loss": 0.2488, + "step": 56060 + }, + { + "epoch": 2.62, + "learning_rate": 1.1372250873920336e-05, + "loss": 0.1988, + "step": 56065 + }, + { + "epoch": 2.62, + "learning_rate": 1.1371467088865551e-05, + "loss": 0.0326, + "step": 56070 + }, + { + "epoch": 2.62, + "learning_rate": 1.1370683303810763e-05, + "loss": 0.0501, + "step": 56075 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369899518755977e-05, + "loss": 0.0934, + "step": 56080 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369115733701191e-05, + "loss": 0.0529, + "step": 56085 + }, + { + "epoch": 2.62, + "learning_rate": 1.1368331948646405e-05, + "loss": 0.1376, + "step": 56090 + }, + { + "epoch": 2.62, + "learning_rate": 1.1367548163591617e-05, + "loss": 0.0725, + "step": 56095 + }, + { + "epoch": 2.62, + "learning_rate": 1.1366764378536831e-05, + "loss": 0.1274, + "step": 56100 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365980593482045e-05, + "loss": 0.1481, + "step": 56105 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365196808427257e-05, + "loss": 0.1959, + "step": 56110 + }, + { + "epoch": 2.62, + "learning_rate": 1.1364413023372471e-05, + "loss": 0.1771, + "step": 56115 + }, + { + "epoch": 2.62, + "learning_rate": 1.1363629238317683e-05, + "loss": 0.0172, + "step": 56120 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362845453262899e-05, + "loss": 0.0531, + "step": 56125 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362061668208111e-05, + "loss": 0.0984, + "step": 56130 + }, + { + "epoch": 2.62, + "learning_rate": 1.1361277883153325e-05, + "loss": 0.074, + "step": 56135 + }, + { + "epoch": 2.62, + "learning_rate": 1.1360494098098537e-05, + "loss": 0.0676, + "step": 56140 + }, + { + "epoch": 2.62, + "learning_rate": 1.1359710313043753e-05, + "loss": 0.1362, + "step": 56145 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358926527988965e-05, + "loss": 0.1122, + "step": 56150 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358142742934179e-05, + "loss": 0.1515, + "step": 56155 + }, + { + "epoch": 2.62, + "learning_rate": 1.1357358957879391e-05, + "loss": 0.261, + "step": 56160 + }, + { + "epoch": 2.62, + "learning_rate": 1.1356575172824607e-05, + "loss": 0.293, + "step": 56165 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355791387769819e-05, + "loss": 0.0781, + "step": 56170 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355007602715031e-05, + "loss": 0.0546, + "step": 56175 + }, + { + "epoch": 2.62, + "learning_rate": 1.1354223817660245e-05, + "loss": 0.0495, + "step": 56180 + }, + { + "epoch": 2.62, + "learning_rate": 1.1353440032605459e-05, + "loss": 0.0399, + "step": 56185 + }, + { + "epoch": 2.62, + "learning_rate": 1.1352656247550673e-05, + "loss": 0.1285, + "step": 56190 + }, + { + "epoch": 2.62, + "learning_rate": 1.1351872462495885e-05, + "loss": 0.1656, + "step": 56195 + }, + { + "epoch": 2.62, + "learning_rate": 1.13510886774411e-05, + "loss": 0.1061, + "step": 56200 + }, + { + "epoch": 2.62, + "learning_rate": 1.1350304892386313e-05, + "loss": 0.1784, + "step": 56205 + }, + { + "epoch": 2.62, + "learning_rate": 1.1349521107331527e-05, + "loss": 0.254, + "step": 56210 + }, + { + "epoch": 2.62, + "learning_rate": 1.134873732227674e-05, + "loss": 0.3493, + "step": 56215 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347953537221955e-05, + "loss": 0.0536, + "step": 56220 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347169752167167e-05, + "loss": 0.0224, + "step": 56225 + }, + { + "epoch": 2.62, + "learning_rate": 1.1346385967112381e-05, + "loss": 0.06, + "step": 56230 + }, + { + "epoch": 2.62, + "learning_rate": 1.1345602182057593e-05, + "loss": 0.1365, + "step": 56235 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344818397002805e-05, + "loss": 0.0783, + "step": 56240 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344034611948021e-05, + "loss": 0.1582, + "step": 56245 + }, + { + "epoch": 2.62, + "learning_rate": 1.1343250826893233e-05, + "loss": 0.0864, + "step": 56250 + }, + { + "epoch": 2.62, + "learning_rate": 1.1342467041838447e-05, + "loss": 0.1631, + "step": 56255 + }, + { + "epoch": 2.63, + "learning_rate": 1.134168325678366e-05, + "loss": 0.2896, + "step": 56260 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340899471728875e-05, + "loss": 0.2259, + "step": 56265 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340115686674087e-05, + "loss": 0.083, + "step": 56270 + }, + { + "epoch": 2.63, + "learning_rate": 1.1339331901619301e-05, + "loss": 0.0676, + "step": 56275 + }, + { + "epoch": 2.63, + "learning_rate": 1.1338548116564513e-05, + "loss": 0.0453, + "step": 56280 + }, + { + "epoch": 2.63, + "learning_rate": 1.1337764331509729e-05, + "loss": 0.0732, + "step": 56285 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336980546454941e-05, + "loss": 0.0857, + "step": 56290 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336196761400155e-05, + "loss": 0.0953, + "step": 56295 + }, + { + "epoch": 2.63, + "learning_rate": 1.1335412976345369e-05, + "loss": 0.1977, + "step": 56300 + }, + { + "epoch": 2.63, + "learning_rate": 1.1334629191290581e-05, + "loss": 0.1663, + "step": 56305 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333845406235795e-05, + "loss": 0.2465, + "step": 56310 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333061621181007e-05, + "loss": 0.2748, + "step": 56315 + }, + { + "epoch": 2.63, + "learning_rate": 1.1332277836126223e-05, + "loss": 0.0505, + "step": 56320 + }, + { + "epoch": 2.63, + "learning_rate": 1.1331494051071435e-05, + "loss": 0.0276, + "step": 56325 + }, + { + "epoch": 2.63, + "learning_rate": 1.1330710266016649e-05, + "loss": 0.0824, + "step": 56330 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329926480961861e-05, + "loss": 0.1261, + "step": 56335 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329142695907077e-05, + "loss": 0.074, + "step": 56340 + }, + { + "epoch": 2.63, + "learning_rate": 1.1328358910852289e-05, + "loss": 0.206, + "step": 56345 + }, + { + "epoch": 2.63, + "learning_rate": 1.1327575125797503e-05, + "loss": 0.1365, + "step": 56350 + }, + { + "epoch": 2.63, + "learning_rate": 1.1326791340742715e-05, + "loss": 0.1942, + "step": 56355 + }, + { + "epoch": 2.63, + "learning_rate": 1.132600755568793e-05, + "loss": 0.2217, + "step": 56360 + }, + { + "epoch": 2.63, + "learning_rate": 1.1325223770633143e-05, + "loss": 0.3364, + "step": 56365 + }, + { + "epoch": 2.63, + "learning_rate": 1.1324439985578355e-05, + "loss": 0.0938, + "step": 56370 + }, + { + "epoch": 2.63, + "learning_rate": 1.1323656200523569e-05, + "loss": 0.0528, + "step": 56375 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322872415468781e-05, + "loss": 0.0529, + "step": 56380 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322088630413997e-05, + "loss": 0.0723, + "step": 56385 + }, + { + "epoch": 2.63, + "learning_rate": 1.1321304845359209e-05, + "loss": 0.0862, + "step": 56390 + }, + { + "epoch": 2.63, + "learning_rate": 1.1320521060304423e-05, + "loss": 0.1117, + "step": 56395 + }, + { + "epoch": 2.63, + "learning_rate": 1.1319737275249637e-05, + "loss": 0.1197, + "step": 56400 + }, + { + "epoch": 2.63, + "learning_rate": 1.131895349019485e-05, + "loss": 0.2187, + "step": 56405 + }, + { + "epoch": 2.63, + "learning_rate": 1.1318169705140063e-05, + "loss": 0.119, + "step": 56410 + }, + { + "epoch": 2.63, + "learning_rate": 1.1317385920085277e-05, + "loss": 0.4055, + "step": 56415 + }, + { + "epoch": 2.63, + "learning_rate": 1.131660213503049e-05, + "loss": 0.0653, + "step": 56420 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315818349975705e-05, + "loss": 0.0624, + "step": 56425 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315034564920917e-05, + "loss": 0.1205, + "step": 56430 + }, + { + "epoch": 2.63, + "learning_rate": 1.1314250779866129e-05, + "loss": 0.0533, + "step": 56435 + }, + { + "epoch": 2.63, + "learning_rate": 1.1313466994811345e-05, + "loss": 0.1112, + "step": 56440 + }, + { + "epoch": 2.63, + "learning_rate": 1.1312683209756557e-05, + "loss": 0.0813, + "step": 56445 + }, + { + "epoch": 2.63, + "learning_rate": 1.131189942470177e-05, + "loss": 0.1179, + "step": 56450 + }, + { + "epoch": 2.63, + "learning_rate": 1.1311115639646983e-05, + "loss": 0.108, + "step": 56455 + }, + { + "epoch": 2.63, + "learning_rate": 1.1310331854592199e-05, + "loss": 0.3539, + "step": 56460 + }, + { + "epoch": 2.63, + "learning_rate": 1.130954806953741e-05, + "loss": 0.3225, + "step": 56465 + }, + { + "epoch": 2.63, + "learning_rate": 1.1308764284482625e-05, + "loss": 0.1074, + "step": 56470 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307980499427837e-05, + "loss": 0.0635, + "step": 56475 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307196714373052e-05, + "loss": 0.0487, + "step": 56480 + }, + { + "epoch": 2.64, + "learning_rate": 1.1306412929318265e-05, + "loss": 0.0477, + "step": 56485 + }, + { + "epoch": 2.64, + "learning_rate": 1.1305629144263479e-05, + "loss": 0.0545, + "step": 56490 + }, + { + "epoch": 2.64, + "learning_rate": 1.130484535920869e-05, + "loss": 0.1771, + "step": 56495 + }, + { + "epoch": 2.64, + "learning_rate": 1.1304061574153905e-05, + "loss": 0.1594, + "step": 56500 + }, + { + "epoch": 2.64, + "learning_rate": 1.1303277789099119e-05, + "loss": 0.1938, + "step": 56505 + }, + { + "epoch": 2.64, + "learning_rate": 1.130249400404433e-05, + "loss": 0.2132, + "step": 56510 + }, + { + "epoch": 2.64, + "learning_rate": 1.1301710218989546e-05, + "loss": 0.2514, + "step": 56515 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300926433934759e-05, + "loss": 0.042, + "step": 56520 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300142648879973e-05, + "loss": 0.0904, + "step": 56525 + }, + { + "epoch": 2.64, + "learning_rate": 1.1299358863825185e-05, + "loss": 0.0356, + "step": 56530 + }, + { + "epoch": 2.64, + "learning_rate": 1.12985750787704e-05, + "loss": 0.1147, + "step": 56535 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297791293715613e-05, + "loss": 0.0365, + "step": 56540 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297007508660826e-05, + "loss": 0.1268, + "step": 56545 + }, + { + "epoch": 2.64, + "learning_rate": 1.1296223723606039e-05, + "loss": 0.1831, + "step": 56550 + }, + { + "epoch": 2.64, + "learning_rate": 1.1295439938551254e-05, + "loss": 0.1668, + "step": 56555 + }, + { + "epoch": 2.64, + "learning_rate": 1.1294656153496466e-05, + "loss": 0.3163, + "step": 56560 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293872368441679e-05, + "loss": 0.2783, + "step": 56565 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293088583386893e-05, + "loss": 0.0947, + "step": 56570 + }, + { + "epoch": 2.64, + "learning_rate": 1.1292304798332105e-05, + "loss": 0.0528, + "step": 56575 + }, + { + "epoch": 2.64, + "learning_rate": 1.129152101327732e-05, + "loss": 0.0567, + "step": 56580 + }, + { + "epoch": 2.64, + "learning_rate": 1.1290737228222533e-05, + "loss": 0.0607, + "step": 56585 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289953443167747e-05, + "loss": 0.0598, + "step": 56590 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289169658112959e-05, + "loss": 0.151, + "step": 56595 + }, + { + "epoch": 2.64, + "learning_rate": 1.1288385873058174e-05, + "loss": 0.2271, + "step": 56600 + }, + { + "epoch": 2.64, + "learning_rate": 1.1287602088003387e-05, + "loss": 0.1683, + "step": 56605 + }, + { + "epoch": 2.64, + "learning_rate": 1.12868183029486e-05, + "loss": 0.2022, + "step": 56610 + }, + { + "epoch": 2.64, + "learning_rate": 1.1286034517893814e-05, + "loss": 0.2549, + "step": 56615 + }, + { + "epoch": 2.64, + "learning_rate": 1.1285250732839028e-05, + "loss": 0.0524, + "step": 56620 + }, + { + "epoch": 2.64, + "learning_rate": 1.128446694778424e-05, + "loss": 0.0105, + "step": 56625 + }, + { + "epoch": 2.64, + "learning_rate": 1.1283683162729453e-05, + "loss": 0.0215, + "step": 56630 + }, + { + "epoch": 2.64, + "learning_rate": 1.1282899377674668e-05, + "loss": 0.0492, + "step": 56635 + }, + { + "epoch": 2.64, + "learning_rate": 1.128211559261988e-05, + "loss": 0.1595, + "step": 56640 + }, + { + "epoch": 2.64, + "learning_rate": 1.1281331807565094e-05, + "loss": 0.0684, + "step": 56645 + }, + { + "epoch": 2.64, + "learning_rate": 1.1280548022510307e-05, + "loss": 0.1465, + "step": 56650 + }, + { + "epoch": 2.64, + "learning_rate": 1.1279764237455522e-05, + "loss": 0.1494, + "step": 56655 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278980452400734e-05, + "loss": 0.2374, + "step": 56660 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278196667345948e-05, + "loss": 0.2635, + "step": 56665 + }, + { + "epoch": 2.64, + "learning_rate": 1.127741288229116e-05, + "loss": 0.0529, + "step": 56670 + }, + { + "epoch": 2.64, + "learning_rate": 1.1276629097236376e-05, + "loss": 0.0205, + "step": 56675 + }, + { + "epoch": 2.64, + "learning_rate": 1.1275845312181588e-05, + "loss": 0.1151, + "step": 56680 + }, + { + "epoch": 2.65, + "learning_rate": 1.1275061527126802e-05, + "loss": 0.0793, + "step": 56685 + }, + { + "epoch": 2.65, + "learning_rate": 1.1274277742072014e-05, + "loss": 0.058, + "step": 56690 + }, + { + "epoch": 2.65, + "learning_rate": 1.1273493957017227e-05, + "loss": 0.0678, + "step": 56695 + }, + { + "epoch": 2.65, + "learning_rate": 1.1272710171962442e-05, + "loss": 0.1245, + "step": 56700 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271926386907654e-05, + "loss": 0.3038, + "step": 56705 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271142601852868e-05, + "loss": 0.2822, + "step": 56710 + }, + { + "epoch": 2.65, + "learning_rate": 1.1270358816798082e-05, + "loss": 0.2808, + "step": 56715 + }, + { + "epoch": 2.65, + "learning_rate": 1.1269575031743296e-05, + "loss": 0.0484, + "step": 56720 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268791246688508e-05, + "loss": 0.0473, + "step": 56725 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268007461633722e-05, + "loss": 0.0651, + "step": 56730 + }, + { + "epoch": 2.65, + "learning_rate": 1.1267223676578936e-05, + "loss": 0.0608, + "step": 56735 + }, + { + "epoch": 2.65, + "learning_rate": 1.126643989152415e-05, + "loss": 0.0815, + "step": 56740 + }, + { + "epoch": 2.65, + "learning_rate": 1.1265656106469362e-05, + "loss": 0.0831, + "step": 56745 + }, + { + "epoch": 2.65, + "learning_rate": 1.1264872321414578e-05, + "loss": 0.1264, + "step": 56750 + }, + { + "epoch": 2.65, + "learning_rate": 1.126408853635979e-05, + "loss": 0.196, + "step": 56755 + }, + { + "epoch": 2.65, + "learning_rate": 1.1263304751305002e-05, + "loss": 0.1935, + "step": 56760 + }, + { + "epoch": 2.65, + "learning_rate": 1.1262520966250216e-05, + "loss": 0.2181, + "step": 56765 + }, + { + "epoch": 2.65, + "learning_rate": 1.1261737181195428e-05, + "loss": 0.0669, + "step": 56770 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260953396140644e-05, + "loss": 0.0351, + "step": 56775 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260169611085856e-05, + "loss": 0.031, + "step": 56780 + }, + { + "epoch": 2.65, + "learning_rate": 1.125938582603107e-05, + "loss": 0.1046, + "step": 56785 + }, + { + "epoch": 2.65, + "learning_rate": 1.1258602040976282e-05, + "loss": 0.0576, + "step": 56790 + }, + { + "epoch": 2.65, + "learning_rate": 1.1257818255921498e-05, + "loss": 0.1757, + "step": 56795 + }, + { + "epoch": 2.65, + "learning_rate": 1.125703447086671e-05, + "loss": 0.0646, + "step": 56800 + }, + { + "epoch": 2.65, + "learning_rate": 1.1256250685811924e-05, + "loss": 0.0942, + "step": 56805 + }, + { + "epoch": 2.65, + "learning_rate": 1.1255466900757136e-05, + "loss": 0.2081, + "step": 56810 + }, + { + "epoch": 2.65, + "learning_rate": 1.1254683115702352e-05, + "loss": 0.2665, + "step": 56815 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253899330647564e-05, + "loss": 0.1061, + "step": 56820 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253115545592776e-05, + "loss": 0.0643, + "step": 56825 + }, + { + "epoch": 2.65, + "learning_rate": 1.1252331760537992e-05, + "loss": 0.0954, + "step": 56830 + }, + { + "epoch": 2.65, + "learning_rate": 1.1251547975483204e-05, + "loss": 0.1031, + "step": 56835 + }, + { + "epoch": 2.65, + "learning_rate": 1.1250764190428418e-05, + "loss": 0.0669, + "step": 56840 + }, + { + "epoch": 2.65, + "learning_rate": 1.124998040537363e-05, + "loss": 0.102, + "step": 56845 + }, + { + "epoch": 2.65, + "learning_rate": 1.1249196620318846e-05, + "loss": 0.0921, + "step": 56850 + }, + { + "epoch": 2.65, + "learning_rate": 1.1248412835264058e-05, + "loss": 0.2177, + "step": 56855 + }, + { + "epoch": 2.65, + "learning_rate": 1.1247629050209272e-05, + "loss": 0.2624, + "step": 56860 + }, + { + "epoch": 2.65, + "learning_rate": 1.1246845265154484e-05, + "loss": 0.2272, + "step": 56865 + }, + { + "epoch": 2.65, + "learning_rate": 1.12460614800997e-05, + "loss": 0.0577, + "step": 56870 + }, + { + "epoch": 2.65, + "learning_rate": 1.1245277695044912e-05, + "loss": 0.0705, + "step": 56875 + }, + { + "epoch": 2.65, + "learning_rate": 1.1244493909990126e-05, + "loss": 0.0784, + "step": 56880 + }, + { + "epoch": 2.65, + "learning_rate": 1.1243710124935338e-05, + "loss": 0.0725, + "step": 56885 + }, + { + "epoch": 2.65, + "learning_rate": 1.124292633988055e-05, + "loss": 0.079, + "step": 56890 + }, + { + "epoch": 2.65, + "learning_rate": 1.1242142554825766e-05, + "loss": 0.0578, + "step": 56895 + }, + { + "epoch": 2.66, + "learning_rate": 1.1241358769770978e-05, + "loss": 0.1903, + "step": 56900 + }, + { + "epoch": 2.66, + "learning_rate": 1.1240574984716192e-05, + "loss": 0.1061, + "step": 56905 + }, + { + "epoch": 2.66, + "learning_rate": 1.1239791199661404e-05, + "loss": 0.3914, + "step": 56910 + }, + { + "epoch": 2.66, + "learning_rate": 1.123900741460662e-05, + "loss": 0.4026, + "step": 56915 + }, + { + "epoch": 2.66, + "learning_rate": 1.1238223629551832e-05, + "loss": 0.0493, + "step": 56920 + }, + { + "epoch": 2.66, + "learning_rate": 1.1237439844497046e-05, + "loss": 0.066, + "step": 56925 + }, + { + "epoch": 2.66, + "learning_rate": 1.123665605944226e-05, + "loss": 0.0681, + "step": 56930 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235872274387474e-05, + "loss": 0.0228, + "step": 56935 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235088489332686e-05, + "loss": 0.0556, + "step": 56940 + }, + { + "epoch": 2.66, + "learning_rate": 1.12343047042779e-05, + "loss": 0.1144, + "step": 56945 + }, + { + "epoch": 2.66, + "learning_rate": 1.1233520919223114e-05, + "loss": 0.0883, + "step": 56950 + }, + { + "epoch": 2.66, + "learning_rate": 1.1232737134168326e-05, + "loss": 0.156, + "step": 56955 + }, + { + "epoch": 2.66, + "learning_rate": 1.123195334911354e-05, + "loss": 0.2826, + "step": 56960 + }, + { + "epoch": 2.66, + "learning_rate": 1.1231169564058752e-05, + "loss": 0.2467, + "step": 56965 + }, + { + "epoch": 2.66, + "learning_rate": 1.1230385779003968e-05, + "loss": 0.0382, + "step": 56970 + }, + { + "epoch": 2.66, + "learning_rate": 1.122960199394918e-05, + "loss": 0.068, + "step": 56975 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228818208894394e-05, + "loss": 0.0677, + "step": 56980 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228034423839606e-05, + "loss": 0.1175, + "step": 56985 + }, + { + "epoch": 2.66, + "learning_rate": 1.1227250638784822e-05, + "loss": 0.0899, + "step": 56990 + }, + { + "epoch": 2.66, + "learning_rate": 1.1226466853730034e-05, + "loss": 0.1353, + "step": 56995 + }, + { + "epoch": 2.66, + "learning_rate": 1.1225683068675248e-05, + "loss": 0.1473, + "step": 57000 + }, + { + "epoch": 2.66, + "learning_rate": 1.122489928362046e-05, + "loss": 0.1321, + "step": 57005 + }, + { + "epoch": 2.66, + "learning_rate": 1.1224115498565676e-05, + "loss": 0.3038, + "step": 57010 + }, + { + "epoch": 2.66, + "learning_rate": 1.1223331713510888e-05, + "loss": 0.4092, + "step": 57015 + }, + { + "epoch": 2.66, + "learning_rate": 1.12225479284561e-05, + "loss": 0.0233, + "step": 57020 + }, + { + "epoch": 2.66, + "learning_rate": 1.1221764143401314e-05, + "loss": 0.0531, + "step": 57025 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220980358346528e-05, + "loss": 0.011, + "step": 57030 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220196573291742e-05, + "loss": 0.0896, + "step": 57035 + }, + { + "epoch": 2.66, + "learning_rate": 1.1219412788236954e-05, + "loss": 0.038, + "step": 57040 + }, + { + "epoch": 2.66, + "learning_rate": 1.121862900318217e-05, + "loss": 0.1052, + "step": 57045 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217845218127382e-05, + "loss": 0.1334, + "step": 57050 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217061433072596e-05, + "loss": 0.1431, + "step": 57055 + }, + { + "epoch": 2.66, + "learning_rate": 1.1216277648017808e-05, + "loss": 0.1915, + "step": 57060 + }, + { + "epoch": 2.66, + "learning_rate": 1.1215493862963024e-05, + "loss": 0.287, + "step": 57065 + }, + { + "epoch": 2.66, + "learning_rate": 1.1214710077908236e-05, + "loss": 0.1064, + "step": 57070 + }, + { + "epoch": 2.66, + "learning_rate": 1.121392629285345e-05, + "loss": 0.0665, + "step": 57075 + }, + { + "epoch": 2.66, + "learning_rate": 1.1213142507798662e-05, + "loss": 0.0537, + "step": 57080 + }, + { + "epoch": 2.66, + "learning_rate": 1.1212358722743874e-05, + "loss": 0.0913, + "step": 57085 + }, + { + "epoch": 2.66, + "learning_rate": 1.121157493768909e-05, + "loss": 0.0652, + "step": 57090 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210791152634302e-05, + "loss": 0.0595, + "step": 57095 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210007367579516e-05, + "loss": 0.0666, + "step": 57100 + }, + { + "epoch": 2.66, + "learning_rate": 1.1209223582524728e-05, + "loss": 0.2029, + "step": 57105 + }, + { + "epoch": 2.66, + "learning_rate": 1.1208439797469944e-05, + "loss": 0.2374, + "step": 57110 + }, + { + "epoch": 2.67, + "learning_rate": 1.1207656012415156e-05, + "loss": 0.2624, + "step": 57115 + }, + { + "epoch": 2.67, + "learning_rate": 1.120687222736037e-05, + "loss": 0.0789, + "step": 57120 + }, + { + "epoch": 2.67, + "learning_rate": 1.1206088442305582e-05, + "loss": 0.0434, + "step": 57125 + }, + { + "epoch": 2.67, + "learning_rate": 1.1205304657250798e-05, + "loss": 0.0571, + "step": 57130 + }, + { + "epoch": 2.67, + "learning_rate": 1.120452087219601e-05, + "loss": 0.0856, + "step": 57135 + }, + { + "epoch": 2.67, + "learning_rate": 1.1203737087141224e-05, + "loss": 0.1147, + "step": 57140 + }, + { + "epoch": 2.67, + "learning_rate": 1.1202953302086438e-05, + "loss": 0.1438, + "step": 57145 + }, + { + "epoch": 2.67, + "learning_rate": 1.120216951703165e-05, + "loss": 0.1312, + "step": 57150 + }, + { + "epoch": 2.67, + "learning_rate": 1.1201385731976864e-05, + "loss": 0.1851, + "step": 57155 + }, + { + "epoch": 2.67, + "learning_rate": 1.1200601946922076e-05, + "loss": 0.1552, + "step": 57160 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199818161867291e-05, + "loss": 0.2446, + "step": 57165 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199034376812504e-05, + "loss": 0.0227, + "step": 57170 + }, + { + "epoch": 2.67, + "learning_rate": 1.1198250591757718e-05, + "loss": 0.071, + "step": 57175 + }, + { + "epoch": 2.67, + "learning_rate": 1.119746680670293e-05, + "loss": 0.0333, + "step": 57180 + }, + { + "epoch": 2.67, + "learning_rate": 1.1196683021648145e-05, + "loss": 0.0713, + "step": 57185 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195899236593358e-05, + "loss": 0.0717, + "step": 57190 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195115451538572e-05, + "loss": 0.0648, + "step": 57195 + }, + { + "epoch": 2.67, + "learning_rate": 1.1194331666483784e-05, + "loss": 0.1173, + "step": 57200 + }, + { + "epoch": 2.67, + "learning_rate": 1.1193547881429e-05, + "loss": 0.09, + "step": 57205 + }, + { + "epoch": 2.67, + "learning_rate": 1.1192764096374212e-05, + "loss": 0.2192, + "step": 57210 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191980311319424e-05, + "loss": 0.3397, + "step": 57215 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191196526264638e-05, + "loss": 0.0862, + "step": 57220 + }, + { + "epoch": 2.67, + "learning_rate": 1.119041274120985e-05, + "loss": 0.0396, + "step": 57225 + }, + { + "epoch": 2.67, + "learning_rate": 1.1189628956155065e-05, + "loss": 0.0291, + "step": 57230 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188845171100278e-05, + "loss": 0.0333, + "step": 57235 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188061386045492e-05, + "loss": 0.0601, + "step": 57240 + }, + { + "epoch": 2.67, + "learning_rate": 1.1187277600990705e-05, + "loss": 0.0811, + "step": 57245 + }, + { + "epoch": 2.67, + "learning_rate": 1.118649381593592e-05, + "loss": 0.0969, + "step": 57250 + }, + { + "epoch": 2.67, + "learning_rate": 1.1185710030881132e-05, + "loss": 0.229, + "step": 57255 + }, + { + "epoch": 2.67, + "learning_rate": 1.1184926245826346e-05, + "loss": 0.294, + "step": 57260 + }, + { + "epoch": 2.67, + "learning_rate": 1.118414246077156e-05, + "loss": 0.3216, + "step": 57265 + }, + { + "epoch": 2.67, + "learning_rate": 1.1183358675716773e-05, + "loss": 0.0457, + "step": 57270 + }, + { + "epoch": 2.67, + "learning_rate": 1.1182574890661986e-05, + "loss": 0.0488, + "step": 57275 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181791105607198e-05, + "loss": 0.0694, + "step": 57280 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181007320552413e-05, + "loss": 0.0691, + "step": 57285 + }, + { + "epoch": 2.67, + "learning_rate": 1.1180223535497626e-05, + "loss": 0.1437, + "step": 57290 + }, + { + "epoch": 2.67, + "learning_rate": 1.117943975044284e-05, + "loss": 0.1237, + "step": 57295 + }, + { + "epoch": 2.67, + "learning_rate": 1.1178655965388052e-05, + "loss": 0.1094, + "step": 57300 + }, + { + "epoch": 2.67, + "learning_rate": 1.1177872180333267e-05, + "loss": 0.3414, + "step": 57305 + }, + { + "epoch": 2.67, + "learning_rate": 1.117708839527848e-05, + "loss": 0.2766, + "step": 57310 + }, + { + "epoch": 2.67, + "learning_rate": 1.1176304610223693e-05, + "loss": 0.2673, + "step": 57315 + }, + { + "epoch": 2.67, + "learning_rate": 1.1175520825168906e-05, + "loss": 0.0421, + "step": 57320 + }, + { + "epoch": 2.67, + "learning_rate": 1.1174737040114121e-05, + "loss": 0.0769, + "step": 57325 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173953255059333e-05, + "loss": 0.042, + "step": 57330 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173169470004547e-05, + "loss": 0.0679, + "step": 57335 + }, + { + "epoch": 2.68, + "learning_rate": 1.117238568494976e-05, + "loss": 0.0895, + "step": 57340 + }, + { + "epoch": 2.68, + "learning_rate": 1.1171601899894973e-05, + "loss": 0.1201, + "step": 57345 + }, + { + "epoch": 2.68, + "learning_rate": 1.1170818114840187e-05, + "loss": 0.1044, + "step": 57350 + }, + { + "epoch": 2.68, + "learning_rate": 1.11700343297854e-05, + "loss": 0.0649, + "step": 57355 + }, + { + "epoch": 2.68, + "learning_rate": 1.1169250544730615e-05, + "loss": 0.224, + "step": 57360 + }, + { + "epoch": 2.68, + "learning_rate": 1.1168466759675827e-05, + "loss": 0.3896, + "step": 57365 + }, + { + "epoch": 2.68, + "learning_rate": 1.1167682974621041e-05, + "loss": 0.0307, + "step": 57370 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166899189566253e-05, + "loss": 0.0542, + "step": 57375 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166115404511469e-05, + "loss": 0.0674, + "step": 57380 + }, + { + "epoch": 2.68, + "learning_rate": 1.1165331619456681e-05, + "loss": 0.0919, + "step": 57385 + }, + { + "epoch": 2.68, + "learning_rate": 1.1164547834401895e-05, + "loss": 0.0887, + "step": 57390 + }, + { + "epoch": 2.68, + "learning_rate": 1.1163764049347107e-05, + "loss": 0.1729, + "step": 57395 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162980264292323e-05, + "loss": 0.1892, + "step": 57400 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162196479237535e-05, + "loss": 0.1451, + "step": 57405 + }, + { + "epoch": 2.68, + "learning_rate": 1.1161412694182747e-05, + "loss": 0.3484, + "step": 57410 + }, + { + "epoch": 2.68, + "learning_rate": 1.1160628909127961e-05, + "loss": 0.3337, + "step": 57415 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159845124073174e-05, + "loss": 0.0925, + "step": 57420 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159061339018389e-05, + "loss": 0.0863, + "step": 57425 + }, + { + "epoch": 2.68, + "learning_rate": 1.1158277553963601e-05, + "loss": 0.0389, + "step": 57430 + }, + { + "epoch": 2.68, + "learning_rate": 1.1157493768908815e-05, + "loss": 0.0606, + "step": 57435 + }, + { + "epoch": 2.68, + "learning_rate": 1.1156709983854027e-05, + "loss": 0.1619, + "step": 57440 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155926198799243e-05, + "loss": 0.177, + "step": 57445 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155142413744455e-05, + "loss": 0.1052, + "step": 57450 + }, + { + "epoch": 2.68, + "learning_rate": 1.115435862868967e-05, + "loss": 0.2065, + "step": 57455 + }, + { + "epoch": 2.68, + "learning_rate": 1.1153574843634883e-05, + "loss": 0.1876, + "step": 57460 + }, + { + "epoch": 2.68, + "learning_rate": 1.1152791058580097e-05, + "loss": 0.1923, + "step": 57465 + }, + { + "epoch": 2.68, + "learning_rate": 1.115200727352531e-05, + "loss": 0.0589, + "step": 57470 + }, + { + "epoch": 2.68, + "learning_rate": 1.1151223488470521e-05, + "loss": 0.0346, + "step": 57475 + }, + { + "epoch": 2.68, + "learning_rate": 1.1150439703415737e-05, + "loss": 0.0486, + "step": 57480 + }, + { + "epoch": 2.68, + "learning_rate": 1.114965591836095e-05, + "loss": 0.0673, + "step": 57485 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148872133306163e-05, + "loss": 0.0864, + "step": 57490 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148088348251375e-05, + "loss": 0.103, + "step": 57495 + }, + { + "epoch": 2.68, + "learning_rate": 1.1147304563196591e-05, + "loss": 0.2384, + "step": 57500 + }, + { + "epoch": 2.68, + "learning_rate": 1.1146520778141803e-05, + "loss": 0.151, + "step": 57505 + }, + { + "epoch": 2.68, + "learning_rate": 1.1145736993087017e-05, + "loss": 0.1685, + "step": 57510 + }, + { + "epoch": 2.68, + "learning_rate": 1.114495320803223e-05, + "loss": 0.2351, + "step": 57515 + }, + { + "epoch": 2.68, + "learning_rate": 1.1144169422977445e-05, + "loss": 0.0842, + "step": 57520 + }, + { + "epoch": 2.68, + "learning_rate": 1.1143385637922657e-05, + "loss": 0.0256, + "step": 57525 + }, + { + "epoch": 2.68, + "learning_rate": 1.1142601852867871e-05, + "loss": 0.0696, + "step": 57530 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141818067813083e-05, + "loss": 0.0371, + "step": 57535 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141034282758295e-05, + "loss": 0.0474, + "step": 57540 + }, + { + "epoch": 2.69, + "learning_rate": 1.1140250497703511e-05, + "loss": 0.0927, + "step": 57545 + }, + { + "epoch": 2.69, + "learning_rate": 1.1139466712648723e-05, + "loss": 0.0978, + "step": 57550 + }, + { + "epoch": 2.69, + "learning_rate": 1.1138682927593937e-05, + "loss": 0.223, + "step": 57555 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137899142539151e-05, + "loss": 0.2131, + "step": 57560 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137115357484365e-05, + "loss": 0.2394, + "step": 57565 + }, + { + "epoch": 2.69, + "learning_rate": 1.1136331572429577e-05, + "loss": 0.0255, + "step": 57570 + }, + { + "epoch": 2.69, + "learning_rate": 1.1135547787374791e-05, + "loss": 0.0629, + "step": 57575 + }, + { + "epoch": 2.69, + "learning_rate": 1.1134764002320005e-05, + "loss": 0.0524, + "step": 57580 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133980217265219e-05, + "loss": 0.0692, + "step": 57585 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133196432210431e-05, + "loss": 0.0791, + "step": 57590 + }, + { + "epoch": 2.69, + "learning_rate": 1.1132412647155647e-05, + "loss": 0.0576, + "step": 57595 + }, + { + "epoch": 2.69, + "learning_rate": 1.1131628862100859e-05, + "loss": 0.1245, + "step": 57600 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130845077046071e-05, + "loss": 0.1853, + "step": 57605 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130061291991285e-05, + "loss": 0.4984, + "step": 57610 + }, + { + "epoch": 2.69, + "learning_rate": 1.1129277506936497e-05, + "loss": 0.306, + "step": 57615 + }, + { + "epoch": 2.69, + "learning_rate": 1.1128493721881713e-05, + "loss": 0.0546, + "step": 57620 + }, + { + "epoch": 2.69, + "learning_rate": 1.1127709936826925e-05, + "loss": 0.0659, + "step": 57625 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126926151772139e-05, + "loss": 0.043, + "step": 57630 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126142366717351e-05, + "loss": 0.0508, + "step": 57635 + }, + { + "epoch": 2.69, + "learning_rate": 1.1125358581662567e-05, + "loss": 0.0316, + "step": 57640 + }, + { + "epoch": 2.69, + "learning_rate": 1.1124574796607779e-05, + "loss": 0.0663, + "step": 57645 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123791011552993e-05, + "loss": 0.098, + "step": 57650 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123007226498205e-05, + "loss": 0.1989, + "step": 57655 + }, + { + "epoch": 2.69, + "learning_rate": 1.112222344144342e-05, + "loss": 0.25, + "step": 57660 + }, + { + "epoch": 2.69, + "learning_rate": 1.1121439656388633e-05, + "loss": 0.3534, + "step": 57665 + }, + { + "epoch": 2.69, + "learning_rate": 1.1120655871333845e-05, + "loss": 0.018, + "step": 57670 + }, + { + "epoch": 2.69, + "learning_rate": 1.111987208627906e-05, + "loss": 0.0352, + "step": 57675 + }, + { + "epoch": 2.69, + "learning_rate": 1.1119088301224273e-05, + "loss": 0.0879, + "step": 57680 + }, + { + "epoch": 2.69, + "learning_rate": 1.1118304516169487e-05, + "loss": 0.1559, + "step": 57685 + }, + { + "epoch": 2.69, + "learning_rate": 1.1117520731114699e-05, + "loss": 0.1489, + "step": 57690 + }, + { + "epoch": 2.69, + "learning_rate": 1.1116736946059915e-05, + "loss": 0.0735, + "step": 57695 + }, + { + "epoch": 2.69, + "learning_rate": 1.1115953161005127e-05, + "loss": 0.1344, + "step": 57700 + }, + { + "epoch": 2.69, + "learning_rate": 1.111516937595034e-05, + "loss": 0.1133, + "step": 57705 + }, + { + "epoch": 2.69, + "learning_rate": 1.1114385590895553e-05, + "loss": 0.1911, + "step": 57710 + }, + { + "epoch": 2.69, + "learning_rate": 1.1113601805840769e-05, + "loss": 0.3633, + "step": 57715 + }, + { + "epoch": 2.69, + "learning_rate": 1.111281802078598e-05, + "loss": 0.036, + "step": 57720 + }, + { + "epoch": 2.69, + "learning_rate": 1.1112034235731195e-05, + "loss": 0.0589, + "step": 57725 + }, + { + "epoch": 2.69, + "learning_rate": 1.1111250450676407e-05, + "loss": 0.0351, + "step": 57730 + }, + { + "epoch": 2.69, + "learning_rate": 1.1110466665621619e-05, + "loss": 0.0914, + "step": 57735 + }, + { + "epoch": 2.69, + "learning_rate": 1.1109682880566835e-05, + "loss": 0.1147, + "step": 57740 + }, + { + "epoch": 2.69, + "learning_rate": 1.1108899095512047e-05, + "loss": 0.0828, + "step": 57745 + }, + { + "epoch": 2.69, + "learning_rate": 1.110811531045726e-05, + "loss": 0.1332, + "step": 57750 + }, + { + "epoch": 2.69, + "learning_rate": 1.1107331525402473e-05, + "loss": 0.2566, + "step": 57755 + }, + { + "epoch": 2.7, + "learning_rate": 1.1106547740347689e-05, + "loss": 0.2272, + "step": 57760 + }, + { + "epoch": 2.7, + "learning_rate": 1.1105763955292901e-05, + "loss": 0.2749, + "step": 57765 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104980170238115e-05, + "loss": 0.0733, + "step": 57770 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104196385183329e-05, + "loss": 0.0124, + "step": 57775 + }, + { + "epoch": 2.7, + "learning_rate": 1.1103412600128543e-05, + "loss": 0.0851, + "step": 57780 + }, + { + "epoch": 2.7, + "learning_rate": 1.1102628815073755e-05, + "loss": 0.0767, + "step": 57785 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101845030018969e-05, + "loss": 0.0738, + "step": 57790 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101061244964183e-05, + "loss": 0.0922, + "step": 57795 + }, + { + "epoch": 2.7, + "learning_rate": 1.1100277459909395e-05, + "loss": 0.121, + "step": 57800 + }, + { + "epoch": 2.7, + "learning_rate": 1.1099493674854609e-05, + "loss": 0.2056, + "step": 57805 + }, + { + "epoch": 2.7, + "learning_rate": 1.1098709889799821e-05, + "loss": 0.1976, + "step": 57810 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097926104745037e-05, + "loss": 0.3544, + "step": 57815 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097142319690249e-05, + "loss": 0.0921, + "step": 57820 + }, + { + "epoch": 2.7, + "learning_rate": 1.1096358534635463e-05, + "loss": 0.0193, + "step": 57825 + }, + { + "epoch": 2.7, + "learning_rate": 1.1095574749580675e-05, + "loss": 0.0806, + "step": 57830 + }, + { + "epoch": 2.7, + "learning_rate": 1.109479096452589e-05, + "loss": 0.0968, + "step": 57835 + }, + { + "epoch": 2.7, + "learning_rate": 1.1094007179471103e-05, + "loss": 0.0627, + "step": 57840 + }, + { + "epoch": 2.7, + "learning_rate": 1.1093223394416317e-05, + "loss": 0.1318, + "step": 57845 + }, + { + "epoch": 2.7, + "learning_rate": 1.1092439609361529e-05, + "loss": 0.1039, + "step": 57850 + }, + { + "epoch": 2.7, + "learning_rate": 1.1091655824306744e-05, + "loss": 0.1956, + "step": 57855 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090872039251957e-05, + "loss": 0.1967, + "step": 57860 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090088254197169e-05, + "loss": 0.4312, + "step": 57865 + }, + { + "epoch": 2.7, + "learning_rate": 1.1089304469142383e-05, + "loss": 0.054, + "step": 57870 + }, + { + "epoch": 2.7, + "learning_rate": 1.1088520684087597e-05, + "loss": 0.0584, + "step": 57875 + }, + { + "epoch": 2.7, + "learning_rate": 1.108773689903281e-05, + "loss": 0.0745, + "step": 57880 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086953113978023e-05, + "loss": 0.0903, + "step": 57885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086169328923237e-05, + "loss": 0.0683, + "step": 57890 + }, + { + "epoch": 2.7, + "learning_rate": 1.108538554386845e-05, + "loss": 0.1025, + "step": 57895 + }, + { + "epoch": 2.7, + "learning_rate": 1.1084601758813664e-05, + "loss": 0.129, + "step": 57900 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083817973758877e-05, + "loss": 0.0918, + "step": 57905 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083034188704092e-05, + "loss": 0.1841, + "step": 57910 + }, + { + "epoch": 2.7, + "learning_rate": 1.1082250403649304e-05, + "loss": 0.237, + "step": 57915 + }, + { + "epoch": 2.7, + "learning_rate": 1.1081466618594518e-05, + "loss": 0.1046, + "step": 57920 + }, + { + "epoch": 2.7, + "learning_rate": 1.108068283353973e-05, + "loss": 0.0991, + "step": 57925 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079899048484943e-05, + "loss": 0.0322, + "step": 57930 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079115263430158e-05, + "loss": 0.0464, + "step": 57935 + }, + { + "epoch": 2.7, + "learning_rate": 1.107833147837537e-05, + "loss": 0.0611, + "step": 57940 + }, + { + "epoch": 2.7, + "learning_rate": 1.1077547693320585e-05, + "loss": 0.1024, + "step": 57945 + }, + { + "epoch": 2.7, + "learning_rate": 1.1076763908265797e-05, + "loss": 0.2626, + "step": 57950 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075980123211012e-05, + "loss": 0.1249, + "step": 57955 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075196338156225e-05, + "loss": 0.2525, + "step": 57960 + }, + { + "epoch": 2.7, + "learning_rate": 1.1074412553101438e-05, + "loss": 0.2988, + "step": 57965 + }, + { + "epoch": 2.7, + "learning_rate": 1.107362876804665e-05, + "loss": 0.0208, + "step": 57970 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072844982991866e-05, + "loss": 0.0614, + "step": 57975 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072061197937078e-05, + "loss": 0.056, + "step": 57980 + }, + { + "epoch": 2.71, + "learning_rate": 1.1071277412882292e-05, + "loss": 0.0867, + "step": 57985 + }, + { + "epoch": 2.71, + "learning_rate": 1.1070493627827506e-05, + "loss": 0.0771, + "step": 57990 + }, + { + "epoch": 2.71, + "learning_rate": 1.1069709842772719e-05, + "loss": 0.1148, + "step": 57995 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068926057717932e-05, + "loss": 0.1713, + "step": 58000 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068142272663145e-05, + "loss": 0.1785, + "step": 58005 + }, + { + "epoch": 2.71, + "learning_rate": 1.106735848760836e-05, + "loss": 0.1555, + "step": 58010 + }, + { + "epoch": 2.71, + "learning_rate": 1.1066574702553572e-05, + "loss": 0.3208, + "step": 58015 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065790917498786e-05, + "loss": 0.0433, + "step": 58020 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065007132443999e-05, + "loss": 0.0256, + "step": 58025 + }, + { + "epoch": 2.71, + "learning_rate": 1.1064223347389214e-05, + "loss": 0.09, + "step": 58030 + }, + { + "epoch": 2.71, + "learning_rate": 1.1063439562334426e-05, + "loss": 0.0716, + "step": 58035 + }, + { + "epoch": 2.71, + "learning_rate": 1.106265577727964e-05, + "loss": 0.0298, + "step": 58040 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061871992224852e-05, + "loss": 0.0745, + "step": 58045 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061088207170068e-05, + "loss": 0.1625, + "step": 58050 + }, + { + "epoch": 2.71, + "learning_rate": 1.106030442211528e-05, + "loss": 0.1818, + "step": 58055 + }, + { + "epoch": 2.71, + "learning_rate": 1.1059520637060493e-05, + "loss": 0.2606, + "step": 58060 + }, + { + "epoch": 2.71, + "learning_rate": 1.1058736852005706e-05, + "loss": 0.2315, + "step": 58065 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057953066950919e-05, + "loss": 0.0346, + "step": 58070 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057169281896134e-05, + "loss": 0.0731, + "step": 58075 + }, + { + "epoch": 2.71, + "learning_rate": 1.1056385496841346e-05, + "loss": 0.0567, + "step": 58080 + }, + { + "epoch": 2.71, + "learning_rate": 1.105560171178656e-05, + "loss": 0.0793, + "step": 58085 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054817926731774e-05, + "loss": 0.0618, + "step": 58090 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054034141676988e-05, + "loss": 0.1303, + "step": 58095 + }, + { + "epoch": 2.71, + "learning_rate": 1.10532503566222e-05, + "loss": 0.1785, + "step": 58100 + }, + { + "epoch": 2.71, + "learning_rate": 1.1052466571567414e-05, + "loss": 0.197, + "step": 58105 + }, + { + "epoch": 2.71, + "learning_rate": 1.1051682786512628e-05, + "loss": 0.3032, + "step": 58110 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050899001457842e-05, + "loss": 0.3066, + "step": 58115 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050115216403054e-05, + "loss": 0.0911, + "step": 58120 + }, + { + "epoch": 2.71, + "learning_rate": 1.1049331431348267e-05, + "loss": 0.0323, + "step": 58125 + }, + { + "epoch": 2.71, + "learning_rate": 1.1048547646293482e-05, + "loss": 0.0549, + "step": 58130 + }, + { + "epoch": 2.71, + "learning_rate": 1.1047763861238694e-05, + "loss": 0.1821, + "step": 58135 + }, + { + "epoch": 2.71, + "learning_rate": 1.1046980076183908e-05, + "loss": 0.0566, + "step": 58140 + }, + { + "epoch": 2.71, + "learning_rate": 1.104619629112912e-05, + "loss": 0.0821, + "step": 58145 + }, + { + "epoch": 2.71, + "learning_rate": 1.1045412506074336e-05, + "loss": 0.1769, + "step": 58150 + }, + { + "epoch": 2.71, + "learning_rate": 1.1044628721019548e-05, + "loss": 0.1196, + "step": 58155 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043844935964762e-05, + "loss": 0.1464, + "step": 58160 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043061150909974e-05, + "loss": 0.2286, + "step": 58165 + }, + { + "epoch": 2.71, + "learning_rate": 1.104227736585519e-05, + "loss": 0.0928, + "step": 58170 + }, + { + "epoch": 2.71, + "learning_rate": 1.1041493580800402e-05, + "loss": 0.0066, + "step": 58175 + }, + { + "epoch": 2.71, + "learning_rate": 1.1040709795745616e-05, + "loss": 0.0367, + "step": 58180 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039926010690828e-05, + "loss": 0.0453, + "step": 58185 + }, + { + "epoch": 2.72, + "learning_rate": 1.1039142225636042e-05, + "loss": 0.0471, + "step": 58190 + }, + { + "epoch": 2.72, + "learning_rate": 1.1038358440581256e-05, + "loss": 0.1814, + "step": 58195 + }, + { + "epoch": 2.72, + "learning_rate": 1.1037574655526468e-05, + "loss": 0.1526, + "step": 58200 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036790870471684e-05, + "loss": 0.2725, + "step": 58205 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036007085416896e-05, + "loss": 0.1844, + "step": 58210 + }, + { + "epoch": 2.72, + "learning_rate": 1.103522330036211e-05, + "loss": 0.1978, + "step": 58215 + }, + { + "epoch": 2.72, + "learning_rate": 1.1034439515307322e-05, + "loss": 0.1005, + "step": 58220 + }, + { + "epoch": 2.72, + "learning_rate": 1.1033655730252538e-05, + "loss": 0.0466, + "step": 58225 + }, + { + "epoch": 2.72, + "learning_rate": 1.103287194519775e-05, + "loss": 0.0683, + "step": 58230 + }, + { + "epoch": 2.72, + "learning_rate": 1.1032088160142964e-05, + "loss": 0.0518, + "step": 58235 + }, + { + "epoch": 2.72, + "learning_rate": 1.1031304375088176e-05, + "loss": 0.0813, + "step": 58240 + }, + { + "epoch": 2.72, + "learning_rate": 1.1030520590033392e-05, + "loss": 0.0502, + "step": 58245 + }, + { + "epoch": 2.72, + "learning_rate": 1.1029736804978604e-05, + "loss": 0.1395, + "step": 58250 + }, + { + "epoch": 2.72, + "learning_rate": 1.1028953019923816e-05, + "loss": 0.0872, + "step": 58255 + }, + { + "epoch": 2.72, + "learning_rate": 1.102816923486903e-05, + "loss": 0.3588, + "step": 58260 + }, + { + "epoch": 2.72, + "learning_rate": 1.1027385449814242e-05, + "loss": 0.2024, + "step": 58265 + }, + { + "epoch": 2.72, + "learning_rate": 1.1026601664759458e-05, + "loss": 0.1045, + "step": 58270 + }, + { + "epoch": 2.72, + "learning_rate": 1.102581787970467e-05, + "loss": 0.0733, + "step": 58275 + }, + { + "epoch": 2.72, + "learning_rate": 1.1025034094649884e-05, + "loss": 0.0158, + "step": 58280 + }, + { + "epoch": 2.72, + "learning_rate": 1.1024250309595096e-05, + "loss": 0.0791, + "step": 58285 + }, + { + "epoch": 2.72, + "learning_rate": 1.1023466524540312e-05, + "loss": 0.0812, + "step": 58290 + }, + { + "epoch": 2.72, + "learning_rate": 1.1022682739485524e-05, + "loss": 0.0696, + "step": 58295 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021898954430738e-05, + "loss": 0.1509, + "step": 58300 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021115169375952e-05, + "loss": 0.1556, + "step": 58305 + }, + { + "epoch": 2.72, + "learning_rate": 1.1020331384321166e-05, + "loss": 0.2344, + "step": 58310 + }, + { + "epoch": 2.72, + "learning_rate": 1.1019547599266378e-05, + "loss": 0.3865, + "step": 58315 + }, + { + "epoch": 2.72, + "learning_rate": 1.101876381421159e-05, + "loss": 0.0351, + "step": 58320 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017980029156806e-05, + "loss": 0.0247, + "step": 58325 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017196244102018e-05, + "loss": 0.034, + "step": 58330 + }, + { + "epoch": 2.72, + "learning_rate": 1.1016412459047232e-05, + "loss": 0.0443, + "step": 58335 + }, + { + "epoch": 2.72, + "learning_rate": 1.1015628673992444e-05, + "loss": 0.0533, + "step": 58340 + }, + { + "epoch": 2.72, + "learning_rate": 1.101484488893766e-05, + "loss": 0.113, + "step": 58345 + }, + { + "epoch": 2.72, + "learning_rate": 1.1014061103882872e-05, + "loss": 0.1294, + "step": 58350 + }, + { + "epoch": 2.72, + "learning_rate": 1.1013277318828086e-05, + "loss": 0.2372, + "step": 58355 + }, + { + "epoch": 2.72, + "learning_rate": 1.1012493533773298e-05, + "loss": 0.1815, + "step": 58360 + }, + { + "epoch": 2.72, + "learning_rate": 1.1011709748718514e-05, + "loss": 0.4188, + "step": 58365 + }, + { + "epoch": 2.72, + "learning_rate": 1.1010925963663726e-05, + "loss": 0.0725, + "step": 58370 + }, + { + "epoch": 2.72, + "learning_rate": 1.101014217860894e-05, + "loss": 0.0558, + "step": 58375 + }, + { + "epoch": 2.72, + "learning_rate": 1.1009358393554152e-05, + "loss": 0.0533, + "step": 58380 + }, + { + "epoch": 2.72, + "learning_rate": 1.1008574608499364e-05, + "loss": 0.0649, + "step": 58385 + }, + { + "epoch": 2.72, + "learning_rate": 1.100779082344458e-05, + "loss": 0.0457, + "step": 58390 + }, + { + "epoch": 2.72, + "learning_rate": 1.1007007038389792e-05, + "loss": 0.063, + "step": 58395 + }, + { + "epoch": 2.73, + "learning_rate": 1.1006223253335006e-05, + "loss": 0.2506, + "step": 58400 + }, + { + "epoch": 2.73, + "learning_rate": 1.100543946828022e-05, + "loss": 0.1326, + "step": 58405 + }, + { + "epoch": 2.73, + "learning_rate": 1.1004655683225434e-05, + "loss": 0.2099, + "step": 58410 + }, + { + "epoch": 2.73, + "learning_rate": 1.1003871898170646e-05, + "loss": 0.2704, + "step": 58415 + }, + { + "epoch": 2.73, + "learning_rate": 1.100308811311586e-05, + "loss": 0.0142, + "step": 58420 + }, + { + "epoch": 2.73, + "learning_rate": 1.1002304328061074e-05, + "loss": 0.0844, + "step": 58425 + }, + { + "epoch": 2.73, + "learning_rate": 1.1001520543006288e-05, + "loss": 0.0416, + "step": 58430 + }, + { + "epoch": 2.73, + "learning_rate": 1.10007367579515e-05, + "loss": 0.0186, + "step": 58435 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999952972896715e-05, + "loss": 0.0636, + "step": 58440 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999169187841928e-05, + "loss": 0.1073, + "step": 58445 + }, + { + "epoch": 2.73, + "learning_rate": 1.099838540278714e-05, + "loss": 0.1895, + "step": 58450 + }, + { + "epoch": 2.73, + "learning_rate": 1.0997601617732354e-05, + "loss": 0.157, + "step": 58455 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996817832677566e-05, + "loss": 0.2624, + "step": 58460 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996034047622782e-05, + "loss": 0.2066, + "step": 58465 + }, + { + "epoch": 2.73, + "learning_rate": 1.0995250262567994e-05, + "loss": 0.0556, + "step": 58470 + }, + { + "epoch": 2.73, + "learning_rate": 1.0994466477513208e-05, + "loss": 0.0346, + "step": 58475 + }, + { + "epoch": 2.73, + "learning_rate": 1.099368269245842e-05, + "loss": 0.0719, + "step": 58480 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992898907403636e-05, + "loss": 0.064, + "step": 58485 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992115122348848e-05, + "loss": 0.0563, + "step": 58490 + }, + { + "epoch": 2.73, + "learning_rate": 1.0991331337294062e-05, + "loss": 0.1556, + "step": 58495 + }, + { + "epoch": 2.73, + "learning_rate": 1.0990547552239274e-05, + "loss": 0.1548, + "step": 58500 + }, + { + "epoch": 2.73, + "learning_rate": 1.098976376718449e-05, + "loss": 0.1101, + "step": 58505 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988979982129702e-05, + "loss": 0.3026, + "step": 58510 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988196197074914e-05, + "loss": 0.3281, + "step": 58515 + }, + { + "epoch": 2.73, + "learning_rate": 1.098741241202013e-05, + "loss": 0.1004, + "step": 58520 + }, + { + "epoch": 2.73, + "learning_rate": 1.0986628626965342e-05, + "loss": 0.0659, + "step": 58525 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985844841910556e-05, + "loss": 0.0501, + "step": 58530 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985061056855768e-05, + "loss": 0.0952, + "step": 58535 + }, + { + "epoch": 2.73, + "learning_rate": 1.0984277271800983e-05, + "loss": 0.1421, + "step": 58540 + }, + { + "epoch": 2.73, + "learning_rate": 1.0983493486746196e-05, + "loss": 0.0672, + "step": 58545 + }, + { + "epoch": 2.73, + "learning_rate": 1.098270970169141e-05, + "loss": 0.1645, + "step": 58550 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981925916636622e-05, + "loss": 0.2002, + "step": 58555 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981142131581837e-05, + "loss": 0.2722, + "step": 58560 + }, + { + "epoch": 2.73, + "learning_rate": 1.098035834652705e-05, + "loss": 0.3582, + "step": 58565 + }, + { + "epoch": 2.73, + "learning_rate": 1.0979574561472263e-05, + "loss": 0.0799, + "step": 58570 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978790776417476e-05, + "loss": 0.0616, + "step": 58575 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978006991362688e-05, + "loss": 0.0691, + "step": 58580 + }, + { + "epoch": 2.73, + "learning_rate": 1.0977223206307903e-05, + "loss": 0.0437, + "step": 58585 + }, + { + "epoch": 2.73, + "learning_rate": 1.0976439421253116e-05, + "loss": 0.0749, + "step": 58590 + }, + { + "epoch": 2.73, + "learning_rate": 1.097565563619833e-05, + "loss": 0.0765, + "step": 58595 + }, + { + "epoch": 2.73, + "learning_rate": 1.0975028608154502e-05, + "loss": 0.0997, + "step": 58600 + }, + { + "epoch": 2.73, + "learning_rate": 1.0974244823099714e-05, + "loss": 0.1803, + "step": 58605 + }, + { + "epoch": 2.73, + "learning_rate": 1.0973461038044928e-05, + "loss": 0.0981, + "step": 58610 + }, + { + "epoch": 2.74, + "learning_rate": 1.097267725299014e-05, + "loss": 0.3792, + "step": 58615 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971893467935356e-05, + "loss": 0.0301, + "step": 58620 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971109682880568e-05, + "loss": 0.0599, + "step": 58625 + }, + { + "epoch": 2.74, + "learning_rate": 1.0970325897825782e-05, + "loss": 0.0407, + "step": 58630 + }, + { + "epoch": 2.74, + "learning_rate": 1.0969542112770994e-05, + "loss": 0.0282, + "step": 58635 + }, + { + "epoch": 2.74, + "learning_rate": 1.096875832771621e-05, + "loss": 0.1213, + "step": 58640 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967974542661422e-05, + "loss": 0.0466, + "step": 58645 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967190757606634e-05, + "loss": 0.1724, + "step": 58650 + }, + { + "epoch": 2.74, + "learning_rate": 1.0966406972551848e-05, + "loss": 0.127, + "step": 58655 + }, + { + "epoch": 2.74, + "learning_rate": 1.096562318749706e-05, + "loss": 0.1452, + "step": 58660 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964839402442276e-05, + "loss": 0.1929, + "step": 58665 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964055617387488e-05, + "loss": 0.0849, + "step": 58670 + }, + { + "epoch": 2.74, + "learning_rate": 1.0963271832332702e-05, + "loss": 0.0543, + "step": 58675 + }, + { + "epoch": 2.74, + "learning_rate": 1.0962488047277916e-05, + "loss": 0.0736, + "step": 58680 + }, + { + "epoch": 2.74, + "learning_rate": 1.096170426222313e-05, + "loss": 0.0518, + "step": 58685 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960920477168342e-05, + "loss": 0.0852, + "step": 58690 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960136692113556e-05, + "loss": 0.1975, + "step": 58695 + }, + { + "epoch": 2.74, + "learning_rate": 1.095935290705877e-05, + "loss": 0.1414, + "step": 58700 + }, + { + "epoch": 2.74, + "learning_rate": 1.0958569122003984e-05, + "loss": 0.2228, + "step": 58705 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957785336949196e-05, + "loss": 0.1912, + "step": 58710 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957001551894408e-05, + "loss": 0.2865, + "step": 58715 + }, + { + "epoch": 2.74, + "learning_rate": 1.0956217766839624e-05, + "loss": 0.0588, + "step": 58720 + }, + { + "epoch": 2.74, + "learning_rate": 1.0955433981784836e-05, + "loss": 0.0375, + "step": 58725 + }, + { + "epoch": 2.74, + "learning_rate": 1.095465019673005e-05, + "loss": 0.1023, + "step": 58730 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953866411675262e-05, + "loss": 0.0744, + "step": 58735 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953082626620478e-05, + "loss": 0.114, + "step": 58740 + }, + { + "epoch": 2.74, + "learning_rate": 1.095229884156569e-05, + "loss": 0.1044, + "step": 58745 + }, + { + "epoch": 2.74, + "learning_rate": 1.0951515056510904e-05, + "loss": 0.1161, + "step": 58750 + }, + { + "epoch": 2.74, + "learning_rate": 1.0950731271456116e-05, + "loss": 0.1975, + "step": 58755 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949947486401331e-05, + "loss": 0.2124, + "step": 58760 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949163701346544e-05, + "loss": 0.4417, + "step": 58765 + }, + { + "epoch": 2.74, + "learning_rate": 1.0948379916291758e-05, + "loss": 0.0748, + "step": 58770 + }, + { + "epoch": 2.74, + "learning_rate": 1.094759613123697e-05, + "loss": 0.0129, + "step": 58775 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946812346182184e-05, + "loss": 0.0311, + "step": 58780 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946028561127398e-05, + "loss": 0.1198, + "step": 58785 + }, + { + "epoch": 2.74, + "learning_rate": 1.094524477607261e-05, + "loss": 0.0663, + "step": 58790 + }, + { + "epoch": 2.74, + "learning_rate": 1.0944460991017824e-05, + "loss": 0.0381, + "step": 58795 + }, + { + "epoch": 2.74, + "learning_rate": 1.0943677205963038e-05, + "loss": 0.1238, + "step": 58800 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942893420908252e-05, + "loss": 0.213, + "step": 58805 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942109635853464e-05, + "loss": 0.1942, + "step": 58810 + }, + { + "epoch": 2.74, + "learning_rate": 1.094132585079868e-05, + "loss": 0.349, + "step": 58815 + }, + { + "epoch": 2.74, + "learning_rate": 1.0940542065743892e-05, + "loss": 0.0369, + "step": 58820 + }, + { + "epoch": 2.74, + "learning_rate": 1.0939758280689105e-05, + "loss": 0.0544, + "step": 58825 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938974495634318e-05, + "loss": 0.0355, + "step": 58830 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938190710579533e-05, + "loss": 0.0572, + "step": 58835 + }, + { + "epoch": 2.75, + "learning_rate": 1.0937406925524746e-05, + "loss": 0.1182, + "step": 58840 + }, + { + "epoch": 2.75, + "learning_rate": 1.0936623140469958e-05, + "loss": 0.1305, + "step": 58845 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935839355415172e-05, + "loss": 0.1772, + "step": 58850 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935055570360384e-05, + "loss": 0.1283, + "step": 58855 + }, + { + "epoch": 2.75, + "learning_rate": 1.09342717853056e-05, + "loss": 0.1314, + "step": 58860 + }, + { + "epoch": 2.75, + "learning_rate": 1.0933488000250812e-05, + "loss": 0.2568, + "step": 58865 + }, + { + "epoch": 2.75, + "learning_rate": 1.0932704215196026e-05, + "loss": 0.0676, + "step": 58870 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931920430141238e-05, + "loss": 0.0313, + "step": 58875 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931136645086453e-05, + "loss": 0.0603, + "step": 58880 + }, + { + "epoch": 2.75, + "learning_rate": 1.0930352860031666e-05, + "loss": 0.0634, + "step": 58885 + }, + { + "epoch": 2.75, + "learning_rate": 1.092956907497688e-05, + "loss": 0.0677, + "step": 58890 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928785289922092e-05, + "loss": 0.0863, + "step": 58895 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928001504867307e-05, + "loss": 0.063, + "step": 58900 + }, + { + "epoch": 2.75, + "learning_rate": 1.092721771981252e-05, + "loss": 0.17, + "step": 58905 + }, + { + "epoch": 2.75, + "learning_rate": 1.0926433934757732e-05, + "loss": 0.1749, + "step": 58910 + }, + { + "epoch": 2.75, + "learning_rate": 1.0925650149702947e-05, + "loss": 0.2382, + "step": 58915 + }, + { + "epoch": 2.75, + "learning_rate": 1.092486636464816e-05, + "loss": 0.0386, + "step": 58920 + }, + { + "epoch": 2.75, + "learning_rate": 1.0924082579593373e-05, + "loss": 0.0822, + "step": 58925 + }, + { + "epoch": 2.75, + "learning_rate": 1.0923298794538586e-05, + "loss": 0.0393, + "step": 58930 + }, + { + "epoch": 2.75, + "learning_rate": 1.0922515009483801e-05, + "loss": 0.0475, + "step": 58935 + }, + { + "epoch": 2.75, + "learning_rate": 1.0921731224429013e-05, + "loss": 0.0143, + "step": 58940 + }, + { + "epoch": 2.75, + "learning_rate": 1.0920947439374227e-05, + "loss": 0.1933, + "step": 58945 + }, + { + "epoch": 2.75, + "learning_rate": 1.092016365431944e-05, + "loss": 0.1644, + "step": 58950 + }, + { + "epoch": 2.75, + "learning_rate": 1.0919379869264655e-05, + "loss": 0.1502, + "step": 58955 + }, + { + "epoch": 2.75, + "learning_rate": 1.0918596084209867e-05, + "loss": 0.1974, + "step": 58960 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917812299155081e-05, + "loss": 0.2703, + "step": 58965 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917028514100293e-05, + "loss": 0.0824, + "step": 58970 + }, + { + "epoch": 2.75, + "learning_rate": 1.0916244729045506e-05, + "loss": 0.0104, + "step": 58975 + }, + { + "epoch": 2.75, + "learning_rate": 1.0915460943990721e-05, + "loss": 0.0975, + "step": 58980 + }, + { + "epoch": 2.75, + "learning_rate": 1.0914677158935934e-05, + "loss": 0.0575, + "step": 58985 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913893373881147e-05, + "loss": 0.059, + "step": 58990 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913109588826361e-05, + "loss": 0.1536, + "step": 58995 + }, + { + "epoch": 2.75, + "learning_rate": 1.0912325803771575e-05, + "loss": 0.1253, + "step": 59000 + }, + { + "epoch": 2.75, + "learning_rate": 1.0911542018716787e-05, + "loss": 0.198, + "step": 59005 + }, + { + "epoch": 2.75, + "learning_rate": 1.0910758233662001e-05, + "loss": 0.2212, + "step": 59010 + }, + { + "epoch": 2.75, + "learning_rate": 1.0909974448607215e-05, + "loss": 0.3376, + "step": 59015 + }, + { + "epoch": 2.75, + "learning_rate": 1.090919066355243e-05, + "loss": 0.0407, + "step": 59020 + }, + { + "epoch": 2.75, + "learning_rate": 1.0908406878497641e-05, + "loss": 0.0796, + "step": 59025 + }, + { + "epoch": 2.75, + "learning_rate": 1.0907623093442857e-05, + "loss": 0.0662, + "step": 59030 + }, + { + "epoch": 2.75, + "learning_rate": 1.090683930838807e-05, + "loss": 0.0887, + "step": 59035 + }, + { + "epoch": 2.75, + "learning_rate": 1.0906055523333281e-05, + "loss": 0.0557, + "step": 59040 + }, + { + "epoch": 2.76, + "learning_rate": 1.0905271738278495e-05, + "loss": 0.1039, + "step": 59045 + }, + { + "epoch": 2.76, + "learning_rate": 1.0904487953223708e-05, + "loss": 0.1408, + "step": 59050 + }, + { + "epoch": 2.76, + "learning_rate": 1.0903704168168923e-05, + "loss": 0.1745, + "step": 59055 + }, + { + "epoch": 2.76, + "learning_rate": 1.0902920383114135e-05, + "loss": 0.2041, + "step": 59060 + }, + { + "epoch": 2.76, + "learning_rate": 1.090213659805935e-05, + "loss": 0.3081, + "step": 59065 + }, + { + "epoch": 2.76, + "learning_rate": 1.0901352813004561e-05, + "loss": 0.0728, + "step": 59070 + }, + { + "epoch": 2.76, + "learning_rate": 1.0900569027949777e-05, + "loss": 0.0263, + "step": 59075 + }, + { + "epoch": 2.76, + "learning_rate": 1.089978524289499e-05, + "loss": 0.0564, + "step": 59080 + }, + { + "epoch": 2.76, + "learning_rate": 1.0899001457840203e-05, + "loss": 0.0237, + "step": 59085 + }, + { + "epoch": 2.76, + "learning_rate": 1.0898217672785415e-05, + "loss": 0.1018, + "step": 59090 + }, + { + "epoch": 2.76, + "learning_rate": 1.0897433887730631e-05, + "loss": 0.1151, + "step": 59095 + }, + { + "epoch": 2.76, + "learning_rate": 1.0896650102675843e-05, + "loss": 0.147, + "step": 59100 + }, + { + "epoch": 2.76, + "learning_rate": 1.0895866317621055e-05, + "loss": 0.1815, + "step": 59105 + }, + { + "epoch": 2.76, + "learning_rate": 1.089508253256627e-05, + "loss": 0.1446, + "step": 59110 + }, + { + "epoch": 2.76, + "learning_rate": 1.0894298747511483e-05, + "loss": 0.2181, + "step": 59115 + }, + { + "epoch": 2.76, + "learning_rate": 1.0893514962456697e-05, + "loss": 0.0203, + "step": 59120 + }, + { + "epoch": 2.76, + "learning_rate": 1.089273117740191e-05, + "loss": 0.0237, + "step": 59125 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891947392347125e-05, + "loss": 0.0479, + "step": 59130 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891163607292337e-05, + "loss": 0.0397, + "step": 59135 + }, + { + "epoch": 2.76, + "learning_rate": 1.0890379822237551e-05, + "loss": 0.0788, + "step": 59140 + }, + { + "epoch": 2.76, + "learning_rate": 1.0889596037182763e-05, + "loss": 0.1114, + "step": 59145 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888812252127979e-05, + "loss": 0.1694, + "step": 59150 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888028467073191e-05, + "loss": 0.2413, + "step": 59155 + }, + { + "epoch": 2.76, + "learning_rate": 1.0887244682018405e-05, + "loss": 0.2377, + "step": 59160 + }, + { + "epoch": 2.76, + "learning_rate": 1.0886460896963617e-05, + "loss": 0.2335, + "step": 59165 + }, + { + "epoch": 2.76, + "learning_rate": 1.088567711190883e-05, + "loss": 0.0697, + "step": 59170 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884893326854045e-05, + "loss": 0.0551, + "step": 59175 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884109541799257e-05, + "loss": 0.0589, + "step": 59180 + }, + { + "epoch": 2.76, + "learning_rate": 1.0883325756744471e-05, + "loss": 0.0893, + "step": 59185 + }, + { + "epoch": 2.76, + "learning_rate": 1.0882541971689683e-05, + "loss": 0.1181, + "step": 59190 + }, + { + "epoch": 2.76, + "learning_rate": 1.0881758186634899e-05, + "loss": 0.0627, + "step": 59195 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880974401580111e-05, + "loss": 0.0549, + "step": 59200 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880190616525325e-05, + "loss": 0.1895, + "step": 59205 + }, + { + "epoch": 2.76, + "learning_rate": 1.0879406831470539e-05, + "loss": 0.1951, + "step": 59210 + }, + { + "epoch": 2.76, + "learning_rate": 1.0878623046415753e-05, + "loss": 0.2903, + "step": 59215 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877839261360965e-05, + "loss": 0.0543, + "step": 59220 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877055476306179e-05, + "loss": 0.0562, + "step": 59225 + }, + { + "epoch": 2.76, + "learning_rate": 1.0876271691251393e-05, + "loss": 0.0212, + "step": 59230 + }, + { + "epoch": 2.76, + "learning_rate": 1.0875487906196605e-05, + "loss": 0.0372, + "step": 59235 + }, + { + "epoch": 2.76, + "learning_rate": 1.0874704121141819e-05, + "loss": 0.0557, + "step": 59240 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873920336087031e-05, + "loss": 0.0941, + "step": 59245 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873136551032247e-05, + "loss": 0.1335, + "step": 59250 + }, + { + "epoch": 2.76, + "learning_rate": 1.0872352765977459e-05, + "loss": 0.1388, + "step": 59255 + }, + { + "epoch": 2.77, + "learning_rate": 1.0871568980922673e-05, + "loss": 0.3518, + "step": 59260 + }, + { + "epoch": 2.77, + "learning_rate": 1.0870785195867885e-05, + "loss": 0.2424, + "step": 59265 + }, + { + "epoch": 2.77, + "learning_rate": 1.08700014108131e-05, + "loss": 0.0594, + "step": 59270 + }, + { + "epoch": 2.77, + "learning_rate": 1.0869217625758313e-05, + "loss": 0.0566, + "step": 59275 + }, + { + "epoch": 2.77, + "learning_rate": 1.0868433840703527e-05, + "loss": 0.0809, + "step": 59280 + }, + { + "epoch": 2.77, + "learning_rate": 1.0867650055648739e-05, + "loss": 0.0813, + "step": 59285 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866866270593955e-05, + "loss": 0.1093, + "step": 59290 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866082485539167e-05, + "loss": 0.0765, + "step": 59295 + }, + { + "epoch": 2.77, + "learning_rate": 1.0865298700484379e-05, + "loss": 0.1189, + "step": 59300 + }, + { + "epoch": 2.77, + "learning_rate": 1.0864514915429593e-05, + "loss": 0.2332, + "step": 59305 + }, + { + "epoch": 2.77, + "learning_rate": 1.0863731130374807e-05, + "loss": 0.2367, + "step": 59310 + }, + { + "epoch": 2.77, + "learning_rate": 1.086294734532002e-05, + "loss": 0.3552, + "step": 59315 + }, + { + "epoch": 2.77, + "learning_rate": 1.0862163560265233e-05, + "loss": 0.0216, + "step": 59320 + }, + { + "epoch": 2.77, + "learning_rate": 1.0861379775210447e-05, + "loss": 0.0542, + "step": 59325 + }, + { + "epoch": 2.77, + "learning_rate": 1.086059599015566e-05, + "loss": 0.0626, + "step": 59330 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859812205100875e-05, + "loss": 0.0793, + "step": 59335 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859028420046087e-05, + "loss": 0.0943, + "step": 59340 + }, + { + "epoch": 2.77, + "learning_rate": 1.0858244634991303e-05, + "loss": 0.1106, + "step": 59345 + }, + { + "epoch": 2.77, + "learning_rate": 1.0857460849936515e-05, + "loss": 0.1345, + "step": 59350 + }, + { + "epoch": 2.77, + "learning_rate": 1.0856677064881729e-05, + "loss": 0.2081, + "step": 59355 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855893279826941e-05, + "loss": 0.3029, + "step": 59360 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855109494772153e-05, + "loss": 0.2904, + "step": 59365 + }, + { + "epoch": 2.77, + "learning_rate": 1.0854325709717369e-05, + "loss": 0.0358, + "step": 59370 + }, + { + "epoch": 2.77, + "learning_rate": 1.0853541924662581e-05, + "loss": 0.0264, + "step": 59375 + }, + { + "epoch": 2.77, + "learning_rate": 1.0852758139607795e-05, + "loss": 0.0394, + "step": 59380 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851974354553007e-05, + "loss": 0.0479, + "step": 59385 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851190569498223e-05, + "loss": 0.0999, + "step": 59390 + }, + { + "epoch": 2.77, + "learning_rate": 1.0850406784443435e-05, + "loss": 0.0661, + "step": 59395 + }, + { + "epoch": 2.77, + "learning_rate": 1.0849622999388649e-05, + "loss": 0.0937, + "step": 59400 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848839214333861e-05, + "loss": 0.1815, + "step": 59405 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848055429279077e-05, + "loss": 0.3394, + "step": 59410 + }, + { + "epoch": 2.77, + "learning_rate": 1.0847271644224289e-05, + "loss": 0.3475, + "step": 59415 + }, + { + "epoch": 2.77, + "learning_rate": 1.0846487859169503e-05, + "loss": 0.0382, + "step": 59420 + }, + { + "epoch": 2.77, + "learning_rate": 1.0845704074114715e-05, + "loss": 0.0969, + "step": 59425 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844920289059929e-05, + "loss": 0.0548, + "step": 59430 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844136504005143e-05, + "loss": 0.0841, + "step": 59435 + }, + { + "epoch": 2.77, + "learning_rate": 1.0843352718950355e-05, + "loss": 0.0781, + "step": 59440 + }, + { + "epoch": 2.77, + "learning_rate": 1.084256893389557e-05, + "loss": 0.1071, + "step": 59445 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841785148840783e-05, + "loss": 0.1219, + "step": 59450 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841001363785997e-05, + "loss": 0.1605, + "step": 59455 + }, + { + "epoch": 2.77, + "learning_rate": 1.0840217578731209e-05, + "loss": 0.3221, + "step": 59460 + }, + { + "epoch": 2.77, + "learning_rate": 1.0839433793676424e-05, + "loss": 0.2631, + "step": 59465 + }, + { + "epoch": 2.77, + "learning_rate": 1.0838650008621637e-05, + "loss": 0.0423, + "step": 59470 + }, + { + "epoch": 2.78, + "learning_rate": 1.083786622356685e-05, + "loss": 0.0269, + "step": 59475 + }, + { + "epoch": 2.78, + "learning_rate": 1.0837082438512063e-05, + "loss": 0.0213, + "step": 59480 + }, + { + "epoch": 2.78, + "learning_rate": 1.0836298653457278e-05, + "loss": 0.0742, + "step": 59485 + }, + { + "epoch": 2.78, + "learning_rate": 1.083551486840249e-05, + "loss": 0.0317, + "step": 59490 + }, + { + "epoch": 2.78, + "learning_rate": 1.0834731083347703e-05, + "loss": 0.1428, + "step": 59495 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833947298292917e-05, + "loss": 0.1145, + "step": 59500 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833163513238129e-05, + "loss": 0.1002, + "step": 59505 + }, + { + "epoch": 2.78, + "learning_rate": 1.0832379728183344e-05, + "loss": 0.2298, + "step": 59510 + }, + { + "epoch": 2.78, + "learning_rate": 1.0831595943128557e-05, + "loss": 0.3871, + "step": 59515 + }, + { + "epoch": 2.78, + "learning_rate": 1.083081215807377e-05, + "loss": 0.03, + "step": 59520 + }, + { + "epoch": 2.78, + "learning_rate": 1.0830028373018985e-05, + "loss": 0.0571, + "step": 59525 + }, + { + "epoch": 2.78, + "learning_rate": 1.0829244587964198e-05, + "loss": 0.0582, + "step": 59530 + }, + { + "epoch": 2.78, + "learning_rate": 1.082846080290941e-05, + "loss": 0.0255, + "step": 59535 + }, + { + "epoch": 2.78, + "learning_rate": 1.0827677017854625e-05, + "loss": 0.0613, + "step": 59540 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826893232799838e-05, + "loss": 0.0576, + "step": 59545 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826109447745052e-05, + "loss": 0.172, + "step": 59550 + }, + { + "epoch": 2.78, + "learning_rate": 1.0825325662690265e-05, + "loss": 0.1161, + "step": 59555 + }, + { + "epoch": 2.78, + "learning_rate": 1.0824541877635477e-05, + "loss": 0.3, + "step": 59560 + }, + { + "epoch": 2.78, + "learning_rate": 1.0823758092580692e-05, + "loss": 0.3196, + "step": 59565 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822974307525905e-05, + "loss": 0.0401, + "step": 59570 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822190522471118e-05, + "loss": 0.0501, + "step": 59575 + }, + { + "epoch": 2.78, + "learning_rate": 1.082140673741633e-05, + "loss": 0.03, + "step": 59580 + }, + { + "epoch": 2.78, + "learning_rate": 1.0820622952361546e-05, + "loss": 0.1153, + "step": 59585 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819839167306759e-05, + "loss": 0.095, + "step": 59590 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819055382251972e-05, + "loss": 0.0778, + "step": 59595 + }, + { + "epoch": 2.78, + "learning_rate": 1.0818271597197185e-05, + "loss": 0.0841, + "step": 59600 + }, + { + "epoch": 2.78, + "learning_rate": 1.08174878121424e-05, + "loss": 0.1166, + "step": 59605 + }, + { + "epoch": 2.78, + "learning_rate": 1.0816704027087612e-05, + "loss": 0.202, + "step": 59610 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815920242032826e-05, + "loss": 0.2211, + "step": 59615 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815136456978039e-05, + "loss": 0.0165, + "step": 59620 + }, + { + "epoch": 2.78, + "learning_rate": 1.0814352671923252e-05, + "loss": 0.0745, + "step": 59625 + }, + { + "epoch": 2.78, + "learning_rate": 1.0813568886868466e-05, + "loss": 0.0653, + "step": 59630 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812785101813679e-05, + "loss": 0.0922, + "step": 59635 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812001316758892e-05, + "loss": 0.1051, + "step": 59640 + }, + { + "epoch": 2.78, + "learning_rate": 1.0811217531704106e-05, + "loss": 0.1219, + "step": 59645 + }, + { + "epoch": 2.78, + "learning_rate": 1.081043374664932e-05, + "loss": 0.2201, + "step": 59650 + }, + { + "epoch": 2.78, + "learning_rate": 1.0809649961594533e-05, + "loss": 0.1585, + "step": 59655 + }, + { + "epoch": 2.78, + "learning_rate": 1.0808866176539748e-05, + "loss": 0.2065, + "step": 59660 + }, + { + "epoch": 2.78, + "learning_rate": 1.080808239148496e-05, + "loss": 0.3347, + "step": 59665 + }, + { + "epoch": 2.78, + "learning_rate": 1.0807298606430174e-05, + "loss": 0.0825, + "step": 59670 + }, + { + "epoch": 2.78, + "learning_rate": 1.0806514821375386e-05, + "loss": 0.0217, + "step": 59675 + }, + { + "epoch": 2.78, + "learning_rate": 1.0805731036320602e-05, + "loss": 0.0387, + "step": 59680 + }, + { + "epoch": 2.78, + "learning_rate": 1.0804947251265814e-05, + "loss": 0.0708, + "step": 59685 + }, + { + "epoch": 2.79, + "learning_rate": 1.0804163466211026e-05, + "loss": 0.1803, + "step": 59690 + }, + { + "epoch": 2.79, + "learning_rate": 1.080337968115624e-05, + "loss": 0.0929, + "step": 59695 + }, + { + "epoch": 2.79, + "learning_rate": 1.0802595896101453e-05, + "loss": 0.0763, + "step": 59700 + }, + { + "epoch": 2.79, + "learning_rate": 1.0801812111046668e-05, + "loss": 0.1241, + "step": 59705 + }, + { + "epoch": 2.79, + "learning_rate": 1.080102832599188e-05, + "loss": 0.2267, + "step": 59710 + }, + { + "epoch": 2.79, + "learning_rate": 1.0800244540937094e-05, + "loss": 0.1755, + "step": 59715 + }, + { + "epoch": 2.79, + "learning_rate": 1.0799460755882307e-05, + "loss": 0.0372, + "step": 59720 + }, + { + "epoch": 2.79, + "learning_rate": 1.0798676970827522e-05, + "loss": 0.0253, + "step": 59725 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797893185772734e-05, + "loss": 0.1087, + "step": 59730 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797109400717948e-05, + "loss": 0.0811, + "step": 59735 + }, + { + "epoch": 2.79, + "learning_rate": 1.079632561566316e-05, + "loss": 0.0563, + "step": 59740 + }, + { + "epoch": 2.79, + "learning_rate": 1.0795541830608376e-05, + "loss": 0.1129, + "step": 59745 + }, + { + "epoch": 2.79, + "learning_rate": 1.0794758045553588e-05, + "loss": 0.1649, + "step": 59750 + }, + { + "epoch": 2.79, + "learning_rate": 1.07939742604988e-05, + "loss": 0.1641, + "step": 59755 + }, + { + "epoch": 2.79, + "learning_rate": 1.0793190475444016e-05, + "loss": 0.2485, + "step": 59760 + }, + { + "epoch": 2.79, + "learning_rate": 1.0792406690389228e-05, + "loss": 0.2066, + "step": 59765 + }, + { + "epoch": 2.79, + "learning_rate": 1.0791622905334442e-05, + "loss": 0.0445, + "step": 59770 + }, + { + "epoch": 2.79, + "learning_rate": 1.0790839120279654e-05, + "loss": 0.0544, + "step": 59775 + }, + { + "epoch": 2.79, + "learning_rate": 1.079005533522487e-05, + "loss": 0.0232, + "step": 59780 + }, + { + "epoch": 2.79, + "learning_rate": 1.0789271550170082e-05, + "loss": 0.0835, + "step": 59785 + }, + { + "epoch": 2.79, + "learning_rate": 1.0788487765115296e-05, + "loss": 0.1308, + "step": 59790 + }, + { + "epoch": 2.79, + "learning_rate": 1.0787703980060508e-05, + "loss": 0.0412, + "step": 59795 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786920195005724e-05, + "loss": 0.2067, + "step": 59800 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786136409950936e-05, + "loss": 0.1801, + "step": 59805 + }, + { + "epoch": 2.79, + "learning_rate": 1.078535262489615e-05, + "loss": 0.2654, + "step": 59810 + }, + { + "epoch": 2.79, + "learning_rate": 1.0784568839841362e-05, + "loss": 0.2404, + "step": 59815 + }, + { + "epoch": 2.79, + "learning_rate": 1.0783785054786574e-05, + "loss": 0.0479, + "step": 59820 + }, + { + "epoch": 2.79, + "learning_rate": 1.078300126973179e-05, + "loss": 0.0855, + "step": 59825 + }, + { + "epoch": 2.79, + "learning_rate": 1.0782217484677002e-05, + "loss": 0.0729, + "step": 59830 + }, + { + "epoch": 2.79, + "learning_rate": 1.0781433699622216e-05, + "loss": 0.0526, + "step": 59835 + }, + { + "epoch": 2.79, + "learning_rate": 1.078064991456743e-05, + "loss": 0.0608, + "step": 59840 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779866129512644e-05, + "loss": 0.0754, + "step": 59845 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779082344457856e-05, + "loss": 0.1885, + "step": 59850 + }, + { + "epoch": 2.79, + "learning_rate": 1.077829855940307e-05, + "loss": 0.1078, + "step": 59855 + }, + { + "epoch": 2.79, + "learning_rate": 1.0777514774348284e-05, + "loss": 0.2044, + "step": 59860 + }, + { + "epoch": 2.79, + "learning_rate": 1.0776730989293498e-05, + "loss": 0.3295, + "step": 59865 + }, + { + "epoch": 2.79, + "learning_rate": 1.077594720423871e-05, + "loss": 0.0507, + "step": 59870 + }, + { + "epoch": 2.79, + "learning_rate": 1.0775163419183926e-05, + "loss": 0.0369, + "step": 59875 + }, + { + "epoch": 2.79, + "learning_rate": 1.0774379634129138e-05, + "loss": 0.0298, + "step": 59880 + }, + { + "epoch": 2.79, + "learning_rate": 1.077359584907435e-05, + "loss": 0.0492, + "step": 59885 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772812064019564e-05, + "loss": 0.0656, + "step": 59890 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772028278964776e-05, + "loss": 0.0895, + "step": 59895 + }, + { + "epoch": 2.8, + "learning_rate": 1.0771244493909992e-05, + "loss": 0.1175, + "step": 59900 + }, + { + "epoch": 2.8, + "learning_rate": 1.0770460708855204e-05, + "loss": 0.1537, + "step": 59905 + }, + { + "epoch": 2.8, + "learning_rate": 1.0769676923800418e-05, + "loss": 0.3598, + "step": 59910 + }, + { + "epoch": 2.8, + "learning_rate": 1.076889313874563e-05, + "loss": 0.2923, + "step": 59915 + }, + { + "epoch": 2.8, + "learning_rate": 1.0768109353690846e-05, + "loss": 0.0608, + "step": 59920 + }, + { + "epoch": 2.8, + "learning_rate": 1.0767325568636058e-05, + "loss": 0.0519, + "step": 59925 + }, + { + "epoch": 2.8, + "learning_rate": 1.0766541783581272e-05, + "loss": 0.0307, + "step": 59930 + }, + { + "epoch": 2.8, + "learning_rate": 1.0765757998526484e-05, + "loss": 0.1519, + "step": 59935 + }, + { + "epoch": 2.8, + "learning_rate": 1.07649742134717e-05, + "loss": 0.0559, + "step": 59940 + }, + { + "epoch": 2.8, + "learning_rate": 1.0764190428416912e-05, + "loss": 0.1429, + "step": 59945 + }, + { + "epoch": 2.8, + "learning_rate": 1.0763406643362124e-05, + "loss": 0.085, + "step": 59950 + }, + { + "epoch": 2.8, + "learning_rate": 1.0762622858307338e-05, + "loss": 0.2346, + "step": 59955 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761839073252552e-05, + "loss": 0.2619, + "step": 59960 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761055288197766e-05, + "loss": 0.1942, + "step": 59965 + }, + { + "epoch": 2.8, + "learning_rate": 1.0760271503142978e-05, + "loss": 0.0377, + "step": 59970 + }, + { + "epoch": 2.8, + "learning_rate": 1.0759487718088194e-05, + "loss": 0.0325, + "step": 59975 + }, + { + "epoch": 2.8, + "learning_rate": 1.0758703933033406e-05, + "loss": 0.0534, + "step": 59980 + }, + { + "epoch": 2.8, + "learning_rate": 1.075792014797862e-05, + "loss": 0.0849, + "step": 59985 + }, + { + "epoch": 2.8, + "learning_rate": 1.0757136362923832e-05, + "loss": 0.0567, + "step": 59990 + }, + { + "epoch": 2.8, + "learning_rate": 1.0756352577869048e-05, + "loss": 0.0723, + "step": 59995 + }, + { + "epoch": 2.8, + "learning_rate": 1.075556879281426e-05, + "loss": 0.1649, + "step": 60000 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754785007759474e-05, + "loss": 0.1065, + "step": 60005 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754001222704686e-05, + "loss": 0.0981, + "step": 60010 + }, + { + "epoch": 2.8, + "learning_rate": 1.0753217437649898e-05, + "loss": 0.3571, + "step": 60015 + }, + { + "epoch": 2.8, + "learning_rate": 1.0752433652595114e-05, + "loss": 0.0691, + "step": 60020 + }, + { + "epoch": 2.8, + "learning_rate": 1.0751649867540326e-05, + "loss": 0.0567, + "step": 60025 + }, + { + "epoch": 2.8, + "learning_rate": 1.075086608248554e-05, + "loss": 0.0616, + "step": 60030 + }, + { + "epoch": 2.8, + "learning_rate": 1.0750082297430752e-05, + "loss": 0.0686, + "step": 60035 + }, + { + "epoch": 2.8, + "learning_rate": 1.0749298512375968e-05, + "loss": 0.0687, + "step": 60040 + }, + { + "epoch": 2.8, + "learning_rate": 1.074851472732118e-05, + "loss": 0.1136, + "step": 60045 + }, + { + "epoch": 2.8, + "learning_rate": 1.0747730942266394e-05, + "loss": 0.1254, + "step": 60050 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746947157211606e-05, + "loss": 0.2474, + "step": 60055 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746163372156822e-05, + "loss": 0.1883, + "step": 60060 + }, + { + "epoch": 2.8, + "learning_rate": 1.0745379587102034e-05, + "loss": 0.4343, + "step": 60065 + }, + { + "epoch": 2.8, + "learning_rate": 1.0744595802047248e-05, + "loss": 0.0974, + "step": 60070 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743812016992462e-05, + "loss": 0.0394, + "step": 60075 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743028231937674e-05, + "loss": 0.0907, + "step": 60080 + }, + { + "epoch": 2.8, + "learning_rate": 1.0742244446882888e-05, + "loss": 0.1173, + "step": 60085 + }, + { + "epoch": 2.8, + "learning_rate": 1.07414606618281e-05, + "loss": 0.096, + "step": 60090 + }, + { + "epoch": 2.8, + "learning_rate": 1.0740676876773316e-05, + "loss": 0.1801, + "step": 60095 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739893091718528e-05, + "loss": 0.2232, + "step": 60100 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739109306663742e-05, + "loss": 0.1869, + "step": 60105 + }, + { + "epoch": 2.8, + "learning_rate": 1.0738325521608954e-05, + "loss": 0.2526, + "step": 60110 + }, + { + "epoch": 2.81, + "learning_rate": 1.073754173655417e-05, + "loss": 0.1866, + "step": 60115 + }, + { + "epoch": 2.81, + "learning_rate": 1.0736757951499382e-05, + "loss": 0.088, + "step": 60120 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735974166444596e-05, + "loss": 0.0488, + "step": 60125 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735190381389808e-05, + "loss": 0.0262, + "step": 60130 + }, + { + "epoch": 2.81, + "learning_rate": 1.0734406596335023e-05, + "loss": 0.0383, + "step": 60135 + }, + { + "epoch": 2.81, + "learning_rate": 1.0733622811280236e-05, + "loss": 0.1139, + "step": 60140 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732839026225448e-05, + "loss": 0.0991, + "step": 60145 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732055241170662e-05, + "loss": 0.08, + "step": 60150 + }, + { + "epoch": 2.81, + "learning_rate": 1.0731271456115876e-05, + "loss": 0.1972, + "step": 60155 + }, + { + "epoch": 2.81, + "learning_rate": 1.073048767106109e-05, + "loss": 0.157, + "step": 60160 + }, + { + "epoch": 2.81, + "learning_rate": 1.0729703886006302e-05, + "loss": 0.3585, + "step": 60165 + }, + { + "epoch": 2.81, + "learning_rate": 1.0728920100951516e-05, + "loss": 0.0269, + "step": 60170 + }, + { + "epoch": 2.81, + "learning_rate": 1.072813631589673e-05, + "loss": 0.0937, + "step": 60175 + }, + { + "epoch": 2.81, + "learning_rate": 1.0727352530841943e-05, + "loss": 0.0532, + "step": 60180 + }, + { + "epoch": 2.81, + "learning_rate": 1.0726568745787156e-05, + "loss": 0.065, + "step": 60185 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725784960732371e-05, + "loss": 0.1078, + "step": 60190 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725001175677584e-05, + "loss": 0.0757, + "step": 60195 + }, + { + "epoch": 2.81, + "learning_rate": 1.0724217390622797e-05, + "loss": 0.1246, + "step": 60200 + }, + { + "epoch": 2.81, + "learning_rate": 1.072343360556801e-05, + "loss": 0.1834, + "step": 60205 + }, + { + "epoch": 2.81, + "learning_rate": 1.0722649820513222e-05, + "loss": 0.3636, + "step": 60210 + }, + { + "epoch": 2.81, + "learning_rate": 1.0721866035458437e-05, + "loss": 0.2649, + "step": 60215 + }, + { + "epoch": 2.81, + "learning_rate": 1.072108225040365e-05, + "loss": 0.0994, + "step": 60220 + }, + { + "epoch": 2.81, + "learning_rate": 1.0720298465348864e-05, + "loss": 0.0371, + "step": 60225 + }, + { + "epoch": 2.81, + "learning_rate": 1.0719514680294076e-05, + "loss": 0.0265, + "step": 60230 + }, + { + "epoch": 2.81, + "learning_rate": 1.0718730895239291e-05, + "loss": 0.0794, + "step": 60235 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717947110184504e-05, + "loss": 0.0882, + "step": 60240 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717163325129717e-05, + "loss": 0.0821, + "step": 60245 + }, + { + "epoch": 2.81, + "learning_rate": 1.071637954007493e-05, + "loss": 0.0938, + "step": 60250 + }, + { + "epoch": 2.81, + "learning_rate": 1.0715595755020145e-05, + "loss": 0.0977, + "step": 60255 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714811969965358e-05, + "loss": 0.165, + "step": 60260 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714028184910571e-05, + "loss": 0.3129, + "step": 60265 + }, + { + "epoch": 2.81, + "learning_rate": 1.0713244399855784e-05, + "loss": 0.0767, + "step": 60270 + }, + { + "epoch": 2.81, + "learning_rate": 1.0712460614800998e-05, + "loss": 0.0686, + "step": 60275 + }, + { + "epoch": 2.81, + "learning_rate": 1.0711676829746211e-05, + "loss": 0.0351, + "step": 60280 + }, + { + "epoch": 2.81, + "learning_rate": 1.0710893044691424e-05, + "loss": 0.0881, + "step": 60285 + }, + { + "epoch": 2.81, + "learning_rate": 1.071010925963664e-05, + "loss": 0.0737, + "step": 60290 + }, + { + "epoch": 2.81, + "learning_rate": 1.0709325474581851e-05, + "loss": 0.125, + "step": 60295 + }, + { + "epoch": 2.81, + "learning_rate": 1.0708541689527065e-05, + "loss": 0.1237, + "step": 60300 + }, + { + "epoch": 2.81, + "learning_rate": 1.0707757904472278e-05, + "loss": 0.1589, + "step": 60305 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706974119417493e-05, + "loss": 0.1822, + "step": 60310 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706190334362705e-05, + "loss": 0.2297, + "step": 60315 + }, + { + "epoch": 2.81, + "learning_rate": 1.070540654930792e-05, + "loss": 0.0529, + "step": 60320 + }, + { + "epoch": 2.81, + "learning_rate": 1.0704622764253132e-05, + "loss": 0.0159, + "step": 60325 + }, + { + "epoch": 2.82, + "learning_rate": 1.0703838979198347e-05, + "loss": 0.0392, + "step": 60330 + }, + { + "epoch": 2.82, + "learning_rate": 1.070305519414356e-05, + "loss": 0.0952, + "step": 60335 + }, + { + "epoch": 2.82, + "learning_rate": 1.0702271409088772e-05, + "loss": 0.1019, + "step": 60340 + }, + { + "epoch": 2.82, + "learning_rate": 1.0701487624033985e-05, + "loss": 0.1498, + "step": 60345 + }, + { + "epoch": 2.82, + "learning_rate": 1.0700703838979198e-05, + "loss": 0.1059, + "step": 60350 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699920053924413e-05, + "loss": 0.1739, + "step": 60355 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699136268869625e-05, + "loss": 0.2294, + "step": 60360 + }, + { + "epoch": 2.82, + "learning_rate": 1.069835248381484e-05, + "loss": 0.3432, + "step": 60365 + }, + { + "epoch": 2.82, + "learning_rate": 1.0697568698760053e-05, + "loss": 0.0413, + "step": 60370 + }, + { + "epoch": 2.82, + "learning_rate": 1.0696784913705267e-05, + "loss": 0.0329, + "step": 60375 + }, + { + "epoch": 2.82, + "learning_rate": 1.069600112865048e-05, + "loss": 0.0649, + "step": 60380 + }, + { + "epoch": 2.82, + "learning_rate": 1.0695217343595693e-05, + "loss": 0.0388, + "step": 60385 + }, + { + "epoch": 2.82, + "learning_rate": 1.0694433558540907e-05, + "loss": 0.1039, + "step": 60390 + }, + { + "epoch": 2.82, + "learning_rate": 1.0693649773486121e-05, + "loss": 0.0985, + "step": 60395 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692865988431333e-05, + "loss": 0.152, + "step": 60400 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692082203376546e-05, + "loss": 0.1796, + "step": 60405 + }, + { + "epoch": 2.82, + "learning_rate": 1.0691298418321761e-05, + "loss": 0.1797, + "step": 60410 + }, + { + "epoch": 2.82, + "learning_rate": 1.0690514633266973e-05, + "loss": 0.2262, + "step": 60415 + }, + { + "epoch": 2.82, + "learning_rate": 1.0689730848212187e-05, + "loss": 0.0712, + "step": 60420 + }, + { + "epoch": 2.82, + "learning_rate": 1.06889470631574e-05, + "loss": 0.0351, + "step": 60425 + }, + { + "epoch": 2.82, + "learning_rate": 1.0688163278102615e-05, + "loss": 0.0411, + "step": 60430 + }, + { + "epoch": 2.82, + "learning_rate": 1.0687379493047827e-05, + "loss": 0.0386, + "step": 60435 + }, + { + "epoch": 2.82, + "learning_rate": 1.0686595707993041e-05, + "loss": 0.0963, + "step": 60440 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685811922938253e-05, + "loss": 0.1084, + "step": 60445 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685028137883469e-05, + "loss": 0.1291, + "step": 60450 + }, + { + "epoch": 2.82, + "learning_rate": 1.0684244352828681e-05, + "loss": 0.2619, + "step": 60455 + }, + { + "epoch": 2.82, + "learning_rate": 1.0683460567773895e-05, + "loss": 0.2443, + "step": 60460 + }, + { + "epoch": 2.82, + "learning_rate": 1.0682676782719107e-05, + "loss": 0.2697, + "step": 60465 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681892997664321e-05, + "loss": 0.0326, + "step": 60470 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681109212609535e-05, + "loss": 0.0153, + "step": 60475 + }, + { + "epoch": 2.82, + "learning_rate": 1.0680325427554747e-05, + "loss": 0.0179, + "step": 60480 + }, + { + "epoch": 2.82, + "learning_rate": 1.0679541642499961e-05, + "loss": 0.036, + "step": 60485 + }, + { + "epoch": 2.82, + "learning_rate": 1.0678757857445175e-05, + "loss": 0.057, + "step": 60490 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677974072390389e-05, + "loss": 0.0842, + "step": 60495 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677190287335601e-05, + "loss": 0.1821, + "step": 60500 + }, + { + "epoch": 2.82, + "learning_rate": 1.0676406502280817e-05, + "loss": 0.1145, + "step": 60505 + }, + { + "epoch": 2.82, + "learning_rate": 1.0675622717226029e-05, + "loss": 0.1985, + "step": 60510 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674838932171243e-05, + "loss": 0.202, + "step": 60515 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674055147116455e-05, + "loss": 0.0571, + "step": 60520 + }, + { + "epoch": 2.82, + "learning_rate": 1.067327136206167e-05, + "loss": 0.046, + "step": 60525 + }, + { + "epoch": 2.82, + "learning_rate": 1.0672487577006883e-05, + "loss": 0.0769, + "step": 60530 + }, + { + "epoch": 2.82, + "learning_rate": 1.0671703791952095e-05, + "loss": 0.0421, + "step": 60535 + }, + { + "epoch": 2.82, + "learning_rate": 1.0670920006897309e-05, + "loss": 0.121, + "step": 60540 + }, + { + "epoch": 2.83, + "learning_rate": 1.0670136221842521e-05, + "loss": 0.099, + "step": 60545 + }, + { + "epoch": 2.83, + "learning_rate": 1.0669352436787737e-05, + "loss": 0.1578, + "step": 60550 + }, + { + "epoch": 2.83, + "learning_rate": 1.066856865173295e-05, + "loss": 0.1956, + "step": 60555 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667784866678163e-05, + "loss": 0.4068, + "step": 60560 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667001081623375e-05, + "loss": 0.3239, + "step": 60565 + }, + { + "epoch": 2.83, + "learning_rate": 1.0666217296568591e-05, + "loss": 0.0318, + "step": 60570 + }, + { + "epoch": 2.83, + "learning_rate": 1.0665433511513803e-05, + "loss": 0.0266, + "step": 60575 + }, + { + "epoch": 2.83, + "learning_rate": 1.0664649726459017e-05, + "loss": 0.0429, + "step": 60580 + }, + { + "epoch": 2.83, + "learning_rate": 1.066386594140423e-05, + "loss": 0.0474, + "step": 60585 + }, + { + "epoch": 2.83, + "learning_rate": 1.0663082156349445e-05, + "loss": 0.0555, + "step": 60590 + }, + { + "epoch": 2.83, + "learning_rate": 1.0662298371294657e-05, + "loss": 0.0826, + "step": 60595 + }, + { + "epoch": 2.83, + "learning_rate": 1.066151458623987e-05, + "loss": 0.1561, + "step": 60600 + }, + { + "epoch": 2.83, + "learning_rate": 1.0660730801185085e-05, + "loss": 0.1313, + "step": 60605 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659947016130297e-05, + "loss": 0.2536, + "step": 60610 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659163231075511e-05, + "loss": 0.2586, + "step": 60615 + }, + { + "epoch": 2.83, + "learning_rate": 1.0658379446020723e-05, + "loss": 0.0328, + "step": 60620 + }, + { + "epoch": 2.83, + "learning_rate": 1.0657595660965939e-05, + "loss": 0.0261, + "step": 60625 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656811875911151e-05, + "loss": 0.0413, + "step": 60630 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656028090856365e-05, + "loss": 0.0651, + "step": 60635 + }, + { + "epoch": 2.83, + "learning_rate": 1.0655244305801577e-05, + "loss": 0.1421, + "step": 60640 + }, + { + "epoch": 2.83, + "learning_rate": 1.0654460520746793e-05, + "loss": 0.1075, + "step": 60645 + }, + { + "epoch": 2.83, + "learning_rate": 1.0653676735692005e-05, + "loss": 0.1013, + "step": 60650 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652892950637219e-05, + "loss": 0.2221, + "step": 60655 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652109165582431e-05, + "loss": 0.1437, + "step": 60660 + }, + { + "epoch": 2.83, + "learning_rate": 1.0651325380527643e-05, + "loss": 0.2729, + "step": 60665 + }, + { + "epoch": 2.83, + "learning_rate": 1.0650541595472859e-05, + "loss": 0.0612, + "step": 60670 + }, + { + "epoch": 2.83, + "learning_rate": 1.0649757810418071e-05, + "loss": 0.0464, + "step": 60675 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648974025363285e-05, + "loss": 0.03, + "step": 60680 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648190240308499e-05, + "loss": 0.0097, + "step": 60685 + }, + { + "epoch": 2.83, + "learning_rate": 1.0647406455253713e-05, + "loss": 0.1497, + "step": 60690 + }, + { + "epoch": 2.83, + "learning_rate": 1.0646622670198925e-05, + "loss": 0.0659, + "step": 60695 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645838885144139e-05, + "loss": 0.1278, + "step": 60700 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645055100089353e-05, + "loss": 0.1808, + "step": 60705 + }, + { + "epoch": 2.83, + "learning_rate": 1.0644271315034567e-05, + "loss": 0.3674, + "step": 60710 + }, + { + "epoch": 2.83, + "learning_rate": 1.0643487529979779e-05, + "loss": 0.395, + "step": 60715 + }, + { + "epoch": 2.83, + "learning_rate": 1.0642703744924994e-05, + "loss": 0.0519, + "step": 60720 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641919959870207e-05, + "loss": 0.0121, + "step": 60725 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641136174815419e-05, + "loss": 0.0334, + "step": 60730 + }, + { + "epoch": 2.83, + "learning_rate": 1.0640352389760633e-05, + "loss": 0.0302, + "step": 60735 + }, + { + "epoch": 2.83, + "learning_rate": 1.0639568604705845e-05, + "loss": 0.2201, + "step": 60740 + }, + { + "epoch": 2.83, + "learning_rate": 1.063878481965106e-05, + "loss": 0.1408, + "step": 60745 + }, + { + "epoch": 2.83, + "learning_rate": 1.0638001034596273e-05, + "loss": 0.1301, + "step": 60750 + }, + { + "epoch": 2.83, + "learning_rate": 1.0637217249541487e-05, + "loss": 0.1961, + "step": 60755 + }, + { + "epoch": 2.84, + "learning_rate": 1.0636433464486699e-05, + "loss": 0.1496, + "step": 60760 + }, + { + "epoch": 2.84, + "learning_rate": 1.0635649679431915e-05, + "loss": 0.2515, + "step": 60765 + }, + { + "epoch": 2.84, + "learning_rate": 1.0634865894377127e-05, + "loss": 0.0856, + "step": 60770 + }, + { + "epoch": 2.84, + "learning_rate": 1.063408210932234e-05, + "loss": 0.0536, + "step": 60775 + }, + { + "epoch": 2.84, + "learning_rate": 1.0633298324267553e-05, + "loss": 0.0576, + "step": 60780 + }, + { + "epoch": 2.84, + "learning_rate": 1.0632514539212768e-05, + "loss": 0.0704, + "step": 60785 + }, + { + "epoch": 2.84, + "learning_rate": 1.063173075415798e-05, + "loss": 0.068, + "step": 60790 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630946969103193e-05, + "loss": 0.1093, + "step": 60795 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630163184048407e-05, + "loss": 0.1841, + "step": 60800 + }, + { + "epoch": 2.84, + "learning_rate": 1.062937939899362e-05, + "loss": 0.1482, + "step": 60805 + }, + { + "epoch": 2.84, + "learning_rate": 1.0628595613938835e-05, + "loss": 0.1913, + "step": 60810 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627811828884047e-05, + "loss": 0.2599, + "step": 60815 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627028043829262e-05, + "loss": 0.0773, + "step": 60820 + }, + { + "epoch": 2.84, + "learning_rate": 1.0626244258774475e-05, + "loss": 0.0442, + "step": 60825 + }, + { + "epoch": 2.84, + "learning_rate": 1.0625460473719689e-05, + "loss": 0.0988, + "step": 60830 + }, + { + "epoch": 2.84, + "learning_rate": 1.06246766886649e-05, + "loss": 0.1, + "step": 60835 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623892903610116e-05, + "loss": 0.0715, + "step": 60840 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623109118555329e-05, + "loss": 0.0782, + "step": 60845 + }, + { + "epoch": 2.84, + "learning_rate": 1.0622325333500542e-05, + "loss": 0.1205, + "step": 60850 + }, + { + "epoch": 2.84, + "learning_rate": 1.0621541548445755e-05, + "loss": 0.1423, + "step": 60855 + }, + { + "epoch": 2.84, + "learning_rate": 1.0620757763390967e-05, + "loss": 0.2899, + "step": 60860 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619973978336183e-05, + "loss": 0.2523, + "step": 60865 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619190193281395e-05, + "loss": 0.0619, + "step": 60870 + }, + { + "epoch": 2.84, + "learning_rate": 1.0618406408226609e-05, + "loss": 0.044, + "step": 60875 + }, + { + "epoch": 2.84, + "learning_rate": 1.0617622623171821e-05, + "loss": 0.0412, + "step": 60880 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616838838117036e-05, + "loss": 0.0697, + "step": 60885 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616055053062249e-05, + "loss": 0.0875, + "step": 60890 + }, + { + "epoch": 2.84, + "learning_rate": 1.0615271268007463e-05, + "loss": 0.1086, + "step": 60895 + }, + { + "epoch": 2.84, + "learning_rate": 1.0614487482952675e-05, + "loss": 0.1613, + "step": 60900 + }, + { + "epoch": 2.84, + "learning_rate": 1.061370369789789e-05, + "loss": 0.1262, + "step": 60905 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612919912843103e-05, + "loss": 0.181, + "step": 60910 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612136127788316e-05, + "loss": 0.2297, + "step": 60915 + }, + { + "epoch": 2.84, + "learning_rate": 1.061135234273353e-05, + "loss": 0.0381, + "step": 60920 + }, + { + "epoch": 2.84, + "learning_rate": 1.0610568557678743e-05, + "loss": 0.0273, + "step": 60925 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609784772623957e-05, + "loss": 0.0332, + "step": 60930 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609000987569169e-05, + "loss": 0.0649, + "step": 60935 + }, + { + "epoch": 2.84, + "learning_rate": 1.0608217202514384e-05, + "loss": 0.0916, + "step": 60940 + }, + { + "epoch": 2.84, + "learning_rate": 1.0607433417459597e-05, + "loss": 0.0944, + "step": 60945 + }, + { + "epoch": 2.84, + "learning_rate": 1.060664963240481e-05, + "loss": 0.1064, + "step": 60950 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605865847350023e-05, + "loss": 0.1578, + "step": 60955 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605082062295238e-05, + "loss": 0.1276, + "step": 60960 + }, + { + "epoch": 2.84, + "learning_rate": 1.060429827724045e-05, + "loss": 0.3312, + "step": 60965 + }, + { + "epoch": 2.84, + "learning_rate": 1.0603514492185664e-05, + "loss": 0.0179, + "step": 60970 + }, + { + "epoch": 2.85, + "learning_rate": 1.0602730707130877e-05, + "loss": 0.062, + "step": 60975 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601946922076092e-05, + "loss": 0.0659, + "step": 60980 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601163137021304e-05, + "loss": 0.1297, + "step": 60985 + }, + { + "epoch": 2.85, + "learning_rate": 1.0600379351966517e-05, + "loss": 0.0375, + "step": 60990 + }, + { + "epoch": 2.85, + "learning_rate": 1.059959556691173e-05, + "loss": 0.118, + "step": 60995 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598811781856944e-05, + "loss": 0.0791, + "step": 61000 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598027996802158e-05, + "loss": 0.1074, + "step": 61005 + }, + { + "epoch": 2.85, + "learning_rate": 1.059724421174737e-05, + "loss": 0.2143, + "step": 61010 + }, + { + "epoch": 2.85, + "learning_rate": 1.0596460426692584e-05, + "loss": 0.2168, + "step": 61015 + }, + { + "epoch": 2.85, + "learning_rate": 1.0595676641637798e-05, + "loss": 0.07, + "step": 61020 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594892856583012e-05, + "loss": 0.0378, + "step": 61025 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594109071528224e-05, + "loss": 0.0538, + "step": 61030 + }, + { + "epoch": 2.85, + "learning_rate": 1.059332528647344e-05, + "loss": 0.058, + "step": 61035 + }, + { + "epoch": 2.85, + "learning_rate": 1.0592541501418652e-05, + "loss": 0.1057, + "step": 61040 + }, + { + "epoch": 2.85, + "learning_rate": 1.0591757716363866e-05, + "loss": 0.114, + "step": 61045 + }, + { + "epoch": 2.85, + "learning_rate": 1.0590973931309078e-05, + "loss": 0.1695, + "step": 61050 + }, + { + "epoch": 2.85, + "learning_rate": 1.059019014625429e-05, + "loss": 0.171, + "step": 61055 + }, + { + "epoch": 2.85, + "learning_rate": 1.0589406361199506e-05, + "loss": 0.3068, + "step": 61060 + }, + { + "epoch": 2.85, + "learning_rate": 1.0588622576144718e-05, + "loss": 0.2925, + "step": 61065 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587838791089932e-05, + "loss": 0.0337, + "step": 61070 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587055006035145e-05, + "loss": 0.0352, + "step": 61075 + }, + { + "epoch": 2.85, + "learning_rate": 1.058627122098036e-05, + "loss": 0.0502, + "step": 61080 + }, + { + "epoch": 2.85, + "learning_rate": 1.0585487435925572e-05, + "loss": 0.0615, + "step": 61085 + }, + { + "epoch": 2.85, + "learning_rate": 1.0584703650870786e-05, + "loss": 0.1042, + "step": 61090 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583919865815998e-05, + "loss": 0.1052, + "step": 61095 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583136080761214e-05, + "loss": 0.1772, + "step": 61100 + }, + { + "epoch": 2.85, + "learning_rate": 1.0582352295706426e-05, + "loss": 0.208, + "step": 61105 + }, + { + "epoch": 2.85, + "learning_rate": 1.058156851065164e-05, + "loss": 0.2459, + "step": 61110 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580784725596852e-05, + "loss": 0.432, + "step": 61115 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580000940542066e-05, + "loss": 0.064, + "step": 61120 + }, + { + "epoch": 2.85, + "learning_rate": 1.057921715548728e-05, + "loss": 0.0289, + "step": 61125 + }, + { + "epoch": 2.85, + "learning_rate": 1.0578433370432492e-05, + "loss": 0.0404, + "step": 61130 + }, + { + "epoch": 2.85, + "learning_rate": 1.0577649585377708e-05, + "loss": 0.0537, + "step": 61135 + }, + { + "epoch": 2.85, + "learning_rate": 1.057686580032292e-05, + "loss": 0.0917, + "step": 61140 + }, + { + "epoch": 2.85, + "learning_rate": 1.0576082015268134e-05, + "loss": 0.0957, + "step": 61145 + }, + { + "epoch": 2.85, + "learning_rate": 1.0575298230213346e-05, + "loss": 0.0822, + "step": 61150 + }, + { + "epoch": 2.85, + "learning_rate": 1.0574514445158562e-05, + "loss": 0.1236, + "step": 61155 + }, + { + "epoch": 2.85, + "learning_rate": 1.0573730660103774e-05, + "loss": 0.1939, + "step": 61160 + }, + { + "epoch": 2.85, + "learning_rate": 1.0572946875048988e-05, + "loss": 0.1853, + "step": 61165 + }, + { + "epoch": 2.85, + "learning_rate": 1.05721630899942e-05, + "loss": 0.028, + "step": 61170 + }, + { + "epoch": 2.85, + "learning_rate": 1.0571379304939416e-05, + "loss": 0.0427, + "step": 61175 + }, + { + "epoch": 2.85, + "learning_rate": 1.0570595519884628e-05, + "loss": 0.0422, + "step": 61180 + }, + { + "epoch": 2.85, + "learning_rate": 1.056981173482984e-05, + "loss": 0.0487, + "step": 61185 + }, + { + "epoch": 2.86, + "learning_rate": 1.0569027949775054e-05, + "loss": 0.0449, + "step": 61190 + }, + { + "epoch": 2.86, + "learning_rate": 1.0568244164720266e-05, + "loss": 0.1407, + "step": 61195 + }, + { + "epoch": 2.86, + "learning_rate": 1.0567460379665482e-05, + "loss": 0.1246, + "step": 61200 + }, + { + "epoch": 2.86, + "learning_rate": 1.0566676594610694e-05, + "loss": 0.085, + "step": 61205 + }, + { + "epoch": 2.86, + "learning_rate": 1.0565892809555908e-05, + "loss": 0.2521, + "step": 61210 + }, + { + "epoch": 2.86, + "learning_rate": 1.056510902450112e-05, + "loss": 0.1783, + "step": 61215 + }, + { + "epoch": 2.86, + "learning_rate": 1.0564325239446336e-05, + "loss": 0.1145, + "step": 61220 + }, + { + "epoch": 2.86, + "learning_rate": 1.0563541454391548e-05, + "loss": 0.03, + "step": 61225 + }, + { + "epoch": 2.86, + "learning_rate": 1.0562757669336762e-05, + "loss": 0.0302, + "step": 61230 + }, + { + "epoch": 2.86, + "learning_rate": 1.0561973884281976e-05, + "loss": 0.0758, + "step": 61235 + }, + { + "epoch": 2.86, + "learning_rate": 1.056119009922719e-05, + "loss": 0.0925, + "step": 61240 + }, + { + "epoch": 2.86, + "learning_rate": 1.0560406314172402e-05, + "loss": 0.0622, + "step": 61245 + }, + { + "epoch": 2.86, + "learning_rate": 1.0559622529117614e-05, + "loss": 0.0614, + "step": 61250 + }, + { + "epoch": 2.86, + "learning_rate": 1.055883874406283e-05, + "loss": 0.1648, + "step": 61255 + }, + { + "epoch": 2.86, + "learning_rate": 1.0558054959008042e-05, + "loss": 0.2341, + "step": 61260 + }, + { + "epoch": 2.86, + "learning_rate": 1.0557271173953256e-05, + "loss": 0.269, + "step": 61265 + }, + { + "epoch": 2.86, + "learning_rate": 1.0556487388898468e-05, + "loss": 0.0684, + "step": 61270 + }, + { + "epoch": 2.86, + "learning_rate": 1.0555703603843684e-05, + "loss": 0.107, + "step": 61275 + }, + { + "epoch": 2.86, + "learning_rate": 1.0554919818788896e-05, + "loss": 0.0388, + "step": 61280 + }, + { + "epoch": 2.86, + "learning_rate": 1.055413603373411e-05, + "loss": 0.0633, + "step": 61285 + }, + { + "epoch": 2.86, + "learning_rate": 1.0553352248679322e-05, + "loss": 0.1063, + "step": 61290 + }, + { + "epoch": 2.86, + "learning_rate": 1.0552568463624538e-05, + "loss": 0.1456, + "step": 61295 + }, + { + "epoch": 2.86, + "learning_rate": 1.055178467856975e-05, + "loss": 0.1461, + "step": 61300 + }, + { + "epoch": 2.86, + "learning_rate": 1.0551000893514964e-05, + "loss": 0.1618, + "step": 61305 + }, + { + "epoch": 2.86, + "learning_rate": 1.0550217108460176e-05, + "loss": 0.2129, + "step": 61310 + }, + { + "epoch": 2.86, + "learning_rate": 1.054943332340539e-05, + "loss": 0.2407, + "step": 61315 + }, + { + "epoch": 2.86, + "learning_rate": 1.0548649538350604e-05, + "loss": 0.0771, + "step": 61320 + }, + { + "epoch": 2.86, + "learning_rate": 1.0547865753295816e-05, + "loss": 0.0186, + "step": 61325 + }, + { + "epoch": 2.86, + "learning_rate": 1.054708196824103e-05, + "loss": 0.0309, + "step": 61330 + }, + { + "epoch": 2.86, + "learning_rate": 1.0546298183186244e-05, + "loss": 0.1447, + "step": 61335 + }, + { + "epoch": 2.86, + "learning_rate": 1.0545514398131458e-05, + "loss": 0.0258, + "step": 61340 + }, + { + "epoch": 2.86, + "learning_rate": 1.054473061307667e-05, + "loss": 0.1747, + "step": 61345 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543946828021886e-05, + "loss": 0.1391, + "step": 61350 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543163042967098e-05, + "loss": 0.217, + "step": 61355 + }, + { + "epoch": 2.86, + "learning_rate": 1.0542379257912312e-05, + "loss": 0.2874, + "step": 61360 + }, + { + "epoch": 2.86, + "learning_rate": 1.0541595472857524e-05, + "loss": 0.2921, + "step": 61365 + }, + { + "epoch": 2.86, + "learning_rate": 1.054081168780274e-05, + "loss": 0.0604, + "step": 61370 + }, + { + "epoch": 2.86, + "learning_rate": 1.0540027902747952e-05, + "loss": 0.0352, + "step": 61375 + }, + { + "epoch": 2.86, + "learning_rate": 1.0539244117693164e-05, + "loss": 0.0296, + "step": 61380 + }, + { + "epoch": 2.86, + "learning_rate": 1.0538460332638378e-05, + "loss": 0.0616, + "step": 61385 + }, + { + "epoch": 2.86, + "learning_rate": 1.053767654758359e-05, + "loss": 0.094, + "step": 61390 + }, + { + "epoch": 2.86, + "learning_rate": 1.0536892762528806e-05, + "loss": 0.1211, + "step": 61395 + }, + { + "epoch": 2.87, + "learning_rate": 1.0536108977474018e-05, + "loss": 0.104, + "step": 61400 + }, + { + "epoch": 2.87, + "learning_rate": 1.0535325192419232e-05, + "loss": 0.154, + "step": 61405 + }, + { + "epoch": 2.87, + "learning_rate": 1.0534541407364444e-05, + "loss": 0.1931, + "step": 61410 + }, + { + "epoch": 2.87, + "learning_rate": 1.053375762230966e-05, + "loss": 0.1805, + "step": 61415 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532973837254872e-05, + "loss": 0.0324, + "step": 61420 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532190052200086e-05, + "loss": 0.0607, + "step": 61425 + }, + { + "epoch": 2.87, + "learning_rate": 1.0531406267145298e-05, + "loss": 0.0146, + "step": 61430 + }, + { + "epoch": 2.87, + "learning_rate": 1.0530622482090514e-05, + "loss": 0.0607, + "step": 61435 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529838697035726e-05, + "loss": 0.0477, + "step": 61440 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529054911980938e-05, + "loss": 0.0923, + "step": 61445 + }, + { + "epoch": 2.87, + "learning_rate": 1.0528271126926154e-05, + "loss": 0.0744, + "step": 61450 + }, + { + "epoch": 2.87, + "learning_rate": 1.0527487341871366e-05, + "loss": 0.1322, + "step": 61455 + }, + { + "epoch": 2.87, + "learning_rate": 1.052670355681658e-05, + "loss": 0.2138, + "step": 61460 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525919771761792e-05, + "loss": 0.2974, + "step": 61465 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525135986707008e-05, + "loss": 0.0482, + "step": 61470 + }, + { + "epoch": 2.87, + "learning_rate": 1.052435220165222e-05, + "loss": 0.0448, + "step": 61475 + }, + { + "epoch": 2.87, + "learning_rate": 1.0523568416597434e-05, + "loss": 0.05, + "step": 61480 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522784631542646e-05, + "loss": 0.0529, + "step": 61485 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522000846487861e-05, + "loss": 0.1533, + "step": 61490 + }, + { + "epoch": 2.87, + "learning_rate": 1.0521217061433074e-05, + "loss": 0.1484, + "step": 61495 + }, + { + "epoch": 2.87, + "learning_rate": 1.0520433276378288e-05, + "loss": 0.1626, + "step": 61500 + }, + { + "epoch": 2.87, + "learning_rate": 1.05196494913235e-05, + "loss": 0.1731, + "step": 61505 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518865706268712e-05, + "loss": 0.33, + "step": 61510 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518081921213928e-05, + "loss": 0.3039, + "step": 61515 + }, + { + "epoch": 2.87, + "learning_rate": 1.051729813615914e-05, + "loss": 0.01, + "step": 61520 + }, + { + "epoch": 2.87, + "learning_rate": 1.0516514351104354e-05, + "loss": 0.0286, + "step": 61525 + }, + { + "epoch": 2.87, + "learning_rate": 1.0515730566049568e-05, + "loss": 0.0298, + "step": 61530 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514946780994782e-05, + "loss": 0.077, + "step": 61535 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514162995939994e-05, + "loss": 0.1129, + "step": 61540 + }, + { + "epoch": 2.87, + "learning_rate": 1.0513379210885208e-05, + "loss": 0.0934, + "step": 61545 + }, + { + "epoch": 2.87, + "learning_rate": 1.0512595425830422e-05, + "loss": 0.1048, + "step": 61550 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511811640775635e-05, + "loss": 0.1929, + "step": 61555 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511027855720848e-05, + "loss": 0.281, + "step": 61560 + }, + { + "epoch": 2.87, + "learning_rate": 1.0510244070666063e-05, + "loss": 0.345, + "step": 61565 + }, + { + "epoch": 2.87, + "learning_rate": 1.0509460285611275e-05, + "loss": 0.0306, + "step": 61570 + }, + { + "epoch": 2.87, + "learning_rate": 1.0508676500556488e-05, + "loss": 0.0636, + "step": 61575 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507892715501702e-05, + "loss": 0.0506, + "step": 61580 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507108930446914e-05, + "loss": 0.112, + "step": 61585 + }, + { + "epoch": 2.87, + "learning_rate": 1.050632514539213e-05, + "loss": 0.0215, + "step": 61590 + }, + { + "epoch": 2.87, + "learning_rate": 1.0505541360337342e-05, + "loss": 0.0534, + "step": 61595 + }, + { + "epoch": 2.87, + "learning_rate": 1.0504757575282556e-05, + "loss": 0.0811, + "step": 61600 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503973790227768e-05, + "loss": 0.1887, + "step": 61605 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503190005172983e-05, + "loss": 0.2217, + "step": 61610 + }, + { + "epoch": 2.88, + "learning_rate": 1.0502406220118196e-05, + "loss": 0.3228, + "step": 61615 + }, + { + "epoch": 2.88, + "learning_rate": 1.050162243506341e-05, + "loss": 0.052, + "step": 61620 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500838650008622e-05, + "loss": 0.0327, + "step": 61625 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500054864953837e-05, + "loss": 0.0629, + "step": 61630 + }, + { + "epoch": 2.88, + "learning_rate": 1.049927107989905e-05, + "loss": 0.0539, + "step": 61635 + }, + { + "epoch": 2.88, + "learning_rate": 1.0498487294844262e-05, + "loss": 0.0616, + "step": 61640 + }, + { + "epoch": 2.88, + "learning_rate": 1.0497703509789476e-05, + "loss": 0.0863, + "step": 61645 + }, + { + "epoch": 2.88, + "learning_rate": 1.049691972473469e-05, + "loss": 0.1395, + "step": 61650 + }, + { + "epoch": 2.88, + "learning_rate": 1.0496135939679903e-05, + "loss": 0.0981, + "step": 61655 + }, + { + "epoch": 2.88, + "learning_rate": 1.0495352154625116e-05, + "loss": 0.2278, + "step": 61660 + }, + { + "epoch": 2.88, + "learning_rate": 1.0494568369570331e-05, + "loss": 0.2109, + "step": 61665 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493784584515543e-05, + "loss": 0.0107, + "step": 61670 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493000799460757e-05, + "loss": 0.085, + "step": 61675 + }, + { + "epoch": 2.88, + "learning_rate": 1.049221701440597e-05, + "loss": 0.0593, + "step": 61680 + }, + { + "epoch": 2.88, + "learning_rate": 1.0491433229351185e-05, + "loss": 0.1071, + "step": 61685 + }, + { + "epoch": 2.88, + "learning_rate": 1.0490649444296397e-05, + "loss": 0.0502, + "step": 61690 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489865659241611e-05, + "loss": 0.0725, + "step": 61695 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489081874186823e-05, + "loss": 0.1771, + "step": 61700 + }, + { + "epoch": 2.88, + "learning_rate": 1.0488298089132036e-05, + "loss": 0.1795, + "step": 61705 + }, + { + "epoch": 2.88, + "learning_rate": 1.0487514304077251e-05, + "loss": 0.2024, + "step": 61710 + }, + { + "epoch": 2.88, + "learning_rate": 1.0486730519022463e-05, + "loss": 0.3146, + "step": 61715 + }, + { + "epoch": 2.88, + "learning_rate": 1.0485946733967677e-05, + "loss": 0.0512, + "step": 61720 + }, + { + "epoch": 2.88, + "learning_rate": 1.048516294891289e-05, + "loss": 0.0375, + "step": 61725 + }, + { + "epoch": 2.88, + "learning_rate": 1.0484379163858105e-05, + "loss": 0.0448, + "step": 61730 + }, + { + "epoch": 2.88, + "learning_rate": 1.0483595378803317e-05, + "loss": 0.062, + "step": 61735 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482811593748531e-05, + "loss": 0.1169, + "step": 61740 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482027808693744e-05, + "loss": 0.0705, + "step": 61745 + }, + { + "epoch": 2.88, + "learning_rate": 1.0481244023638959e-05, + "loss": 0.0729, + "step": 61750 + }, + { + "epoch": 2.88, + "learning_rate": 1.0480460238584171e-05, + "loss": 0.1679, + "step": 61755 + }, + { + "epoch": 2.88, + "learning_rate": 1.0479676453529385e-05, + "loss": 0.188, + "step": 61760 + }, + { + "epoch": 2.88, + "learning_rate": 1.04788926684746e-05, + "loss": 0.2839, + "step": 61765 + }, + { + "epoch": 2.88, + "learning_rate": 1.0478108883419811e-05, + "loss": 0.0484, + "step": 61770 + }, + { + "epoch": 2.88, + "learning_rate": 1.0477325098365025e-05, + "loss": 0.0209, + "step": 61775 + }, + { + "epoch": 2.88, + "learning_rate": 1.0476541313310237e-05, + "loss": 0.072, + "step": 61780 + }, + { + "epoch": 2.88, + "learning_rate": 1.0475757528255453e-05, + "loss": 0.043, + "step": 61785 + }, + { + "epoch": 2.88, + "learning_rate": 1.0474973743200665e-05, + "loss": 0.1533, + "step": 61790 + }, + { + "epoch": 2.88, + "learning_rate": 1.047418995814588e-05, + "loss": 0.062, + "step": 61795 + }, + { + "epoch": 2.88, + "learning_rate": 1.0473406173091091e-05, + "loss": 0.0718, + "step": 61800 + }, + { + "epoch": 2.88, + "learning_rate": 1.0472622388036307e-05, + "loss": 0.1603, + "step": 61805 + }, + { + "epoch": 2.88, + "learning_rate": 1.047183860298152e-05, + "loss": 0.2909, + "step": 61810 + }, + { + "epoch": 2.88, + "learning_rate": 1.0471054817926733e-05, + "loss": 0.182, + "step": 61815 + }, + { + "epoch": 2.88, + "learning_rate": 1.0470271032871945e-05, + "loss": 0.0543, + "step": 61820 + }, + { + "epoch": 2.88, + "learning_rate": 1.0469487247817161e-05, + "loss": 0.039, + "step": 61825 + }, + { + "epoch": 2.89, + "learning_rate": 1.0468703462762373e-05, + "loss": 0.0647, + "step": 61830 + }, + { + "epoch": 2.89, + "learning_rate": 1.0467919677707585e-05, + "loss": 0.0652, + "step": 61835 + }, + { + "epoch": 2.89, + "learning_rate": 1.04671358926528e-05, + "loss": 0.0565, + "step": 61840 + }, + { + "epoch": 2.89, + "learning_rate": 1.0466352107598013e-05, + "loss": 0.1336, + "step": 61845 + }, + { + "epoch": 2.89, + "learning_rate": 1.0465568322543227e-05, + "loss": 0.1115, + "step": 61850 + }, + { + "epoch": 2.89, + "learning_rate": 1.046478453748844e-05, + "loss": 0.1073, + "step": 61855 + }, + { + "epoch": 2.89, + "learning_rate": 1.046415750944461e-05, + "loss": 0.2446, + "step": 61860 + }, + { + "epoch": 2.89, + "learning_rate": 1.046353048140078e-05, + "loss": 0.3299, + "step": 61865 + }, + { + "epoch": 2.89, + "learning_rate": 1.0462746696345996e-05, + "loss": 0.0518, + "step": 61870 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461962911291208e-05, + "loss": 0.0517, + "step": 61875 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461179126236422e-05, + "loss": 0.0442, + "step": 61880 + }, + { + "epoch": 2.89, + "learning_rate": 1.0460395341181636e-05, + "loss": 0.0964, + "step": 61885 + }, + { + "epoch": 2.89, + "learning_rate": 1.045961155612685e-05, + "loss": 0.0607, + "step": 61890 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458827771072062e-05, + "loss": 0.1274, + "step": 61895 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458043986017274e-05, + "loss": 0.2033, + "step": 61900 + }, + { + "epoch": 2.89, + "learning_rate": 1.045726020096249e-05, + "loss": 0.2348, + "step": 61905 + }, + { + "epoch": 2.89, + "learning_rate": 1.0456476415907702e-05, + "loss": 0.1433, + "step": 61910 + }, + { + "epoch": 2.89, + "learning_rate": 1.0455692630852916e-05, + "loss": 0.2228, + "step": 61915 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454908845798128e-05, + "loss": 0.0411, + "step": 61920 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454125060743344e-05, + "loss": 0.0441, + "step": 61925 + }, + { + "epoch": 2.89, + "learning_rate": 1.0453341275688556e-05, + "loss": 0.0757, + "step": 61930 + }, + { + "epoch": 2.89, + "learning_rate": 1.045255749063377e-05, + "loss": 0.0323, + "step": 61935 + }, + { + "epoch": 2.89, + "learning_rate": 1.0451773705578982e-05, + "loss": 0.0426, + "step": 61940 + }, + { + "epoch": 2.89, + "learning_rate": 1.0450989920524198e-05, + "loss": 0.0863, + "step": 61945 + }, + { + "epoch": 2.89, + "learning_rate": 1.045020613546941e-05, + "loss": 0.184, + "step": 61950 + }, + { + "epoch": 2.89, + "learning_rate": 1.0449422350414624e-05, + "loss": 0.162, + "step": 61955 + }, + { + "epoch": 2.89, + "learning_rate": 1.0448638565359836e-05, + "loss": 0.1879, + "step": 61960 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447854780305048e-05, + "loss": 0.2687, + "step": 61965 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447070995250264e-05, + "loss": 0.0468, + "step": 61970 + }, + { + "epoch": 2.89, + "learning_rate": 1.0446287210195476e-05, + "loss": 0.0404, + "step": 61975 + }, + { + "epoch": 2.89, + "learning_rate": 1.044550342514069e-05, + "loss": 0.0753, + "step": 61980 + }, + { + "epoch": 2.89, + "learning_rate": 1.0444719640085904e-05, + "loss": 0.0453, + "step": 61985 + }, + { + "epoch": 2.89, + "learning_rate": 1.0443935855031118e-05, + "loss": 0.1476, + "step": 61990 + }, + { + "epoch": 2.89, + "learning_rate": 1.044315206997633e-05, + "loss": 0.0593, + "step": 61995 + }, + { + "epoch": 2.89, + "learning_rate": 1.0442368284921545e-05, + "loss": 0.1595, + "step": 62000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0441584499866758e-05, + "loss": 0.1356, + "step": 62005 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440800714811972e-05, + "loss": 0.2253, + "step": 62010 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440016929757184e-05, + "loss": 0.2739, + "step": 62015 + }, + { + "epoch": 2.89, + "learning_rate": 1.04392331447024e-05, + "loss": 0.0676, + "step": 62020 + }, + { + "epoch": 2.89, + "learning_rate": 1.0438449359647612e-05, + "loss": 0.076, + "step": 62025 + }, + { + "epoch": 2.89, + "learning_rate": 1.0437665574592824e-05, + "loss": 0.0512, + "step": 62030 + }, + { + "epoch": 2.89, + "learning_rate": 1.0436881789538038e-05, + "loss": 0.0411, + "step": 62035 + }, + { + "epoch": 2.89, + "learning_rate": 1.043609800448325e-05, + "loss": 0.2422, + "step": 62040 + }, + { + "epoch": 2.9, + "learning_rate": 1.0435314219428466e-05, + "loss": 0.1052, + "step": 62045 + }, + { + "epoch": 2.9, + "learning_rate": 1.0434530434373678e-05, + "loss": 0.1557, + "step": 62050 + }, + { + "epoch": 2.9, + "learning_rate": 1.0433746649318892e-05, + "loss": 0.217, + "step": 62055 + }, + { + "epoch": 2.9, + "learning_rate": 1.0432962864264104e-05, + "loss": 0.1938, + "step": 62060 + }, + { + "epoch": 2.9, + "learning_rate": 1.043217907920932e-05, + "loss": 0.1747, + "step": 62065 + }, + { + "epoch": 2.9, + "learning_rate": 1.0431395294154532e-05, + "loss": 0.0406, + "step": 62070 + }, + { + "epoch": 2.9, + "learning_rate": 1.0430611509099746e-05, + "loss": 0.0092, + "step": 62075 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429827724044958e-05, + "loss": 0.0618, + "step": 62080 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429043938990173e-05, + "loss": 0.0918, + "step": 62085 + }, + { + "epoch": 2.9, + "learning_rate": 1.0428260153935386e-05, + "loss": 0.1263, + "step": 62090 + }, + { + "epoch": 2.9, + "learning_rate": 1.0427476368880598e-05, + "loss": 0.1332, + "step": 62095 + }, + { + "epoch": 2.9, + "learning_rate": 1.0426692583825813e-05, + "loss": 0.1137, + "step": 62100 + }, + { + "epoch": 2.9, + "learning_rate": 1.0425908798771026e-05, + "loss": 0.0945, + "step": 62105 + }, + { + "epoch": 2.9, + "learning_rate": 1.042512501371624e-05, + "loss": 0.1955, + "step": 62110 + }, + { + "epoch": 2.9, + "learning_rate": 1.0424341228661452e-05, + "loss": 0.226, + "step": 62115 + }, + { + "epoch": 2.9, + "learning_rate": 1.0423557443606667e-05, + "loss": 0.0538, + "step": 62120 + }, + { + "epoch": 2.9, + "learning_rate": 1.042277365855188e-05, + "loss": 0.0342, + "step": 62125 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421989873497093e-05, + "loss": 0.0623, + "step": 62130 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421206088442306e-05, + "loss": 0.0489, + "step": 62135 + }, + { + "epoch": 2.9, + "learning_rate": 1.0420422303387521e-05, + "loss": 0.108, + "step": 62140 + }, + { + "epoch": 2.9, + "learning_rate": 1.0419638518332734e-05, + "loss": 0.0872, + "step": 62145 + }, + { + "epoch": 2.9, + "learning_rate": 1.0418854733277947e-05, + "loss": 0.0748, + "step": 62150 + }, + { + "epoch": 2.9, + "learning_rate": 1.041807094822316e-05, + "loss": 0.1508, + "step": 62155 + }, + { + "epoch": 2.9, + "learning_rate": 1.0417287163168372e-05, + "loss": 0.2324, + "step": 62160 + }, + { + "epoch": 2.9, + "learning_rate": 1.0416503378113587e-05, + "loss": 0.3346, + "step": 62165 + }, + { + "epoch": 2.9, + "learning_rate": 1.04157195930588e-05, + "loss": 0.0784, + "step": 62170 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414935808004014e-05, + "loss": 0.0446, + "step": 62175 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414152022949226e-05, + "loss": 0.0434, + "step": 62180 + }, + { + "epoch": 2.9, + "learning_rate": 1.0413368237894441e-05, + "loss": 0.0521, + "step": 62185 + }, + { + "epoch": 2.9, + "learning_rate": 1.0412584452839654e-05, + "loss": 0.0605, + "step": 62190 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411800667784867e-05, + "loss": 0.1037, + "step": 62195 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411016882730081e-05, + "loss": 0.0748, + "step": 62200 + }, + { + "epoch": 2.9, + "learning_rate": 1.0410233097675295e-05, + "loss": 0.2214, + "step": 62205 + }, + { + "epoch": 2.9, + "learning_rate": 1.0409449312620508e-05, + "loss": 0.1377, + "step": 62210 + }, + { + "epoch": 2.9, + "learning_rate": 1.0408665527565721e-05, + "loss": 0.2861, + "step": 62215 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407881742510935e-05, + "loss": 0.0281, + "step": 62220 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407097957456148e-05, + "loss": 0.0211, + "step": 62225 + }, + { + "epoch": 2.9, + "learning_rate": 1.0406314172401361e-05, + "loss": 0.0767, + "step": 62230 + }, + { + "epoch": 2.9, + "learning_rate": 1.0405530387346574e-05, + "loss": 0.0752, + "step": 62235 + }, + { + "epoch": 2.9, + "learning_rate": 1.040474660229179e-05, + "loss": 0.0622, + "step": 62240 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403962817237001e-05, + "loss": 0.0847, + "step": 62245 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403179032182215e-05, + "loss": 0.0918, + "step": 62250 + }, + { + "epoch": 2.9, + "learning_rate": 1.0402395247127428e-05, + "loss": 0.0615, + "step": 62255 + }, + { + "epoch": 2.91, + "learning_rate": 1.0401611462072643e-05, + "loss": 0.1679, + "step": 62260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0400827677017855e-05, + "loss": 0.3511, + "step": 62265 + }, + { + "epoch": 2.91, + "learning_rate": 1.040004389196307e-05, + "loss": 0.0629, + "step": 62270 + }, + { + "epoch": 2.91, + "learning_rate": 1.0399260106908282e-05, + "loss": 0.0183, + "step": 62275 + }, + { + "epoch": 2.91, + "learning_rate": 1.0398476321853497e-05, + "loss": 0.0467, + "step": 62280 + }, + { + "epoch": 2.91, + "learning_rate": 1.039769253679871e-05, + "loss": 0.0157, + "step": 62285 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396908751743922e-05, + "loss": 0.0978, + "step": 62290 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396124966689135e-05, + "loss": 0.0457, + "step": 62295 + }, + { + "epoch": 2.91, + "learning_rate": 1.039534118163435e-05, + "loss": 0.1463, + "step": 62300 + }, + { + "epoch": 2.91, + "learning_rate": 1.0394557396579563e-05, + "loss": 0.1993, + "step": 62305 + }, + { + "epoch": 2.91, + "learning_rate": 1.0393773611524775e-05, + "loss": 0.1728, + "step": 62310 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392989826469991e-05, + "loss": 0.163, + "step": 62315 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392206041415203e-05, + "loss": 0.0391, + "step": 62320 + }, + { + "epoch": 2.91, + "learning_rate": 1.0391422256360417e-05, + "loss": 0.054, + "step": 62325 + }, + { + "epoch": 2.91, + "learning_rate": 1.039063847130563e-05, + "loss": 0.0313, + "step": 62330 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389854686250845e-05, + "loss": 0.0868, + "step": 62335 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389070901196057e-05, + "loss": 0.0782, + "step": 62340 + }, + { + "epoch": 2.91, + "learning_rate": 1.0388287116141271e-05, + "loss": 0.0603, + "step": 62345 + }, + { + "epoch": 2.91, + "learning_rate": 1.0387503331086483e-05, + "loss": 0.1835, + "step": 62350 + }, + { + "epoch": 2.91, + "learning_rate": 1.0386719546031696e-05, + "loss": 0.1227, + "step": 62355 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385935760976911e-05, + "loss": 0.4369, + "step": 62360 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385151975922123e-05, + "loss": 0.3164, + "step": 62365 + }, + { + "epoch": 2.91, + "learning_rate": 1.0384368190867337e-05, + "loss": 0.0913, + "step": 62370 + }, + { + "epoch": 2.91, + "learning_rate": 1.038358440581255e-05, + "loss": 0.0212, + "step": 62375 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382800620757765e-05, + "loss": 0.0354, + "step": 62380 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382016835702977e-05, + "loss": 0.0694, + "step": 62385 + }, + { + "epoch": 2.91, + "learning_rate": 1.0381233050648191e-05, + "loss": 0.1128, + "step": 62390 + }, + { + "epoch": 2.91, + "learning_rate": 1.0380449265593403e-05, + "loss": 0.1718, + "step": 62395 + }, + { + "epoch": 2.91, + "learning_rate": 1.0379665480538619e-05, + "loss": 0.1168, + "step": 62400 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378881695483831e-05, + "loss": 0.2419, + "step": 62405 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378097910429045e-05, + "loss": 0.2319, + "step": 62410 + }, + { + "epoch": 2.91, + "learning_rate": 1.0377314125374259e-05, + "loss": 0.261, + "step": 62415 + }, + { + "epoch": 2.91, + "learning_rate": 1.0376530340319471e-05, + "loss": 0.0797, + "step": 62420 + }, + { + "epoch": 2.91, + "learning_rate": 1.0375746555264685e-05, + "loss": 0.0227, + "step": 62425 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374962770209897e-05, + "loss": 0.0566, + "step": 62430 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374178985155113e-05, + "loss": 0.1145, + "step": 62435 + }, + { + "epoch": 2.91, + "learning_rate": 1.0373395200100325e-05, + "loss": 0.0866, + "step": 62440 + }, + { + "epoch": 2.91, + "learning_rate": 1.0372611415045539e-05, + "loss": 0.0935, + "step": 62445 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371827629990751e-05, + "loss": 0.1222, + "step": 62450 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371043844935967e-05, + "loss": 0.1236, + "step": 62455 + }, + { + "epoch": 2.91, + "learning_rate": 1.0370260059881179e-05, + "loss": 0.2584, + "step": 62460 + }, + { + "epoch": 2.91, + "learning_rate": 1.0369476274826393e-05, + "loss": 0.3033, + "step": 62465 + }, + { + "epoch": 2.91, + "learning_rate": 1.0368692489771605e-05, + "loss": 0.0242, + "step": 62470 + }, + { + "epoch": 2.92, + "learning_rate": 1.036790870471682e-05, + "loss": 0.036, + "step": 62475 + }, + { + "epoch": 2.92, + "learning_rate": 1.0367124919662033e-05, + "loss": 0.0251, + "step": 62480 + }, + { + "epoch": 2.92, + "learning_rate": 1.0366341134607245e-05, + "loss": 0.0294, + "step": 62485 + }, + { + "epoch": 2.92, + "learning_rate": 1.0365557349552459e-05, + "loss": 0.0555, + "step": 62490 + }, + { + "epoch": 2.92, + "learning_rate": 1.0364773564497671e-05, + "loss": 0.0958, + "step": 62495 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363989779442887e-05, + "loss": 0.1204, + "step": 62500 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363205994388099e-05, + "loss": 0.0712, + "step": 62505 + }, + { + "epoch": 2.92, + "learning_rate": 1.0362422209333313e-05, + "loss": 0.1334, + "step": 62510 + }, + { + "epoch": 2.92, + "learning_rate": 1.0361638424278527e-05, + "loss": 0.2207, + "step": 62515 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360854639223741e-05, + "loss": 0.0133, + "step": 62520 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360070854168953e-05, + "loss": 0.0209, + "step": 62525 + }, + { + "epoch": 2.92, + "learning_rate": 1.0359287069114167e-05, + "loss": 0.0494, + "step": 62530 + }, + { + "epoch": 2.92, + "learning_rate": 1.0358503284059381e-05, + "loss": 0.0147, + "step": 62535 + }, + { + "epoch": 2.92, + "learning_rate": 1.0357719499004595e-05, + "loss": 0.1232, + "step": 62540 + }, + { + "epoch": 2.92, + "learning_rate": 1.0356935713949807e-05, + "loss": 0.0512, + "step": 62545 + }, + { + "epoch": 2.92, + "learning_rate": 1.035615192889502e-05, + "loss": 0.2343, + "step": 62550 + }, + { + "epoch": 2.92, + "learning_rate": 1.0355368143840235e-05, + "loss": 0.1384, + "step": 62555 + }, + { + "epoch": 2.92, + "learning_rate": 1.0354584358785447e-05, + "loss": 0.2172, + "step": 62560 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353800573730661e-05, + "loss": 0.2982, + "step": 62565 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353016788675873e-05, + "loss": 0.0251, + "step": 62570 + }, + { + "epoch": 2.92, + "learning_rate": 1.0352233003621089e-05, + "loss": 0.1112, + "step": 62575 + }, + { + "epoch": 2.92, + "learning_rate": 1.0351449218566301e-05, + "loss": 0.0654, + "step": 62580 + }, + { + "epoch": 2.92, + "learning_rate": 1.0350665433511515e-05, + "loss": 0.0838, + "step": 62585 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349881648456727e-05, + "loss": 0.1541, + "step": 62590 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349097863401943e-05, + "loss": 0.0852, + "step": 62595 + }, + { + "epoch": 2.92, + "learning_rate": 1.0348314078347155e-05, + "loss": 0.2073, + "step": 62600 + }, + { + "epoch": 2.92, + "learning_rate": 1.0347530293292369e-05, + "loss": 0.1479, + "step": 62605 + }, + { + "epoch": 2.92, + "learning_rate": 1.0346746508237581e-05, + "loss": 0.2269, + "step": 62610 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345962723182795e-05, + "loss": 0.1799, + "step": 62615 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345178938128009e-05, + "loss": 0.0507, + "step": 62620 + }, + { + "epoch": 2.92, + "learning_rate": 1.0344395153073221e-05, + "loss": 0.0694, + "step": 62625 + }, + { + "epoch": 2.92, + "learning_rate": 1.0343611368018437e-05, + "loss": 0.0565, + "step": 62630 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342827582963649e-05, + "loss": 0.0774, + "step": 62635 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342043797908863e-05, + "loss": 0.1522, + "step": 62640 + }, + { + "epoch": 2.92, + "learning_rate": 1.0341260012854075e-05, + "loss": 0.1715, + "step": 62645 + }, + { + "epoch": 2.92, + "learning_rate": 1.034047622779929e-05, + "loss": 0.1394, + "step": 62650 + }, + { + "epoch": 2.92, + "learning_rate": 1.0339692442744503e-05, + "loss": 0.2182, + "step": 62655 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338908657689717e-05, + "loss": 0.3029, + "step": 62660 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338124872634929e-05, + "loss": 0.2364, + "step": 62665 + }, + { + "epoch": 2.92, + "learning_rate": 1.0337341087580144e-05, + "loss": 0.036, + "step": 62670 + }, + { + "epoch": 2.92, + "learning_rate": 1.0336557302525357e-05, + "loss": 0.0416, + "step": 62675 + }, + { + "epoch": 2.92, + "learning_rate": 1.0335773517470569e-05, + "loss": 0.0485, + "step": 62680 + }, + { + "epoch": 2.92, + "learning_rate": 1.0334989732415783e-05, + "loss": 0.0772, + "step": 62685 + }, + { + "epoch": 2.93, + "learning_rate": 1.0334205947360995e-05, + "loss": 0.0545, + "step": 62690 + }, + { + "epoch": 2.93, + "learning_rate": 1.033342216230621e-05, + "loss": 0.0758, + "step": 62695 + }, + { + "epoch": 2.93, + "learning_rate": 1.0332638377251423e-05, + "loss": 0.1142, + "step": 62700 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331854592196637e-05, + "loss": 0.1031, + "step": 62705 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331070807141849e-05, + "loss": 0.3682, + "step": 62710 + }, + { + "epoch": 2.93, + "learning_rate": 1.0330287022087065e-05, + "loss": 0.3997, + "step": 62715 + }, + { + "epoch": 2.93, + "learning_rate": 1.0329503237032277e-05, + "loss": 0.052, + "step": 62720 + }, + { + "epoch": 2.93, + "learning_rate": 1.032871945197749e-05, + "loss": 0.0213, + "step": 62725 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327935666922705e-05, + "loss": 0.0501, + "step": 62730 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327151881867918e-05, + "loss": 0.0711, + "step": 62735 + }, + { + "epoch": 2.93, + "learning_rate": 1.032636809681313e-05, + "loss": 0.1498, + "step": 62740 + }, + { + "epoch": 2.93, + "learning_rate": 1.0325584311758343e-05, + "loss": 0.1179, + "step": 62745 + }, + { + "epoch": 2.93, + "learning_rate": 1.0324800526703559e-05, + "loss": 0.1803, + "step": 62750 + }, + { + "epoch": 2.93, + "learning_rate": 1.032401674164877e-05, + "loss": 0.1619, + "step": 62755 + }, + { + "epoch": 2.93, + "learning_rate": 1.0323232956593985e-05, + "loss": 0.2089, + "step": 62760 + }, + { + "epoch": 2.93, + "learning_rate": 1.0322449171539197e-05, + "loss": 0.4382, + "step": 62765 + }, + { + "epoch": 2.93, + "learning_rate": 1.0321665386484412e-05, + "loss": 0.0535, + "step": 62770 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320881601429625e-05, + "loss": 0.029, + "step": 62775 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320097816374839e-05, + "loss": 0.0429, + "step": 62780 + }, + { + "epoch": 2.93, + "learning_rate": 1.031931403132005e-05, + "loss": 0.0829, + "step": 62785 + }, + { + "epoch": 2.93, + "learning_rate": 1.0318530246265266e-05, + "loss": 0.0953, + "step": 62790 + }, + { + "epoch": 2.93, + "learning_rate": 1.0317746461210479e-05, + "loss": 0.1233, + "step": 62795 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316962676155692e-05, + "loss": 0.0843, + "step": 62800 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316178891100905e-05, + "loss": 0.1054, + "step": 62805 + }, + { + "epoch": 2.93, + "learning_rate": 1.0315395106046117e-05, + "loss": 0.277, + "step": 62810 + }, + { + "epoch": 2.93, + "learning_rate": 1.0314611320991333e-05, + "loss": 0.4017, + "step": 62815 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313827535936545e-05, + "loss": 0.031, + "step": 62820 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313043750881759e-05, + "loss": 0.0184, + "step": 62825 + }, + { + "epoch": 2.93, + "learning_rate": 1.0312259965826973e-05, + "loss": 0.1057, + "step": 62830 + }, + { + "epoch": 2.93, + "learning_rate": 1.0311476180772186e-05, + "loss": 0.1594, + "step": 62835 + }, + { + "epoch": 2.93, + "learning_rate": 1.0310692395717399e-05, + "loss": 0.1388, + "step": 62840 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309908610662613e-05, + "loss": 0.0698, + "step": 62845 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309124825607826e-05, + "loss": 0.2027, + "step": 62850 + }, + { + "epoch": 2.93, + "learning_rate": 1.030834104055304e-05, + "loss": 0.0898, + "step": 62855 + }, + { + "epoch": 2.93, + "learning_rate": 1.0307557255498253e-05, + "loss": 0.2479, + "step": 62860 + }, + { + "epoch": 2.93, + "learning_rate": 1.0306773470443468e-05, + "loss": 0.2501, + "step": 62865 + }, + { + "epoch": 2.93, + "learning_rate": 1.030598968538868e-05, + "loss": 0.0262, + "step": 62870 + }, + { + "epoch": 2.93, + "learning_rate": 1.0305205900333893e-05, + "loss": 0.0397, + "step": 62875 + }, + { + "epoch": 2.93, + "learning_rate": 1.0304422115279107e-05, + "loss": 0.074, + "step": 62880 + }, + { + "epoch": 2.93, + "learning_rate": 1.0303638330224319e-05, + "loss": 0.0701, + "step": 62885 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302854545169534e-05, + "loss": 0.0451, + "step": 62890 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302070760114747e-05, + "loss": 0.1437, + "step": 62895 + }, + { + "epoch": 2.94, + "learning_rate": 1.030128697505996e-05, + "loss": 0.0555, + "step": 62900 + }, + { + "epoch": 2.94, + "learning_rate": 1.0300503190005173e-05, + "loss": 0.1305, + "step": 62905 + }, + { + "epoch": 2.94, + "learning_rate": 1.0299719404950388e-05, + "loss": 0.2862, + "step": 62910 + }, + { + "epoch": 2.94, + "learning_rate": 1.02989356198956e-05, + "loss": 0.2047, + "step": 62915 + }, + { + "epoch": 2.94, + "learning_rate": 1.0298151834840814e-05, + "loss": 0.0395, + "step": 62920 + }, + { + "epoch": 2.94, + "learning_rate": 1.0297368049786027e-05, + "loss": 0.0727, + "step": 62925 + }, + { + "epoch": 2.94, + "learning_rate": 1.0296584264731242e-05, + "loss": 0.0484, + "step": 62930 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295800479676454e-05, + "loss": 0.0634, + "step": 62935 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295016694621667e-05, + "loss": 0.1216, + "step": 62940 + }, + { + "epoch": 2.94, + "learning_rate": 1.0294232909566882e-05, + "loss": 0.0631, + "step": 62945 + }, + { + "epoch": 2.94, + "learning_rate": 1.0293449124512094e-05, + "loss": 0.0701, + "step": 62950 + }, + { + "epoch": 2.94, + "learning_rate": 1.0292665339457308e-05, + "loss": 0.1508, + "step": 62955 + }, + { + "epoch": 2.94, + "learning_rate": 1.029188155440252e-05, + "loss": 0.1583, + "step": 62960 + }, + { + "epoch": 2.94, + "learning_rate": 1.0291097769347736e-05, + "loss": 0.2747, + "step": 62965 + }, + { + "epoch": 2.94, + "learning_rate": 1.0290313984292948e-05, + "loss": 0.027, + "step": 62970 + }, + { + "epoch": 2.94, + "learning_rate": 1.0289530199238162e-05, + "loss": 0.0262, + "step": 62975 + }, + { + "epoch": 2.94, + "learning_rate": 1.0288746414183374e-05, + "loss": 0.0405, + "step": 62980 + }, + { + "epoch": 2.94, + "learning_rate": 1.028796262912859e-05, + "loss": 0.0914, + "step": 62985 + }, + { + "epoch": 2.94, + "learning_rate": 1.0287178844073802e-05, + "loss": 0.0421, + "step": 62990 + }, + { + "epoch": 2.94, + "learning_rate": 1.0286395059019016e-05, + "loss": 0.1202, + "step": 62995 + }, + { + "epoch": 2.94, + "learning_rate": 1.0285611273964228e-05, + "loss": 0.1285, + "step": 63000 + }, + { + "epoch": 2.94, + "learning_rate": 1.028482748890944e-05, + "loss": 0.169, + "step": 63005 + }, + { + "epoch": 2.94, + "learning_rate": 1.0284043703854656e-05, + "loss": 0.1355, + "step": 63010 + }, + { + "epoch": 2.94, + "learning_rate": 1.0283259918799868e-05, + "loss": 0.2761, + "step": 63015 + }, + { + "epoch": 2.94, + "learning_rate": 1.0282476133745082e-05, + "loss": 0.089, + "step": 63020 + }, + { + "epoch": 2.94, + "learning_rate": 1.0281692348690295e-05, + "loss": 0.0571, + "step": 63025 + }, + { + "epoch": 2.94, + "learning_rate": 1.028090856363551e-05, + "loss": 0.0232, + "step": 63030 + }, + { + "epoch": 2.94, + "learning_rate": 1.0280124778580722e-05, + "loss": 0.0644, + "step": 63035 + }, + { + "epoch": 2.94, + "learning_rate": 1.0279340993525936e-05, + "loss": 0.0646, + "step": 63040 + }, + { + "epoch": 2.94, + "learning_rate": 1.027855720847115e-05, + "loss": 0.1479, + "step": 63045 + }, + { + "epoch": 2.94, + "learning_rate": 1.0277773423416364e-05, + "loss": 0.1911, + "step": 63050 + }, + { + "epoch": 2.94, + "learning_rate": 1.0276989638361576e-05, + "loss": 0.1927, + "step": 63055 + }, + { + "epoch": 2.94, + "learning_rate": 1.027620585330679e-05, + "loss": 0.1748, + "step": 63060 + }, + { + "epoch": 2.94, + "learning_rate": 1.0275422068252004e-05, + "loss": 0.2667, + "step": 63065 + }, + { + "epoch": 2.94, + "learning_rate": 1.0274638283197216e-05, + "loss": 0.0506, + "step": 63070 + }, + { + "epoch": 2.94, + "learning_rate": 1.027385449814243e-05, + "loss": 0.0323, + "step": 63075 + }, + { + "epoch": 2.94, + "learning_rate": 1.0273070713087642e-05, + "loss": 0.0294, + "step": 63080 + }, + { + "epoch": 2.94, + "learning_rate": 1.0272286928032858e-05, + "loss": 0.0357, + "step": 63085 + }, + { + "epoch": 2.94, + "learning_rate": 1.027150314297807e-05, + "loss": 0.0781, + "step": 63090 + }, + { + "epoch": 2.94, + "learning_rate": 1.0270719357923284e-05, + "loss": 0.1175, + "step": 63095 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269935572868496e-05, + "loss": 0.1468, + "step": 63100 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269151787813712e-05, + "loss": 0.1011, + "step": 63105 + }, + { + "epoch": 2.94, + "learning_rate": 1.0268368002758924e-05, + "loss": 0.2504, + "step": 63110 + }, + { + "epoch": 2.95, + "learning_rate": 1.0267584217704138e-05, + "loss": 0.3333, + "step": 63115 + }, + { + "epoch": 2.95, + "learning_rate": 1.026680043264935e-05, + "loss": 0.03, + "step": 63120 + }, + { + "epoch": 2.95, + "learning_rate": 1.0266016647594566e-05, + "loss": 0.0314, + "step": 63125 + }, + { + "epoch": 2.95, + "learning_rate": 1.0265232862539778e-05, + "loss": 0.0549, + "step": 63130 + }, + { + "epoch": 2.95, + "learning_rate": 1.026444907748499e-05, + "loss": 0.0711, + "step": 63135 + }, + { + "epoch": 2.95, + "learning_rate": 1.0263665292430204e-05, + "loss": 0.0903, + "step": 63140 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262881507375418e-05, + "loss": 0.1095, + "step": 63145 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262097722320632e-05, + "loss": 0.1305, + "step": 63150 + }, + { + "epoch": 2.95, + "learning_rate": 1.0261313937265844e-05, + "loss": 0.1326, + "step": 63155 + }, + { + "epoch": 2.95, + "learning_rate": 1.026053015221106e-05, + "loss": 0.2379, + "step": 63160 + }, + { + "epoch": 2.95, + "learning_rate": 1.0259746367156272e-05, + "loss": 0.3484, + "step": 63165 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258962582101486e-05, + "loss": 0.0702, + "step": 63170 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258178797046698e-05, + "loss": 0.0214, + "step": 63175 + }, + { + "epoch": 2.95, + "learning_rate": 1.0257395011991914e-05, + "loss": 0.0748, + "step": 63180 + }, + { + "epoch": 2.95, + "learning_rate": 1.0256611226937126e-05, + "loss": 0.046, + "step": 63185 + }, + { + "epoch": 2.95, + "learning_rate": 1.025582744188234e-05, + "loss": 0.0891, + "step": 63190 + }, + { + "epoch": 2.95, + "learning_rate": 1.0255043656827552e-05, + "loss": 0.0839, + "step": 63195 + }, + { + "epoch": 2.95, + "learning_rate": 1.0254259871772764e-05, + "loss": 0.1694, + "step": 63200 + }, + { + "epoch": 2.95, + "learning_rate": 1.025347608671798e-05, + "loss": 0.1068, + "step": 63205 + }, + { + "epoch": 2.95, + "learning_rate": 1.0252692301663192e-05, + "loss": 0.3257, + "step": 63210 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251908516608406e-05, + "loss": 0.21, + "step": 63215 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251124731553618e-05, + "loss": 0.0523, + "step": 63220 + }, + { + "epoch": 2.95, + "learning_rate": 1.0250340946498834e-05, + "loss": 0.0437, + "step": 63225 + }, + { + "epoch": 2.95, + "learning_rate": 1.0249557161444046e-05, + "loss": 0.0957, + "step": 63230 + }, + { + "epoch": 2.95, + "learning_rate": 1.024877337638926e-05, + "loss": 0.0554, + "step": 63235 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247989591334472e-05, + "loss": 0.0969, + "step": 63240 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247205806279688e-05, + "loss": 0.1589, + "step": 63245 + }, + { + "epoch": 2.95, + "learning_rate": 1.02464220212249e-05, + "loss": 0.1167, + "step": 63250 + }, + { + "epoch": 2.95, + "learning_rate": 1.0245638236170114e-05, + "loss": 0.1879, + "step": 63255 + }, + { + "epoch": 2.95, + "learning_rate": 1.0244854451115328e-05, + "loss": 0.1869, + "step": 63260 + }, + { + "epoch": 2.95, + "learning_rate": 1.024407066606054e-05, + "loss": 0.1766, + "step": 63265 + }, + { + "epoch": 2.95, + "learning_rate": 1.0243286881005754e-05, + "loss": 0.0105, + "step": 63270 + }, + { + "epoch": 2.95, + "learning_rate": 1.0242503095950966e-05, + "loss": 0.0834, + "step": 63275 + }, + { + "epoch": 2.95, + "learning_rate": 1.0241719310896182e-05, + "loss": 0.0791, + "step": 63280 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240935525841394e-05, + "loss": 0.0993, + "step": 63285 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240151740786608e-05, + "loss": 0.0776, + "step": 63290 + }, + { + "epoch": 2.95, + "learning_rate": 1.023936795573182e-05, + "loss": 0.1271, + "step": 63295 + }, + { + "epoch": 2.95, + "learning_rate": 1.0238584170677036e-05, + "loss": 0.0777, + "step": 63300 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237800385622248e-05, + "loss": 0.2114, + "step": 63305 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237016600567462e-05, + "loss": 0.2023, + "step": 63310 + }, + { + "epoch": 2.95, + "learning_rate": 1.0236232815512674e-05, + "loss": 0.2993, + "step": 63315 + }, + { + "epoch": 2.95, + "learning_rate": 1.023544903045789e-05, + "loss": 0.0464, + "step": 63320 + }, + { + "epoch": 2.95, + "learning_rate": 1.0234665245403102e-05, + "loss": 0.0378, + "step": 63325 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233881460348314e-05, + "loss": 0.0215, + "step": 63330 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233097675293528e-05, + "loss": 0.0999, + "step": 63335 + }, + { + "epoch": 2.96, + "learning_rate": 1.023231389023874e-05, + "loss": 0.0813, + "step": 63340 + }, + { + "epoch": 2.96, + "learning_rate": 1.0231530105183956e-05, + "loss": 0.0367, + "step": 63345 + }, + { + "epoch": 2.96, + "learning_rate": 1.0230746320129168e-05, + "loss": 0.1402, + "step": 63350 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229962535074382e-05, + "loss": 0.2003, + "step": 63355 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229178750019596e-05, + "loss": 0.1149, + "step": 63360 + }, + { + "epoch": 2.96, + "learning_rate": 1.022839496496481e-05, + "loss": 0.3584, + "step": 63365 + }, + { + "epoch": 2.96, + "learning_rate": 1.0227611179910022e-05, + "loss": 0.1109, + "step": 63370 + }, + { + "epoch": 2.96, + "learning_rate": 1.0226827394855236e-05, + "loss": 0.0422, + "step": 63375 + }, + { + "epoch": 2.96, + "learning_rate": 1.022604360980045e-05, + "loss": 0.0409, + "step": 63380 + }, + { + "epoch": 2.96, + "learning_rate": 1.0225259824745664e-05, + "loss": 0.0294, + "step": 63385 + }, + { + "epoch": 2.96, + "learning_rate": 1.0224476039690876e-05, + "loss": 0.0393, + "step": 63390 + }, + { + "epoch": 2.96, + "learning_rate": 1.0223692254636088e-05, + "loss": 0.0275, + "step": 63395 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222908469581304e-05, + "loss": 0.1272, + "step": 63400 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222124684526516e-05, + "loss": 0.1611, + "step": 63405 + }, + { + "epoch": 2.96, + "learning_rate": 1.022134089947173e-05, + "loss": 0.3513, + "step": 63410 + }, + { + "epoch": 2.96, + "learning_rate": 1.0220557114416942e-05, + "loss": 0.2599, + "step": 63415 + }, + { + "epoch": 2.96, + "learning_rate": 1.0219773329362158e-05, + "loss": 0.0898, + "step": 63420 + }, + { + "epoch": 2.96, + "learning_rate": 1.021898954430737e-05, + "loss": 0.0381, + "step": 63425 + }, + { + "epoch": 2.96, + "learning_rate": 1.0218205759252584e-05, + "loss": 0.0469, + "step": 63430 + }, + { + "epoch": 2.96, + "learning_rate": 1.0217421974197796e-05, + "loss": 0.0635, + "step": 63435 + }, + { + "epoch": 2.96, + "learning_rate": 1.0216638189143011e-05, + "loss": 0.0756, + "step": 63440 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215854404088224e-05, + "loss": 0.0884, + "step": 63445 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215070619033438e-05, + "loss": 0.1282, + "step": 63450 + }, + { + "epoch": 2.96, + "learning_rate": 1.021428683397865e-05, + "loss": 0.1419, + "step": 63455 + }, + { + "epoch": 2.96, + "learning_rate": 1.0213503048923864e-05, + "loss": 0.2619, + "step": 63460 + }, + { + "epoch": 2.96, + "learning_rate": 1.0212719263869078e-05, + "loss": 0.2593, + "step": 63465 + }, + { + "epoch": 2.96, + "learning_rate": 1.021193547881429e-05, + "loss": 0.0369, + "step": 63470 + }, + { + "epoch": 2.96, + "learning_rate": 1.0211151693759505e-05, + "loss": 0.0356, + "step": 63475 + }, + { + "epoch": 2.96, + "learning_rate": 1.0210367908704718e-05, + "loss": 0.0794, + "step": 63480 + }, + { + "epoch": 2.96, + "learning_rate": 1.0209584123649932e-05, + "loss": 0.0407, + "step": 63485 + }, + { + "epoch": 2.96, + "learning_rate": 1.0208800338595144e-05, + "loss": 0.0817, + "step": 63490 + }, + { + "epoch": 2.96, + "learning_rate": 1.020801655354036e-05, + "loss": 0.0535, + "step": 63495 + }, + { + "epoch": 2.96, + "learning_rate": 1.0207232768485572e-05, + "loss": 0.1159, + "step": 63500 + }, + { + "epoch": 2.96, + "learning_rate": 1.0206448983430785e-05, + "loss": 0.2946, + "step": 63505 + }, + { + "epoch": 2.96, + "learning_rate": 1.0205665198375998e-05, + "loss": 0.2417, + "step": 63510 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204881413321213e-05, + "loss": 0.226, + "step": 63515 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204097628266425e-05, + "loss": 0.0084, + "step": 63520 + }, + { + "epoch": 2.96, + "learning_rate": 1.0203313843211638e-05, + "loss": 0.041, + "step": 63525 + }, + { + "epoch": 2.96, + "learning_rate": 1.0202530058156852e-05, + "loss": 0.0369, + "step": 63530 + }, + { + "epoch": 2.96, + "learning_rate": 1.0201746273102064e-05, + "loss": 0.0958, + "step": 63535 + }, + { + "epoch": 2.96, + "learning_rate": 1.020096248804728e-05, + "loss": 0.1259, + "step": 63540 + }, + { + "epoch": 2.97, + "learning_rate": 1.0200178702992492e-05, + "loss": 0.1039, + "step": 63545 + }, + { + "epoch": 2.97, + "learning_rate": 1.0199394917937706e-05, + "loss": 0.1852, + "step": 63550 + }, + { + "epoch": 2.97, + "learning_rate": 1.0198611132882918e-05, + "loss": 0.1922, + "step": 63555 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197827347828133e-05, + "loss": 0.2911, + "step": 63560 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197043562773346e-05, + "loss": 0.2036, + "step": 63565 + }, + { + "epoch": 2.97, + "learning_rate": 1.019625977771856e-05, + "loss": 0.0676, + "step": 63570 + }, + { + "epoch": 2.97, + "learning_rate": 1.0195475992663773e-05, + "loss": 0.0323, + "step": 63575 + }, + { + "epoch": 2.97, + "learning_rate": 1.0194692207608987e-05, + "loss": 0.0438, + "step": 63580 + }, + { + "epoch": 2.97, + "learning_rate": 1.01939084225542e-05, + "loss": 0.0627, + "step": 63585 + }, + { + "epoch": 2.97, + "learning_rate": 1.0193124637499412e-05, + "loss": 0.1465, + "step": 63590 + }, + { + "epoch": 2.97, + "learning_rate": 1.0192340852444627e-05, + "loss": 0.0959, + "step": 63595 + }, + { + "epoch": 2.97, + "learning_rate": 1.019155706738984e-05, + "loss": 0.0919, + "step": 63600 + }, + { + "epoch": 2.97, + "learning_rate": 1.0190773282335053e-05, + "loss": 0.194, + "step": 63605 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189989497280266e-05, + "loss": 0.1797, + "step": 63610 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189205712225481e-05, + "loss": 0.2537, + "step": 63615 + }, + { + "epoch": 2.97, + "learning_rate": 1.0188421927170693e-05, + "loss": 0.0516, + "step": 63620 + }, + { + "epoch": 2.97, + "learning_rate": 1.0187638142115907e-05, + "loss": 0.0706, + "step": 63625 + }, + { + "epoch": 2.97, + "learning_rate": 1.018685435706112e-05, + "loss": 0.0674, + "step": 63630 + }, + { + "epoch": 2.97, + "learning_rate": 1.0186070572006335e-05, + "loss": 0.0511, + "step": 63635 + }, + { + "epoch": 2.97, + "learning_rate": 1.0185286786951547e-05, + "loss": 0.0479, + "step": 63640 + }, + { + "epoch": 2.97, + "learning_rate": 1.0184503001896761e-05, + "loss": 0.1024, + "step": 63645 + }, + { + "epoch": 2.97, + "learning_rate": 1.0183719216841973e-05, + "loss": 0.0979, + "step": 63650 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182935431787186e-05, + "loss": 0.1912, + "step": 63655 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182151646732401e-05, + "loss": 0.23, + "step": 63660 + }, + { + "epoch": 2.97, + "learning_rate": 1.0181367861677613e-05, + "loss": 0.3174, + "step": 63665 + }, + { + "epoch": 2.97, + "learning_rate": 1.0180584076622827e-05, + "loss": 0.0348, + "step": 63670 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179800291568041e-05, + "loss": 0.0672, + "step": 63675 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179016506513255e-05, + "loss": 0.0229, + "step": 63680 + }, + { + "epoch": 2.97, + "learning_rate": 1.0178232721458467e-05, + "loss": 0.0771, + "step": 63685 + }, + { + "epoch": 2.97, + "learning_rate": 1.0177448936403681e-05, + "loss": 0.114, + "step": 63690 + }, + { + "epoch": 2.97, + "learning_rate": 1.0176665151348895e-05, + "loss": 0.0955, + "step": 63695 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175881366294109e-05, + "loss": 0.0727, + "step": 63700 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175097581239321e-05, + "loss": 0.0753, + "step": 63705 + }, + { + "epoch": 2.97, + "learning_rate": 1.0174313796184537e-05, + "loss": 0.1925, + "step": 63710 + }, + { + "epoch": 2.97, + "learning_rate": 1.0173530011129749e-05, + "loss": 0.2355, + "step": 63715 + }, + { + "epoch": 2.97, + "learning_rate": 1.0172746226074961e-05, + "loss": 0.0492, + "step": 63720 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171962441020175e-05, + "loss": 0.033, + "step": 63725 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171178655965387e-05, + "loss": 0.0612, + "step": 63730 + }, + { + "epoch": 2.97, + "learning_rate": 1.0170394870910603e-05, + "loss": 0.0285, + "step": 63735 + }, + { + "epoch": 2.97, + "learning_rate": 1.0169611085855815e-05, + "loss": 0.0306, + "step": 63740 + }, + { + "epoch": 2.97, + "learning_rate": 1.016882730080103e-05, + "loss": 0.0759, + "step": 63745 + }, + { + "epoch": 2.97, + "learning_rate": 1.0168043515746241e-05, + "loss": 0.1336, + "step": 63750 + }, + { + "epoch": 2.97, + "learning_rate": 1.0167259730691457e-05, + "loss": 0.2051, + "step": 63755 + }, + { + "epoch": 2.98, + "learning_rate": 1.016647594563667e-05, + "loss": 0.1794, + "step": 63760 + }, + { + "epoch": 2.98, + "learning_rate": 1.0165692160581883e-05, + "loss": 0.2222, + "step": 63765 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164908375527095e-05, + "loss": 0.0448, + "step": 63770 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164124590472311e-05, + "loss": 0.1489, + "step": 63775 + }, + { + "epoch": 2.98, + "learning_rate": 1.0163340805417523e-05, + "loss": 0.0714, + "step": 63780 + }, + { + "epoch": 2.98, + "learning_rate": 1.0162557020362735e-05, + "loss": 0.1082, + "step": 63785 + }, + { + "epoch": 2.98, + "learning_rate": 1.0161773235307951e-05, + "loss": 0.0898, + "step": 63790 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160989450253163e-05, + "loss": 0.0668, + "step": 63795 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160205665198377e-05, + "loss": 0.0864, + "step": 63800 + }, + { + "epoch": 2.98, + "learning_rate": 1.015942188014359e-05, + "loss": 0.1504, + "step": 63805 + }, + { + "epoch": 2.98, + "learning_rate": 1.0158638095088805e-05, + "loss": 0.1931, + "step": 63810 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157854310034017e-05, + "loss": 0.1701, + "step": 63815 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157070524979231e-05, + "loss": 0.0198, + "step": 63820 + }, + { + "epoch": 2.98, + "learning_rate": 1.0156286739924443e-05, + "loss": 0.064, + "step": 63825 + }, + { + "epoch": 2.98, + "learning_rate": 1.0155502954869659e-05, + "loss": 0.0866, + "step": 63830 + }, + { + "epoch": 2.98, + "learning_rate": 1.0154719169814871e-05, + "loss": 0.0517, + "step": 63835 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153935384760085e-05, + "loss": 0.0199, + "step": 63840 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153151599705297e-05, + "loss": 0.1171, + "step": 63845 + }, + { + "epoch": 2.98, + "learning_rate": 1.015236781465051e-05, + "loss": 0.1281, + "step": 63850 + }, + { + "epoch": 2.98, + "learning_rate": 1.0151584029595725e-05, + "loss": 0.1711, + "step": 63855 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150800244540937e-05, + "loss": 0.1402, + "step": 63860 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150016459486151e-05, + "loss": 0.2753, + "step": 63865 + }, + { + "epoch": 2.98, + "learning_rate": 1.0149232674431363e-05, + "loss": 0.0264, + "step": 63870 + }, + { + "epoch": 2.98, + "learning_rate": 1.0148448889376579e-05, + "loss": 0.0742, + "step": 63875 + }, + { + "epoch": 2.98, + "learning_rate": 1.0147665104321791e-05, + "loss": 0.0883, + "step": 63880 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146881319267005e-05, + "loss": 0.059, + "step": 63885 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146097534212219e-05, + "loss": 0.0566, + "step": 63890 + }, + { + "epoch": 2.98, + "learning_rate": 1.0145313749157433e-05, + "loss": 0.2492, + "step": 63895 + }, + { + "epoch": 2.98, + "learning_rate": 1.0144529964102645e-05, + "loss": 0.1694, + "step": 63900 + }, + { + "epoch": 2.98, + "learning_rate": 1.0143746179047859e-05, + "loss": 0.1478, + "step": 63905 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142962393993073e-05, + "loss": 0.2933, + "step": 63910 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142178608938285e-05, + "loss": 0.3151, + "step": 63915 + }, + { + "epoch": 2.98, + "learning_rate": 1.0141394823883499e-05, + "loss": 0.0621, + "step": 63920 + }, + { + "epoch": 2.98, + "learning_rate": 1.0140611038828711e-05, + "loss": 0.0457, + "step": 63925 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139827253773927e-05, + "loss": 0.0642, + "step": 63930 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139043468719139e-05, + "loss": 0.091, + "step": 63935 + }, + { + "epoch": 2.98, + "learning_rate": 1.0138259683664353e-05, + "loss": 0.078, + "step": 63940 + }, + { + "epoch": 2.98, + "learning_rate": 1.0137475898609565e-05, + "loss": 0.0961, + "step": 63945 + }, + { + "epoch": 2.98, + "learning_rate": 1.013669211355478e-05, + "loss": 0.212, + "step": 63950 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135908328499993e-05, + "loss": 0.1122, + "step": 63955 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135124543445207e-05, + "loss": 0.1807, + "step": 63960 + }, + { + "epoch": 2.98, + "learning_rate": 1.0134340758390419e-05, + "loss": 0.216, + "step": 63965 + }, + { + "epoch": 2.98, + "learning_rate": 1.0133556973335635e-05, + "loss": 0.0358, + "step": 63970 + }, + { + "epoch": 2.99, + "learning_rate": 1.0132773188280847e-05, + "loss": 0.291, + "step": 63975 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131989403226059e-05, + "loss": 0.0242, + "step": 63980 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131205618171273e-05, + "loss": 0.1145, + "step": 63985 + }, + { + "epoch": 2.99, + "learning_rate": 1.0130421833116487e-05, + "loss": 0.0473, + "step": 63990 + }, + { + "epoch": 2.99, + "learning_rate": 1.01296380480617e-05, + "loss": 0.1068, + "step": 63995 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128854263006913e-05, + "loss": 0.0515, + "step": 64000 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128070477952127e-05, + "loss": 0.1869, + "step": 64005 + }, + { + "epoch": 2.99, + "learning_rate": 1.012728669289734e-05, + "loss": 0.2932, + "step": 64010 + }, + { + "epoch": 2.99, + "learning_rate": 1.0126502907842555e-05, + "loss": 0.3333, + "step": 64015 + }, + { + "epoch": 2.99, + "learning_rate": 1.0125719122787767e-05, + "loss": 0.0225, + "step": 64020 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124935337732983e-05, + "loss": 0.0358, + "step": 64025 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124151552678195e-05, + "loss": 0.0832, + "step": 64030 + }, + { + "epoch": 2.99, + "learning_rate": 1.0123367767623409e-05, + "loss": 0.0553, + "step": 64035 + }, + { + "epoch": 2.99, + "learning_rate": 1.012258398256862e-05, + "loss": 0.0834, + "step": 64040 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121800197513833e-05, + "loss": 0.1044, + "step": 64045 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121016412459049e-05, + "loss": 0.1887, + "step": 64050 + }, + { + "epoch": 2.99, + "learning_rate": 1.0120232627404261e-05, + "loss": 0.2406, + "step": 64055 + }, + { + "epoch": 2.99, + "learning_rate": 1.0119448842349475e-05, + "loss": 0.1824, + "step": 64060 + }, + { + "epoch": 2.99, + "learning_rate": 1.0118665057294687e-05, + "loss": 0.3448, + "step": 64065 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117881272239903e-05, + "loss": 0.0214, + "step": 64070 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117097487185115e-05, + "loss": 0.0121, + "step": 64075 + }, + { + "epoch": 2.99, + "learning_rate": 1.0116313702130329e-05, + "loss": 0.0553, + "step": 64080 + }, + { + "epoch": 2.99, + "learning_rate": 1.0115529917075541e-05, + "loss": 0.0827, + "step": 64085 + }, + { + "epoch": 2.99, + "learning_rate": 1.0114746132020757e-05, + "loss": 0.0886, + "step": 64090 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113962346965969e-05, + "loss": 0.0533, + "step": 64095 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113178561911183e-05, + "loss": 0.143, + "step": 64100 + }, + { + "epoch": 2.99, + "learning_rate": 1.0112394776856397e-05, + "loss": 0.1127, + "step": 64105 + }, + { + "epoch": 2.99, + "learning_rate": 1.0111610991801609e-05, + "loss": 0.2126, + "step": 64110 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110827206746823e-05, + "loss": 0.2579, + "step": 64115 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110043421692035e-05, + "loss": 0.0479, + "step": 64120 + }, + { + "epoch": 2.99, + "learning_rate": 1.010925963663725e-05, + "loss": 0.0203, + "step": 64125 + }, + { + "epoch": 2.99, + "learning_rate": 1.0108475851582463e-05, + "loss": 0.031, + "step": 64130 + }, + { + "epoch": 2.99, + "learning_rate": 1.0107692066527677e-05, + "loss": 0.1182, + "step": 64135 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106908281472889e-05, + "loss": 0.059, + "step": 64140 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106124496418104e-05, + "loss": 0.1079, + "step": 64145 + }, + { + "epoch": 2.99, + "learning_rate": 1.0105340711363317e-05, + "loss": 0.0826, + "step": 64150 + }, + { + "epoch": 2.99, + "learning_rate": 1.010455692630853e-05, + "loss": 0.1238, + "step": 64155 + }, + { + "epoch": 2.99, + "learning_rate": 1.0103773141253743e-05, + "loss": 0.1699, + "step": 64160 + }, + { + "epoch": 2.99, + "learning_rate": 1.0102989356198958e-05, + "loss": 0.224, + "step": 64165 + }, + { + "epoch": 2.99, + "learning_rate": 1.010220557114417e-05, + "loss": 0.0326, + "step": 64170 + }, + { + "epoch": 2.99, + "learning_rate": 1.0101421786089383e-05, + "loss": 0.061, + "step": 64175 + }, + { + "epoch": 2.99, + "learning_rate": 1.0100638001034597e-05, + "loss": 0.0567, + "step": 64180 + }, + { + "epoch": 2.99, + "learning_rate": 1.0099854215979809e-05, + "loss": 0.0547, + "step": 64185 + }, + { + "epoch": 3.0, + "learning_rate": 1.0099070430925024e-05, + "loss": 0.0667, + "step": 64190 + }, + { + "epoch": 3.0, + "learning_rate": 1.0098286645870237e-05, + "loss": 0.1063, + "step": 64195 + }, + { + "epoch": 3.0, + "learning_rate": 1.009750286081545e-05, + "loss": 0.1263, + "step": 64200 + }, + { + "epoch": 3.0, + "learning_rate": 1.0096719075760664e-05, + "loss": 0.1766, + "step": 64205 + }, + { + "epoch": 3.0, + "learning_rate": 1.0095935290705878e-05, + "loss": 0.2074, + "step": 64210 + }, + { + "epoch": 3.0, + "learning_rate": 1.009515150565109e-05, + "loss": 0.2975, + "step": 64215 + }, + { + "epoch": 3.0, + "learning_rate": 1.0094367720596305e-05, + "loss": 0.0328, + "step": 64220 + }, + { + "epoch": 3.0, + "learning_rate": 1.0093583935541518e-05, + "loss": 0.0301, + "step": 64225 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092800150486732e-05, + "loss": 0.051, + "step": 64230 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092016365431945e-05, + "loss": 0.0794, + "step": 64235 + }, + { + "epoch": 3.0, + "learning_rate": 1.0091232580377157e-05, + "loss": 0.2081, + "step": 64240 + }, + { + "epoch": 3.0, + "learning_rate": 1.0090448795322372e-05, + "loss": 0.0887, + "step": 64245 + }, + { + "epoch": 3.0, + "learning_rate": 1.0089665010267585e-05, + "loss": 0.0939, + "step": 64250 + }, + { + "epoch": 3.0, + "learning_rate": 1.0088881225212798e-05, + "loss": 0.152, + "step": 64255 + }, + { + "epoch": 3.0, + "learning_rate": 1.008809744015801e-05, + "loss": 0.1754, + "step": 64260 + }, + { + "epoch": 3.0, + "learning_rate": 1.0087313655103226e-05, + "loss": 0.2495, + "step": 64265 + }, + { + "epoch": 3.0, + "learning_rate": 1.0086529870048438e-05, + "loss": 0.0218, + "step": 64270 + }, + { + "epoch": 3.0, + "learning_rate": 1.0085746084993652e-05, + "loss": 0.0405, + "step": 64275 + }, + { + "epoch": 3.0, + "learning_rate": 1.0084962299938865e-05, + "loss": 0.0995, + "step": 64280 + }, + { + "epoch": 3.0, + "learning_rate": 1.008417851488408e-05, + "loss": 0.1217, + "step": 64285 + }, + { + "epoch": 3.0, + "learning_rate": 1.0083394729829292e-05, + "loss": 0.1712, + "step": 64290 + }, + { + "epoch": 3.0, + "eval_cer": 0.013233947127380689, + "eval_loss": 0.03575053811073303, + "eval_runtime": 477.3319, + "eval_samples_per_second": 39.909, + "eval_steps_per_second": 4.99, + "eval_wer": 0.11354552183567727, + "step": 64293 + }, + { + "epoch": 3.0, + "learning_rate": 1.0082610944774506e-05, + "loss": 0.1887, + "step": 64295 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081827159719719e-05, + "loss": 0.0727, + "step": 64300 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081043374664932e-05, + "loss": 0.0216, + "step": 64305 + }, + { + "epoch": 3.0, + "learning_rate": 1.0080259589610146e-05, + "loss": 0.0405, + "step": 64310 + }, + { + "epoch": 3.0, + "learning_rate": 1.0079475804555359e-05, + "loss": 0.083, + "step": 64315 + }, + { + "epoch": 3.0, + "learning_rate": 1.0078692019500574e-05, + "loss": 0.0631, + "step": 64320 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077908234445786e-05, + "loss": 0.0888, + "step": 64325 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077124449391e-05, + "loss": 0.2566, + "step": 64330 + }, + { + "epoch": 3.0, + "learning_rate": 1.0076340664336212e-05, + "loss": 0.1636, + "step": 64335 + }, + { + "epoch": 3.0, + "learning_rate": 1.0075556879281428e-05, + "loss": 0.1579, + "step": 64340 + }, + { + "epoch": 3.0, + "learning_rate": 1.007477309422664e-05, + "loss": 0.2837, + "step": 64345 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073989309171854e-05, + "loss": 0.1294, + "step": 64350 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073205524117066e-05, + "loss": 0.038, + "step": 64355 + }, + { + "epoch": 3.0, + "learning_rate": 1.0072421739062282e-05, + "loss": 0.0945, + "step": 64360 + }, + { + "epoch": 3.0, + "learning_rate": 1.0071637954007494e-05, + "loss": 0.0394, + "step": 64365 + }, + { + "epoch": 3.0, + "learning_rate": 1.0070854168952706e-05, + "loss": 0.0792, + "step": 64370 + }, + { + "epoch": 3.0, + "learning_rate": 1.007007038389792e-05, + "loss": 0.114, + "step": 64375 + }, + { + "epoch": 3.0, + "learning_rate": 1.0069286598843133e-05, + "loss": 0.0888, + "step": 64380 + }, + { + "epoch": 3.0, + "learning_rate": 1.0068502813788348e-05, + "loss": 0.1336, + "step": 64385 + }, + { + "epoch": 3.0, + "learning_rate": 1.006771902873356e-05, + "loss": 0.2821, + "step": 64390 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066935243678774e-05, + "loss": 0.2377, + "step": 64395 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066151458623986e-05, + "loss": 0.1294, + "step": 64400 + }, + { + "epoch": 3.01, + "learning_rate": 1.0065367673569202e-05, + "loss": 0.0552, + "step": 64405 + }, + { + "epoch": 3.01, + "learning_rate": 1.0064583888514414e-05, + "loss": 0.0234, + "step": 64410 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063800103459628e-05, + "loss": 0.0932, + "step": 64415 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063016318404842e-05, + "loss": 0.1105, + "step": 64420 + }, + { + "epoch": 3.01, + "learning_rate": 1.0062232533350056e-05, + "loss": 0.0516, + "step": 64425 + }, + { + "epoch": 3.01, + "learning_rate": 1.0061448748295268e-05, + "loss": 0.2342, + "step": 64430 + }, + { + "epoch": 3.01, + "learning_rate": 1.006066496324048e-05, + "loss": 0.1758, + "step": 64435 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059881178185696e-05, + "loss": 0.1461, + "step": 64440 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059097393130908e-05, + "loss": 0.3334, + "step": 64445 + }, + { + "epoch": 3.01, + "learning_rate": 1.0058313608076122e-05, + "loss": 0.0562, + "step": 64450 + }, + { + "epoch": 3.01, + "learning_rate": 1.0057529823021334e-05, + "loss": 0.0572, + "step": 64455 + }, + { + "epoch": 3.01, + "learning_rate": 1.005674603796655e-05, + "loss": 0.0752, + "step": 64460 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055962252911762e-05, + "loss": 0.0533, + "step": 64465 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055178467856976e-05, + "loss": 0.0731, + "step": 64470 + }, + { + "epoch": 3.01, + "learning_rate": 1.0054394682802188e-05, + "loss": 0.0838, + "step": 64475 + }, + { + "epoch": 3.01, + "learning_rate": 1.0053610897747404e-05, + "loss": 0.0864, + "step": 64480 + }, + { + "epoch": 3.01, + "learning_rate": 1.0052827112692616e-05, + "loss": 0.2098, + "step": 64485 + }, + { + "epoch": 3.01, + "learning_rate": 1.005204332763783e-05, + "loss": 0.1538, + "step": 64490 + }, + { + "epoch": 3.01, + "learning_rate": 1.0051259542583042e-05, + "loss": 0.2549, + "step": 64495 + }, + { + "epoch": 3.01, + "learning_rate": 1.0050475757528254e-05, + "loss": 0.1237, + "step": 64500 + }, + { + "epoch": 3.01, + "learning_rate": 1.004969197247347e-05, + "loss": 0.0369, + "step": 64505 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048908187418682e-05, + "loss": 0.0278, + "step": 64510 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048124402363896e-05, + "loss": 0.074, + "step": 64515 + }, + { + "epoch": 3.01, + "learning_rate": 1.004734061730911e-05, + "loss": 0.0517, + "step": 64520 + }, + { + "epoch": 3.01, + "learning_rate": 1.0046556832254324e-05, + "loss": 0.0991, + "step": 64525 + }, + { + "epoch": 3.01, + "learning_rate": 1.0045773047199536e-05, + "loss": 0.1158, + "step": 64530 + }, + { + "epoch": 3.01, + "learning_rate": 1.004498926214475e-05, + "loss": 0.1029, + "step": 64535 + }, + { + "epoch": 3.01, + "learning_rate": 1.0044205477089964e-05, + "loss": 0.1561, + "step": 64540 + }, + { + "epoch": 3.01, + "learning_rate": 1.0043421692035178e-05, + "loss": 0.2435, + "step": 64545 + }, + { + "epoch": 3.01, + "learning_rate": 1.004263790698039e-05, + "loss": 0.0767, + "step": 64550 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041854121925606e-05, + "loss": 0.0818, + "step": 64555 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041070336870818e-05, + "loss": 0.0948, + "step": 64560 + }, + { + "epoch": 3.01, + "learning_rate": 1.004028655181603e-05, + "loss": 0.0388, + "step": 64565 + }, + { + "epoch": 3.01, + "learning_rate": 1.0039502766761244e-05, + "loss": 0.0989, + "step": 64570 + }, + { + "epoch": 3.01, + "learning_rate": 1.0038718981706456e-05, + "loss": 0.1033, + "step": 64575 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037935196651672e-05, + "loss": 0.0571, + "step": 64580 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037151411596884e-05, + "loss": 0.1396, + "step": 64585 + }, + { + "epoch": 3.01, + "learning_rate": 1.0036367626542098e-05, + "loss": 0.1573, + "step": 64590 + }, + { + "epoch": 3.01, + "learning_rate": 1.003558384148731e-05, + "loss": 0.2187, + "step": 64595 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034800056432526e-05, + "loss": 0.0839, + "step": 64600 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034016271377738e-05, + "loss": 0.0647, + "step": 64605 + }, + { + "epoch": 3.01, + "learning_rate": 1.0033232486322952e-05, + "loss": 0.0414, + "step": 64610 + }, + { + "epoch": 3.02, + "learning_rate": 1.0032448701268164e-05, + "loss": 0.0753, + "step": 64615 + }, + { + "epoch": 3.02, + "learning_rate": 1.003166491621338e-05, + "loss": 0.0433, + "step": 64620 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030881131158592e-05, + "loss": 0.0816, + "step": 64625 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030097346103804e-05, + "loss": 0.0984, + "step": 64630 + }, + { + "epoch": 3.02, + "learning_rate": 1.002931356104902e-05, + "loss": 0.1363, + "step": 64635 + }, + { + "epoch": 3.02, + "learning_rate": 1.002868653300519e-05, + "loss": 0.1344, + "step": 64640 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027902747950402e-05, + "loss": 0.2421, + "step": 64645 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027118962895616e-05, + "loss": 0.1472, + "step": 64650 + }, + { + "epoch": 3.02, + "learning_rate": 1.0026335177840828e-05, + "loss": 0.0602, + "step": 64655 + }, + { + "epoch": 3.02, + "learning_rate": 1.0025551392786044e-05, + "loss": 0.0546, + "step": 64660 + }, + { + "epoch": 3.02, + "learning_rate": 1.0024767607731256e-05, + "loss": 0.04, + "step": 64665 + }, + { + "epoch": 3.02, + "learning_rate": 1.002398382267647e-05, + "loss": 0.0398, + "step": 64670 + }, + { + "epoch": 3.02, + "learning_rate": 1.0023200037621682e-05, + "loss": 0.0285, + "step": 64675 + }, + { + "epoch": 3.02, + "learning_rate": 1.0022416252566898e-05, + "loss": 0.134, + "step": 64680 + }, + { + "epoch": 3.02, + "learning_rate": 1.002163246751211e-05, + "loss": 0.0968, + "step": 64685 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020848682457324e-05, + "loss": 0.2418, + "step": 64690 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020064897402536e-05, + "loss": 0.3335, + "step": 64695 + }, + { + "epoch": 3.02, + "learning_rate": 1.001928111234775e-05, + "loss": 0.065, + "step": 64700 + }, + { + "epoch": 3.02, + "learning_rate": 1.0018497327292964e-05, + "loss": 0.0142, + "step": 64705 + }, + { + "epoch": 3.02, + "learning_rate": 1.0017713542238176e-05, + "loss": 0.0253, + "step": 64710 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016929757183392e-05, + "loss": 0.0308, + "step": 64715 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016145972128604e-05, + "loss": 0.0628, + "step": 64720 + }, + { + "epoch": 3.02, + "learning_rate": 1.0015362187073818e-05, + "loss": 0.1409, + "step": 64725 + }, + { + "epoch": 3.02, + "learning_rate": 1.001457840201903e-05, + "loss": 0.0527, + "step": 64730 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013794616964246e-05, + "loss": 0.1638, + "step": 64735 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013010831909458e-05, + "loss": 0.1495, + "step": 64740 + }, + { + "epoch": 3.02, + "learning_rate": 1.0012227046854672e-05, + "loss": 0.3142, + "step": 64745 + }, + { + "epoch": 3.02, + "learning_rate": 1.0011443261799884e-05, + "loss": 0.0679, + "step": 64750 + }, + { + "epoch": 3.02, + "learning_rate": 1.00106594767451e-05, + "loss": 0.0159, + "step": 64755 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009875691690312e-05, + "loss": 0.0669, + "step": 64760 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009091906635524e-05, + "loss": 0.0254, + "step": 64765 + }, + { + "epoch": 3.02, + "learning_rate": 1.0008308121580738e-05, + "loss": 0.0483, + "step": 64770 + }, + { + "epoch": 3.02, + "learning_rate": 1.000752433652595e-05, + "loss": 0.0889, + "step": 64775 + }, + { + "epoch": 3.02, + "learning_rate": 1.0006740551471166e-05, + "loss": 0.0646, + "step": 64780 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005956766416378e-05, + "loss": 0.1802, + "step": 64785 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005172981361592e-05, + "loss": 0.2382, + "step": 64790 + }, + { + "epoch": 3.02, + "learning_rate": 1.0004389196306806e-05, + "loss": 0.3438, + "step": 64795 + }, + { + "epoch": 3.02, + "learning_rate": 1.000360541125202e-05, + "loss": 0.072, + "step": 64800 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002821626197232e-05, + "loss": 0.0219, + "step": 64805 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002037841142446e-05, + "loss": 0.0681, + "step": 64810 + }, + { + "epoch": 3.02, + "learning_rate": 1.000125405608766e-05, + "loss": 0.0652, + "step": 64815 + }, + { + "epoch": 3.02, + "learning_rate": 1.0000470271032874e-05, + "loss": 0.1291, + "step": 64820 + }, + { + "epoch": 3.02, + "learning_rate": 9.999686485978086e-06, + "loss": 0.1223, + "step": 64825 + }, + { + "epoch": 3.03, + "learning_rate": 9.9989027009233e-06, + "loss": 0.1653, + "step": 64830 + }, + { + "epoch": 3.03, + "learning_rate": 9.998118915868514e-06, + "loss": 0.1461, + "step": 64835 + }, + { + "epoch": 3.03, + "learning_rate": 9.997335130813726e-06, + "loss": 0.2772, + "step": 64840 + }, + { + "epoch": 3.03, + "learning_rate": 9.99655134575894e-06, + "loss": 0.2285, + "step": 64845 + }, + { + "epoch": 3.03, + "learning_rate": 9.995767560704154e-06, + "loss": 0.1091, + "step": 64850 + }, + { + "epoch": 3.03, + "learning_rate": 9.994983775649368e-06, + "loss": 0.0319, + "step": 64855 + }, + { + "epoch": 3.03, + "learning_rate": 9.99419999059458e-06, + "loss": 0.044, + "step": 64860 + }, + { + "epoch": 3.03, + "learning_rate": 9.993416205539794e-06, + "loss": 0.08, + "step": 64865 + }, + { + "epoch": 3.03, + "learning_rate": 9.992632420485006e-06, + "loss": 0.1067, + "step": 64870 + }, + { + "epoch": 3.03, + "learning_rate": 9.99184863543022e-06, + "loss": 0.1008, + "step": 64875 + }, + { + "epoch": 3.03, + "learning_rate": 9.991064850375434e-06, + "loss": 0.1137, + "step": 64880 + }, + { + "epoch": 3.03, + "learning_rate": 9.990281065320648e-06, + "loss": 0.0964, + "step": 64885 + }, + { + "epoch": 3.03, + "learning_rate": 9.98949728026586e-06, + "loss": 0.2726, + "step": 64890 + }, + { + "epoch": 3.03, + "learning_rate": 9.988713495211074e-06, + "loss": 0.3439, + "step": 64895 + }, + { + "epoch": 3.03, + "learning_rate": 9.987929710156288e-06, + "loss": 0.0521, + "step": 64900 + }, + { + "epoch": 3.03, + "learning_rate": 9.987145925101502e-06, + "loss": 0.0303, + "step": 64905 + }, + { + "epoch": 3.03, + "learning_rate": 9.986362140046714e-06, + "loss": 0.0604, + "step": 64910 + }, + { + "epoch": 3.03, + "learning_rate": 9.985578354991928e-06, + "loss": 0.0909, + "step": 64915 + }, + { + "epoch": 3.03, + "learning_rate": 9.984794569937142e-06, + "loss": 0.0717, + "step": 64920 + }, + { + "epoch": 3.03, + "learning_rate": 9.984010784882354e-06, + "loss": 0.0903, + "step": 64925 + }, + { + "epoch": 3.03, + "learning_rate": 9.983226999827568e-06, + "loss": 0.0838, + "step": 64930 + }, + { + "epoch": 3.03, + "learning_rate": 9.982443214772782e-06, + "loss": 0.1498, + "step": 64935 + }, + { + "epoch": 3.03, + "learning_rate": 9.981659429717994e-06, + "loss": 0.1015, + "step": 64940 + }, + { + "epoch": 3.03, + "learning_rate": 9.980875644663208e-06, + "loss": 0.2865, + "step": 64945 + }, + { + "epoch": 3.03, + "learning_rate": 9.980091859608422e-06, + "loss": 0.0189, + "step": 64950 + }, + { + "epoch": 3.03, + "learning_rate": 9.979308074553636e-06, + "loss": 0.0376, + "step": 64955 + }, + { + "epoch": 3.03, + "learning_rate": 9.978524289498848e-06, + "loss": 0.0662, + "step": 64960 + }, + { + "epoch": 3.03, + "learning_rate": 9.977740504444062e-06, + "loss": 0.0318, + "step": 64965 + }, + { + "epoch": 3.03, + "learning_rate": 9.976956719389276e-06, + "loss": 0.0951, + "step": 64970 + }, + { + "epoch": 3.03, + "learning_rate": 9.97617293433449e-06, + "loss": 0.0406, + "step": 64975 + }, + { + "epoch": 3.03, + "learning_rate": 9.975389149279704e-06, + "loss": 0.0863, + "step": 64980 + }, + { + "epoch": 3.03, + "learning_rate": 9.974605364224916e-06, + "loss": 0.1428, + "step": 64985 + }, + { + "epoch": 3.03, + "learning_rate": 9.973821579170128e-06, + "loss": 0.2493, + "step": 64990 + }, + { + "epoch": 3.03, + "learning_rate": 9.973037794115342e-06, + "loss": 0.2759, + "step": 64995 + }, + { + "epoch": 3.03, + "learning_rate": 9.972254009060556e-06, + "loss": 0.0916, + "step": 65000 + }, + { + "epoch": 3.03, + "learning_rate": 9.97147022400577e-06, + "loss": 0.029, + "step": 65005 + }, + { + "epoch": 3.03, + "learning_rate": 9.970686438950982e-06, + "loss": 0.0638, + "step": 65010 + }, + { + "epoch": 3.03, + "learning_rate": 9.969902653896196e-06, + "loss": 0.0404, + "step": 65015 + }, + { + "epoch": 3.03, + "learning_rate": 9.96911886884141e-06, + "loss": 0.0599, + "step": 65020 + }, + { + "epoch": 3.03, + "learning_rate": 9.968335083786624e-06, + "loss": 0.1324, + "step": 65025 + }, + { + "epoch": 3.03, + "learning_rate": 9.967551298731838e-06, + "loss": 0.1053, + "step": 65030 + }, + { + "epoch": 3.03, + "learning_rate": 9.96676751367705e-06, + "loss": 0.0871, + "step": 65035 + }, + { + "epoch": 3.03, + "learning_rate": 9.965983728622264e-06, + "loss": 0.3495, + "step": 65040 + }, + { + "epoch": 3.04, + "learning_rate": 9.965199943567478e-06, + "loss": 0.2196, + "step": 65045 + }, + { + "epoch": 3.04, + "learning_rate": 9.964416158512691e-06, + "loss": 0.0536, + "step": 65050 + }, + { + "epoch": 3.04, + "learning_rate": 9.963632373457904e-06, + "loss": 0.0329, + "step": 65055 + }, + { + "epoch": 3.04, + "learning_rate": 9.962848588403118e-06, + "loss": 0.0575, + "step": 65060 + }, + { + "epoch": 3.04, + "learning_rate": 9.96206480334833e-06, + "loss": 0.0783, + "step": 65065 + }, + { + "epoch": 3.04, + "learning_rate": 9.961281018293544e-06, + "loss": 0.0488, + "step": 65070 + }, + { + "epoch": 3.04, + "learning_rate": 9.960497233238758e-06, + "loss": 0.1067, + "step": 65075 + }, + { + "epoch": 3.04, + "learning_rate": 9.959713448183972e-06, + "loss": 0.1122, + "step": 65080 + }, + { + "epoch": 3.04, + "learning_rate": 9.958929663129184e-06, + "loss": 0.1402, + "step": 65085 + }, + { + "epoch": 3.04, + "learning_rate": 9.958145878074398e-06, + "loss": 0.1527, + "step": 65090 + }, + { + "epoch": 3.04, + "learning_rate": 9.957362093019612e-06, + "loss": 0.173, + "step": 65095 + }, + { + "epoch": 3.04, + "learning_rate": 9.956578307964825e-06, + "loss": 0.0723, + "step": 65100 + }, + { + "epoch": 3.04, + "learning_rate": 9.955794522910038e-06, + "loss": 0.0479, + "step": 65105 + }, + { + "epoch": 3.04, + "learning_rate": 9.955010737855252e-06, + "loss": 0.087, + "step": 65110 + }, + { + "epoch": 3.04, + "learning_rate": 9.954226952800465e-06, + "loss": 0.0572, + "step": 65115 + }, + { + "epoch": 3.04, + "learning_rate": 9.953443167745678e-06, + "loss": 0.0548, + "step": 65120 + }, + { + "epoch": 3.04, + "learning_rate": 9.952659382690892e-06, + "loss": 0.0851, + "step": 65125 + }, + { + "epoch": 3.04, + "learning_rate": 9.951875597636105e-06, + "loss": 0.1876, + "step": 65130 + }, + { + "epoch": 3.04, + "learning_rate": 9.951091812581318e-06, + "loss": 0.1328, + "step": 65135 + }, + { + "epoch": 3.04, + "learning_rate": 9.950308027526532e-06, + "loss": 0.1966, + "step": 65140 + }, + { + "epoch": 3.04, + "learning_rate": 9.949524242471746e-06, + "loss": 0.222, + "step": 65145 + }, + { + "epoch": 3.04, + "learning_rate": 9.94874045741696e-06, + "loss": 0.0636, + "step": 65150 + }, + { + "epoch": 3.04, + "learning_rate": 9.947956672362172e-06, + "loss": 0.055, + "step": 65155 + }, + { + "epoch": 3.04, + "learning_rate": 9.947172887307386e-06, + "loss": 0.0413, + "step": 65160 + }, + { + "epoch": 3.04, + "learning_rate": 9.9463891022526e-06, + "loss": 0.0801, + "step": 65165 + }, + { + "epoch": 3.04, + "learning_rate": 9.945605317197813e-06, + "loss": 0.081, + "step": 65170 + }, + { + "epoch": 3.04, + "learning_rate": 9.944821532143026e-06, + "loss": 0.0356, + "step": 65175 + }, + { + "epoch": 3.04, + "learning_rate": 9.94403774708824e-06, + "loss": 0.1325, + "step": 65180 + }, + { + "epoch": 3.04, + "learning_rate": 9.943253962033452e-06, + "loss": 0.1551, + "step": 65185 + }, + { + "epoch": 3.04, + "learning_rate": 9.942470176978666e-06, + "loss": 0.2802, + "step": 65190 + }, + { + "epoch": 3.04, + "learning_rate": 9.94168639192388e-06, + "loss": 0.4242, + "step": 65195 + }, + { + "epoch": 3.04, + "learning_rate": 9.940902606869093e-06, + "loss": 0.0853, + "step": 65200 + }, + { + "epoch": 3.04, + "learning_rate": 9.940118821814306e-06, + "loss": 0.0255, + "step": 65205 + }, + { + "epoch": 3.04, + "learning_rate": 9.93933503675952e-06, + "loss": 0.0622, + "step": 65210 + }, + { + "epoch": 3.04, + "learning_rate": 9.938551251704733e-06, + "loss": 0.0154, + "step": 65215 + }, + { + "epoch": 3.04, + "learning_rate": 9.937767466649947e-06, + "loss": 0.093, + "step": 65220 + }, + { + "epoch": 3.04, + "learning_rate": 9.93698368159516e-06, + "loss": 0.2092, + "step": 65225 + }, + { + "epoch": 3.04, + "learning_rate": 9.936199896540373e-06, + "loss": 0.0847, + "step": 65230 + }, + { + "epoch": 3.04, + "learning_rate": 9.935416111485587e-06, + "loss": 0.1413, + "step": 65235 + }, + { + "epoch": 3.04, + "learning_rate": 9.934632326430801e-06, + "loss": 0.2631, + "step": 65240 + }, + { + "epoch": 3.04, + "learning_rate": 9.933848541376015e-06, + "loss": 0.3179, + "step": 65245 + }, + { + "epoch": 3.04, + "learning_rate": 9.933064756321227e-06, + "loss": 0.0971, + "step": 65250 + }, + { + "epoch": 3.04, + "learning_rate": 9.93228097126644e-06, + "loss": 0.055, + "step": 65255 + }, + { + "epoch": 3.05, + "learning_rate": 9.931497186211653e-06, + "loss": 0.0182, + "step": 65260 + }, + { + "epoch": 3.05, + "learning_rate": 9.930713401156867e-06, + "loss": 0.1053, + "step": 65265 + }, + { + "epoch": 3.05, + "learning_rate": 9.929929616102081e-06, + "loss": 0.0475, + "step": 65270 + }, + { + "epoch": 3.05, + "learning_rate": 9.929145831047294e-06, + "loss": 0.0945, + "step": 65275 + }, + { + "epoch": 3.05, + "learning_rate": 9.928362045992507e-06, + "loss": 0.0834, + "step": 65280 + }, + { + "epoch": 3.05, + "learning_rate": 9.927578260937721e-06, + "loss": 0.1769, + "step": 65285 + }, + { + "epoch": 3.05, + "learning_rate": 9.926794475882935e-06, + "loss": 0.2825, + "step": 65290 + }, + { + "epoch": 3.05, + "learning_rate": 9.926010690828149e-06, + "loss": 0.2869, + "step": 65295 + }, + { + "epoch": 3.05, + "learning_rate": 9.925226905773361e-06, + "loss": 0.1088, + "step": 65300 + }, + { + "epoch": 3.05, + "learning_rate": 9.924443120718575e-06, + "loss": 0.035, + "step": 65305 + }, + { + "epoch": 3.05, + "learning_rate": 9.92365933566379e-06, + "loss": 0.0313, + "step": 65310 + }, + { + "epoch": 3.05, + "learning_rate": 9.922875550609001e-06, + "loss": 0.0684, + "step": 65315 + }, + { + "epoch": 3.05, + "learning_rate": 9.922091765554215e-06, + "loss": 0.0795, + "step": 65320 + }, + { + "epoch": 3.05, + "learning_rate": 9.921307980499427e-06, + "loss": 0.0704, + "step": 65325 + }, + { + "epoch": 3.05, + "learning_rate": 9.920524195444641e-06, + "loss": 0.125, + "step": 65330 + }, + { + "epoch": 3.05, + "learning_rate": 9.919740410389855e-06, + "loss": 0.1401, + "step": 65335 + }, + { + "epoch": 3.05, + "learning_rate": 9.91895662533507e-06, + "loss": 0.2818, + "step": 65340 + }, + { + "epoch": 3.05, + "learning_rate": 9.918172840280283e-06, + "loss": 0.2141, + "step": 65345 + }, + { + "epoch": 3.05, + "learning_rate": 9.917389055225495e-06, + "loss": 0.1145, + "step": 65350 + }, + { + "epoch": 3.05, + "learning_rate": 9.91660527017071e-06, + "loss": 0.0228, + "step": 65355 + }, + { + "epoch": 3.05, + "learning_rate": 9.915821485115923e-06, + "loss": 0.0151, + "step": 65360 + }, + { + "epoch": 3.05, + "learning_rate": 9.915037700061137e-06, + "loss": 0.0571, + "step": 65365 + }, + { + "epoch": 3.05, + "learning_rate": 9.91425391500635e-06, + "loss": 0.0924, + "step": 65370 + }, + { + "epoch": 3.05, + "learning_rate": 9.913470129951563e-06, + "loss": 0.0435, + "step": 65375 + }, + { + "epoch": 3.05, + "learning_rate": 9.912686344896775e-06, + "loss": 0.1203, + "step": 65380 + }, + { + "epoch": 3.05, + "learning_rate": 9.91190255984199e-06, + "loss": 0.1247, + "step": 65385 + }, + { + "epoch": 3.05, + "learning_rate": 9.911118774787203e-06, + "loss": 0.1618, + "step": 65390 + }, + { + "epoch": 3.05, + "learning_rate": 9.910334989732417e-06, + "loss": 0.2363, + "step": 65395 + }, + { + "epoch": 3.05, + "learning_rate": 9.90955120467763e-06, + "loss": 0.0811, + "step": 65400 + }, + { + "epoch": 3.05, + "learning_rate": 9.908767419622843e-06, + "loss": 0.0359, + "step": 65405 + }, + { + "epoch": 3.05, + "learning_rate": 9.907983634568057e-06, + "loss": 0.0697, + "step": 65410 + }, + { + "epoch": 3.05, + "learning_rate": 9.907199849513271e-06, + "loss": 0.0408, + "step": 65415 + }, + { + "epoch": 3.05, + "learning_rate": 9.906416064458483e-06, + "loss": 0.0404, + "step": 65420 + }, + { + "epoch": 3.05, + "learning_rate": 9.905632279403697e-06, + "loss": 0.1271, + "step": 65425 + }, + { + "epoch": 3.05, + "learning_rate": 9.904848494348911e-06, + "loss": 0.149, + "step": 65430 + }, + { + "epoch": 3.05, + "learning_rate": 9.904064709294125e-06, + "loss": 0.1755, + "step": 65435 + }, + { + "epoch": 3.05, + "learning_rate": 9.903280924239337e-06, + "loss": 0.2316, + "step": 65440 + }, + { + "epoch": 3.05, + "learning_rate": 9.902497139184551e-06, + "loss": 0.2651, + "step": 65445 + }, + { + "epoch": 3.05, + "learning_rate": 9.901713354129763e-06, + "loss": 0.0487, + "step": 65450 + }, + { + "epoch": 3.05, + "learning_rate": 9.900929569074977e-06, + "loss": 0.0474, + "step": 65455 + }, + { + "epoch": 3.05, + "learning_rate": 9.900145784020191e-06, + "loss": 0.0345, + "step": 65460 + }, + { + "epoch": 3.05, + "learning_rate": 9.899361998965405e-06, + "loss": 0.0674, + "step": 65465 + }, + { + "epoch": 3.05, + "learning_rate": 9.898578213910617e-06, + "loss": 0.1199, + "step": 65470 + }, + { + "epoch": 3.06, + "learning_rate": 9.897794428855831e-06, + "loss": 0.075, + "step": 65475 + }, + { + "epoch": 3.06, + "learning_rate": 9.897010643801045e-06, + "loss": 0.0844, + "step": 65480 + }, + { + "epoch": 3.06, + "learning_rate": 9.896226858746259e-06, + "loss": 0.1378, + "step": 65485 + }, + { + "epoch": 3.06, + "learning_rate": 9.895443073691471e-06, + "loss": 0.2872, + "step": 65490 + }, + { + "epoch": 3.06, + "learning_rate": 9.894659288636685e-06, + "loss": 0.276, + "step": 65495 + }, + { + "epoch": 3.06, + "learning_rate": 9.893875503581899e-06, + "loss": 0.1098, + "step": 65500 + }, + { + "epoch": 3.06, + "learning_rate": 9.893091718527113e-06, + "loss": 0.0231, + "step": 65505 + }, + { + "epoch": 3.06, + "learning_rate": 9.892307933472325e-06, + "loss": 0.0576, + "step": 65510 + }, + { + "epoch": 3.06, + "learning_rate": 9.891524148417539e-06, + "loss": 0.1021, + "step": 65515 + }, + { + "epoch": 3.06, + "learning_rate": 9.890740363362751e-06, + "loss": 0.1098, + "step": 65520 + }, + { + "epoch": 3.06, + "learning_rate": 9.889956578307965e-06, + "loss": 0.0809, + "step": 65525 + }, + { + "epoch": 3.06, + "learning_rate": 9.889172793253179e-06, + "loss": 0.0706, + "step": 65530 + }, + { + "epoch": 3.06, + "learning_rate": 9.888389008198393e-06, + "loss": 0.1328, + "step": 65535 + }, + { + "epoch": 3.06, + "learning_rate": 9.887605223143605e-06, + "loss": 0.1188, + "step": 65540 + }, + { + "epoch": 3.06, + "learning_rate": 9.886821438088819e-06, + "loss": 0.2656, + "step": 65545 + }, + { + "epoch": 3.06, + "learning_rate": 9.886037653034033e-06, + "loss": 0.1282, + "step": 65550 + }, + { + "epoch": 3.06, + "learning_rate": 9.885253867979247e-06, + "loss": 0.0373, + "step": 65555 + }, + { + "epoch": 3.06, + "learning_rate": 9.88447008292446e-06, + "loss": 0.0479, + "step": 65560 + }, + { + "epoch": 3.06, + "learning_rate": 9.883686297869673e-06, + "loss": 0.0388, + "step": 65565 + }, + { + "epoch": 3.06, + "learning_rate": 9.882902512814887e-06, + "loss": 0.0458, + "step": 65570 + }, + { + "epoch": 3.06, + "learning_rate": 9.882118727760099e-06, + "loss": 0.06, + "step": 65575 + }, + { + "epoch": 3.06, + "learning_rate": 9.881334942705313e-06, + "loss": 0.1196, + "step": 65580 + }, + { + "epoch": 3.06, + "learning_rate": 9.880551157650527e-06, + "loss": 0.2084, + "step": 65585 + }, + { + "epoch": 3.06, + "learning_rate": 9.879767372595739e-06, + "loss": 0.247, + "step": 65590 + }, + { + "epoch": 3.06, + "learning_rate": 9.878983587540953e-06, + "loss": 0.3117, + "step": 65595 + }, + { + "epoch": 3.06, + "learning_rate": 9.878199802486167e-06, + "loss": 0.1258, + "step": 65600 + }, + { + "epoch": 3.06, + "learning_rate": 9.87741601743138e-06, + "loss": 0.0392, + "step": 65605 + }, + { + "epoch": 3.06, + "learning_rate": 9.876632232376595e-06, + "loss": 0.0395, + "step": 65610 + }, + { + "epoch": 3.06, + "learning_rate": 9.875848447321807e-06, + "loss": 0.0822, + "step": 65615 + }, + { + "epoch": 3.06, + "learning_rate": 9.87506466226702e-06, + "loss": 0.1316, + "step": 65620 + }, + { + "epoch": 3.06, + "learning_rate": 9.874280877212235e-06, + "loss": 0.0738, + "step": 65625 + }, + { + "epoch": 3.06, + "learning_rate": 9.873497092157449e-06, + "loss": 0.1253, + "step": 65630 + }, + { + "epoch": 3.06, + "learning_rate": 9.872713307102661e-06, + "loss": 0.1578, + "step": 65635 + }, + { + "epoch": 3.06, + "learning_rate": 9.871929522047875e-06, + "loss": 0.2201, + "step": 65640 + }, + { + "epoch": 3.06, + "learning_rate": 9.871145736993087e-06, + "loss": 0.2527, + "step": 65645 + }, + { + "epoch": 3.06, + "learning_rate": 9.870361951938301e-06, + "loss": 0.0481, + "step": 65650 + }, + { + "epoch": 3.06, + "learning_rate": 9.869578166883515e-06, + "loss": 0.0439, + "step": 65655 + }, + { + "epoch": 3.06, + "learning_rate": 9.868794381828729e-06, + "loss": 0.0479, + "step": 65660 + }, + { + "epoch": 3.06, + "learning_rate": 9.868010596773941e-06, + "loss": 0.0606, + "step": 65665 + }, + { + "epoch": 3.06, + "learning_rate": 9.867226811719155e-06, + "loss": 0.0551, + "step": 65670 + }, + { + "epoch": 3.06, + "learning_rate": 9.866443026664369e-06, + "loss": 0.1018, + "step": 65675 + }, + { + "epoch": 3.06, + "learning_rate": 9.865659241609583e-06, + "loss": 0.0722, + "step": 65680 + }, + { + "epoch": 3.06, + "learning_rate": 9.864875456554795e-06, + "loss": 0.1421, + "step": 65685 + }, + { + "epoch": 3.07, + "learning_rate": 9.864091671500009e-06, + "loss": 0.2154, + "step": 65690 + }, + { + "epoch": 3.07, + "learning_rate": 9.863307886445223e-06, + "loss": 0.3339, + "step": 65695 + }, + { + "epoch": 3.07, + "learning_rate": 9.862524101390437e-06, + "loss": 0.0759, + "step": 65700 + }, + { + "epoch": 3.07, + "learning_rate": 9.861740316335649e-06, + "loss": 0.0287, + "step": 65705 + }, + { + "epoch": 3.07, + "learning_rate": 9.860956531280863e-06, + "loss": 0.096, + "step": 65710 + }, + { + "epoch": 3.07, + "learning_rate": 9.860172746226075e-06, + "loss": 0.0382, + "step": 65715 + }, + { + "epoch": 3.07, + "learning_rate": 9.859388961171289e-06, + "loss": 0.0794, + "step": 65720 + }, + { + "epoch": 3.07, + "learning_rate": 9.858605176116503e-06, + "loss": 0.1148, + "step": 65725 + }, + { + "epoch": 3.07, + "learning_rate": 9.857821391061717e-06, + "loss": 0.1004, + "step": 65730 + }, + { + "epoch": 3.07, + "learning_rate": 9.857037606006929e-06, + "loss": 0.1558, + "step": 65735 + }, + { + "epoch": 3.07, + "learning_rate": 9.856253820952143e-06, + "loss": 0.1553, + "step": 65740 + }, + { + "epoch": 3.07, + "learning_rate": 9.855470035897357e-06, + "loss": 0.3653, + "step": 65745 + }, + { + "epoch": 3.07, + "learning_rate": 9.85468625084257e-06, + "loss": 0.0707, + "step": 65750 + }, + { + "epoch": 3.07, + "learning_rate": 9.853902465787783e-06, + "loss": 0.0303, + "step": 65755 + }, + { + "epoch": 3.07, + "learning_rate": 9.853118680732997e-06, + "loss": 0.0592, + "step": 65760 + }, + { + "epoch": 3.07, + "learning_rate": 9.85233489567821e-06, + "loss": 0.0684, + "step": 65765 + }, + { + "epoch": 3.07, + "learning_rate": 9.851551110623423e-06, + "loss": 0.073, + "step": 65770 + }, + { + "epoch": 3.07, + "learning_rate": 9.850767325568637e-06, + "loss": 0.085, + "step": 65775 + }, + { + "epoch": 3.07, + "learning_rate": 9.84998354051385e-06, + "loss": 0.1083, + "step": 65780 + }, + { + "epoch": 3.07, + "learning_rate": 9.849199755459063e-06, + "loss": 0.1539, + "step": 65785 + }, + { + "epoch": 3.07, + "learning_rate": 9.848415970404277e-06, + "loss": 0.1221, + "step": 65790 + }, + { + "epoch": 3.07, + "learning_rate": 9.84763218534949e-06, + "loss": 0.3577, + "step": 65795 + }, + { + "epoch": 3.07, + "learning_rate": 9.846848400294704e-06, + "loss": 0.12, + "step": 65800 + }, + { + "epoch": 3.07, + "learning_rate": 9.846064615239917e-06, + "loss": 0.022, + "step": 65805 + }, + { + "epoch": 3.07, + "learning_rate": 9.84528083018513e-06, + "loss": 0.082, + "step": 65810 + }, + { + "epoch": 3.07, + "learning_rate": 9.844497045130345e-06, + "loss": 0.0643, + "step": 65815 + }, + { + "epoch": 3.07, + "learning_rate": 9.843713260075558e-06, + "loss": 0.0713, + "step": 65820 + }, + { + "epoch": 3.07, + "learning_rate": 9.842929475020772e-06, + "loss": 0.0629, + "step": 65825 + }, + { + "epoch": 3.07, + "learning_rate": 9.842145689965985e-06, + "loss": 0.1637, + "step": 65830 + }, + { + "epoch": 3.07, + "learning_rate": 9.841361904911197e-06, + "loss": 0.1241, + "step": 65835 + }, + { + "epoch": 3.07, + "learning_rate": 9.84057811985641e-06, + "loss": 0.2507, + "step": 65840 + }, + { + "epoch": 3.07, + "learning_rate": 9.839794334801625e-06, + "loss": 0.4136, + "step": 65845 + }, + { + "epoch": 3.07, + "learning_rate": 9.839010549746838e-06, + "loss": 0.0744, + "step": 65850 + }, + { + "epoch": 3.07, + "learning_rate": 9.83822676469205e-06, + "loss": 0.0148, + "step": 65855 + }, + { + "epoch": 3.07, + "learning_rate": 9.837442979637265e-06, + "loss": 0.0641, + "step": 65860 + }, + { + "epoch": 3.07, + "learning_rate": 9.836659194582478e-06, + "loss": 0.0579, + "step": 65865 + }, + { + "epoch": 3.07, + "learning_rate": 9.835875409527692e-06, + "loss": 0.0924, + "step": 65870 + }, + { + "epoch": 3.07, + "learning_rate": 9.835091624472906e-06, + "loss": 0.0568, + "step": 65875 + }, + { + "epoch": 3.07, + "learning_rate": 9.834307839418119e-06, + "loss": 0.1328, + "step": 65880 + }, + { + "epoch": 3.07, + "learning_rate": 9.833524054363332e-06, + "loss": 0.2094, + "step": 65885 + }, + { + "epoch": 3.07, + "learning_rate": 9.832740269308546e-06, + "loss": 0.1356, + "step": 65890 + }, + { + "epoch": 3.07, + "learning_rate": 9.83195648425376e-06, + "loss": 0.3647, + "step": 65895 + }, + { + "epoch": 3.07, + "learning_rate": 9.831172699198972e-06, + "loss": 0.0792, + "step": 65900 + }, + { + "epoch": 3.08, + "learning_rate": 9.830388914144185e-06, + "loss": 0.0556, + "step": 65905 + }, + { + "epoch": 3.08, + "learning_rate": 9.829605129089399e-06, + "loss": 0.0496, + "step": 65910 + }, + { + "epoch": 3.08, + "learning_rate": 9.828821344034612e-06, + "loss": 0.0758, + "step": 65915 + }, + { + "epoch": 3.08, + "learning_rate": 9.828037558979826e-06, + "loss": 0.0409, + "step": 65920 + }, + { + "epoch": 3.08, + "learning_rate": 9.82725377392504e-06, + "loss": 0.053, + "step": 65925 + }, + { + "epoch": 3.08, + "learning_rate": 9.826469988870252e-06, + "loss": 0.0676, + "step": 65930 + }, + { + "epoch": 3.08, + "learning_rate": 9.825686203815466e-06, + "loss": 0.1414, + "step": 65935 + }, + { + "epoch": 3.08, + "learning_rate": 9.82490241876068e-06, + "loss": 0.233, + "step": 65940 + }, + { + "epoch": 3.08, + "learning_rate": 9.824118633705894e-06, + "loss": 0.2629, + "step": 65945 + }, + { + "epoch": 3.08, + "learning_rate": 9.823334848651106e-06, + "loss": 0.0594, + "step": 65950 + }, + { + "epoch": 3.08, + "learning_rate": 9.82255106359632e-06, + "loss": 0.0329, + "step": 65955 + }, + { + "epoch": 3.08, + "learning_rate": 9.821767278541534e-06, + "loss": 0.0585, + "step": 65960 + }, + { + "epoch": 3.08, + "learning_rate": 9.820983493486746e-06, + "loss": 0.0716, + "step": 65965 + }, + { + "epoch": 3.08, + "learning_rate": 9.82019970843196e-06, + "loss": 0.0227, + "step": 65970 + }, + { + "epoch": 3.08, + "learning_rate": 9.819415923377174e-06, + "loss": 0.1145, + "step": 65975 + }, + { + "epoch": 3.08, + "learning_rate": 9.818632138322386e-06, + "loss": 0.0928, + "step": 65980 + }, + { + "epoch": 3.08, + "learning_rate": 9.8178483532676e-06, + "loss": 0.1671, + "step": 65985 + }, + { + "epoch": 3.08, + "learning_rate": 9.817064568212814e-06, + "loss": 0.3238, + "step": 65990 + }, + { + "epoch": 3.08, + "learning_rate": 9.816280783158028e-06, + "loss": 0.2865, + "step": 65995 + }, + { + "epoch": 3.08, + "learning_rate": 9.81549699810324e-06, + "loss": 0.0648, + "step": 66000 + }, + { + "epoch": 3.08, + "learning_rate": 9.814713213048454e-06, + "loss": 0.0084, + "step": 66005 + }, + { + "epoch": 3.08, + "learning_rate": 9.813929427993668e-06, + "loss": 0.0627, + "step": 66010 + }, + { + "epoch": 3.08, + "learning_rate": 9.813145642938882e-06, + "loss": 0.0375, + "step": 66015 + }, + { + "epoch": 3.08, + "learning_rate": 9.812361857884094e-06, + "loss": 0.0965, + "step": 66020 + }, + { + "epoch": 3.08, + "learning_rate": 9.811578072829308e-06, + "loss": 0.0915, + "step": 66025 + }, + { + "epoch": 3.08, + "learning_rate": 9.81079428777452e-06, + "loss": 0.1729, + "step": 66030 + }, + { + "epoch": 3.08, + "learning_rate": 9.810010502719734e-06, + "loss": 0.0954, + "step": 66035 + }, + { + "epoch": 3.08, + "learning_rate": 9.809226717664948e-06, + "loss": 0.1511, + "step": 66040 + }, + { + "epoch": 3.08, + "learning_rate": 9.808442932610162e-06, + "loss": 0.2462, + "step": 66045 + }, + { + "epoch": 3.08, + "learning_rate": 9.807659147555374e-06, + "loss": 0.0609, + "step": 66050 + }, + { + "epoch": 3.08, + "learning_rate": 9.806875362500588e-06, + "loss": 0.063, + "step": 66055 + }, + { + "epoch": 3.08, + "learning_rate": 9.806091577445802e-06, + "loss": 0.042, + "step": 66060 + }, + { + "epoch": 3.08, + "learning_rate": 9.805307792391016e-06, + "loss": 0.0742, + "step": 66065 + }, + { + "epoch": 3.08, + "learning_rate": 9.804524007336228e-06, + "loss": 0.0461, + "step": 66070 + }, + { + "epoch": 3.08, + "learning_rate": 9.803740222281442e-06, + "loss": 0.1103, + "step": 66075 + }, + { + "epoch": 3.08, + "learning_rate": 9.802956437226656e-06, + "loss": 0.1392, + "step": 66080 + }, + { + "epoch": 3.08, + "learning_rate": 9.80217265217187e-06, + "loss": 0.112, + "step": 66085 + }, + { + "epoch": 3.08, + "learning_rate": 9.801388867117084e-06, + "loss": 0.1597, + "step": 66090 + }, + { + "epoch": 3.08, + "learning_rate": 9.800605082062296e-06, + "loss": 0.3931, + "step": 66095 + }, + { + "epoch": 3.08, + "learning_rate": 9.799821297007508e-06, + "loss": 0.038, + "step": 66100 + }, + { + "epoch": 3.08, + "learning_rate": 9.799037511952722e-06, + "loss": 0.0841, + "step": 66105 + }, + { + "epoch": 3.08, + "learning_rate": 9.798253726897936e-06, + "loss": 0.0283, + "step": 66110 + }, + { + "epoch": 3.09, + "learning_rate": 9.79746994184315e-06, + "loss": 0.0556, + "step": 66115 + }, + { + "epoch": 3.09, + "learning_rate": 9.796686156788362e-06, + "loss": 0.0488, + "step": 66120 + }, + { + "epoch": 3.09, + "learning_rate": 9.795902371733576e-06, + "loss": 0.0714, + "step": 66125 + }, + { + "epoch": 3.09, + "learning_rate": 9.79511858667879e-06, + "loss": 0.0883, + "step": 66130 + }, + { + "epoch": 3.09, + "learning_rate": 9.794334801624004e-06, + "loss": 0.0689, + "step": 66135 + }, + { + "epoch": 3.09, + "learning_rate": 9.793551016569218e-06, + "loss": 0.2007, + "step": 66140 + }, + { + "epoch": 3.09, + "learning_rate": 9.79276723151443e-06, + "loss": 0.3548, + "step": 66145 + }, + { + "epoch": 3.09, + "learning_rate": 9.791983446459644e-06, + "loss": 0.0955, + "step": 66150 + }, + { + "epoch": 3.09, + "learning_rate": 9.791199661404858e-06, + "loss": 0.0395, + "step": 66155 + }, + { + "epoch": 3.09, + "learning_rate": 9.79041587635007e-06, + "loss": 0.0361, + "step": 66160 + }, + { + "epoch": 3.09, + "learning_rate": 9.789632091295284e-06, + "loss": 0.0398, + "step": 66165 + }, + { + "epoch": 3.09, + "learning_rate": 9.788848306240496e-06, + "loss": 0.0547, + "step": 66170 + }, + { + "epoch": 3.09, + "learning_rate": 9.78806452118571e-06, + "loss": 0.0618, + "step": 66175 + }, + { + "epoch": 3.09, + "learning_rate": 9.787280736130924e-06, + "loss": 0.1462, + "step": 66180 + }, + { + "epoch": 3.09, + "learning_rate": 9.786496951076138e-06, + "loss": 0.1491, + "step": 66185 + }, + { + "epoch": 3.09, + "learning_rate": 9.785713166021352e-06, + "loss": 0.177, + "step": 66190 + }, + { + "epoch": 3.09, + "learning_rate": 9.784929380966564e-06, + "loss": 0.2986, + "step": 66195 + }, + { + "epoch": 3.09, + "learning_rate": 9.784145595911778e-06, + "loss": 0.0846, + "step": 66200 + }, + { + "epoch": 3.09, + "learning_rate": 9.783361810856992e-06, + "loss": 0.0278, + "step": 66205 + }, + { + "epoch": 3.09, + "learning_rate": 9.782578025802206e-06, + "loss": 0.0166, + "step": 66210 + }, + { + "epoch": 3.09, + "learning_rate": 9.781794240747418e-06, + "loss": 0.051, + "step": 66215 + }, + { + "epoch": 3.09, + "learning_rate": 9.781010455692632e-06, + "loss": 0.0734, + "step": 66220 + }, + { + "epoch": 3.09, + "learning_rate": 9.780226670637844e-06, + "loss": 0.0638, + "step": 66225 + }, + { + "epoch": 3.09, + "learning_rate": 9.779442885583058e-06, + "loss": 0.0957, + "step": 66230 + }, + { + "epoch": 3.09, + "learning_rate": 9.778659100528272e-06, + "loss": 0.1582, + "step": 66235 + }, + { + "epoch": 3.09, + "learning_rate": 9.777875315473486e-06, + "loss": 0.2031, + "step": 66240 + }, + { + "epoch": 3.09, + "learning_rate": 9.777091530418698e-06, + "loss": 0.3371, + "step": 66245 + }, + { + "epoch": 3.09, + "learning_rate": 9.776307745363912e-06, + "loss": 0.0844, + "step": 66250 + }, + { + "epoch": 3.09, + "learning_rate": 9.775523960309126e-06, + "loss": 0.0135, + "step": 66255 + }, + { + "epoch": 3.09, + "learning_rate": 9.77474017525434e-06, + "loss": 0.0855, + "step": 66260 + }, + { + "epoch": 3.09, + "learning_rate": 9.773956390199552e-06, + "loss": 0.0722, + "step": 66265 + }, + { + "epoch": 3.09, + "learning_rate": 9.773172605144766e-06, + "loss": 0.1067, + "step": 66270 + }, + { + "epoch": 3.09, + "learning_rate": 9.77238882008998e-06, + "loss": 0.118, + "step": 66275 + }, + { + "epoch": 3.09, + "learning_rate": 9.771605035035194e-06, + "loss": 0.193, + "step": 66280 + }, + { + "epoch": 3.09, + "learning_rate": 9.770821249980406e-06, + "loss": 0.172, + "step": 66285 + }, + { + "epoch": 3.09, + "learning_rate": 9.77003746492562e-06, + "loss": 0.2177, + "step": 66290 + }, + { + "epoch": 3.09, + "learning_rate": 9.769253679870832e-06, + "loss": 0.46, + "step": 66295 + }, + { + "epoch": 3.09, + "learning_rate": 9.768469894816046e-06, + "loss": 0.085, + "step": 66300 + }, + { + "epoch": 3.09, + "learning_rate": 9.76768610976126e-06, + "loss": 0.027, + "step": 66305 + }, + { + "epoch": 3.09, + "learning_rate": 9.766902324706474e-06, + "loss": 0.0324, + "step": 66310 + }, + { + "epoch": 3.09, + "learning_rate": 9.766118539651686e-06, + "loss": 0.0424, + "step": 66315 + }, + { + "epoch": 3.09, + "learning_rate": 9.7653347545969e-06, + "loss": 0.1014, + "step": 66320 + }, + { + "epoch": 3.09, + "learning_rate": 9.764550969542114e-06, + "loss": 0.1147, + "step": 66325 + }, + { + "epoch": 3.1, + "learning_rate": 9.763767184487328e-06, + "loss": 0.1188, + "step": 66330 + }, + { + "epoch": 3.1, + "learning_rate": 9.76298339943254e-06, + "loss": 0.2082, + "step": 66335 + }, + { + "epoch": 3.1, + "learning_rate": 9.762199614377754e-06, + "loss": 0.2368, + "step": 66340 + }, + { + "epoch": 3.1, + "learning_rate": 9.761415829322968e-06, + "loss": 0.2593, + "step": 66345 + }, + { + "epoch": 3.1, + "learning_rate": 9.760632044268182e-06, + "loss": 0.0695, + "step": 66350 + }, + { + "epoch": 3.1, + "learning_rate": 9.759848259213394e-06, + "loss": 0.0248, + "step": 66355 + }, + { + "epoch": 3.1, + "learning_rate": 9.759064474158608e-06, + "loss": 0.048, + "step": 66360 + }, + { + "epoch": 3.1, + "learning_rate": 9.75828068910382e-06, + "loss": 0.0906, + "step": 66365 + }, + { + "epoch": 3.1, + "learning_rate": 9.757496904049034e-06, + "loss": 0.1016, + "step": 66370 + }, + { + "epoch": 3.1, + "learning_rate": 9.756713118994248e-06, + "loss": 0.0903, + "step": 66375 + }, + { + "epoch": 3.1, + "learning_rate": 9.755929333939462e-06, + "loss": 0.1557, + "step": 66380 + }, + { + "epoch": 3.1, + "learning_rate": 9.755145548884674e-06, + "loss": 0.2118, + "step": 66385 + }, + { + "epoch": 3.1, + "learning_rate": 9.754361763829888e-06, + "loss": 0.3594, + "step": 66390 + }, + { + "epoch": 3.1, + "learning_rate": 9.753577978775102e-06, + "loss": 0.3155, + "step": 66395 + }, + { + "epoch": 3.1, + "learning_rate": 9.752794193720316e-06, + "loss": 0.0644, + "step": 66400 + }, + { + "epoch": 3.1, + "learning_rate": 9.75201040866553e-06, + "loss": 0.0568, + "step": 66405 + }, + { + "epoch": 3.1, + "learning_rate": 9.751226623610742e-06, + "loss": 0.0713, + "step": 66410 + }, + { + "epoch": 3.1, + "learning_rate": 9.750442838555956e-06, + "loss": 0.0646, + "step": 66415 + }, + { + "epoch": 3.1, + "learning_rate": 9.749659053501168e-06, + "loss": 0.0589, + "step": 66420 + }, + { + "epoch": 3.1, + "learning_rate": 9.748875268446382e-06, + "loss": 0.1199, + "step": 66425 + }, + { + "epoch": 3.1, + "learning_rate": 9.748091483391596e-06, + "loss": 0.0658, + "step": 66430 + }, + { + "epoch": 3.1, + "learning_rate": 9.747307698336808e-06, + "loss": 0.1286, + "step": 66435 + }, + { + "epoch": 3.1, + "learning_rate": 9.746523913282022e-06, + "loss": 0.2039, + "step": 66440 + }, + { + "epoch": 3.1, + "learning_rate": 9.745740128227236e-06, + "loss": 0.3109, + "step": 66445 + }, + { + "epoch": 3.1, + "learning_rate": 9.74495634317245e-06, + "loss": 0.0421, + "step": 66450 + }, + { + "epoch": 3.1, + "learning_rate": 9.744172558117663e-06, + "loss": 0.0413, + "step": 66455 + }, + { + "epoch": 3.1, + "learning_rate": 9.743388773062876e-06, + "loss": 0.0249, + "step": 66460 + }, + { + "epoch": 3.1, + "learning_rate": 9.74260498800809e-06, + "loss": 0.0545, + "step": 66465 + }, + { + "epoch": 3.1, + "learning_rate": 9.741821202953303e-06, + "loss": 0.099, + "step": 66470 + }, + { + "epoch": 3.1, + "learning_rate": 9.741037417898517e-06, + "loss": 0.0948, + "step": 66475 + }, + { + "epoch": 3.1, + "learning_rate": 9.74025363284373e-06, + "loss": 0.0998, + "step": 66480 + }, + { + "epoch": 3.1, + "learning_rate": 9.739469847788942e-06, + "loss": 0.1262, + "step": 66485 + }, + { + "epoch": 3.1, + "learning_rate": 9.738686062734156e-06, + "loss": 0.1912, + "step": 66490 + }, + { + "epoch": 3.1, + "learning_rate": 9.73790227767937e-06, + "loss": 0.2891, + "step": 66495 + }, + { + "epoch": 3.1, + "learning_rate": 9.737118492624584e-06, + "loss": 0.0749, + "step": 66500 + }, + { + "epoch": 3.1, + "learning_rate": 9.736334707569797e-06, + "loss": 0.1176, + "step": 66505 + }, + { + "epoch": 3.1, + "learning_rate": 9.73555092251501e-06, + "loss": 0.0257, + "step": 66510 + }, + { + "epoch": 3.1, + "learning_rate": 9.734767137460224e-06, + "loss": 0.0613, + "step": 66515 + }, + { + "epoch": 3.1, + "learning_rate": 9.733983352405437e-06, + "loss": 0.1069, + "step": 66520 + }, + { + "epoch": 3.1, + "learning_rate": 9.733199567350651e-06, + "loss": 0.0754, + "step": 66525 + }, + { + "epoch": 3.1, + "learning_rate": 9.732415782295864e-06, + "loss": 0.0548, + "step": 66530 + }, + { + "epoch": 3.1, + "learning_rate": 9.731631997241077e-06, + "loss": 0.1278, + "step": 66535 + }, + { + "epoch": 3.1, + "learning_rate": 9.730848212186291e-06, + "loss": 0.1902, + "step": 66540 + }, + { + "epoch": 3.11, + "learning_rate": 9.730064427131505e-06, + "loss": 0.3116, + "step": 66545 + }, + { + "epoch": 3.11, + "learning_rate": 9.729280642076718e-06, + "loss": 0.0847, + "step": 66550 + }, + { + "epoch": 3.11, + "learning_rate": 9.728496857021931e-06, + "loss": 0.0388, + "step": 66555 + }, + { + "epoch": 3.11, + "learning_rate": 9.727713071967144e-06, + "loss": 0.0235, + "step": 66560 + }, + { + "epoch": 3.11, + "learning_rate": 9.726929286912358e-06, + "loss": 0.0432, + "step": 66565 + }, + { + "epoch": 3.11, + "learning_rate": 9.726145501857571e-06, + "loss": 0.0687, + "step": 66570 + }, + { + "epoch": 3.11, + "learning_rate": 9.725361716802785e-06, + "loss": 0.1861, + "step": 66575 + }, + { + "epoch": 3.11, + "learning_rate": 9.724577931747998e-06, + "loss": 0.1056, + "step": 66580 + }, + { + "epoch": 3.11, + "learning_rate": 9.723794146693211e-06, + "loss": 0.1404, + "step": 66585 + }, + { + "epoch": 3.11, + "learning_rate": 9.723010361638425e-06, + "loss": 0.2881, + "step": 66590 + }, + { + "epoch": 3.11, + "learning_rate": 9.72222657658364e-06, + "loss": 0.2303, + "step": 66595 + }, + { + "epoch": 3.11, + "learning_rate": 9.721442791528851e-06, + "loss": 0.0623, + "step": 66600 + }, + { + "epoch": 3.11, + "learning_rate": 9.720659006474065e-06, + "loss": 0.016, + "step": 66605 + }, + { + "epoch": 3.11, + "learning_rate": 9.71987522141928e-06, + "loss": 0.0418, + "step": 66610 + }, + { + "epoch": 3.11, + "learning_rate": 9.719091436364492e-06, + "loss": 0.0622, + "step": 66615 + }, + { + "epoch": 3.11, + "learning_rate": 9.718307651309705e-06, + "loss": 0.163, + "step": 66620 + }, + { + "epoch": 3.11, + "learning_rate": 9.71752386625492e-06, + "loss": 0.1038, + "step": 66625 + }, + { + "epoch": 3.11, + "learning_rate": 9.716740081200132e-06, + "loss": 0.1165, + "step": 66630 + }, + { + "epoch": 3.11, + "learning_rate": 9.715956296145345e-06, + "loss": 0.0962, + "step": 66635 + }, + { + "epoch": 3.11, + "learning_rate": 9.71517251109056e-06, + "loss": 0.164, + "step": 66640 + }, + { + "epoch": 3.11, + "learning_rate": 9.714388726035773e-06, + "loss": 0.4056, + "step": 66645 + }, + { + "epoch": 3.11, + "learning_rate": 9.713604940980985e-06, + "loss": 0.0518, + "step": 66650 + }, + { + "epoch": 3.11, + "learning_rate": 9.7128211559262e-06, + "loss": 0.069, + "step": 66655 + }, + { + "epoch": 3.11, + "learning_rate": 9.712037370871413e-06, + "loss": 0.0428, + "step": 66660 + }, + { + "epoch": 3.11, + "learning_rate": 9.711253585816627e-06, + "loss": 0.1114, + "step": 66665 + }, + { + "epoch": 3.11, + "learning_rate": 9.710469800761841e-06, + "loss": 0.0601, + "step": 66670 + }, + { + "epoch": 3.11, + "learning_rate": 9.709686015707053e-06, + "loss": 0.0506, + "step": 66675 + }, + { + "epoch": 3.11, + "learning_rate": 9.708902230652266e-06, + "loss": 0.1785, + "step": 66680 + }, + { + "epoch": 3.11, + "learning_rate": 9.70811844559748e-06, + "loss": 0.198, + "step": 66685 + }, + { + "epoch": 3.11, + "learning_rate": 9.707334660542693e-06, + "loss": 0.2683, + "step": 66690 + }, + { + "epoch": 3.11, + "learning_rate": 9.706550875487907e-06, + "loss": 0.3954, + "step": 66695 + }, + { + "epoch": 3.11, + "learning_rate": 9.70576709043312e-06, + "loss": 0.081, + "step": 66700 + }, + { + "epoch": 3.11, + "learning_rate": 9.704983305378333e-06, + "loss": 0.0303, + "step": 66705 + }, + { + "epoch": 3.11, + "learning_rate": 9.704199520323547e-06, + "loss": 0.0428, + "step": 66710 + }, + { + "epoch": 3.11, + "learning_rate": 9.703415735268761e-06, + "loss": 0.0387, + "step": 66715 + }, + { + "epoch": 3.11, + "learning_rate": 9.702631950213975e-06, + "loss": 0.0866, + "step": 66720 + }, + { + "epoch": 3.11, + "learning_rate": 9.701848165159187e-06, + "loss": 0.1363, + "step": 66725 + }, + { + "epoch": 3.11, + "learning_rate": 9.701064380104401e-06, + "loss": 0.208, + "step": 66730 + }, + { + "epoch": 3.11, + "learning_rate": 9.700280595049615e-06, + "loss": 0.1501, + "step": 66735 + }, + { + "epoch": 3.11, + "learning_rate": 9.699496809994829e-06, + "loss": 0.1701, + "step": 66740 + }, + { + "epoch": 3.11, + "learning_rate": 9.698713024940041e-06, + "loss": 0.23, + "step": 66745 + }, + { + "epoch": 3.11, + "learning_rate": 9.697929239885253e-06, + "loss": 0.0608, + "step": 66750 + }, + { + "epoch": 3.11, + "learning_rate": 9.697145454830467e-06, + "loss": 0.0249, + "step": 66755 + }, + { + "epoch": 3.12, + "learning_rate": 9.696361669775681e-06, + "loss": 0.0245, + "step": 66760 + }, + { + "epoch": 3.12, + "learning_rate": 9.695577884720895e-06, + "loss": 0.0743, + "step": 66765 + }, + { + "epoch": 3.12, + "learning_rate": 9.694794099666109e-06, + "loss": 0.0734, + "step": 66770 + }, + { + "epoch": 3.12, + "learning_rate": 9.694010314611321e-06, + "loss": 0.0864, + "step": 66775 + }, + { + "epoch": 3.12, + "learning_rate": 9.693226529556535e-06, + "loss": 0.1432, + "step": 66780 + }, + { + "epoch": 3.12, + "learning_rate": 9.692442744501749e-06, + "loss": 0.1472, + "step": 66785 + }, + { + "epoch": 3.12, + "learning_rate": 9.691658959446963e-06, + "loss": 0.0936, + "step": 66790 + }, + { + "epoch": 3.12, + "learning_rate": 9.690875174392175e-06, + "loss": 0.3271, + "step": 66795 + }, + { + "epoch": 3.12, + "learning_rate": 9.690091389337389e-06, + "loss": 0.0738, + "step": 66800 + }, + { + "epoch": 3.12, + "learning_rate": 9.689307604282603e-06, + "loss": 0.0401, + "step": 66805 + }, + { + "epoch": 3.12, + "learning_rate": 9.688523819227815e-06, + "loss": 0.0529, + "step": 66810 + }, + { + "epoch": 3.12, + "learning_rate": 9.687740034173029e-06, + "loss": 0.0409, + "step": 66815 + }, + { + "epoch": 3.12, + "learning_rate": 9.686956249118243e-06, + "loss": 0.0777, + "step": 66820 + }, + { + "epoch": 3.12, + "learning_rate": 9.686172464063455e-06, + "loss": 0.1198, + "step": 66825 + }, + { + "epoch": 3.12, + "learning_rate": 9.685388679008669e-06, + "loss": 0.1342, + "step": 66830 + }, + { + "epoch": 3.12, + "learning_rate": 9.684604893953883e-06, + "loss": 0.0932, + "step": 66835 + }, + { + "epoch": 3.12, + "learning_rate": 9.683821108899097e-06, + "loss": 0.2755, + "step": 66840 + }, + { + "epoch": 3.12, + "learning_rate": 9.68303732384431e-06, + "loss": 0.2539, + "step": 66845 + }, + { + "epoch": 3.12, + "learning_rate": 9.682253538789523e-06, + "loss": 0.059, + "step": 66850 + }, + { + "epoch": 3.12, + "learning_rate": 9.681469753734737e-06, + "loss": 0.0206, + "step": 66855 + }, + { + "epoch": 3.12, + "learning_rate": 9.680685968679951e-06, + "loss": 0.0485, + "step": 66860 + }, + { + "epoch": 3.12, + "learning_rate": 9.679902183625163e-06, + "loss": 0.0562, + "step": 66865 + }, + { + "epoch": 3.12, + "learning_rate": 9.679118398570377e-06, + "loss": 0.0851, + "step": 66870 + }, + { + "epoch": 3.12, + "learning_rate": 9.67833461351559e-06, + "loss": 0.1055, + "step": 66875 + }, + { + "epoch": 3.12, + "learning_rate": 9.677550828460803e-06, + "loss": 0.0656, + "step": 66880 + }, + { + "epoch": 3.12, + "learning_rate": 9.676767043406017e-06, + "loss": 0.1683, + "step": 66885 + }, + { + "epoch": 3.12, + "learning_rate": 9.675983258351231e-06, + "loss": 0.1515, + "step": 66890 + }, + { + "epoch": 3.12, + "learning_rate": 9.675199473296443e-06, + "loss": 0.3496, + "step": 66895 + }, + { + "epoch": 3.12, + "learning_rate": 9.674415688241657e-06, + "loss": 0.0908, + "step": 66900 + }, + { + "epoch": 3.12, + "learning_rate": 9.673631903186871e-06, + "loss": 0.0156, + "step": 66905 + }, + { + "epoch": 3.12, + "learning_rate": 9.672848118132085e-06, + "loss": 0.0492, + "step": 66910 + }, + { + "epoch": 3.12, + "learning_rate": 9.672064333077297e-06, + "loss": 0.0519, + "step": 66915 + }, + { + "epoch": 3.12, + "learning_rate": 9.671280548022511e-06, + "loss": 0.1146, + "step": 66920 + }, + { + "epoch": 3.12, + "learning_rate": 9.670496762967725e-06, + "loss": 0.1169, + "step": 66925 + }, + { + "epoch": 3.12, + "learning_rate": 9.669712977912939e-06, + "loss": 0.0662, + "step": 66930 + }, + { + "epoch": 3.12, + "learning_rate": 9.668929192858153e-06, + "loss": 0.1658, + "step": 66935 + }, + { + "epoch": 3.12, + "learning_rate": 9.668145407803365e-06, + "loss": 0.1625, + "step": 66940 + }, + { + "epoch": 3.12, + "learning_rate": 9.667361622748577e-06, + "loss": 0.1685, + "step": 66945 + }, + { + "epoch": 3.12, + "learning_rate": 9.666577837693791e-06, + "loss": 0.0706, + "step": 66950 + }, + { + "epoch": 3.12, + "learning_rate": 9.665794052639005e-06, + "loss": 0.0915, + "step": 66955 + }, + { + "epoch": 3.12, + "learning_rate": 9.665010267584219e-06, + "loss": 0.0695, + "step": 66960 + }, + { + "epoch": 3.12, + "learning_rate": 9.664226482529431e-06, + "loss": 0.0764, + "step": 66965 + }, + { + "epoch": 3.12, + "learning_rate": 9.663442697474645e-06, + "loss": 0.0657, + "step": 66970 + }, + { + "epoch": 3.13, + "learning_rate": 9.662658912419859e-06, + "loss": 0.0836, + "step": 66975 + }, + { + "epoch": 3.13, + "learning_rate": 9.661875127365073e-06, + "loss": 0.0888, + "step": 66980 + }, + { + "epoch": 3.13, + "learning_rate": 9.661091342310287e-06, + "loss": 0.1487, + "step": 66985 + }, + { + "epoch": 3.13, + "learning_rate": 9.660307557255499e-06, + "loss": 0.3276, + "step": 66990 + }, + { + "epoch": 3.13, + "learning_rate": 9.659523772200713e-06, + "loss": 0.3517, + "step": 66995 + }, + { + "epoch": 3.13, + "learning_rate": 9.658739987145927e-06, + "loss": 0.0516, + "step": 67000 + }, + { + "epoch": 3.13, + "learning_rate": 9.657956202091139e-06, + "loss": 0.0154, + "step": 67005 + }, + { + "epoch": 3.13, + "learning_rate": 9.657172417036353e-06, + "loss": 0.0361, + "step": 67010 + }, + { + "epoch": 3.13, + "learning_rate": 9.656388631981565e-06, + "loss": 0.0564, + "step": 67015 + }, + { + "epoch": 3.13, + "learning_rate": 9.655604846926779e-06, + "loss": 0.0617, + "step": 67020 + }, + { + "epoch": 3.13, + "learning_rate": 9.654821061871993e-06, + "loss": 0.0484, + "step": 67025 + }, + { + "epoch": 3.13, + "learning_rate": 9.654037276817207e-06, + "loss": 0.0938, + "step": 67030 + }, + { + "epoch": 3.13, + "learning_rate": 9.65325349176242e-06, + "loss": 0.104, + "step": 67035 + }, + { + "epoch": 3.13, + "learning_rate": 9.652469706707633e-06, + "loss": 0.2527, + "step": 67040 + }, + { + "epoch": 3.13, + "learning_rate": 9.651685921652847e-06, + "loss": 0.2725, + "step": 67045 + }, + { + "epoch": 3.13, + "learning_rate": 9.65090213659806e-06, + "loss": 0.1013, + "step": 67050 + }, + { + "epoch": 3.13, + "learning_rate": 9.650118351543275e-06, + "loss": 0.0622, + "step": 67055 + }, + { + "epoch": 3.13, + "learning_rate": 9.649334566488487e-06, + "loss": 0.0481, + "step": 67060 + }, + { + "epoch": 3.13, + "learning_rate": 9.6485507814337e-06, + "loss": 0.0547, + "step": 67065 + }, + { + "epoch": 3.13, + "learning_rate": 9.647766996378913e-06, + "loss": 0.1045, + "step": 67070 + }, + { + "epoch": 3.13, + "learning_rate": 9.646983211324127e-06, + "loss": 0.0833, + "step": 67075 + }, + { + "epoch": 3.13, + "learning_rate": 9.64619942626934e-06, + "loss": 0.1074, + "step": 67080 + }, + { + "epoch": 3.13, + "learning_rate": 9.645415641214555e-06, + "loss": 0.1298, + "step": 67085 + }, + { + "epoch": 3.13, + "learning_rate": 9.644631856159767e-06, + "loss": 0.1969, + "step": 67090 + }, + { + "epoch": 3.13, + "learning_rate": 9.64384807110498e-06, + "loss": 0.2585, + "step": 67095 + }, + { + "epoch": 3.13, + "learning_rate": 9.643064286050195e-06, + "loss": 0.0726, + "step": 67100 + }, + { + "epoch": 3.13, + "learning_rate": 9.642280500995409e-06, + "loss": 0.0397, + "step": 67105 + }, + { + "epoch": 3.13, + "learning_rate": 9.64149671594062e-06, + "loss": 0.0865, + "step": 67110 + }, + { + "epoch": 3.13, + "learning_rate": 9.640712930885835e-06, + "loss": 0.0552, + "step": 67115 + }, + { + "epoch": 3.13, + "learning_rate": 9.639929145831049e-06, + "loss": 0.0323, + "step": 67120 + }, + { + "epoch": 3.13, + "learning_rate": 9.639145360776262e-06, + "loss": 0.0693, + "step": 67125 + }, + { + "epoch": 3.13, + "learning_rate": 9.638361575721475e-06, + "loss": 0.0847, + "step": 67130 + }, + { + "epoch": 3.13, + "learning_rate": 9.637577790666689e-06, + "loss": 0.1027, + "step": 67135 + }, + { + "epoch": 3.13, + "learning_rate": 9.6367940056119e-06, + "loss": 0.2035, + "step": 67140 + }, + { + "epoch": 3.13, + "learning_rate": 9.636010220557115e-06, + "loss": 0.225, + "step": 67145 + }, + { + "epoch": 3.13, + "learning_rate": 9.635226435502329e-06, + "loss": 0.1034, + "step": 67150 + }, + { + "epoch": 3.13, + "learning_rate": 9.634442650447543e-06, + "loss": 0.0304, + "step": 67155 + }, + { + "epoch": 3.13, + "learning_rate": 9.633658865392755e-06, + "loss": 0.0912, + "step": 67160 + }, + { + "epoch": 3.13, + "learning_rate": 9.632875080337969e-06, + "loss": 0.0627, + "step": 67165 + }, + { + "epoch": 3.13, + "learning_rate": 9.632091295283183e-06, + "loss": 0.0269, + "step": 67170 + }, + { + "epoch": 3.13, + "learning_rate": 9.631307510228396e-06, + "loss": 0.0673, + "step": 67175 + }, + { + "epoch": 3.13, + "learning_rate": 9.630523725173609e-06, + "loss": 0.1514, + "step": 67180 + }, + { + "epoch": 3.13, + "learning_rate": 9.629739940118823e-06, + "loss": 0.146, + "step": 67185 + }, + { + "epoch": 3.14, + "learning_rate": 9.628956155064036e-06, + "loss": 0.185, + "step": 67190 + }, + { + "epoch": 3.14, + "learning_rate": 9.62817237000925e-06, + "loss": 0.3927, + "step": 67195 + }, + { + "epoch": 3.14, + "learning_rate": 9.627388584954463e-06, + "loss": 0.0939, + "step": 67200 + }, + { + "epoch": 3.14, + "learning_rate": 9.626604799899676e-06, + "loss": 0.034, + "step": 67205 + }, + { + "epoch": 3.14, + "learning_rate": 9.625821014844889e-06, + "loss": 0.0339, + "step": 67210 + }, + { + "epoch": 3.14, + "learning_rate": 9.625037229790103e-06, + "loss": 0.1123, + "step": 67215 + }, + { + "epoch": 3.14, + "learning_rate": 9.624253444735317e-06, + "loss": 0.0462, + "step": 67220 + }, + { + "epoch": 3.14, + "learning_rate": 9.62346965968053e-06, + "loss": 0.1015, + "step": 67225 + }, + { + "epoch": 3.14, + "learning_rate": 9.622685874625743e-06, + "loss": 0.1491, + "step": 67230 + }, + { + "epoch": 3.14, + "learning_rate": 9.621902089570957e-06, + "loss": 0.1363, + "step": 67235 + }, + { + "epoch": 3.14, + "learning_rate": 9.62111830451617e-06, + "loss": 0.2497, + "step": 67240 + }, + { + "epoch": 3.14, + "learning_rate": 9.620334519461384e-06, + "loss": 0.2666, + "step": 67245 + }, + { + "epoch": 3.14, + "learning_rate": 9.619550734406598e-06, + "loss": 0.0863, + "step": 67250 + }, + { + "epoch": 3.14, + "learning_rate": 9.61876694935181e-06, + "loss": 0.0429, + "step": 67255 + }, + { + "epoch": 3.14, + "learning_rate": 9.617983164297024e-06, + "loss": 0.0542, + "step": 67260 + }, + { + "epoch": 3.14, + "learning_rate": 9.617199379242237e-06, + "loss": 0.0543, + "step": 67265 + }, + { + "epoch": 3.14, + "learning_rate": 9.61641559418745e-06, + "loss": 0.0545, + "step": 67270 + }, + { + "epoch": 3.14, + "learning_rate": 9.615631809132664e-06, + "loss": 0.0679, + "step": 67275 + }, + { + "epoch": 3.14, + "learning_rate": 9.614848024077877e-06, + "loss": 0.1376, + "step": 67280 + }, + { + "epoch": 3.14, + "learning_rate": 9.61406423902309e-06, + "loss": 0.1443, + "step": 67285 + }, + { + "epoch": 3.14, + "learning_rate": 9.613280453968304e-06, + "loss": 0.1009, + "step": 67290 + }, + { + "epoch": 3.14, + "learning_rate": 9.612496668913518e-06, + "loss": 0.2522, + "step": 67295 + }, + { + "epoch": 3.14, + "learning_rate": 9.611712883858732e-06, + "loss": 0.0529, + "step": 67300 + }, + { + "epoch": 3.14, + "learning_rate": 9.610929098803944e-06, + "loss": 0.0183, + "step": 67305 + }, + { + "epoch": 3.14, + "learning_rate": 9.610145313749158e-06, + "loss": 0.0619, + "step": 67310 + }, + { + "epoch": 3.14, + "learning_rate": 9.609361528694372e-06, + "loss": 0.0607, + "step": 67315 + }, + { + "epoch": 3.14, + "learning_rate": 9.608577743639586e-06, + "loss": 0.0888, + "step": 67320 + }, + { + "epoch": 3.14, + "learning_rate": 9.607793958584798e-06, + "loss": 0.1036, + "step": 67325 + }, + { + "epoch": 3.14, + "learning_rate": 9.60701017353001e-06, + "loss": 0.117, + "step": 67330 + }, + { + "epoch": 3.14, + "learning_rate": 9.606226388475224e-06, + "loss": 0.1202, + "step": 67335 + }, + { + "epoch": 3.14, + "learning_rate": 9.605442603420438e-06, + "loss": 0.2976, + "step": 67340 + }, + { + "epoch": 3.14, + "learning_rate": 9.604658818365652e-06, + "loss": 0.3352, + "step": 67345 + }, + { + "epoch": 3.14, + "learning_rate": 9.603875033310866e-06, + "loss": 0.0541, + "step": 67350 + }, + { + "epoch": 3.14, + "learning_rate": 9.603091248256078e-06, + "loss": 0.017, + "step": 67355 + }, + { + "epoch": 3.14, + "learning_rate": 9.602307463201292e-06, + "loss": 0.0692, + "step": 67360 + }, + { + "epoch": 3.14, + "learning_rate": 9.601523678146506e-06, + "loss": 0.0356, + "step": 67365 + }, + { + "epoch": 3.14, + "learning_rate": 9.60073989309172e-06, + "loss": 0.0566, + "step": 67370 + }, + { + "epoch": 3.14, + "learning_rate": 9.599956108036932e-06, + "loss": 0.0287, + "step": 67375 + }, + { + "epoch": 3.14, + "learning_rate": 9.599172322982146e-06, + "loss": 0.1013, + "step": 67380 + }, + { + "epoch": 3.14, + "learning_rate": 9.59838853792736e-06, + "loss": 0.1021, + "step": 67385 + }, + { + "epoch": 3.14, + "learning_rate": 9.597604752872574e-06, + "loss": 0.1245, + "step": 67390 + }, + { + "epoch": 3.14, + "learning_rate": 9.596820967817786e-06, + "loss": 0.2174, + "step": 67395 + }, + { + "epoch": 3.14, + "learning_rate": 9.596037182763e-06, + "loss": 0.0651, + "step": 67400 + }, + { + "epoch": 3.15, + "learning_rate": 9.595253397708212e-06, + "loss": 0.0342, + "step": 67405 + }, + { + "epoch": 3.15, + "learning_rate": 9.594469612653426e-06, + "loss": 0.0149, + "step": 67410 + }, + { + "epoch": 3.15, + "learning_rate": 9.59368582759864e-06, + "loss": 0.0436, + "step": 67415 + }, + { + "epoch": 3.15, + "learning_rate": 9.592902042543854e-06, + "loss": 0.0795, + "step": 67420 + }, + { + "epoch": 3.15, + "learning_rate": 9.592118257489066e-06, + "loss": 0.1034, + "step": 67425 + }, + { + "epoch": 3.15, + "learning_rate": 9.59133447243428e-06, + "loss": 0.1312, + "step": 67430 + }, + { + "epoch": 3.15, + "learning_rate": 9.590550687379494e-06, + "loss": 0.1473, + "step": 67435 + }, + { + "epoch": 3.15, + "learning_rate": 9.589766902324708e-06, + "loss": 0.1771, + "step": 67440 + }, + { + "epoch": 3.15, + "learning_rate": 9.58898311726992e-06, + "loss": 0.2701, + "step": 67445 + }, + { + "epoch": 3.15, + "learning_rate": 9.588199332215134e-06, + "loss": 0.0963, + "step": 67450 + }, + { + "epoch": 3.15, + "learning_rate": 9.587415547160348e-06, + "loss": 0.0433, + "step": 67455 + }, + { + "epoch": 3.15, + "learning_rate": 9.58663176210556e-06, + "loss": 0.0833, + "step": 67460 + }, + { + "epoch": 3.15, + "learning_rate": 9.585847977050774e-06, + "loss": 0.0404, + "step": 67465 + }, + { + "epoch": 3.15, + "learning_rate": 9.585064191995988e-06, + "loss": 0.1147, + "step": 67470 + }, + { + "epoch": 3.15, + "learning_rate": 9.5842804069412e-06, + "loss": 0.1279, + "step": 67475 + }, + { + "epoch": 3.15, + "learning_rate": 9.583496621886414e-06, + "loss": 0.0957, + "step": 67480 + }, + { + "epoch": 3.15, + "learning_rate": 9.582712836831628e-06, + "loss": 0.1441, + "step": 67485 + }, + { + "epoch": 3.15, + "learning_rate": 9.581929051776842e-06, + "loss": 0.1781, + "step": 67490 + }, + { + "epoch": 3.15, + "learning_rate": 9.581145266722054e-06, + "loss": 0.3565, + "step": 67495 + }, + { + "epoch": 3.15, + "learning_rate": 9.580361481667268e-06, + "loss": 0.0838, + "step": 67500 + }, + { + "epoch": 3.15, + "learning_rate": 9.579577696612482e-06, + "loss": 0.025, + "step": 67505 + }, + { + "epoch": 3.15, + "learning_rate": 9.578793911557696e-06, + "loss": 0.0205, + "step": 67510 + }, + { + "epoch": 3.15, + "learning_rate": 9.57801012650291e-06, + "loss": 0.0888, + "step": 67515 + }, + { + "epoch": 3.15, + "learning_rate": 9.577226341448122e-06, + "loss": 0.0704, + "step": 67520 + }, + { + "epoch": 3.15, + "learning_rate": 9.576442556393334e-06, + "loss": 0.097, + "step": 67525 + }, + { + "epoch": 3.15, + "learning_rate": 9.575658771338548e-06, + "loss": 0.1136, + "step": 67530 + }, + { + "epoch": 3.15, + "learning_rate": 9.574874986283762e-06, + "loss": 0.1827, + "step": 67535 + }, + { + "epoch": 3.15, + "learning_rate": 9.574091201228976e-06, + "loss": 0.3217, + "step": 67540 + }, + { + "epoch": 3.15, + "learning_rate": 9.573307416174188e-06, + "loss": 0.3586, + "step": 67545 + }, + { + "epoch": 3.15, + "learning_rate": 9.572523631119402e-06, + "loss": 0.094, + "step": 67550 + }, + { + "epoch": 3.15, + "learning_rate": 9.571739846064616e-06, + "loss": 0.0299, + "step": 67555 + }, + { + "epoch": 3.15, + "learning_rate": 9.57095606100983e-06, + "loss": 0.0378, + "step": 67560 + }, + { + "epoch": 3.15, + "learning_rate": 9.570172275955044e-06, + "loss": 0.0593, + "step": 67565 + }, + { + "epoch": 3.15, + "learning_rate": 9.569388490900256e-06, + "loss": 0.0426, + "step": 67570 + }, + { + "epoch": 3.15, + "learning_rate": 9.56860470584547e-06, + "loss": 0.1359, + "step": 67575 + }, + { + "epoch": 3.15, + "learning_rate": 9.567820920790684e-06, + "loss": 0.0562, + "step": 67580 + }, + { + "epoch": 3.15, + "learning_rate": 9.567037135735898e-06, + "loss": 0.1838, + "step": 67585 + }, + { + "epoch": 3.15, + "learning_rate": 9.56625335068111e-06, + "loss": 0.2696, + "step": 67590 + }, + { + "epoch": 3.15, + "learning_rate": 9.565469565626322e-06, + "loss": 0.4682, + "step": 67595 + }, + { + "epoch": 3.15, + "learning_rate": 9.564685780571536e-06, + "loss": 0.0999, + "step": 67600 + }, + { + "epoch": 3.15, + "learning_rate": 9.56390199551675e-06, + "loss": 0.0261, + "step": 67605 + }, + { + "epoch": 3.15, + "learning_rate": 9.563118210461964e-06, + "loss": 0.1327, + "step": 67610 + }, + { + "epoch": 3.16, + "learning_rate": 9.562334425407178e-06, + "loss": 0.0242, + "step": 67615 + }, + { + "epoch": 3.16, + "learning_rate": 9.56155064035239e-06, + "loss": 0.0565, + "step": 67620 + }, + { + "epoch": 3.16, + "learning_rate": 9.560766855297604e-06, + "loss": 0.1068, + "step": 67625 + }, + { + "epoch": 3.16, + "learning_rate": 9.559983070242818e-06, + "loss": 0.0941, + "step": 67630 + }, + { + "epoch": 3.16, + "learning_rate": 9.559199285188032e-06, + "loss": 0.2012, + "step": 67635 + }, + { + "epoch": 3.16, + "learning_rate": 9.558415500133244e-06, + "loss": 0.2336, + "step": 67640 + }, + { + "epoch": 3.16, + "learning_rate": 9.557631715078458e-06, + "loss": 0.2486, + "step": 67645 + }, + { + "epoch": 3.16, + "learning_rate": 9.556847930023672e-06, + "loss": 0.1049, + "step": 67650 + }, + { + "epoch": 3.16, + "learning_rate": 9.556064144968884e-06, + "loss": 0.0125, + "step": 67655 + }, + { + "epoch": 3.16, + "learning_rate": 9.555280359914098e-06, + "loss": 0.0499, + "step": 67660 + }, + { + "epoch": 3.16, + "learning_rate": 9.554496574859312e-06, + "loss": 0.1079, + "step": 67665 + }, + { + "epoch": 3.16, + "learning_rate": 9.553712789804524e-06, + "loss": 0.0323, + "step": 67670 + }, + { + "epoch": 3.16, + "learning_rate": 9.552929004749738e-06, + "loss": 0.0886, + "step": 67675 + }, + { + "epoch": 3.16, + "learning_rate": 9.552145219694952e-06, + "loss": 0.1056, + "step": 67680 + }, + { + "epoch": 3.16, + "learning_rate": 9.551361434640166e-06, + "loss": 0.1175, + "step": 67685 + }, + { + "epoch": 3.16, + "learning_rate": 9.550577649585378e-06, + "loss": 0.3264, + "step": 67690 + }, + { + "epoch": 3.16, + "learning_rate": 9.549793864530592e-06, + "loss": 0.5046, + "step": 67695 + }, + { + "epoch": 3.16, + "learning_rate": 9.549010079475806e-06, + "loss": 0.0509, + "step": 67700 + }, + { + "epoch": 3.16, + "learning_rate": 9.54822629442102e-06, + "loss": 0.0376, + "step": 67705 + }, + { + "epoch": 3.16, + "learning_rate": 9.547442509366232e-06, + "loss": 0.0338, + "step": 67710 + }, + { + "epoch": 3.16, + "learning_rate": 9.546658724311446e-06, + "loss": 0.033, + "step": 67715 + }, + { + "epoch": 3.16, + "learning_rate": 9.545874939256658e-06, + "loss": 0.0897, + "step": 67720 + }, + { + "epoch": 3.16, + "learning_rate": 9.545091154201872e-06, + "loss": 0.1058, + "step": 67725 + }, + { + "epoch": 3.16, + "learning_rate": 9.544307369147086e-06, + "loss": 0.0865, + "step": 67730 + }, + { + "epoch": 3.16, + "learning_rate": 9.5435235840923e-06, + "loss": 0.1294, + "step": 67735 + }, + { + "epoch": 3.16, + "learning_rate": 9.542739799037512e-06, + "loss": 0.1835, + "step": 67740 + }, + { + "epoch": 3.16, + "learning_rate": 9.541956013982726e-06, + "loss": 0.3487, + "step": 67745 + }, + { + "epoch": 3.16, + "learning_rate": 9.54117222892794e-06, + "loss": 0.0457, + "step": 67750 + }, + { + "epoch": 3.16, + "learning_rate": 9.540388443873154e-06, + "loss": 0.0352, + "step": 67755 + }, + { + "epoch": 3.16, + "learning_rate": 9.539604658818366e-06, + "loss": 0.026, + "step": 67760 + }, + { + "epoch": 3.16, + "learning_rate": 9.53882087376358e-06, + "loss": 0.0794, + "step": 67765 + }, + { + "epoch": 3.16, + "learning_rate": 9.538037088708794e-06, + "loss": 0.0649, + "step": 67770 + }, + { + "epoch": 3.16, + "learning_rate": 9.537253303654008e-06, + "loss": 0.0954, + "step": 67775 + }, + { + "epoch": 3.16, + "learning_rate": 9.536469518599221e-06, + "loss": 0.0569, + "step": 67780 + }, + { + "epoch": 3.16, + "learning_rate": 9.535685733544434e-06, + "loss": 0.1698, + "step": 67785 + }, + { + "epoch": 3.16, + "learning_rate": 9.534901948489646e-06, + "loss": 0.2006, + "step": 67790 + }, + { + "epoch": 3.16, + "learning_rate": 9.53411816343486e-06, + "loss": 0.1633, + "step": 67795 + }, + { + "epoch": 3.16, + "learning_rate": 9.533334378380074e-06, + "loss": 0.078, + "step": 67800 + }, + { + "epoch": 3.16, + "learning_rate": 9.532550593325288e-06, + "loss": 0.0221, + "step": 67805 + }, + { + "epoch": 3.16, + "learning_rate": 9.5317668082705e-06, + "loss": 0.1055, + "step": 67810 + }, + { + "epoch": 3.16, + "learning_rate": 9.530983023215714e-06, + "loss": 0.0439, + "step": 67815 + }, + { + "epoch": 3.16, + "learning_rate": 9.530199238160928e-06, + "loss": 0.0632, + "step": 67820 + }, + { + "epoch": 3.16, + "learning_rate": 9.529415453106142e-06, + "loss": 0.1022, + "step": 67825 + }, + { + "epoch": 3.17, + "learning_rate": 9.528631668051355e-06, + "loss": 0.1556, + "step": 67830 + }, + { + "epoch": 3.17, + "learning_rate": 9.527847882996568e-06, + "loss": 0.118, + "step": 67835 + }, + { + "epoch": 3.17, + "learning_rate": 9.527064097941782e-06, + "loss": 0.1185, + "step": 67840 + }, + { + "epoch": 3.17, + "learning_rate": 9.526280312886995e-06, + "loss": 0.2748, + "step": 67845 + }, + { + "epoch": 3.17, + "learning_rate": 9.525496527832208e-06, + "loss": 0.0664, + "step": 67850 + }, + { + "epoch": 3.17, + "learning_rate": 9.524712742777422e-06, + "loss": 0.0549, + "step": 67855 + }, + { + "epoch": 3.17, + "learning_rate": 9.523928957722634e-06, + "loss": 0.0159, + "step": 67860 + }, + { + "epoch": 3.17, + "learning_rate": 9.523145172667848e-06, + "loss": 0.029, + "step": 67865 + }, + { + "epoch": 3.17, + "learning_rate": 9.522361387613062e-06, + "loss": 0.0671, + "step": 67870 + }, + { + "epoch": 3.17, + "learning_rate": 9.521577602558275e-06, + "loss": 0.0725, + "step": 67875 + }, + { + "epoch": 3.17, + "learning_rate": 9.52079381750349e-06, + "loss": 0.2095, + "step": 67880 + }, + { + "epoch": 3.17, + "learning_rate": 9.520010032448702e-06, + "loss": 0.2265, + "step": 67885 + }, + { + "epoch": 3.17, + "learning_rate": 9.519226247393916e-06, + "loss": 0.3112, + "step": 67890 + }, + { + "epoch": 3.17, + "learning_rate": 9.51844246233913e-06, + "loss": 0.2284, + "step": 67895 + }, + { + "epoch": 3.17, + "learning_rate": 9.517658677284343e-06, + "loss": 0.0592, + "step": 67900 + }, + { + "epoch": 3.17, + "learning_rate": 9.516874892229556e-06, + "loss": 0.0495, + "step": 67905 + }, + { + "epoch": 3.17, + "learning_rate": 9.51609110717477e-06, + "loss": 0.02, + "step": 67910 + }, + { + "epoch": 3.17, + "learning_rate": 9.515307322119982e-06, + "loss": 0.0283, + "step": 67915 + }, + { + "epoch": 3.17, + "learning_rate": 9.514523537065196e-06, + "loss": 0.0647, + "step": 67920 + }, + { + "epoch": 3.17, + "learning_rate": 9.51373975201041e-06, + "loss": 0.1077, + "step": 67925 + }, + { + "epoch": 3.17, + "learning_rate": 9.512955966955623e-06, + "loss": 0.1124, + "step": 67930 + }, + { + "epoch": 3.17, + "learning_rate": 9.512172181900836e-06, + "loss": 0.1757, + "step": 67935 + }, + { + "epoch": 3.17, + "learning_rate": 9.51138839684605e-06, + "loss": 0.217, + "step": 67940 + }, + { + "epoch": 3.17, + "learning_rate": 9.510604611791263e-06, + "loss": 0.3992, + "step": 67945 + }, + { + "epoch": 3.17, + "learning_rate": 9.509820826736477e-06, + "loss": 0.0693, + "step": 67950 + }, + { + "epoch": 3.17, + "learning_rate": 9.50903704168169e-06, + "loss": 0.067, + "step": 67955 + }, + { + "epoch": 3.17, + "learning_rate": 9.508253256626903e-06, + "loss": 0.0275, + "step": 67960 + }, + { + "epoch": 3.17, + "learning_rate": 9.507469471572117e-06, + "loss": 0.0541, + "step": 67965 + }, + { + "epoch": 3.17, + "learning_rate": 9.506685686517331e-06, + "loss": 0.055, + "step": 67970 + }, + { + "epoch": 3.17, + "learning_rate": 9.505901901462543e-06, + "loss": 0.0966, + "step": 67975 + }, + { + "epoch": 3.17, + "learning_rate": 9.505118116407757e-06, + "loss": 0.1219, + "step": 67980 + }, + { + "epoch": 3.17, + "learning_rate": 9.50433433135297e-06, + "loss": 0.2099, + "step": 67985 + }, + { + "epoch": 3.17, + "learning_rate": 9.503550546298183e-06, + "loss": 0.1988, + "step": 67990 + }, + { + "epoch": 3.17, + "learning_rate": 9.502766761243397e-06, + "loss": 0.3276, + "step": 67995 + }, + { + "epoch": 3.17, + "learning_rate": 9.501982976188611e-06, + "loss": 0.1253, + "step": 68000 + }, + { + "epoch": 3.17, + "learning_rate": 9.501199191133823e-06, + "loss": 0.0215, + "step": 68005 + }, + { + "epoch": 3.17, + "learning_rate": 9.500415406079037e-06, + "loss": 0.0237, + "step": 68010 + }, + { + "epoch": 3.17, + "learning_rate": 9.499631621024251e-06, + "loss": 0.0624, + "step": 68015 + }, + { + "epoch": 3.17, + "learning_rate": 9.498847835969465e-06, + "loss": 0.0849, + "step": 68020 + }, + { + "epoch": 3.17, + "learning_rate": 9.498064050914677e-06, + "loss": 0.083, + "step": 68025 + }, + { + "epoch": 3.17, + "learning_rate": 9.497280265859891e-06, + "loss": 0.1419, + "step": 68030 + }, + { + "epoch": 3.17, + "learning_rate": 9.496496480805105e-06, + "loss": 0.1342, + "step": 68035 + }, + { + "epoch": 3.17, + "learning_rate": 9.495712695750319e-06, + "loss": 0.1337, + "step": 68040 + }, + { + "epoch": 3.18, + "learning_rate": 9.494928910695531e-06, + "loss": 0.2854, + "step": 68045 + }, + { + "epoch": 3.18, + "learning_rate": 9.494145125640745e-06, + "loss": 0.146, + "step": 68050 + }, + { + "epoch": 3.18, + "learning_rate": 9.493361340585957e-06, + "loss": 0.0851, + "step": 68055 + }, + { + "epoch": 3.18, + "learning_rate": 9.492577555531171e-06, + "loss": 0.0148, + "step": 68060 + }, + { + "epoch": 3.18, + "learning_rate": 9.491793770476385e-06, + "loss": 0.0833, + "step": 68065 + }, + { + "epoch": 3.18, + "learning_rate": 9.4910099854216e-06, + "loss": 0.0505, + "step": 68070 + }, + { + "epoch": 3.18, + "learning_rate": 9.490226200366811e-06, + "loss": 0.103, + "step": 68075 + }, + { + "epoch": 3.18, + "learning_rate": 9.489442415312025e-06, + "loss": 0.1506, + "step": 68080 + }, + { + "epoch": 3.18, + "learning_rate": 9.48865863025724e-06, + "loss": 0.1196, + "step": 68085 + }, + { + "epoch": 3.18, + "learning_rate": 9.487874845202453e-06, + "loss": 0.1659, + "step": 68090 + }, + { + "epoch": 3.18, + "learning_rate": 9.487091060147667e-06, + "loss": 0.3757, + "step": 68095 + }, + { + "epoch": 3.18, + "learning_rate": 9.48630727509288e-06, + "loss": 0.0993, + "step": 68100 + }, + { + "epoch": 3.18, + "learning_rate": 9.485523490038093e-06, + "loss": 0.0322, + "step": 68105 + }, + { + "epoch": 3.18, + "learning_rate": 9.484739704983305e-06, + "loss": 0.047, + "step": 68110 + }, + { + "epoch": 3.18, + "learning_rate": 9.48395591992852e-06, + "loss": 0.081, + "step": 68115 + }, + { + "epoch": 3.18, + "learning_rate": 9.483172134873733e-06, + "loss": 0.0764, + "step": 68120 + }, + { + "epoch": 3.18, + "learning_rate": 9.482388349818945e-06, + "loss": 0.0573, + "step": 68125 + }, + { + "epoch": 3.18, + "learning_rate": 9.48160456476416e-06, + "loss": 0.1731, + "step": 68130 + }, + { + "epoch": 3.18, + "learning_rate": 9.480820779709373e-06, + "loss": 0.1322, + "step": 68135 + }, + { + "epoch": 3.18, + "learning_rate": 9.480036994654587e-06, + "loss": 0.1864, + "step": 68140 + }, + { + "epoch": 3.18, + "learning_rate": 9.479253209599801e-06, + "loss": 0.2936, + "step": 68145 + }, + { + "epoch": 3.18, + "learning_rate": 9.478469424545013e-06, + "loss": 0.0338, + "step": 68150 + }, + { + "epoch": 3.18, + "learning_rate": 9.477685639490227e-06, + "loss": 0.0361, + "step": 68155 + }, + { + "epoch": 3.18, + "learning_rate": 9.476901854435441e-06, + "loss": 0.0377, + "step": 68160 + }, + { + "epoch": 3.18, + "learning_rate": 9.476118069380655e-06, + "loss": 0.018, + "step": 68165 + }, + { + "epoch": 3.18, + "learning_rate": 9.475334284325867e-06, + "loss": 0.0892, + "step": 68170 + }, + { + "epoch": 3.18, + "learning_rate": 9.47455049927108e-06, + "loss": 0.0872, + "step": 68175 + }, + { + "epoch": 3.18, + "learning_rate": 9.473766714216293e-06, + "loss": 0.1519, + "step": 68180 + }, + { + "epoch": 3.18, + "learning_rate": 9.472982929161507e-06, + "loss": 0.2009, + "step": 68185 + }, + { + "epoch": 3.18, + "learning_rate": 9.472199144106721e-06, + "loss": 0.122, + "step": 68190 + }, + { + "epoch": 3.18, + "learning_rate": 9.471415359051935e-06, + "loss": 0.2358, + "step": 68195 + }, + { + "epoch": 3.18, + "learning_rate": 9.470631573997147e-06, + "loss": 0.1171, + "step": 68200 + }, + { + "epoch": 3.18, + "learning_rate": 9.469847788942361e-06, + "loss": 0.0262, + "step": 68205 + }, + { + "epoch": 3.18, + "learning_rate": 9.469064003887575e-06, + "loss": 0.0397, + "step": 68210 + }, + { + "epoch": 3.18, + "learning_rate": 9.468280218832789e-06, + "loss": 0.0614, + "step": 68215 + }, + { + "epoch": 3.18, + "learning_rate": 9.467496433778001e-06, + "loss": 0.0558, + "step": 68220 + }, + { + "epoch": 3.18, + "learning_rate": 9.466712648723215e-06, + "loss": 0.106, + "step": 68225 + }, + { + "epoch": 3.18, + "learning_rate": 9.465928863668429e-06, + "loss": 0.0806, + "step": 68230 + }, + { + "epoch": 3.18, + "learning_rate": 9.465145078613643e-06, + "loss": 0.1555, + "step": 68235 + }, + { + "epoch": 3.18, + "learning_rate": 9.464361293558855e-06, + "loss": 0.1864, + "step": 68240 + }, + { + "epoch": 3.18, + "learning_rate": 9.463577508504069e-06, + "loss": 0.2363, + "step": 68245 + }, + { + "epoch": 3.18, + "learning_rate": 9.462793723449281e-06, + "loss": 0.0648, + "step": 68250 + }, + { + "epoch": 3.18, + "learning_rate": 9.462009938394495e-06, + "loss": 0.0486, + "step": 68255 + }, + { + "epoch": 3.19, + "learning_rate": 9.461226153339709e-06, + "loss": 0.0306, + "step": 68260 + }, + { + "epoch": 3.19, + "learning_rate": 9.460442368284923e-06, + "loss": 0.0714, + "step": 68265 + }, + { + "epoch": 3.19, + "learning_rate": 9.459658583230135e-06, + "loss": 0.05, + "step": 68270 + }, + { + "epoch": 3.19, + "learning_rate": 9.458874798175349e-06, + "loss": 0.0767, + "step": 68275 + }, + { + "epoch": 3.19, + "learning_rate": 9.458091013120563e-06, + "loss": 0.062, + "step": 68280 + }, + { + "epoch": 3.19, + "learning_rate": 9.457307228065777e-06, + "loss": 0.1273, + "step": 68285 + }, + { + "epoch": 3.19, + "learning_rate": 9.456523443010989e-06, + "loss": 0.1011, + "step": 68290 + }, + { + "epoch": 3.19, + "learning_rate": 9.455739657956203e-06, + "loss": 0.2082, + "step": 68295 + }, + { + "epoch": 3.19, + "learning_rate": 9.454955872901417e-06, + "loss": 0.0501, + "step": 68300 + }, + { + "epoch": 3.19, + "learning_rate": 9.454172087846629e-06, + "loss": 0.0578, + "step": 68305 + }, + { + "epoch": 3.19, + "learning_rate": 9.453388302791843e-06, + "loss": 0.0514, + "step": 68310 + }, + { + "epoch": 3.19, + "learning_rate": 9.452604517737057e-06, + "loss": 0.0431, + "step": 68315 + }, + { + "epoch": 3.19, + "learning_rate": 9.451820732682269e-06, + "loss": 0.0945, + "step": 68320 + }, + { + "epoch": 3.19, + "learning_rate": 9.451036947627483e-06, + "loss": 0.1068, + "step": 68325 + }, + { + "epoch": 3.19, + "learning_rate": 9.450253162572697e-06, + "loss": 0.1171, + "step": 68330 + }, + { + "epoch": 3.19, + "learning_rate": 9.44946937751791e-06, + "loss": 0.0724, + "step": 68335 + }, + { + "epoch": 3.19, + "learning_rate": 9.448685592463123e-06, + "loss": 0.2423, + "step": 68340 + }, + { + "epoch": 3.19, + "learning_rate": 9.447901807408337e-06, + "loss": 0.2527, + "step": 68345 + }, + { + "epoch": 3.19, + "learning_rate": 9.44711802235355e-06, + "loss": 0.0918, + "step": 68350 + }, + { + "epoch": 3.19, + "learning_rate": 9.446334237298765e-06, + "loss": 0.0222, + "step": 68355 + }, + { + "epoch": 3.19, + "learning_rate": 9.445550452243979e-06, + "loss": 0.0152, + "step": 68360 + }, + { + "epoch": 3.19, + "learning_rate": 9.44476666718919e-06, + "loss": 0.0738, + "step": 68365 + }, + { + "epoch": 3.19, + "learning_rate": 9.443982882134403e-06, + "loss": 0.0907, + "step": 68370 + }, + { + "epoch": 3.19, + "learning_rate": 9.443199097079617e-06, + "loss": 0.0834, + "step": 68375 + }, + { + "epoch": 3.19, + "learning_rate": 9.44241531202483e-06, + "loss": 0.107, + "step": 68380 + }, + { + "epoch": 3.19, + "learning_rate": 9.441631526970045e-06, + "loss": 0.2131, + "step": 68385 + }, + { + "epoch": 3.19, + "learning_rate": 9.440847741915257e-06, + "loss": 0.1436, + "step": 68390 + }, + { + "epoch": 3.19, + "learning_rate": 9.440063956860471e-06, + "loss": 0.3737, + "step": 68395 + }, + { + "epoch": 3.19, + "learning_rate": 9.439280171805685e-06, + "loss": 0.1027, + "step": 68400 + }, + { + "epoch": 3.19, + "learning_rate": 9.438496386750899e-06, + "loss": 0.0675, + "step": 68405 + }, + { + "epoch": 3.19, + "learning_rate": 9.437712601696113e-06, + "loss": 0.0592, + "step": 68410 + }, + { + "epoch": 3.19, + "learning_rate": 9.436928816641325e-06, + "loss": 0.0521, + "step": 68415 + }, + { + "epoch": 3.19, + "learning_rate": 9.436145031586539e-06, + "loss": 0.0423, + "step": 68420 + }, + { + "epoch": 3.19, + "learning_rate": 9.435361246531753e-06, + "loss": 0.0177, + "step": 68425 + }, + { + "epoch": 3.19, + "learning_rate": 9.434577461476967e-06, + "loss": 0.0523, + "step": 68430 + }, + { + "epoch": 3.19, + "learning_rate": 9.433793676422179e-06, + "loss": 0.1323, + "step": 68435 + }, + { + "epoch": 3.19, + "learning_rate": 9.433009891367391e-06, + "loss": 0.1668, + "step": 68440 + }, + { + "epoch": 3.19, + "learning_rate": 9.432226106312605e-06, + "loss": 0.25, + "step": 68445 + }, + { + "epoch": 3.19, + "learning_rate": 9.431442321257819e-06, + "loss": 0.0571, + "step": 68450 + }, + { + "epoch": 3.19, + "learning_rate": 9.430658536203033e-06, + "loss": 0.0383, + "step": 68455 + }, + { + "epoch": 3.19, + "learning_rate": 9.429874751148247e-06, + "loss": 0.0444, + "step": 68460 + }, + { + "epoch": 3.19, + "learning_rate": 9.429090966093459e-06, + "loss": 0.0427, + "step": 68465 + }, + { + "epoch": 3.19, + "learning_rate": 9.428307181038673e-06, + "loss": 0.0621, + "step": 68470 + }, + { + "epoch": 3.2, + "learning_rate": 9.427523395983887e-06, + "loss": 0.0821, + "step": 68475 + }, + { + "epoch": 3.2, + "learning_rate": 9.4267396109291e-06, + "loss": 0.1072, + "step": 68480 + }, + { + "epoch": 3.2, + "learning_rate": 9.425955825874313e-06, + "loss": 0.1496, + "step": 68485 + }, + { + "epoch": 3.2, + "learning_rate": 9.425172040819527e-06, + "loss": 0.327, + "step": 68490 + }, + { + "epoch": 3.2, + "learning_rate": 9.42438825576474e-06, + "loss": 0.3904, + "step": 68495 + }, + { + "epoch": 3.2, + "learning_rate": 9.423604470709953e-06, + "loss": 0.0764, + "step": 68500 + }, + { + "epoch": 3.2, + "learning_rate": 9.422820685655167e-06, + "loss": 0.0141, + "step": 68505 + }, + { + "epoch": 3.2, + "learning_rate": 9.42203690060038e-06, + "loss": 0.0912, + "step": 68510 + }, + { + "epoch": 3.2, + "learning_rate": 9.421253115545593e-06, + "loss": 0.0373, + "step": 68515 + }, + { + "epoch": 3.2, + "learning_rate": 9.420469330490807e-06, + "loss": 0.1329, + "step": 68520 + }, + { + "epoch": 3.2, + "learning_rate": 9.41968554543602e-06, + "loss": 0.0989, + "step": 68525 + }, + { + "epoch": 3.2, + "learning_rate": 9.418901760381234e-06, + "loss": 0.103, + "step": 68530 + }, + { + "epoch": 3.2, + "learning_rate": 9.418117975326447e-06, + "loss": 0.1543, + "step": 68535 + }, + { + "epoch": 3.2, + "learning_rate": 9.41733419027166e-06, + "loss": 0.1886, + "step": 68540 + }, + { + "epoch": 3.2, + "learning_rate": 9.416550405216874e-06, + "loss": 0.3793, + "step": 68545 + }, + { + "epoch": 3.2, + "learning_rate": 9.415766620162088e-06, + "loss": 0.0654, + "step": 68550 + }, + { + "epoch": 3.2, + "learning_rate": 9.4149828351073e-06, + "loss": 0.0313, + "step": 68555 + }, + { + "epoch": 3.2, + "learning_rate": 9.414199050052515e-06, + "loss": 0.0346, + "step": 68560 + }, + { + "epoch": 3.2, + "learning_rate": 9.413415264997727e-06, + "loss": 0.0393, + "step": 68565 + }, + { + "epoch": 3.2, + "learning_rate": 9.41263147994294e-06, + "loss": 0.0822, + "step": 68570 + }, + { + "epoch": 3.2, + "learning_rate": 9.411847694888155e-06, + "loss": 0.1126, + "step": 68575 + }, + { + "epoch": 3.2, + "learning_rate": 9.411063909833368e-06, + "loss": 0.0963, + "step": 68580 + }, + { + "epoch": 3.2, + "learning_rate": 9.41028012477858e-06, + "loss": 0.1581, + "step": 68585 + }, + { + "epoch": 3.2, + "learning_rate": 9.409496339723795e-06, + "loss": 0.2612, + "step": 68590 + }, + { + "epoch": 3.2, + "learning_rate": 9.408712554669008e-06, + "loss": 0.323, + "step": 68595 + }, + { + "epoch": 3.2, + "learning_rate": 9.407928769614222e-06, + "loss": 0.0175, + "step": 68600 + }, + { + "epoch": 3.2, + "learning_rate": 9.407144984559435e-06, + "loss": 0.0262, + "step": 68605 + }, + { + "epoch": 3.2, + "learning_rate": 9.406361199504648e-06, + "loss": 0.0282, + "step": 68610 + }, + { + "epoch": 3.2, + "learning_rate": 9.405577414449862e-06, + "loss": 0.0527, + "step": 68615 + }, + { + "epoch": 3.2, + "learning_rate": 9.404793629395076e-06, + "loss": 0.0796, + "step": 68620 + }, + { + "epoch": 3.2, + "learning_rate": 9.40400984434029e-06, + "loss": 0.0827, + "step": 68625 + }, + { + "epoch": 3.2, + "learning_rate": 9.403226059285502e-06, + "loss": 0.0572, + "step": 68630 + }, + { + "epoch": 3.2, + "learning_rate": 9.402442274230715e-06, + "loss": 0.1104, + "step": 68635 + }, + { + "epoch": 3.2, + "learning_rate": 9.401658489175929e-06, + "loss": 0.1353, + "step": 68640 + }, + { + "epoch": 3.2, + "learning_rate": 9.400874704121142e-06, + "loss": 0.3382, + "step": 68645 + }, + { + "epoch": 3.2, + "learning_rate": 9.400090919066356e-06, + "loss": 0.0508, + "step": 68650 + }, + { + "epoch": 3.2, + "learning_rate": 9.399307134011569e-06, + "loss": 0.0686, + "step": 68655 + }, + { + "epoch": 3.2, + "learning_rate": 9.398523348956782e-06, + "loss": 0.0725, + "step": 68660 + }, + { + "epoch": 3.2, + "learning_rate": 9.397739563901996e-06, + "loss": 0.1, + "step": 68665 + }, + { + "epoch": 3.2, + "learning_rate": 9.39695577884721e-06, + "loss": 0.0732, + "step": 68670 + }, + { + "epoch": 3.2, + "learning_rate": 9.396171993792424e-06, + "loss": 0.0743, + "step": 68675 + }, + { + "epoch": 3.2, + "learning_rate": 9.395388208737636e-06, + "loss": 0.0961, + "step": 68680 + }, + { + "epoch": 3.2, + "learning_rate": 9.39460442368285e-06, + "loss": 0.1475, + "step": 68685 + }, + { + "epoch": 3.21, + "learning_rate": 9.393820638628064e-06, + "loss": 0.197, + "step": 68690 + }, + { + "epoch": 3.21, + "learning_rate": 9.393036853573276e-06, + "loss": 0.2542, + "step": 68695 + }, + { + "epoch": 3.21, + "learning_rate": 9.39225306851849e-06, + "loss": 0.0426, + "step": 68700 + }, + { + "epoch": 3.21, + "learning_rate": 9.391469283463703e-06, + "loss": 0.0372, + "step": 68705 + }, + { + "epoch": 3.21, + "learning_rate": 9.390685498408916e-06, + "loss": 0.0381, + "step": 68710 + }, + { + "epoch": 3.21, + "learning_rate": 9.38990171335413e-06, + "loss": 0.07, + "step": 68715 + }, + { + "epoch": 3.21, + "learning_rate": 9.389117928299344e-06, + "loss": 0.0335, + "step": 68720 + }, + { + "epoch": 3.21, + "learning_rate": 9.388334143244558e-06, + "loss": 0.0311, + "step": 68725 + }, + { + "epoch": 3.21, + "learning_rate": 9.38755035818977e-06, + "loss": 0.0415, + "step": 68730 + }, + { + "epoch": 3.21, + "learning_rate": 9.386766573134984e-06, + "loss": 0.1207, + "step": 68735 + }, + { + "epoch": 3.21, + "learning_rate": 9.385982788080198e-06, + "loss": 0.2369, + "step": 68740 + }, + { + "epoch": 3.21, + "learning_rate": 9.385199003025412e-06, + "loss": 0.2357, + "step": 68745 + }, + { + "epoch": 3.21, + "learning_rate": 9.384415217970624e-06, + "loss": 0.1444, + "step": 68750 + }, + { + "epoch": 3.21, + "learning_rate": 9.383631432915838e-06, + "loss": 0.0314, + "step": 68755 + }, + { + "epoch": 3.21, + "learning_rate": 9.38284764786105e-06, + "loss": 0.0433, + "step": 68760 + }, + { + "epoch": 3.21, + "learning_rate": 9.382063862806264e-06, + "loss": 0.0297, + "step": 68765 + }, + { + "epoch": 3.21, + "learning_rate": 9.381280077751478e-06, + "loss": 0.0899, + "step": 68770 + }, + { + "epoch": 3.21, + "learning_rate": 9.380496292696692e-06, + "loss": 0.06, + "step": 68775 + }, + { + "epoch": 3.21, + "learning_rate": 9.379712507641904e-06, + "loss": 0.1671, + "step": 68780 + }, + { + "epoch": 3.21, + "learning_rate": 9.378928722587118e-06, + "loss": 0.1742, + "step": 68785 + }, + { + "epoch": 3.21, + "learning_rate": 9.378144937532332e-06, + "loss": 0.243, + "step": 68790 + }, + { + "epoch": 3.21, + "learning_rate": 9.377361152477546e-06, + "loss": 0.2625, + "step": 68795 + }, + { + "epoch": 3.21, + "learning_rate": 9.376577367422758e-06, + "loss": 0.0928, + "step": 68800 + }, + { + "epoch": 3.21, + "learning_rate": 9.375793582367972e-06, + "loss": 0.0624, + "step": 68805 + }, + { + "epoch": 3.21, + "learning_rate": 9.375009797313186e-06, + "loss": 0.0441, + "step": 68810 + }, + { + "epoch": 3.21, + "learning_rate": 9.3742260122584e-06, + "loss": 0.0707, + "step": 68815 + }, + { + "epoch": 3.21, + "learning_rate": 9.373442227203612e-06, + "loss": 0.0557, + "step": 68820 + }, + { + "epoch": 3.21, + "learning_rate": 9.372658442148826e-06, + "loss": 0.0632, + "step": 68825 + }, + { + "epoch": 3.21, + "learning_rate": 9.371874657094038e-06, + "loss": 0.0644, + "step": 68830 + }, + { + "epoch": 3.21, + "learning_rate": 9.371090872039252e-06, + "loss": 0.1814, + "step": 68835 + }, + { + "epoch": 3.21, + "learning_rate": 9.370307086984466e-06, + "loss": 0.1505, + "step": 68840 + }, + { + "epoch": 3.21, + "learning_rate": 9.36952330192968e-06, + "loss": 0.2694, + "step": 68845 + }, + { + "epoch": 3.21, + "learning_rate": 9.368739516874892e-06, + "loss": 0.0535, + "step": 68850 + }, + { + "epoch": 3.21, + "learning_rate": 9.367955731820106e-06, + "loss": 0.0589, + "step": 68855 + }, + { + "epoch": 3.21, + "learning_rate": 9.36717194676532e-06, + "loss": 0.0595, + "step": 68860 + }, + { + "epoch": 3.21, + "learning_rate": 9.366388161710534e-06, + "loss": 0.0274, + "step": 68865 + }, + { + "epoch": 3.21, + "learning_rate": 9.365604376655746e-06, + "loss": 0.1054, + "step": 68870 + }, + { + "epoch": 3.21, + "learning_rate": 9.36482059160096e-06, + "loss": 0.0694, + "step": 68875 + }, + { + "epoch": 3.21, + "learning_rate": 9.364036806546174e-06, + "loss": 0.0681, + "step": 68880 + }, + { + "epoch": 3.21, + "learning_rate": 9.363253021491388e-06, + "loss": 0.0854, + "step": 68885 + }, + { + "epoch": 3.21, + "learning_rate": 9.3624692364366e-06, + "loss": 0.306, + "step": 68890 + }, + { + "epoch": 3.21, + "learning_rate": 9.361685451381814e-06, + "loss": 0.1706, + "step": 68895 + }, + { + "epoch": 3.21, + "learning_rate": 9.360901666327026e-06, + "loss": 0.037, + "step": 68900 + }, + { + "epoch": 3.22, + "learning_rate": 9.36011788127224e-06, + "loss": 0.0279, + "step": 68905 + }, + { + "epoch": 3.22, + "learning_rate": 9.359334096217454e-06, + "loss": 0.0225, + "step": 68910 + }, + { + "epoch": 3.22, + "learning_rate": 9.358550311162668e-06, + "loss": 0.0484, + "step": 68915 + }, + { + "epoch": 3.22, + "learning_rate": 9.35776652610788e-06, + "loss": 0.101, + "step": 68920 + }, + { + "epoch": 3.22, + "learning_rate": 9.356982741053094e-06, + "loss": 0.0671, + "step": 68925 + }, + { + "epoch": 3.22, + "learning_rate": 9.356198955998308e-06, + "loss": 0.1678, + "step": 68930 + }, + { + "epoch": 3.22, + "learning_rate": 9.355415170943522e-06, + "loss": 0.1417, + "step": 68935 + }, + { + "epoch": 3.22, + "learning_rate": 9.354631385888736e-06, + "loss": 0.212, + "step": 68940 + }, + { + "epoch": 3.22, + "learning_rate": 9.353847600833948e-06, + "loss": 0.3602, + "step": 68945 + }, + { + "epoch": 3.22, + "learning_rate": 9.353063815779162e-06, + "loss": 0.0381, + "step": 68950 + }, + { + "epoch": 3.22, + "learning_rate": 9.352280030724374e-06, + "loss": 0.0262, + "step": 68955 + }, + { + "epoch": 3.22, + "learning_rate": 9.351496245669588e-06, + "loss": 0.0538, + "step": 68960 + }, + { + "epoch": 3.22, + "learning_rate": 9.350712460614802e-06, + "loss": 0.0429, + "step": 68965 + }, + { + "epoch": 3.22, + "learning_rate": 9.349928675560014e-06, + "loss": 0.0416, + "step": 68970 + }, + { + "epoch": 3.22, + "learning_rate": 9.349144890505228e-06, + "loss": 0.0407, + "step": 68975 + }, + { + "epoch": 3.22, + "learning_rate": 9.348361105450442e-06, + "loss": 0.129, + "step": 68980 + }, + { + "epoch": 3.22, + "learning_rate": 9.347577320395656e-06, + "loss": 0.0655, + "step": 68985 + }, + { + "epoch": 3.22, + "learning_rate": 9.34679353534087e-06, + "loss": 0.2001, + "step": 68990 + }, + { + "epoch": 3.22, + "learning_rate": 9.346009750286082e-06, + "loss": 0.2085, + "step": 68995 + }, + { + "epoch": 3.22, + "learning_rate": 9.345225965231296e-06, + "loss": 0.0888, + "step": 69000 + }, + { + "epoch": 3.22, + "learning_rate": 9.34444218017651e-06, + "loss": 0.0161, + "step": 69005 + }, + { + "epoch": 3.22, + "learning_rate": 9.343658395121724e-06, + "loss": 0.0768, + "step": 69010 + }, + { + "epoch": 3.22, + "learning_rate": 9.342874610066936e-06, + "loss": 0.0276, + "step": 69015 + }, + { + "epoch": 3.22, + "learning_rate": 9.342090825012148e-06, + "loss": 0.0582, + "step": 69020 + }, + { + "epoch": 3.22, + "learning_rate": 9.341307039957362e-06, + "loss": 0.0803, + "step": 69025 + }, + { + "epoch": 3.22, + "learning_rate": 9.340523254902576e-06, + "loss": 0.0411, + "step": 69030 + }, + { + "epoch": 3.22, + "learning_rate": 9.33973946984779e-06, + "loss": 0.1711, + "step": 69035 + }, + { + "epoch": 3.22, + "learning_rate": 9.338955684793004e-06, + "loss": 0.1804, + "step": 69040 + }, + { + "epoch": 3.22, + "learning_rate": 9.338171899738216e-06, + "loss": 0.231, + "step": 69045 + }, + { + "epoch": 3.22, + "learning_rate": 9.33738811468343e-06, + "loss": 0.0687, + "step": 69050 + }, + { + "epoch": 3.22, + "learning_rate": 9.336604329628644e-06, + "loss": 0.0435, + "step": 69055 + }, + { + "epoch": 3.22, + "learning_rate": 9.335820544573858e-06, + "loss": 0.0353, + "step": 69060 + }, + { + "epoch": 3.22, + "learning_rate": 9.33503675951907e-06, + "loss": 0.0694, + "step": 69065 + }, + { + "epoch": 3.22, + "learning_rate": 9.334252974464284e-06, + "loss": 0.0859, + "step": 69070 + }, + { + "epoch": 3.22, + "learning_rate": 9.333469189409498e-06, + "loss": 0.0612, + "step": 69075 + }, + { + "epoch": 3.22, + "learning_rate": 9.332685404354712e-06, + "loss": 0.1262, + "step": 69080 + }, + { + "epoch": 3.22, + "learning_rate": 9.331901619299924e-06, + "loss": 0.1553, + "step": 69085 + }, + { + "epoch": 3.22, + "learning_rate": 9.331117834245138e-06, + "loss": 0.2087, + "step": 69090 + }, + { + "epoch": 3.22, + "learning_rate": 9.33033404919035e-06, + "loss": 0.1873, + "step": 69095 + }, + { + "epoch": 3.22, + "learning_rate": 9.329550264135564e-06, + "loss": 0.0867, + "step": 69100 + }, + { + "epoch": 3.22, + "learning_rate": 9.328766479080778e-06, + "loss": 0.0105, + "step": 69105 + }, + { + "epoch": 3.22, + "learning_rate": 9.327982694025992e-06, + "loss": 0.0393, + "step": 69110 + }, + { + "epoch": 3.23, + "learning_rate": 9.327198908971204e-06, + "loss": 0.0326, + "step": 69115 + }, + { + "epoch": 3.23, + "learning_rate": 9.326415123916418e-06, + "loss": 0.0376, + "step": 69120 + }, + { + "epoch": 3.23, + "learning_rate": 9.325631338861632e-06, + "loss": 0.0788, + "step": 69125 + }, + { + "epoch": 3.23, + "learning_rate": 9.324847553806846e-06, + "loss": 0.0968, + "step": 69130 + }, + { + "epoch": 3.23, + "learning_rate": 9.324063768752058e-06, + "loss": 0.215, + "step": 69135 + }, + { + "epoch": 3.23, + "learning_rate": 9.323279983697272e-06, + "loss": 0.2422, + "step": 69140 + }, + { + "epoch": 3.23, + "learning_rate": 9.322496198642486e-06, + "loss": 0.4225, + "step": 69145 + }, + { + "epoch": 3.23, + "learning_rate": 9.321712413587698e-06, + "loss": 0.0695, + "step": 69150 + }, + { + "epoch": 3.23, + "learning_rate": 9.320928628532912e-06, + "loss": 0.0703, + "step": 69155 + }, + { + "epoch": 3.23, + "learning_rate": 9.320144843478126e-06, + "loss": 0.1488, + "step": 69160 + }, + { + "epoch": 3.23, + "learning_rate": 9.319361058423338e-06, + "loss": 0.0382, + "step": 69165 + }, + { + "epoch": 3.23, + "learning_rate": 9.318577273368552e-06, + "loss": 0.1603, + "step": 69170 + }, + { + "epoch": 3.23, + "learning_rate": 9.317793488313766e-06, + "loss": 0.1347, + "step": 69175 + }, + { + "epoch": 3.23, + "learning_rate": 9.31700970325898e-06, + "loss": 0.0366, + "step": 69180 + }, + { + "epoch": 3.23, + "learning_rate": 9.316225918204192e-06, + "loss": 0.1063, + "step": 69185 + }, + { + "epoch": 3.23, + "learning_rate": 9.315442133149406e-06, + "loss": 0.2112, + "step": 69190 + }, + { + "epoch": 3.23, + "learning_rate": 9.31465834809462e-06, + "loss": 0.3252, + "step": 69195 + }, + { + "epoch": 3.23, + "learning_rate": 9.313874563039833e-06, + "loss": 0.0515, + "step": 69200 + }, + { + "epoch": 3.23, + "learning_rate": 9.313090777985047e-06, + "loss": 0.0263, + "step": 69205 + }, + { + "epoch": 3.23, + "learning_rate": 9.31230699293026e-06, + "loss": 0.0196, + "step": 69210 + }, + { + "epoch": 3.23, + "learning_rate": 9.311523207875472e-06, + "loss": 0.0484, + "step": 69215 + }, + { + "epoch": 3.23, + "learning_rate": 9.310739422820686e-06, + "loss": 0.0974, + "step": 69220 + }, + { + "epoch": 3.23, + "learning_rate": 9.3099556377659e-06, + "loss": 0.1009, + "step": 69225 + }, + { + "epoch": 3.23, + "learning_rate": 9.309171852711114e-06, + "loss": 0.1064, + "step": 69230 + }, + { + "epoch": 3.23, + "learning_rate": 9.308388067656326e-06, + "loss": 0.1218, + "step": 69235 + }, + { + "epoch": 3.23, + "learning_rate": 9.30760428260154e-06, + "loss": 0.1984, + "step": 69240 + }, + { + "epoch": 3.23, + "learning_rate": 9.306820497546754e-06, + "loss": 0.3309, + "step": 69245 + }, + { + "epoch": 3.23, + "learning_rate": 9.306036712491967e-06, + "loss": 0.0605, + "step": 69250 + }, + { + "epoch": 3.23, + "learning_rate": 9.305252927437181e-06, + "loss": 0.0544, + "step": 69255 + }, + { + "epoch": 3.23, + "learning_rate": 9.304469142382394e-06, + "loss": 0.044, + "step": 69260 + }, + { + "epoch": 3.23, + "learning_rate": 9.303685357327607e-06, + "loss": 0.046, + "step": 69265 + }, + { + "epoch": 3.23, + "learning_rate": 9.302901572272821e-06, + "loss": 0.0805, + "step": 69270 + }, + { + "epoch": 3.23, + "learning_rate": 9.302117787218035e-06, + "loss": 0.1386, + "step": 69275 + }, + { + "epoch": 3.23, + "learning_rate": 9.301334002163247e-06, + "loss": 0.1237, + "step": 69280 + }, + { + "epoch": 3.23, + "learning_rate": 9.30055021710846e-06, + "loss": 0.1503, + "step": 69285 + }, + { + "epoch": 3.23, + "learning_rate": 9.299766432053674e-06, + "loss": 0.1735, + "step": 69290 + }, + { + "epoch": 3.23, + "learning_rate": 9.298982646998888e-06, + "loss": 0.2164, + "step": 69295 + }, + { + "epoch": 3.23, + "learning_rate": 9.298198861944101e-06, + "loss": 0.0882, + "step": 69300 + }, + { + "epoch": 3.23, + "learning_rate": 9.297415076889315e-06, + "loss": 0.0838, + "step": 69305 + }, + { + "epoch": 3.23, + "learning_rate": 9.296631291834528e-06, + "loss": 0.0821, + "step": 69310 + }, + { + "epoch": 3.23, + "learning_rate": 9.295847506779741e-06, + "loss": 0.0388, + "step": 69315 + }, + { + "epoch": 3.23, + "learning_rate": 9.295063721724955e-06, + "loss": 0.1242, + "step": 69320 + }, + { + "epoch": 3.23, + "learning_rate": 9.29427993667017e-06, + "loss": 0.1089, + "step": 69325 + }, + { + "epoch": 3.24, + "learning_rate": 9.293496151615381e-06, + "loss": 0.0842, + "step": 69330 + }, + { + "epoch": 3.24, + "learning_rate": 9.292712366560595e-06, + "loss": 0.1545, + "step": 69335 + }, + { + "epoch": 3.24, + "learning_rate": 9.29192858150581e-06, + "loss": 0.2175, + "step": 69340 + }, + { + "epoch": 3.24, + "learning_rate": 9.291144796451021e-06, + "loss": 0.3745, + "step": 69345 + }, + { + "epoch": 3.24, + "learning_rate": 9.290361011396235e-06, + "loss": 0.093, + "step": 69350 + }, + { + "epoch": 3.24, + "learning_rate": 9.28957722634145e-06, + "loss": 0.0221, + "step": 69355 + }, + { + "epoch": 3.24, + "learning_rate": 9.288793441286662e-06, + "loss": 0.0494, + "step": 69360 + }, + { + "epoch": 3.24, + "learning_rate": 9.288009656231875e-06, + "loss": 0.06, + "step": 69365 + }, + { + "epoch": 3.24, + "learning_rate": 9.28722587117709e-06, + "loss": 0.0468, + "step": 69370 + }, + { + "epoch": 3.24, + "learning_rate": 9.286442086122303e-06, + "loss": 0.0985, + "step": 69375 + }, + { + "epoch": 3.24, + "learning_rate": 9.285658301067515e-06, + "loss": 0.2375, + "step": 69380 + }, + { + "epoch": 3.24, + "learning_rate": 9.28487451601273e-06, + "loss": 0.0803, + "step": 69385 + }, + { + "epoch": 3.24, + "learning_rate": 9.284090730957943e-06, + "loss": 0.3352, + "step": 69390 + }, + { + "epoch": 3.24, + "learning_rate": 9.283306945903157e-06, + "loss": 0.2779, + "step": 69395 + }, + { + "epoch": 3.24, + "learning_rate": 9.28252316084837e-06, + "loss": 0.0764, + "step": 69400 + }, + { + "epoch": 3.24, + "learning_rate": 9.281739375793583e-06, + "loss": 0.0254, + "step": 69405 + }, + { + "epoch": 3.24, + "learning_rate": 9.280955590738795e-06, + "loss": 0.0204, + "step": 69410 + }, + { + "epoch": 3.24, + "learning_rate": 9.28017180568401e-06, + "loss": 0.0758, + "step": 69415 + }, + { + "epoch": 3.24, + "learning_rate": 9.279388020629223e-06, + "loss": 0.0516, + "step": 69420 + }, + { + "epoch": 3.24, + "learning_rate": 9.278604235574437e-06, + "loss": 0.1475, + "step": 69425 + }, + { + "epoch": 3.24, + "learning_rate": 9.27782045051965e-06, + "loss": 0.1257, + "step": 69430 + }, + { + "epoch": 3.24, + "learning_rate": 9.277036665464863e-06, + "loss": 0.1269, + "step": 69435 + }, + { + "epoch": 3.24, + "learning_rate": 9.276252880410077e-06, + "loss": 0.2353, + "step": 69440 + }, + { + "epoch": 3.24, + "learning_rate": 9.275469095355291e-06, + "loss": 0.3649, + "step": 69445 + }, + { + "epoch": 3.24, + "learning_rate": 9.274685310300503e-06, + "loss": 0.0354, + "step": 69450 + }, + { + "epoch": 3.24, + "learning_rate": 9.273901525245717e-06, + "loss": 0.0686, + "step": 69455 + }, + { + "epoch": 3.24, + "learning_rate": 9.273117740190931e-06, + "loss": 0.0279, + "step": 69460 + }, + { + "epoch": 3.24, + "learning_rate": 9.272333955136145e-06, + "loss": 0.0607, + "step": 69465 + }, + { + "epoch": 3.24, + "learning_rate": 9.271550170081359e-06, + "loss": 0.0724, + "step": 69470 + }, + { + "epoch": 3.24, + "learning_rate": 9.270766385026571e-06, + "loss": 0.1336, + "step": 69475 + }, + { + "epoch": 3.24, + "learning_rate": 9.269982599971783e-06, + "loss": 0.1661, + "step": 69480 + }, + { + "epoch": 3.24, + "learning_rate": 9.269198814916997e-06, + "loss": 0.1611, + "step": 69485 + }, + { + "epoch": 3.24, + "learning_rate": 9.268415029862211e-06, + "loss": 0.3469, + "step": 69490 + }, + { + "epoch": 3.24, + "learning_rate": 9.267631244807425e-06, + "loss": 0.3338, + "step": 69495 + }, + { + "epoch": 3.24, + "learning_rate": 9.266847459752637e-06, + "loss": 0.0401, + "step": 69500 + }, + { + "epoch": 3.24, + "learning_rate": 9.266063674697851e-06, + "loss": 0.072, + "step": 69505 + }, + { + "epoch": 3.24, + "learning_rate": 9.265279889643065e-06, + "loss": 0.0282, + "step": 69510 + }, + { + "epoch": 3.24, + "learning_rate": 9.264496104588279e-06, + "loss": 0.0697, + "step": 69515 + }, + { + "epoch": 3.24, + "learning_rate": 9.263712319533493e-06, + "loss": 0.1149, + "step": 69520 + }, + { + "epoch": 3.24, + "learning_rate": 9.262928534478705e-06, + "loss": 0.0651, + "step": 69525 + }, + { + "epoch": 3.24, + "learning_rate": 9.262144749423919e-06, + "loss": 0.0796, + "step": 69530 + }, + { + "epoch": 3.24, + "learning_rate": 9.261360964369133e-06, + "loss": 0.1888, + "step": 69535 + }, + { + "epoch": 3.24, + "learning_rate": 9.260577179314345e-06, + "loss": 0.1374, + "step": 69540 + }, + { + "epoch": 3.25, + "learning_rate": 9.259793394259559e-06, + "loss": 0.2406, + "step": 69545 + }, + { + "epoch": 3.25, + "learning_rate": 9.259009609204771e-06, + "loss": 0.088, + "step": 69550 + }, + { + "epoch": 3.25, + "learning_rate": 9.258225824149985e-06, + "loss": 0.1102, + "step": 69555 + }, + { + "epoch": 3.25, + "learning_rate": 9.257442039095199e-06, + "loss": 0.014, + "step": 69560 + }, + { + "epoch": 3.25, + "learning_rate": 9.256658254040413e-06, + "loss": 0.0231, + "step": 69565 + }, + { + "epoch": 3.25, + "learning_rate": 9.255874468985627e-06, + "loss": 0.0708, + "step": 69570 + }, + { + "epoch": 3.25, + "learning_rate": 9.255090683930839e-06, + "loss": 0.0572, + "step": 69575 + }, + { + "epoch": 3.25, + "learning_rate": 9.254306898876053e-06, + "loss": 0.0775, + "step": 69580 + }, + { + "epoch": 3.25, + "learning_rate": 9.253523113821267e-06, + "loss": 0.1815, + "step": 69585 + }, + { + "epoch": 3.25, + "learning_rate": 9.25273932876648e-06, + "loss": 0.2664, + "step": 69590 + }, + { + "epoch": 3.25, + "learning_rate": 9.251955543711693e-06, + "loss": 0.308, + "step": 69595 + }, + { + "epoch": 3.25, + "learning_rate": 9.251171758656907e-06, + "loss": 0.1139, + "step": 69600 + }, + { + "epoch": 3.25, + "learning_rate": 9.25038797360212e-06, + "loss": 0.0147, + "step": 69605 + }, + { + "epoch": 3.25, + "learning_rate": 9.249604188547333e-06, + "loss": 0.0446, + "step": 69610 + }, + { + "epoch": 3.25, + "learning_rate": 9.248820403492547e-06, + "loss": 0.0582, + "step": 69615 + }, + { + "epoch": 3.25, + "learning_rate": 9.248036618437761e-06, + "loss": 0.0645, + "step": 69620 + }, + { + "epoch": 3.25, + "learning_rate": 9.247252833382973e-06, + "loss": 0.103, + "step": 69625 + }, + { + "epoch": 3.25, + "learning_rate": 9.246469048328187e-06, + "loss": 0.0929, + "step": 69630 + }, + { + "epoch": 3.25, + "learning_rate": 9.245685263273401e-06, + "loss": 0.104, + "step": 69635 + }, + { + "epoch": 3.25, + "learning_rate": 9.244901478218615e-06, + "loss": 0.2659, + "step": 69640 + }, + { + "epoch": 3.25, + "learning_rate": 9.244117693163827e-06, + "loss": 0.2342, + "step": 69645 + }, + { + "epoch": 3.25, + "learning_rate": 9.243333908109041e-06, + "loss": 0.6174, + "step": 69650 + }, + { + "epoch": 3.25, + "learning_rate": 9.242550123054255e-06, + "loss": 0.0114, + "step": 69655 + }, + { + "epoch": 3.25, + "learning_rate": 9.241766337999469e-06, + "loss": 0.0766, + "step": 69660 + }, + { + "epoch": 3.25, + "learning_rate": 9.240982552944681e-06, + "loss": 0.0467, + "step": 69665 + }, + { + "epoch": 3.25, + "learning_rate": 9.240198767889895e-06, + "loss": 0.0568, + "step": 69670 + }, + { + "epoch": 3.25, + "learning_rate": 9.239414982835107e-06, + "loss": 0.1458, + "step": 69675 + }, + { + "epoch": 3.25, + "learning_rate": 9.238631197780321e-06, + "loss": 0.2081, + "step": 69680 + }, + { + "epoch": 3.25, + "learning_rate": 9.237847412725535e-06, + "loss": 0.2514, + "step": 69685 + }, + { + "epoch": 3.25, + "learning_rate": 9.237063627670749e-06, + "loss": 0.1977, + "step": 69690 + }, + { + "epoch": 3.25, + "learning_rate": 9.236279842615961e-06, + "loss": 0.2282, + "step": 69695 + }, + { + "epoch": 3.25, + "learning_rate": 9.235496057561175e-06, + "loss": 0.0591, + "step": 69700 + }, + { + "epoch": 3.25, + "learning_rate": 9.234712272506389e-06, + "loss": 0.0384, + "step": 69705 + }, + { + "epoch": 3.25, + "learning_rate": 9.233928487451603e-06, + "loss": 0.0815, + "step": 69710 + }, + { + "epoch": 3.25, + "learning_rate": 9.233144702396815e-06, + "loss": 0.0774, + "step": 69715 + }, + { + "epoch": 3.25, + "learning_rate": 9.232360917342029e-06, + "loss": 0.0509, + "step": 69720 + }, + { + "epoch": 3.25, + "learning_rate": 9.231577132287243e-06, + "loss": 0.0877, + "step": 69725 + }, + { + "epoch": 3.25, + "learning_rate": 9.230793347232457e-06, + "loss": 0.1008, + "step": 69730 + }, + { + "epoch": 3.25, + "learning_rate": 9.230009562177669e-06, + "loss": 0.126, + "step": 69735 + }, + { + "epoch": 3.25, + "learning_rate": 9.229225777122883e-06, + "loss": 0.1484, + "step": 69740 + }, + { + "epoch": 3.25, + "learning_rate": 9.228441992068095e-06, + "loss": 0.2838, + "step": 69745 + }, + { + "epoch": 3.25, + "learning_rate": 9.227658207013309e-06, + "loss": 0.1356, + "step": 69750 + }, + { + "epoch": 3.25, + "learning_rate": 9.226874421958523e-06, + "loss": 0.0381, + "step": 69755 + }, + { + "epoch": 3.26, + "learning_rate": 9.226090636903737e-06, + "loss": 0.0354, + "step": 69760 + }, + { + "epoch": 3.26, + "learning_rate": 9.225306851848949e-06, + "loss": 0.0649, + "step": 69765 + }, + { + "epoch": 3.26, + "learning_rate": 9.224523066794163e-06, + "loss": 0.0909, + "step": 69770 + }, + { + "epoch": 3.26, + "learning_rate": 9.223739281739377e-06, + "loss": 0.0969, + "step": 69775 + }, + { + "epoch": 3.26, + "learning_rate": 9.22295549668459e-06, + "loss": 0.1227, + "step": 69780 + }, + { + "epoch": 3.26, + "learning_rate": 9.222171711629805e-06, + "loss": 0.1145, + "step": 69785 + }, + { + "epoch": 3.26, + "learning_rate": 9.221387926575017e-06, + "loss": 0.2365, + "step": 69790 + }, + { + "epoch": 3.26, + "learning_rate": 9.22060414152023e-06, + "loss": 0.2366, + "step": 69795 + }, + { + "epoch": 3.26, + "learning_rate": 9.219820356465443e-06, + "loss": 0.1243, + "step": 69800 + }, + { + "epoch": 3.26, + "learning_rate": 9.219036571410657e-06, + "loss": 0.0516, + "step": 69805 + }, + { + "epoch": 3.26, + "learning_rate": 9.21825278635587e-06, + "loss": 0.0249, + "step": 69810 + }, + { + "epoch": 3.26, + "learning_rate": 9.217469001301083e-06, + "loss": 0.0937, + "step": 69815 + }, + { + "epoch": 3.26, + "learning_rate": 9.216685216246297e-06, + "loss": 0.1407, + "step": 69820 + }, + { + "epoch": 3.26, + "learning_rate": 9.21590143119151e-06, + "loss": 0.0835, + "step": 69825 + }, + { + "epoch": 3.26, + "learning_rate": 9.215117646136725e-06, + "loss": 0.104, + "step": 69830 + }, + { + "epoch": 3.26, + "learning_rate": 9.214333861081939e-06, + "loss": 0.2058, + "step": 69835 + }, + { + "epoch": 3.26, + "learning_rate": 9.21355007602715e-06, + "loss": 0.2023, + "step": 69840 + }, + { + "epoch": 3.26, + "learning_rate": 9.212766290972365e-06, + "loss": 0.3072, + "step": 69845 + }, + { + "epoch": 3.26, + "learning_rate": 9.211982505917579e-06, + "loss": 0.0897, + "step": 69850 + }, + { + "epoch": 3.26, + "learning_rate": 9.211198720862792e-06, + "loss": 0.036, + "step": 69855 + }, + { + "epoch": 3.26, + "learning_rate": 9.210414935808005e-06, + "loss": 0.0343, + "step": 69860 + }, + { + "epoch": 3.26, + "learning_rate": 9.209631150753217e-06, + "loss": 0.0693, + "step": 69865 + }, + { + "epoch": 3.26, + "learning_rate": 9.20884736569843e-06, + "loss": 0.0274, + "step": 69870 + }, + { + "epoch": 3.26, + "learning_rate": 9.208063580643645e-06, + "loss": 0.0446, + "step": 69875 + }, + { + "epoch": 3.26, + "learning_rate": 9.207279795588859e-06, + "loss": 0.1388, + "step": 69880 + }, + { + "epoch": 3.26, + "learning_rate": 9.206496010534072e-06, + "loss": 0.1066, + "step": 69885 + }, + { + "epoch": 3.26, + "learning_rate": 9.205712225479285e-06, + "loss": 0.1121, + "step": 69890 + }, + { + "epoch": 3.26, + "learning_rate": 9.204928440424499e-06, + "loss": 0.283, + "step": 69895 + }, + { + "epoch": 3.26, + "learning_rate": 9.204144655369713e-06, + "loss": 0.0839, + "step": 69900 + }, + { + "epoch": 3.26, + "learning_rate": 9.203360870314926e-06, + "loss": 0.053, + "step": 69905 + }, + { + "epoch": 3.26, + "learning_rate": 9.202577085260139e-06, + "loss": 0.0771, + "step": 69910 + }, + { + "epoch": 3.26, + "learning_rate": 9.201793300205353e-06, + "loss": 0.1367, + "step": 69915 + }, + { + "epoch": 3.26, + "learning_rate": 9.201009515150566e-06, + "loss": 0.0701, + "step": 69920 + }, + { + "epoch": 3.26, + "learning_rate": 9.20022573009578e-06, + "loss": 0.0521, + "step": 69925 + }, + { + "epoch": 3.26, + "learning_rate": 9.199441945040993e-06, + "loss": 0.1232, + "step": 69930 + }, + { + "epoch": 3.26, + "learning_rate": 9.198658159986206e-06, + "loss": 0.0843, + "step": 69935 + }, + { + "epoch": 3.26, + "learning_rate": 9.197874374931419e-06, + "loss": 0.2965, + "step": 69940 + }, + { + "epoch": 3.26, + "learning_rate": 9.197090589876633e-06, + "loss": 0.277, + "step": 69945 + }, + { + "epoch": 3.26, + "learning_rate": 9.196306804821846e-06, + "loss": 0.0945, + "step": 69950 + }, + { + "epoch": 3.26, + "learning_rate": 9.19552301976706e-06, + "loss": 0.0235, + "step": 69955 + }, + { + "epoch": 3.26, + "learning_rate": 9.194739234712273e-06, + "loss": 0.0263, + "step": 69960 + }, + { + "epoch": 3.26, + "learning_rate": 9.193955449657487e-06, + "loss": 0.0488, + "step": 69965 + }, + { + "epoch": 3.26, + "learning_rate": 9.1931716646027e-06, + "loss": 0.0679, + "step": 69970 + }, + { + "epoch": 3.27, + "learning_rate": 9.192387879547914e-06, + "loss": 0.1051, + "step": 69975 + }, + { + "epoch": 3.27, + "learning_rate": 9.191604094493127e-06, + "loss": 0.1029, + "step": 69980 + }, + { + "epoch": 3.27, + "learning_rate": 9.19082030943834e-06, + "loss": 0.1088, + "step": 69985 + }, + { + "epoch": 3.27, + "learning_rate": 9.190036524383554e-06, + "loss": 0.1562, + "step": 69990 + }, + { + "epoch": 3.27, + "learning_rate": 9.189252739328767e-06, + "loss": 0.2521, + "step": 69995 + }, + { + "epoch": 3.27, + "learning_rate": 9.18846895427398e-06, + "loss": 0.0542, + "step": 70000 + }, + { + "epoch": 3.27, + "learning_rate": 9.187685169219194e-06, + "loss": 0.0405, + "step": 70005 + }, + { + "epoch": 3.27, + "learning_rate": 9.186901384164407e-06, + "loss": 0.0403, + "step": 70010 + }, + { + "epoch": 3.27, + "learning_rate": 9.18611759910962e-06, + "loss": 0.0638, + "step": 70015 + }, + { + "epoch": 3.27, + "learning_rate": 9.185333814054834e-06, + "loss": 0.1057, + "step": 70020 + }, + { + "epoch": 3.27, + "learning_rate": 9.184550029000048e-06, + "loss": 0.0915, + "step": 70025 + }, + { + "epoch": 3.27, + "learning_rate": 9.18376624394526e-06, + "loss": 0.0756, + "step": 70030 + }, + { + "epoch": 3.27, + "learning_rate": 9.182982458890474e-06, + "loss": 0.1864, + "step": 70035 + }, + { + "epoch": 3.27, + "learning_rate": 9.182198673835688e-06, + "loss": 0.1794, + "step": 70040 + }, + { + "epoch": 3.27, + "learning_rate": 9.181414888780902e-06, + "loss": 0.3143, + "step": 70045 + }, + { + "epoch": 3.27, + "learning_rate": 9.180631103726116e-06, + "loss": 0.0461, + "step": 70050 + }, + { + "epoch": 3.27, + "learning_rate": 9.179847318671328e-06, + "loss": 0.0468, + "step": 70055 + }, + { + "epoch": 3.27, + "learning_rate": 9.17906353361654e-06, + "loss": 0.058, + "step": 70060 + }, + { + "epoch": 3.27, + "learning_rate": 9.178279748561754e-06, + "loss": 0.0569, + "step": 70065 + }, + { + "epoch": 3.27, + "learning_rate": 9.177495963506968e-06, + "loss": 0.0887, + "step": 70070 + }, + { + "epoch": 3.27, + "learning_rate": 9.176712178452182e-06, + "loss": 0.0843, + "step": 70075 + }, + { + "epoch": 3.27, + "learning_rate": 9.175928393397394e-06, + "loss": 0.1165, + "step": 70080 + }, + { + "epoch": 3.27, + "learning_rate": 9.175144608342608e-06, + "loss": 0.1447, + "step": 70085 + }, + { + "epoch": 3.27, + "learning_rate": 9.174360823287822e-06, + "loss": 0.1615, + "step": 70090 + }, + { + "epoch": 3.27, + "learning_rate": 9.173577038233036e-06, + "loss": 0.2179, + "step": 70095 + }, + { + "epoch": 3.27, + "learning_rate": 9.17279325317825e-06, + "loss": 0.0924, + "step": 70100 + }, + { + "epoch": 3.27, + "learning_rate": 9.172009468123462e-06, + "loss": 0.0574, + "step": 70105 + }, + { + "epoch": 3.27, + "learning_rate": 9.171225683068676e-06, + "loss": 0.0505, + "step": 70110 + }, + { + "epoch": 3.27, + "learning_rate": 9.17044189801389e-06, + "loss": 0.0461, + "step": 70115 + }, + { + "epoch": 3.27, + "learning_rate": 9.169658112959104e-06, + "loss": 0.1141, + "step": 70120 + }, + { + "epoch": 3.27, + "learning_rate": 9.168874327904316e-06, + "loss": 0.1524, + "step": 70125 + }, + { + "epoch": 3.27, + "learning_rate": 9.168090542849528e-06, + "loss": 0.0598, + "step": 70130 + }, + { + "epoch": 3.27, + "learning_rate": 9.167306757794742e-06, + "loss": 0.1248, + "step": 70135 + }, + { + "epoch": 3.27, + "learning_rate": 9.166522972739956e-06, + "loss": 0.1644, + "step": 70140 + }, + { + "epoch": 3.27, + "learning_rate": 9.16573918768517e-06, + "loss": 0.1953, + "step": 70145 + }, + { + "epoch": 3.27, + "learning_rate": 9.164955402630384e-06, + "loss": 0.0394, + "step": 70150 + }, + { + "epoch": 3.27, + "learning_rate": 9.164171617575596e-06, + "loss": 0.0543, + "step": 70155 + }, + { + "epoch": 3.27, + "learning_rate": 9.16338783252081e-06, + "loss": 0.0456, + "step": 70160 + }, + { + "epoch": 3.27, + "learning_rate": 9.162604047466024e-06, + "loss": 0.0717, + "step": 70165 + }, + { + "epoch": 3.27, + "learning_rate": 9.161820262411238e-06, + "loss": 0.1553, + "step": 70170 + }, + { + "epoch": 3.27, + "learning_rate": 9.16103647735645e-06, + "loss": 0.0369, + "step": 70175 + }, + { + "epoch": 3.27, + "learning_rate": 9.160252692301664e-06, + "loss": 0.11, + "step": 70180 + }, + { + "epoch": 3.27, + "learning_rate": 9.159468907246878e-06, + "loss": 0.1583, + "step": 70185 + }, + { + "epoch": 3.28, + "learning_rate": 9.15868512219209e-06, + "loss": 0.2984, + "step": 70190 + }, + { + "epoch": 3.28, + "learning_rate": 9.157901337137304e-06, + "loss": 0.2892, + "step": 70195 + }, + { + "epoch": 3.28, + "learning_rate": 9.157117552082518e-06, + "loss": 0.0816, + "step": 70200 + }, + { + "epoch": 3.28, + "learning_rate": 9.15633376702773e-06, + "loss": 0.0477, + "step": 70205 + }, + { + "epoch": 3.28, + "learning_rate": 9.155549981972944e-06, + "loss": 0.0191, + "step": 70210 + }, + { + "epoch": 3.28, + "learning_rate": 9.154766196918158e-06, + "loss": 0.058, + "step": 70215 + }, + { + "epoch": 3.28, + "learning_rate": 9.153982411863372e-06, + "loss": 0.0377, + "step": 70220 + }, + { + "epoch": 3.28, + "learning_rate": 9.153198626808584e-06, + "loss": 0.1391, + "step": 70225 + }, + { + "epoch": 3.28, + "learning_rate": 9.152414841753798e-06, + "loss": 0.1003, + "step": 70230 + }, + { + "epoch": 3.28, + "learning_rate": 9.151631056699012e-06, + "loss": 0.1134, + "step": 70235 + }, + { + "epoch": 3.28, + "learning_rate": 9.150847271644226e-06, + "loss": 0.2391, + "step": 70240 + }, + { + "epoch": 3.28, + "learning_rate": 9.150063486589438e-06, + "loss": 0.1599, + "step": 70245 + }, + { + "epoch": 3.28, + "learning_rate": 9.149279701534652e-06, + "loss": 0.0952, + "step": 70250 + }, + { + "epoch": 3.28, + "learning_rate": 9.148495916479864e-06, + "loss": 0.0636, + "step": 70255 + }, + { + "epoch": 3.28, + "learning_rate": 9.147712131425078e-06, + "loss": 0.0402, + "step": 70260 + }, + { + "epoch": 3.28, + "learning_rate": 9.146928346370292e-06, + "loss": 0.0885, + "step": 70265 + }, + { + "epoch": 3.28, + "learning_rate": 9.146144561315506e-06, + "loss": 0.1678, + "step": 70270 + }, + { + "epoch": 3.28, + "learning_rate": 9.145360776260718e-06, + "loss": 0.0763, + "step": 70275 + }, + { + "epoch": 3.28, + "learning_rate": 9.144576991205932e-06, + "loss": 0.0798, + "step": 70280 + }, + { + "epoch": 3.28, + "learning_rate": 9.143793206151146e-06, + "loss": 0.1155, + "step": 70285 + }, + { + "epoch": 3.28, + "learning_rate": 9.14300942109636e-06, + "loss": 0.2216, + "step": 70290 + }, + { + "epoch": 3.28, + "learning_rate": 9.142225636041572e-06, + "loss": 0.288, + "step": 70295 + }, + { + "epoch": 3.28, + "learning_rate": 9.141441850986786e-06, + "loss": 0.0696, + "step": 70300 + }, + { + "epoch": 3.28, + "learning_rate": 9.140658065932e-06, + "loss": 0.0834, + "step": 70305 + }, + { + "epoch": 3.28, + "learning_rate": 9.139874280877214e-06, + "loss": 0.0411, + "step": 70310 + }, + { + "epoch": 3.28, + "learning_rate": 9.139090495822428e-06, + "loss": 0.0357, + "step": 70315 + }, + { + "epoch": 3.28, + "learning_rate": 9.13830671076764e-06, + "loss": 0.1072, + "step": 70320 + }, + { + "epoch": 3.28, + "learning_rate": 9.137522925712852e-06, + "loss": 0.0559, + "step": 70325 + }, + { + "epoch": 3.28, + "learning_rate": 9.136739140658066e-06, + "loss": 0.1139, + "step": 70330 + }, + { + "epoch": 3.28, + "learning_rate": 9.13595535560328e-06, + "loss": 0.1853, + "step": 70335 + }, + { + "epoch": 3.28, + "learning_rate": 9.135171570548494e-06, + "loss": 0.1746, + "step": 70340 + }, + { + "epoch": 3.28, + "learning_rate": 9.134387785493706e-06, + "loss": 0.2631, + "step": 70345 + }, + { + "epoch": 3.28, + "learning_rate": 9.13360400043892e-06, + "loss": 0.0738, + "step": 70350 + }, + { + "epoch": 3.28, + "learning_rate": 9.132820215384134e-06, + "loss": 0.018, + "step": 70355 + }, + { + "epoch": 3.28, + "learning_rate": 9.132036430329348e-06, + "loss": 0.0448, + "step": 70360 + }, + { + "epoch": 3.28, + "learning_rate": 9.131252645274562e-06, + "loss": 0.063, + "step": 70365 + }, + { + "epoch": 3.28, + "learning_rate": 9.130468860219774e-06, + "loss": 0.0829, + "step": 70370 + }, + { + "epoch": 3.28, + "learning_rate": 9.129685075164988e-06, + "loss": 0.0777, + "step": 70375 + }, + { + "epoch": 3.28, + "learning_rate": 9.128901290110202e-06, + "loss": 0.0711, + "step": 70380 + }, + { + "epoch": 3.28, + "learning_rate": 9.128117505055414e-06, + "loss": 0.1215, + "step": 70385 + }, + { + "epoch": 3.28, + "learning_rate": 9.127333720000628e-06, + "loss": 0.171, + "step": 70390 + }, + { + "epoch": 3.28, + "learning_rate": 9.12654993494584e-06, + "loss": 0.3134, + "step": 70395 + }, + { + "epoch": 3.28, + "learning_rate": 9.125766149891054e-06, + "loss": 0.0421, + "step": 70400 + }, + { + "epoch": 3.29, + "learning_rate": 9.124982364836268e-06, + "loss": 0.0682, + "step": 70405 + }, + { + "epoch": 3.29, + "learning_rate": 9.124198579781482e-06, + "loss": 0.0628, + "step": 70410 + }, + { + "epoch": 3.29, + "learning_rate": 9.123571551737652e-06, + "loss": 0.0632, + "step": 70415 + }, + { + "epoch": 3.29, + "learning_rate": 9.122787766682866e-06, + "loss": 0.0502, + "step": 70420 + }, + { + "epoch": 3.29, + "learning_rate": 9.12200398162808e-06, + "loss": 0.0768, + "step": 70425 + }, + { + "epoch": 3.29, + "learning_rate": 9.121220196573292e-06, + "loss": 0.114, + "step": 70430 + }, + { + "epoch": 3.29, + "learning_rate": 9.120436411518506e-06, + "loss": 0.2299, + "step": 70435 + }, + { + "epoch": 3.29, + "learning_rate": 9.11965262646372e-06, + "loss": 0.1957, + "step": 70440 + }, + { + "epoch": 3.29, + "learning_rate": 9.118868841408934e-06, + "loss": 0.3038, + "step": 70445 + }, + { + "epoch": 3.29, + "learning_rate": 9.118085056354146e-06, + "loss": 0.0454, + "step": 70450 + }, + { + "epoch": 3.29, + "learning_rate": 9.117301271299358e-06, + "loss": 0.0615, + "step": 70455 + }, + { + "epoch": 3.29, + "learning_rate": 9.116517486244572e-06, + "loss": 0.0582, + "step": 70460 + }, + { + "epoch": 3.29, + "learning_rate": 9.115733701189786e-06, + "loss": 0.113, + "step": 70465 + }, + { + "epoch": 3.29, + "learning_rate": 9.114949916135e-06, + "loss": 0.0629, + "step": 70470 + }, + { + "epoch": 3.29, + "learning_rate": 9.114166131080214e-06, + "loss": 0.0821, + "step": 70475 + }, + { + "epoch": 3.29, + "learning_rate": 9.113382346025426e-06, + "loss": 0.1246, + "step": 70480 + }, + { + "epoch": 3.29, + "learning_rate": 9.11259856097064e-06, + "loss": 0.199, + "step": 70485 + }, + { + "epoch": 3.29, + "learning_rate": 9.111814775915854e-06, + "loss": 0.2589, + "step": 70490 + }, + { + "epoch": 3.29, + "learning_rate": 9.111030990861068e-06, + "loss": 0.2966, + "step": 70495 + }, + { + "epoch": 3.29, + "learning_rate": 9.11024720580628e-06, + "loss": 0.0677, + "step": 70500 + }, + { + "epoch": 3.29, + "learning_rate": 9.109463420751494e-06, + "loss": 0.0092, + "step": 70505 + }, + { + "epoch": 3.29, + "learning_rate": 9.108679635696708e-06, + "loss": 0.0471, + "step": 70510 + }, + { + "epoch": 3.29, + "learning_rate": 9.10789585064192e-06, + "loss": 0.0891, + "step": 70515 + }, + { + "epoch": 3.29, + "learning_rate": 9.107112065587134e-06, + "loss": 0.1139, + "step": 70520 + }, + { + "epoch": 3.29, + "learning_rate": 9.106328280532348e-06, + "loss": 0.0479, + "step": 70525 + }, + { + "epoch": 3.29, + "learning_rate": 9.10554449547756e-06, + "loss": 0.0951, + "step": 70530 + }, + { + "epoch": 3.29, + "learning_rate": 9.104760710422774e-06, + "loss": 0.1251, + "step": 70535 + }, + { + "epoch": 3.29, + "learning_rate": 9.103976925367988e-06, + "loss": 0.1632, + "step": 70540 + }, + { + "epoch": 3.29, + "learning_rate": 9.103193140313202e-06, + "loss": 0.3112, + "step": 70545 + }, + { + "epoch": 3.29, + "learning_rate": 9.102409355258414e-06, + "loss": 0.0694, + "step": 70550 + }, + { + "epoch": 3.29, + "learning_rate": 9.101625570203628e-06, + "loss": 0.0419, + "step": 70555 + }, + { + "epoch": 3.29, + "learning_rate": 9.100841785148842e-06, + "loss": 0.0534, + "step": 70560 + }, + { + "epoch": 3.29, + "learning_rate": 9.100058000094056e-06, + "loss": 0.0536, + "step": 70565 + }, + { + "epoch": 3.29, + "learning_rate": 9.099274215039268e-06, + "loss": 0.0873, + "step": 70570 + }, + { + "epoch": 3.29, + "learning_rate": 9.098490429984482e-06, + "loss": 0.1011, + "step": 70575 + }, + { + "epoch": 3.29, + "learning_rate": 9.097706644929694e-06, + "loss": 0.1586, + "step": 70580 + }, + { + "epoch": 3.29, + "learning_rate": 9.096922859874908e-06, + "loss": 0.1024, + "step": 70585 + }, + { + "epoch": 3.29, + "learning_rate": 9.096139074820122e-06, + "loss": 0.1611, + "step": 70590 + }, + { + "epoch": 3.29, + "learning_rate": 9.095355289765336e-06, + "loss": 0.3784, + "step": 70595 + }, + { + "epoch": 3.29, + "learning_rate": 9.094571504710548e-06, + "loss": 0.1113, + "step": 70600 + }, + { + "epoch": 3.29, + "learning_rate": 9.093787719655762e-06, + "loss": 0.0555, + "step": 70605 + }, + { + "epoch": 3.29, + "learning_rate": 9.093003934600976e-06, + "loss": 0.0418, + "step": 70610 + }, + { + "epoch": 3.29, + "learning_rate": 9.09222014954619e-06, + "loss": 0.0386, + "step": 70615 + }, + { + "epoch": 3.3, + "learning_rate": 9.091436364491402e-06, + "loss": 0.076, + "step": 70620 + }, + { + "epoch": 3.3, + "learning_rate": 9.090652579436616e-06, + "loss": 0.0295, + "step": 70625 + }, + { + "epoch": 3.3, + "learning_rate": 9.08986879438183e-06, + "loss": 0.1095, + "step": 70630 + }, + { + "epoch": 3.3, + "learning_rate": 9.089085009327044e-06, + "loss": 0.1026, + "step": 70635 + }, + { + "epoch": 3.3, + "learning_rate": 9.088301224272258e-06, + "loss": 0.2221, + "step": 70640 + }, + { + "epoch": 3.3, + "learning_rate": 9.08751743921747e-06, + "loss": 0.2452, + "step": 70645 + }, + { + "epoch": 3.3, + "learning_rate": 9.086733654162682e-06, + "loss": 0.0779, + "step": 70650 + }, + { + "epoch": 3.3, + "learning_rate": 9.085949869107896e-06, + "loss": 0.0415, + "step": 70655 + }, + { + "epoch": 3.3, + "learning_rate": 9.08516608405311e-06, + "loss": 0.065, + "step": 70660 + }, + { + "epoch": 3.3, + "learning_rate": 9.084382298998324e-06, + "loss": 0.1131, + "step": 70665 + }, + { + "epoch": 3.3, + "learning_rate": 9.083598513943536e-06, + "loss": 0.0668, + "step": 70670 + }, + { + "epoch": 3.3, + "learning_rate": 9.08281472888875e-06, + "loss": 0.0669, + "step": 70675 + }, + { + "epoch": 3.3, + "learning_rate": 9.082030943833964e-06, + "loss": 0.1014, + "step": 70680 + }, + { + "epoch": 3.3, + "learning_rate": 9.081247158779178e-06, + "loss": 0.0679, + "step": 70685 + }, + { + "epoch": 3.3, + "learning_rate": 9.080463373724392e-06, + "loss": 0.1369, + "step": 70690 + }, + { + "epoch": 3.3, + "learning_rate": 9.079679588669604e-06, + "loss": 0.3386, + "step": 70695 + }, + { + "epoch": 3.3, + "learning_rate": 9.078895803614818e-06, + "loss": 0.097, + "step": 70700 + }, + { + "epoch": 3.3, + "learning_rate": 9.078112018560032e-06, + "loss": 0.0637, + "step": 70705 + }, + { + "epoch": 3.3, + "learning_rate": 9.077328233505244e-06, + "loss": 0.0306, + "step": 70710 + }, + { + "epoch": 3.3, + "learning_rate": 9.076544448450458e-06, + "loss": 0.0302, + "step": 70715 + }, + { + "epoch": 3.3, + "learning_rate": 9.07576066339567e-06, + "loss": 0.0902, + "step": 70720 + }, + { + "epoch": 3.3, + "learning_rate": 9.074976878340884e-06, + "loss": 0.1711, + "step": 70725 + }, + { + "epoch": 3.3, + "learning_rate": 9.074193093286098e-06, + "loss": 0.0799, + "step": 70730 + }, + { + "epoch": 3.3, + "learning_rate": 9.073409308231312e-06, + "loss": 0.0986, + "step": 70735 + }, + { + "epoch": 3.3, + "learning_rate": 9.072625523176526e-06, + "loss": 0.2437, + "step": 70740 + }, + { + "epoch": 3.3, + "learning_rate": 9.071841738121738e-06, + "loss": 0.3259, + "step": 70745 + }, + { + "epoch": 3.3, + "learning_rate": 9.071057953066952e-06, + "loss": 0.0754, + "step": 70750 + }, + { + "epoch": 3.3, + "learning_rate": 9.070274168012166e-06, + "loss": 0.0394, + "step": 70755 + }, + { + "epoch": 3.3, + "learning_rate": 9.06949038295738e-06, + "loss": 0.0298, + "step": 70760 + }, + { + "epoch": 3.3, + "learning_rate": 9.068706597902592e-06, + "loss": 0.0491, + "step": 70765 + }, + { + "epoch": 3.3, + "learning_rate": 9.067922812847806e-06, + "loss": 0.0504, + "step": 70770 + }, + { + "epoch": 3.3, + "learning_rate": 9.067139027793018e-06, + "loss": 0.0863, + "step": 70775 + }, + { + "epoch": 3.3, + "learning_rate": 9.066355242738232e-06, + "loss": 0.133, + "step": 70780 + }, + { + "epoch": 3.3, + "learning_rate": 9.065571457683446e-06, + "loss": 0.1856, + "step": 70785 + }, + { + "epoch": 3.3, + "learning_rate": 9.06478767262866e-06, + "loss": 0.33, + "step": 70790 + }, + { + "epoch": 3.3, + "learning_rate": 9.064003887573872e-06, + "loss": 0.293, + "step": 70795 + }, + { + "epoch": 3.3, + "learning_rate": 9.063220102519086e-06, + "loss": 0.0919, + "step": 70800 + }, + { + "epoch": 3.3, + "learning_rate": 9.0624363174643e-06, + "loss": 0.0686, + "step": 70805 + }, + { + "epoch": 3.3, + "learning_rate": 9.061652532409513e-06, + "loss": 0.0482, + "step": 70810 + }, + { + "epoch": 3.3, + "learning_rate": 9.060868747354726e-06, + "loss": 0.0436, + "step": 70815 + }, + { + "epoch": 3.3, + "learning_rate": 9.06008496229994e-06, + "loss": 0.1321, + "step": 70820 + }, + { + "epoch": 3.3, + "learning_rate": 9.059301177245154e-06, + "loss": 0.0897, + "step": 70825 + }, + { + "epoch": 3.31, + "learning_rate": 9.058517392190367e-06, + "loss": 0.1366, + "step": 70830 + }, + { + "epoch": 3.31, + "learning_rate": 9.05773360713558e-06, + "loss": 0.0788, + "step": 70835 + }, + { + "epoch": 3.31, + "learning_rate": 9.056949822080794e-06, + "loss": 0.1021, + "step": 70840 + }, + { + "epoch": 3.31, + "learning_rate": 9.056166037026006e-06, + "loss": 0.4001, + "step": 70845 + }, + { + "epoch": 3.31, + "learning_rate": 9.05538225197122e-06, + "loss": 0.0607, + "step": 70850 + }, + { + "epoch": 3.31, + "learning_rate": 9.054598466916434e-06, + "loss": 0.0377, + "step": 70855 + }, + { + "epoch": 3.31, + "learning_rate": 9.053814681861647e-06, + "loss": 0.0581, + "step": 70860 + }, + { + "epoch": 3.31, + "learning_rate": 9.05303089680686e-06, + "loss": 0.0553, + "step": 70865 + }, + { + "epoch": 3.31, + "learning_rate": 9.052247111752074e-06, + "loss": 0.0653, + "step": 70870 + }, + { + "epoch": 3.31, + "learning_rate": 9.051463326697287e-06, + "loss": 0.17, + "step": 70875 + }, + { + "epoch": 3.31, + "learning_rate": 9.050679541642501e-06, + "loss": 0.1759, + "step": 70880 + }, + { + "epoch": 3.31, + "learning_rate": 9.049895756587714e-06, + "loss": 0.0962, + "step": 70885 + }, + { + "epoch": 3.31, + "learning_rate": 9.049111971532928e-06, + "loss": 0.1679, + "step": 70890 + }, + { + "epoch": 3.31, + "learning_rate": 9.048328186478141e-06, + "loss": 0.3484, + "step": 70895 + }, + { + "epoch": 3.31, + "learning_rate": 9.047544401423355e-06, + "loss": 0.0895, + "step": 70900 + }, + { + "epoch": 3.31, + "learning_rate": 9.046760616368568e-06, + "loss": 0.0192, + "step": 70905 + }, + { + "epoch": 3.31, + "learning_rate": 9.045976831313781e-06, + "loss": 0.0598, + "step": 70910 + }, + { + "epoch": 3.31, + "learning_rate": 9.045193046258994e-06, + "loss": 0.0646, + "step": 70915 + }, + { + "epoch": 3.31, + "learning_rate": 9.044409261204208e-06, + "loss": 0.125, + "step": 70920 + }, + { + "epoch": 3.31, + "learning_rate": 9.043625476149421e-06, + "loss": 0.0955, + "step": 70925 + }, + { + "epoch": 3.31, + "learning_rate": 9.042841691094635e-06, + "loss": 0.0751, + "step": 70930 + }, + { + "epoch": 3.31, + "learning_rate": 9.042057906039848e-06, + "loss": 0.0934, + "step": 70935 + }, + { + "epoch": 3.31, + "learning_rate": 9.041274120985061e-06, + "loss": 0.3588, + "step": 70940 + }, + { + "epoch": 3.31, + "learning_rate": 9.040490335930275e-06, + "loss": 0.4088, + "step": 70945 + }, + { + "epoch": 3.31, + "learning_rate": 9.03970655087549e-06, + "loss": 0.096, + "step": 70950 + }, + { + "epoch": 3.31, + "learning_rate": 9.038922765820703e-06, + "loss": 0.039, + "step": 70955 + }, + { + "epoch": 3.31, + "learning_rate": 9.038138980765915e-06, + "loss": 0.0466, + "step": 70960 + }, + { + "epoch": 3.31, + "learning_rate": 9.03735519571113e-06, + "loss": 0.056, + "step": 70965 + }, + { + "epoch": 3.31, + "learning_rate": 9.036571410656342e-06, + "loss": 0.0959, + "step": 70970 + }, + { + "epoch": 3.31, + "learning_rate": 9.035787625601555e-06, + "loss": 0.0983, + "step": 70975 + }, + { + "epoch": 3.31, + "learning_rate": 9.03500384054677e-06, + "loss": 0.0763, + "step": 70980 + }, + { + "epoch": 3.31, + "learning_rate": 9.034220055491982e-06, + "loss": 0.1379, + "step": 70985 + }, + { + "epoch": 3.31, + "learning_rate": 9.033436270437195e-06, + "loss": 0.1648, + "step": 70990 + }, + { + "epoch": 3.31, + "learning_rate": 9.03265248538241e-06, + "loss": 0.3926, + "step": 70995 + }, + { + "epoch": 3.31, + "learning_rate": 9.031868700327623e-06, + "loss": 0.0747, + "step": 71000 + }, + { + "epoch": 3.31, + "learning_rate": 9.031084915272837e-06, + "loss": 0.0096, + "step": 71005 + }, + { + "epoch": 3.31, + "learning_rate": 9.03030113021805e-06, + "loss": 0.0439, + "step": 71010 + }, + { + "epoch": 3.31, + "learning_rate": 9.029517345163263e-06, + "loss": 0.0545, + "step": 71015 + }, + { + "epoch": 3.31, + "learning_rate": 9.028733560108477e-06, + "loss": 0.0585, + "step": 71020 + }, + { + "epoch": 3.31, + "learning_rate": 9.027949775053691e-06, + "loss": 0.0719, + "step": 71025 + }, + { + "epoch": 3.31, + "learning_rate": 9.027165989998903e-06, + "loss": 0.0609, + "step": 71030 + }, + { + "epoch": 3.31, + "learning_rate": 9.026382204944116e-06, + "loss": 0.1347, + "step": 71035 + }, + { + "epoch": 3.31, + "learning_rate": 9.02559841988933e-06, + "loss": 0.2076, + "step": 71040 + }, + { + "epoch": 3.32, + "learning_rate": 9.024814634834543e-06, + "loss": 0.2844, + "step": 71045 + }, + { + "epoch": 3.32, + "learning_rate": 9.024030849779757e-06, + "loss": 0.0975, + "step": 71050 + }, + { + "epoch": 3.32, + "learning_rate": 9.023247064724971e-06, + "loss": 0.0456, + "step": 71055 + }, + { + "epoch": 3.32, + "learning_rate": 9.022463279670183e-06, + "loss": 0.0962, + "step": 71060 + }, + { + "epoch": 3.32, + "learning_rate": 9.021679494615397e-06, + "loss": 0.1066, + "step": 71065 + }, + { + "epoch": 3.32, + "learning_rate": 9.020895709560611e-06, + "loss": 0.0887, + "step": 71070 + }, + { + "epoch": 3.32, + "learning_rate": 9.020111924505825e-06, + "loss": 0.1813, + "step": 71075 + }, + { + "epoch": 3.32, + "learning_rate": 9.019328139451037e-06, + "loss": 0.1152, + "step": 71080 + }, + { + "epoch": 3.32, + "learning_rate": 9.018544354396251e-06, + "loss": 0.1858, + "step": 71085 + }, + { + "epoch": 3.32, + "learning_rate": 9.017760569341465e-06, + "loss": 0.2239, + "step": 71090 + }, + { + "epoch": 3.32, + "learning_rate": 9.016976784286679e-06, + "loss": 0.3627, + "step": 71095 + }, + { + "epoch": 3.32, + "learning_rate": 9.016192999231891e-06, + "loss": 0.0499, + "step": 71100 + }, + { + "epoch": 3.32, + "learning_rate": 9.015409214177105e-06, + "loss": 0.033, + "step": 71105 + }, + { + "epoch": 3.32, + "learning_rate": 9.014625429122317e-06, + "loss": 0.0734, + "step": 71110 + }, + { + "epoch": 3.32, + "learning_rate": 9.013841644067531e-06, + "loss": 0.0203, + "step": 71115 + }, + { + "epoch": 3.32, + "learning_rate": 9.013057859012745e-06, + "loss": 0.0896, + "step": 71120 + }, + { + "epoch": 3.32, + "learning_rate": 9.012274073957959e-06, + "loss": 0.0496, + "step": 71125 + }, + { + "epoch": 3.32, + "learning_rate": 9.011490288903171e-06, + "loss": 0.1073, + "step": 71130 + }, + { + "epoch": 3.32, + "learning_rate": 9.010706503848385e-06, + "loss": 0.2097, + "step": 71135 + }, + { + "epoch": 3.32, + "learning_rate": 9.009922718793599e-06, + "loss": 0.2172, + "step": 71140 + }, + { + "epoch": 3.32, + "learning_rate": 9.009138933738813e-06, + "loss": 0.3922, + "step": 71145 + }, + { + "epoch": 3.32, + "learning_rate": 9.008355148684025e-06, + "loss": 0.0941, + "step": 71150 + }, + { + "epoch": 3.32, + "learning_rate": 9.007571363629239e-06, + "loss": 0.0167, + "step": 71155 + }, + { + "epoch": 3.32, + "learning_rate": 9.006787578574453e-06, + "loss": 0.042, + "step": 71160 + }, + { + "epoch": 3.32, + "learning_rate": 9.006003793519665e-06, + "loss": 0.0617, + "step": 71165 + }, + { + "epoch": 3.32, + "learning_rate": 9.005220008464879e-06, + "loss": 0.0855, + "step": 71170 + }, + { + "epoch": 3.32, + "learning_rate": 9.004436223410093e-06, + "loss": 0.1239, + "step": 71175 + }, + { + "epoch": 3.32, + "learning_rate": 9.003652438355305e-06, + "loss": 0.0801, + "step": 71180 + }, + { + "epoch": 3.32, + "learning_rate": 9.00286865330052e-06, + "loss": 0.1824, + "step": 71185 + }, + { + "epoch": 3.32, + "learning_rate": 9.002084868245733e-06, + "loss": 0.1555, + "step": 71190 + }, + { + "epoch": 3.32, + "learning_rate": 9.001301083190947e-06, + "loss": 0.3987, + "step": 71195 + }, + { + "epoch": 3.32, + "learning_rate": 9.00051729813616e-06, + "loss": 0.0915, + "step": 71200 + }, + { + "epoch": 3.32, + "learning_rate": 8.999733513081373e-06, + "loss": 0.0749, + "step": 71205 + }, + { + "epoch": 3.32, + "learning_rate": 8.998949728026587e-06, + "loss": 0.0269, + "step": 71210 + }, + { + "epoch": 3.32, + "learning_rate": 8.998165942971801e-06, + "loss": 0.0165, + "step": 71215 + }, + { + "epoch": 3.32, + "learning_rate": 8.997382157917015e-06, + "loss": 0.0827, + "step": 71220 + }, + { + "epoch": 3.32, + "learning_rate": 8.996598372862227e-06, + "loss": 0.0354, + "step": 71225 + }, + { + "epoch": 3.32, + "learning_rate": 8.99581458780744e-06, + "loss": 0.1169, + "step": 71230 + }, + { + "epoch": 3.32, + "learning_rate": 8.995030802752653e-06, + "loss": 0.1483, + "step": 71235 + }, + { + "epoch": 3.32, + "learning_rate": 8.994247017697867e-06, + "loss": 0.1475, + "step": 71240 + }, + { + "epoch": 3.32, + "learning_rate": 8.993463232643081e-06, + "loss": 0.199, + "step": 71245 + }, + { + "epoch": 3.32, + "learning_rate": 8.992679447588293e-06, + "loss": 0.1115, + "step": 71250 + }, + { + "epoch": 3.32, + "learning_rate": 8.991895662533507e-06, + "loss": 0.0245, + "step": 71255 + }, + { + "epoch": 3.33, + "learning_rate": 8.991111877478721e-06, + "loss": 0.04, + "step": 71260 + }, + { + "epoch": 3.33, + "learning_rate": 8.990328092423935e-06, + "loss": 0.0279, + "step": 71265 + }, + { + "epoch": 3.33, + "learning_rate": 8.989544307369149e-06, + "loss": 0.0768, + "step": 71270 + }, + { + "epoch": 3.33, + "learning_rate": 8.988760522314361e-06, + "loss": 0.079, + "step": 71275 + }, + { + "epoch": 3.33, + "learning_rate": 8.987976737259575e-06, + "loss": 0.1345, + "step": 71280 + }, + { + "epoch": 3.33, + "learning_rate": 8.987192952204789e-06, + "loss": 0.1671, + "step": 71285 + }, + { + "epoch": 3.33, + "learning_rate": 8.986409167150003e-06, + "loss": 0.1551, + "step": 71290 + }, + { + "epoch": 3.33, + "learning_rate": 8.985625382095215e-06, + "loss": 0.2852, + "step": 71295 + }, + { + "epoch": 3.33, + "learning_rate": 8.984841597040427e-06, + "loss": 0.0474, + "step": 71300 + }, + { + "epoch": 3.33, + "learning_rate": 8.984057811985641e-06, + "loss": 0.0449, + "step": 71305 + }, + { + "epoch": 3.33, + "learning_rate": 8.983274026930855e-06, + "loss": 0.0172, + "step": 71310 + }, + { + "epoch": 3.33, + "learning_rate": 8.982490241876069e-06, + "loss": 0.0817, + "step": 71315 + }, + { + "epoch": 3.33, + "learning_rate": 8.981706456821283e-06, + "loss": 0.0804, + "step": 71320 + }, + { + "epoch": 3.33, + "learning_rate": 8.980922671766495e-06, + "loss": 0.0379, + "step": 71325 + }, + { + "epoch": 3.33, + "learning_rate": 8.980138886711709e-06, + "loss": 0.0653, + "step": 71330 + }, + { + "epoch": 3.33, + "learning_rate": 8.979355101656923e-06, + "loss": 0.099, + "step": 71335 + }, + { + "epoch": 3.33, + "learning_rate": 8.978571316602137e-06, + "loss": 0.149, + "step": 71340 + }, + { + "epoch": 3.33, + "learning_rate": 8.977787531547349e-06, + "loss": 0.2112, + "step": 71345 + }, + { + "epoch": 3.33, + "learning_rate": 8.977003746492563e-06, + "loss": 0.077, + "step": 71350 + }, + { + "epoch": 3.33, + "learning_rate": 8.976219961437777e-06, + "loss": 0.0558, + "step": 71355 + }, + { + "epoch": 3.33, + "learning_rate": 8.975436176382989e-06, + "loss": 0.0582, + "step": 71360 + }, + { + "epoch": 3.33, + "learning_rate": 8.974652391328203e-06, + "loss": 0.0369, + "step": 71365 + }, + { + "epoch": 3.33, + "learning_rate": 8.973868606273417e-06, + "loss": 0.0759, + "step": 71370 + }, + { + "epoch": 3.33, + "learning_rate": 8.973084821218629e-06, + "loss": 0.0728, + "step": 71375 + }, + { + "epoch": 3.33, + "learning_rate": 8.972301036163843e-06, + "loss": 0.0995, + "step": 71380 + }, + { + "epoch": 3.33, + "learning_rate": 8.971517251109057e-06, + "loss": 0.4829, + "step": 71385 + }, + { + "epoch": 3.33, + "learning_rate": 8.97073346605427e-06, + "loss": 0.1905, + "step": 71390 + }, + { + "epoch": 3.33, + "learning_rate": 8.969949680999483e-06, + "loss": 0.355, + "step": 71395 + }, + { + "epoch": 3.33, + "learning_rate": 8.969165895944697e-06, + "loss": 0.0518, + "step": 71400 + }, + { + "epoch": 3.33, + "learning_rate": 8.96838211088991e-06, + "loss": 0.0564, + "step": 71405 + }, + { + "epoch": 3.33, + "learning_rate": 8.967598325835125e-06, + "loss": 0.0721, + "step": 71410 + }, + { + "epoch": 3.33, + "learning_rate": 8.966814540780337e-06, + "loss": 0.0613, + "step": 71415 + }, + { + "epoch": 3.33, + "learning_rate": 8.96603075572555e-06, + "loss": 0.1354, + "step": 71420 + }, + { + "epoch": 3.33, + "learning_rate": 8.965246970670763e-06, + "loss": 0.1019, + "step": 71425 + }, + { + "epoch": 3.33, + "learning_rate": 8.964463185615977e-06, + "loss": 0.071, + "step": 71430 + }, + { + "epoch": 3.33, + "learning_rate": 8.96367940056119e-06, + "loss": 0.0923, + "step": 71435 + }, + { + "epoch": 3.33, + "learning_rate": 8.962895615506405e-06, + "loss": 0.3224, + "step": 71440 + }, + { + "epoch": 3.33, + "learning_rate": 8.962111830451617e-06, + "loss": 0.3049, + "step": 71445 + }, + { + "epoch": 3.33, + "learning_rate": 8.96132804539683e-06, + "loss": 0.0961, + "step": 71450 + }, + { + "epoch": 3.33, + "learning_rate": 8.960544260342045e-06, + "loss": 0.0378, + "step": 71455 + }, + { + "epoch": 3.33, + "learning_rate": 8.959760475287259e-06, + "loss": 0.0332, + "step": 71460 + }, + { + "epoch": 3.33, + "learning_rate": 8.95897669023247e-06, + "loss": 0.0462, + "step": 71465 + }, + { + "epoch": 3.33, + "learning_rate": 8.958192905177685e-06, + "loss": 0.0546, + "step": 71470 + }, + { + "epoch": 3.34, + "learning_rate": 8.957409120122899e-06, + "loss": 0.0442, + "step": 71475 + }, + { + "epoch": 3.34, + "learning_rate": 8.956625335068112e-06, + "loss": 0.1452, + "step": 71480 + }, + { + "epoch": 3.34, + "learning_rate": 8.955841550013326e-06, + "loss": 0.1316, + "step": 71485 + }, + { + "epoch": 3.34, + "learning_rate": 8.955057764958539e-06, + "loss": 0.1885, + "step": 71490 + }, + { + "epoch": 3.34, + "learning_rate": 8.95427397990375e-06, + "loss": 0.1965, + "step": 71495 + }, + { + "epoch": 3.34, + "learning_rate": 8.953490194848965e-06, + "loss": 0.0727, + "step": 71500 + }, + { + "epoch": 3.34, + "learning_rate": 8.952706409794179e-06, + "loss": 0.0363, + "step": 71505 + }, + { + "epoch": 3.34, + "learning_rate": 8.951922624739393e-06, + "loss": 0.0834, + "step": 71510 + }, + { + "epoch": 3.34, + "learning_rate": 8.951138839684605e-06, + "loss": 0.053, + "step": 71515 + }, + { + "epoch": 3.34, + "learning_rate": 8.950355054629819e-06, + "loss": 0.0988, + "step": 71520 + }, + { + "epoch": 3.34, + "learning_rate": 8.949571269575033e-06, + "loss": 0.1021, + "step": 71525 + }, + { + "epoch": 3.34, + "learning_rate": 8.948787484520246e-06, + "loss": 0.1247, + "step": 71530 + }, + { + "epoch": 3.34, + "learning_rate": 8.94800369946546e-06, + "loss": 0.1089, + "step": 71535 + }, + { + "epoch": 3.34, + "learning_rate": 8.947219914410673e-06, + "loss": 0.213, + "step": 71540 + }, + { + "epoch": 3.34, + "learning_rate": 8.946436129355886e-06, + "loss": 0.3372, + "step": 71545 + }, + { + "epoch": 3.34, + "learning_rate": 8.9456523443011e-06, + "loss": 0.0676, + "step": 71550 + }, + { + "epoch": 3.34, + "learning_rate": 8.944868559246313e-06, + "loss": 0.0195, + "step": 71555 + }, + { + "epoch": 3.34, + "learning_rate": 8.944084774191527e-06, + "loss": 0.0476, + "step": 71560 + }, + { + "epoch": 3.34, + "learning_rate": 8.943300989136739e-06, + "loss": 0.0935, + "step": 71565 + }, + { + "epoch": 3.34, + "learning_rate": 8.942517204081953e-06, + "loss": 0.0661, + "step": 71570 + }, + { + "epoch": 3.34, + "learning_rate": 8.941733419027167e-06, + "loss": 0.1716, + "step": 71575 + }, + { + "epoch": 3.34, + "learning_rate": 8.94094963397238e-06, + "loss": 0.1298, + "step": 71580 + }, + { + "epoch": 3.34, + "learning_rate": 8.940165848917594e-06, + "loss": 0.1413, + "step": 71585 + }, + { + "epoch": 3.34, + "learning_rate": 8.939382063862807e-06, + "loss": 0.2108, + "step": 71590 + }, + { + "epoch": 3.34, + "learning_rate": 8.93859827880802e-06, + "loss": 0.2585, + "step": 71595 + }, + { + "epoch": 3.34, + "learning_rate": 8.937814493753234e-06, + "loss": 0.0759, + "step": 71600 + }, + { + "epoch": 3.34, + "learning_rate": 8.937030708698448e-06, + "loss": 0.0262, + "step": 71605 + }, + { + "epoch": 3.34, + "learning_rate": 8.93624692364366e-06, + "loss": 0.0391, + "step": 71610 + }, + { + "epoch": 3.34, + "learning_rate": 8.935463138588874e-06, + "loss": 0.0442, + "step": 71615 + }, + { + "epoch": 3.34, + "learning_rate": 8.934679353534087e-06, + "loss": 0.1691, + "step": 71620 + }, + { + "epoch": 3.34, + "learning_rate": 8.9338955684793e-06, + "loss": 0.1244, + "step": 71625 + }, + { + "epoch": 3.34, + "learning_rate": 8.933111783424514e-06, + "loss": 0.1147, + "step": 71630 + }, + { + "epoch": 3.34, + "learning_rate": 8.932327998369728e-06, + "loss": 0.1015, + "step": 71635 + }, + { + "epoch": 3.34, + "learning_rate": 8.93154421331494e-06, + "loss": 0.2465, + "step": 71640 + }, + { + "epoch": 3.34, + "learning_rate": 8.930760428260154e-06, + "loss": 0.2367, + "step": 71645 + }, + { + "epoch": 3.34, + "learning_rate": 8.929976643205368e-06, + "loss": 0.0882, + "step": 71650 + }, + { + "epoch": 3.34, + "learning_rate": 8.929192858150582e-06, + "loss": 0.0552, + "step": 71655 + }, + { + "epoch": 3.34, + "learning_rate": 8.928409073095794e-06, + "loss": 0.0628, + "step": 71660 + }, + { + "epoch": 3.34, + "learning_rate": 8.927625288041008e-06, + "loss": 0.0651, + "step": 71665 + }, + { + "epoch": 3.34, + "learning_rate": 8.926841502986222e-06, + "loss": 0.0243, + "step": 71670 + }, + { + "epoch": 3.34, + "learning_rate": 8.926057717931436e-06, + "loss": 0.113, + "step": 71675 + }, + { + "epoch": 3.34, + "learning_rate": 8.925273932876648e-06, + "loss": 0.1623, + "step": 71680 + }, + { + "epoch": 3.34, + "learning_rate": 8.924490147821862e-06, + "loss": 0.1306, + "step": 71685 + }, + { + "epoch": 3.35, + "learning_rate": 8.923706362767075e-06, + "loss": 0.2359, + "step": 71690 + }, + { + "epoch": 3.35, + "learning_rate": 8.922922577712288e-06, + "loss": 0.2171, + "step": 71695 + }, + { + "epoch": 3.35, + "learning_rate": 8.922138792657502e-06, + "loss": 0.1319, + "step": 71700 + }, + { + "epoch": 3.35, + "learning_rate": 8.921355007602716e-06, + "loss": 0.0114, + "step": 71705 + }, + { + "epoch": 3.35, + "learning_rate": 8.920571222547928e-06, + "loss": 0.0387, + "step": 71710 + }, + { + "epoch": 3.35, + "learning_rate": 8.919787437493142e-06, + "loss": 0.0901, + "step": 71715 + }, + { + "epoch": 3.35, + "learning_rate": 8.919003652438356e-06, + "loss": 0.0646, + "step": 71720 + }, + { + "epoch": 3.35, + "learning_rate": 8.91821986738357e-06, + "loss": 0.0286, + "step": 71725 + }, + { + "epoch": 3.35, + "learning_rate": 8.917436082328782e-06, + "loss": 0.1043, + "step": 71730 + }, + { + "epoch": 3.35, + "learning_rate": 8.916652297273996e-06, + "loss": 0.1484, + "step": 71735 + }, + { + "epoch": 3.35, + "learning_rate": 8.91586851221921e-06, + "loss": 0.1106, + "step": 71740 + }, + { + "epoch": 3.35, + "learning_rate": 8.915084727164424e-06, + "loss": 0.2136, + "step": 71745 + }, + { + "epoch": 3.35, + "learning_rate": 8.914300942109636e-06, + "loss": 0.0145, + "step": 71750 + }, + { + "epoch": 3.35, + "learning_rate": 8.91351715705485e-06, + "loss": 0.0284, + "step": 71755 + }, + { + "epoch": 3.35, + "learning_rate": 8.912733372000062e-06, + "loss": 0.0206, + "step": 71760 + }, + { + "epoch": 3.35, + "learning_rate": 8.911949586945276e-06, + "loss": 0.0796, + "step": 71765 + }, + { + "epoch": 3.35, + "learning_rate": 8.91116580189049e-06, + "loss": 0.0947, + "step": 71770 + }, + { + "epoch": 3.35, + "learning_rate": 8.910382016835704e-06, + "loss": 0.0731, + "step": 71775 + }, + { + "epoch": 3.35, + "learning_rate": 8.909598231780916e-06, + "loss": 0.1135, + "step": 71780 + }, + { + "epoch": 3.35, + "learning_rate": 8.90881444672613e-06, + "loss": 0.1274, + "step": 71785 + }, + { + "epoch": 3.35, + "learning_rate": 8.908030661671344e-06, + "loss": 0.2114, + "step": 71790 + }, + { + "epoch": 3.35, + "learning_rate": 8.907246876616558e-06, + "loss": 0.29, + "step": 71795 + }, + { + "epoch": 3.35, + "learning_rate": 8.906463091561772e-06, + "loss": 0.1027, + "step": 71800 + }, + { + "epoch": 3.35, + "learning_rate": 8.905679306506984e-06, + "loss": 0.0313, + "step": 71805 + }, + { + "epoch": 3.35, + "learning_rate": 8.904895521452198e-06, + "loss": 0.057, + "step": 71810 + }, + { + "epoch": 3.35, + "learning_rate": 8.90411173639741e-06, + "loss": 0.039, + "step": 71815 + }, + { + "epoch": 3.35, + "learning_rate": 8.903327951342624e-06, + "loss": 0.0554, + "step": 71820 + }, + { + "epoch": 3.35, + "learning_rate": 8.902544166287838e-06, + "loss": 0.0639, + "step": 71825 + }, + { + "epoch": 3.35, + "learning_rate": 8.90176038123305e-06, + "loss": 0.1176, + "step": 71830 + }, + { + "epoch": 3.35, + "learning_rate": 8.900976596178264e-06, + "loss": 0.0734, + "step": 71835 + }, + { + "epoch": 3.35, + "learning_rate": 8.900192811123478e-06, + "loss": 0.2307, + "step": 71840 + }, + { + "epoch": 3.35, + "learning_rate": 8.899409026068692e-06, + "loss": 0.3026, + "step": 71845 + }, + { + "epoch": 3.35, + "learning_rate": 8.898625241013906e-06, + "loss": 0.0709, + "step": 71850 + }, + { + "epoch": 3.35, + "learning_rate": 8.897841455959118e-06, + "loss": 0.0339, + "step": 71855 + }, + { + "epoch": 3.35, + "learning_rate": 8.897057670904332e-06, + "loss": 0.0831, + "step": 71860 + }, + { + "epoch": 3.35, + "learning_rate": 8.896273885849546e-06, + "loss": 0.0316, + "step": 71865 + }, + { + "epoch": 3.35, + "learning_rate": 8.89549010079476e-06, + "loss": 0.103, + "step": 71870 + }, + { + "epoch": 3.35, + "learning_rate": 8.894706315739972e-06, + "loss": 0.0875, + "step": 71875 + }, + { + "epoch": 3.35, + "learning_rate": 8.893922530685184e-06, + "loss": 0.0532, + "step": 71880 + }, + { + "epoch": 3.35, + "learning_rate": 8.893138745630398e-06, + "loss": 0.1521, + "step": 71885 + }, + { + "epoch": 3.35, + "learning_rate": 8.892354960575612e-06, + "loss": 0.1833, + "step": 71890 + }, + { + "epoch": 3.35, + "learning_rate": 8.891571175520826e-06, + "loss": 0.4676, + "step": 71895 + }, + { + "epoch": 3.35, + "learning_rate": 8.89078739046604e-06, + "loss": 0.0905, + "step": 71900 + }, + { + "epoch": 3.36, + "learning_rate": 8.890003605411252e-06, + "loss": 0.0352, + "step": 71905 + }, + { + "epoch": 3.36, + "learning_rate": 8.889219820356466e-06, + "loss": 0.0343, + "step": 71910 + }, + { + "epoch": 3.36, + "learning_rate": 8.88843603530168e-06, + "loss": 0.0217, + "step": 71915 + }, + { + "epoch": 3.36, + "learning_rate": 8.887652250246894e-06, + "loss": 0.0845, + "step": 71920 + }, + { + "epoch": 3.36, + "learning_rate": 8.886868465192106e-06, + "loss": 0.0914, + "step": 71925 + }, + { + "epoch": 3.36, + "learning_rate": 8.88608468013732e-06, + "loss": 0.0889, + "step": 71930 + }, + { + "epoch": 3.36, + "learning_rate": 8.885300895082534e-06, + "loss": 0.0855, + "step": 71935 + }, + { + "epoch": 3.36, + "learning_rate": 8.884517110027748e-06, + "loss": 0.2037, + "step": 71940 + }, + { + "epoch": 3.36, + "learning_rate": 8.88373332497296e-06, + "loss": 0.3553, + "step": 71945 + }, + { + "epoch": 3.36, + "learning_rate": 8.882949539918174e-06, + "loss": 0.0663, + "step": 71950 + }, + { + "epoch": 3.36, + "learning_rate": 8.882165754863386e-06, + "loss": 0.0432, + "step": 71955 + }, + { + "epoch": 3.36, + "learning_rate": 8.8813819698086e-06, + "loss": 0.0513, + "step": 71960 + }, + { + "epoch": 3.36, + "learning_rate": 8.880598184753814e-06, + "loss": 0.027, + "step": 71965 + }, + { + "epoch": 3.36, + "learning_rate": 8.879814399699028e-06, + "loss": 0.1287, + "step": 71970 + }, + { + "epoch": 3.36, + "learning_rate": 8.87903061464424e-06, + "loss": 0.0541, + "step": 71975 + }, + { + "epoch": 3.36, + "learning_rate": 8.878246829589454e-06, + "loss": 0.0934, + "step": 71980 + }, + { + "epoch": 3.36, + "learning_rate": 8.877463044534668e-06, + "loss": 0.0739, + "step": 71985 + }, + { + "epoch": 3.36, + "learning_rate": 8.876679259479882e-06, + "loss": 0.1659, + "step": 71990 + }, + { + "epoch": 3.36, + "learning_rate": 8.875895474425094e-06, + "loss": 0.2925, + "step": 71995 + }, + { + "epoch": 3.36, + "learning_rate": 8.875111689370308e-06, + "loss": 0.0761, + "step": 72000 + }, + { + "epoch": 3.36, + "learning_rate": 8.874327904315522e-06, + "loss": 0.0252, + "step": 72005 + }, + { + "epoch": 3.36, + "learning_rate": 8.873544119260734e-06, + "loss": 0.0426, + "step": 72010 + }, + { + "epoch": 3.36, + "learning_rate": 8.872760334205948e-06, + "loss": 0.0238, + "step": 72015 + }, + { + "epoch": 3.36, + "learning_rate": 8.871976549151162e-06, + "loss": 0.0345, + "step": 72020 + }, + { + "epoch": 3.36, + "learning_rate": 8.871192764096374e-06, + "loss": 0.0779, + "step": 72025 + }, + { + "epoch": 3.36, + "learning_rate": 8.870408979041588e-06, + "loss": 0.0438, + "step": 72030 + }, + { + "epoch": 3.36, + "learning_rate": 8.869625193986802e-06, + "loss": 0.1802, + "step": 72035 + }, + { + "epoch": 3.36, + "learning_rate": 8.868841408932016e-06, + "loss": 0.1752, + "step": 72040 + }, + { + "epoch": 3.36, + "learning_rate": 8.868057623877228e-06, + "loss": 0.2804, + "step": 72045 + }, + { + "epoch": 3.36, + "learning_rate": 8.867273838822442e-06, + "loss": 0.0499, + "step": 72050 + }, + { + "epoch": 3.36, + "learning_rate": 8.866490053767656e-06, + "loss": 0.0281, + "step": 72055 + }, + { + "epoch": 3.36, + "learning_rate": 8.86570626871287e-06, + "loss": 0.0557, + "step": 72060 + }, + { + "epoch": 3.36, + "learning_rate": 8.864922483658084e-06, + "loss": 0.0986, + "step": 72065 + }, + { + "epoch": 3.36, + "learning_rate": 8.864138698603296e-06, + "loss": 0.0801, + "step": 72070 + }, + { + "epoch": 3.36, + "learning_rate": 8.863354913548508e-06, + "loss": 0.1067, + "step": 72075 + }, + { + "epoch": 3.36, + "learning_rate": 8.862571128493722e-06, + "loss": 0.069, + "step": 72080 + }, + { + "epoch": 3.36, + "learning_rate": 8.861787343438936e-06, + "loss": 0.1912, + "step": 72085 + }, + { + "epoch": 3.36, + "learning_rate": 8.86100355838415e-06, + "loss": 0.2374, + "step": 72090 + }, + { + "epoch": 3.36, + "learning_rate": 8.860219773329362e-06, + "loss": 0.2909, + "step": 72095 + }, + { + "epoch": 3.36, + "learning_rate": 8.859435988274576e-06, + "loss": 0.0656, + "step": 72100 + }, + { + "epoch": 3.36, + "learning_rate": 8.85865220321979e-06, + "loss": 0.0112, + "step": 72105 + }, + { + "epoch": 3.36, + "learning_rate": 8.857868418165004e-06, + "loss": 0.0553, + "step": 72110 + }, + { + "epoch": 3.36, + "learning_rate": 8.857084633110218e-06, + "loss": 0.045, + "step": 72115 + }, + { + "epoch": 3.37, + "learning_rate": 8.85630084805543e-06, + "loss": 0.1336, + "step": 72120 + }, + { + "epoch": 3.37, + "learning_rate": 8.855517063000644e-06, + "loss": 0.1027, + "step": 72125 + }, + { + "epoch": 3.37, + "learning_rate": 8.854733277945858e-06, + "loss": 0.1755, + "step": 72130 + }, + { + "epoch": 3.37, + "learning_rate": 8.853949492891071e-06, + "loss": 0.0975, + "step": 72135 + }, + { + "epoch": 3.37, + "learning_rate": 8.853165707836284e-06, + "loss": 0.219, + "step": 72140 + }, + { + "epoch": 3.37, + "learning_rate": 8.852381922781496e-06, + "loss": 0.2413, + "step": 72145 + }, + { + "epoch": 3.37, + "learning_rate": 8.85159813772671e-06, + "loss": 0.1244, + "step": 72150 + }, + { + "epoch": 3.37, + "learning_rate": 8.850814352671924e-06, + "loss": 0.0359, + "step": 72155 + }, + { + "epoch": 3.37, + "learning_rate": 8.850030567617138e-06, + "loss": 0.0713, + "step": 72160 + }, + { + "epoch": 3.37, + "learning_rate": 8.849246782562352e-06, + "loss": 0.0794, + "step": 72165 + }, + { + "epoch": 3.37, + "learning_rate": 8.848462997507564e-06, + "loss": 0.0946, + "step": 72170 + }, + { + "epoch": 3.37, + "learning_rate": 8.847679212452778e-06, + "loss": 0.0688, + "step": 72175 + }, + { + "epoch": 3.37, + "learning_rate": 8.846895427397992e-06, + "loss": 0.1037, + "step": 72180 + }, + { + "epoch": 3.37, + "learning_rate": 8.846111642343205e-06, + "loss": 0.1237, + "step": 72185 + }, + { + "epoch": 3.37, + "learning_rate": 8.845327857288418e-06, + "loss": 0.1358, + "step": 72190 + }, + { + "epoch": 3.37, + "learning_rate": 8.844544072233632e-06, + "loss": 0.5546, + "step": 72195 + }, + { + "epoch": 3.37, + "learning_rate": 8.843760287178845e-06, + "loss": 0.0636, + "step": 72200 + }, + { + "epoch": 3.37, + "learning_rate": 8.842976502124058e-06, + "loss": 0.0212, + "step": 72205 + }, + { + "epoch": 3.37, + "learning_rate": 8.842192717069272e-06, + "loss": 0.0511, + "step": 72210 + }, + { + "epoch": 3.37, + "learning_rate": 8.841408932014485e-06, + "loss": 0.0671, + "step": 72215 + }, + { + "epoch": 3.37, + "learning_rate": 8.840625146959698e-06, + "loss": 0.1021, + "step": 72220 + }, + { + "epoch": 3.37, + "learning_rate": 8.839841361904912e-06, + "loss": 0.1161, + "step": 72225 + }, + { + "epoch": 3.37, + "learning_rate": 8.839057576850126e-06, + "loss": 0.1356, + "step": 72230 + }, + { + "epoch": 3.37, + "learning_rate": 8.83827379179534e-06, + "loss": 0.2081, + "step": 72235 + }, + { + "epoch": 3.37, + "learning_rate": 8.837490006740552e-06, + "loss": 0.1651, + "step": 72240 + }, + { + "epoch": 3.37, + "learning_rate": 8.836706221685766e-06, + "loss": 0.1851, + "step": 72245 + }, + { + "epoch": 3.37, + "learning_rate": 8.83592243663098e-06, + "loss": 0.0708, + "step": 72250 + }, + { + "epoch": 3.37, + "learning_rate": 8.835138651576193e-06, + "loss": 0.007, + "step": 72255 + }, + { + "epoch": 3.37, + "learning_rate": 8.834354866521406e-06, + "loss": 0.0373, + "step": 72260 + }, + { + "epoch": 3.37, + "learning_rate": 8.83357108146662e-06, + "loss": 0.0591, + "step": 72265 + }, + { + "epoch": 3.37, + "learning_rate": 8.832787296411832e-06, + "loss": 0.0865, + "step": 72270 + }, + { + "epoch": 3.37, + "learning_rate": 8.832003511357046e-06, + "loss": 0.0524, + "step": 72275 + }, + { + "epoch": 3.37, + "learning_rate": 8.83121972630226e-06, + "loss": 0.1104, + "step": 72280 + }, + { + "epoch": 3.37, + "learning_rate": 8.830435941247473e-06, + "loss": 0.1238, + "step": 72285 + }, + { + "epoch": 3.37, + "learning_rate": 8.829652156192686e-06, + "loss": 0.1583, + "step": 72290 + }, + { + "epoch": 3.37, + "learning_rate": 8.8288683711379e-06, + "loss": 0.1681, + "step": 72295 + }, + { + "epoch": 3.37, + "learning_rate": 8.828084586083113e-06, + "loss": 0.1097, + "step": 72300 + }, + { + "epoch": 3.37, + "learning_rate": 8.827300801028327e-06, + "loss": 0.0426, + "step": 72305 + }, + { + "epoch": 3.37, + "learning_rate": 8.82651701597354e-06, + "loss": 0.0368, + "step": 72310 + }, + { + "epoch": 3.37, + "learning_rate": 8.825733230918753e-06, + "loss": 0.0631, + "step": 72315 + }, + { + "epoch": 3.37, + "learning_rate": 8.824949445863967e-06, + "loss": 0.0818, + "step": 72320 + }, + { + "epoch": 3.37, + "learning_rate": 8.824165660809181e-06, + "loss": 0.0878, + "step": 72325 + }, + { + "epoch": 3.38, + "learning_rate": 8.823381875754393e-06, + "loss": 0.1477, + "step": 72330 + }, + { + "epoch": 3.38, + "learning_rate": 8.822598090699607e-06, + "loss": 0.2534, + "step": 72335 + }, + { + "epoch": 3.38, + "learning_rate": 8.82181430564482e-06, + "loss": 0.2975, + "step": 72340 + }, + { + "epoch": 3.38, + "learning_rate": 8.821030520590033e-06, + "loss": 0.2963, + "step": 72345 + }, + { + "epoch": 3.38, + "learning_rate": 8.820246735535247e-06, + "loss": 0.0708, + "step": 72350 + }, + { + "epoch": 3.38, + "learning_rate": 8.819462950480461e-06, + "loss": 0.0599, + "step": 72355 + }, + { + "epoch": 3.38, + "learning_rate": 8.818679165425674e-06, + "loss": 0.0268, + "step": 72360 + }, + { + "epoch": 3.38, + "learning_rate": 8.817895380370887e-06, + "loss": 0.0357, + "step": 72365 + }, + { + "epoch": 3.38, + "learning_rate": 8.817111595316101e-06, + "loss": 0.1084, + "step": 72370 + }, + { + "epoch": 3.38, + "learning_rate": 8.816327810261315e-06, + "loss": 0.1211, + "step": 72375 + }, + { + "epoch": 3.38, + "learning_rate": 8.815544025206529e-06, + "loss": 0.1424, + "step": 72380 + }, + { + "epoch": 3.38, + "learning_rate": 8.814760240151741e-06, + "loss": 0.1438, + "step": 72385 + }, + { + "epoch": 3.38, + "learning_rate": 8.813976455096955e-06, + "loss": 0.2297, + "step": 72390 + }, + { + "epoch": 3.38, + "learning_rate": 8.81319267004217e-06, + "loss": 0.2569, + "step": 72395 + }, + { + "epoch": 3.38, + "learning_rate": 8.812408884987381e-06, + "loss": 0.0635, + "step": 72400 + }, + { + "epoch": 3.38, + "learning_rate": 8.811625099932595e-06, + "loss": 0.0358, + "step": 72405 + }, + { + "epoch": 3.38, + "learning_rate": 8.810841314877807e-06, + "loss": 0.033, + "step": 72410 + }, + { + "epoch": 3.38, + "learning_rate": 8.810057529823021e-06, + "loss": 0.0559, + "step": 72415 + }, + { + "epoch": 3.38, + "learning_rate": 8.809273744768235e-06, + "loss": 0.0961, + "step": 72420 + }, + { + "epoch": 3.38, + "learning_rate": 8.80848995971345e-06, + "loss": 0.133, + "step": 72425 + }, + { + "epoch": 3.38, + "learning_rate": 8.807706174658663e-06, + "loss": 0.1051, + "step": 72430 + }, + { + "epoch": 3.38, + "learning_rate": 8.806922389603875e-06, + "loss": 0.1934, + "step": 72435 + }, + { + "epoch": 3.38, + "learning_rate": 8.80613860454909e-06, + "loss": 0.1838, + "step": 72440 + }, + { + "epoch": 3.38, + "learning_rate": 8.805354819494303e-06, + "loss": 0.2467, + "step": 72445 + }, + { + "epoch": 3.38, + "learning_rate": 8.804571034439517e-06, + "loss": 0.064, + "step": 72450 + }, + { + "epoch": 3.38, + "learning_rate": 8.80378724938473e-06, + "loss": 0.0198, + "step": 72455 + }, + { + "epoch": 3.38, + "learning_rate": 8.803003464329943e-06, + "loss": 0.0427, + "step": 72460 + }, + { + "epoch": 3.38, + "learning_rate": 8.802219679275155e-06, + "loss": 0.05, + "step": 72465 + }, + { + "epoch": 3.38, + "learning_rate": 8.80143589422037e-06, + "loss": 0.1142, + "step": 72470 + }, + { + "epoch": 3.38, + "learning_rate": 8.800652109165583e-06, + "loss": 0.0693, + "step": 72475 + }, + { + "epoch": 3.38, + "learning_rate": 8.799868324110797e-06, + "loss": 0.0562, + "step": 72480 + }, + { + "epoch": 3.38, + "learning_rate": 8.79908453905601e-06, + "loss": 0.1657, + "step": 72485 + }, + { + "epoch": 3.38, + "learning_rate": 8.798300754001223e-06, + "loss": 0.2237, + "step": 72490 + }, + { + "epoch": 3.38, + "learning_rate": 8.797516968946437e-06, + "loss": 0.2782, + "step": 72495 + }, + { + "epoch": 3.38, + "learning_rate": 8.796733183891651e-06, + "loss": 0.037, + "step": 72500 + }, + { + "epoch": 3.38, + "learning_rate": 8.795949398836863e-06, + "loss": 0.0315, + "step": 72505 + }, + { + "epoch": 3.38, + "learning_rate": 8.795165613782077e-06, + "loss": 0.0494, + "step": 72510 + }, + { + "epoch": 3.38, + "learning_rate": 8.794381828727291e-06, + "loss": 0.0724, + "step": 72515 + }, + { + "epoch": 3.38, + "learning_rate": 8.793598043672505e-06, + "loss": 0.1049, + "step": 72520 + }, + { + "epoch": 3.38, + "learning_rate": 8.792814258617717e-06, + "loss": 0.0456, + "step": 72525 + }, + { + "epoch": 3.38, + "learning_rate": 8.792030473562931e-06, + "loss": 0.0621, + "step": 72530 + }, + { + "epoch": 3.38, + "learning_rate": 8.791246688508143e-06, + "loss": 0.0755, + "step": 72535 + }, + { + "epoch": 3.38, + "learning_rate": 8.790462903453357e-06, + "loss": 0.2267, + "step": 72540 + }, + { + "epoch": 3.39, + "learning_rate": 8.789679118398571e-06, + "loss": 0.4156, + "step": 72545 + }, + { + "epoch": 3.39, + "learning_rate": 8.788895333343785e-06, + "loss": 0.0726, + "step": 72550 + }, + { + "epoch": 3.39, + "learning_rate": 8.788111548288997e-06, + "loss": 0.0252, + "step": 72555 + }, + { + "epoch": 3.39, + "learning_rate": 8.787327763234211e-06, + "loss": 0.0365, + "step": 72560 + }, + { + "epoch": 3.39, + "learning_rate": 8.786543978179425e-06, + "loss": 0.0294, + "step": 72565 + }, + { + "epoch": 3.39, + "learning_rate": 8.785760193124639e-06, + "loss": 0.0894, + "step": 72570 + }, + { + "epoch": 3.39, + "learning_rate": 8.784976408069851e-06, + "loss": 0.1237, + "step": 72575 + }, + { + "epoch": 3.39, + "learning_rate": 8.784192623015065e-06, + "loss": 0.1305, + "step": 72580 + }, + { + "epoch": 3.39, + "learning_rate": 8.783408837960279e-06, + "loss": 0.2606, + "step": 72585 + }, + { + "epoch": 3.39, + "learning_rate": 8.782625052905493e-06, + "loss": 0.1614, + "step": 72590 + }, + { + "epoch": 3.39, + "learning_rate": 8.781841267850705e-06, + "loss": 0.4077, + "step": 72595 + }, + { + "epoch": 3.39, + "learning_rate": 8.781057482795919e-06, + "loss": 0.0702, + "step": 72600 + }, + { + "epoch": 3.39, + "learning_rate": 8.780273697741131e-06, + "loss": 0.1083, + "step": 72605 + }, + { + "epoch": 3.39, + "learning_rate": 8.779489912686345e-06, + "loss": 0.0806, + "step": 72610 + }, + { + "epoch": 3.39, + "learning_rate": 8.778706127631559e-06, + "loss": 0.07, + "step": 72615 + }, + { + "epoch": 3.39, + "learning_rate": 8.777922342576773e-06, + "loss": 0.0412, + "step": 72620 + }, + { + "epoch": 3.39, + "learning_rate": 8.777138557521985e-06, + "loss": 0.1552, + "step": 72625 + }, + { + "epoch": 3.39, + "learning_rate": 8.776354772467199e-06, + "loss": 0.1425, + "step": 72630 + }, + { + "epoch": 3.39, + "learning_rate": 8.775570987412413e-06, + "loss": 0.2181, + "step": 72635 + }, + { + "epoch": 3.39, + "learning_rate": 8.774787202357627e-06, + "loss": 0.1684, + "step": 72640 + }, + { + "epoch": 3.39, + "learning_rate": 8.77400341730284e-06, + "loss": 0.2213, + "step": 72645 + }, + { + "epoch": 3.39, + "learning_rate": 8.773219632248053e-06, + "loss": 0.1042, + "step": 72650 + }, + { + "epoch": 3.39, + "learning_rate": 8.772435847193267e-06, + "loss": 0.0489, + "step": 72655 + }, + { + "epoch": 3.39, + "learning_rate": 8.771652062138479e-06, + "loss": 0.0216, + "step": 72660 + }, + { + "epoch": 3.39, + "learning_rate": 8.770868277083693e-06, + "loss": 0.0553, + "step": 72665 + }, + { + "epoch": 3.39, + "learning_rate": 8.770084492028907e-06, + "loss": 0.1275, + "step": 72670 + }, + { + "epoch": 3.39, + "learning_rate": 8.769300706974119e-06, + "loss": 0.1648, + "step": 72675 + }, + { + "epoch": 3.39, + "learning_rate": 8.768516921919333e-06, + "loss": 0.0575, + "step": 72680 + }, + { + "epoch": 3.39, + "learning_rate": 8.767733136864547e-06, + "loss": 0.1371, + "step": 72685 + }, + { + "epoch": 3.39, + "learning_rate": 8.76694935180976e-06, + "loss": 0.21, + "step": 72690 + }, + { + "epoch": 3.39, + "learning_rate": 8.766165566754975e-06, + "loss": 0.3111, + "step": 72695 + }, + { + "epoch": 3.39, + "learning_rate": 8.765381781700187e-06, + "loss": 0.0959, + "step": 72700 + }, + { + "epoch": 3.39, + "learning_rate": 8.7645979966454e-06, + "loss": 0.0205, + "step": 72705 + }, + { + "epoch": 3.39, + "learning_rate": 8.763814211590615e-06, + "loss": 0.0431, + "step": 72710 + }, + { + "epoch": 3.39, + "learning_rate": 8.763030426535829e-06, + "loss": 0.0457, + "step": 72715 + }, + { + "epoch": 3.39, + "learning_rate": 8.762246641481041e-06, + "loss": 0.0813, + "step": 72720 + }, + { + "epoch": 3.39, + "learning_rate": 8.761462856426253e-06, + "loss": 0.057, + "step": 72725 + }, + { + "epoch": 3.39, + "learning_rate": 8.760679071371467e-06, + "loss": 0.1586, + "step": 72730 + }, + { + "epoch": 3.39, + "learning_rate": 8.759895286316681e-06, + "loss": 0.1567, + "step": 72735 + }, + { + "epoch": 3.39, + "learning_rate": 8.759111501261895e-06, + "loss": 0.1828, + "step": 72740 + }, + { + "epoch": 3.39, + "learning_rate": 8.758327716207109e-06, + "loss": 0.3974, + "step": 72745 + }, + { + "epoch": 3.39, + "learning_rate": 8.757543931152321e-06, + "loss": 0.0386, + "step": 72750 + }, + { + "epoch": 3.39, + "learning_rate": 8.756760146097535e-06, + "loss": 0.0502, + "step": 72755 + }, + { + "epoch": 3.4, + "learning_rate": 8.755976361042749e-06, + "loss": 0.0306, + "step": 72760 + }, + { + "epoch": 3.4, + "learning_rate": 8.755192575987963e-06, + "loss": 0.0475, + "step": 72765 + }, + { + "epoch": 3.4, + "learning_rate": 8.754408790933175e-06, + "loss": 0.0952, + "step": 72770 + }, + { + "epoch": 3.4, + "learning_rate": 8.753625005878389e-06, + "loss": 0.1277, + "step": 72775 + }, + { + "epoch": 3.4, + "learning_rate": 8.752841220823603e-06, + "loss": 0.0964, + "step": 72780 + }, + { + "epoch": 3.4, + "learning_rate": 8.752057435768817e-06, + "loss": 0.1072, + "step": 72785 + }, + { + "epoch": 3.4, + "learning_rate": 8.751273650714029e-06, + "loss": 0.2131, + "step": 72790 + }, + { + "epoch": 3.4, + "learning_rate": 8.750489865659243e-06, + "loss": 0.2558, + "step": 72795 + }, + { + "epoch": 3.4, + "learning_rate": 8.749706080604455e-06, + "loss": 0.0444, + "step": 72800 + }, + { + "epoch": 3.4, + "learning_rate": 8.748922295549669e-06, + "loss": 0.057, + "step": 72805 + }, + { + "epoch": 3.4, + "learning_rate": 8.748138510494883e-06, + "loss": 0.0217, + "step": 72810 + }, + { + "epoch": 3.4, + "learning_rate": 8.747354725440097e-06, + "loss": 0.0364, + "step": 72815 + }, + { + "epoch": 3.4, + "learning_rate": 8.746570940385309e-06, + "loss": 0.0689, + "step": 72820 + }, + { + "epoch": 3.4, + "learning_rate": 8.745787155330523e-06, + "loss": 0.1745, + "step": 72825 + }, + { + "epoch": 3.4, + "learning_rate": 8.745003370275737e-06, + "loss": 0.1426, + "step": 72830 + }, + { + "epoch": 3.4, + "learning_rate": 8.74421958522095e-06, + "loss": 0.2692, + "step": 72835 + }, + { + "epoch": 3.4, + "learning_rate": 8.743435800166163e-06, + "loss": 0.2706, + "step": 72840 + }, + { + "epoch": 3.4, + "learning_rate": 8.742652015111377e-06, + "loss": 0.1699, + "step": 72845 + }, + { + "epoch": 3.4, + "learning_rate": 8.74186823005659e-06, + "loss": 0.0315, + "step": 72850 + }, + { + "epoch": 3.4, + "learning_rate": 8.741084445001804e-06, + "loss": 0.0269, + "step": 72855 + }, + { + "epoch": 3.4, + "learning_rate": 8.740300659947017e-06, + "loss": 0.0178, + "step": 72860 + }, + { + "epoch": 3.4, + "learning_rate": 8.73951687489223e-06, + "loss": 0.0391, + "step": 72865 + }, + { + "epoch": 3.4, + "learning_rate": 8.738733089837443e-06, + "loss": 0.0862, + "step": 72870 + }, + { + "epoch": 3.4, + "learning_rate": 8.737949304782657e-06, + "loss": 0.0993, + "step": 72875 + }, + { + "epoch": 3.4, + "learning_rate": 8.73716551972787e-06, + "loss": 0.1759, + "step": 72880 + }, + { + "epoch": 3.4, + "learning_rate": 8.736381734673084e-06, + "loss": 0.1792, + "step": 72885 + }, + { + "epoch": 3.4, + "learning_rate": 8.735597949618297e-06, + "loss": 0.2136, + "step": 72890 + }, + { + "epoch": 3.4, + "learning_rate": 8.73481416456351e-06, + "loss": 0.1858, + "step": 72895 + }, + { + "epoch": 3.4, + "learning_rate": 8.734030379508725e-06, + "loss": 0.0937, + "step": 72900 + }, + { + "epoch": 3.4, + "learning_rate": 8.733246594453938e-06, + "loss": 0.0136, + "step": 72905 + }, + { + "epoch": 3.4, + "learning_rate": 8.73246280939915e-06, + "loss": 0.0673, + "step": 72910 + }, + { + "epoch": 3.4, + "learning_rate": 8.731679024344365e-06, + "loss": 0.0488, + "step": 72915 + }, + { + "epoch": 3.4, + "learning_rate": 8.730895239289578e-06, + "loss": 0.0564, + "step": 72920 + }, + { + "epoch": 3.4, + "learning_rate": 8.73011145423479e-06, + "loss": 0.158, + "step": 72925 + }, + { + "epoch": 3.4, + "learning_rate": 8.729327669180005e-06, + "loss": 0.1729, + "step": 72930 + }, + { + "epoch": 3.4, + "learning_rate": 8.728543884125218e-06, + "loss": 0.1008, + "step": 72935 + }, + { + "epoch": 3.4, + "learning_rate": 8.72776009907043e-06, + "loss": 0.1283, + "step": 72940 + }, + { + "epoch": 3.4, + "learning_rate": 8.726976314015645e-06, + "loss": 0.4005, + "step": 72945 + }, + { + "epoch": 3.4, + "learning_rate": 8.726192528960858e-06, + "loss": 0.0513, + "step": 72950 + }, + { + "epoch": 3.4, + "learning_rate": 8.725408743906072e-06, + "loss": 0.0246, + "step": 72955 + }, + { + "epoch": 3.4, + "learning_rate": 8.724624958851286e-06, + "loss": 0.0681, + "step": 72960 + }, + { + "epoch": 3.4, + "learning_rate": 8.723841173796499e-06, + "loss": 0.0764, + "step": 72965 + }, + { + "epoch": 3.4, + "learning_rate": 8.723057388741712e-06, + "loss": 0.0384, + "step": 72970 + }, + { + "epoch": 3.41, + "learning_rate": 8.722273603686926e-06, + "loss": 0.0427, + "step": 72975 + }, + { + "epoch": 3.41, + "learning_rate": 8.72148981863214e-06, + "loss": 0.1383, + "step": 72980 + }, + { + "epoch": 3.41, + "learning_rate": 8.720706033577352e-06, + "loss": 0.1134, + "step": 72985 + }, + { + "epoch": 3.41, + "learning_rate": 8.719922248522565e-06, + "loss": 0.1788, + "step": 72990 + }, + { + "epoch": 3.41, + "learning_rate": 8.719138463467779e-06, + "loss": 0.1881, + "step": 72995 + }, + { + "epoch": 3.41, + "learning_rate": 8.718354678412992e-06, + "loss": 0.0662, + "step": 73000 + }, + { + "epoch": 3.41, + "learning_rate": 8.717570893358206e-06, + "loss": 0.0656, + "step": 73005 + }, + { + "epoch": 3.41, + "learning_rate": 8.71678710830342e-06, + "loss": 0.091, + "step": 73010 + }, + { + "epoch": 3.41, + "learning_rate": 8.716003323248632e-06, + "loss": 0.0671, + "step": 73015 + }, + { + "epoch": 3.41, + "learning_rate": 8.715219538193846e-06, + "loss": 0.0417, + "step": 73020 + }, + { + "epoch": 3.41, + "learning_rate": 8.71443575313906e-06, + "loss": 0.1163, + "step": 73025 + }, + { + "epoch": 3.41, + "learning_rate": 8.713651968084274e-06, + "loss": 0.0666, + "step": 73030 + }, + { + "epoch": 3.41, + "learning_rate": 8.712868183029486e-06, + "loss": 0.128, + "step": 73035 + }, + { + "epoch": 3.41, + "learning_rate": 8.7120843979747e-06, + "loss": 0.1942, + "step": 73040 + }, + { + "epoch": 3.41, + "learning_rate": 8.711300612919914e-06, + "loss": 0.2439, + "step": 73045 + }, + { + "epoch": 3.41, + "learning_rate": 8.710516827865128e-06, + "loss": 0.067, + "step": 73050 + }, + { + "epoch": 3.41, + "learning_rate": 8.70973304281034e-06, + "loss": 0.0163, + "step": 73055 + }, + { + "epoch": 3.41, + "learning_rate": 8.708949257755554e-06, + "loss": 0.0617, + "step": 73060 + }, + { + "epoch": 3.41, + "learning_rate": 8.708165472700766e-06, + "loss": 0.0927, + "step": 73065 + }, + { + "epoch": 3.41, + "learning_rate": 8.70738168764598e-06, + "loss": 0.0931, + "step": 73070 + }, + { + "epoch": 3.41, + "learning_rate": 8.706597902591194e-06, + "loss": 0.0455, + "step": 73075 + }, + { + "epoch": 3.41, + "learning_rate": 8.705814117536408e-06, + "loss": 0.1762, + "step": 73080 + }, + { + "epoch": 3.41, + "learning_rate": 8.70503033248162e-06, + "loss": 0.1451, + "step": 73085 + }, + { + "epoch": 3.41, + "learning_rate": 8.704246547426834e-06, + "loss": 0.1864, + "step": 73090 + }, + { + "epoch": 3.41, + "learning_rate": 8.703462762372048e-06, + "loss": 0.2506, + "step": 73095 + }, + { + "epoch": 3.41, + "learning_rate": 8.702678977317262e-06, + "loss": 0.136, + "step": 73100 + }, + { + "epoch": 3.41, + "learning_rate": 8.701895192262474e-06, + "loss": 0.0409, + "step": 73105 + }, + { + "epoch": 3.41, + "learning_rate": 8.701111407207688e-06, + "loss": 0.0228, + "step": 73110 + }, + { + "epoch": 3.41, + "learning_rate": 8.700327622152902e-06, + "loss": 0.0804, + "step": 73115 + }, + { + "epoch": 3.41, + "learning_rate": 8.699543837098114e-06, + "loss": 0.0498, + "step": 73120 + }, + { + "epoch": 3.41, + "learning_rate": 8.698916809054285e-06, + "loss": 0.0897, + "step": 73125 + }, + { + "epoch": 3.41, + "learning_rate": 8.698133023999499e-06, + "loss": 0.0444, + "step": 73130 + }, + { + "epoch": 3.41, + "learning_rate": 8.697349238944713e-06, + "loss": 0.1355, + "step": 73135 + }, + { + "epoch": 3.41, + "learning_rate": 8.696565453889927e-06, + "loss": 0.1694, + "step": 73140 + }, + { + "epoch": 3.41, + "learning_rate": 8.695781668835139e-06, + "loss": 0.2544, + "step": 73145 + }, + { + "epoch": 3.41, + "learning_rate": 8.694997883780353e-06, + "loss": 0.0608, + "step": 73150 + }, + { + "epoch": 3.41, + "learning_rate": 8.694214098725567e-06, + "loss": 0.0813, + "step": 73155 + }, + { + "epoch": 3.41, + "learning_rate": 8.69343031367078e-06, + "loss": 0.049, + "step": 73160 + }, + { + "epoch": 3.41, + "learning_rate": 8.692646528615993e-06, + "loss": 0.0249, + "step": 73165 + }, + { + "epoch": 3.41, + "learning_rate": 8.691862743561207e-06, + "loss": 0.0771, + "step": 73170 + }, + { + "epoch": 3.41, + "learning_rate": 8.69107895850642e-06, + "loss": 0.1589, + "step": 73175 + }, + { + "epoch": 3.41, + "learning_rate": 8.690295173451634e-06, + "loss": 0.0805, + "step": 73180 + }, + { + "epoch": 3.41, + "learning_rate": 8.689511388396847e-06, + "loss": 0.1952, + "step": 73185 + }, + { + "epoch": 3.42, + "learning_rate": 8.68872760334206e-06, + "loss": 0.2552, + "step": 73190 + }, + { + "epoch": 3.42, + "learning_rate": 8.687943818287273e-06, + "loss": 0.2266, + "step": 73195 + }, + { + "epoch": 3.42, + "learning_rate": 8.687160033232487e-06, + "loss": 0.0267, + "step": 73200 + }, + { + "epoch": 3.42, + "learning_rate": 8.6863762481777e-06, + "loss": 0.0572, + "step": 73205 + }, + { + "epoch": 3.42, + "learning_rate": 8.685592463122914e-06, + "loss": 0.0195, + "step": 73210 + }, + { + "epoch": 3.42, + "learning_rate": 8.684808678068127e-06, + "loss": 0.1335, + "step": 73215 + }, + { + "epoch": 3.42, + "learning_rate": 8.68402489301334e-06, + "loss": 0.0398, + "step": 73220 + }, + { + "epoch": 3.42, + "learning_rate": 8.683241107958554e-06, + "loss": 0.2702, + "step": 73225 + }, + { + "epoch": 3.42, + "learning_rate": 8.682457322903768e-06, + "loss": 0.1329, + "step": 73230 + }, + { + "epoch": 3.42, + "learning_rate": 8.68167353784898e-06, + "loss": 0.1341, + "step": 73235 + }, + { + "epoch": 3.42, + "learning_rate": 8.680889752794194e-06, + "loss": 0.1694, + "step": 73240 + }, + { + "epoch": 3.42, + "learning_rate": 8.680105967739408e-06, + "loss": 0.3827, + "step": 73245 + }, + { + "epoch": 3.42, + "learning_rate": 8.67932218268462e-06, + "loss": 0.0979, + "step": 73250 + }, + { + "epoch": 3.42, + "learning_rate": 8.678538397629834e-06, + "loss": 0.0562, + "step": 73255 + }, + { + "epoch": 3.42, + "learning_rate": 8.677754612575048e-06, + "loss": 0.0399, + "step": 73260 + }, + { + "epoch": 3.42, + "learning_rate": 8.67697082752026e-06, + "loss": 0.0307, + "step": 73265 + }, + { + "epoch": 3.42, + "learning_rate": 8.676187042465475e-06, + "loss": 0.0571, + "step": 73270 + }, + { + "epoch": 3.42, + "learning_rate": 8.675403257410688e-06, + "loss": 0.0912, + "step": 73275 + }, + { + "epoch": 3.42, + "learning_rate": 8.674619472355902e-06, + "loss": 0.0733, + "step": 73280 + }, + { + "epoch": 3.42, + "learning_rate": 8.673835687301115e-06, + "loss": 0.0705, + "step": 73285 + }, + { + "epoch": 3.42, + "learning_rate": 8.673051902246328e-06, + "loss": 0.1932, + "step": 73290 + }, + { + "epoch": 3.42, + "learning_rate": 8.672268117191542e-06, + "loss": 0.2412, + "step": 73295 + }, + { + "epoch": 3.42, + "learning_rate": 8.671484332136756e-06, + "loss": 0.1248, + "step": 73300 + }, + { + "epoch": 3.42, + "learning_rate": 8.67070054708197e-06, + "loss": 0.0374, + "step": 73305 + }, + { + "epoch": 3.42, + "learning_rate": 8.669916762027182e-06, + "loss": 0.0487, + "step": 73310 + }, + { + "epoch": 3.42, + "learning_rate": 8.669132976972395e-06, + "loss": 0.087, + "step": 73315 + }, + { + "epoch": 3.42, + "learning_rate": 8.668349191917608e-06, + "loss": 0.0897, + "step": 73320 + }, + { + "epoch": 3.42, + "learning_rate": 8.667565406862822e-06, + "loss": 0.092, + "step": 73325 + }, + { + "epoch": 3.42, + "learning_rate": 8.666781621808036e-06, + "loss": 0.1125, + "step": 73330 + }, + { + "epoch": 3.42, + "learning_rate": 8.665997836753249e-06, + "loss": 0.1317, + "step": 73335 + }, + { + "epoch": 3.42, + "learning_rate": 8.665214051698462e-06, + "loss": 0.2335, + "step": 73340 + }, + { + "epoch": 3.42, + "learning_rate": 8.664430266643676e-06, + "loss": 0.2869, + "step": 73345 + }, + { + "epoch": 3.42, + "learning_rate": 8.66364648158889e-06, + "loss": 0.0941, + "step": 73350 + }, + { + "epoch": 3.42, + "learning_rate": 8.662862696534104e-06, + "loss": 0.0136, + "step": 73355 + }, + { + "epoch": 3.42, + "learning_rate": 8.662078911479316e-06, + "loss": 0.065, + "step": 73360 + }, + { + "epoch": 3.42, + "learning_rate": 8.66129512642453e-06, + "loss": 0.0897, + "step": 73365 + }, + { + "epoch": 3.42, + "learning_rate": 8.660511341369744e-06, + "loss": 0.0441, + "step": 73370 + }, + { + "epoch": 3.42, + "learning_rate": 8.659727556314958e-06, + "loss": 0.0534, + "step": 73375 + }, + { + "epoch": 3.42, + "learning_rate": 8.65894377126017e-06, + "loss": 0.147, + "step": 73380 + }, + { + "epoch": 3.42, + "learning_rate": 8.658159986205384e-06, + "loss": 0.0851, + "step": 73385 + }, + { + "epoch": 3.42, + "learning_rate": 8.657376201150596e-06, + "loss": 0.1515, + "step": 73390 + }, + { + "epoch": 3.42, + "learning_rate": 8.65659241609581e-06, + "loss": 0.272, + "step": 73395 + }, + { + "epoch": 3.42, + "learning_rate": 8.655808631041024e-06, + "loss": 0.0938, + "step": 73400 + }, + { + "epoch": 3.43, + "learning_rate": 8.655024845986238e-06, + "loss": 0.0485, + "step": 73405 + }, + { + "epoch": 3.43, + "learning_rate": 8.65424106093145e-06, + "loss": 0.0487, + "step": 73410 + }, + { + "epoch": 3.43, + "learning_rate": 8.653457275876664e-06, + "loss": 0.024, + "step": 73415 + }, + { + "epoch": 3.43, + "learning_rate": 8.652673490821878e-06, + "loss": 0.0974, + "step": 73420 + }, + { + "epoch": 3.43, + "learning_rate": 8.651889705767092e-06, + "loss": 0.111, + "step": 73425 + }, + { + "epoch": 3.43, + "learning_rate": 8.651105920712304e-06, + "loss": 0.088, + "step": 73430 + }, + { + "epoch": 3.43, + "learning_rate": 8.650322135657518e-06, + "loss": 0.1132, + "step": 73435 + }, + { + "epoch": 3.43, + "learning_rate": 8.649538350602732e-06, + "loss": 0.1547, + "step": 73440 + }, + { + "epoch": 3.43, + "learning_rate": 8.648754565547944e-06, + "loss": 0.1941, + "step": 73445 + }, + { + "epoch": 3.43, + "learning_rate": 8.647970780493158e-06, + "loss": 0.0833, + "step": 73450 + }, + { + "epoch": 3.43, + "learning_rate": 8.647186995438372e-06, + "loss": 0.028, + "step": 73455 + }, + { + "epoch": 3.43, + "learning_rate": 8.646403210383584e-06, + "loss": 0.0611, + "step": 73460 + }, + { + "epoch": 3.43, + "learning_rate": 8.645619425328798e-06, + "loss": 0.0603, + "step": 73465 + }, + { + "epoch": 3.43, + "learning_rate": 8.644835640274012e-06, + "loss": 0.0568, + "step": 73470 + }, + { + "epoch": 3.43, + "learning_rate": 8.644051855219226e-06, + "loss": 0.0533, + "step": 73475 + }, + { + "epoch": 3.43, + "learning_rate": 8.643268070164438e-06, + "loss": 0.0709, + "step": 73480 + }, + { + "epoch": 3.43, + "learning_rate": 8.642484285109652e-06, + "loss": 0.0942, + "step": 73485 + }, + { + "epoch": 3.43, + "learning_rate": 8.641700500054866e-06, + "loss": 0.1067, + "step": 73490 + }, + { + "epoch": 3.43, + "learning_rate": 8.64091671500008e-06, + "loss": 0.2943, + "step": 73495 + }, + { + "epoch": 3.43, + "learning_rate": 8.640132929945292e-06, + "loss": 0.0612, + "step": 73500 + }, + { + "epoch": 3.43, + "learning_rate": 8.639349144890506e-06, + "loss": 0.041, + "step": 73505 + }, + { + "epoch": 3.43, + "learning_rate": 8.638565359835718e-06, + "loss": 0.0527, + "step": 73510 + }, + { + "epoch": 3.43, + "learning_rate": 8.637781574780932e-06, + "loss": 0.0718, + "step": 73515 + }, + { + "epoch": 3.43, + "learning_rate": 8.636997789726146e-06, + "loss": 0.1534, + "step": 73520 + }, + { + "epoch": 3.43, + "learning_rate": 8.63621400467136e-06, + "loss": 0.0413, + "step": 73525 + }, + { + "epoch": 3.43, + "learning_rate": 8.635430219616572e-06, + "loss": 0.1293, + "step": 73530 + }, + { + "epoch": 3.43, + "learning_rate": 8.634646434561786e-06, + "loss": 0.0855, + "step": 73535 + }, + { + "epoch": 3.43, + "learning_rate": 8.633862649507e-06, + "loss": 0.2193, + "step": 73540 + }, + { + "epoch": 3.43, + "learning_rate": 8.633078864452214e-06, + "loss": 0.3822, + "step": 73545 + }, + { + "epoch": 3.43, + "learning_rate": 8.632295079397426e-06, + "loss": 0.0456, + "step": 73550 + }, + { + "epoch": 3.43, + "learning_rate": 8.63151129434264e-06, + "loss": 0.0161, + "step": 73555 + }, + { + "epoch": 3.43, + "learning_rate": 8.630727509287854e-06, + "loss": 0.054, + "step": 73560 + }, + { + "epoch": 3.43, + "learning_rate": 8.629943724233068e-06, + "loss": 0.1103, + "step": 73565 + }, + { + "epoch": 3.43, + "learning_rate": 8.629159939178282e-06, + "loss": 0.0462, + "step": 73570 + }, + { + "epoch": 3.43, + "learning_rate": 8.628376154123494e-06, + "loss": 0.1141, + "step": 73575 + }, + { + "epoch": 3.43, + "learning_rate": 8.627592369068706e-06, + "loss": 0.1126, + "step": 73580 + }, + { + "epoch": 3.43, + "learning_rate": 8.62680858401392e-06, + "loss": 0.0803, + "step": 73585 + }, + { + "epoch": 3.43, + "learning_rate": 8.626024798959134e-06, + "loss": 0.1266, + "step": 73590 + }, + { + "epoch": 3.43, + "learning_rate": 8.625241013904348e-06, + "loss": 0.2645, + "step": 73595 + }, + { + "epoch": 3.43, + "learning_rate": 8.62445722884956e-06, + "loss": 0.09, + "step": 73600 + }, + { + "epoch": 3.43, + "learning_rate": 8.623673443794774e-06, + "loss": 0.0246, + "step": 73605 + }, + { + "epoch": 3.43, + "learning_rate": 8.622889658739988e-06, + "loss": 0.0233, + "step": 73610 + }, + { + "epoch": 3.43, + "learning_rate": 8.622105873685202e-06, + "loss": 0.037, + "step": 73615 + }, + { + "epoch": 3.44, + "learning_rate": 8.621322088630416e-06, + "loss": 0.0779, + "step": 73620 + }, + { + "epoch": 3.44, + "learning_rate": 8.620538303575628e-06, + "loss": 0.0583, + "step": 73625 + }, + { + "epoch": 3.44, + "learning_rate": 8.619754518520842e-06, + "loss": 0.1562, + "step": 73630 + }, + { + "epoch": 3.44, + "learning_rate": 8.618970733466056e-06, + "loss": 0.084, + "step": 73635 + }, + { + "epoch": 3.44, + "learning_rate": 8.618186948411268e-06, + "loss": 0.2324, + "step": 73640 + }, + { + "epoch": 3.44, + "learning_rate": 8.617403163356482e-06, + "loss": 0.3043, + "step": 73645 + }, + { + "epoch": 3.44, + "learning_rate": 8.616619378301696e-06, + "loss": 0.1044, + "step": 73650 + }, + { + "epoch": 3.44, + "learning_rate": 8.615835593246908e-06, + "loss": 0.0041, + "step": 73655 + }, + { + "epoch": 3.44, + "learning_rate": 8.615051808192122e-06, + "loss": 0.0606, + "step": 73660 + }, + { + "epoch": 3.44, + "learning_rate": 8.614268023137336e-06, + "loss": 0.0305, + "step": 73665 + }, + { + "epoch": 3.44, + "learning_rate": 8.61348423808255e-06, + "loss": 0.1222, + "step": 73670 + }, + { + "epoch": 3.44, + "learning_rate": 8.612700453027762e-06, + "loss": 0.0978, + "step": 73675 + }, + { + "epoch": 3.44, + "learning_rate": 8.611916667972976e-06, + "loss": 0.105, + "step": 73680 + }, + { + "epoch": 3.44, + "learning_rate": 8.61113288291819e-06, + "loss": 0.0781, + "step": 73685 + }, + { + "epoch": 3.44, + "learning_rate": 8.610349097863404e-06, + "loss": 0.196, + "step": 73690 + }, + { + "epoch": 3.44, + "learning_rate": 8.609565312808616e-06, + "loss": 0.2442, + "step": 73695 + }, + { + "epoch": 3.44, + "learning_rate": 8.60878152775383e-06, + "loss": 0.105, + "step": 73700 + }, + { + "epoch": 3.44, + "learning_rate": 8.607997742699042e-06, + "loss": 0.0184, + "step": 73705 + }, + { + "epoch": 3.44, + "learning_rate": 8.607213957644256e-06, + "loss": 0.0643, + "step": 73710 + }, + { + "epoch": 3.44, + "learning_rate": 8.60643017258947e-06, + "loss": 0.0301, + "step": 73715 + }, + { + "epoch": 3.44, + "learning_rate": 8.605646387534684e-06, + "loss": 0.0908, + "step": 73720 + }, + { + "epoch": 3.44, + "learning_rate": 8.604862602479896e-06, + "loss": 0.057, + "step": 73725 + }, + { + "epoch": 3.44, + "learning_rate": 8.60407881742511e-06, + "loss": 0.0879, + "step": 73730 + }, + { + "epoch": 3.44, + "learning_rate": 8.603295032370324e-06, + "loss": 0.1638, + "step": 73735 + }, + { + "epoch": 3.44, + "learning_rate": 8.602511247315538e-06, + "loss": 0.1459, + "step": 73740 + }, + { + "epoch": 3.44, + "learning_rate": 8.60172746226075e-06, + "loss": 0.3662, + "step": 73745 + }, + { + "epoch": 3.44, + "learning_rate": 8.600943677205964e-06, + "loss": 0.1232, + "step": 73750 + }, + { + "epoch": 3.44, + "learning_rate": 8.600159892151178e-06, + "loss": 0.0439, + "step": 73755 + }, + { + "epoch": 3.44, + "learning_rate": 8.599376107096392e-06, + "loss": 0.0612, + "step": 73760 + }, + { + "epoch": 3.44, + "learning_rate": 8.598592322041604e-06, + "loss": 0.0793, + "step": 73765 + }, + { + "epoch": 3.44, + "learning_rate": 8.597808536986818e-06, + "loss": 0.1031, + "step": 73770 + }, + { + "epoch": 3.44, + "learning_rate": 8.59702475193203e-06, + "loss": 0.1145, + "step": 73775 + }, + { + "epoch": 3.44, + "learning_rate": 8.596240966877244e-06, + "loss": 0.0756, + "step": 73780 + }, + { + "epoch": 3.44, + "learning_rate": 8.595457181822458e-06, + "loss": 0.1199, + "step": 73785 + }, + { + "epoch": 3.44, + "learning_rate": 8.594673396767672e-06, + "loss": 0.1422, + "step": 73790 + }, + { + "epoch": 3.44, + "learning_rate": 8.593889611712884e-06, + "loss": 0.2619, + "step": 73795 + }, + { + "epoch": 3.44, + "learning_rate": 8.593105826658098e-06, + "loss": 0.0832, + "step": 73800 + }, + { + "epoch": 3.44, + "learning_rate": 8.592322041603312e-06, + "loss": 0.0297, + "step": 73805 + }, + { + "epoch": 3.44, + "learning_rate": 8.591538256548526e-06, + "loss": 0.0311, + "step": 73810 + }, + { + "epoch": 3.44, + "learning_rate": 8.590754471493738e-06, + "loss": 0.0356, + "step": 73815 + }, + { + "epoch": 3.44, + "learning_rate": 8.589970686438952e-06, + "loss": 0.1395, + "step": 73820 + }, + { + "epoch": 3.44, + "learning_rate": 8.589186901384166e-06, + "loss": 0.321, + "step": 73825 + }, + { + "epoch": 3.45, + "learning_rate": 8.58840311632938e-06, + "loss": 0.1941, + "step": 73830 + }, + { + "epoch": 3.45, + "learning_rate": 8.587619331274592e-06, + "loss": 0.1362, + "step": 73835 + }, + { + "epoch": 3.45, + "learning_rate": 8.586835546219806e-06, + "loss": 0.1958, + "step": 73840 + }, + { + "epoch": 3.45, + "learning_rate": 8.586051761165018e-06, + "loss": 0.3383, + "step": 73845 + }, + { + "epoch": 3.45, + "learning_rate": 8.585267976110232e-06, + "loss": 0.0636, + "step": 73850 + }, + { + "epoch": 3.45, + "learning_rate": 8.584484191055446e-06, + "loss": 0.041, + "step": 73855 + }, + { + "epoch": 3.45, + "learning_rate": 8.58370040600066e-06, + "loss": 0.0478, + "step": 73860 + }, + { + "epoch": 3.45, + "learning_rate": 8.582916620945872e-06, + "loss": 0.0538, + "step": 73865 + }, + { + "epoch": 3.45, + "learning_rate": 8.582132835891086e-06, + "loss": 0.1034, + "step": 73870 + }, + { + "epoch": 3.45, + "learning_rate": 8.5813490508363e-06, + "loss": 0.1014, + "step": 73875 + }, + { + "epoch": 3.45, + "learning_rate": 8.580565265781513e-06, + "loss": 0.039, + "step": 73880 + }, + { + "epoch": 3.45, + "learning_rate": 8.579781480726727e-06, + "loss": 0.2059, + "step": 73885 + }, + { + "epoch": 3.45, + "learning_rate": 8.57899769567194e-06, + "loss": 0.2326, + "step": 73890 + }, + { + "epoch": 3.45, + "learning_rate": 8.578213910617153e-06, + "loss": 0.2216, + "step": 73895 + }, + { + "epoch": 3.45, + "learning_rate": 8.577430125562366e-06, + "loss": 0.0655, + "step": 73900 + }, + { + "epoch": 3.45, + "learning_rate": 8.57664634050758e-06, + "loss": 0.0306, + "step": 73905 + }, + { + "epoch": 3.45, + "learning_rate": 8.575862555452793e-06, + "loss": 0.041, + "step": 73910 + }, + { + "epoch": 3.45, + "learning_rate": 8.575078770398006e-06, + "loss": 0.0813, + "step": 73915 + }, + { + "epoch": 3.45, + "learning_rate": 8.57429498534322e-06, + "loss": 0.0851, + "step": 73920 + }, + { + "epoch": 3.45, + "learning_rate": 8.573511200288433e-06, + "loss": 0.0716, + "step": 73925 + }, + { + "epoch": 3.45, + "learning_rate": 8.572727415233647e-06, + "loss": 0.1528, + "step": 73930 + }, + { + "epoch": 3.45, + "learning_rate": 8.571943630178861e-06, + "loss": 0.0754, + "step": 73935 + }, + { + "epoch": 3.45, + "learning_rate": 8.571159845124074e-06, + "loss": 0.3233, + "step": 73940 + }, + { + "epoch": 3.45, + "learning_rate": 8.570376060069287e-06, + "loss": 0.2341, + "step": 73945 + }, + { + "epoch": 3.45, + "learning_rate": 8.569592275014501e-06, + "loss": 0.0771, + "step": 73950 + }, + { + "epoch": 3.45, + "learning_rate": 8.568808489959715e-06, + "loss": 0.0268, + "step": 73955 + }, + { + "epoch": 3.45, + "learning_rate": 8.568024704904927e-06, + "loss": 0.0653, + "step": 73960 + }, + { + "epoch": 3.45, + "learning_rate": 8.567240919850141e-06, + "loss": 0.0982, + "step": 73965 + }, + { + "epoch": 3.45, + "learning_rate": 8.566457134795354e-06, + "loss": 0.0533, + "step": 73970 + }, + { + "epoch": 3.45, + "learning_rate": 8.565673349740567e-06, + "loss": 0.0812, + "step": 73975 + }, + { + "epoch": 3.45, + "learning_rate": 8.564889564685781e-06, + "loss": 0.1737, + "step": 73980 + }, + { + "epoch": 3.45, + "learning_rate": 8.564105779630995e-06, + "loss": 0.1171, + "step": 73985 + }, + { + "epoch": 3.45, + "learning_rate": 8.563321994576207e-06, + "loss": 0.2456, + "step": 73990 + }, + { + "epoch": 3.45, + "learning_rate": 8.562538209521421e-06, + "loss": 0.2324, + "step": 73995 + }, + { + "epoch": 3.45, + "learning_rate": 8.561754424466635e-06, + "loss": 0.0594, + "step": 74000 + }, + { + "epoch": 3.45, + "learning_rate": 8.56097063941185e-06, + "loss": 0.0404, + "step": 74005 + }, + { + "epoch": 3.45, + "learning_rate": 8.560186854357061e-06, + "loss": 0.0326, + "step": 74010 + }, + { + "epoch": 3.45, + "learning_rate": 8.559403069302275e-06, + "loss": 0.0492, + "step": 74015 + }, + { + "epoch": 3.45, + "learning_rate": 8.55861928424749e-06, + "loss": 0.0784, + "step": 74020 + }, + { + "epoch": 3.45, + "learning_rate": 8.557835499192703e-06, + "loss": 0.1076, + "step": 74025 + }, + { + "epoch": 3.45, + "learning_rate": 8.557051714137915e-06, + "loss": 0.122, + "step": 74030 + }, + { + "epoch": 3.45, + "learning_rate": 8.55626792908313e-06, + "loss": 0.1381, + "step": 74035 + }, + { + "epoch": 3.45, + "learning_rate": 8.555484144028341e-06, + "loss": 0.1295, + "step": 74040 + }, + { + "epoch": 3.46, + "learning_rate": 8.554700358973555e-06, + "loss": 0.3292, + "step": 74045 + }, + { + "epoch": 3.46, + "learning_rate": 8.55391657391877e-06, + "loss": 0.0633, + "step": 74050 + }, + { + "epoch": 3.46, + "learning_rate": 8.553132788863983e-06, + "loss": 0.039, + "step": 74055 + }, + { + "epoch": 3.46, + "learning_rate": 8.552349003809195e-06, + "loss": 0.031, + "step": 74060 + }, + { + "epoch": 3.46, + "learning_rate": 8.55156521875441e-06, + "loss": 0.1121, + "step": 74065 + }, + { + "epoch": 3.46, + "learning_rate": 8.550781433699623e-06, + "loss": 0.0995, + "step": 74070 + }, + { + "epoch": 3.46, + "learning_rate": 8.549997648644837e-06, + "loss": 0.0764, + "step": 74075 + }, + { + "epoch": 3.46, + "learning_rate": 8.54921386359005e-06, + "loss": 0.1044, + "step": 74080 + }, + { + "epoch": 3.46, + "learning_rate": 8.548430078535263e-06, + "loss": 0.072, + "step": 74085 + }, + { + "epoch": 3.46, + "learning_rate": 8.547646293480477e-06, + "loss": 0.2137, + "step": 74090 + }, + { + "epoch": 3.46, + "learning_rate": 8.54686250842569e-06, + "loss": 0.2968, + "step": 74095 + }, + { + "epoch": 3.46, + "learning_rate": 8.546078723370903e-06, + "loss": 0.0581, + "step": 74100 + }, + { + "epoch": 3.46, + "learning_rate": 8.545294938316117e-06, + "loss": 0.0139, + "step": 74105 + }, + { + "epoch": 3.46, + "learning_rate": 8.54451115326133e-06, + "loss": 0.032, + "step": 74110 + }, + { + "epoch": 3.46, + "learning_rate": 8.543727368206543e-06, + "loss": 0.1237, + "step": 74115 + }, + { + "epoch": 3.46, + "learning_rate": 8.542943583151757e-06, + "loss": 0.0868, + "step": 74120 + }, + { + "epoch": 3.46, + "learning_rate": 8.542159798096971e-06, + "loss": 0.0434, + "step": 74125 + }, + { + "epoch": 3.46, + "learning_rate": 8.541376013042183e-06, + "loss": 0.0577, + "step": 74130 + }, + { + "epoch": 3.46, + "learning_rate": 8.540592227987397e-06, + "loss": 0.1186, + "step": 74135 + }, + { + "epoch": 3.46, + "learning_rate": 8.539808442932611e-06, + "loss": 0.2465, + "step": 74140 + }, + { + "epoch": 3.46, + "learning_rate": 8.539024657877825e-06, + "loss": 0.4115, + "step": 74145 + }, + { + "epoch": 3.46, + "learning_rate": 8.538240872823039e-06, + "loss": 0.0344, + "step": 74150 + }, + { + "epoch": 3.46, + "learning_rate": 8.537457087768251e-06, + "loss": 0.0311, + "step": 74155 + }, + { + "epoch": 3.46, + "learning_rate": 8.536673302713463e-06, + "loss": 0.0506, + "step": 74160 + }, + { + "epoch": 3.46, + "learning_rate": 8.535889517658677e-06, + "loss": 0.1081, + "step": 74165 + }, + { + "epoch": 3.46, + "learning_rate": 8.535105732603891e-06, + "loss": 0.0612, + "step": 74170 + }, + { + "epoch": 3.46, + "learning_rate": 8.534321947549105e-06, + "loss": 0.0644, + "step": 74175 + }, + { + "epoch": 3.46, + "learning_rate": 8.533538162494317e-06, + "loss": 0.0716, + "step": 74180 + }, + { + "epoch": 3.46, + "learning_rate": 8.532754377439531e-06, + "loss": 0.219, + "step": 74185 + }, + { + "epoch": 3.46, + "learning_rate": 8.531970592384745e-06, + "loss": 0.1413, + "step": 74190 + }, + { + "epoch": 3.46, + "learning_rate": 8.531186807329959e-06, + "loss": 0.2697, + "step": 74195 + }, + { + "epoch": 3.46, + "learning_rate": 8.530403022275173e-06, + "loss": 0.1026, + "step": 74200 + }, + { + "epoch": 3.46, + "learning_rate": 8.529619237220385e-06, + "loss": 0.1033, + "step": 74205 + }, + { + "epoch": 3.46, + "learning_rate": 8.528835452165599e-06, + "loss": 0.0327, + "step": 74210 + }, + { + "epoch": 3.46, + "learning_rate": 8.528051667110813e-06, + "loss": 0.0793, + "step": 74215 + }, + { + "epoch": 3.46, + "learning_rate": 8.527267882056027e-06, + "loss": 0.0608, + "step": 74220 + }, + { + "epoch": 3.46, + "learning_rate": 8.526484097001239e-06, + "loss": 0.1283, + "step": 74225 + }, + { + "epoch": 3.46, + "learning_rate": 8.525700311946453e-06, + "loss": 0.1363, + "step": 74230 + }, + { + "epoch": 3.46, + "learning_rate": 8.524916526891665e-06, + "loss": 0.1228, + "step": 74235 + }, + { + "epoch": 3.46, + "learning_rate": 8.524132741836879e-06, + "loss": 0.1047, + "step": 74240 + }, + { + "epoch": 3.46, + "learning_rate": 8.523348956782093e-06, + "loss": 0.3583, + "step": 74245 + }, + { + "epoch": 3.46, + "learning_rate": 8.522565171727307e-06, + "loss": 0.0644, + "step": 74250 + }, + { + "epoch": 3.46, + "learning_rate": 8.521781386672519e-06, + "loss": 0.0303, + "step": 74255 + }, + { + "epoch": 3.47, + "learning_rate": 8.520997601617733e-06, + "loss": 0.0464, + "step": 74260 + }, + { + "epoch": 3.47, + "learning_rate": 8.520213816562947e-06, + "loss": 0.0405, + "step": 74265 + }, + { + "epoch": 3.47, + "learning_rate": 8.51943003150816e-06, + "loss": 0.0766, + "step": 74270 + }, + { + "epoch": 3.47, + "learning_rate": 8.518646246453373e-06, + "loss": 0.0909, + "step": 74275 + }, + { + "epoch": 3.47, + "learning_rate": 8.517862461398587e-06, + "loss": 0.0987, + "step": 74280 + }, + { + "epoch": 3.47, + "learning_rate": 8.5170786763438e-06, + "loss": 0.0941, + "step": 74285 + }, + { + "epoch": 3.47, + "learning_rate": 8.516294891289013e-06, + "loss": 0.1221, + "step": 74290 + }, + { + "epoch": 3.47, + "learning_rate": 8.515511106234227e-06, + "loss": 0.2891, + "step": 74295 + }, + { + "epoch": 3.47, + "learning_rate": 8.51472732117944e-06, + "loss": 0.104, + "step": 74300 + }, + { + "epoch": 3.47, + "learning_rate": 8.513943536124653e-06, + "loss": 0.0496, + "step": 74305 + }, + { + "epoch": 3.47, + "learning_rate": 8.513159751069867e-06, + "loss": 0.0794, + "step": 74310 + }, + { + "epoch": 3.47, + "learning_rate": 8.512375966015081e-06, + "loss": 0.1103, + "step": 74315 + }, + { + "epoch": 3.47, + "learning_rate": 8.511592180960295e-06, + "loss": 0.0626, + "step": 74320 + }, + { + "epoch": 3.47, + "learning_rate": 8.510808395905507e-06, + "loss": 0.1357, + "step": 74325 + }, + { + "epoch": 3.47, + "learning_rate": 8.510024610850721e-06, + "loss": 0.1465, + "step": 74330 + }, + { + "epoch": 3.47, + "learning_rate": 8.509240825795935e-06, + "loss": 0.1323, + "step": 74335 + }, + { + "epoch": 3.47, + "learning_rate": 8.508457040741149e-06, + "loss": 0.2311, + "step": 74340 + }, + { + "epoch": 3.47, + "learning_rate": 8.507673255686361e-06, + "loss": 0.3059, + "step": 74345 + }, + { + "epoch": 3.47, + "learning_rate": 8.506889470631575e-06, + "loss": 0.0809, + "step": 74350 + }, + { + "epoch": 3.47, + "learning_rate": 8.506105685576787e-06, + "loss": 0.0233, + "step": 74355 + }, + { + "epoch": 3.47, + "learning_rate": 8.505321900522001e-06, + "loss": 0.0304, + "step": 74360 + }, + { + "epoch": 3.47, + "learning_rate": 8.504538115467215e-06, + "loss": 0.0697, + "step": 74365 + }, + { + "epoch": 3.47, + "learning_rate": 8.503754330412429e-06, + "loss": 0.1096, + "step": 74370 + }, + { + "epoch": 3.47, + "learning_rate": 8.502970545357641e-06, + "loss": 0.1051, + "step": 74375 + }, + { + "epoch": 3.47, + "learning_rate": 8.502186760302855e-06, + "loss": 0.0537, + "step": 74380 + }, + { + "epoch": 3.47, + "learning_rate": 8.501402975248069e-06, + "loss": 0.1299, + "step": 74385 + }, + { + "epoch": 3.47, + "learning_rate": 8.500619190193283e-06, + "loss": 0.2017, + "step": 74390 + }, + { + "epoch": 3.47, + "learning_rate": 8.499835405138495e-06, + "loss": 0.2505, + "step": 74395 + }, + { + "epoch": 3.47, + "learning_rate": 8.499051620083709e-06, + "loss": 0.0293, + "step": 74400 + }, + { + "epoch": 3.47, + "learning_rate": 8.498267835028923e-06, + "loss": 0.0581, + "step": 74405 + }, + { + "epoch": 3.47, + "learning_rate": 8.497484049974137e-06, + "loss": 0.0503, + "step": 74410 + }, + { + "epoch": 3.47, + "learning_rate": 8.49670026491935e-06, + "loss": 0.0547, + "step": 74415 + }, + { + "epoch": 3.47, + "learning_rate": 8.495916479864563e-06, + "loss": 0.0854, + "step": 74420 + }, + { + "epoch": 3.47, + "learning_rate": 8.495132694809775e-06, + "loss": 0.0962, + "step": 74425 + }, + { + "epoch": 3.47, + "learning_rate": 8.494348909754989e-06, + "loss": 0.0931, + "step": 74430 + }, + { + "epoch": 3.47, + "learning_rate": 8.493565124700203e-06, + "loss": 0.1318, + "step": 74435 + }, + { + "epoch": 3.47, + "learning_rate": 8.492781339645417e-06, + "loss": 0.223, + "step": 74440 + }, + { + "epoch": 3.47, + "learning_rate": 8.491997554590629e-06, + "loss": 0.2705, + "step": 74445 + }, + { + "epoch": 3.47, + "learning_rate": 8.491213769535843e-06, + "loss": 0.0941, + "step": 74450 + }, + { + "epoch": 3.47, + "learning_rate": 8.490429984481057e-06, + "loss": 0.0466, + "step": 74455 + }, + { + "epoch": 3.47, + "learning_rate": 8.48964619942627e-06, + "loss": 0.0315, + "step": 74460 + }, + { + "epoch": 3.47, + "learning_rate": 8.488862414371484e-06, + "loss": 0.044, + "step": 74465 + }, + { + "epoch": 3.47, + "learning_rate": 8.488078629316697e-06, + "loss": 0.1042, + "step": 74470 + }, + { + "epoch": 3.48, + "learning_rate": 8.48729484426191e-06, + "loss": 0.071, + "step": 74475 + }, + { + "epoch": 3.48, + "learning_rate": 8.486511059207125e-06, + "loss": 0.1504, + "step": 74480 + }, + { + "epoch": 3.48, + "learning_rate": 8.485727274152337e-06, + "loss": 0.1746, + "step": 74485 + }, + { + "epoch": 3.48, + "learning_rate": 8.48494348909755e-06, + "loss": 0.295, + "step": 74490 + }, + { + "epoch": 3.48, + "learning_rate": 8.484159704042763e-06, + "loss": 0.2217, + "step": 74495 + }, + { + "epoch": 3.48, + "learning_rate": 8.483375918987977e-06, + "loss": 0.0303, + "step": 74500 + }, + { + "epoch": 3.48, + "learning_rate": 8.48259213393319e-06, + "loss": 0.0239, + "step": 74505 + }, + { + "epoch": 3.48, + "learning_rate": 8.481808348878405e-06, + "loss": 0.0651, + "step": 74510 + }, + { + "epoch": 3.48, + "learning_rate": 8.481024563823618e-06, + "loss": 0.0711, + "step": 74515 + }, + { + "epoch": 3.48, + "learning_rate": 8.48024077876883e-06, + "loss": 0.162, + "step": 74520 + }, + { + "epoch": 3.48, + "learning_rate": 8.479456993714045e-06, + "loss": 0.0623, + "step": 74525 + }, + { + "epoch": 3.48, + "learning_rate": 8.478673208659258e-06, + "loss": 0.0458, + "step": 74530 + }, + { + "epoch": 3.48, + "learning_rate": 8.477889423604472e-06, + "loss": 0.0875, + "step": 74535 + }, + { + "epoch": 3.48, + "learning_rate": 8.477105638549685e-06, + "loss": 0.2352, + "step": 74540 + }, + { + "epoch": 3.48, + "learning_rate": 8.476321853494899e-06, + "loss": 0.2607, + "step": 74545 + }, + { + "epoch": 3.48, + "learning_rate": 8.47553806844011e-06, + "loss": 0.0826, + "step": 74550 + }, + { + "epoch": 3.48, + "learning_rate": 8.474754283385325e-06, + "loss": 0.0826, + "step": 74555 + }, + { + "epoch": 3.48, + "learning_rate": 8.473970498330539e-06, + "loss": 0.0637, + "step": 74560 + }, + { + "epoch": 3.48, + "learning_rate": 8.473186713275752e-06, + "loss": 0.053, + "step": 74565 + }, + { + "epoch": 3.48, + "learning_rate": 8.472402928220965e-06, + "loss": 0.064, + "step": 74570 + }, + { + "epoch": 3.48, + "learning_rate": 8.471619143166179e-06, + "loss": 0.0851, + "step": 74575 + }, + { + "epoch": 3.48, + "learning_rate": 8.470835358111392e-06, + "loss": 0.1457, + "step": 74580 + }, + { + "epoch": 3.48, + "learning_rate": 8.470051573056606e-06, + "loss": 0.1017, + "step": 74585 + }, + { + "epoch": 3.48, + "learning_rate": 8.469267788001819e-06, + "loss": 0.1879, + "step": 74590 + }, + { + "epoch": 3.48, + "learning_rate": 8.468484002947032e-06, + "loss": 0.3116, + "step": 74595 + }, + { + "epoch": 3.48, + "learning_rate": 8.467700217892246e-06, + "loss": 0.1218, + "step": 74600 + }, + { + "epoch": 3.48, + "learning_rate": 8.46691643283746e-06, + "loss": 0.0432, + "step": 74605 + }, + { + "epoch": 3.48, + "learning_rate": 8.466132647782673e-06, + "loss": 0.0217, + "step": 74610 + }, + { + "epoch": 3.48, + "learning_rate": 8.465348862727886e-06, + "loss": 0.0888, + "step": 74615 + }, + { + "epoch": 3.48, + "learning_rate": 8.464565077673099e-06, + "loss": 0.0544, + "step": 74620 + }, + { + "epoch": 3.48, + "learning_rate": 8.463781292618313e-06, + "loss": 0.0501, + "step": 74625 + }, + { + "epoch": 3.48, + "learning_rate": 8.462997507563526e-06, + "loss": 0.0863, + "step": 74630 + }, + { + "epoch": 3.48, + "learning_rate": 8.46221372250874e-06, + "loss": 0.2043, + "step": 74635 + }, + { + "epoch": 3.48, + "learning_rate": 8.461429937453953e-06, + "loss": 0.288, + "step": 74640 + }, + { + "epoch": 3.48, + "learning_rate": 8.460646152399166e-06, + "loss": 0.3324, + "step": 74645 + }, + { + "epoch": 3.48, + "learning_rate": 8.45986236734438e-06, + "loss": 0.0586, + "step": 74650 + }, + { + "epoch": 3.48, + "learning_rate": 8.459078582289594e-06, + "loss": 0.0707, + "step": 74655 + }, + { + "epoch": 3.48, + "learning_rate": 8.458294797234806e-06, + "loss": 0.0335, + "step": 74660 + }, + { + "epoch": 3.48, + "learning_rate": 8.45751101218002e-06, + "loss": 0.0967, + "step": 74665 + }, + { + "epoch": 3.48, + "learning_rate": 8.456727227125234e-06, + "loss": 0.0504, + "step": 74670 + }, + { + "epoch": 3.48, + "learning_rate": 8.455943442070448e-06, + "loss": 0.0912, + "step": 74675 + }, + { + "epoch": 3.48, + "learning_rate": 8.45515965701566e-06, + "loss": 0.1916, + "step": 74680 + }, + { + "epoch": 3.48, + "learning_rate": 8.454375871960874e-06, + "loss": 0.195, + "step": 74685 + }, + { + "epoch": 3.49, + "learning_rate": 8.453592086906087e-06, + "loss": 0.1557, + "step": 74690 + }, + { + "epoch": 3.49, + "learning_rate": 8.4528083018513e-06, + "loss": 0.3294, + "step": 74695 + }, + { + "epoch": 3.49, + "learning_rate": 8.452024516796514e-06, + "loss": 0.1187, + "step": 74700 + }, + { + "epoch": 3.49, + "learning_rate": 8.451240731741728e-06, + "loss": 0.0509, + "step": 74705 + }, + { + "epoch": 3.49, + "learning_rate": 8.45045694668694e-06, + "loss": 0.0239, + "step": 74710 + }, + { + "epoch": 3.49, + "learning_rate": 8.449673161632154e-06, + "loss": 0.0198, + "step": 74715 + }, + { + "epoch": 3.49, + "learning_rate": 8.448889376577368e-06, + "loss": 0.0729, + "step": 74720 + }, + { + "epoch": 3.49, + "learning_rate": 8.448105591522582e-06, + "loss": 0.0967, + "step": 74725 + }, + { + "epoch": 3.49, + "learning_rate": 8.447321806467796e-06, + "loss": 0.1026, + "step": 74730 + }, + { + "epoch": 3.49, + "learning_rate": 8.446538021413008e-06, + "loss": 0.1301, + "step": 74735 + }, + { + "epoch": 3.49, + "learning_rate": 8.445754236358222e-06, + "loss": 0.2965, + "step": 74740 + }, + { + "epoch": 3.49, + "learning_rate": 8.444970451303434e-06, + "loss": 0.3699, + "step": 74745 + }, + { + "epoch": 3.49, + "learning_rate": 8.444186666248648e-06, + "loss": 0.0774, + "step": 74750 + }, + { + "epoch": 3.49, + "learning_rate": 8.443402881193862e-06, + "loss": 0.036, + "step": 74755 + }, + { + "epoch": 3.49, + "learning_rate": 8.442619096139074e-06, + "loss": 0.0738, + "step": 74760 + }, + { + "epoch": 3.49, + "learning_rate": 8.441835311084288e-06, + "loss": 0.146, + "step": 74765 + }, + { + "epoch": 3.49, + "learning_rate": 8.441051526029502e-06, + "loss": 0.0819, + "step": 74770 + }, + { + "epoch": 3.49, + "learning_rate": 8.440267740974716e-06, + "loss": 0.0764, + "step": 74775 + }, + { + "epoch": 3.49, + "learning_rate": 8.43948395591993e-06, + "loss": 0.1826, + "step": 74780 + }, + { + "epoch": 3.49, + "learning_rate": 8.438700170865142e-06, + "loss": 0.1201, + "step": 74785 + }, + { + "epoch": 3.49, + "learning_rate": 8.437916385810356e-06, + "loss": 0.2108, + "step": 74790 + }, + { + "epoch": 3.49, + "learning_rate": 8.43713260075557e-06, + "loss": 0.3365, + "step": 74795 + }, + { + "epoch": 3.49, + "learning_rate": 8.436348815700784e-06, + "loss": 0.0217, + "step": 74800 + }, + { + "epoch": 3.49, + "learning_rate": 8.435565030645996e-06, + "loss": 0.0271, + "step": 74805 + }, + { + "epoch": 3.49, + "learning_rate": 8.434781245591208e-06, + "loss": 0.1088, + "step": 74810 + }, + { + "epoch": 3.49, + "learning_rate": 8.433997460536422e-06, + "loss": 0.0373, + "step": 74815 + }, + { + "epoch": 3.49, + "learning_rate": 8.433213675481636e-06, + "loss": 0.0418, + "step": 74820 + }, + { + "epoch": 3.49, + "learning_rate": 8.43242989042685e-06, + "loss": 0.0449, + "step": 74825 + }, + { + "epoch": 3.49, + "learning_rate": 8.431646105372064e-06, + "loss": 0.1189, + "step": 74830 + }, + { + "epoch": 3.49, + "learning_rate": 8.430862320317276e-06, + "loss": 0.2114, + "step": 74835 + }, + { + "epoch": 3.49, + "learning_rate": 8.43007853526249e-06, + "loss": 0.2039, + "step": 74840 + }, + { + "epoch": 3.49, + "learning_rate": 8.429294750207704e-06, + "loss": 0.2547, + "step": 74845 + }, + { + "epoch": 3.49, + "learning_rate": 8.428510965152918e-06, + "loss": 0.109, + "step": 74850 + }, + { + "epoch": 3.49, + "learning_rate": 8.42772718009813e-06, + "loss": 0.0629, + "step": 74855 + }, + { + "epoch": 3.49, + "learning_rate": 8.426943395043344e-06, + "loss": 0.0208, + "step": 74860 + }, + { + "epoch": 3.49, + "learning_rate": 8.426159609988558e-06, + "loss": 0.058, + "step": 74865 + }, + { + "epoch": 3.49, + "learning_rate": 8.425375824933772e-06, + "loss": 0.0206, + "step": 74870 + }, + { + "epoch": 3.49, + "learning_rate": 8.424592039878984e-06, + "loss": 0.0756, + "step": 74875 + }, + { + "epoch": 3.49, + "learning_rate": 8.423808254824198e-06, + "loss": 0.0402, + "step": 74880 + }, + { + "epoch": 3.49, + "learning_rate": 8.42302446976941e-06, + "loss": 0.1043, + "step": 74885 + }, + { + "epoch": 3.49, + "learning_rate": 8.422240684714624e-06, + "loss": 0.2587, + "step": 74890 + }, + { + "epoch": 3.49, + "learning_rate": 8.421456899659838e-06, + "loss": 0.283, + "step": 74895 + }, + { + "epoch": 3.49, + "learning_rate": 8.420673114605052e-06, + "loss": 0.0697, + "step": 74900 + }, + { + "epoch": 3.5, + "learning_rate": 8.419889329550264e-06, + "loss": 0.0106, + "step": 74905 + }, + { + "epoch": 3.5, + "learning_rate": 8.419105544495478e-06, + "loss": 0.0514, + "step": 74910 + }, + { + "epoch": 3.5, + "learning_rate": 8.418321759440692e-06, + "loss": 0.0274, + "step": 74915 + }, + { + "epoch": 3.5, + "learning_rate": 8.417537974385906e-06, + "loss": 0.1058, + "step": 74920 + }, + { + "epoch": 3.5, + "learning_rate": 8.416754189331118e-06, + "loss": 0.0667, + "step": 74925 + }, + { + "epoch": 3.5, + "learning_rate": 8.415970404276332e-06, + "loss": 0.0528, + "step": 74930 + }, + { + "epoch": 3.5, + "learning_rate": 8.415186619221546e-06, + "loss": 0.1184, + "step": 74935 + }, + { + "epoch": 3.5, + "learning_rate": 8.414402834166758e-06, + "loss": 0.2604, + "step": 74940 + }, + { + "epoch": 3.5, + "learning_rate": 8.413619049111972e-06, + "loss": 0.3226, + "step": 74945 + }, + { + "epoch": 3.5, + "learning_rate": 8.412835264057186e-06, + "loss": 0.0827, + "step": 74950 + }, + { + "epoch": 3.5, + "learning_rate": 8.412051479002398e-06, + "loss": 0.0426, + "step": 74955 + }, + { + "epoch": 3.5, + "learning_rate": 8.411267693947612e-06, + "loss": 0.0343, + "step": 74960 + }, + { + "epoch": 3.5, + "learning_rate": 8.410483908892826e-06, + "loss": 0.1049, + "step": 74965 + }, + { + "epoch": 3.5, + "learning_rate": 8.40970012383804e-06, + "loss": 0.056, + "step": 74970 + }, + { + "epoch": 3.5, + "learning_rate": 8.408916338783252e-06, + "loss": 0.1389, + "step": 74975 + }, + { + "epoch": 3.5, + "learning_rate": 8.408132553728466e-06, + "loss": 0.0572, + "step": 74980 + }, + { + "epoch": 3.5, + "learning_rate": 8.40734876867368e-06, + "loss": 0.115, + "step": 74985 + }, + { + "epoch": 3.5, + "learning_rate": 8.406564983618894e-06, + "loss": 0.2138, + "step": 74990 + }, + { + "epoch": 3.5, + "learning_rate": 8.405781198564108e-06, + "loss": 0.3076, + "step": 74995 + }, + { + "epoch": 3.5, + "learning_rate": 8.40499741350932e-06, + "loss": 0.0509, + "step": 75000 + }, + { + "epoch": 3.5, + "learning_rate": 8.404213628454532e-06, + "loss": 0.0308, + "step": 75005 + }, + { + "epoch": 3.5, + "learning_rate": 8.403429843399746e-06, + "loss": 0.0843, + "step": 75010 + }, + { + "epoch": 3.5, + "learning_rate": 8.40264605834496e-06, + "loss": 0.1278, + "step": 75015 + }, + { + "epoch": 3.5, + "learning_rate": 8.401862273290174e-06, + "loss": 0.0298, + "step": 75020 + }, + { + "epoch": 3.5, + "learning_rate": 8.401078488235386e-06, + "loss": 0.0982, + "step": 75025 + }, + { + "epoch": 3.5, + "learning_rate": 8.4002947031806e-06, + "loss": 0.1339, + "step": 75030 + }, + { + "epoch": 3.5, + "learning_rate": 8.399510918125814e-06, + "loss": 0.1617, + "step": 75035 + }, + { + "epoch": 3.5, + "learning_rate": 8.398727133071028e-06, + "loss": 0.2432, + "step": 75040 + }, + { + "epoch": 3.5, + "learning_rate": 8.397943348016242e-06, + "loss": 0.2742, + "step": 75045 + }, + { + "epoch": 3.5, + "learning_rate": 8.397159562961454e-06, + "loss": 0.0797, + "step": 75050 + }, + { + "epoch": 3.5, + "learning_rate": 8.396375777906668e-06, + "loss": 0.048, + "step": 75055 + }, + { + "epoch": 3.5, + "learning_rate": 8.395591992851882e-06, + "loss": 0.035, + "step": 75060 + }, + { + "epoch": 3.5, + "learning_rate": 8.394808207797096e-06, + "loss": 0.0585, + "step": 75065 + }, + { + "epoch": 3.5, + "learning_rate": 8.394024422742308e-06, + "loss": 0.1197, + "step": 75070 + }, + { + "epoch": 3.5, + "learning_rate": 8.39324063768752e-06, + "loss": 0.0651, + "step": 75075 + }, + { + "epoch": 3.5, + "learning_rate": 8.392456852632734e-06, + "loss": 0.1699, + "step": 75080 + }, + { + "epoch": 3.5, + "learning_rate": 8.391673067577948e-06, + "loss": 0.1379, + "step": 75085 + }, + { + "epoch": 3.5, + "learning_rate": 8.390889282523162e-06, + "loss": 0.2202, + "step": 75090 + }, + { + "epoch": 3.5, + "learning_rate": 8.390105497468376e-06, + "loss": 0.3336, + "step": 75095 + }, + { + "epoch": 3.5, + "learning_rate": 8.389321712413588e-06, + "loss": 0.0507, + "step": 75100 + }, + { + "epoch": 3.5, + "learning_rate": 8.388537927358802e-06, + "loss": 0.0492, + "step": 75105 + }, + { + "epoch": 3.5, + "learning_rate": 8.387754142304016e-06, + "loss": 0.0252, + "step": 75110 + }, + { + "epoch": 3.5, + "learning_rate": 8.38697035724923e-06, + "loss": 0.1295, + "step": 75115 + }, + { + "epoch": 3.51, + "learning_rate": 8.386186572194442e-06, + "loss": 0.1012, + "step": 75120 + }, + { + "epoch": 3.51, + "learning_rate": 8.385402787139656e-06, + "loss": 0.1108, + "step": 75125 + }, + { + "epoch": 3.51, + "learning_rate": 8.38461900208487e-06, + "loss": 0.0637, + "step": 75130 + }, + { + "epoch": 3.51, + "learning_rate": 8.383835217030082e-06, + "loss": 0.1363, + "step": 75135 + }, + { + "epoch": 3.51, + "learning_rate": 8.383051431975296e-06, + "loss": 0.2128, + "step": 75140 + }, + { + "epoch": 3.51, + "learning_rate": 8.38226764692051e-06, + "loss": 0.2319, + "step": 75145 + }, + { + "epoch": 3.51, + "learning_rate": 8.381483861865722e-06, + "loss": 0.0976, + "step": 75150 + }, + { + "epoch": 3.51, + "learning_rate": 8.380700076810936e-06, + "loss": 0.0321, + "step": 75155 + }, + { + "epoch": 3.51, + "learning_rate": 8.37991629175615e-06, + "loss": 0.0293, + "step": 75160 + }, + { + "epoch": 3.51, + "learning_rate": 8.379132506701364e-06, + "loss": 0.0679, + "step": 75165 + }, + { + "epoch": 3.51, + "learning_rate": 8.378348721646576e-06, + "loss": 0.1282, + "step": 75170 + }, + { + "epoch": 3.51, + "learning_rate": 8.37756493659179e-06, + "loss": 0.0671, + "step": 75175 + }, + { + "epoch": 3.51, + "learning_rate": 8.376781151537004e-06, + "loss": 0.0583, + "step": 75180 + }, + { + "epoch": 3.51, + "learning_rate": 8.375997366482217e-06, + "loss": 0.1728, + "step": 75185 + }, + { + "epoch": 3.51, + "learning_rate": 8.37521358142743e-06, + "loss": 0.1375, + "step": 75190 + }, + { + "epoch": 3.51, + "learning_rate": 8.374429796372644e-06, + "loss": 0.1859, + "step": 75195 + }, + { + "epoch": 3.51, + "learning_rate": 8.373646011317856e-06, + "loss": 0.0792, + "step": 75200 + }, + { + "epoch": 3.51, + "learning_rate": 8.37286222626307e-06, + "loss": 0.0548, + "step": 75205 + }, + { + "epoch": 3.51, + "learning_rate": 8.372078441208284e-06, + "loss": 0.0164, + "step": 75210 + }, + { + "epoch": 3.51, + "learning_rate": 8.371294656153498e-06, + "loss": 0.0193, + "step": 75215 + }, + { + "epoch": 3.51, + "learning_rate": 8.37051087109871e-06, + "loss": 0.0914, + "step": 75220 + }, + { + "epoch": 3.51, + "learning_rate": 8.369727086043924e-06, + "loss": 0.1217, + "step": 75225 + }, + { + "epoch": 3.51, + "learning_rate": 8.368943300989138e-06, + "loss": 0.1028, + "step": 75230 + }, + { + "epoch": 3.51, + "learning_rate": 8.368159515934351e-06, + "loss": 0.1744, + "step": 75235 + }, + { + "epoch": 3.51, + "learning_rate": 8.367375730879564e-06, + "loss": 0.2296, + "step": 75240 + }, + { + "epoch": 3.51, + "learning_rate": 8.366591945824778e-06, + "loss": 0.2154, + "step": 75245 + }, + { + "epoch": 3.51, + "learning_rate": 8.365808160769991e-06, + "loss": 0.0799, + "step": 75250 + }, + { + "epoch": 3.51, + "learning_rate": 8.365024375715205e-06, + "loss": 0.0403, + "step": 75255 + }, + { + "epoch": 3.51, + "learning_rate": 8.36424059066042e-06, + "loss": 0.0285, + "step": 75260 + }, + { + "epoch": 3.51, + "learning_rate": 8.363456805605631e-06, + "loss": 0.0687, + "step": 75265 + }, + { + "epoch": 3.51, + "learning_rate": 8.362673020550844e-06, + "loss": 0.1119, + "step": 75270 + }, + { + "epoch": 3.51, + "learning_rate": 8.361889235496058e-06, + "loss": 0.0707, + "step": 75275 + }, + { + "epoch": 3.51, + "learning_rate": 8.361105450441272e-06, + "loss": 0.0419, + "step": 75280 + }, + { + "epoch": 3.51, + "learning_rate": 8.360321665386485e-06, + "loss": 0.1551, + "step": 75285 + }, + { + "epoch": 3.51, + "learning_rate": 8.359537880331698e-06, + "loss": 0.209, + "step": 75290 + }, + { + "epoch": 3.51, + "learning_rate": 8.358754095276912e-06, + "loss": 0.3307, + "step": 75295 + }, + { + "epoch": 3.51, + "learning_rate": 8.357970310222125e-06, + "loss": 0.0807, + "step": 75300 + }, + { + "epoch": 3.51, + "learning_rate": 8.35718652516734e-06, + "loss": 0.0273, + "step": 75305 + }, + { + "epoch": 3.51, + "learning_rate": 8.356402740112553e-06, + "loss": 0.0654, + "step": 75310 + }, + { + "epoch": 3.51, + "learning_rate": 8.355618955057765e-06, + "loss": 0.0535, + "step": 75315 + }, + { + "epoch": 3.51, + "learning_rate": 8.35483517000298e-06, + "loss": 0.0402, + "step": 75320 + }, + { + "epoch": 3.51, + "learning_rate": 8.354051384948193e-06, + "loss": 0.0842, + "step": 75325 + }, + { + "epoch": 3.52, + "learning_rate": 8.353267599893405e-06, + "loss": 0.1353, + "step": 75330 + }, + { + "epoch": 3.52, + "learning_rate": 8.35248381483862e-06, + "loss": 0.1386, + "step": 75335 + }, + { + "epoch": 3.52, + "learning_rate": 8.351700029783832e-06, + "loss": 0.1482, + "step": 75340 + }, + { + "epoch": 3.52, + "learning_rate": 8.350916244729046e-06, + "loss": 0.4411, + "step": 75345 + }, + { + "epoch": 3.52, + "learning_rate": 8.35013245967426e-06, + "loss": 0.0583, + "step": 75350 + }, + { + "epoch": 3.52, + "learning_rate": 8.349348674619473e-06, + "loss": 0.0396, + "step": 75355 + }, + { + "epoch": 3.52, + "learning_rate": 8.348564889564687e-06, + "loss": 0.02, + "step": 75360 + }, + { + "epoch": 3.52, + "learning_rate": 8.3477811045099e-06, + "loss": 0.0512, + "step": 75365 + }, + { + "epoch": 3.52, + "learning_rate": 8.346997319455113e-06, + "loss": 0.0338, + "step": 75370 + }, + { + "epoch": 3.52, + "learning_rate": 8.346213534400327e-06, + "loss": 0.0807, + "step": 75375 + }, + { + "epoch": 3.52, + "learning_rate": 8.345429749345541e-06, + "loss": 0.1244, + "step": 75380 + }, + { + "epoch": 3.52, + "learning_rate": 8.344645964290753e-06, + "loss": 0.1919, + "step": 75385 + }, + { + "epoch": 3.52, + "learning_rate": 8.343862179235967e-06, + "loss": 0.2456, + "step": 75390 + }, + { + "epoch": 3.52, + "learning_rate": 8.34307839418118e-06, + "loss": 0.3295, + "step": 75395 + }, + { + "epoch": 3.52, + "learning_rate": 8.342294609126393e-06, + "loss": 0.0937, + "step": 75400 + }, + { + "epoch": 3.52, + "learning_rate": 8.341510824071607e-06, + "loss": 0.0356, + "step": 75405 + }, + { + "epoch": 3.52, + "learning_rate": 8.340727039016821e-06, + "loss": 0.0717, + "step": 75410 + }, + { + "epoch": 3.52, + "learning_rate": 8.339943253962033e-06, + "loss": 0.0336, + "step": 75415 + }, + { + "epoch": 3.52, + "learning_rate": 8.339159468907247e-06, + "loss": 0.0833, + "step": 75420 + }, + { + "epoch": 3.52, + "learning_rate": 8.338375683852461e-06, + "loss": 0.1159, + "step": 75425 + }, + { + "epoch": 3.52, + "learning_rate": 8.337591898797675e-06, + "loss": 0.0786, + "step": 75430 + }, + { + "epoch": 3.52, + "learning_rate": 8.336808113742887e-06, + "loss": 0.1555, + "step": 75435 + }, + { + "epoch": 3.52, + "learning_rate": 8.336024328688101e-06, + "loss": 0.2435, + "step": 75440 + }, + { + "epoch": 3.52, + "learning_rate": 8.335240543633315e-06, + "loss": 0.2153, + "step": 75445 + }, + { + "epoch": 3.52, + "learning_rate": 8.334456758578529e-06, + "loss": 0.0385, + "step": 75450 + }, + { + "epoch": 3.52, + "learning_rate": 8.333672973523741e-06, + "loss": 0.0182, + "step": 75455 + }, + { + "epoch": 3.52, + "learning_rate": 8.332889188468955e-06, + "loss": 0.0516, + "step": 75460 + }, + { + "epoch": 3.52, + "learning_rate": 8.332105403414167e-06, + "loss": 0.0697, + "step": 75465 + }, + { + "epoch": 3.52, + "learning_rate": 8.331321618359381e-06, + "loss": 0.1061, + "step": 75470 + }, + { + "epoch": 3.52, + "learning_rate": 8.330537833304595e-06, + "loss": 0.1071, + "step": 75475 + }, + { + "epoch": 3.52, + "learning_rate": 8.329754048249809e-06, + "loss": 0.0414, + "step": 75480 + }, + { + "epoch": 3.52, + "learning_rate": 8.328970263195021e-06, + "loss": 0.1574, + "step": 75485 + }, + { + "epoch": 3.52, + "learning_rate": 8.328186478140235e-06, + "loss": 0.191, + "step": 75490 + }, + { + "epoch": 3.52, + "learning_rate": 8.327402693085449e-06, + "loss": 0.1444, + "step": 75495 + }, + { + "epoch": 3.52, + "learning_rate": 8.326618908030663e-06, + "loss": 0.0571, + "step": 75500 + }, + { + "epoch": 3.52, + "learning_rate": 8.325835122975875e-06, + "loss": 0.0229, + "step": 75505 + }, + { + "epoch": 3.52, + "learning_rate": 8.325051337921089e-06, + "loss": 0.063, + "step": 75510 + }, + { + "epoch": 3.52, + "learning_rate": 8.324267552866303e-06, + "loss": 0.0546, + "step": 75515 + }, + { + "epoch": 3.52, + "learning_rate": 8.323483767811517e-06, + "loss": 0.0901, + "step": 75520 + }, + { + "epoch": 3.52, + "learning_rate": 8.32269998275673e-06, + "loss": 0.0649, + "step": 75525 + }, + { + "epoch": 3.52, + "learning_rate": 8.321916197701943e-06, + "loss": 0.0851, + "step": 75530 + }, + { + "epoch": 3.52, + "learning_rate": 8.321132412647155e-06, + "loss": 0.1331, + "step": 75535 + }, + { + "epoch": 3.52, + "learning_rate": 8.32034862759237e-06, + "loss": 0.2535, + "step": 75540 + }, + { + "epoch": 3.53, + "learning_rate": 8.319564842537583e-06, + "loss": 0.2529, + "step": 75545 + }, + { + "epoch": 3.53, + "learning_rate": 8.318781057482797e-06, + "loss": 0.0799, + "step": 75550 + }, + { + "epoch": 3.53, + "learning_rate": 8.31799727242801e-06, + "loss": 0.0059, + "step": 75555 + }, + { + "epoch": 3.53, + "learning_rate": 8.317213487373223e-06, + "loss": 0.0637, + "step": 75560 + }, + { + "epoch": 3.53, + "learning_rate": 8.316429702318437e-06, + "loss": 0.03, + "step": 75565 + }, + { + "epoch": 3.53, + "learning_rate": 8.315645917263651e-06, + "loss": 0.081, + "step": 75570 + }, + { + "epoch": 3.53, + "learning_rate": 8.314862132208865e-06, + "loss": 0.1242, + "step": 75575 + }, + { + "epoch": 3.53, + "learning_rate": 8.314078347154077e-06, + "loss": 0.0443, + "step": 75580 + }, + { + "epoch": 3.53, + "learning_rate": 8.313294562099291e-06, + "loss": 0.1409, + "step": 75585 + }, + { + "epoch": 3.53, + "learning_rate": 8.312510777044503e-06, + "loss": 0.0827, + "step": 75590 + }, + { + "epoch": 3.53, + "learning_rate": 8.311726991989717e-06, + "loss": 0.2518, + "step": 75595 + }, + { + "epoch": 3.53, + "learning_rate": 8.310943206934931e-06, + "loss": 0.0828, + "step": 75600 + }, + { + "epoch": 3.53, + "learning_rate": 8.310159421880143e-06, + "loss": 0.015, + "step": 75605 + }, + { + "epoch": 3.53, + "learning_rate": 8.309375636825357e-06, + "loss": 0.0221, + "step": 75610 + }, + { + "epoch": 3.53, + "learning_rate": 8.308591851770571e-06, + "loss": 0.0275, + "step": 75615 + }, + { + "epoch": 3.53, + "learning_rate": 8.307808066715785e-06, + "loss": 0.0394, + "step": 75620 + }, + { + "epoch": 3.53, + "learning_rate": 8.307024281660999e-06, + "loss": 0.0226, + "step": 75625 + }, + { + "epoch": 3.53, + "learning_rate": 8.306240496606211e-06, + "loss": 0.0955, + "step": 75630 + }, + { + "epoch": 3.53, + "learning_rate": 8.305456711551425e-06, + "loss": 0.1029, + "step": 75635 + }, + { + "epoch": 3.53, + "learning_rate": 8.304672926496639e-06, + "loss": 0.2034, + "step": 75640 + }, + { + "epoch": 3.53, + "learning_rate": 8.303889141441853e-06, + "loss": 0.3854, + "step": 75645 + }, + { + "epoch": 3.53, + "learning_rate": 8.303105356387065e-06, + "loss": 0.0793, + "step": 75650 + }, + { + "epoch": 3.53, + "learning_rate": 8.302321571332277e-06, + "loss": 0.0189, + "step": 75655 + }, + { + "epoch": 3.53, + "learning_rate": 8.301537786277491e-06, + "loss": 0.0376, + "step": 75660 + }, + { + "epoch": 3.53, + "learning_rate": 8.300754001222705e-06, + "loss": 0.0251, + "step": 75665 + }, + { + "epoch": 3.53, + "learning_rate": 8.299970216167919e-06, + "loss": 0.068, + "step": 75670 + }, + { + "epoch": 3.53, + "learning_rate": 8.299186431113133e-06, + "loss": 0.1966, + "step": 75675 + }, + { + "epoch": 3.53, + "learning_rate": 8.298402646058345e-06, + "loss": 0.0765, + "step": 75680 + }, + { + "epoch": 3.53, + "learning_rate": 8.297618861003559e-06, + "loss": 0.0766, + "step": 75685 + }, + { + "epoch": 3.53, + "learning_rate": 8.296835075948773e-06, + "loss": 0.2139, + "step": 75690 + }, + { + "epoch": 3.53, + "learning_rate": 8.296051290893987e-06, + "loss": 0.2011, + "step": 75695 + }, + { + "epoch": 3.53, + "learning_rate": 8.295267505839199e-06, + "loss": 0.0308, + "step": 75700 + }, + { + "epoch": 3.53, + "learning_rate": 8.294483720784413e-06, + "loss": 0.0239, + "step": 75705 + }, + { + "epoch": 3.53, + "learning_rate": 8.293699935729627e-06, + "loss": 0.0442, + "step": 75710 + }, + { + "epoch": 3.53, + "learning_rate": 8.29291615067484e-06, + "loss": 0.1048, + "step": 75715 + }, + { + "epoch": 3.53, + "learning_rate": 8.292132365620053e-06, + "loss": 0.1053, + "step": 75720 + }, + { + "epoch": 3.53, + "learning_rate": 8.291348580565267e-06, + "loss": 0.0838, + "step": 75725 + }, + { + "epoch": 3.53, + "learning_rate": 8.290564795510479e-06, + "loss": 0.1122, + "step": 75730 + }, + { + "epoch": 3.53, + "learning_rate": 8.289781010455693e-06, + "loss": 0.0573, + "step": 75735 + }, + { + "epoch": 3.53, + "learning_rate": 8.288997225400907e-06, + "loss": 0.286, + "step": 75740 + }, + { + "epoch": 3.53, + "learning_rate": 8.28821344034612e-06, + "loss": 0.3212, + "step": 75745 + }, + { + "epoch": 3.53, + "learning_rate": 8.287429655291333e-06, + "loss": 0.0789, + "step": 75750 + }, + { + "epoch": 3.53, + "learning_rate": 8.286645870236547e-06, + "loss": 0.0362, + "step": 75755 + }, + { + "epoch": 3.54, + "learning_rate": 8.28586208518176e-06, + "loss": 0.0528, + "step": 75760 + }, + { + "epoch": 3.54, + "learning_rate": 8.285078300126975e-06, + "loss": 0.0774, + "step": 75765 + }, + { + "epoch": 3.54, + "learning_rate": 8.284294515072187e-06, + "loss": 0.037, + "step": 75770 + }, + { + "epoch": 3.54, + "learning_rate": 8.2835107300174e-06, + "loss": 0.0995, + "step": 75775 + }, + { + "epoch": 3.54, + "learning_rate": 8.282726944962615e-06, + "loss": 0.0897, + "step": 75780 + }, + { + "epoch": 3.54, + "learning_rate": 8.281943159907827e-06, + "loss": 0.0795, + "step": 75785 + }, + { + "epoch": 3.54, + "learning_rate": 8.28115937485304e-06, + "loss": 0.1816, + "step": 75790 + }, + { + "epoch": 3.54, + "learning_rate": 8.280375589798255e-06, + "loss": 0.2481, + "step": 75795 + }, + { + "epoch": 3.54, + "learning_rate": 8.279591804743467e-06, + "loss": 0.0503, + "step": 75800 + }, + { + "epoch": 3.54, + "learning_rate": 8.27880801968868e-06, + "loss": 0.0122, + "step": 75805 + }, + { + "epoch": 3.54, + "learning_rate": 8.278024234633895e-06, + "loss": 0.0234, + "step": 75810 + }, + { + "epoch": 3.54, + "learning_rate": 8.277240449579109e-06, + "loss": 0.0445, + "step": 75815 + }, + { + "epoch": 3.54, + "learning_rate": 8.27645666452432e-06, + "loss": 0.059, + "step": 75820 + }, + { + "epoch": 3.54, + "learning_rate": 8.275672879469535e-06, + "loss": 0.0987, + "step": 75825 + }, + { + "epoch": 3.54, + "learning_rate": 8.274889094414749e-06, + "loss": 0.1198, + "step": 75830 + }, + { + "epoch": 3.54, + "learning_rate": 8.274105309359963e-06, + "loss": 0.1624, + "step": 75835 + }, + { + "epoch": 3.54, + "learning_rate": 8.273321524305176e-06, + "loss": 0.1389, + "step": 75840 + }, + { + "epoch": 3.54, + "learning_rate": 8.272537739250389e-06, + "loss": 0.2726, + "step": 75845 + }, + { + "epoch": 3.54, + "learning_rate": 8.271753954195601e-06, + "loss": 0.0476, + "step": 75850 + }, + { + "epoch": 3.54, + "learning_rate": 8.270970169140815e-06, + "loss": 0.0124, + "step": 75855 + }, + { + "epoch": 3.54, + "learning_rate": 8.270186384086029e-06, + "loss": 0.0568, + "step": 75860 + }, + { + "epoch": 3.54, + "learning_rate": 8.269402599031243e-06, + "loss": 0.0667, + "step": 75865 + }, + { + "epoch": 3.54, + "learning_rate": 8.268618813976455e-06, + "loss": 0.1202, + "step": 75870 + }, + { + "epoch": 3.54, + "learning_rate": 8.267835028921669e-06, + "loss": 0.0241, + "step": 75875 + }, + { + "epoch": 3.54, + "learning_rate": 8.267051243866883e-06, + "loss": 0.1229, + "step": 75880 + }, + { + "epoch": 3.54, + "learning_rate": 8.266267458812097e-06, + "loss": 0.0868, + "step": 75885 + }, + { + "epoch": 3.54, + "learning_rate": 8.26548367375731e-06, + "loss": 0.201, + "step": 75890 + }, + { + "epoch": 3.54, + "learning_rate": 8.264699888702523e-06, + "loss": 0.2908, + "step": 75895 + }, + { + "epoch": 3.54, + "learning_rate": 8.263916103647737e-06, + "loss": 0.0415, + "step": 75900 + }, + { + "epoch": 3.54, + "learning_rate": 8.26313231859295e-06, + "loss": 0.0291, + "step": 75905 + }, + { + "epoch": 3.54, + "learning_rate": 8.262348533538164e-06, + "loss": 0.012, + "step": 75910 + }, + { + "epoch": 3.54, + "learning_rate": 8.261564748483377e-06, + "loss": 0.0537, + "step": 75915 + }, + { + "epoch": 3.54, + "learning_rate": 8.260780963428589e-06, + "loss": 0.078, + "step": 75920 + }, + { + "epoch": 3.54, + "learning_rate": 8.259997178373803e-06, + "loss": 0.162, + "step": 75925 + }, + { + "epoch": 3.54, + "learning_rate": 8.259213393319017e-06, + "loss": 0.1439, + "step": 75930 + }, + { + "epoch": 3.54, + "learning_rate": 8.25842960826423e-06, + "loss": 0.09, + "step": 75935 + }, + { + "epoch": 3.54, + "learning_rate": 8.257645823209444e-06, + "loss": 0.1553, + "step": 75940 + }, + { + "epoch": 3.54, + "learning_rate": 8.256862038154657e-06, + "loss": 0.2545, + "step": 75945 + }, + { + "epoch": 3.54, + "learning_rate": 8.25607825309987e-06, + "loss": 0.0797, + "step": 75950 + }, + { + "epoch": 3.54, + "learning_rate": 8.255294468045084e-06, + "loss": 0.0807, + "step": 75955 + }, + { + "epoch": 3.54, + "learning_rate": 8.254510682990298e-06, + "loss": 0.0415, + "step": 75960 + }, + { + "epoch": 3.54, + "learning_rate": 8.25372689793551e-06, + "loss": 0.0477, + "step": 75965 + }, + { + "epoch": 3.54, + "learning_rate": 8.252943112880724e-06, + "loss": 0.1314, + "step": 75970 + }, + { + "epoch": 3.55, + "learning_rate": 8.252159327825938e-06, + "loss": 0.0979, + "step": 75975 + }, + { + "epoch": 3.55, + "learning_rate": 8.25137554277115e-06, + "loss": 0.0582, + "step": 75980 + }, + { + "epoch": 3.55, + "learning_rate": 8.250591757716364e-06, + "loss": 0.0899, + "step": 75985 + }, + { + "epoch": 3.55, + "learning_rate": 8.249807972661578e-06, + "loss": 0.2765, + "step": 75990 + }, + { + "epoch": 3.55, + "learning_rate": 8.24902418760679e-06, + "loss": 0.2902, + "step": 75995 + }, + { + "epoch": 3.55, + "learning_rate": 8.248240402552004e-06, + "loss": 0.1004, + "step": 76000 + }, + { + "epoch": 3.55, + "learning_rate": 8.247456617497218e-06, + "loss": 0.0277, + "step": 76005 + }, + { + "epoch": 3.55, + "learning_rate": 8.246672832442432e-06, + "loss": 0.0526, + "step": 76010 + }, + { + "epoch": 3.55, + "learning_rate": 8.245889047387645e-06, + "loss": 0.0395, + "step": 76015 + }, + { + "epoch": 3.55, + "learning_rate": 8.245105262332858e-06, + "loss": 0.0749, + "step": 76020 + }, + { + "epoch": 3.55, + "learning_rate": 8.244321477278072e-06, + "loss": 0.0923, + "step": 76025 + }, + { + "epoch": 3.55, + "learning_rate": 8.243537692223286e-06, + "loss": 0.0788, + "step": 76030 + }, + { + "epoch": 3.55, + "learning_rate": 8.242753907168498e-06, + "loss": 0.1192, + "step": 76035 + }, + { + "epoch": 3.55, + "learning_rate": 8.241970122113712e-06, + "loss": 0.1008, + "step": 76040 + }, + { + "epoch": 3.55, + "learning_rate": 8.241186337058925e-06, + "loss": 0.4365, + "step": 76045 + }, + { + "epoch": 3.55, + "learning_rate": 8.240402552004138e-06, + "loss": 0.0791, + "step": 76050 + }, + { + "epoch": 3.55, + "learning_rate": 8.239618766949352e-06, + "loss": 0.0438, + "step": 76055 + }, + { + "epoch": 3.55, + "learning_rate": 8.238834981894566e-06, + "loss": 0.0265, + "step": 76060 + }, + { + "epoch": 3.55, + "learning_rate": 8.238051196839778e-06, + "loss": 0.0122, + "step": 76065 + }, + { + "epoch": 3.55, + "learning_rate": 8.237267411784992e-06, + "loss": 0.0748, + "step": 76070 + }, + { + "epoch": 3.55, + "learning_rate": 8.236483626730206e-06, + "loss": 0.1352, + "step": 76075 + }, + { + "epoch": 3.55, + "learning_rate": 8.23569984167542e-06, + "loss": 0.1491, + "step": 76080 + }, + { + "epoch": 3.55, + "learning_rate": 8.234916056620632e-06, + "loss": 0.1648, + "step": 76085 + }, + { + "epoch": 3.55, + "learning_rate": 8.234132271565846e-06, + "loss": 0.1639, + "step": 76090 + }, + { + "epoch": 3.55, + "learning_rate": 8.23334848651106e-06, + "loss": 0.1581, + "step": 76095 + }, + { + "epoch": 3.55, + "learning_rate": 8.232564701456274e-06, + "loss": 0.1422, + "step": 76100 + }, + { + "epoch": 3.55, + "learning_rate": 8.231780916401488e-06, + "loss": 0.0529, + "step": 76105 + }, + { + "epoch": 3.55, + "learning_rate": 8.2309971313467e-06, + "loss": 0.0227, + "step": 76110 + }, + { + "epoch": 3.55, + "learning_rate": 8.230213346291912e-06, + "loss": 0.0177, + "step": 76115 + }, + { + "epoch": 3.55, + "learning_rate": 8.229429561237126e-06, + "loss": 0.0846, + "step": 76120 + }, + { + "epoch": 3.55, + "learning_rate": 8.22864577618234e-06, + "loss": 0.0638, + "step": 76125 + }, + { + "epoch": 3.55, + "learning_rate": 8.227861991127554e-06, + "loss": 0.1441, + "step": 76130 + }, + { + "epoch": 3.55, + "learning_rate": 8.227078206072766e-06, + "loss": 0.1508, + "step": 76135 + }, + { + "epoch": 3.55, + "learning_rate": 8.22629442101798e-06, + "loss": 0.2952, + "step": 76140 + }, + { + "epoch": 3.55, + "learning_rate": 8.225510635963194e-06, + "loss": 0.268, + "step": 76145 + }, + { + "epoch": 3.55, + "learning_rate": 8.224726850908408e-06, + "loss": 0.0742, + "step": 76150 + }, + { + "epoch": 3.55, + "learning_rate": 8.223943065853622e-06, + "loss": 0.034, + "step": 76155 + }, + { + "epoch": 3.55, + "learning_rate": 8.223159280798834e-06, + "loss": 0.0313, + "step": 76160 + }, + { + "epoch": 3.55, + "learning_rate": 8.222375495744048e-06, + "loss": 0.052, + "step": 76165 + }, + { + "epoch": 3.55, + "learning_rate": 8.221591710689262e-06, + "loss": 0.0843, + "step": 76170 + }, + { + "epoch": 3.55, + "learning_rate": 8.220807925634474e-06, + "loss": 0.0998, + "step": 76175 + }, + { + "epoch": 3.55, + "learning_rate": 8.220024140579688e-06, + "loss": 0.1013, + "step": 76180 + }, + { + "epoch": 3.55, + "learning_rate": 8.2192403555249e-06, + "loss": 0.1085, + "step": 76185 + }, + { + "epoch": 3.56, + "learning_rate": 8.218456570470114e-06, + "loss": 0.1796, + "step": 76190 + }, + { + "epoch": 3.56, + "learning_rate": 8.217672785415328e-06, + "loss": 0.293, + "step": 76195 + }, + { + "epoch": 3.56, + "learning_rate": 8.216889000360542e-06, + "loss": 0.0851, + "step": 76200 + }, + { + "epoch": 3.56, + "learning_rate": 8.216105215305756e-06, + "loss": 0.0458, + "step": 76205 + }, + { + "epoch": 3.56, + "learning_rate": 8.215321430250968e-06, + "loss": 0.043, + "step": 76210 + }, + { + "epoch": 3.56, + "learning_rate": 8.214537645196182e-06, + "loss": 0.0795, + "step": 76215 + }, + { + "epoch": 3.56, + "learning_rate": 8.213753860141396e-06, + "loss": 0.0633, + "step": 76220 + }, + { + "epoch": 3.56, + "learning_rate": 8.21297007508661e-06, + "loss": 0.0637, + "step": 76225 + }, + { + "epoch": 3.56, + "learning_rate": 8.212186290031822e-06, + "loss": 0.1009, + "step": 76230 + }, + { + "epoch": 3.56, + "learning_rate": 8.211402504977036e-06, + "loss": 0.1289, + "step": 76235 + }, + { + "epoch": 3.56, + "learning_rate": 8.210618719922248e-06, + "loss": 0.1908, + "step": 76240 + }, + { + "epoch": 3.56, + "learning_rate": 8.209834934867462e-06, + "loss": 0.2687, + "step": 76245 + }, + { + "epoch": 3.56, + "learning_rate": 8.209051149812676e-06, + "loss": 0.092, + "step": 76250 + }, + { + "epoch": 3.56, + "learning_rate": 8.20826736475789e-06, + "loss": 0.057, + "step": 76255 + }, + { + "epoch": 3.56, + "learning_rate": 8.207483579703102e-06, + "loss": 0.0251, + "step": 76260 + }, + { + "epoch": 3.56, + "learning_rate": 8.206699794648316e-06, + "loss": 0.0866, + "step": 76265 + }, + { + "epoch": 3.56, + "learning_rate": 8.20591600959353e-06, + "loss": 0.1353, + "step": 76270 + }, + { + "epoch": 3.56, + "learning_rate": 8.205132224538744e-06, + "loss": 0.0878, + "step": 76275 + }, + { + "epoch": 3.56, + "learning_rate": 8.204348439483956e-06, + "loss": 0.0853, + "step": 76280 + }, + { + "epoch": 3.56, + "learning_rate": 8.20356465442917e-06, + "loss": 0.0852, + "step": 76285 + }, + { + "epoch": 3.56, + "learning_rate": 8.202780869374384e-06, + "loss": 0.1853, + "step": 76290 + }, + { + "epoch": 3.56, + "learning_rate": 8.201997084319598e-06, + "loss": 0.2395, + "step": 76295 + }, + { + "epoch": 3.56, + "learning_rate": 8.20121329926481e-06, + "loss": 0.0643, + "step": 76300 + }, + { + "epoch": 3.56, + "learning_rate": 8.200429514210024e-06, + "loss": 0.044, + "step": 76305 + }, + { + "epoch": 3.56, + "learning_rate": 8.199645729155236e-06, + "loss": 0.0486, + "step": 76310 + }, + { + "epoch": 3.56, + "learning_rate": 8.19886194410045e-06, + "loss": 0.0564, + "step": 76315 + }, + { + "epoch": 3.56, + "learning_rate": 8.198078159045664e-06, + "loss": 0.0806, + "step": 76320 + }, + { + "epoch": 3.56, + "learning_rate": 8.197294373990878e-06, + "loss": 0.0805, + "step": 76325 + }, + { + "epoch": 3.56, + "learning_rate": 8.19651058893609e-06, + "loss": 0.1667, + "step": 76330 + }, + { + "epoch": 3.56, + "learning_rate": 8.195726803881304e-06, + "loss": 0.2686, + "step": 76335 + }, + { + "epoch": 3.56, + "learning_rate": 8.194943018826518e-06, + "loss": 0.2024, + "step": 76340 + }, + { + "epoch": 3.56, + "learning_rate": 8.194159233771732e-06, + "loss": 0.2692, + "step": 76345 + }, + { + "epoch": 3.56, + "learning_rate": 8.193375448716944e-06, + "loss": 0.0956, + "step": 76350 + }, + { + "epoch": 3.56, + "learning_rate": 8.192591663662158e-06, + "loss": 0.0181, + "step": 76355 + }, + { + "epoch": 3.56, + "learning_rate": 8.191807878607372e-06, + "loss": 0.0212, + "step": 76360 + }, + { + "epoch": 3.56, + "learning_rate": 8.191024093552586e-06, + "loss": 0.1716, + "step": 76365 + }, + { + "epoch": 3.56, + "learning_rate": 8.190240308497798e-06, + "loss": 0.1032, + "step": 76370 + }, + { + "epoch": 3.56, + "learning_rate": 8.189456523443012e-06, + "loss": 0.0507, + "step": 76375 + }, + { + "epoch": 3.56, + "learning_rate": 8.188672738388224e-06, + "loss": 0.1214, + "step": 76380 + }, + { + "epoch": 3.56, + "learning_rate": 8.187888953333438e-06, + "loss": 0.1668, + "step": 76385 + }, + { + "epoch": 3.56, + "learning_rate": 8.187105168278652e-06, + "loss": 0.2541, + "step": 76390 + }, + { + "epoch": 3.56, + "learning_rate": 8.186321383223866e-06, + "loss": 0.2692, + "step": 76395 + }, + { + "epoch": 3.56, + "learning_rate": 8.185537598169078e-06, + "loss": 0.0889, + "step": 76400 + }, + { + "epoch": 3.57, + "learning_rate": 8.184753813114292e-06, + "loss": 0.0115, + "step": 76405 + }, + { + "epoch": 3.57, + "learning_rate": 8.183970028059506e-06, + "loss": 0.0236, + "step": 76410 + }, + { + "epoch": 3.57, + "learning_rate": 8.18318624300472e-06, + "loss": 0.022, + "step": 76415 + }, + { + "epoch": 3.57, + "learning_rate": 8.182402457949934e-06, + "loss": 0.1446, + "step": 76420 + }, + { + "epoch": 3.57, + "learning_rate": 8.181618672895146e-06, + "loss": 0.0501, + "step": 76425 + }, + { + "epoch": 3.57, + "learning_rate": 8.18083488784036e-06, + "loss": 0.1078, + "step": 76430 + }, + { + "epoch": 3.57, + "learning_rate": 8.180051102785572e-06, + "loss": 0.1363, + "step": 76435 + }, + { + "epoch": 3.57, + "learning_rate": 8.179267317730786e-06, + "loss": 0.1027, + "step": 76440 + }, + { + "epoch": 3.57, + "learning_rate": 8.178483532676e-06, + "loss": 0.3394, + "step": 76445 + }, + { + "epoch": 3.57, + "learning_rate": 8.177699747621212e-06, + "loss": 0.0626, + "step": 76450 + }, + { + "epoch": 3.57, + "learning_rate": 8.176915962566426e-06, + "loss": 0.0337, + "step": 76455 + }, + { + "epoch": 3.57, + "learning_rate": 8.17613217751164e-06, + "loss": 0.0466, + "step": 76460 + }, + { + "epoch": 3.57, + "learning_rate": 8.175348392456854e-06, + "loss": 0.0782, + "step": 76465 + }, + { + "epoch": 3.57, + "learning_rate": 8.174564607402068e-06, + "loss": 0.0538, + "step": 76470 + }, + { + "epoch": 3.57, + "learning_rate": 8.17378082234728e-06, + "loss": 0.2283, + "step": 76475 + }, + { + "epoch": 3.57, + "learning_rate": 8.172997037292494e-06, + "loss": 0.1222, + "step": 76480 + }, + { + "epoch": 3.57, + "learning_rate": 8.172213252237708e-06, + "loss": 0.1388, + "step": 76485 + }, + { + "epoch": 3.57, + "learning_rate": 8.171429467182922e-06, + "loss": 0.2528, + "step": 76490 + }, + { + "epoch": 3.57, + "learning_rate": 8.170645682128134e-06, + "loss": 0.2953, + "step": 76495 + }, + { + "epoch": 3.57, + "learning_rate": 8.169861897073346e-06, + "loss": 0.1195, + "step": 76500 + }, + { + "epoch": 3.57, + "learning_rate": 8.16907811201856e-06, + "loss": 0.029, + "step": 76505 + }, + { + "epoch": 3.57, + "learning_rate": 8.168294326963774e-06, + "loss": 0.0202, + "step": 76510 + }, + { + "epoch": 3.57, + "learning_rate": 8.167510541908988e-06, + "loss": 0.0609, + "step": 76515 + }, + { + "epoch": 3.57, + "learning_rate": 8.166726756854202e-06, + "loss": 0.0825, + "step": 76520 + }, + { + "epoch": 3.57, + "learning_rate": 8.165942971799414e-06, + "loss": 0.0671, + "step": 76525 + }, + { + "epoch": 3.57, + "learning_rate": 8.165159186744628e-06, + "loss": 0.1079, + "step": 76530 + }, + { + "epoch": 3.57, + "learning_rate": 8.164375401689842e-06, + "loss": 0.1552, + "step": 76535 + }, + { + "epoch": 3.57, + "learning_rate": 8.163591616635055e-06, + "loss": 0.2753, + "step": 76540 + }, + { + "epoch": 3.57, + "learning_rate": 8.162807831580268e-06, + "loss": 0.2377, + "step": 76545 + }, + { + "epoch": 3.57, + "learning_rate": 8.162024046525482e-06, + "loss": 0.0422, + "step": 76550 + }, + { + "epoch": 3.57, + "learning_rate": 8.161240261470696e-06, + "loss": 0.0695, + "step": 76555 + }, + { + "epoch": 3.57, + "learning_rate": 8.16045647641591e-06, + "loss": 0.0457, + "step": 76560 + }, + { + "epoch": 3.57, + "learning_rate": 8.159672691361122e-06, + "loss": 0.0343, + "step": 76565 + }, + { + "epoch": 3.57, + "learning_rate": 8.158888906306336e-06, + "loss": 0.0607, + "step": 76570 + }, + { + "epoch": 3.57, + "learning_rate": 8.158105121251548e-06, + "loss": 0.0968, + "step": 76575 + }, + { + "epoch": 3.57, + "learning_rate": 8.157321336196762e-06, + "loss": 0.0807, + "step": 76580 + }, + { + "epoch": 3.57, + "learning_rate": 8.156537551141976e-06, + "loss": 0.1224, + "step": 76585 + }, + { + "epoch": 3.57, + "learning_rate": 8.15575376608719e-06, + "loss": 0.0768, + "step": 76590 + }, + { + "epoch": 3.57, + "learning_rate": 8.154969981032402e-06, + "loss": 0.3868, + "step": 76595 + }, + { + "epoch": 3.57, + "learning_rate": 8.154186195977616e-06, + "loss": 0.0574, + "step": 76600 + }, + { + "epoch": 3.57, + "learning_rate": 8.15340241092283e-06, + "loss": 0.0198, + "step": 76605 + }, + { + "epoch": 3.57, + "learning_rate": 8.152618625868043e-06, + "loss": 0.0272, + "step": 76610 + }, + { + "epoch": 3.57, + "learning_rate": 8.151834840813256e-06, + "loss": 0.1063, + "step": 76615 + }, + { + "epoch": 3.58, + "learning_rate": 8.15105105575847e-06, + "loss": 0.0647, + "step": 76620 + }, + { + "epoch": 3.58, + "learning_rate": 8.150267270703683e-06, + "loss": 0.0933, + "step": 76625 + }, + { + "epoch": 3.58, + "learning_rate": 8.149483485648896e-06, + "loss": 0.1446, + "step": 76630 + }, + { + "epoch": 3.58, + "learning_rate": 8.14869970059411e-06, + "loss": 0.1358, + "step": 76635 + }, + { + "epoch": 3.58, + "learning_rate": 8.147915915539323e-06, + "loss": 0.2393, + "step": 76640 + }, + { + "epoch": 3.58, + "learning_rate": 8.147132130484536e-06, + "loss": 0.3171, + "step": 76645 + }, + { + "epoch": 3.58, + "learning_rate": 8.14634834542975e-06, + "loss": 0.1299, + "step": 76650 + }, + { + "epoch": 3.58, + "learning_rate": 8.145564560374963e-06, + "loss": 0.0429, + "step": 76655 + }, + { + "epoch": 3.58, + "learning_rate": 8.144780775320177e-06, + "loss": 0.0264, + "step": 76660 + }, + { + "epoch": 3.58, + "learning_rate": 8.14399699026539e-06, + "loss": 0.0446, + "step": 76665 + }, + { + "epoch": 3.58, + "learning_rate": 8.143213205210603e-06, + "loss": 0.1154, + "step": 76670 + }, + { + "epoch": 3.58, + "learning_rate": 8.142429420155817e-06, + "loss": 0.105, + "step": 76675 + }, + { + "epoch": 3.58, + "learning_rate": 8.141645635101031e-06, + "loss": 0.0613, + "step": 76680 + }, + { + "epoch": 3.58, + "learning_rate": 8.140861850046245e-06, + "loss": 0.2053, + "step": 76685 + }, + { + "epoch": 3.58, + "learning_rate": 8.140078064991457e-06, + "loss": 0.1957, + "step": 76690 + }, + { + "epoch": 3.58, + "learning_rate": 8.13929427993667e-06, + "loss": 0.2316, + "step": 76695 + }, + { + "epoch": 3.58, + "learning_rate": 8.138510494881884e-06, + "loss": 0.0859, + "step": 76700 + }, + { + "epoch": 3.58, + "learning_rate": 8.137726709827097e-06, + "loss": 0.0217, + "step": 76705 + }, + { + "epoch": 3.58, + "learning_rate": 8.136942924772311e-06, + "loss": 0.0319, + "step": 76710 + }, + { + "epoch": 3.58, + "learning_rate": 8.136159139717524e-06, + "loss": 0.0411, + "step": 76715 + }, + { + "epoch": 3.58, + "learning_rate": 8.135375354662737e-06, + "loss": 0.0563, + "step": 76720 + }, + { + "epoch": 3.58, + "learning_rate": 8.134591569607951e-06, + "loss": 0.1346, + "step": 76725 + }, + { + "epoch": 3.58, + "learning_rate": 8.133807784553165e-06, + "loss": 0.1397, + "step": 76730 + }, + { + "epoch": 3.58, + "learning_rate": 8.13302399949838e-06, + "loss": 0.0917, + "step": 76735 + }, + { + "epoch": 3.58, + "learning_rate": 8.13239697145455e-06, + "loss": 0.2303, + "step": 76740 + }, + { + "epoch": 3.58, + "learning_rate": 8.131613186399764e-06, + "loss": 0.6665, + "step": 76745 + }, + { + "epoch": 3.58, + "learning_rate": 8.130829401344976e-06, + "loss": 0.0425, + "step": 76750 + }, + { + "epoch": 3.58, + "learning_rate": 8.13004561629019e-06, + "loss": 0.0298, + "step": 76755 + }, + { + "epoch": 3.58, + "learning_rate": 8.129261831235402e-06, + "loss": 0.0255, + "step": 76760 + }, + { + "epoch": 3.58, + "learning_rate": 8.128478046180616e-06, + "loss": 0.0327, + "step": 76765 + }, + { + "epoch": 3.58, + "learning_rate": 8.12769426112583e-06, + "loss": 0.075, + "step": 76770 + }, + { + "epoch": 3.58, + "learning_rate": 8.126910476071042e-06, + "loss": 0.0938, + "step": 76775 + }, + { + "epoch": 3.58, + "learning_rate": 8.126126691016256e-06, + "loss": 0.1333, + "step": 76780 + }, + { + "epoch": 3.58, + "learning_rate": 8.12534290596147e-06, + "loss": 0.1166, + "step": 76785 + }, + { + "epoch": 3.58, + "learning_rate": 8.124559120906684e-06, + "loss": 0.1987, + "step": 76790 + }, + { + "epoch": 3.58, + "learning_rate": 8.123775335851897e-06, + "loss": 0.3264, + "step": 76795 + }, + { + "epoch": 3.58, + "learning_rate": 8.12299155079711e-06, + "loss": 0.096, + "step": 76800 + }, + { + "epoch": 3.58, + "learning_rate": 8.122207765742324e-06, + "loss": 0.034, + "step": 76805 + }, + { + "epoch": 3.58, + "learning_rate": 8.121423980687538e-06, + "loss": 0.0516, + "step": 76810 + }, + { + "epoch": 3.58, + "learning_rate": 8.120640195632751e-06, + "loss": 0.0528, + "step": 76815 + }, + { + "epoch": 3.58, + "learning_rate": 8.119856410577964e-06, + "loss": 0.0884, + "step": 76820 + }, + { + "epoch": 3.58, + "learning_rate": 8.119072625523176e-06, + "loss": 0.0855, + "step": 76825 + }, + { + "epoch": 3.58, + "learning_rate": 8.11828884046839e-06, + "loss": 0.0948, + "step": 76830 + }, + { + "epoch": 3.59, + "learning_rate": 8.117505055413604e-06, + "loss": 0.0889, + "step": 76835 + }, + { + "epoch": 3.59, + "learning_rate": 8.116721270358818e-06, + "loss": 0.2022, + "step": 76840 + }, + { + "epoch": 3.59, + "learning_rate": 8.115937485304031e-06, + "loss": 0.2287, + "step": 76845 + }, + { + "epoch": 3.59, + "learning_rate": 8.115153700249244e-06, + "loss": 0.0573, + "step": 76850 + }, + { + "epoch": 3.59, + "learning_rate": 8.114369915194458e-06, + "loss": 0.0238, + "step": 76855 + }, + { + "epoch": 3.59, + "learning_rate": 8.113586130139671e-06, + "loss": 0.0125, + "step": 76860 + }, + { + "epoch": 3.59, + "learning_rate": 8.112802345084885e-06, + "loss": 0.1019, + "step": 76865 + }, + { + "epoch": 3.59, + "learning_rate": 8.112018560030098e-06, + "loss": 0.1157, + "step": 76870 + }, + { + "epoch": 3.59, + "learning_rate": 8.111234774975312e-06, + "loss": 0.0912, + "step": 76875 + }, + { + "epoch": 3.59, + "learning_rate": 8.110450989920525e-06, + "loss": 0.1185, + "step": 76880 + }, + { + "epoch": 3.59, + "learning_rate": 8.10966720486574e-06, + "loss": 0.1433, + "step": 76885 + }, + { + "epoch": 3.59, + "learning_rate": 8.108883419810952e-06, + "loss": 0.2892, + "step": 76890 + }, + { + "epoch": 3.59, + "learning_rate": 8.108099634756165e-06, + "loss": 0.375, + "step": 76895 + }, + { + "epoch": 3.59, + "learning_rate": 8.107315849701378e-06, + "loss": 0.0706, + "step": 76900 + }, + { + "epoch": 3.59, + "learning_rate": 8.106532064646592e-06, + "loss": 0.022, + "step": 76905 + }, + { + "epoch": 3.59, + "learning_rate": 8.105748279591805e-06, + "loss": 0.0182, + "step": 76910 + }, + { + "epoch": 3.59, + "learning_rate": 8.10496449453702e-06, + "loss": 0.0763, + "step": 76915 + }, + { + "epoch": 3.59, + "learning_rate": 8.104180709482232e-06, + "loss": 0.0429, + "step": 76920 + }, + { + "epoch": 3.59, + "learning_rate": 8.103396924427445e-06, + "loss": 0.0621, + "step": 76925 + }, + { + "epoch": 3.59, + "learning_rate": 8.10261313937266e-06, + "loss": 0.0587, + "step": 76930 + }, + { + "epoch": 3.59, + "learning_rate": 8.101829354317873e-06, + "loss": 0.1526, + "step": 76935 + }, + { + "epoch": 3.59, + "learning_rate": 8.101045569263086e-06, + "loss": 0.101, + "step": 76940 + }, + { + "epoch": 3.59, + "learning_rate": 8.1002617842083e-06, + "loss": 0.213, + "step": 76945 + }, + { + "epoch": 3.59, + "learning_rate": 8.099477999153513e-06, + "loss": 0.0705, + "step": 76950 + }, + { + "epoch": 3.59, + "learning_rate": 8.098694214098726e-06, + "loss": 0.0205, + "step": 76955 + }, + { + "epoch": 3.59, + "learning_rate": 8.09791042904394e-06, + "loss": 0.0226, + "step": 76960 + }, + { + "epoch": 3.59, + "learning_rate": 8.097126643989153e-06, + "loss": 0.0564, + "step": 76965 + }, + { + "epoch": 3.59, + "learning_rate": 8.096342858934366e-06, + "loss": 0.0664, + "step": 76970 + }, + { + "epoch": 3.59, + "learning_rate": 8.09555907387958e-06, + "loss": 0.0594, + "step": 76975 + }, + { + "epoch": 3.59, + "learning_rate": 8.094775288824793e-06, + "loss": 0.1133, + "step": 76980 + }, + { + "epoch": 3.59, + "learning_rate": 8.093991503770007e-06, + "loss": 0.1443, + "step": 76985 + }, + { + "epoch": 3.59, + "learning_rate": 8.09320771871522e-06, + "loss": 0.2564, + "step": 76990 + }, + { + "epoch": 3.59, + "learning_rate": 8.092423933660433e-06, + "loss": 0.3879, + "step": 76995 + }, + { + "epoch": 3.59, + "learning_rate": 8.091640148605647e-06, + "loss": 0.0718, + "step": 77000 + }, + { + "epoch": 3.59, + "learning_rate": 8.090856363550861e-06, + "loss": 0.0181, + "step": 77005 + }, + { + "epoch": 3.59, + "learning_rate": 8.090072578496075e-06, + "loss": 0.0427, + "step": 77010 + }, + { + "epoch": 3.59, + "learning_rate": 8.089288793441287e-06, + "loss": 0.0605, + "step": 77015 + }, + { + "epoch": 3.59, + "learning_rate": 8.0885050083865e-06, + "loss": 0.0279, + "step": 77020 + }, + { + "epoch": 3.59, + "learning_rate": 8.087721223331713e-06, + "loss": 0.2084, + "step": 77025 + }, + { + "epoch": 3.59, + "learning_rate": 8.086937438276927e-06, + "loss": 0.097, + "step": 77030 + }, + { + "epoch": 3.59, + "learning_rate": 8.086153653222141e-06, + "loss": 0.1676, + "step": 77035 + }, + { + "epoch": 3.59, + "learning_rate": 8.085369868167353e-06, + "loss": 0.1195, + "step": 77040 + }, + { + "epoch": 3.6, + "learning_rate": 8.084586083112567e-06, + "loss": 0.1522, + "step": 77045 + }, + { + "epoch": 3.6, + "learning_rate": 8.083802298057781e-06, + "loss": 0.0322, + "step": 77050 + }, + { + "epoch": 3.6, + "learning_rate": 8.083018513002995e-06, + "loss": 0.035, + "step": 77055 + }, + { + "epoch": 3.6, + "learning_rate": 8.082234727948209e-06, + "loss": 0.047, + "step": 77060 + }, + { + "epoch": 3.6, + "learning_rate": 8.081450942893421e-06, + "loss": 0.0983, + "step": 77065 + }, + { + "epoch": 3.6, + "learning_rate": 8.080667157838635e-06, + "loss": 0.1496, + "step": 77070 + }, + { + "epoch": 3.6, + "learning_rate": 8.079883372783849e-06, + "loss": 0.1533, + "step": 77075 + }, + { + "epoch": 3.6, + "learning_rate": 8.079099587729063e-06, + "loss": 0.1387, + "step": 77080 + }, + { + "epoch": 3.6, + "learning_rate": 8.078315802674275e-06, + "loss": 0.0755, + "step": 77085 + }, + { + "epoch": 3.6, + "learning_rate": 8.077532017619487e-06, + "loss": 0.215, + "step": 77090 + }, + { + "epoch": 3.6, + "learning_rate": 8.076748232564701e-06, + "loss": 0.172, + "step": 77095 + }, + { + "epoch": 3.6, + "learning_rate": 8.075964447509915e-06, + "loss": 0.0512, + "step": 77100 + }, + { + "epoch": 3.6, + "learning_rate": 8.07518066245513e-06, + "loss": 0.0308, + "step": 77105 + }, + { + "epoch": 3.6, + "learning_rate": 8.074396877400343e-06, + "loss": 0.0165, + "step": 77110 + }, + { + "epoch": 3.6, + "learning_rate": 8.073613092345555e-06, + "loss": 0.0949, + "step": 77115 + }, + { + "epoch": 3.6, + "learning_rate": 8.07282930729077e-06, + "loss": 0.0572, + "step": 77120 + }, + { + "epoch": 3.6, + "learning_rate": 8.072045522235983e-06, + "loss": 0.0503, + "step": 77125 + }, + { + "epoch": 3.6, + "learning_rate": 8.071261737181197e-06, + "loss": 0.0687, + "step": 77130 + }, + { + "epoch": 3.6, + "learning_rate": 8.07047795212641e-06, + "loss": 0.1899, + "step": 77135 + }, + { + "epoch": 3.6, + "learning_rate": 8.069694167071623e-06, + "loss": 0.1388, + "step": 77140 + }, + { + "epoch": 3.6, + "learning_rate": 8.068910382016837e-06, + "loss": 0.2585, + "step": 77145 + }, + { + "epoch": 3.6, + "learning_rate": 8.06812659696205e-06, + "loss": 0.0928, + "step": 77150 + }, + { + "epoch": 3.6, + "learning_rate": 8.067342811907263e-06, + "loss": 0.082, + "step": 77155 + }, + { + "epoch": 3.6, + "learning_rate": 8.066559026852477e-06, + "loss": 0.0616, + "step": 77160 + }, + { + "epoch": 3.6, + "learning_rate": 8.06577524179769e-06, + "loss": 0.1023, + "step": 77165 + }, + { + "epoch": 3.6, + "learning_rate": 8.064991456742903e-06, + "loss": 0.0539, + "step": 77170 + }, + { + "epoch": 3.6, + "learning_rate": 8.064207671688117e-06, + "loss": 0.0532, + "step": 77175 + }, + { + "epoch": 3.6, + "learning_rate": 8.063423886633331e-06, + "loss": 0.1331, + "step": 77180 + }, + { + "epoch": 3.6, + "learning_rate": 8.062640101578543e-06, + "loss": 0.1599, + "step": 77185 + }, + { + "epoch": 3.6, + "learning_rate": 8.061856316523757e-06, + "loss": 0.2474, + "step": 77190 + }, + { + "epoch": 3.6, + "learning_rate": 8.061072531468971e-06, + "loss": 0.2996, + "step": 77195 + }, + { + "epoch": 3.6, + "learning_rate": 8.060288746414185e-06, + "loss": 0.0673, + "step": 77200 + }, + { + "epoch": 3.6, + "learning_rate": 8.059504961359397e-06, + "loss": 0.0557, + "step": 77205 + }, + { + "epoch": 3.6, + "learning_rate": 8.058721176304611e-06, + "loss": 0.0642, + "step": 77210 + }, + { + "epoch": 3.6, + "learning_rate": 8.057937391249823e-06, + "loss": 0.0275, + "step": 77215 + }, + { + "epoch": 3.6, + "learning_rate": 8.057153606195037e-06, + "loss": 0.0654, + "step": 77220 + }, + { + "epoch": 3.6, + "learning_rate": 8.056369821140251e-06, + "loss": 0.0903, + "step": 77225 + }, + { + "epoch": 3.6, + "learning_rate": 8.055586036085465e-06, + "loss": 0.0431, + "step": 77230 + }, + { + "epoch": 3.6, + "learning_rate": 8.054802251030677e-06, + "loss": 0.0962, + "step": 77235 + }, + { + "epoch": 3.6, + "learning_rate": 8.054018465975891e-06, + "loss": 0.237, + "step": 77240 + }, + { + "epoch": 3.6, + "learning_rate": 8.053234680921105e-06, + "loss": 0.3863, + "step": 77245 + }, + { + "epoch": 3.6, + "learning_rate": 8.052450895866319e-06, + "loss": 0.0608, + "step": 77250 + }, + { + "epoch": 3.6, + "learning_rate": 8.051667110811531e-06, + "loss": 0.0215, + "step": 77255 + }, + { + "epoch": 3.61, + "learning_rate": 8.050883325756745e-06, + "loss": 0.088, + "step": 77260 + }, + { + "epoch": 3.61, + "learning_rate": 8.050099540701959e-06, + "loss": 0.0515, + "step": 77265 + }, + { + "epoch": 3.61, + "learning_rate": 8.049315755647173e-06, + "loss": 0.0528, + "step": 77270 + }, + { + "epoch": 3.61, + "learning_rate": 8.048531970592387e-06, + "loss": 0.0872, + "step": 77275 + }, + { + "epoch": 3.61, + "learning_rate": 8.047748185537599e-06, + "loss": 0.1265, + "step": 77280 + }, + { + "epoch": 3.61, + "learning_rate": 8.046964400482811e-06, + "loss": 0.1019, + "step": 77285 + }, + { + "epoch": 3.61, + "learning_rate": 8.046180615428025e-06, + "loss": 0.1935, + "step": 77290 + }, + { + "epoch": 3.61, + "learning_rate": 8.045396830373239e-06, + "loss": 0.331, + "step": 77295 + }, + { + "epoch": 3.61, + "learning_rate": 8.044613045318453e-06, + "loss": 0.0256, + "step": 77300 + }, + { + "epoch": 3.61, + "learning_rate": 8.043829260263665e-06, + "loss": 0.06, + "step": 77305 + }, + { + "epoch": 3.61, + "learning_rate": 8.043045475208879e-06, + "loss": 0.0504, + "step": 77310 + }, + { + "epoch": 3.61, + "learning_rate": 8.042261690154093e-06, + "loss": 0.0804, + "step": 77315 + }, + { + "epoch": 3.61, + "learning_rate": 8.041477905099307e-06, + "loss": 0.0677, + "step": 77320 + }, + { + "epoch": 3.61, + "learning_rate": 8.04069412004452e-06, + "loss": 0.088, + "step": 77325 + }, + { + "epoch": 3.61, + "learning_rate": 8.039910334989733e-06, + "loss": 0.095, + "step": 77330 + }, + { + "epoch": 3.61, + "learning_rate": 8.039126549934947e-06, + "loss": 0.0983, + "step": 77335 + }, + { + "epoch": 3.61, + "learning_rate": 8.03834276488016e-06, + "loss": 0.204, + "step": 77340 + }, + { + "epoch": 3.61, + "learning_rate": 8.037558979825373e-06, + "loss": 0.3072, + "step": 77345 + }, + { + "epoch": 3.61, + "learning_rate": 8.036775194770587e-06, + "loss": 0.0787, + "step": 77350 + }, + { + "epoch": 3.61, + "learning_rate": 8.035991409715799e-06, + "loss": 0.0133, + "step": 77355 + }, + { + "epoch": 3.61, + "learning_rate": 8.035207624661013e-06, + "loss": 0.0537, + "step": 77360 + }, + { + "epoch": 3.61, + "learning_rate": 8.034423839606227e-06, + "loss": 0.0439, + "step": 77365 + }, + { + "epoch": 3.61, + "learning_rate": 8.03364005455144e-06, + "loss": 0.098, + "step": 77370 + }, + { + "epoch": 3.61, + "learning_rate": 8.032856269496655e-06, + "loss": 0.0948, + "step": 77375 + }, + { + "epoch": 3.61, + "learning_rate": 8.032072484441867e-06, + "loss": 0.08, + "step": 77380 + }, + { + "epoch": 3.61, + "learning_rate": 8.03128869938708e-06, + "loss": 0.0979, + "step": 77385 + }, + { + "epoch": 3.61, + "learning_rate": 8.030504914332295e-06, + "loss": 0.2572, + "step": 77390 + }, + { + "epoch": 3.61, + "learning_rate": 8.029721129277509e-06, + "loss": 0.3724, + "step": 77395 + }, + { + "epoch": 3.61, + "learning_rate": 8.02893734422272e-06, + "loss": 0.0675, + "step": 77400 + }, + { + "epoch": 3.61, + "learning_rate": 8.028153559167935e-06, + "loss": 0.0416, + "step": 77405 + }, + { + "epoch": 3.61, + "learning_rate": 8.027369774113147e-06, + "loss": 0.046, + "step": 77410 + }, + { + "epoch": 3.61, + "learning_rate": 8.02658598905836e-06, + "loss": 0.1032, + "step": 77415 + }, + { + "epoch": 3.61, + "learning_rate": 8.025802204003575e-06, + "loss": 0.0493, + "step": 77420 + }, + { + "epoch": 3.61, + "learning_rate": 8.025018418948789e-06, + "loss": 0.0605, + "step": 77425 + }, + { + "epoch": 3.61, + "learning_rate": 8.024234633894001e-06, + "loss": 0.1291, + "step": 77430 + }, + { + "epoch": 3.61, + "learning_rate": 8.023450848839215e-06, + "loss": 0.1378, + "step": 77435 + }, + { + "epoch": 3.61, + "learning_rate": 8.022667063784429e-06, + "loss": 0.1254, + "step": 77440 + }, + { + "epoch": 3.61, + "learning_rate": 8.021883278729643e-06, + "loss": 0.3442, + "step": 77445 + }, + { + "epoch": 3.61, + "learning_rate": 8.021099493674855e-06, + "loss": 0.0681, + "step": 77450 + }, + { + "epoch": 3.61, + "learning_rate": 8.020315708620069e-06, + "loss": 0.0381, + "step": 77455 + }, + { + "epoch": 3.61, + "learning_rate": 8.019531923565283e-06, + "loss": 0.0804, + "step": 77460 + }, + { + "epoch": 3.61, + "learning_rate": 8.018748138510496e-06, + "loss": 0.0281, + "step": 77465 + }, + { + "epoch": 3.61, + "learning_rate": 8.017964353455709e-06, + "loss": 0.0935, + "step": 77470 + }, + { + "epoch": 3.62, + "learning_rate": 8.017180568400923e-06, + "loss": 0.1158, + "step": 77475 + }, + { + "epoch": 3.62, + "learning_rate": 8.016396783346135e-06, + "loss": 0.1317, + "step": 77480 + }, + { + "epoch": 3.62, + "learning_rate": 8.015612998291349e-06, + "loss": 0.1924, + "step": 77485 + }, + { + "epoch": 3.62, + "learning_rate": 8.014829213236563e-06, + "loss": 0.2521, + "step": 77490 + }, + { + "epoch": 3.62, + "learning_rate": 8.014045428181777e-06, + "loss": 0.2315, + "step": 77495 + }, + { + "epoch": 3.62, + "learning_rate": 8.013261643126989e-06, + "loss": 0.0565, + "step": 77500 + }, + { + "epoch": 3.62, + "learning_rate": 8.012477858072203e-06, + "loss": 0.0091, + "step": 77505 + }, + { + "epoch": 3.62, + "learning_rate": 8.011694073017417e-06, + "loss": 0.0347, + "step": 77510 + }, + { + "epoch": 3.62, + "learning_rate": 8.01091028796263e-06, + "loss": 0.0646, + "step": 77515 + }, + { + "epoch": 3.62, + "learning_rate": 8.010126502907843e-06, + "loss": 0.0254, + "step": 77520 + }, + { + "epoch": 3.62, + "learning_rate": 8.009342717853057e-06, + "loss": 0.0536, + "step": 77525 + }, + { + "epoch": 3.62, + "learning_rate": 8.00855893279827e-06, + "loss": 0.1028, + "step": 77530 + }, + { + "epoch": 3.62, + "learning_rate": 8.007775147743484e-06, + "loss": 0.0755, + "step": 77535 + }, + { + "epoch": 3.62, + "learning_rate": 8.006991362688697e-06, + "loss": 0.19, + "step": 77540 + }, + { + "epoch": 3.62, + "learning_rate": 8.00620757763391e-06, + "loss": 0.4053, + "step": 77545 + }, + { + "epoch": 3.62, + "learning_rate": 8.005423792579123e-06, + "loss": 0.0813, + "step": 77550 + }, + { + "epoch": 3.62, + "learning_rate": 8.004640007524337e-06, + "loss": 0.0142, + "step": 77555 + }, + { + "epoch": 3.62, + "learning_rate": 8.00385622246955e-06, + "loss": 0.0355, + "step": 77560 + }, + { + "epoch": 3.62, + "learning_rate": 8.003072437414764e-06, + "loss": 0.0556, + "step": 77565 + }, + { + "epoch": 3.62, + "learning_rate": 8.002288652359977e-06, + "loss": 0.0372, + "step": 77570 + }, + { + "epoch": 3.62, + "learning_rate": 8.00150486730519e-06, + "loss": 0.054, + "step": 77575 + }, + { + "epoch": 3.62, + "learning_rate": 8.000721082250404e-06, + "loss": 0.0955, + "step": 77580 + }, + { + "epoch": 3.62, + "learning_rate": 7.999937297195618e-06, + "loss": 0.1179, + "step": 77585 + }, + { + "epoch": 3.62, + "learning_rate": 7.999153512140832e-06, + "loss": 0.1637, + "step": 77590 + }, + { + "epoch": 3.62, + "learning_rate": 7.998369727086044e-06, + "loss": 0.2502, + "step": 77595 + }, + { + "epoch": 3.62, + "learning_rate": 7.997585942031258e-06, + "loss": 0.0943, + "step": 77600 + }, + { + "epoch": 3.62, + "learning_rate": 7.99680215697647e-06, + "loss": 0.0234, + "step": 77605 + }, + { + "epoch": 3.62, + "learning_rate": 7.996018371921685e-06, + "loss": 0.0662, + "step": 77610 + }, + { + "epoch": 3.62, + "learning_rate": 7.995234586866898e-06, + "loss": 0.0476, + "step": 77615 + }, + { + "epoch": 3.62, + "learning_rate": 7.99445080181211e-06, + "loss": 0.0984, + "step": 77620 + }, + { + "epoch": 3.62, + "learning_rate": 7.993667016757325e-06, + "loss": 0.0983, + "step": 77625 + }, + { + "epoch": 3.62, + "learning_rate": 7.992883231702538e-06, + "loss": 0.0697, + "step": 77630 + }, + { + "epoch": 3.62, + "learning_rate": 7.992099446647752e-06, + "loss": 0.0726, + "step": 77635 + }, + { + "epoch": 3.62, + "learning_rate": 7.991315661592966e-06, + "loss": 0.1864, + "step": 77640 + }, + { + "epoch": 3.62, + "learning_rate": 7.990531876538178e-06, + "loss": 0.2873, + "step": 77645 + }, + { + "epoch": 3.62, + "learning_rate": 7.989748091483392e-06, + "loss": 0.0721, + "step": 77650 + }, + { + "epoch": 3.62, + "learning_rate": 7.988964306428606e-06, + "loss": 0.0206, + "step": 77655 + }, + { + "epoch": 3.62, + "learning_rate": 7.98818052137382e-06, + "loss": 0.0385, + "step": 77660 + }, + { + "epoch": 3.62, + "learning_rate": 7.987396736319032e-06, + "loss": 0.0535, + "step": 77665 + }, + { + "epoch": 3.62, + "learning_rate": 7.986612951264245e-06, + "loss": 0.0577, + "step": 77670 + }, + { + "epoch": 3.62, + "learning_rate": 7.985829166209459e-06, + "loss": 0.0842, + "step": 77675 + }, + { + "epoch": 3.62, + "learning_rate": 7.985045381154672e-06, + "loss": 0.0724, + "step": 77680 + }, + { + "epoch": 3.62, + "learning_rate": 7.984261596099886e-06, + "loss": 0.0749, + "step": 77685 + }, + { + "epoch": 3.63, + "learning_rate": 7.9834778110451e-06, + "loss": 0.1389, + "step": 77690 + }, + { + "epoch": 3.63, + "learning_rate": 7.982694025990312e-06, + "loss": 0.2554, + "step": 77695 + }, + { + "epoch": 3.63, + "learning_rate": 7.981910240935526e-06, + "loss": 0.0241, + "step": 77700 + }, + { + "epoch": 3.63, + "learning_rate": 7.98112645588074e-06, + "loss": 0.0397, + "step": 77705 + }, + { + "epoch": 3.63, + "learning_rate": 7.980342670825954e-06, + "loss": 0.0711, + "step": 77710 + }, + { + "epoch": 3.63, + "learning_rate": 7.979558885771166e-06, + "loss": 0.0933, + "step": 77715 + }, + { + "epoch": 3.63, + "learning_rate": 7.97877510071638e-06, + "loss": 0.0517, + "step": 77720 + }, + { + "epoch": 3.63, + "learning_rate": 7.977991315661594e-06, + "loss": 0.1226, + "step": 77725 + }, + { + "epoch": 3.63, + "learning_rate": 7.977207530606808e-06, + "loss": 0.1274, + "step": 77730 + }, + { + "epoch": 3.63, + "learning_rate": 7.97642374555202e-06, + "loss": 0.1621, + "step": 77735 + }, + { + "epoch": 3.63, + "learning_rate": 7.975639960497234e-06, + "loss": 0.279, + "step": 77740 + }, + { + "epoch": 3.63, + "learning_rate": 7.974856175442446e-06, + "loss": 0.2862, + "step": 77745 + }, + { + "epoch": 3.63, + "learning_rate": 7.97407239038766e-06, + "loss": 0.0266, + "step": 77750 + }, + { + "epoch": 3.63, + "learning_rate": 7.973288605332874e-06, + "loss": 0.0083, + "step": 77755 + }, + { + "epoch": 3.63, + "learning_rate": 7.972504820278088e-06, + "loss": 0.0477, + "step": 77760 + }, + { + "epoch": 3.63, + "learning_rate": 7.9717210352233e-06, + "loss": 0.0244, + "step": 77765 + }, + { + "epoch": 3.63, + "learning_rate": 7.970937250168514e-06, + "loss": 0.0371, + "step": 77770 + }, + { + "epoch": 3.63, + "learning_rate": 7.970153465113728e-06, + "loss": 0.1224, + "step": 77775 + }, + { + "epoch": 3.63, + "learning_rate": 7.969369680058942e-06, + "loss": 0.1255, + "step": 77780 + }, + { + "epoch": 3.63, + "learning_rate": 7.968585895004154e-06, + "loss": 0.1138, + "step": 77785 + }, + { + "epoch": 3.63, + "learning_rate": 7.967802109949368e-06, + "loss": 0.174, + "step": 77790 + }, + { + "epoch": 3.63, + "learning_rate": 7.967018324894582e-06, + "loss": 0.2551, + "step": 77795 + }, + { + "epoch": 3.63, + "learning_rate": 7.966234539839794e-06, + "loss": 0.0698, + "step": 77800 + }, + { + "epoch": 3.63, + "learning_rate": 7.965450754785008e-06, + "loss": 0.0357, + "step": 77805 + }, + { + "epoch": 3.63, + "learning_rate": 7.964666969730222e-06, + "loss": 0.0263, + "step": 77810 + }, + { + "epoch": 3.63, + "learning_rate": 7.963883184675434e-06, + "loss": 0.0402, + "step": 77815 + }, + { + "epoch": 3.63, + "learning_rate": 7.963099399620648e-06, + "loss": 0.0769, + "step": 77820 + }, + { + "epoch": 3.63, + "learning_rate": 7.962315614565862e-06, + "loss": 0.0841, + "step": 77825 + }, + { + "epoch": 3.63, + "learning_rate": 7.961531829511076e-06, + "loss": 0.1091, + "step": 77830 + }, + { + "epoch": 3.63, + "learning_rate": 7.960748044456288e-06, + "loss": 0.0706, + "step": 77835 + }, + { + "epoch": 3.63, + "learning_rate": 7.959964259401502e-06, + "loss": 0.1679, + "step": 77840 + }, + { + "epoch": 3.63, + "learning_rate": 7.959180474346716e-06, + "loss": 0.2807, + "step": 77845 + }, + { + "epoch": 3.63, + "learning_rate": 7.95839668929193e-06, + "loss": 0.0608, + "step": 77850 + }, + { + "epoch": 3.63, + "learning_rate": 7.957612904237144e-06, + "loss": 0.0379, + "step": 77855 + }, + { + "epoch": 3.63, + "learning_rate": 7.956829119182356e-06, + "loss": 0.012, + "step": 77860 + }, + { + "epoch": 3.63, + "learning_rate": 7.956045334127568e-06, + "loss": 0.1044, + "step": 77865 + }, + { + "epoch": 3.63, + "learning_rate": 7.955261549072782e-06, + "loss": 0.0755, + "step": 77870 + }, + { + "epoch": 3.63, + "learning_rate": 7.954477764017996e-06, + "loss": 0.1034, + "step": 77875 + }, + { + "epoch": 3.63, + "learning_rate": 7.95369397896321e-06, + "loss": 0.078, + "step": 77880 + }, + { + "epoch": 3.63, + "learning_rate": 7.952910193908422e-06, + "loss": 0.1178, + "step": 77885 + }, + { + "epoch": 3.63, + "learning_rate": 7.952126408853636e-06, + "loss": 0.2689, + "step": 77890 + }, + { + "epoch": 3.63, + "learning_rate": 7.95134262379885e-06, + "loss": 0.297, + "step": 77895 + }, + { + "epoch": 3.63, + "learning_rate": 7.950558838744064e-06, + "loss": 0.1267, + "step": 77900 + }, + { + "epoch": 3.64, + "learning_rate": 7.949775053689278e-06, + "loss": 0.0355, + "step": 77905 + }, + { + "epoch": 3.64, + "learning_rate": 7.94899126863449e-06, + "loss": 0.0595, + "step": 77910 + }, + { + "epoch": 3.64, + "learning_rate": 7.948207483579704e-06, + "loss": 0.0467, + "step": 77915 + }, + { + "epoch": 3.64, + "learning_rate": 7.947423698524918e-06, + "loss": 0.0638, + "step": 77920 + }, + { + "epoch": 3.64, + "learning_rate": 7.946639913470132e-06, + "loss": 0.0661, + "step": 77925 + }, + { + "epoch": 3.64, + "learning_rate": 7.945856128415344e-06, + "loss": 0.0676, + "step": 77930 + }, + { + "epoch": 3.64, + "learning_rate": 7.945072343360556e-06, + "loss": 0.123, + "step": 77935 + }, + { + "epoch": 3.64, + "learning_rate": 7.94428855830577e-06, + "loss": 0.2575, + "step": 77940 + }, + { + "epoch": 3.64, + "learning_rate": 7.943504773250984e-06, + "loss": 0.2391, + "step": 77945 + }, + { + "epoch": 3.64, + "learning_rate": 7.942720988196198e-06, + "loss": 0.0485, + "step": 77950 + }, + { + "epoch": 3.64, + "learning_rate": 7.941937203141412e-06, + "loss": 0.0493, + "step": 77955 + }, + { + "epoch": 3.64, + "learning_rate": 7.941153418086624e-06, + "loss": 0.0492, + "step": 77960 + }, + { + "epoch": 3.64, + "learning_rate": 7.940369633031838e-06, + "loss": 0.0334, + "step": 77965 + }, + { + "epoch": 3.64, + "learning_rate": 7.939585847977052e-06, + "loss": 0.0832, + "step": 77970 + }, + { + "epoch": 3.64, + "learning_rate": 7.938802062922266e-06, + "loss": 0.0248, + "step": 77975 + }, + { + "epoch": 3.64, + "learning_rate": 7.938018277867478e-06, + "loss": 0.0332, + "step": 77980 + }, + { + "epoch": 3.64, + "learning_rate": 7.937234492812692e-06, + "loss": 0.1669, + "step": 77985 + }, + { + "epoch": 3.64, + "learning_rate": 7.936450707757906e-06, + "loss": 0.2448, + "step": 77990 + }, + { + "epoch": 3.64, + "learning_rate": 7.935666922703118e-06, + "loss": 0.3489, + "step": 77995 + }, + { + "epoch": 3.64, + "learning_rate": 7.934883137648332e-06, + "loss": 0.0499, + "step": 78000 + }, + { + "epoch": 3.64, + "learning_rate": 7.934099352593546e-06, + "loss": 0.0775, + "step": 78005 + }, + { + "epoch": 3.64, + "learning_rate": 7.933315567538758e-06, + "loss": 0.0262, + "step": 78010 + }, + { + "epoch": 3.64, + "learning_rate": 7.932531782483972e-06, + "loss": 0.0921, + "step": 78015 + }, + { + "epoch": 3.64, + "learning_rate": 7.931747997429186e-06, + "loss": 0.0541, + "step": 78020 + }, + { + "epoch": 3.64, + "learning_rate": 7.9309642123744e-06, + "loss": 0.1125, + "step": 78025 + }, + { + "epoch": 3.64, + "learning_rate": 7.930180427319612e-06, + "loss": 0.083, + "step": 78030 + }, + { + "epoch": 3.64, + "learning_rate": 7.929396642264826e-06, + "loss": 0.0961, + "step": 78035 + }, + { + "epoch": 3.64, + "learning_rate": 7.92861285721004e-06, + "loss": 0.1987, + "step": 78040 + }, + { + "epoch": 3.64, + "learning_rate": 7.927829072155254e-06, + "loss": 0.3407, + "step": 78045 + }, + { + "epoch": 3.64, + "learning_rate": 7.927045287100466e-06, + "loss": 0.0674, + "step": 78050 + }, + { + "epoch": 3.64, + "learning_rate": 7.92626150204568e-06, + "loss": 0.0582, + "step": 78055 + }, + { + "epoch": 3.64, + "learning_rate": 7.925477716990892e-06, + "loss": 0.0877, + "step": 78060 + }, + { + "epoch": 3.64, + "learning_rate": 7.924693931936106e-06, + "loss": 0.6231, + "step": 78065 + }, + { + "epoch": 3.64, + "learning_rate": 7.92391014688132e-06, + "loss": 0.0963, + "step": 78070 + }, + { + "epoch": 3.64, + "learning_rate": 7.923126361826534e-06, + "loss": 0.0848, + "step": 78075 + }, + { + "epoch": 3.64, + "learning_rate": 7.922342576771746e-06, + "loss": 0.119, + "step": 78080 + }, + { + "epoch": 3.64, + "learning_rate": 7.92155879171696e-06, + "loss": 0.1368, + "step": 78085 + }, + { + "epoch": 3.64, + "learning_rate": 7.920775006662174e-06, + "loss": 0.299, + "step": 78090 + }, + { + "epoch": 3.64, + "learning_rate": 7.919991221607388e-06, + "loss": 0.2911, + "step": 78095 + }, + { + "epoch": 3.64, + "learning_rate": 7.9192074365526e-06, + "loss": 0.0824, + "step": 78100 + }, + { + "epoch": 3.64, + "learning_rate": 7.918423651497814e-06, + "loss": 0.0767, + "step": 78105 + }, + { + "epoch": 3.64, + "learning_rate": 7.917639866443028e-06, + "loss": 0.0962, + "step": 78110 + }, + { + "epoch": 3.64, + "learning_rate": 7.916856081388242e-06, + "loss": 0.0316, + "step": 78115 + }, + { + "epoch": 3.65, + "learning_rate": 7.916072296333455e-06, + "loss": 0.1537, + "step": 78120 + }, + { + "epoch": 3.65, + "learning_rate": 7.915288511278668e-06, + "loss": 0.1014, + "step": 78125 + }, + { + "epoch": 3.65, + "learning_rate": 7.91450472622388e-06, + "loss": 0.0757, + "step": 78130 + }, + { + "epoch": 3.65, + "learning_rate": 7.913720941169094e-06, + "loss": 0.1169, + "step": 78135 + }, + { + "epoch": 3.65, + "learning_rate": 7.912937156114308e-06, + "loss": 0.1633, + "step": 78140 + }, + { + "epoch": 3.65, + "learning_rate": 7.912153371059522e-06, + "loss": 0.301, + "step": 78145 + }, + { + "epoch": 3.65, + "learning_rate": 7.911369586004734e-06, + "loss": 0.07, + "step": 78150 + }, + { + "epoch": 3.65, + "learning_rate": 7.910585800949948e-06, + "loss": 0.0381, + "step": 78155 + }, + { + "epoch": 3.65, + "learning_rate": 7.909802015895162e-06, + "loss": 0.0312, + "step": 78160 + }, + { + "epoch": 3.65, + "learning_rate": 7.909018230840376e-06, + "loss": 0.0264, + "step": 78165 + }, + { + "epoch": 3.65, + "learning_rate": 7.90823444578559e-06, + "loss": 0.0543, + "step": 78170 + }, + { + "epoch": 3.65, + "learning_rate": 7.907450660730802e-06, + "loss": 0.0919, + "step": 78175 + }, + { + "epoch": 3.65, + "learning_rate": 7.906666875676016e-06, + "loss": 0.1097, + "step": 78180 + }, + { + "epoch": 3.65, + "learning_rate": 7.90588309062123e-06, + "loss": 0.0907, + "step": 78185 + }, + { + "epoch": 3.65, + "learning_rate": 7.905099305566442e-06, + "loss": 0.1474, + "step": 78190 + }, + { + "epoch": 3.65, + "learning_rate": 7.904315520511656e-06, + "loss": 0.2071, + "step": 78195 + }, + { + "epoch": 3.65, + "learning_rate": 7.903531735456868e-06, + "loss": 0.0569, + "step": 78200 + }, + { + "epoch": 3.65, + "learning_rate": 7.902747950402082e-06, + "loss": 0.0304, + "step": 78205 + }, + { + "epoch": 3.65, + "learning_rate": 7.901964165347296e-06, + "loss": 0.0876, + "step": 78210 + }, + { + "epoch": 3.65, + "learning_rate": 7.90118038029251e-06, + "loss": 0.0544, + "step": 78215 + }, + { + "epoch": 3.65, + "learning_rate": 7.900396595237723e-06, + "loss": 0.0574, + "step": 78220 + }, + { + "epoch": 3.65, + "learning_rate": 7.899612810182936e-06, + "loss": 0.0921, + "step": 78225 + }, + { + "epoch": 3.65, + "learning_rate": 7.89882902512815e-06, + "loss": 0.1187, + "step": 78230 + }, + { + "epoch": 3.65, + "learning_rate": 7.898045240073363e-06, + "loss": 0.1302, + "step": 78235 + }, + { + "epoch": 3.65, + "learning_rate": 7.897261455018577e-06, + "loss": 0.1524, + "step": 78240 + }, + { + "epoch": 3.65, + "learning_rate": 7.89647766996379e-06, + "loss": 0.2349, + "step": 78245 + }, + { + "epoch": 3.65, + "learning_rate": 7.895693884909003e-06, + "loss": 0.0869, + "step": 78250 + }, + { + "epoch": 3.65, + "learning_rate": 7.894910099854216e-06, + "loss": 0.0222, + "step": 78255 + }, + { + "epoch": 3.65, + "learning_rate": 7.89412631479943e-06, + "loss": 0.0833, + "step": 78260 + }, + { + "epoch": 3.65, + "learning_rate": 7.893342529744643e-06, + "loss": 0.0476, + "step": 78265 + }, + { + "epoch": 3.65, + "learning_rate": 7.892558744689857e-06, + "loss": 0.063, + "step": 78270 + }, + { + "epoch": 3.65, + "learning_rate": 7.89177495963507e-06, + "loss": 0.085, + "step": 78275 + }, + { + "epoch": 3.65, + "learning_rate": 7.890991174580284e-06, + "loss": 0.1516, + "step": 78280 + }, + { + "epoch": 3.65, + "learning_rate": 7.890207389525497e-06, + "loss": 0.0996, + "step": 78285 + }, + { + "epoch": 3.65, + "learning_rate": 7.889423604470711e-06, + "loss": 0.178, + "step": 78290 + }, + { + "epoch": 3.65, + "learning_rate": 7.888639819415924e-06, + "loss": 0.2889, + "step": 78295 + }, + { + "epoch": 3.65, + "learning_rate": 7.887856034361137e-06, + "loss": 0.0732, + "step": 78300 + }, + { + "epoch": 3.65, + "learning_rate": 7.887072249306351e-06, + "loss": 0.0353, + "step": 78305 + }, + { + "epoch": 3.65, + "learning_rate": 7.886288464251565e-06, + "loss": 0.0454, + "step": 78310 + }, + { + "epoch": 3.65, + "learning_rate": 7.885504679196777e-06, + "loss": 0.0422, + "step": 78315 + }, + { + "epoch": 3.65, + "learning_rate": 7.884720894141991e-06, + "loss": 0.073, + "step": 78320 + }, + { + "epoch": 3.65, + "learning_rate": 7.883937109087204e-06, + "loss": 0.0793, + "step": 78325 + }, + { + "epoch": 3.65, + "learning_rate": 7.883153324032417e-06, + "loss": 0.0979, + "step": 78330 + }, + { + "epoch": 3.66, + "learning_rate": 7.882369538977631e-06, + "loss": 0.1539, + "step": 78335 + }, + { + "epoch": 3.66, + "learning_rate": 7.881585753922845e-06, + "loss": 0.2239, + "step": 78340 + }, + { + "epoch": 3.66, + "learning_rate": 7.880801968868058e-06, + "loss": 0.2689, + "step": 78345 + }, + { + "epoch": 3.66, + "learning_rate": 7.880018183813271e-06, + "loss": 0.0597, + "step": 78350 + }, + { + "epoch": 3.66, + "learning_rate": 7.879234398758485e-06, + "loss": 0.0656, + "step": 78355 + }, + { + "epoch": 3.66, + "learning_rate": 7.8784506137037e-06, + "loss": 0.0343, + "step": 78360 + }, + { + "epoch": 3.66, + "learning_rate": 7.877666828648911e-06, + "loss": 0.0812, + "step": 78365 + }, + { + "epoch": 3.66, + "learning_rate": 7.876883043594125e-06, + "loss": 0.0899, + "step": 78370 + }, + { + "epoch": 3.66, + "learning_rate": 7.87609925853934e-06, + "loss": 0.0821, + "step": 78375 + }, + { + "epoch": 3.66, + "learning_rate": 7.875315473484553e-06, + "loss": 0.0868, + "step": 78380 + }, + { + "epoch": 3.66, + "learning_rate": 7.874531688429765e-06, + "loss": 0.0812, + "step": 78385 + }, + { + "epoch": 3.66, + "learning_rate": 7.87374790337498e-06, + "loss": 0.1704, + "step": 78390 + }, + { + "epoch": 3.66, + "learning_rate": 7.872964118320191e-06, + "loss": 0.4426, + "step": 78395 + }, + { + "epoch": 3.66, + "learning_rate": 7.872180333265405e-06, + "loss": 0.0825, + "step": 78400 + }, + { + "epoch": 3.66, + "learning_rate": 7.87139654821062e-06, + "loss": 0.0304, + "step": 78405 + }, + { + "epoch": 3.66, + "learning_rate": 7.870612763155833e-06, + "loss": 0.0108, + "step": 78410 + }, + { + "epoch": 3.66, + "learning_rate": 7.869828978101045e-06, + "loss": 0.061, + "step": 78415 + }, + { + "epoch": 3.66, + "learning_rate": 7.86904519304626e-06, + "loss": 0.044, + "step": 78420 + }, + { + "epoch": 3.66, + "learning_rate": 7.868261407991473e-06, + "loss": 0.1055, + "step": 78425 + }, + { + "epoch": 3.66, + "learning_rate": 7.867477622936687e-06, + "loss": 0.0752, + "step": 78430 + }, + { + "epoch": 3.66, + "learning_rate": 7.866693837881901e-06, + "loss": 0.1773, + "step": 78435 + }, + { + "epoch": 3.66, + "learning_rate": 7.865910052827113e-06, + "loss": 0.1699, + "step": 78440 + }, + { + "epoch": 3.66, + "learning_rate": 7.865126267772327e-06, + "loss": 0.1484, + "step": 78445 + }, + { + "epoch": 3.66, + "learning_rate": 7.86434248271754e-06, + "loss": 0.0886, + "step": 78450 + }, + { + "epoch": 3.66, + "learning_rate": 7.863558697662753e-06, + "loss": 0.028, + "step": 78455 + }, + { + "epoch": 3.66, + "learning_rate": 7.862774912607967e-06, + "loss": 0.0327, + "step": 78460 + }, + { + "epoch": 3.66, + "learning_rate": 7.86199112755318e-06, + "loss": 0.0557, + "step": 78465 + }, + { + "epoch": 3.66, + "learning_rate": 7.861207342498393e-06, + "loss": 0.068, + "step": 78470 + }, + { + "epoch": 3.66, + "learning_rate": 7.860423557443607e-06, + "loss": 0.0406, + "step": 78475 + }, + { + "epoch": 3.66, + "learning_rate": 7.859639772388821e-06, + "loss": 0.1129, + "step": 78480 + }, + { + "epoch": 3.66, + "learning_rate": 7.858855987334035e-06, + "loss": 0.0918, + "step": 78485 + }, + { + "epoch": 3.66, + "learning_rate": 7.858072202279247e-06, + "loss": 0.1735, + "step": 78490 + }, + { + "epoch": 3.66, + "learning_rate": 7.857288417224461e-06, + "loss": 0.2278, + "step": 78495 + }, + { + "epoch": 3.66, + "learning_rate": 7.856504632169675e-06, + "loss": 0.063, + "step": 78500 + }, + { + "epoch": 3.66, + "learning_rate": 7.855720847114889e-06, + "loss": 0.0642, + "step": 78505 + }, + { + "epoch": 3.66, + "learning_rate": 7.854937062060101e-06, + "loss": 0.029, + "step": 78510 + }, + { + "epoch": 3.66, + "learning_rate": 7.854153277005313e-06, + "loss": 0.0382, + "step": 78515 + }, + { + "epoch": 3.66, + "learning_rate": 7.853369491950527e-06, + "loss": 0.0576, + "step": 78520 + }, + { + "epoch": 3.66, + "learning_rate": 7.852585706895741e-06, + "loss": 0.096, + "step": 78525 + }, + { + "epoch": 3.66, + "learning_rate": 7.851801921840955e-06, + "loss": 0.0674, + "step": 78530 + }, + { + "epoch": 3.66, + "learning_rate": 7.851018136786169e-06, + "loss": 0.2253, + "step": 78535 + }, + { + "epoch": 3.66, + "learning_rate": 7.850234351731381e-06, + "loss": 0.1518, + "step": 78540 + }, + { + "epoch": 3.67, + "learning_rate": 7.849450566676595e-06, + "loss": 0.386, + "step": 78545 + }, + { + "epoch": 3.67, + "learning_rate": 7.848666781621809e-06, + "loss": 0.0507, + "step": 78550 + }, + { + "epoch": 3.67, + "learning_rate": 7.847882996567023e-06, + "loss": 0.0119, + "step": 78555 + }, + { + "epoch": 3.67, + "learning_rate": 7.847099211512235e-06, + "loss": 0.0605, + "step": 78560 + }, + { + "epoch": 3.67, + "learning_rate": 7.846315426457449e-06, + "loss": 0.0267, + "step": 78565 + }, + { + "epoch": 3.67, + "learning_rate": 7.845531641402663e-06, + "loss": 0.054, + "step": 78570 + }, + { + "epoch": 3.67, + "learning_rate": 7.844747856347877e-06, + "loss": 0.1297, + "step": 78575 + }, + { + "epoch": 3.67, + "learning_rate": 7.843964071293089e-06, + "loss": 0.1203, + "step": 78580 + }, + { + "epoch": 3.67, + "learning_rate": 7.843180286238303e-06, + "loss": 0.0647, + "step": 78585 + }, + { + "epoch": 3.67, + "learning_rate": 7.842396501183515e-06, + "loss": 0.1628, + "step": 78590 + }, + { + "epoch": 3.67, + "learning_rate": 7.841612716128729e-06, + "loss": 0.3329, + "step": 78595 + }, + { + "epoch": 3.67, + "learning_rate": 7.840828931073943e-06, + "loss": 0.05, + "step": 78600 + }, + { + "epoch": 3.67, + "learning_rate": 7.840045146019157e-06, + "loss": 0.0131, + "step": 78605 + }, + { + "epoch": 3.67, + "learning_rate": 7.839261360964369e-06, + "loss": 0.0388, + "step": 78610 + }, + { + "epoch": 3.67, + "learning_rate": 7.838477575909583e-06, + "loss": 0.0757, + "step": 78615 + }, + { + "epoch": 3.67, + "learning_rate": 7.837693790854797e-06, + "loss": 0.0546, + "step": 78620 + }, + { + "epoch": 3.67, + "learning_rate": 7.83691000580001e-06, + "loss": 0.1291, + "step": 78625 + }, + { + "epoch": 3.67, + "learning_rate": 7.836126220745223e-06, + "loss": 0.0914, + "step": 78630 + }, + { + "epoch": 3.67, + "learning_rate": 7.835342435690437e-06, + "loss": 0.2011, + "step": 78635 + }, + { + "epoch": 3.67, + "learning_rate": 7.834558650635651e-06, + "loss": 0.1913, + "step": 78640 + }, + { + "epoch": 3.67, + "learning_rate": 7.833774865580863e-06, + "loss": 0.3214, + "step": 78645 + }, + { + "epoch": 3.67, + "learning_rate": 7.832991080526077e-06, + "loss": 0.0318, + "step": 78650 + }, + { + "epoch": 3.67, + "learning_rate": 7.832207295471291e-06, + "loss": 0.0228, + "step": 78655 + }, + { + "epoch": 3.67, + "learning_rate": 7.831423510416503e-06, + "loss": 0.0552, + "step": 78660 + }, + { + "epoch": 3.67, + "learning_rate": 7.830639725361717e-06, + "loss": 0.0665, + "step": 78665 + }, + { + "epoch": 3.67, + "learning_rate": 7.829855940306931e-06, + "loss": 0.0686, + "step": 78670 + }, + { + "epoch": 3.67, + "learning_rate": 7.829072155252145e-06, + "loss": 0.0723, + "step": 78675 + }, + { + "epoch": 3.67, + "learning_rate": 7.828288370197357e-06, + "loss": 0.1752, + "step": 78680 + }, + { + "epoch": 3.67, + "learning_rate": 7.827504585142571e-06, + "loss": 0.1152, + "step": 78685 + }, + { + "epoch": 3.67, + "learning_rate": 7.826720800087785e-06, + "loss": 0.1724, + "step": 78690 + }, + { + "epoch": 3.67, + "learning_rate": 7.825937015032999e-06, + "loss": 0.1554, + "step": 78695 + }, + { + "epoch": 3.67, + "learning_rate": 7.825153229978213e-06, + "loss": 0.054, + "step": 78700 + }, + { + "epoch": 3.67, + "learning_rate": 7.824369444923425e-06, + "loss": 0.0047, + "step": 78705 + }, + { + "epoch": 3.67, + "learning_rate": 7.823585659868637e-06, + "loss": 0.0628, + "step": 78710 + }, + { + "epoch": 3.67, + "learning_rate": 7.822801874813851e-06, + "loss": 0.1317, + "step": 78715 + }, + { + "epoch": 3.67, + "learning_rate": 7.822018089759065e-06, + "loss": 0.0651, + "step": 78720 + }, + { + "epoch": 3.67, + "learning_rate": 7.821234304704279e-06, + "loss": 0.0526, + "step": 78725 + }, + { + "epoch": 3.67, + "learning_rate": 7.820450519649491e-06, + "loss": 0.0481, + "step": 78730 + }, + { + "epoch": 3.67, + "learning_rate": 7.819666734594705e-06, + "loss": 0.1255, + "step": 78735 + }, + { + "epoch": 3.67, + "learning_rate": 7.818882949539919e-06, + "loss": 0.2002, + "step": 78740 + }, + { + "epoch": 3.67, + "learning_rate": 7.818099164485133e-06, + "loss": 0.2313, + "step": 78745 + }, + { + "epoch": 3.67, + "learning_rate": 7.817315379430347e-06, + "loss": 0.1013, + "step": 78750 + }, + { + "epoch": 3.67, + "learning_rate": 7.816531594375559e-06, + "loss": 0.0301, + "step": 78755 + }, + { + "epoch": 3.68, + "learning_rate": 7.815747809320773e-06, + "loss": 0.0135, + "step": 78760 + }, + { + "epoch": 3.68, + "learning_rate": 7.814964024265987e-06, + "loss": 0.0525, + "step": 78765 + }, + { + "epoch": 3.68, + "learning_rate": 7.8141802392112e-06, + "loss": 0.1247, + "step": 78770 + }, + { + "epoch": 3.68, + "learning_rate": 7.813396454156413e-06, + "loss": 0.0658, + "step": 78775 + }, + { + "epoch": 3.68, + "learning_rate": 7.812612669101625e-06, + "loss": 0.1077, + "step": 78780 + }, + { + "epoch": 3.68, + "learning_rate": 7.811828884046839e-06, + "loss": 0.1567, + "step": 78785 + }, + { + "epoch": 3.68, + "learning_rate": 7.811045098992053e-06, + "loss": 0.1781, + "step": 78790 + }, + { + "epoch": 3.68, + "learning_rate": 7.810261313937267e-06, + "loss": 0.2068, + "step": 78795 + }, + { + "epoch": 3.68, + "learning_rate": 7.80947752888248e-06, + "loss": 0.1577, + "step": 78800 + }, + { + "epoch": 3.68, + "learning_rate": 7.808693743827693e-06, + "loss": 0.0462, + "step": 78805 + }, + { + "epoch": 3.68, + "learning_rate": 7.807909958772907e-06, + "loss": 0.0606, + "step": 78810 + }, + { + "epoch": 3.68, + "learning_rate": 7.80712617371812e-06, + "loss": 0.0329, + "step": 78815 + }, + { + "epoch": 3.68, + "learning_rate": 7.806342388663335e-06, + "loss": 0.0622, + "step": 78820 + }, + { + "epoch": 3.68, + "learning_rate": 7.805558603608547e-06, + "loss": 0.2249, + "step": 78825 + }, + { + "epoch": 3.68, + "learning_rate": 7.80477481855376e-06, + "loss": 0.1718, + "step": 78830 + }, + { + "epoch": 3.68, + "learning_rate": 7.803991033498975e-06, + "loss": 0.1659, + "step": 78835 + }, + { + "epoch": 3.68, + "learning_rate": 7.803207248444187e-06, + "loss": 0.1572, + "step": 78840 + }, + { + "epoch": 3.68, + "learning_rate": 7.8024234633894e-06, + "loss": 0.3135, + "step": 78845 + }, + { + "epoch": 3.68, + "learning_rate": 7.801639678334615e-06, + "loss": 0.0707, + "step": 78850 + }, + { + "epoch": 3.68, + "learning_rate": 7.800855893279827e-06, + "loss": 0.0092, + "step": 78855 + }, + { + "epoch": 3.68, + "learning_rate": 7.80007210822504e-06, + "loss": 0.061, + "step": 78860 + }, + { + "epoch": 3.68, + "learning_rate": 7.799288323170255e-06, + "loss": 0.0245, + "step": 78865 + }, + { + "epoch": 3.68, + "learning_rate": 7.798504538115468e-06, + "loss": 0.0909, + "step": 78870 + }, + { + "epoch": 3.68, + "learning_rate": 7.79772075306068e-06, + "loss": 0.1077, + "step": 78875 + }, + { + "epoch": 3.68, + "learning_rate": 7.796936968005895e-06, + "loss": 0.0799, + "step": 78880 + }, + { + "epoch": 3.68, + "learning_rate": 7.796153182951109e-06, + "loss": 0.1157, + "step": 78885 + }, + { + "epoch": 3.68, + "learning_rate": 7.795369397896322e-06, + "loss": 0.2334, + "step": 78890 + }, + { + "epoch": 3.68, + "learning_rate": 7.794585612841535e-06, + "loss": 0.4019, + "step": 78895 + }, + { + "epoch": 3.68, + "learning_rate": 7.793801827786749e-06, + "loss": 0.0946, + "step": 78900 + }, + { + "epoch": 3.68, + "learning_rate": 7.79301804273196e-06, + "loss": 0.0261, + "step": 78905 + }, + { + "epoch": 3.68, + "learning_rate": 7.792234257677175e-06, + "loss": 0.0345, + "step": 78910 + }, + { + "epoch": 3.68, + "learning_rate": 7.791450472622389e-06, + "loss": 0.0157, + "step": 78915 + }, + { + "epoch": 3.68, + "learning_rate": 7.790666687567602e-06, + "loss": 0.0677, + "step": 78920 + }, + { + "epoch": 3.68, + "learning_rate": 7.789882902512815e-06, + "loss": 0.1063, + "step": 78925 + }, + { + "epoch": 3.68, + "learning_rate": 7.789099117458029e-06, + "loss": 0.0972, + "step": 78930 + }, + { + "epoch": 3.68, + "learning_rate": 7.788315332403242e-06, + "loss": 0.1355, + "step": 78935 + }, + { + "epoch": 3.68, + "learning_rate": 7.787531547348456e-06, + "loss": 0.2036, + "step": 78940 + }, + { + "epoch": 3.68, + "learning_rate": 7.786747762293669e-06, + "loss": 0.296, + "step": 78945 + }, + { + "epoch": 3.68, + "learning_rate": 7.785963977238883e-06, + "loss": 0.0542, + "step": 78950 + }, + { + "epoch": 3.68, + "learning_rate": 7.785180192184096e-06, + "loss": 0.0348, + "step": 78955 + }, + { + "epoch": 3.68, + "learning_rate": 7.78439640712931e-06, + "loss": 0.0292, + "step": 78960 + }, + { + "epoch": 3.68, + "learning_rate": 7.783612622074524e-06, + "loss": 0.115, + "step": 78965 + }, + { + "epoch": 3.68, + "learning_rate": 7.782828837019736e-06, + "loss": 0.1179, + "step": 78970 + }, + { + "epoch": 3.69, + "learning_rate": 7.782045051964949e-06, + "loss": 0.0641, + "step": 78975 + }, + { + "epoch": 3.69, + "learning_rate": 7.781261266910163e-06, + "loss": 0.0712, + "step": 78980 + }, + { + "epoch": 3.69, + "learning_rate": 7.780477481855376e-06, + "loss": 0.0613, + "step": 78985 + }, + { + "epoch": 3.69, + "learning_rate": 7.77969369680059e-06, + "loss": 0.2245, + "step": 78990 + }, + { + "epoch": 3.69, + "learning_rate": 7.778909911745803e-06, + "loss": 0.1215, + "step": 78995 + }, + { + "epoch": 3.69, + "learning_rate": 7.778126126691016e-06, + "loss": 0.0407, + "step": 79000 + }, + { + "epoch": 3.69, + "learning_rate": 7.77734234163623e-06, + "loss": 0.0457, + "step": 79005 + }, + { + "epoch": 3.69, + "learning_rate": 7.776558556581444e-06, + "loss": 0.0425, + "step": 79010 + }, + { + "epoch": 3.69, + "learning_rate": 7.775774771526658e-06, + "loss": 0.0623, + "step": 79015 + }, + { + "epoch": 3.69, + "learning_rate": 7.77499098647187e-06, + "loss": 0.0741, + "step": 79020 + }, + { + "epoch": 3.69, + "learning_rate": 7.774207201417084e-06, + "loss": 0.0935, + "step": 79025 + }, + { + "epoch": 3.69, + "learning_rate": 7.773423416362298e-06, + "loss": 0.0852, + "step": 79030 + }, + { + "epoch": 3.69, + "learning_rate": 7.77263963130751e-06, + "loss": 0.1269, + "step": 79035 + }, + { + "epoch": 3.69, + "learning_rate": 7.771855846252724e-06, + "loss": 0.2653, + "step": 79040 + }, + { + "epoch": 3.69, + "learning_rate": 7.771072061197937e-06, + "loss": 0.3134, + "step": 79045 + }, + { + "epoch": 3.69, + "learning_rate": 7.77028827614315e-06, + "loss": 0.0349, + "step": 79050 + }, + { + "epoch": 3.69, + "learning_rate": 7.769504491088364e-06, + "loss": 0.0252, + "step": 79055 + }, + { + "epoch": 3.69, + "learning_rate": 7.768720706033578e-06, + "loss": 0.0486, + "step": 79060 + }, + { + "epoch": 3.69, + "learning_rate": 7.767936920978792e-06, + "loss": 0.0675, + "step": 79065 + }, + { + "epoch": 3.69, + "learning_rate": 7.767153135924004e-06, + "loss": 0.0798, + "step": 79070 + }, + { + "epoch": 3.69, + "learning_rate": 7.766369350869218e-06, + "loss": 0.1307, + "step": 79075 + }, + { + "epoch": 3.69, + "learning_rate": 7.765585565814432e-06, + "loss": 0.0834, + "step": 79080 + }, + { + "epoch": 3.69, + "learning_rate": 7.764801780759646e-06, + "loss": 0.1156, + "step": 79085 + }, + { + "epoch": 3.69, + "learning_rate": 7.764017995704858e-06, + "loss": 0.3002, + "step": 79090 + }, + { + "epoch": 3.69, + "learning_rate": 7.763234210650072e-06, + "loss": 0.1833, + "step": 79095 + }, + { + "epoch": 3.69, + "learning_rate": 7.762450425595284e-06, + "loss": 0.0513, + "step": 79100 + }, + { + "epoch": 3.69, + "learning_rate": 7.761666640540498e-06, + "loss": 0.0273, + "step": 79105 + }, + { + "epoch": 3.69, + "learning_rate": 7.760882855485712e-06, + "loss": 0.0704, + "step": 79110 + }, + { + "epoch": 3.69, + "learning_rate": 7.760099070430926e-06, + "loss": 0.0579, + "step": 79115 + }, + { + "epoch": 3.69, + "learning_rate": 7.759315285376138e-06, + "loss": 0.1112, + "step": 79120 + }, + { + "epoch": 3.69, + "learning_rate": 7.758531500321352e-06, + "loss": 0.067, + "step": 79125 + }, + { + "epoch": 3.69, + "learning_rate": 7.757747715266566e-06, + "loss": 0.1091, + "step": 79130 + }, + { + "epoch": 3.69, + "learning_rate": 7.75696393021178e-06, + "loss": 0.1161, + "step": 79135 + }, + { + "epoch": 3.69, + "learning_rate": 7.756180145156992e-06, + "loss": 0.2371, + "step": 79140 + }, + { + "epoch": 3.69, + "learning_rate": 7.755396360102206e-06, + "loss": 0.277, + "step": 79145 + }, + { + "epoch": 3.69, + "learning_rate": 7.75461257504742e-06, + "loss": 0.0751, + "step": 79150 + }, + { + "epoch": 3.69, + "learning_rate": 7.75398554700359e-06, + "loss": 0.0851, + "step": 79155 + }, + { + "epoch": 3.69, + "learning_rate": 7.753201761948804e-06, + "loss": 0.0308, + "step": 79160 + }, + { + "epoch": 3.69, + "learning_rate": 7.752417976894018e-06, + "loss": 0.0851, + "step": 79165 + }, + { + "epoch": 3.69, + "learning_rate": 7.75163419183923e-06, + "loss": 0.0434, + "step": 79170 + }, + { + "epoch": 3.69, + "learning_rate": 7.750850406784444e-06, + "loss": 0.0541, + "step": 79175 + }, + { + "epoch": 3.69, + "learning_rate": 7.750066621729657e-06, + "loss": 0.1718, + "step": 79180 + }, + { + "epoch": 3.69, + "learning_rate": 7.74928283667487e-06, + "loss": 0.0957, + "step": 79185 + }, + { + "epoch": 3.7, + "learning_rate": 7.748499051620085e-06, + "loss": 0.2069, + "step": 79190 + }, + { + "epoch": 3.7, + "learning_rate": 7.747715266565298e-06, + "loss": 0.3096, + "step": 79195 + }, + { + "epoch": 3.7, + "learning_rate": 7.74693148151051e-06, + "loss": 0.0314, + "step": 79200 + }, + { + "epoch": 3.7, + "learning_rate": 7.746147696455725e-06, + "loss": 0.026, + "step": 79205 + }, + { + "epoch": 3.7, + "learning_rate": 7.745363911400938e-06, + "loss": 0.0116, + "step": 79210 + }, + { + "epoch": 3.7, + "learning_rate": 7.744580126346152e-06, + "loss": 0.0652, + "step": 79215 + }, + { + "epoch": 3.7, + "learning_rate": 7.743796341291365e-06, + "loss": 0.0941, + "step": 79220 + }, + { + "epoch": 3.7, + "learning_rate": 7.743012556236578e-06, + "loss": 0.054, + "step": 79225 + }, + { + "epoch": 3.7, + "learning_rate": 7.742228771181792e-06, + "loss": 0.0919, + "step": 79230 + }, + { + "epoch": 3.7, + "learning_rate": 7.741444986127005e-06, + "loss": 0.0863, + "step": 79235 + }, + { + "epoch": 3.7, + "learning_rate": 7.740661201072218e-06, + "loss": 0.2058, + "step": 79240 + }, + { + "epoch": 3.7, + "learning_rate": 7.739877416017432e-06, + "loss": 0.3746, + "step": 79245 + }, + { + "epoch": 3.7, + "learning_rate": 7.739093630962645e-06, + "loss": 0.0631, + "step": 79250 + }, + { + "epoch": 3.7, + "learning_rate": 7.738309845907859e-06, + "loss": 0.0152, + "step": 79255 + }, + { + "epoch": 3.7, + "learning_rate": 7.737526060853072e-06, + "loss": 0.0425, + "step": 79260 + }, + { + "epoch": 3.7, + "learning_rate": 7.736742275798286e-06, + "loss": 0.104, + "step": 79265 + }, + { + "epoch": 3.7, + "learning_rate": 7.735958490743499e-06, + "loss": 0.0209, + "step": 79270 + }, + { + "epoch": 3.7, + "learning_rate": 7.735174705688712e-06, + "loss": 0.0863, + "step": 79275 + }, + { + "epoch": 3.7, + "learning_rate": 7.734390920633926e-06, + "loss": 0.1091, + "step": 79280 + }, + { + "epoch": 3.7, + "learning_rate": 7.73360713557914e-06, + "loss": 0.1356, + "step": 79285 + }, + { + "epoch": 3.7, + "learning_rate": 7.732823350524352e-06, + "loss": 0.1522, + "step": 79290 + }, + { + "epoch": 3.7, + "learning_rate": 7.732039565469566e-06, + "loss": 0.1584, + "step": 79295 + }, + { + "epoch": 3.7, + "learning_rate": 7.731255780414779e-06, + "loss": 0.0795, + "step": 79300 + }, + { + "epoch": 3.7, + "learning_rate": 7.730471995359992e-06, + "loss": 0.0596, + "step": 79305 + }, + { + "epoch": 3.7, + "learning_rate": 7.729688210305206e-06, + "loss": 0.0502, + "step": 79310 + }, + { + "epoch": 3.7, + "learning_rate": 7.72890442525042e-06, + "loss": 0.0671, + "step": 79315 + }, + { + "epoch": 3.7, + "learning_rate": 7.728120640195633e-06, + "loss": 0.0851, + "step": 79320 + }, + { + "epoch": 3.7, + "learning_rate": 7.727336855140846e-06, + "loss": 0.0644, + "step": 79325 + }, + { + "epoch": 3.7, + "learning_rate": 7.72655307008606e-06, + "loss": 0.104, + "step": 79330 + }, + { + "epoch": 3.7, + "learning_rate": 7.725769285031274e-06, + "loss": 0.1543, + "step": 79335 + }, + { + "epoch": 3.7, + "learning_rate": 7.724985499976486e-06, + "loss": 0.1573, + "step": 79340 + }, + { + "epoch": 3.7, + "learning_rate": 7.7242017149217e-06, + "loss": 0.2184, + "step": 79345 + }, + { + "epoch": 3.7, + "learning_rate": 7.723417929866914e-06, + "loss": 0.0369, + "step": 79350 + }, + { + "epoch": 3.7, + "learning_rate": 7.722634144812128e-06, + "loss": 0.0109, + "step": 79355 + }, + { + "epoch": 3.7, + "learning_rate": 7.721850359757342e-06, + "loss": 0.0269, + "step": 79360 + }, + { + "epoch": 3.7, + "learning_rate": 7.721066574702554e-06, + "loss": 0.0179, + "step": 79365 + }, + { + "epoch": 3.7, + "learning_rate": 7.720282789647766e-06, + "loss": 0.0539, + "step": 79370 + }, + { + "epoch": 3.7, + "learning_rate": 7.71949900459298e-06, + "loss": 0.0521, + "step": 79375 + }, + { + "epoch": 3.7, + "learning_rate": 7.718715219538194e-06, + "loss": 0.0838, + "step": 79380 + }, + { + "epoch": 3.7, + "learning_rate": 7.717931434483408e-06, + "loss": 0.0885, + "step": 79385 + }, + { + "epoch": 3.7, + "learning_rate": 7.717147649428622e-06, + "loss": 0.2194, + "step": 79390 + }, + { + "epoch": 3.7, + "learning_rate": 7.716363864373834e-06, + "loss": 0.2308, + "step": 79395 + }, + { + "epoch": 3.7, + "learning_rate": 7.715580079319048e-06, + "loss": 0.0599, + "step": 79400 + }, + { + "epoch": 3.71, + "learning_rate": 7.714796294264262e-06, + "loss": 0.0291, + "step": 79405 + }, + { + "epoch": 3.71, + "learning_rate": 7.714012509209476e-06, + "loss": 0.0124, + "step": 79410 + }, + { + "epoch": 3.71, + "learning_rate": 7.713228724154688e-06, + "loss": 0.1041, + "step": 79415 + }, + { + "epoch": 3.71, + "learning_rate": 7.712444939099902e-06, + "loss": 0.0448, + "step": 79420 + }, + { + "epoch": 3.71, + "learning_rate": 7.711661154045116e-06, + "loss": 0.0642, + "step": 79425 + }, + { + "epoch": 3.71, + "learning_rate": 7.710877368990328e-06, + "loss": 0.0951, + "step": 79430 + }, + { + "epoch": 3.71, + "learning_rate": 7.710093583935542e-06, + "loss": 0.1728, + "step": 79435 + }, + { + "epoch": 3.71, + "learning_rate": 7.709309798880756e-06, + "loss": 0.1487, + "step": 79440 + }, + { + "epoch": 3.71, + "learning_rate": 7.708526013825968e-06, + "loss": 0.237, + "step": 79445 + }, + { + "epoch": 3.71, + "learning_rate": 7.707742228771182e-06, + "loss": 0.0672, + "step": 79450 + }, + { + "epoch": 3.71, + "learning_rate": 7.706958443716396e-06, + "loss": 0.0275, + "step": 79455 + }, + { + "epoch": 3.71, + "learning_rate": 7.70617465866161e-06, + "loss": 0.0369, + "step": 79460 + }, + { + "epoch": 3.71, + "learning_rate": 7.705390873606822e-06, + "loss": 0.0482, + "step": 79465 + }, + { + "epoch": 3.71, + "learning_rate": 7.704607088552036e-06, + "loss": 0.0857, + "step": 79470 + }, + { + "epoch": 3.71, + "learning_rate": 7.70382330349725e-06, + "loss": 0.0598, + "step": 79475 + }, + { + "epoch": 3.71, + "learning_rate": 7.703039518442464e-06, + "loss": 0.1201, + "step": 79480 + }, + { + "epoch": 3.71, + "learning_rate": 7.702255733387676e-06, + "loss": 0.1677, + "step": 79485 + }, + { + "epoch": 3.71, + "learning_rate": 7.70147194833289e-06, + "loss": 0.2025, + "step": 79490 + }, + { + "epoch": 3.71, + "learning_rate": 7.700688163278102e-06, + "loss": 0.3051, + "step": 79495 + }, + { + "epoch": 3.71, + "learning_rate": 7.699904378223316e-06, + "loss": 0.1057, + "step": 79500 + }, + { + "epoch": 3.71, + "learning_rate": 7.69912059316853e-06, + "loss": 0.0513, + "step": 79505 + }, + { + "epoch": 3.71, + "learning_rate": 7.698336808113744e-06, + "loss": 0.0324, + "step": 79510 + }, + { + "epoch": 3.71, + "learning_rate": 7.697553023058956e-06, + "loss": 0.031, + "step": 79515 + }, + { + "epoch": 3.71, + "learning_rate": 7.69676923800417e-06, + "loss": 0.0548, + "step": 79520 + }, + { + "epoch": 3.71, + "learning_rate": 7.695985452949384e-06, + "loss": 0.0992, + "step": 79525 + }, + { + "epoch": 3.71, + "learning_rate": 7.695201667894598e-06, + "loss": 0.1201, + "step": 79530 + }, + { + "epoch": 3.71, + "learning_rate": 7.69441788283981e-06, + "loss": 0.0911, + "step": 79535 + }, + { + "epoch": 3.71, + "learning_rate": 7.693634097785024e-06, + "loss": 0.3037, + "step": 79540 + }, + { + "epoch": 3.71, + "learning_rate": 7.692850312730238e-06, + "loss": 0.4432, + "step": 79545 + }, + { + "epoch": 3.71, + "learning_rate": 7.692066527675452e-06, + "loss": 0.0432, + "step": 79550 + }, + { + "epoch": 3.71, + "learning_rate": 7.691282742620664e-06, + "loss": 0.0619, + "step": 79555 + }, + { + "epoch": 3.71, + "learning_rate": 7.690498957565878e-06, + "loss": 0.0823, + "step": 79560 + }, + { + "epoch": 3.71, + "learning_rate": 7.68971517251109e-06, + "loss": 0.0433, + "step": 79565 + }, + { + "epoch": 3.71, + "learning_rate": 7.688931387456304e-06, + "loss": 0.0585, + "step": 79570 + }, + { + "epoch": 3.71, + "learning_rate": 7.688147602401518e-06, + "loss": 0.1646, + "step": 79575 + }, + { + "epoch": 3.71, + "learning_rate": 7.687363817346732e-06, + "loss": 0.1671, + "step": 79580 + }, + { + "epoch": 3.71, + "learning_rate": 7.686580032291944e-06, + "loss": 0.0532, + "step": 79585 + }, + { + "epoch": 3.71, + "learning_rate": 7.685796247237158e-06, + "loss": 0.1626, + "step": 79590 + }, + { + "epoch": 3.71, + "learning_rate": 7.685012462182372e-06, + "loss": 0.3362, + "step": 79595 + }, + { + "epoch": 3.71, + "learning_rate": 7.684228677127586e-06, + "loss": 0.0745, + "step": 79600 + }, + { + "epoch": 3.71, + "learning_rate": 7.683444892072798e-06, + "loss": 0.0608, + "step": 79605 + }, + { + "epoch": 3.71, + "learning_rate": 7.682661107018012e-06, + "loss": 0.0552, + "step": 79610 + }, + { + "epoch": 3.71, + "learning_rate": 7.681877321963226e-06, + "loss": 0.0213, + "step": 79615 + }, + { + "epoch": 3.72, + "learning_rate": 7.68109353690844e-06, + "loss": 0.0431, + "step": 79620 + }, + { + "epoch": 3.72, + "learning_rate": 7.680309751853652e-06, + "loss": 0.084, + "step": 79625 + }, + { + "epoch": 3.72, + "learning_rate": 7.679525966798866e-06, + "loss": 0.0523, + "step": 79630 + }, + { + "epoch": 3.72, + "learning_rate": 7.678898938755036e-06, + "loss": 0.1399, + "step": 79635 + }, + { + "epoch": 3.72, + "learning_rate": 7.67811515370025e-06, + "loss": 0.1198, + "step": 79640 + }, + { + "epoch": 3.72, + "learning_rate": 7.677331368645462e-06, + "loss": 0.1457, + "step": 79645 + }, + { + "epoch": 3.72, + "learning_rate": 7.676547583590676e-06, + "loss": 0.0347, + "step": 79650 + }, + { + "epoch": 3.72, + "learning_rate": 7.67576379853589e-06, + "loss": 0.0446, + "step": 79655 + }, + { + "epoch": 3.72, + "learning_rate": 7.674980013481104e-06, + "loss": 0.0179, + "step": 79660 + }, + { + "epoch": 3.72, + "learning_rate": 7.674196228426316e-06, + "loss": 0.058, + "step": 79665 + }, + { + "epoch": 3.72, + "learning_rate": 7.67341244337153e-06, + "loss": 0.041, + "step": 79670 + }, + { + "epoch": 3.72, + "learning_rate": 7.672628658316744e-06, + "loss": 0.112, + "step": 79675 + }, + { + "epoch": 3.72, + "learning_rate": 7.671844873261958e-06, + "loss": 0.1677, + "step": 79680 + }, + { + "epoch": 3.72, + "learning_rate": 7.671061088207172e-06, + "loss": 0.148, + "step": 79685 + }, + { + "epoch": 3.72, + "learning_rate": 7.670277303152384e-06, + "loss": 0.2323, + "step": 79690 + }, + { + "epoch": 3.72, + "learning_rate": 7.669493518097596e-06, + "loss": 0.1864, + "step": 79695 + }, + { + "epoch": 3.72, + "learning_rate": 7.66870973304281e-06, + "loss": 0.046, + "step": 79700 + }, + { + "epoch": 3.72, + "learning_rate": 7.667925947988024e-06, + "loss": 0.0314, + "step": 79705 + }, + { + "epoch": 3.72, + "learning_rate": 7.667142162933238e-06, + "loss": 0.0366, + "step": 79710 + }, + { + "epoch": 3.72, + "learning_rate": 7.66635837787845e-06, + "loss": 0.0915, + "step": 79715 + }, + { + "epoch": 3.72, + "learning_rate": 7.665574592823664e-06, + "loss": 0.074, + "step": 79720 + }, + { + "epoch": 3.72, + "learning_rate": 7.664790807768878e-06, + "loss": 0.1124, + "step": 79725 + }, + { + "epoch": 3.72, + "learning_rate": 7.664007022714092e-06, + "loss": 0.0639, + "step": 79730 + }, + { + "epoch": 3.72, + "learning_rate": 7.663223237659306e-06, + "loss": 0.134, + "step": 79735 + }, + { + "epoch": 3.72, + "learning_rate": 7.662439452604518e-06, + "loss": 0.0796, + "step": 79740 + }, + { + "epoch": 3.72, + "learning_rate": 7.661655667549732e-06, + "loss": 0.2107, + "step": 79745 + }, + { + "epoch": 3.72, + "learning_rate": 7.660871882494946e-06, + "loss": 0.035, + "step": 79750 + }, + { + "epoch": 3.72, + "learning_rate": 7.660088097440158e-06, + "loss": 0.0502, + "step": 79755 + }, + { + "epoch": 3.72, + "learning_rate": 7.659304312385372e-06, + "loss": 0.0185, + "step": 79760 + }, + { + "epoch": 3.72, + "learning_rate": 7.658520527330584e-06, + "loss": 0.0351, + "step": 79765 + }, + { + "epoch": 3.72, + "learning_rate": 7.657736742275798e-06, + "loss": 0.1205, + "step": 79770 + }, + { + "epoch": 3.72, + "learning_rate": 7.656952957221012e-06, + "loss": 0.0339, + "step": 79775 + }, + { + "epoch": 3.72, + "learning_rate": 7.656169172166226e-06, + "loss": 0.1625, + "step": 79780 + }, + { + "epoch": 3.72, + "learning_rate": 7.65538538711144e-06, + "loss": 0.2515, + "step": 79785 + }, + { + "epoch": 3.72, + "learning_rate": 7.654601602056652e-06, + "loss": 0.1826, + "step": 79790 + }, + { + "epoch": 3.72, + "learning_rate": 7.653817817001866e-06, + "loss": 0.2473, + "step": 79795 + }, + { + "epoch": 3.72, + "learning_rate": 7.65303403194708e-06, + "loss": 0.108, + "step": 79800 + }, + { + "epoch": 3.72, + "learning_rate": 7.652250246892294e-06, + "loss": 0.029, + "step": 79805 + }, + { + "epoch": 3.72, + "learning_rate": 7.651466461837506e-06, + "loss": 0.0212, + "step": 79810 + }, + { + "epoch": 3.72, + "learning_rate": 7.65068267678272e-06, + "loss": 0.0281, + "step": 79815 + }, + { + "epoch": 3.72, + "learning_rate": 7.649898891727932e-06, + "loss": 0.1148, + "step": 79820 + }, + { + "epoch": 3.72, + "learning_rate": 7.649115106673146e-06, + "loss": 0.1028, + "step": 79825 + }, + { + "epoch": 3.72, + "learning_rate": 7.64833132161836e-06, + "loss": 0.1096, + "step": 79830 + }, + { + "epoch": 3.73, + "learning_rate": 7.647547536563574e-06, + "loss": 0.1648, + "step": 79835 + }, + { + "epoch": 3.73, + "learning_rate": 7.646763751508786e-06, + "loss": 0.2439, + "step": 79840 + }, + { + "epoch": 3.73, + "learning_rate": 7.645979966454e-06, + "loss": 0.3196, + "step": 79845 + }, + { + "epoch": 3.73, + "learning_rate": 7.645196181399214e-06, + "loss": 0.0447, + "step": 79850 + }, + { + "epoch": 3.73, + "learning_rate": 7.644412396344428e-06, + "loss": 0.0287, + "step": 79855 + }, + { + "epoch": 3.73, + "learning_rate": 7.64362861128964e-06, + "loss": 0.0343, + "step": 79860 + }, + { + "epoch": 3.73, + "learning_rate": 7.642844826234854e-06, + "loss": 0.0918, + "step": 79865 + }, + { + "epoch": 3.73, + "learning_rate": 7.642061041180068e-06, + "loss": 0.0603, + "step": 79870 + }, + { + "epoch": 3.73, + "learning_rate": 7.641277256125282e-06, + "loss": 0.0978, + "step": 79875 + }, + { + "epoch": 3.73, + "learning_rate": 7.640493471070494e-06, + "loss": 0.1099, + "step": 79880 + }, + { + "epoch": 3.73, + "learning_rate": 7.639709686015708e-06, + "loss": 0.0707, + "step": 79885 + }, + { + "epoch": 3.73, + "learning_rate": 7.63892590096092e-06, + "loss": 0.1441, + "step": 79890 + }, + { + "epoch": 3.73, + "learning_rate": 7.638142115906134e-06, + "loss": 0.2386, + "step": 79895 + }, + { + "epoch": 3.73, + "learning_rate": 7.637358330851348e-06, + "loss": 0.0736, + "step": 79900 + }, + { + "epoch": 3.73, + "learning_rate": 7.636574545796562e-06, + "loss": 0.0412, + "step": 79905 + }, + { + "epoch": 3.73, + "learning_rate": 7.635790760741774e-06, + "loss": 0.0549, + "step": 79910 + }, + { + "epoch": 3.73, + "learning_rate": 7.635006975686988e-06, + "loss": 0.0487, + "step": 79915 + }, + { + "epoch": 3.73, + "learning_rate": 7.634223190632202e-06, + "loss": 0.071, + "step": 79920 + }, + { + "epoch": 3.73, + "learning_rate": 7.633439405577416e-06, + "loss": 0.0594, + "step": 79925 + }, + { + "epoch": 3.73, + "learning_rate": 7.632655620522628e-06, + "loss": 0.0753, + "step": 79930 + }, + { + "epoch": 3.73, + "learning_rate": 7.631871835467842e-06, + "loss": 0.1202, + "step": 79935 + }, + { + "epoch": 3.73, + "learning_rate": 7.631088050413056e-06, + "loss": 0.1864, + "step": 79940 + }, + { + "epoch": 3.73, + "learning_rate": 7.63030426535827e-06, + "loss": 0.2801, + "step": 79945 + }, + { + "epoch": 3.73, + "learning_rate": 7.629520480303482e-06, + "loss": 0.0676, + "step": 79950 + }, + { + "epoch": 3.73, + "learning_rate": 7.628736695248695e-06, + "loss": 0.0707, + "step": 79955 + }, + { + "epoch": 3.73, + "learning_rate": 7.627952910193909e-06, + "loss": 0.0497, + "step": 79960 + }, + { + "epoch": 3.73, + "learning_rate": 7.627169125139122e-06, + "loss": 0.0591, + "step": 79965 + }, + { + "epoch": 3.73, + "learning_rate": 7.626385340084336e-06, + "loss": 0.0237, + "step": 79970 + }, + { + "epoch": 3.73, + "learning_rate": 7.625601555029549e-06, + "loss": 0.0949, + "step": 79975 + }, + { + "epoch": 3.73, + "learning_rate": 7.624817769974763e-06, + "loss": 0.0499, + "step": 79980 + }, + { + "epoch": 3.73, + "learning_rate": 7.624033984919977e-06, + "loss": 0.1492, + "step": 79985 + }, + { + "epoch": 3.73, + "learning_rate": 7.62325019986519e-06, + "loss": 0.1709, + "step": 79990 + }, + { + "epoch": 3.73, + "learning_rate": 7.622466414810404e-06, + "loss": 0.3034, + "step": 79995 + }, + { + "epoch": 3.73, + "learning_rate": 7.621682629755617e-06, + "loss": 0.1068, + "step": 80000 + }, + { + "epoch": 3.73, + "learning_rate": 7.620898844700831e-06, + "loss": 0.0279, + "step": 80005 + }, + { + "epoch": 3.73, + "learning_rate": 7.620115059646044e-06, + "loss": 0.08, + "step": 80010 + }, + { + "epoch": 3.73, + "learning_rate": 7.619331274591256e-06, + "loss": 0.0887, + "step": 80015 + }, + { + "epoch": 3.73, + "learning_rate": 7.61854748953647e-06, + "loss": 0.0659, + "step": 80020 + }, + { + "epoch": 3.73, + "learning_rate": 7.617763704481683e-06, + "loss": 0.1461, + "step": 80025 + }, + { + "epoch": 3.73, + "learning_rate": 7.616979919426897e-06, + "loss": 0.1, + "step": 80030 + }, + { + "epoch": 3.73, + "learning_rate": 7.616196134372111e-06, + "loss": 0.1256, + "step": 80035 + }, + { + "epoch": 3.73, + "learning_rate": 7.615412349317324e-06, + "loss": 0.1121, + "step": 80040 + }, + { + "epoch": 3.74, + "learning_rate": 7.614628564262538e-06, + "loss": 0.3639, + "step": 80045 + }, + { + "epoch": 3.74, + "learning_rate": 7.613844779207751e-06, + "loss": 0.0781, + "step": 80050 + }, + { + "epoch": 3.74, + "learning_rate": 7.613060994152965e-06, + "loss": 0.0196, + "step": 80055 + }, + { + "epoch": 3.74, + "learning_rate": 7.612277209098178e-06, + "loss": 0.1044, + "step": 80060 + }, + { + "epoch": 3.74, + "learning_rate": 7.6114934240433916e-06, + "loss": 0.0287, + "step": 80065 + }, + { + "epoch": 3.74, + "learning_rate": 7.610709638988605e-06, + "loss": 0.0434, + "step": 80070 + }, + { + "epoch": 3.74, + "learning_rate": 7.6099258539338185e-06, + "loss": 0.0981, + "step": 80075 + }, + { + "epoch": 3.74, + "learning_rate": 7.609142068879031e-06, + "loss": 0.0543, + "step": 80080 + }, + { + "epoch": 3.74, + "learning_rate": 7.608358283824245e-06, + "loss": 0.0924, + "step": 80085 + }, + { + "epoch": 3.74, + "learning_rate": 7.607574498769458e-06, + "loss": 0.2712, + "step": 80090 + }, + { + "epoch": 3.74, + "learning_rate": 7.606790713714672e-06, + "loss": 0.2002, + "step": 80095 + }, + { + "epoch": 3.74, + "learning_rate": 7.606006928659885e-06, + "loss": 0.0818, + "step": 80100 + }, + { + "epoch": 3.74, + "learning_rate": 7.6052231436050986e-06, + "loss": 0.0633, + "step": 80105 + }, + { + "epoch": 3.74, + "learning_rate": 7.604439358550312e-06, + "loss": 0.0325, + "step": 80110 + }, + { + "epoch": 3.74, + "learning_rate": 7.6036555734955255e-06, + "loss": 0.1279, + "step": 80115 + }, + { + "epoch": 3.74, + "learning_rate": 7.602871788440739e-06, + "loss": 0.0537, + "step": 80120 + }, + { + "epoch": 3.74, + "learning_rate": 7.6020880033859525e-06, + "loss": 0.193, + "step": 80125 + }, + { + "epoch": 3.74, + "learning_rate": 7.6013042183311655e-06, + "loss": 0.0717, + "step": 80130 + }, + { + "epoch": 3.74, + "learning_rate": 7.6005204332763795e-06, + "loss": 0.1594, + "step": 80135 + }, + { + "epoch": 3.74, + "learning_rate": 7.5997366482215925e-06, + "loss": 0.1187, + "step": 80140 + }, + { + "epoch": 3.74, + "learning_rate": 7.5989528631668056e-06, + "loss": 0.3229, + "step": 80145 + }, + { + "epoch": 3.74, + "learning_rate": 7.598169078112019e-06, + "loss": 0.0789, + "step": 80150 + }, + { + "epoch": 3.74, + "learning_rate": 7.5973852930572325e-06, + "loss": 0.0952, + "step": 80155 + }, + { + "epoch": 3.74, + "learning_rate": 7.596601508002446e-06, + "loss": 0.0218, + "step": 80160 + }, + { + "epoch": 3.74, + "learning_rate": 7.5958177229476595e-06, + "loss": 0.0401, + "step": 80165 + }, + { + "epoch": 3.74, + "learning_rate": 7.5950339378928726e-06, + "loss": 0.0259, + "step": 80170 + }, + { + "epoch": 3.74, + "learning_rate": 7.5942501528380865e-06, + "loss": 0.1339, + "step": 80175 + }, + { + "epoch": 3.74, + "learning_rate": 7.5934663677832995e-06, + "loss": 0.0555, + "step": 80180 + }, + { + "epoch": 3.74, + "learning_rate": 7.5926825827285134e-06, + "loss": 0.1528, + "step": 80185 + }, + { + "epoch": 3.74, + "learning_rate": 7.5918987976737265e-06, + "loss": 0.2276, + "step": 80190 + }, + { + "epoch": 3.74, + "learning_rate": 7.59111501261894e-06, + "loss": 0.5037, + "step": 80195 + }, + { + "epoch": 3.74, + "learning_rate": 7.5903312275641535e-06, + "loss": 0.0713, + "step": 80200 + }, + { + "epoch": 3.74, + "learning_rate": 7.589547442509367e-06, + "loss": 0.0174, + "step": 80205 + }, + { + "epoch": 3.74, + "learning_rate": 7.5887636574545796e-06, + "loss": 0.016, + "step": 80210 + }, + { + "epoch": 3.74, + "learning_rate": 7.5879798723997935e-06, + "loss": 0.0537, + "step": 80215 + }, + { + "epoch": 3.74, + "learning_rate": 7.5871960873450065e-06, + "loss": 0.1113, + "step": 80220 + }, + { + "epoch": 3.74, + "learning_rate": 7.5864123022902204e-06, + "loss": 0.1015, + "step": 80225 + }, + { + "epoch": 3.74, + "learning_rate": 7.5856285172354335e-06, + "loss": 0.1157, + "step": 80230 + }, + { + "epoch": 3.74, + "learning_rate": 7.584844732180647e-06, + "loss": 0.097, + "step": 80235 + }, + { + "epoch": 3.74, + "learning_rate": 7.5840609471258605e-06, + "loss": 0.1815, + "step": 80240 + }, + { + "epoch": 3.74, + "learning_rate": 7.583277162071074e-06, + "loss": 0.2547, + "step": 80245 + }, + { + "epoch": 3.74, + "learning_rate": 7.5824933770162874e-06, + "loss": 0.064, + "step": 80250 + }, + { + "epoch": 3.74, + "learning_rate": 7.581709591961501e-06, + "loss": 0.0617, + "step": 80255 + }, + { + "epoch": 3.75, + "learning_rate": 7.580925806906715e-06, + "loss": 0.0732, + "step": 80260 + }, + { + "epoch": 3.75, + "learning_rate": 7.580142021851928e-06, + "loss": 0.0817, + "step": 80265 + }, + { + "epoch": 3.75, + "learning_rate": 7.579358236797142e-06, + "loss": 0.0828, + "step": 80270 + }, + { + "epoch": 3.75, + "learning_rate": 7.578574451742354e-06, + "loss": 0.1249, + "step": 80275 + }, + { + "epoch": 3.75, + "learning_rate": 7.5777906666875675e-06, + "loss": 0.085, + "step": 80280 + }, + { + "epoch": 3.75, + "learning_rate": 7.577006881632781e-06, + "loss": 0.2085, + "step": 80285 + }, + { + "epoch": 3.75, + "learning_rate": 7.5762230965779944e-06, + "loss": 0.1444, + "step": 80290 + }, + { + "epoch": 3.75, + "learning_rate": 7.575439311523208e-06, + "loss": 0.2968, + "step": 80295 + }, + { + "epoch": 3.75, + "learning_rate": 7.574655526468422e-06, + "loss": 0.0624, + "step": 80300 + }, + { + "epoch": 3.75, + "learning_rate": 7.573871741413635e-06, + "loss": 0.0325, + "step": 80305 + }, + { + "epoch": 3.75, + "learning_rate": 7.573087956358849e-06, + "loss": 0.0343, + "step": 80310 + }, + { + "epoch": 3.75, + "learning_rate": 7.572304171304062e-06, + "loss": 0.0628, + "step": 80315 + }, + { + "epoch": 3.75, + "learning_rate": 7.571520386249276e-06, + "loss": 0.0368, + "step": 80320 + }, + { + "epoch": 3.75, + "learning_rate": 7.570736601194489e-06, + "loss": 0.1064, + "step": 80325 + }, + { + "epoch": 3.75, + "learning_rate": 7.569952816139703e-06, + "loss": 0.1008, + "step": 80330 + }, + { + "epoch": 3.75, + "learning_rate": 7.569169031084916e-06, + "loss": 0.0986, + "step": 80335 + }, + { + "epoch": 3.75, + "learning_rate": 7.568385246030128e-06, + "loss": 0.1832, + "step": 80340 + }, + { + "epoch": 3.75, + "learning_rate": 7.567601460975342e-06, + "loss": 0.2612, + "step": 80345 + }, + { + "epoch": 3.75, + "learning_rate": 7.566817675920556e-06, + "loss": 0.0729, + "step": 80350 + }, + { + "epoch": 3.75, + "learning_rate": 7.566033890865769e-06, + "loss": 0.0367, + "step": 80355 + }, + { + "epoch": 3.75, + "learning_rate": 7.565250105810983e-06, + "loss": 0.0083, + "step": 80360 + }, + { + "epoch": 3.75, + "learning_rate": 7.564466320756196e-06, + "loss": 0.1255, + "step": 80365 + }, + { + "epoch": 3.75, + "learning_rate": 7.56368253570141e-06, + "loss": 0.0915, + "step": 80370 + }, + { + "epoch": 3.75, + "learning_rate": 7.562898750646623e-06, + "loss": 0.0961, + "step": 80375 + }, + { + "epoch": 3.75, + "learning_rate": 7.562114965591837e-06, + "loss": 0.0742, + "step": 80380 + }, + { + "epoch": 3.75, + "learning_rate": 7.56133118053705e-06, + "loss": 0.0985, + "step": 80385 + }, + { + "epoch": 3.75, + "learning_rate": 7.560547395482264e-06, + "loss": 0.2192, + "step": 80390 + }, + { + "epoch": 3.75, + "learning_rate": 7.559763610427477e-06, + "loss": 0.1856, + "step": 80395 + }, + { + "epoch": 3.75, + "learning_rate": 7.558979825372691e-06, + "loss": 0.0524, + "step": 80400 + }, + { + "epoch": 3.75, + "learning_rate": 7.558196040317903e-06, + "loss": 0.068, + "step": 80405 + }, + { + "epoch": 3.75, + "learning_rate": 7.557412255263117e-06, + "loss": 0.0281, + "step": 80410 + }, + { + "epoch": 3.75, + "learning_rate": 7.55662847020833e-06, + "loss": 0.0676, + "step": 80415 + }, + { + "epoch": 3.75, + "learning_rate": 7.555844685153544e-06, + "loss": 0.0235, + "step": 80420 + }, + { + "epoch": 3.75, + "learning_rate": 7.555060900098757e-06, + "loss": 0.0761, + "step": 80425 + }, + { + "epoch": 3.75, + "learning_rate": 7.554277115043971e-06, + "loss": 0.145, + "step": 80430 + }, + { + "epoch": 3.75, + "learning_rate": 7.553493329989184e-06, + "loss": 0.1257, + "step": 80435 + }, + { + "epoch": 3.75, + "learning_rate": 7.552709544934398e-06, + "loss": 0.2336, + "step": 80440 + }, + { + "epoch": 3.75, + "learning_rate": 7.551925759879611e-06, + "loss": 0.146, + "step": 80445 + }, + { + "epoch": 3.75, + "learning_rate": 7.551141974824825e-06, + "loss": 0.071, + "step": 80450 + }, + { + "epoch": 3.75, + "learning_rate": 7.550358189770038e-06, + "loss": 0.0779, + "step": 80455 + }, + { + "epoch": 3.75, + "learning_rate": 7.549574404715252e-06, + "loss": 0.0214, + "step": 80460 + }, + { + "epoch": 3.75, + "learning_rate": 7.548790619660465e-06, + "loss": 0.0261, + "step": 80465 + }, + { + "epoch": 3.75, + "learning_rate": 7.548006834605678e-06, + "loss": 0.0636, + "step": 80470 + }, + { + "epoch": 3.76, + "learning_rate": 7.547223049550891e-06, + "loss": 0.1001, + "step": 80475 + }, + { + "epoch": 3.76, + "learning_rate": 7.546439264496105e-06, + "loss": 0.0961, + "step": 80480 + }, + { + "epoch": 3.76, + "learning_rate": 7.545655479441318e-06, + "loss": 0.1074, + "step": 80485 + }, + { + "epoch": 3.76, + "learning_rate": 7.544871694386532e-06, + "loss": 0.1157, + "step": 80490 + }, + { + "epoch": 3.76, + "learning_rate": 7.544087909331745e-06, + "loss": 0.4315, + "step": 80495 + }, + { + "epoch": 3.76, + "learning_rate": 7.543304124276959e-06, + "loss": 0.0414, + "step": 80500 + }, + { + "epoch": 3.76, + "learning_rate": 7.542520339222172e-06, + "loss": 0.0545, + "step": 80505 + }, + { + "epoch": 3.76, + "learning_rate": 7.541736554167386e-06, + "loss": 0.0484, + "step": 80510 + }, + { + "epoch": 3.76, + "learning_rate": 7.540952769112599e-06, + "loss": 0.0749, + "step": 80515 + }, + { + "epoch": 3.76, + "learning_rate": 7.540168984057813e-06, + "loss": 0.0789, + "step": 80520 + }, + { + "epoch": 3.76, + "learning_rate": 7.539385199003027e-06, + "loss": 0.0878, + "step": 80525 + }, + { + "epoch": 3.76, + "learning_rate": 7.53860141394824e-06, + "loss": 0.0794, + "step": 80530 + }, + { + "epoch": 3.76, + "learning_rate": 7.537817628893452e-06, + "loss": 0.1222, + "step": 80535 + }, + { + "epoch": 3.76, + "learning_rate": 7.537033843838666e-06, + "loss": 0.1389, + "step": 80540 + }, + { + "epoch": 3.76, + "learning_rate": 7.536250058783879e-06, + "loss": 0.1715, + "step": 80545 + }, + { + "epoch": 3.76, + "learning_rate": 7.535466273729093e-06, + "loss": 0.0799, + "step": 80550 + }, + { + "epoch": 3.76, + "learning_rate": 7.534682488674306e-06, + "loss": 0.0222, + "step": 80555 + }, + { + "epoch": 3.76, + "learning_rate": 7.53389870361952e-06, + "loss": 0.0624, + "step": 80560 + }, + { + "epoch": 3.76, + "learning_rate": 7.533114918564734e-06, + "loss": 0.1226, + "step": 80565 + }, + { + "epoch": 3.76, + "learning_rate": 7.532331133509947e-06, + "loss": 0.1108, + "step": 80570 + }, + { + "epoch": 3.76, + "learning_rate": 7.531547348455161e-06, + "loss": 0.101, + "step": 80575 + }, + { + "epoch": 3.76, + "learning_rate": 7.530763563400374e-06, + "loss": 0.0392, + "step": 80580 + }, + { + "epoch": 3.76, + "learning_rate": 7.529979778345588e-06, + "loss": 0.1091, + "step": 80585 + }, + { + "epoch": 3.76, + "learning_rate": 7.529195993290801e-06, + "loss": 0.1041, + "step": 80590 + }, + { + "epoch": 3.76, + "learning_rate": 7.528412208236015e-06, + "loss": 0.2562, + "step": 80595 + }, + { + "epoch": 3.76, + "learning_rate": 7.527628423181227e-06, + "loss": 0.033, + "step": 80600 + }, + { + "epoch": 3.76, + "learning_rate": 7.52684463812644e-06, + "loss": 0.0161, + "step": 80605 + }, + { + "epoch": 3.76, + "learning_rate": 7.526060853071654e-06, + "loss": 0.0184, + "step": 80610 + }, + { + "epoch": 3.76, + "learning_rate": 7.525277068016868e-06, + "loss": 0.034, + "step": 80615 + }, + { + "epoch": 3.76, + "learning_rate": 7.524493282962081e-06, + "loss": 0.0559, + "step": 80620 + }, + { + "epoch": 3.76, + "learning_rate": 7.523709497907295e-06, + "loss": 0.0886, + "step": 80625 + }, + { + "epoch": 3.76, + "learning_rate": 7.522925712852508e-06, + "loss": 0.1356, + "step": 80630 + }, + { + "epoch": 3.76, + "learning_rate": 7.522141927797722e-06, + "loss": 0.2386, + "step": 80635 + }, + { + "epoch": 3.76, + "learning_rate": 7.521358142742935e-06, + "loss": 0.1457, + "step": 80640 + }, + { + "epoch": 3.76, + "learning_rate": 7.520574357688149e-06, + "loss": 0.2578, + "step": 80645 + }, + { + "epoch": 3.76, + "learning_rate": 7.519790572633362e-06, + "loss": 0.0684, + "step": 80650 + }, + { + "epoch": 3.76, + "learning_rate": 7.519006787578576e-06, + "loss": 0.0222, + "step": 80655 + }, + { + "epoch": 3.76, + "learning_rate": 7.518223002523789e-06, + "loss": 0.0702, + "step": 80660 + }, + { + "epoch": 3.76, + "learning_rate": 7.517439217469002e-06, + "loss": 0.0117, + "step": 80665 + }, + { + "epoch": 3.76, + "learning_rate": 7.516655432414215e-06, + "loss": 0.0342, + "step": 80670 + }, + { + "epoch": 3.76, + "learning_rate": 7.515871647359429e-06, + "loss": 0.1044, + "step": 80675 + }, + { + "epoch": 3.76, + "learning_rate": 7.515087862304642e-06, + "loss": 0.1218, + "step": 80680 + }, + { + "epoch": 3.76, + "learning_rate": 7.514304077249856e-06, + "loss": 0.1304, + "step": 80685 + }, + { + "epoch": 3.77, + "learning_rate": 7.513520292195069e-06, + "loss": 0.2197, + "step": 80690 + }, + { + "epoch": 3.77, + "learning_rate": 7.512736507140283e-06, + "loss": 0.3307, + "step": 80695 + }, + { + "epoch": 3.77, + "learning_rate": 7.511952722085496e-06, + "loss": 0.07, + "step": 80700 + }, + { + "epoch": 3.77, + "learning_rate": 7.51116893703071e-06, + "loss": 0.0504, + "step": 80705 + }, + { + "epoch": 3.77, + "learning_rate": 7.510385151975923e-06, + "loss": 0.0316, + "step": 80710 + }, + { + "epoch": 3.77, + "learning_rate": 7.509601366921137e-06, + "loss": 0.0594, + "step": 80715 + }, + { + "epoch": 3.77, + "learning_rate": 7.50881758186635e-06, + "loss": 0.1011, + "step": 80720 + }, + { + "epoch": 3.77, + "learning_rate": 7.508033796811564e-06, + "loss": 0.0862, + "step": 80725 + }, + { + "epoch": 3.77, + "learning_rate": 7.507250011756776e-06, + "loss": 0.1308, + "step": 80730 + }, + { + "epoch": 3.77, + "learning_rate": 7.50646622670199e-06, + "loss": 0.1348, + "step": 80735 + }, + { + "epoch": 3.77, + "learning_rate": 7.505682441647203e-06, + "loss": 0.2836, + "step": 80740 + }, + { + "epoch": 3.77, + "learning_rate": 7.504898656592417e-06, + "loss": 0.3387, + "step": 80745 + }, + { + "epoch": 3.77, + "learning_rate": 7.50411487153763e-06, + "loss": 0.1073, + "step": 80750 + }, + { + "epoch": 3.77, + "learning_rate": 7.503331086482844e-06, + "loss": 0.016, + "step": 80755 + }, + { + "epoch": 3.77, + "learning_rate": 7.502547301428057e-06, + "loss": 0.0336, + "step": 80760 + }, + { + "epoch": 3.77, + "learning_rate": 7.501763516373271e-06, + "loss": 0.0689, + "step": 80765 + }, + { + "epoch": 3.77, + "learning_rate": 7.500979731318484e-06, + "loss": 0.0737, + "step": 80770 + }, + { + "epoch": 3.77, + "learning_rate": 7.5001959462636976e-06, + "loss": 0.0906, + "step": 80775 + }, + { + "epoch": 3.77, + "learning_rate": 7.499412161208911e-06, + "loss": 0.1477, + "step": 80780 + }, + { + "epoch": 3.77, + "learning_rate": 7.4986283761541245e-06, + "loss": 0.1053, + "step": 80785 + }, + { + "epoch": 3.77, + "learning_rate": 7.4978445910993384e-06, + "loss": 0.1451, + "step": 80790 + }, + { + "epoch": 3.77, + "learning_rate": 7.497060806044551e-06, + "loss": 0.2624, + "step": 80795 + }, + { + "epoch": 3.77, + "learning_rate": 7.496277020989764e-06, + "loss": 0.0496, + "step": 80800 + }, + { + "epoch": 3.77, + "learning_rate": 7.495493235934978e-06, + "loss": 0.0683, + "step": 80805 + }, + { + "epoch": 3.77, + "learning_rate": 7.494709450880191e-06, + "loss": 0.0146, + "step": 80810 + }, + { + "epoch": 3.77, + "learning_rate": 7.4939256658254046e-06, + "loss": 0.0139, + "step": 80815 + }, + { + "epoch": 3.77, + "learning_rate": 7.493141880770618e-06, + "loss": 0.0735, + "step": 80820 + }, + { + "epoch": 3.77, + "learning_rate": 7.4923580957158315e-06, + "loss": 0.0898, + "step": 80825 + }, + { + "epoch": 3.77, + "learning_rate": 7.491574310661045e-06, + "loss": 0.0639, + "step": 80830 + }, + { + "epoch": 3.77, + "learning_rate": 7.4907905256062585e-06, + "loss": 0.1063, + "step": 80835 + }, + { + "epoch": 3.77, + "learning_rate": 7.490006740551472e-06, + "loss": 0.1522, + "step": 80840 + }, + { + "epoch": 3.77, + "learning_rate": 7.4892229554966855e-06, + "loss": 0.1718, + "step": 80845 + }, + { + "epoch": 3.77, + "learning_rate": 7.488439170441899e-06, + "loss": 0.1038, + "step": 80850 + }, + { + "epoch": 3.77, + "learning_rate": 7.4876553853871124e-06, + "loss": 0.0347, + "step": 80855 + }, + { + "epoch": 3.77, + "learning_rate": 7.486871600332325e-06, + "loss": 0.0486, + "step": 80860 + }, + { + "epoch": 3.77, + "learning_rate": 7.4860878152775385e-06, + "loss": 0.0466, + "step": 80865 + }, + { + "epoch": 3.77, + "learning_rate": 7.485304030222752e-06, + "loss": 0.1096, + "step": 80870 + }, + { + "epoch": 3.77, + "learning_rate": 7.4845202451679655e-06, + "loss": 0.1127, + "step": 80875 + }, + { + "epoch": 3.77, + "learning_rate": 7.483736460113179e-06, + "loss": 0.108, + "step": 80880 + }, + { + "epoch": 3.77, + "learning_rate": 7.4829526750583925e-06, + "loss": 0.1396, + "step": 80885 + }, + { + "epoch": 3.77, + "learning_rate": 7.482168890003606e-06, + "loss": 0.1624, + "step": 80890 + }, + { + "epoch": 3.77, + "learning_rate": 7.4813851049488194e-06, + "loss": 0.2717, + "step": 80895 + }, + { + "epoch": 3.77, + "learning_rate": 7.480601319894033e-06, + "loss": 0.0359, + "step": 80900 + }, + { + "epoch": 3.78, + "learning_rate": 7.479817534839246e-06, + "loss": 0.0532, + "step": 80905 + }, + { + "epoch": 3.78, + "learning_rate": 7.47903374978446e-06, + "loss": 0.0593, + "step": 80910 + }, + { + "epoch": 3.78, + "learning_rate": 7.478249964729673e-06, + "loss": 0.0459, + "step": 80915 + }, + { + "epoch": 3.78, + "learning_rate": 7.477466179674887e-06, + "loss": 0.0694, + "step": 80920 + }, + { + "epoch": 3.78, + "learning_rate": 7.4766823946200995e-06, + "loss": 0.0415, + "step": 80925 + }, + { + "epoch": 3.78, + "learning_rate": 7.475898609565313e-06, + "loss": 0.106, + "step": 80930 + }, + { + "epoch": 3.78, + "learning_rate": 7.4751148245105265e-06, + "loss": 0.1636, + "step": 80935 + }, + { + "epoch": 3.78, + "learning_rate": 7.47433103945574e-06, + "loss": 0.1601, + "step": 80940 + }, + { + "epoch": 3.78, + "learning_rate": 7.473547254400953e-06, + "loss": 0.277, + "step": 80945 + }, + { + "epoch": 3.78, + "learning_rate": 7.472763469346167e-06, + "loss": 0.065, + "step": 80950 + }, + { + "epoch": 3.78, + "learning_rate": 7.47197968429138e-06, + "loss": 0.0045, + "step": 80955 + }, + { + "epoch": 3.78, + "learning_rate": 7.471195899236594e-06, + "loss": 0.0261, + "step": 80960 + }, + { + "epoch": 3.78, + "learning_rate": 7.470412114181807e-06, + "loss": 0.0298, + "step": 80965 + }, + { + "epoch": 3.78, + "learning_rate": 7.469628329127021e-06, + "loss": 0.0836, + "step": 80970 + }, + { + "epoch": 3.78, + "learning_rate": 7.468844544072234e-06, + "loss": 0.0585, + "step": 80975 + }, + { + "epoch": 3.78, + "learning_rate": 7.468060759017448e-06, + "loss": 0.0885, + "step": 80980 + }, + { + "epoch": 3.78, + "learning_rate": 7.467276973962661e-06, + "loss": 0.1012, + "step": 80985 + }, + { + "epoch": 3.78, + "learning_rate": 7.466493188907874e-06, + "loss": 0.1778, + "step": 80990 + }, + { + "epoch": 3.78, + "learning_rate": 7.465709403853087e-06, + "loss": 0.1979, + "step": 80995 + }, + { + "epoch": 3.78, + "learning_rate": 7.464925618798301e-06, + "loss": 0.0336, + "step": 81000 + }, + { + "epoch": 3.78, + "learning_rate": 7.464141833743514e-06, + "loss": 0.0493, + "step": 81005 + }, + { + "epoch": 3.78, + "learning_rate": 7.463358048688728e-06, + "loss": 0.0297, + "step": 81010 + }, + { + "epoch": 3.78, + "learning_rate": 7.462574263633941e-06, + "loss": 0.017, + "step": 81015 + }, + { + "epoch": 3.78, + "learning_rate": 7.461790478579155e-06, + "loss": 0.0602, + "step": 81020 + }, + { + "epoch": 3.78, + "learning_rate": 7.461006693524368e-06, + "loss": 0.0804, + "step": 81025 + }, + { + "epoch": 3.78, + "learning_rate": 7.460222908469582e-06, + "loss": 0.0775, + "step": 81030 + }, + { + "epoch": 3.78, + "learning_rate": 7.459439123414795e-06, + "loss": 0.2335, + "step": 81035 + }, + { + "epoch": 3.78, + "learning_rate": 7.458655338360009e-06, + "loss": 0.1755, + "step": 81040 + }, + { + "epoch": 3.78, + "learning_rate": 7.457871553305222e-06, + "loss": 0.2894, + "step": 81045 + }, + { + "epoch": 3.78, + "learning_rate": 7.457087768250436e-06, + "loss": 0.0208, + "step": 81050 + }, + { + "epoch": 3.78, + "learning_rate": 7.456303983195648e-06, + "loss": 0.0958, + "step": 81055 + }, + { + "epoch": 3.78, + "learning_rate": 7.455520198140862e-06, + "loss": 0.0727, + "step": 81060 + }, + { + "epoch": 3.78, + "learning_rate": 7.454736413086075e-06, + "loss": 0.0566, + "step": 81065 + }, + { + "epoch": 3.78, + "learning_rate": 7.453952628031289e-06, + "loss": 0.0311, + "step": 81070 + }, + { + "epoch": 3.78, + "learning_rate": 7.453168842976502e-06, + "loss": 0.1151, + "step": 81075 + }, + { + "epoch": 3.78, + "learning_rate": 7.452385057921716e-06, + "loss": 0.1571, + "step": 81080 + }, + { + "epoch": 3.78, + "learning_rate": 7.451601272866929e-06, + "loss": 0.1032, + "step": 81085 + }, + { + "epoch": 3.78, + "learning_rate": 7.450817487812143e-06, + "loss": 0.1239, + "step": 81090 + }, + { + "epoch": 3.78, + "learning_rate": 7.450033702757356e-06, + "loss": 0.3272, + "step": 81095 + }, + { + "epoch": 3.78, + "learning_rate": 7.44924991770257e-06, + "loss": 0.088, + "step": 81100 + }, + { + "epoch": 3.78, + "learning_rate": 7.448466132647784e-06, + "loss": 0.0075, + "step": 81105 + }, + { + "epoch": 3.78, + "learning_rate": 7.447682347592997e-06, + "loss": 0.0353, + "step": 81110 + }, + { + "epoch": 3.78, + "learning_rate": 7.446898562538211e-06, + "loss": 0.0448, + "step": 81115 + }, + { + "epoch": 3.79, + "learning_rate": 7.446114777483423e-06, + "loss": 0.0518, + "step": 81120 + }, + { + "epoch": 3.79, + "learning_rate": 7.445330992428636e-06, + "loss": 0.1159, + "step": 81125 + }, + { + "epoch": 3.79, + "learning_rate": 7.44454720737385e-06, + "loss": 0.0917, + "step": 81130 + }, + { + "epoch": 3.79, + "learning_rate": 7.443763422319063e-06, + "loss": 0.1232, + "step": 81135 + }, + { + "epoch": 3.79, + "learning_rate": 7.442979637264277e-06, + "loss": 0.2608, + "step": 81140 + }, + { + "epoch": 3.79, + "learning_rate": 7.442195852209491e-06, + "loss": 0.2487, + "step": 81145 + }, + { + "epoch": 3.79, + "learning_rate": 7.441412067154704e-06, + "loss": 0.0297, + "step": 81150 + }, + { + "epoch": 3.79, + "learning_rate": 7.440628282099918e-06, + "loss": 0.0077, + "step": 81155 + }, + { + "epoch": 3.79, + "learning_rate": 7.439844497045131e-06, + "loss": 0.0524, + "step": 81160 + }, + { + "epoch": 3.79, + "learning_rate": 7.439060711990345e-06, + "loss": 0.0355, + "step": 81165 + }, + { + "epoch": 3.79, + "learning_rate": 7.438276926935558e-06, + "loss": 0.0954, + "step": 81170 + }, + { + "epoch": 3.79, + "learning_rate": 7.437493141880772e-06, + "loss": 0.1215, + "step": 81175 + }, + { + "epoch": 3.79, + "learning_rate": 7.436709356825985e-06, + "loss": 0.1101, + "step": 81180 + }, + { + "epoch": 3.79, + "learning_rate": 7.435925571771197e-06, + "loss": 0.1134, + "step": 81185 + }, + { + "epoch": 3.79, + "learning_rate": 7.435141786716411e-06, + "loss": 0.1016, + "step": 81190 + }, + { + "epoch": 3.79, + "learning_rate": 7.434358001661625e-06, + "loss": 0.3215, + "step": 81195 + }, + { + "epoch": 3.79, + "learning_rate": 7.433574216606838e-06, + "loss": 0.0755, + "step": 81200 + }, + { + "epoch": 3.79, + "learning_rate": 7.432790431552052e-06, + "loss": 0.0231, + "step": 81205 + }, + { + "epoch": 3.79, + "learning_rate": 7.432006646497265e-06, + "loss": 0.0457, + "step": 81210 + }, + { + "epoch": 3.79, + "learning_rate": 7.431222861442479e-06, + "loss": 0.0341, + "step": 81215 + }, + { + "epoch": 3.79, + "learning_rate": 7.430439076387692e-06, + "loss": 0.1064, + "step": 81220 + }, + { + "epoch": 3.79, + "learning_rate": 7.429655291332906e-06, + "loss": 0.105, + "step": 81225 + }, + { + "epoch": 3.79, + "learning_rate": 7.428871506278119e-06, + "loss": 0.0941, + "step": 81230 + }, + { + "epoch": 3.79, + "learning_rate": 7.428087721223333e-06, + "loss": 0.0624, + "step": 81235 + }, + { + "epoch": 3.79, + "learning_rate": 7.427303936168546e-06, + "loss": 0.2318, + "step": 81240 + }, + { + "epoch": 3.79, + "learning_rate": 7.42652015111376e-06, + "loss": 0.2825, + "step": 81245 + }, + { + "epoch": 3.79, + "learning_rate": 7.425736366058972e-06, + "loss": 0.0709, + "step": 81250 + }, + { + "epoch": 3.79, + "learning_rate": 7.424952581004186e-06, + "loss": 0.0251, + "step": 81255 + }, + { + "epoch": 3.79, + "learning_rate": 7.424168795949399e-06, + "loss": 0.0932, + "step": 81260 + }, + { + "epoch": 3.79, + "learning_rate": 7.423385010894613e-06, + "loss": 0.0567, + "step": 81265 + }, + { + "epoch": 3.79, + "learning_rate": 7.422601225839826e-06, + "loss": 0.0956, + "step": 81270 + }, + { + "epoch": 3.79, + "learning_rate": 7.42181744078504e-06, + "loss": 0.0827, + "step": 81275 + }, + { + "epoch": 3.79, + "learning_rate": 7.421033655730253e-06, + "loss": 0.1244, + "step": 81280 + }, + { + "epoch": 3.79, + "learning_rate": 7.420249870675467e-06, + "loss": 0.049, + "step": 81285 + }, + { + "epoch": 3.79, + "learning_rate": 7.41946608562068e-06, + "loss": 0.1748, + "step": 81290 + }, + { + "epoch": 3.79, + "learning_rate": 7.418682300565894e-06, + "loss": 0.2296, + "step": 81295 + }, + { + "epoch": 3.79, + "learning_rate": 7.417898515511107e-06, + "loss": 0.0165, + "step": 81300 + }, + { + "epoch": 3.79, + "learning_rate": 7.417114730456321e-06, + "loss": 0.0275, + "step": 81305 + }, + { + "epoch": 3.79, + "learning_rate": 7.416330945401534e-06, + "loss": 0.0393, + "step": 81310 + }, + { + "epoch": 3.79, + "learning_rate": 7.415547160346747e-06, + "loss": 0.0505, + "step": 81315 + }, + { + "epoch": 3.79, + "learning_rate": 7.41476337529196e-06, + "loss": 0.148, + "step": 81320 + }, + { + "epoch": 3.79, + "learning_rate": 7.413979590237174e-06, + "loss": 0.0627, + "step": 81325 + }, + { + "epoch": 3.79, + "learning_rate": 7.413195805182387e-06, + "loss": 0.111, + "step": 81330 + }, + { + "epoch": 3.8, + "learning_rate": 7.412412020127601e-06, + "loss": 0.1179, + "step": 81335 + }, + { + "epoch": 3.8, + "learning_rate": 7.411628235072814e-06, + "loss": 0.2385, + "step": 81340 + }, + { + "epoch": 3.8, + "learning_rate": 7.410844450018028e-06, + "loss": 0.2367, + "step": 81345 + }, + { + "epoch": 3.8, + "learning_rate": 7.410060664963241e-06, + "loss": 0.0705, + "step": 81350 + }, + { + "epoch": 3.8, + "learning_rate": 7.409276879908455e-06, + "loss": 0.0118, + "step": 81355 + }, + { + "epoch": 3.8, + "learning_rate": 7.408493094853668e-06, + "loss": 0.0494, + "step": 81360 + }, + { + "epoch": 3.8, + "learning_rate": 7.407709309798882e-06, + "loss": 0.0659, + "step": 81365 + }, + { + "epoch": 3.8, + "learning_rate": 7.406925524744096e-06, + "loss": 0.0815, + "step": 81370 + }, + { + "epoch": 3.8, + "learning_rate": 7.406141739689309e-06, + "loss": 0.1722, + "step": 81375 + }, + { + "epoch": 3.8, + "learning_rate": 7.405357954634521e-06, + "loss": 0.0848, + "step": 81380 + }, + { + "epoch": 3.8, + "learning_rate": 7.404574169579735e-06, + "loss": 0.1096, + "step": 81385 + }, + { + "epoch": 3.8, + "learning_rate": 7.403790384524948e-06, + "loss": 0.2585, + "step": 81390 + }, + { + "epoch": 3.8, + "learning_rate": 7.403006599470162e-06, + "loss": 0.3918, + "step": 81395 + }, + { + "epoch": 3.8, + "learning_rate": 7.402222814415375e-06, + "loss": 0.0702, + "step": 81400 + }, + { + "epoch": 3.8, + "learning_rate": 7.401439029360589e-06, + "loss": 0.016, + "step": 81405 + }, + { + "epoch": 3.8, + "learning_rate": 7.400655244305802e-06, + "loss": 0.0741, + "step": 81410 + }, + { + "epoch": 3.8, + "learning_rate": 7.399871459251016e-06, + "loss": 0.0389, + "step": 81415 + }, + { + "epoch": 3.8, + "learning_rate": 7.3990876741962296e-06, + "loss": 0.0683, + "step": 81420 + }, + { + "epoch": 3.8, + "learning_rate": 7.398303889141443e-06, + "loss": 0.1226, + "step": 81425 + }, + { + "epoch": 3.8, + "learning_rate": 7.3975201040866565e-06, + "loss": 0.1473, + "step": 81430 + }, + { + "epoch": 3.8, + "learning_rate": 7.39673631903187e-06, + "loss": 0.1458, + "step": 81435 + }, + { + "epoch": 3.8, + "learning_rate": 7.3959525339770835e-06, + "loss": 0.1623, + "step": 81440 + }, + { + "epoch": 3.8, + "learning_rate": 7.395168748922296e-06, + "loss": 0.2272, + "step": 81445 + }, + { + "epoch": 3.8, + "learning_rate": 7.394384963867509e-06, + "loss": 0.0278, + "step": 81450 + }, + { + "epoch": 3.8, + "learning_rate": 7.393601178812723e-06, + "loss": 0.026, + "step": 81455 + }, + { + "epoch": 3.8, + "learning_rate": 7.392817393757937e-06, + "loss": 0.0562, + "step": 81460 + }, + { + "epoch": 3.8, + "learning_rate": 7.39203360870315e-06, + "loss": 0.0508, + "step": 81465 + }, + { + "epoch": 3.8, + "learning_rate": 7.3912498236483635e-06, + "loss": 0.067, + "step": 81470 + }, + { + "epoch": 3.8, + "learning_rate": 7.390466038593577e-06, + "loss": 0.1139, + "step": 81475 + }, + { + "epoch": 3.8, + "learning_rate": 7.3896822535387905e-06, + "loss": 0.0491, + "step": 81480 + }, + { + "epoch": 3.8, + "learning_rate": 7.3888984684840036e-06, + "loss": 0.1286, + "step": 81485 + }, + { + "epoch": 3.8, + "learning_rate": 7.3881146834292175e-06, + "loss": 0.2884, + "step": 81490 + }, + { + "epoch": 3.8, + "learning_rate": 7.3873308983744305e-06, + "loss": 0.1804, + "step": 81495 + }, + { + "epoch": 3.8, + "learning_rate": 7.3865471133196444e-06, + "loss": 0.0247, + "step": 81500 + }, + { + "epoch": 3.8, + "learning_rate": 7.3857633282648575e-06, + "loss": 0.0351, + "step": 81505 + }, + { + "epoch": 3.8, + "learning_rate": 7.3849795432100706e-06, + "loss": 0.0495, + "step": 81510 + }, + { + "epoch": 3.8, + "learning_rate": 7.384195758155284e-06, + "loss": 0.0383, + "step": 81515 + }, + { + "epoch": 3.8, + "learning_rate": 7.3834119731004975e-06, + "loss": 0.0481, + "step": 81520 + }, + { + "epoch": 3.8, + "learning_rate": 7.382628188045711e-06, + "loss": 0.096, + "step": 81525 + }, + { + "epoch": 3.8, + "learning_rate": 7.3818444029909245e-06, + "loss": 0.0954, + "step": 81530 + }, + { + "epoch": 3.8, + "learning_rate": 7.3810606179361375e-06, + "loss": 0.1689, + "step": 81535 + }, + { + "epoch": 3.8, + "learning_rate": 7.3802768328813515e-06, + "loss": 0.1517, + "step": 81540 + }, + { + "epoch": 3.81, + "learning_rate": 7.3794930478265645e-06, + "loss": 0.2761, + "step": 81545 + }, + { + "epoch": 3.81, + "learning_rate": 7.378709262771778e-06, + "loss": 0.0598, + "step": 81550 + }, + { + "epoch": 3.81, + "learning_rate": 7.3779254777169915e-06, + "loss": 0.035, + "step": 81555 + }, + { + "epoch": 3.81, + "learning_rate": 7.377141692662205e-06, + "loss": 0.0392, + "step": 81560 + }, + { + "epoch": 3.81, + "learning_rate": 7.3763579076074184e-06, + "loss": 0.0373, + "step": 81565 + }, + { + "epoch": 3.81, + "learning_rate": 7.375574122552632e-06, + "loss": 0.0863, + "step": 81570 + }, + { + "epoch": 3.81, + "learning_rate": 7.3747903374978446e-06, + "loss": 0.0891, + "step": 81575 + }, + { + "epoch": 3.81, + "learning_rate": 7.3740065524430585e-06, + "loss": 0.22, + "step": 81580 + }, + { + "epoch": 3.81, + "learning_rate": 7.3732227673882715e-06, + "loss": 0.1706, + "step": 81585 + }, + { + "epoch": 3.81, + "learning_rate": 7.3724389823334854e-06, + "loss": 0.1848, + "step": 81590 + }, + { + "epoch": 3.81, + "learning_rate": 7.3716551972786985e-06, + "loss": 0.2883, + "step": 81595 + }, + { + "epoch": 3.81, + "learning_rate": 7.370871412223912e-06, + "loss": 0.0452, + "step": 81600 + }, + { + "epoch": 3.81, + "learning_rate": 7.3700876271691255e-06, + "loss": 0.0269, + "step": 81605 + }, + { + "epoch": 3.81, + "learning_rate": 7.369303842114339e-06, + "loss": 0.0488, + "step": 81610 + }, + { + "epoch": 3.81, + "learning_rate": 7.368520057059552e-06, + "loss": 0.1098, + "step": 81615 + }, + { + "epoch": 3.81, + "learning_rate": 7.367736272004766e-06, + "loss": 0.044, + "step": 81620 + }, + { + "epoch": 3.81, + "learning_rate": 7.366952486949979e-06, + "loss": 0.0823, + "step": 81625 + }, + { + "epoch": 3.81, + "learning_rate": 7.366168701895193e-06, + "loss": 0.102, + "step": 81630 + }, + { + "epoch": 3.81, + "learning_rate": 7.365384916840407e-06, + "loss": 0.0871, + "step": 81635 + }, + { + "epoch": 3.81, + "learning_rate": 7.364601131785619e-06, + "loss": 0.1703, + "step": 81640 + }, + { + "epoch": 3.81, + "learning_rate": 7.3638173467308325e-06, + "loss": 0.2861, + "step": 81645 + }, + { + "epoch": 3.81, + "learning_rate": 7.363033561676046e-06, + "loss": 0.0518, + "step": 81650 + }, + { + "epoch": 3.81, + "learning_rate": 7.3622497766212594e-06, + "loss": 0.0568, + "step": 81655 + }, + { + "epoch": 3.81, + "learning_rate": 7.361465991566473e-06, + "loss": 0.0474, + "step": 81660 + }, + { + "epoch": 3.81, + "learning_rate": 7.360682206511686e-06, + "loss": 0.0214, + "step": 81665 + }, + { + "epoch": 3.81, + "learning_rate": 7.3598984214569e-06, + "loss": 0.0658, + "step": 81670 + }, + { + "epoch": 3.81, + "learning_rate": 7.359114636402113e-06, + "loss": 0.0758, + "step": 81675 + }, + { + "epoch": 3.81, + "learning_rate": 7.358330851347327e-06, + "loss": 0.0836, + "step": 81680 + }, + { + "epoch": 3.81, + "learning_rate": 7.357547066292541e-06, + "loss": 0.0765, + "step": 81685 + }, + { + "epoch": 3.81, + "learning_rate": 7.356763281237754e-06, + "loss": 0.1627, + "step": 81690 + }, + { + "epoch": 3.81, + "learning_rate": 7.355979496182968e-06, + "loss": 0.3071, + "step": 81695 + }, + { + "epoch": 3.81, + "learning_rate": 7.355195711128181e-06, + "loss": 0.1123, + "step": 81700 + }, + { + "epoch": 3.81, + "learning_rate": 7.354411926073393e-06, + "loss": 0.0162, + "step": 81705 + }, + { + "epoch": 3.81, + "learning_rate": 7.353628141018607e-06, + "loss": 0.0456, + "step": 81710 + }, + { + "epoch": 3.81, + "learning_rate": 7.35284435596382e-06, + "loss": 0.052, + "step": 81715 + }, + { + "epoch": 3.81, + "learning_rate": 7.352060570909034e-06, + "loss": 0.0999, + "step": 81720 + }, + { + "epoch": 3.81, + "learning_rate": 7.351276785854248e-06, + "loss": 0.1386, + "step": 81725 + }, + { + "epoch": 3.81, + "learning_rate": 7.350493000799461e-06, + "loss": 0.0898, + "step": 81730 + }, + { + "epoch": 3.81, + "learning_rate": 7.349709215744675e-06, + "loss": 0.1664, + "step": 81735 + }, + { + "epoch": 3.81, + "learning_rate": 7.348925430689888e-06, + "loss": 0.2276, + "step": 81740 + }, + { + "epoch": 3.81, + "learning_rate": 7.348141645635102e-06, + "loss": 0.2442, + "step": 81745 + }, + { + "epoch": 3.81, + "learning_rate": 7.347357860580315e-06, + "loss": 0.0724, + "step": 81750 + }, + { + "epoch": 3.81, + "learning_rate": 7.346574075525529e-06, + "loss": 0.0132, + "step": 81755 + }, + { + "epoch": 3.82, + "learning_rate": 7.345790290470742e-06, + "loss": 0.0106, + "step": 81760 + }, + { + "epoch": 3.82, + "learning_rate": 7.345006505415956e-06, + "loss": 0.0188, + "step": 81765 + }, + { + "epoch": 3.82, + "learning_rate": 7.344222720361168e-06, + "loss": 0.0648, + "step": 81770 + }, + { + "epoch": 3.82, + "learning_rate": 7.343438935306382e-06, + "loss": 0.1224, + "step": 81775 + }, + { + "epoch": 3.82, + "learning_rate": 7.342655150251595e-06, + "loss": 0.0454, + "step": 81780 + }, + { + "epoch": 3.82, + "learning_rate": 7.341871365196809e-06, + "loss": 0.1007, + "step": 81785 + }, + { + "epoch": 3.82, + "learning_rate": 7.341087580142022e-06, + "loss": 0.227, + "step": 81790 + }, + { + "epoch": 3.82, + "learning_rate": 7.340303795087236e-06, + "loss": 0.206, + "step": 81795 + }, + { + "epoch": 3.82, + "learning_rate": 7.339520010032449e-06, + "loss": 0.0636, + "step": 81800 + }, + { + "epoch": 3.82, + "learning_rate": 7.338736224977663e-06, + "loss": 0.0383, + "step": 81805 + }, + { + "epoch": 3.82, + "learning_rate": 7.337952439922876e-06, + "loss": 0.0408, + "step": 81810 + }, + { + "epoch": 3.82, + "learning_rate": 7.33716865486809e-06, + "loss": 0.0514, + "step": 81815 + }, + { + "epoch": 3.82, + "learning_rate": 7.336384869813303e-06, + "loss": 0.0681, + "step": 81820 + }, + { + "epoch": 3.82, + "learning_rate": 7.335601084758517e-06, + "loss": 0.0524, + "step": 81825 + }, + { + "epoch": 3.82, + "learning_rate": 7.33481729970373e-06, + "loss": 0.0948, + "step": 81830 + }, + { + "epoch": 3.82, + "learning_rate": 7.334033514648943e-06, + "loss": 0.1006, + "step": 81835 + }, + { + "epoch": 3.82, + "learning_rate": 7.333249729594156e-06, + "loss": 0.1285, + "step": 81840 + }, + { + "epoch": 3.82, + "learning_rate": 7.33246594453937e-06, + "loss": 0.2243, + "step": 81845 + }, + { + "epoch": 3.82, + "learning_rate": 7.331682159484583e-06, + "loss": 0.0631, + "step": 81850 + }, + { + "epoch": 3.82, + "learning_rate": 7.330898374429797e-06, + "loss": 0.0126, + "step": 81855 + }, + { + "epoch": 3.82, + "learning_rate": 7.33011458937501e-06, + "loss": 0.033, + "step": 81860 + }, + { + "epoch": 3.82, + "learning_rate": 7.329330804320224e-06, + "loss": 0.0864, + "step": 81865 + }, + { + "epoch": 3.82, + "learning_rate": 7.328547019265437e-06, + "loss": 0.0507, + "step": 81870 + }, + { + "epoch": 3.82, + "learning_rate": 7.327763234210651e-06, + "loss": 0.0965, + "step": 81875 + }, + { + "epoch": 3.82, + "learning_rate": 7.326979449155864e-06, + "loss": 0.1274, + "step": 81880 + }, + { + "epoch": 3.82, + "learning_rate": 7.326195664101078e-06, + "loss": 0.1476, + "step": 81885 + }, + { + "epoch": 3.82, + "learning_rate": 7.325411879046291e-06, + "loss": 0.239, + "step": 81890 + }, + { + "epoch": 3.82, + "learning_rate": 7.324628093991505e-06, + "loss": 0.2824, + "step": 81895 + }, + { + "epoch": 3.82, + "learning_rate": 7.323844308936717e-06, + "loss": 0.046, + "step": 81900 + }, + { + "epoch": 3.82, + "learning_rate": 7.323060523881931e-06, + "loss": 0.0408, + "step": 81905 + }, + { + "epoch": 3.82, + "learning_rate": 7.322276738827144e-06, + "loss": 0.0289, + "step": 81910 + }, + { + "epoch": 3.82, + "learning_rate": 7.321492953772358e-06, + "loss": 0.0431, + "step": 81915 + }, + { + "epoch": 3.82, + "learning_rate": 7.320709168717571e-06, + "loss": 0.025, + "step": 81920 + }, + { + "epoch": 3.82, + "learning_rate": 7.319925383662785e-06, + "loss": 0.0459, + "step": 81925 + }, + { + "epoch": 3.82, + "learning_rate": 7.319141598607998e-06, + "loss": 0.1007, + "step": 81930 + }, + { + "epoch": 3.82, + "learning_rate": 7.318357813553212e-06, + "loss": 0.1685, + "step": 81935 + }, + { + "epoch": 3.82, + "learning_rate": 7.317574028498425e-06, + "loss": 0.2215, + "step": 81940 + }, + { + "epoch": 3.82, + "learning_rate": 7.316790243443639e-06, + "loss": 0.2574, + "step": 81945 + }, + { + "epoch": 3.82, + "learning_rate": 7.316006458388853e-06, + "loss": 0.0614, + "step": 81950 + }, + { + "epoch": 3.82, + "learning_rate": 7.315222673334066e-06, + "loss": 0.0751, + "step": 81955 + }, + { + "epoch": 3.82, + "learning_rate": 7.31443888827928e-06, + "loss": 0.0696, + "step": 81960 + }, + { + "epoch": 3.82, + "learning_rate": 7.313655103224492e-06, + "loss": 0.0804, + "step": 81965 + }, + { + "epoch": 3.82, + "learning_rate": 7.312871318169705e-06, + "loss": 0.0579, + "step": 81970 + }, + { + "epoch": 3.83, + "learning_rate": 7.312087533114919e-06, + "loss": 0.064, + "step": 81975 + }, + { + "epoch": 3.83, + "learning_rate": 7.311303748060132e-06, + "loss": 0.0764, + "step": 81980 + }, + { + "epoch": 3.83, + "learning_rate": 7.310519963005346e-06, + "loss": 0.1372, + "step": 81985 + }, + { + "epoch": 3.83, + "learning_rate": 7.309736177950559e-06, + "loss": 0.2091, + "step": 81990 + }, + { + "epoch": 3.83, + "learning_rate": 7.308952392895773e-06, + "loss": 0.2188, + "step": 81995 + }, + { + "epoch": 3.83, + "learning_rate": 7.308168607840987e-06, + "loss": 0.0708, + "step": 82000 + }, + { + "epoch": 3.83, + "learning_rate": 7.3073848227862e-06, + "loss": 0.0232, + "step": 82005 + }, + { + "epoch": 3.83, + "learning_rate": 7.306601037731414e-06, + "loss": 0.0316, + "step": 82010 + }, + { + "epoch": 3.83, + "learning_rate": 7.305817252676627e-06, + "loss": 0.0794, + "step": 82015 + }, + { + "epoch": 3.83, + "learning_rate": 7.305033467621841e-06, + "loss": 0.0471, + "step": 82020 + }, + { + "epoch": 3.83, + "learning_rate": 7.304249682567054e-06, + "loss": 0.1317, + "step": 82025 + }, + { + "epoch": 3.83, + "learning_rate": 7.303465897512266e-06, + "loss": 0.1332, + "step": 82030 + }, + { + "epoch": 3.83, + "learning_rate": 7.30268211245748e-06, + "loss": 0.0663, + "step": 82035 + }, + { + "epoch": 3.83, + "learning_rate": 7.301898327402694e-06, + "loss": 0.1334, + "step": 82040 + }, + { + "epoch": 3.83, + "learning_rate": 7.301114542347907e-06, + "loss": 0.2608, + "step": 82045 + }, + { + "epoch": 3.83, + "learning_rate": 7.300330757293121e-06, + "loss": 0.1023, + "step": 82050 + }, + { + "epoch": 3.83, + "learning_rate": 7.299546972238334e-06, + "loss": 0.042, + "step": 82055 + }, + { + "epoch": 3.83, + "learning_rate": 7.298763187183548e-06, + "loss": 0.0138, + "step": 82060 + }, + { + "epoch": 3.83, + "learning_rate": 7.297979402128761e-06, + "loss": 0.0702, + "step": 82065 + }, + { + "epoch": 3.83, + "learning_rate": 7.297195617073975e-06, + "loss": 0.1097, + "step": 82070 + }, + { + "epoch": 3.83, + "learning_rate": 7.296411832019188e-06, + "loss": 0.0231, + "step": 82075 + }, + { + "epoch": 3.83, + "learning_rate": 7.295628046964402e-06, + "loss": 0.0528, + "step": 82080 + }, + { + "epoch": 3.83, + "learning_rate": 7.294844261909615e-06, + "loss": 0.0923, + "step": 82085 + }, + { + "epoch": 3.83, + "learning_rate": 7.2940604768548286e-06, + "loss": 0.153, + "step": 82090 + }, + { + "epoch": 3.83, + "learning_rate": 7.293276691800041e-06, + "loss": 0.2262, + "step": 82095 + }, + { + "epoch": 3.83, + "learning_rate": 7.292492906745255e-06, + "loss": 0.0514, + "step": 82100 + }, + { + "epoch": 3.83, + "learning_rate": 7.291709121690468e-06, + "loss": 0.0909, + "step": 82105 + }, + { + "epoch": 3.83, + "learning_rate": 7.290925336635682e-06, + "loss": 0.0377, + "step": 82110 + }, + { + "epoch": 3.83, + "learning_rate": 7.290141551580895e-06, + "loss": 0.0834, + "step": 82115 + }, + { + "epoch": 3.83, + "learning_rate": 7.289357766526109e-06, + "loss": 0.0565, + "step": 82120 + }, + { + "epoch": 3.83, + "learning_rate": 7.288573981471322e-06, + "loss": 0.1259, + "step": 82125 + }, + { + "epoch": 3.83, + "learning_rate": 7.287790196416536e-06, + "loss": 0.1021, + "step": 82130 + }, + { + "epoch": 3.83, + "learning_rate": 7.287006411361749e-06, + "loss": 0.1543, + "step": 82135 + }, + { + "epoch": 3.83, + "learning_rate": 7.2862226263069625e-06, + "loss": 0.1117, + "step": 82140 + }, + { + "epoch": 3.83, + "learning_rate": 7.285438841252176e-06, + "loss": 0.2498, + "step": 82145 + }, + { + "epoch": 3.83, + "learning_rate": 7.2846550561973895e-06, + "loss": 0.074, + "step": 82150 + }, + { + "epoch": 3.83, + "learning_rate": 7.2838712711426026e-06, + "loss": 0.014, + "step": 82155 + }, + { + "epoch": 3.83, + "learning_rate": 7.283087486087816e-06, + "loss": 0.0593, + "step": 82160 + }, + { + "epoch": 3.83, + "learning_rate": 7.282303701033029e-06, + "loss": 0.0556, + "step": 82165 + }, + { + "epoch": 3.83, + "learning_rate": 7.281519915978243e-06, + "loss": 0.1292, + "step": 82170 + }, + { + "epoch": 3.83, + "learning_rate": 7.280736130923456e-06, + "loss": 0.1866, + "step": 82175 + }, + { + "epoch": 3.83, + "learning_rate": 7.2799523458686696e-06, + "loss": 0.1015, + "step": 82180 + }, + { + "epoch": 3.83, + "learning_rate": 7.279168560813883e-06, + "loss": 0.1122, + "step": 82185 + }, + { + "epoch": 3.84, + "learning_rate": 7.2783847757590965e-06, + "loss": 0.2874, + "step": 82190 + }, + { + "epoch": 3.84, + "learning_rate": 7.27760099070431e-06, + "loss": 0.2783, + "step": 82195 + }, + { + "epoch": 3.84, + "learning_rate": 7.2768172056495235e-06, + "loss": 0.0762, + "step": 82200 + }, + { + "epoch": 3.84, + "learning_rate": 7.2760334205947365e-06, + "loss": 0.0206, + "step": 82205 + }, + { + "epoch": 3.84, + "learning_rate": 7.2752496355399504e-06, + "loss": 0.0353, + "step": 82210 + }, + { + "epoch": 3.84, + "learning_rate": 7.274465850485164e-06, + "loss": 0.0914, + "step": 82215 + }, + { + "epoch": 3.84, + "learning_rate": 7.273682065430377e-06, + "loss": 0.0593, + "step": 82220 + }, + { + "epoch": 3.84, + "learning_rate": 7.27289828037559e-06, + "loss": 0.0548, + "step": 82225 + }, + { + "epoch": 3.84, + "learning_rate": 7.2721144953208035e-06, + "loss": 0.1621, + "step": 82230 + }, + { + "epoch": 3.84, + "learning_rate": 7.271330710266017e-06, + "loss": 0.1095, + "step": 82235 + }, + { + "epoch": 3.84, + "learning_rate": 7.2705469252112305e-06, + "loss": 0.1579, + "step": 82240 + }, + { + "epoch": 3.84, + "learning_rate": 7.2697631401564436e-06, + "loss": 0.3142, + "step": 82245 + }, + { + "epoch": 3.84, + "learning_rate": 7.2689793551016575e-06, + "loss": 0.0763, + "step": 82250 + }, + { + "epoch": 3.84, + "learning_rate": 7.2681955700468705e-06, + "loss": 0.0297, + "step": 82255 + }, + { + "epoch": 3.84, + "learning_rate": 7.2674117849920844e-06, + "loss": 0.051, + "step": 82260 + }, + { + "epoch": 3.84, + "learning_rate": 7.266627999937298e-06, + "loss": 0.0488, + "step": 82265 + }, + { + "epoch": 3.84, + "learning_rate": 7.265844214882511e-06, + "loss": 0.069, + "step": 82270 + }, + { + "epoch": 3.84, + "learning_rate": 7.265060429827725e-06, + "loss": 0.0763, + "step": 82275 + }, + { + "epoch": 3.84, + "learning_rate": 7.264276644772938e-06, + "loss": 0.0423, + "step": 82280 + }, + { + "epoch": 3.84, + "learning_rate": 7.263492859718152e-06, + "loss": 0.0885, + "step": 82285 + }, + { + "epoch": 3.84, + "learning_rate": 7.2627090746633645e-06, + "loss": 0.1617, + "step": 82290 + }, + { + "epoch": 3.84, + "learning_rate": 7.2619252896085775e-06, + "loss": 0.3108, + "step": 82295 + }, + { + "epoch": 3.84, + "learning_rate": 7.2611415045537914e-06, + "loss": 0.0586, + "step": 82300 + }, + { + "epoch": 3.84, + "learning_rate": 7.260357719499005e-06, + "loss": 0.0536, + "step": 82305 + }, + { + "epoch": 3.84, + "learning_rate": 7.259573934444218e-06, + "loss": 0.0501, + "step": 82310 + }, + { + "epoch": 3.84, + "learning_rate": 7.258790149389432e-06, + "loss": 0.0268, + "step": 82315 + }, + { + "epoch": 3.84, + "learning_rate": 7.258006364334645e-06, + "loss": 0.1857, + "step": 82320 + }, + { + "epoch": 3.84, + "learning_rate": 7.257222579279859e-06, + "loss": 0.0637, + "step": 82325 + }, + { + "epoch": 3.84, + "learning_rate": 7.256438794225072e-06, + "loss": 0.1362, + "step": 82330 + }, + { + "epoch": 3.84, + "learning_rate": 7.255655009170286e-06, + "loss": 0.1791, + "step": 82335 + }, + { + "epoch": 3.84, + "learning_rate": 7.254871224115499e-06, + "loss": 0.1739, + "step": 82340 + }, + { + "epoch": 3.84, + "learning_rate": 7.254087439060713e-06, + "loss": 0.222, + "step": 82345 + }, + { + "epoch": 3.84, + "learning_rate": 7.253303654005926e-06, + "loss": 0.0556, + "step": 82350 + }, + { + "epoch": 3.84, + "learning_rate": 7.252519868951139e-06, + "loss": 0.0175, + "step": 82355 + }, + { + "epoch": 3.84, + "learning_rate": 7.251736083896352e-06, + "loss": 0.0208, + "step": 82360 + }, + { + "epoch": 3.84, + "learning_rate": 7.250952298841566e-06, + "loss": 0.0106, + "step": 82365 + }, + { + "epoch": 3.84, + "learning_rate": 7.250168513786779e-06, + "loss": 0.0689, + "step": 82370 + }, + { + "epoch": 3.84, + "learning_rate": 7.249384728731993e-06, + "loss": 0.0633, + "step": 82375 + }, + { + "epoch": 3.84, + "learning_rate": 7.248600943677206e-06, + "loss": 0.0498, + "step": 82380 + }, + { + "epoch": 3.84, + "learning_rate": 7.24781715862242e-06, + "loss": 0.1595, + "step": 82385 + }, + { + "epoch": 3.84, + "learning_rate": 7.247033373567633e-06, + "loss": 0.1389, + "step": 82390 + }, + { + "epoch": 3.84, + "learning_rate": 7.246249588512847e-06, + "loss": 0.2956, + "step": 82395 + }, + { + "epoch": 3.84, + "learning_rate": 7.24546580345806e-06, + "loss": 0.0601, + "step": 82400 + }, + { + "epoch": 3.85, + "learning_rate": 7.244682018403274e-06, + "loss": 0.0084, + "step": 82405 + }, + { + "epoch": 3.85, + "learning_rate": 7.243898233348487e-06, + "loss": 0.0508, + "step": 82410 + }, + { + "epoch": 3.85, + "learning_rate": 7.243114448293701e-06, + "loss": 0.0388, + "step": 82415 + }, + { + "epoch": 3.85, + "learning_rate": 7.242330663238913e-06, + "loss": 0.0569, + "step": 82420 + }, + { + "epoch": 3.85, + "learning_rate": 7.241546878184127e-06, + "loss": 0.0943, + "step": 82425 + }, + { + "epoch": 3.85, + "learning_rate": 7.24076309312934e-06, + "loss": 0.0948, + "step": 82430 + }, + { + "epoch": 3.85, + "learning_rate": 7.239979308074554e-06, + "loss": 0.1843, + "step": 82435 + }, + { + "epoch": 3.85, + "learning_rate": 7.239195523019767e-06, + "loss": 0.2833, + "step": 82440 + }, + { + "epoch": 3.85, + "learning_rate": 7.238411737964981e-06, + "loss": 0.1999, + "step": 82445 + }, + { + "epoch": 3.85, + "learning_rate": 7.237627952910194e-06, + "loss": 0.056, + "step": 82450 + }, + { + "epoch": 3.85, + "learning_rate": 7.236844167855408e-06, + "loss": 0.009, + "step": 82455 + }, + { + "epoch": 3.85, + "learning_rate": 7.236060382800621e-06, + "loss": 0.0356, + "step": 82460 + }, + { + "epoch": 3.85, + "learning_rate": 7.235276597745835e-06, + "loss": 0.0509, + "step": 82465 + }, + { + "epoch": 3.85, + "learning_rate": 7.234492812691048e-06, + "loss": 0.0332, + "step": 82470 + }, + { + "epoch": 3.85, + "learning_rate": 7.233709027636262e-06, + "loss": 0.0879, + "step": 82475 + }, + { + "epoch": 3.85, + "learning_rate": 7.232925242581476e-06, + "loss": 0.1112, + "step": 82480 + }, + { + "epoch": 3.85, + "learning_rate": 7.232141457526688e-06, + "loss": 0.1709, + "step": 82485 + }, + { + "epoch": 3.85, + "learning_rate": 7.231357672471901e-06, + "loss": 0.1559, + "step": 82490 + }, + { + "epoch": 3.85, + "learning_rate": 7.230573887417115e-06, + "loss": 0.2309, + "step": 82495 + }, + { + "epoch": 3.85, + "learning_rate": 7.229790102362328e-06, + "loss": 0.0397, + "step": 82500 + }, + { + "epoch": 3.85, + "learning_rate": 7.229006317307542e-06, + "loss": 0.0336, + "step": 82505 + }, + { + "epoch": 3.85, + "learning_rate": 7.228222532252755e-06, + "loss": 0.0623, + "step": 82510 + }, + { + "epoch": 3.85, + "learning_rate": 7.227438747197969e-06, + "loss": 0.0456, + "step": 82515 + }, + { + "epoch": 3.85, + "learning_rate": 7.226654962143182e-06, + "loss": 0.0266, + "step": 82520 + }, + { + "epoch": 3.85, + "learning_rate": 7.225871177088396e-06, + "loss": 0.067, + "step": 82525 + }, + { + "epoch": 3.85, + "learning_rate": 7.22508739203361e-06, + "loss": 0.1142, + "step": 82530 + }, + { + "epoch": 3.85, + "learning_rate": 7.224303606978823e-06, + "loss": 0.1408, + "step": 82535 + }, + { + "epoch": 3.85, + "learning_rate": 7.223519821924037e-06, + "loss": 0.1929, + "step": 82540 + }, + { + "epoch": 3.85, + "learning_rate": 7.22273603686925e-06, + "loss": 0.2066, + "step": 82545 + }, + { + "epoch": 3.85, + "learning_rate": 7.221952251814462e-06, + "loss": 0.0644, + "step": 82550 + }, + { + "epoch": 3.85, + "learning_rate": 7.221168466759676e-06, + "loss": 0.0071, + "step": 82555 + }, + { + "epoch": 3.85, + "learning_rate": 7.220384681704889e-06, + "loss": 0.0073, + "step": 82560 + }, + { + "epoch": 3.85, + "learning_rate": 7.219600896650103e-06, + "loss": 0.0488, + "step": 82565 + }, + { + "epoch": 3.85, + "learning_rate": 7.218817111595316e-06, + "loss": 0.03, + "step": 82570 + }, + { + "epoch": 3.85, + "learning_rate": 7.21803332654053e-06, + "loss": 0.0536, + "step": 82575 + }, + { + "epoch": 3.85, + "learning_rate": 7.217249541485744e-06, + "loss": 0.0703, + "step": 82580 + }, + { + "epoch": 3.85, + "learning_rate": 7.216465756430957e-06, + "loss": 0.1186, + "step": 82585 + }, + { + "epoch": 3.85, + "learning_rate": 7.215681971376171e-06, + "loss": 0.1826, + "step": 82590 + }, + { + "epoch": 3.85, + "learning_rate": 7.214898186321384e-06, + "loss": 0.2697, + "step": 82595 + }, + { + "epoch": 3.85, + "learning_rate": 7.214114401266598e-06, + "loss": 0.095, + "step": 82600 + }, + { + "epoch": 3.85, + "learning_rate": 7.213330616211811e-06, + "loss": 0.0544, + "step": 82605 + }, + { + "epoch": 3.85, + "learning_rate": 7.212546831157025e-06, + "loss": 0.0774, + "step": 82610 + }, + { + "epoch": 3.85, + "learning_rate": 7.211763046102237e-06, + "loss": 0.0261, + "step": 82615 + }, + { + "epoch": 3.86, + "learning_rate": 7.210979261047451e-06, + "loss": 0.0929, + "step": 82620 + }, + { + "epoch": 3.86, + "learning_rate": 7.210195475992664e-06, + "loss": 0.0937, + "step": 82625 + }, + { + "epoch": 3.86, + "learning_rate": 7.209411690937878e-06, + "loss": 0.1644, + "step": 82630 + }, + { + "epoch": 3.86, + "learning_rate": 7.208627905883091e-06, + "loss": 0.2539, + "step": 82635 + }, + { + "epoch": 3.86, + "learning_rate": 7.207844120828305e-06, + "loss": 0.2675, + "step": 82640 + }, + { + "epoch": 3.86, + "learning_rate": 7.207060335773518e-06, + "loss": 0.4391, + "step": 82645 + }, + { + "epoch": 3.86, + "learning_rate": 7.206276550718732e-06, + "loss": 0.1219, + "step": 82650 + }, + { + "epoch": 3.86, + "learning_rate": 7.205492765663945e-06, + "loss": 0.034, + "step": 82655 + }, + { + "epoch": 3.86, + "learning_rate": 7.204708980609159e-06, + "loss": 0.0426, + "step": 82660 + }, + { + "epoch": 3.86, + "learning_rate": 7.203925195554372e-06, + "loss": 0.0084, + "step": 82665 + }, + { + "epoch": 3.86, + "learning_rate": 7.203141410499586e-06, + "loss": 0.0198, + "step": 82670 + }, + { + "epoch": 3.86, + "learning_rate": 7.202357625444799e-06, + "loss": 0.1924, + "step": 82675 + }, + { + "epoch": 3.86, + "learning_rate": 7.201573840390012e-06, + "loss": 0.1191, + "step": 82680 + }, + { + "epoch": 3.86, + "learning_rate": 7.200790055335225e-06, + "loss": 0.1241, + "step": 82685 + }, + { + "epoch": 3.86, + "learning_rate": 7.200006270280439e-06, + "loss": 0.1177, + "step": 82690 + }, + { + "epoch": 3.86, + "learning_rate": 7.199222485225652e-06, + "loss": 0.1445, + "step": 82695 + }, + { + "epoch": 3.86, + "learning_rate": 7.198438700170866e-06, + "loss": 0.1037, + "step": 82700 + }, + { + "epoch": 3.86, + "learning_rate": 7.197654915116079e-06, + "loss": 0.0354, + "step": 82705 + }, + { + "epoch": 3.86, + "learning_rate": 7.196871130061293e-06, + "loss": 0.0342, + "step": 82710 + }, + { + "epoch": 3.86, + "learning_rate": 7.196087345006506e-06, + "loss": 0.0374, + "step": 82715 + }, + { + "epoch": 3.86, + "learning_rate": 7.19530355995172e-06, + "loss": 0.0835, + "step": 82720 + }, + { + "epoch": 3.86, + "learning_rate": 7.194519774896933e-06, + "loss": 0.065, + "step": 82725 + }, + { + "epoch": 3.86, + "learning_rate": 7.193735989842147e-06, + "loss": 0.1511, + "step": 82730 + }, + { + "epoch": 3.86, + "learning_rate": 7.19295220478736e-06, + "loss": 0.0832, + "step": 82735 + }, + { + "epoch": 3.86, + "learning_rate": 7.192168419732574e-06, + "loss": 0.3202, + "step": 82740 + }, + { + "epoch": 3.86, + "learning_rate": 7.191384634677786e-06, + "loss": 0.1847, + "step": 82745 + }, + { + "epoch": 3.86, + "learning_rate": 7.190600849623e-06, + "loss": 0.0422, + "step": 82750 + }, + { + "epoch": 3.86, + "learning_rate": 7.189817064568213e-06, + "loss": 0.0422, + "step": 82755 + }, + { + "epoch": 3.86, + "learning_rate": 7.189033279513427e-06, + "loss": 0.0557, + "step": 82760 + }, + { + "epoch": 3.86, + "learning_rate": 7.18824949445864e-06, + "loss": 0.0645, + "step": 82765 + }, + { + "epoch": 3.86, + "learning_rate": 7.187465709403854e-06, + "loss": 0.0985, + "step": 82770 + }, + { + "epoch": 3.86, + "learning_rate": 7.186681924349067e-06, + "loss": 0.1161, + "step": 82775 + }, + { + "epoch": 3.86, + "learning_rate": 7.185898139294281e-06, + "loss": 0.0945, + "step": 82780 + }, + { + "epoch": 3.86, + "learning_rate": 7.185114354239494e-06, + "loss": 0.1213, + "step": 82785 + }, + { + "epoch": 3.86, + "learning_rate": 7.184330569184708e-06, + "loss": 0.1853, + "step": 82790 + }, + { + "epoch": 3.86, + "learning_rate": 7.1835467841299215e-06, + "loss": 0.1965, + "step": 82795 + }, + { + "epoch": 3.86, + "learning_rate": 7.182762999075135e-06, + "loss": 0.0679, + "step": 82800 + }, + { + "epoch": 3.86, + "learning_rate": 7.1819792140203485e-06, + "loss": 0.0218, + "step": 82805 + }, + { + "epoch": 3.86, + "learning_rate": 7.181195428965561e-06, + "loss": 0.0262, + "step": 82810 + }, + { + "epoch": 3.86, + "learning_rate": 7.180411643910774e-06, + "loss": 0.0487, + "step": 82815 + }, + { + "epoch": 3.86, + "learning_rate": 7.179627858855988e-06, + "loss": 0.0454, + "step": 82820 + }, + { + "epoch": 3.86, + "learning_rate": 7.178844073801201e-06, + "loss": 0.064, + "step": 82825 + }, + { + "epoch": 3.86, + "learning_rate": 7.178060288746415e-06, + "loss": 0.0518, + "step": 82830 + }, + { + "epoch": 3.87, + "learning_rate": 7.177276503691628e-06, + "loss": 0.0888, + "step": 82835 + }, + { + "epoch": 3.87, + "learning_rate": 7.176492718636842e-06, + "loss": 0.2183, + "step": 82840 + }, + { + "epoch": 3.87, + "learning_rate": 7.1757089335820555e-06, + "loss": 0.3927, + "step": 82845 + }, + { + "epoch": 3.87, + "learning_rate": 7.1749251485272686e-06, + "loss": 0.0595, + "step": 82850 + }, + { + "epoch": 3.87, + "learning_rate": 7.1741413634724825e-06, + "loss": 0.0189, + "step": 82855 + }, + { + "epoch": 3.87, + "learning_rate": 7.1733575784176955e-06, + "loss": 0.0272, + "step": 82860 + }, + { + "epoch": 3.87, + "learning_rate": 7.1725737933629094e-06, + "loss": 0.0464, + "step": 82865 + }, + { + "epoch": 3.87, + "learning_rate": 7.1717900083081225e-06, + "loss": 0.0883, + "step": 82870 + }, + { + "epoch": 3.87, + "learning_rate": 7.171006223253335e-06, + "loss": 0.0915, + "step": 82875 + }, + { + "epoch": 3.87, + "learning_rate": 7.170222438198549e-06, + "loss": 0.0331, + "step": 82880 + }, + { + "epoch": 3.87, + "learning_rate": 7.1694386531437625e-06, + "loss": 0.111, + "step": 82885 + }, + { + "epoch": 3.87, + "learning_rate": 7.1686548680889756e-06, + "loss": 0.1855, + "step": 82890 + }, + { + "epoch": 3.87, + "learning_rate": 7.1678710830341895e-06, + "loss": 0.1827, + "step": 82895 + }, + { + "epoch": 3.87, + "learning_rate": 7.1670872979794025e-06, + "loss": 0.0682, + "step": 82900 + }, + { + "epoch": 3.87, + "learning_rate": 7.1663035129246164e-06, + "loss": 0.009, + "step": 82905 + }, + { + "epoch": 3.87, + "learning_rate": 7.1655197278698295e-06, + "loss": 0.0417, + "step": 82910 + }, + { + "epoch": 3.87, + "learning_rate": 7.164735942815043e-06, + "loss": 0.129, + "step": 82915 + }, + { + "epoch": 3.87, + "learning_rate": 7.1639521577602565e-06, + "loss": 0.0611, + "step": 82920 + }, + { + "epoch": 3.87, + "learning_rate": 7.16316837270547e-06, + "loss": 0.0629, + "step": 82925 + }, + { + "epoch": 3.87, + "learning_rate": 7.1623845876506834e-06, + "loss": 0.1248, + "step": 82930 + }, + { + "epoch": 3.87, + "learning_rate": 7.161600802595897e-06, + "loss": 0.1549, + "step": 82935 + }, + { + "epoch": 3.87, + "learning_rate": 7.1608170175411095e-06, + "loss": 0.2218, + "step": 82940 + }, + { + "epoch": 3.87, + "learning_rate": 7.1600332324863234e-06, + "loss": 0.433, + "step": 82945 + }, + { + "epoch": 3.87, + "learning_rate": 7.1592494474315365e-06, + "loss": 0.0611, + "step": 82950 + }, + { + "epoch": 3.87, + "learning_rate": 7.15846566237675e-06, + "loss": 0.0288, + "step": 82955 + }, + { + "epoch": 3.87, + "learning_rate": 7.1576818773219635e-06, + "loss": 0.048, + "step": 82960 + }, + { + "epoch": 3.87, + "learning_rate": 7.156898092267177e-06, + "loss": 0.0867, + "step": 82965 + }, + { + "epoch": 3.87, + "learning_rate": 7.1561143072123904e-06, + "loss": 0.0412, + "step": 82970 + }, + { + "epoch": 3.87, + "learning_rate": 7.155330522157604e-06, + "loss": 0.0969, + "step": 82975 + }, + { + "epoch": 3.87, + "learning_rate": 7.154546737102817e-06, + "loss": 0.1484, + "step": 82980 + }, + { + "epoch": 3.87, + "learning_rate": 7.153762952048031e-06, + "loss": 0.1506, + "step": 82985 + }, + { + "epoch": 3.87, + "learning_rate": 7.152979166993244e-06, + "loss": 0.2028, + "step": 82990 + }, + { + "epoch": 3.87, + "learning_rate": 7.152195381938458e-06, + "loss": 0.204, + "step": 82995 + }, + { + "epoch": 3.87, + "learning_rate": 7.151411596883671e-06, + "loss": 0.0698, + "step": 83000 + }, + { + "epoch": 3.87, + "learning_rate": 7.150627811828884e-06, + "loss": 0.0254, + "step": 83005 + }, + { + "epoch": 3.87, + "learning_rate": 7.1498440267740974e-06, + "loss": 0.0321, + "step": 83010 + }, + { + "epoch": 3.87, + "learning_rate": 7.149060241719311e-06, + "loss": 0.0639, + "step": 83015 + }, + { + "epoch": 3.87, + "learning_rate": 7.148276456664524e-06, + "loss": 0.0328, + "step": 83020 + }, + { + "epoch": 3.87, + "learning_rate": 7.147492671609738e-06, + "loss": 0.0518, + "step": 83025 + }, + { + "epoch": 3.87, + "learning_rate": 7.146708886554951e-06, + "loss": 0.108, + "step": 83030 + }, + { + "epoch": 3.87, + "learning_rate": 7.145925101500165e-06, + "loss": 0.1571, + "step": 83035 + }, + { + "epoch": 3.87, + "learning_rate": 7.145141316445378e-06, + "loss": 0.1334, + "step": 83040 + }, + { + "epoch": 3.87, + "learning_rate": 7.144357531390592e-06, + "loss": 0.3672, + "step": 83045 + }, + { + "epoch": 3.88, + "learning_rate": 7.143573746335805e-06, + "loss": 0.0319, + "step": 83050 + }, + { + "epoch": 3.88, + "learning_rate": 7.142789961281019e-06, + "loss": 0.0214, + "step": 83055 + }, + { + "epoch": 3.88, + "learning_rate": 7.142006176226233e-06, + "loss": 0.0112, + "step": 83060 + }, + { + "epoch": 3.88, + "learning_rate": 7.141222391171446e-06, + "loss": 0.0584, + "step": 83065 + }, + { + "epoch": 3.88, + "learning_rate": 7.140438606116658e-06, + "loss": 0.0678, + "step": 83070 + }, + { + "epoch": 3.88, + "learning_rate": 7.139654821061872e-06, + "loss": 0.0804, + "step": 83075 + }, + { + "epoch": 3.88, + "learning_rate": 7.138871036007085e-06, + "loss": 0.0552, + "step": 83080 + }, + { + "epoch": 3.88, + "learning_rate": 7.138087250952299e-06, + "loss": 0.0976, + "step": 83085 + }, + { + "epoch": 3.88, + "learning_rate": 7.137303465897512e-06, + "loss": 0.2082, + "step": 83090 + }, + { + "epoch": 3.88, + "learning_rate": 7.136519680842726e-06, + "loss": 0.2262, + "step": 83095 + }, + { + "epoch": 3.88, + "learning_rate": 7.135735895787939e-06, + "loss": 0.0472, + "step": 83100 + }, + { + "epoch": 3.88, + "learning_rate": 7.134952110733153e-06, + "loss": 0.0212, + "step": 83105 + }, + { + "epoch": 3.88, + "learning_rate": 7.134168325678367e-06, + "loss": 0.0167, + "step": 83110 + }, + { + "epoch": 3.88, + "learning_rate": 7.13338454062358e-06, + "loss": 0.0431, + "step": 83115 + }, + { + "epoch": 3.88, + "learning_rate": 7.132600755568794e-06, + "loss": 0.0403, + "step": 83120 + }, + { + "epoch": 3.88, + "learning_rate": 7.131816970514007e-06, + "loss": 0.0445, + "step": 83125 + }, + { + "epoch": 3.88, + "learning_rate": 7.131033185459221e-06, + "loss": 0.0775, + "step": 83130 + }, + { + "epoch": 3.88, + "learning_rate": 7.130249400404433e-06, + "loss": 0.1693, + "step": 83135 + }, + { + "epoch": 3.88, + "learning_rate": 7.129465615349646e-06, + "loss": 0.1404, + "step": 83140 + }, + { + "epoch": 3.88, + "learning_rate": 7.12868183029486e-06, + "loss": 0.1983, + "step": 83145 + }, + { + "epoch": 3.88, + "learning_rate": 7.127898045240073e-06, + "loss": 0.0576, + "step": 83150 + }, + { + "epoch": 3.88, + "learning_rate": 7.127114260185287e-06, + "loss": 0.0225, + "step": 83155 + }, + { + "epoch": 3.88, + "learning_rate": 7.126330475130501e-06, + "loss": 0.042, + "step": 83160 + }, + { + "epoch": 3.88, + "learning_rate": 7.125546690075714e-06, + "loss": 0.047, + "step": 83165 + }, + { + "epoch": 3.88, + "learning_rate": 7.124762905020928e-06, + "loss": 0.0822, + "step": 83170 + }, + { + "epoch": 3.88, + "learning_rate": 7.123979119966141e-06, + "loss": 0.0317, + "step": 83175 + }, + { + "epoch": 3.88, + "learning_rate": 7.123195334911355e-06, + "loss": 0.178, + "step": 83180 + }, + { + "epoch": 3.88, + "learning_rate": 7.122411549856568e-06, + "loss": 0.0838, + "step": 83185 + }, + { + "epoch": 3.88, + "learning_rate": 7.121627764801782e-06, + "loss": 0.1366, + "step": 83190 + }, + { + "epoch": 3.88, + "learning_rate": 7.120843979746995e-06, + "loss": 0.22, + "step": 83195 + }, + { + "epoch": 3.88, + "learning_rate": 7.120060194692208e-06, + "loss": 0.0103, + "step": 83200 + }, + { + "epoch": 3.88, + "learning_rate": 7.119276409637421e-06, + "loss": 0.0207, + "step": 83205 + }, + { + "epoch": 3.88, + "learning_rate": 7.118492624582635e-06, + "loss": 0.0341, + "step": 83210 + }, + { + "epoch": 3.88, + "learning_rate": 7.117708839527848e-06, + "loss": 0.0363, + "step": 83215 + }, + { + "epoch": 3.88, + "learning_rate": 7.116925054473062e-06, + "loss": 0.0304, + "step": 83220 + }, + { + "epoch": 3.88, + "learning_rate": 7.116141269418275e-06, + "loss": 0.1067, + "step": 83225 + }, + { + "epoch": 3.88, + "learning_rate": 7.115357484363489e-06, + "loss": 0.0815, + "step": 83230 + }, + { + "epoch": 3.88, + "learning_rate": 7.114573699308702e-06, + "loss": 0.1673, + "step": 83235 + }, + { + "epoch": 3.88, + "learning_rate": 7.113789914253916e-06, + "loss": 0.2259, + "step": 83240 + }, + { + "epoch": 3.88, + "learning_rate": 7.113006129199129e-06, + "loss": 0.2584, + "step": 83245 + }, + { + "epoch": 3.88, + "learning_rate": 7.112222344144343e-06, + "loss": 0.1109, + "step": 83250 + }, + { + "epoch": 3.88, + "learning_rate": 7.111438559089556e-06, + "loss": 0.1151, + "step": 83255 + }, + { + "epoch": 3.89, + "learning_rate": 7.11065477403477e-06, + "loss": 0.0398, + "step": 83260 + }, + { + "epoch": 3.89, + "learning_rate": 7.109870988979982e-06, + "loss": 0.0395, + "step": 83265 + }, + { + "epoch": 3.89, + "learning_rate": 7.109087203925196e-06, + "loss": 0.0706, + "step": 83270 + }, + { + "epoch": 3.89, + "learning_rate": 7.108303418870409e-06, + "loss": 0.0666, + "step": 83275 + }, + { + "epoch": 3.89, + "learning_rate": 7.107519633815623e-06, + "loss": 0.0801, + "step": 83280 + }, + { + "epoch": 3.89, + "learning_rate": 7.106735848760836e-06, + "loss": 0.119, + "step": 83285 + }, + { + "epoch": 3.89, + "learning_rate": 7.10595206370605e-06, + "loss": 0.2241, + "step": 83290 + }, + { + "epoch": 3.89, + "learning_rate": 7.105168278651263e-06, + "loss": 0.1682, + "step": 83295 + }, + { + "epoch": 3.89, + "learning_rate": 7.104384493596477e-06, + "loss": 0.0431, + "step": 83300 + }, + { + "epoch": 3.89, + "learning_rate": 7.10360070854169e-06, + "loss": 0.011, + "step": 83305 + }, + { + "epoch": 3.89, + "learning_rate": 7.102816923486904e-06, + "loss": 0.0472, + "step": 83310 + }, + { + "epoch": 3.89, + "learning_rate": 7.102033138432117e-06, + "loss": 0.0674, + "step": 83315 + }, + { + "epoch": 3.89, + "learning_rate": 7.101249353377331e-06, + "loss": 0.0227, + "step": 83320 + }, + { + "epoch": 3.89, + "learning_rate": 7.100465568322545e-06, + "loss": 0.0615, + "step": 83325 + }, + { + "epoch": 3.89, + "learning_rate": 7.099838540278714e-06, + "loss": 0.1183, + "step": 83330 + }, + { + "epoch": 3.89, + "learning_rate": 7.099054755223927e-06, + "loss": 0.0832, + "step": 83335 + }, + { + "epoch": 3.89, + "learning_rate": 7.098270970169141e-06, + "loss": 0.1191, + "step": 83340 + }, + { + "epoch": 3.89, + "learning_rate": 7.097487185114354e-06, + "loss": 0.3515, + "step": 83345 + }, + { + "epoch": 3.89, + "learning_rate": 7.096703400059568e-06, + "loss": 0.106, + "step": 83350 + }, + { + "epoch": 3.89, + "learning_rate": 7.095919615004781e-06, + "loss": 0.0136, + "step": 83355 + }, + { + "epoch": 3.89, + "learning_rate": 7.095135829949995e-06, + "loss": 0.0589, + "step": 83360 + }, + { + "epoch": 3.89, + "learning_rate": 7.094352044895208e-06, + "loss": 0.0601, + "step": 83365 + }, + { + "epoch": 3.89, + "learning_rate": 7.093568259840422e-06, + "loss": 0.0845, + "step": 83370 + }, + { + "epoch": 3.89, + "learning_rate": 7.092784474785635e-06, + "loss": 0.1721, + "step": 83375 + }, + { + "epoch": 3.89, + "learning_rate": 7.092000689730849e-06, + "loss": 0.0775, + "step": 83380 + }, + { + "epoch": 3.89, + "learning_rate": 7.091216904676062e-06, + "loss": 0.1601, + "step": 83385 + }, + { + "epoch": 3.89, + "learning_rate": 7.090433119621276e-06, + "loss": 0.1606, + "step": 83390 + }, + { + "epoch": 3.89, + "learning_rate": 7.089649334566488e-06, + "loss": 0.2723, + "step": 83395 + }, + { + "epoch": 3.89, + "learning_rate": 7.088865549511702e-06, + "loss": 0.0425, + "step": 83400 + }, + { + "epoch": 3.89, + "learning_rate": 7.088081764456915e-06, + "loss": 0.0291, + "step": 83405 + }, + { + "epoch": 3.89, + "learning_rate": 7.087297979402129e-06, + "loss": 0.038, + "step": 83410 + }, + { + "epoch": 3.89, + "learning_rate": 7.086514194347342e-06, + "loss": 0.0745, + "step": 83415 + }, + { + "epoch": 3.89, + "learning_rate": 7.085730409292556e-06, + "loss": 0.0439, + "step": 83420 + }, + { + "epoch": 3.89, + "learning_rate": 7.084946624237769e-06, + "loss": 0.0769, + "step": 83425 + }, + { + "epoch": 3.89, + "learning_rate": 7.084162839182983e-06, + "loss": 0.0507, + "step": 83430 + }, + { + "epoch": 3.89, + "learning_rate": 7.083379054128197e-06, + "loss": 0.1185, + "step": 83435 + }, + { + "epoch": 3.89, + "learning_rate": 7.08259526907341e-06, + "loss": 0.2094, + "step": 83440 + }, + { + "epoch": 3.89, + "learning_rate": 7.081811484018624e-06, + "loss": 0.3072, + "step": 83445 + }, + { + "epoch": 3.89, + "learning_rate": 7.081027698963837e-06, + "loss": 0.044, + "step": 83450 + }, + { + "epoch": 3.89, + "learning_rate": 7.080243913909051e-06, + "loss": 0.0651, + "step": 83455 + }, + { + "epoch": 3.89, + "learning_rate": 7.079460128854263e-06, + "loss": 0.0351, + "step": 83460 + }, + { + "epoch": 3.89, + "learning_rate": 7.078676343799476e-06, + "loss": 0.023, + "step": 83465 + }, + { + "epoch": 3.89, + "learning_rate": 7.07789255874469e-06, + "loss": 0.1239, + "step": 83470 + }, + { + "epoch": 3.9, + "learning_rate": 7.077108773689903e-06, + "loss": 0.1016, + "step": 83475 + }, + { + "epoch": 3.9, + "learning_rate": 7.076324988635117e-06, + "loss": 0.093, + "step": 83480 + }, + { + "epoch": 3.9, + "learning_rate": 7.075541203580331e-06, + "loss": 0.1165, + "step": 83485 + }, + { + "epoch": 3.9, + "learning_rate": 7.074757418525544e-06, + "loss": 0.165, + "step": 83490 + }, + { + "epoch": 3.9, + "learning_rate": 7.073973633470758e-06, + "loss": 0.2434, + "step": 83495 + }, + { + "epoch": 3.9, + "learning_rate": 7.073189848415971e-06, + "loss": 0.0324, + "step": 83500 + }, + { + "epoch": 3.9, + "learning_rate": 7.072406063361185e-06, + "loss": 0.0379, + "step": 83505 + }, + { + "epoch": 3.9, + "learning_rate": 7.071622278306398e-06, + "loss": 0.0729, + "step": 83510 + }, + { + "epoch": 3.9, + "learning_rate": 7.070838493251612e-06, + "loss": 0.0829, + "step": 83515 + }, + { + "epoch": 3.9, + "learning_rate": 7.070054708196825e-06, + "loss": 0.046, + "step": 83520 + }, + { + "epoch": 3.9, + "learning_rate": 7.069270923142037e-06, + "loss": 0.0305, + "step": 83525 + }, + { + "epoch": 3.9, + "learning_rate": 7.068487138087251e-06, + "loss": 0.0839, + "step": 83530 + }, + { + "epoch": 3.9, + "learning_rate": 7.067703353032465e-06, + "loss": 0.1183, + "step": 83535 + }, + { + "epoch": 3.9, + "learning_rate": 7.066919567977678e-06, + "loss": 0.1723, + "step": 83540 + }, + { + "epoch": 3.9, + "learning_rate": 7.066135782922892e-06, + "loss": 0.1977, + "step": 83545 + }, + { + "epoch": 3.9, + "learning_rate": 7.065351997868105e-06, + "loss": 0.0894, + "step": 83550 + }, + { + "epoch": 3.9, + "learning_rate": 7.064568212813319e-06, + "loss": 0.0183, + "step": 83555 + }, + { + "epoch": 3.9, + "learning_rate": 7.063784427758532e-06, + "loss": 0.0516, + "step": 83560 + }, + { + "epoch": 3.9, + "learning_rate": 7.063000642703746e-06, + "loss": 0.0505, + "step": 83565 + }, + { + "epoch": 3.9, + "learning_rate": 7.062216857648959e-06, + "loss": 0.0342, + "step": 83570 + }, + { + "epoch": 3.9, + "learning_rate": 7.061433072594173e-06, + "loss": 0.0858, + "step": 83575 + }, + { + "epoch": 3.9, + "learning_rate": 7.060649287539386e-06, + "loss": 0.0855, + "step": 83580 + }, + { + "epoch": 3.9, + "learning_rate": 7.0598655024846e-06, + "loss": 0.1751, + "step": 83585 + }, + { + "epoch": 3.9, + "learning_rate": 7.059081717429812e-06, + "loss": 0.1959, + "step": 83590 + }, + { + "epoch": 3.9, + "learning_rate": 7.058297932375026e-06, + "loss": 0.2493, + "step": 83595 + }, + { + "epoch": 3.9, + "learning_rate": 7.057514147320239e-06, + "loss": 0.0743, + "step": 83600 + }, + { + "epoch": 3.9, + "learning_rate": 7.056730362265453e-06, + "loss": 0.0131, + "step": 83605 + }, + { + "epoch": 3.9, + "learning_rate": 7.055946577210666e-06, + "loss": 0.029, + "step": 83610 + }, + { + "epoch": 3.9, + "learning_rate": 7.05516279215588e-06, + "loss": 0.0278, + "step": 83615 + }, + { + "epoch": 3.9, + "learning_rate": 7.054379007101093e-06, + "loss": 0.0613, + "step": 83620 + }, + { + "epoch": 3.9, + "learning_rate": 7.053595222046307e-06, + "loss": 0.0682, + "step": 83625 + }, + { + "epoch": 3.9, + "learning_rate": 7.05281143699152e-06, + "loss": 0.0363, + "step": 83630 + }, + { + "epoch": 3.9, + "learning_rate": 7.052027651936734e-06, + "loss": 0.1171, + "step": 83635 + }, + { + "epoch": 3.9, + "learning_rate": 7.051243866881947e-06, + "loss": 0.1654, + "step": 83640 + }, + { + "epoch": 3.9, + "learning_rate": 7.050460081827161e-06, + "loss": 0.2631, + "step": 83645 + }, + { + "epoch": 3.9, + "learning_rate": 7.049676296772374e-06, + "loss": 0.0565, + "step": 83650 + }, + { + "epoch": 3.9, + "learning_rate": 7.048892511717587e-06, + "loss": 0.0405, + "step": 83655 + }, + { + "epoch": 3.9, + "learning_rate": 7.0481087266628e-06, + "loss": 0.0411, + "step": 83660 + }, + { + "epoch": 3.9, + "learning_rate": 7.047324941608014e-06, + "loss": 0.0245, + "step": 83665 + }, + { + "epoch": 3.9, + "learning_rate": 7.046541156553227e-06, + "loss": 0.075, + "step": 83670 + }, + { + "epoch": 3.9, + "learning_rate": 7.045757371498441e-06, + "loss": 0.1692, + "step": 83675 + }, + { + "epoch": 3.9, + "learning_rate": 7.044973586443654e-06, + "loss": 0.1396, + "step": 83680 + }, + { + "epoch": 3.9, + "learning_rate": 7.044189801388868e-06, + "loss": 0.136, + "step": 83685 + }, + { + "epoch": 3.91, + "learning_rate": 7.043406016334081e-06, + "loss": 0.2351, + "step": 83690 + }, + { + "epoch": 3.91, + "learning_rate": 7.042622231279295e-06, + "loss": 0.3361, + "step": 83695 + }, + { + "epoch": 3.91, + "learning_rate": 7.041838446224508e-06, + "loss": 0.1006, + "step": 83700 + }, + { + "epoch": 3.91, + "learning_rate": 7.041054661169722e-06, + "loss": 0.03, + "step": 83705 + }, + { + "epoch": 3.91, + "learning_rate": 7.0402708761149356e-06, + "loss": 0.0043, + "step": 83710 + }, + { + "epoch": 3.91, + "learning_rate": 7.039487091060149e-06, + "loss": 0.0544, + "step": 83715 + }, + { + "epoch": 3.91, + "learning_rate": 7.038703306005361e-06, + "loss": 0.0858, + "step": 83720 + }, + { + "epoch": 3.91, + "learning_rate": 7.037919520950575e-06, + "loss": 0.1035, + "step": 83725 + }, + { + "epoch": 3.91, + "learning_rate": 7.037135735895788e-06, + "loss": 0.1786, + "step": 83730 + }, + { + "epoch": 3.91, + "learning_rate": 7.036351950841002e-06, + "loss": 0.1902, + "step": 83735 + }, + { + "epoch": 3.91, + "learning_rate": 7.035568165786215e-06, + "loss": 0.1655, + "step": 83740 + }, + { + "epoch": 3.91, + "learning_rate": 7.034784380731429e-06, + "loss": 0.3697, + "step": 83745 + }, + { + "epoch": 3.91, + "learning_rate": 7.034000595676643e-06, + "loss": 0.0693, + "step": 83750 + }, + { + "epoch": 3.91, + "learning_rate": 7.033216810621856e-06, + "loss": 0.0175, + "step": 83755 + }, + { + "epoch": 3.91, + "learning_rate": 7.0324330255670695e-06, + "loss": 0.0227, + "step": 83760 + }, + { + "epoch": 3.91, + "learning_rate": 7.031649240512283e-06, + "loss": 0.0299, + "step": 83765 + }, + { + "epoch": 3.91, + "learning_rate": 7.0308654554574965e-06, + "loss": 0.0819, + "step": 83770 + }, + { + "epoch": 3.91, + "learning_rate": 7.0300816704027096e-06, + "loss": 0.0887, + "step": 83775 + }, + { + "epoch": 3.91, + "learning_rate": 7.0292978853479235e-06, + "loss": 0.0337, + "step": 83780 + }, + { + "epoch": 3.91, + "learning_rate": 7.028514100293136e-06, + "loss": 0.1754, + "step": 83785 + }, + { + "epoch": 3.91, + "learning_rate": 7.027730315238349e-06, + "loss": 0.1529, + "step": 83790 + }, + { + "epoch": 3.91, + "learning_rate": 7.026946530183563e-06, + "loss": 0.2586, + "step": 83795 + }, + { + "epoch": 3.91, + "learning_rate": 7.0261627451287766e-06, + "loss": 0.0753, + "step": 83800 + }, + { + "epoch": 3.91, + "learning_rate": 7.02537896007399e-06, + "loss": 0.0732, + "step": 83805 + }, + { + "epoch": 3.91, + "learning_rate": 7.0245951750192035e-06, + "loss": 0.0517, + "step": 83810 + }, + { + "epoch": 3.91, + "learning_rate": 7.023811389964417e-06, + "loss": 0.0656, + "step": 83815 + }, + { + "epoch": 3.91, + "learning_rate": 7.0230276049096305e-06, + "loss": 0.0995, + "step": 83820 + }, + { + "epoch": 3.91, + "learning_rate": 7.0222438198548435e-06, + "loss": 0.0459, + "step": 83825 + }, + { + "epoch": 3.91, + "learning_rate": 7.0214600348000575e-06, + "loss": 0.0869, + "step": 83830 + }, + { + "epoch": 3.91, + "learning_rate": 7.0206762497452705e-06, + "loss": 0.0683, + "step": 83835 + }, + { + "epoch": 3.91, + "learning_rate": 7.019892464690484e-06, + "loss": 0.1981, + "step": 83840 + }, + { + "epoch": 3.91, + "learning_rate": 7.0191086796356975e-06, + "loss": 0.3106, + "step": 83845 + }, + { + "epoch": 3.91, + "learning_rate": 7.0183248945809105e-06, + "loss": 0.0214, + "step": 83850 + }, + { + "epoch": 3.91, + "learning_rate": 7.017541109526124e-06, + "loss": 0.0107, + "step": 83855 + }, + { + "epoch": 3.91, + "learning_rate": 7.0167573244713375e-06, + "loss": 0.0483, + "step": 83860 + }, + { + "epoch": 3.91, + "learning_rate": 7.0159735394165506e-06, + "loss": 0.0288, + "step": 83865 + }, + { + "epoch": 3.91, + "learning_rate": 7.0151897543617645e-06, + "loss": 0.0898, + "step": 83870 + }, + { + "epoch": 3.91, + "learning_rate": 7.0144059693069775e-06, + "loss": 0.1365, + "step": 83875 + }, + { + "epoch": 3.91, + "learning_rate": 7.0136221842521914e-06, + "loss": 0.0615, + "step": 83880 + }, + { + "epoch": 3.91, + "learning_rate": 7.0128383991974045e-06, + "loss": 0.1307, + "step": 83885 + }, + { + "epoch": 3.91, + "learning_rate": 7.012054614142618e-06, + "loss": 0.1897, + "step": 83890 + }, + { + "epoch": 3.91, + "learning_rate": 7.0112708290878315e-06, + "loss": 0.2455, + "step": 83895 + }, + { + "epoch": 3.91, + "learning_rate": 7.010487044033045e-06, + "loss": 0.0779, + "step": 83900 + }, + { + "epoch": 3.92, + "learning_rate": 7.009703258978258e-06, + "loss": 0.0304, + "step": 83905 + }, + { + "epoch": 3.92, + "learning_rate": 7.008919473923472e-06, + "loss": 0.0261, + "step": 83910 + }, + { + "epoch": 3.92, + "learning_rate": 7.0081356888686845e-06, + "loss": 0.0529, + "step": 83915 + }, + { + "epoch": 3.92, + "learning_rate": 7.0073519038138984e-06, + "loss": 0.0457, + "step": 83920 + }, + { + "epoch": 3.92, + "learning_rate": 7.0065681187591115e-06, + "loss": 0.1164, + "step": 83925 + }, + { + "epoch": 3.92, + "learning_rate": 7.005784333704325e-06, + "loss": 0.0913, + "step": 83930 + }, + { + "epoch": 3.92, + "learning_rate": 7.0050005486495385e-06, + "loss": 0.1972, + "step": 83935 + }, + { + "epoch": 3.92, + "learning_rate": 7.004216763594752e-06, + "loss": 0.137, + "step": 83940 + }, + { + "epoch": 3.92, + "learning_rate": 7.0034329785399654e-06, + "loss": 0.2245, + "step": 83945 + }, + { + "epoch": 3.92, + "learning_rate": 7.002649193485179e-06, + "loss": 0.0774, + "step": 83950 + }, + { + "epoch": 3.92, + "learning_rate": 7.001865408430392e-06, + "loss": 0.0612, + "step": 83955 + }, + { + "epoch": 3.92, + "learning_rate": 7.001081623375606e-06, + "loss": 0.0355, + "step": 83960 + }, + { + "epoch": 3.92, + "learning_rate": 7.000297838320819e-06, + "loss": 0.0663, + "step": 83965 + }, + { + "epoch": 3.92, + "learning_rate": 6.999514053266033e-06, + "loss": 0.0779, + "step": 83970 + }, + { + "epoch": 3.92, + "learning_rate": 6.998730268211247e-06, + "loss": 0.1159, + "step": 83975 + }, + { + "epoch": 3.92, + "learning_rate": 6.997946483156459e-06, + "loss": 0.1134, + "step": 83980 + }, + { + "epoch": 3.92, + "learning_rate": 6.9971626981016724e-06, + "loss": 0.0894, + "step": 83985 + }, + { + "epoch": 3.92, + "learning_rate": 6.996378913046886e-06, + "loss": 0.1549, + "step": 83990 + }, + { + "epoch": 3.92, + "learning_rate": 6.995595127992099e-06, + "loss": 0.266, + "step": 83995 + }, + { + "epoch": 3.92, + "learning_rate": 6.994811342937313e-06, + "loss": 0.064, + "step": 84000 + }, + { + "epoch": 3.92, + "learning_rate": 6.994027557882526e-06, + "loss": 0.0812, + "step": 84005 + }, + { + "epoch": 3.92, + "learning_rate": 6.99324377282774e-06, + "loss": 0.0724, + "step": 84010 + }, + { + "epoch": 3.92, + "learning_rate": 6.992459987772954e-06, + "loss": 0.056, + "step": 84015 + }, + { + "epoch": 3.92, + "learning_rate": 6.991676202718167e-06, + "loss": 0.0807, + "step": 84020 + }, + { + "epoch": 3.92, + "learning_rate": 6.990892417663381e-06, + "loss": 0.0549, + "step": 84025 + }, + { + "epoch": 3.92, + "learning_rate": 6.990108632608594e-06, + "loss": 0.0929, + "step": 84030 + }, + { + "epoch": 3.92, + "learning_rate": 6.989324847553808e-06, + "loss": 0.1392, + "step": 84035 + }, + { + "epoch": 3.92, + "learning_rate": 6.988541062499021e-06, + "loss": 0.2502, + "step": 84040 + }, + { + "epoch": 3.92, + "learning_rate": 6.987757277444233e-06, + "loss": 0.3448, + "step": 84045 + }, + { + "epoch": 3.92, + "learning_rate": 6.986973492389447e-06, + "loss": 0.0538, + "step": 84050 + }, + { + "epoch": 3.92, + "learning_rate": 6.98618970733466e-06, + "loss": 0.0152, + "step": 84055 + }, + { + "epoch": 3.92, + "learning_rate": 6.985405922279874e-06, + "loss": 0.074, + "step": 84060 + }, + { + "epoch": 3.92, + "learning_rate": 6.984622137225088e-06, + "loss": 0.0497, + "step": 84065 + }, + { + "epoch": 3.92, + "learning_rate": 6.983838352170301e-06, + "loss": 0.0891, + "step": 84070 + }, + { + "epoch": 3.92, + "learning_rate": 6.983054567115515e-06, + "loss": 0.0899, + "step": 84075 + }, + { + "epoch": 3.92, + "learning_rate": 6.982270782060728e-06, + "loss": 0.0808, + "step": 84080 + }, + { + "epoch": 3.92, + "learning_rate": 6.981486997005942e-06, + "loss": 0.1104, + "step": 84085 + }, + { + "epoch": 3.92, + "learning_rate": 6.980703211951155e-06, + "loss": 0.2043, + "step": 84090 + }, + { + "epoch": 3.92, + "learning_rate": 6.979919426896369e-06, + "loss": 0.2488, + "step": 84095 + }, + { + "epoch": 3.92, + "learning_rate": 6.979135641841582e-06, + "loss": 0.0789, + "step": 84100 + }, + { + "epoch": 3.92, + "learning_rate": 6.978351856786796e-06, + "loss": 0.0489, + "step": 84105 + }, + { + "epoch": 3.92, + "learning_rate": 6.977568071732008e-06, + "loss": 0.107, + "step": 84110 + }, + { + "epoch": 3.92, + "learning_rate": 6.976784286677222e-06, + "loss": 0.0696, + "step": 84115 + }, + { + "epoch": 3.93, + "learning_rate": 6.976000501622435e-06, + "loss": 0.1148, + "step": 84120 + }, + { + "epoch": 3.93, + "learning_rate": 6.975216716567649e-06, + "loss": 0.1428, + "step": 84125 + }, + { + "epoch": 3.93, + "learning_rate": 6.974432931512862e-06, + "loss": 0.12, + "step": 84130 + }, + { + "epoch": 3.93, + "learning_rate": 6.973649146458076e-06, + "loss": 0.2055, + "step": 84135 + }, + { + "epoch": 3.93, + "learning_rate": 6.972865361403289e-06, + "loss": 0.2817, + "step": 84140 + }, + { + "epoch": 3.93, + "learning_rate": 6.972081576348503e-06, + "loss": 0.2038, + "step": 84145 + }, + { + "epoch": 3.93, + "learning_rate": 6.971297791293716e-06, + "loss": 0.1615, + "step": 84150 + }, + { + "epoch": 3.93, + "learning_rate": 6.97051400623893e-06, + "loss": 0.0239, + "step": 84155 + }, + { + "epoch": 3.93, + "learning_rate": 6.969730221184143e-06, + "loss": 0.0394, + "step": 84160 + }, + { + "epoch": 3.93, + "learning_rate": 6.968946436129357e-06, + "loss": 0.0815, + "step": 84165 + }, + { + "epoch": 3.93, + "learning_rate": 6.96816265107457e-06, + "loss": 0.0547, + "step": 84170 + }, + { + "epoch": 3.93, + "learning_rate": 6.967378866019783e-06, + "loss": 0.0423, + "step": 84175 + }, + { + "epoch": 3.93, + "learning_rate": 6.966595080964996e-06, + "loss": 0.1092, + "step": 84180 + }, + { + "epoch": 3.93, + "learning_rate": 6.96581129591021e-06, + "loss": 0.0865, + "step": 84185 + }, + { + "epoch": 3.93, + "learning_rate": 6.965027510855423e-06, + "loss": 0.0807, + "step": 84190 + }, + { + "epoch": 3.93, + "learning_rate": 6.964243725800637e-06, + "loss": 0.2152, + "step": 84195 + }, + { + "epoch": 3.93, + "learning_rate": 6.96345994074585e-06, + "loss": 0.0474, + "step": 84200 + }, + { + "epoch": 3.93, + "learning_rate": 6.962676155691064e-06, + "loss": 0.0093, + "step": 84205 + }, + { + "epoch": 3.93, + "learning_rate": 6.961892370636277e-06, + "loss": 0.0628, + "step": 84210 + }, + { + "epoch": 3.93, + "learning_rate": 6.961108585581491e-06, + "loss": 0.0878, + "step": 84215 + }, + { + "epoch": 3.93, + "learning_rate": 6.960324800526704e-06, + "loss": 0.0281, + "step": 84220 + }, + { + "epoch": 3.93, + "learning_rate": 6.959541015471918e-06, + "loss": 0.1112, + "step": 84225 + }, + { + "epoch": 3.93, + "learning_rate": 6.958757230417131e-06, + "loss": 0.1499, + "step": 84230 + }, + { + "epoch": 3.93, + "learning_rate": 6.957973445362345e-06, + "loss": 0.0709, + "step": 84235 + }, + { + "epoch": 3.93, + "learning_rate": 6.957189660307559e-06, + "loss": 0.0987, + "step": 84240 + }, + { + "epoch": 3.93, + "learning_rate": 6.956405875252771e-06, + "loss": 0.3597, + "step": 84245 + }, + { + "epoch": 3.93, + "learning_rate": 6.955622090197984e-06, + "loss": 0.0683, + "step": 84250 + }, + { + "epoch": 3.93, + "learning_rate": 6.954838305143198e-06, + "loss": 0.043, + "step": 84255 + }, + { + "epoch": 3.93, + "learning_rate": 6.954054520088411e-06, + "loss": 0.0472, + "step": 84260 + }, + { + "epoch": 3.93, + "learning_rate": 6.953270735033625e-06, + "loss": 0.0598, + "step": 84265 + }, + { + "epoch": 3.93, + "learning_rate": 6.952486949978838e-06, + "loss": 0.0462, + "step": 84270 + }, + { + "epoch": 3.93, + "learning_rate": 6.951703164924052e-06, + "loss": 0.0809, + "step": 84275 + }, + { + "epoch": 3.93, + "learning_rate": 6.950919379869265e-06, + "loss": 0.0949, + "step": 84280 + }, + { + "epoch": 3.93, + "learning_rate": 6.950135594814479e-06, + "loss": 0.2274, + "step": 84285 + }, + { + "epoch": 3.93, + "learning_rate": 6.949351809759693e-06, + "loss": 0.1614, + "step": 84290 + }, + { + "epoch": 3.93, + "learning_rate": 6.948568024704906e-06, + "loss": 0.1468, + "step": 84295 + }, + { + "epoch": 3.93, + "learning_rate": 6.94778423965012e-06, + "loss": 0.0695, + "step": 84300 + }, + { + "epoch": 3.93, + "learning_rate": 6.947000454595333e-06, + "loss": 0.04, + "step": 84305 + }, + { + "epoch": 3.93, + "learning_rate": 6.946216669540545e-06, + "loss": 0.0326, + "step": 84310 + }, + { + "epoch": 3.93, + "learning_rate": 6.945432884485759e-06, + "loss": 0.4171, + "step": 84315 + }, + { + "epoch": 3.93, + "learning_rate": 6.944649099430972e-06, + "loss": 0.0765, + "step": 84320 + }, + { + "epoch": 3.93, + "learning_rate": 6.943865314376186e-06, + "loss": 0.0823, + "step": 84325 + }, + { + "epoch": 3.93, + "learning_rate": 6.9430815293214e-06, + "loss": 0.0574, + "step": 84330 + }, + { + "epoch": 3.94, + "learning_rate": 6.942297744266613e-06, + "loss": 0.1587, + "step": 84335 + }, + { + "epoch": 3.94, + "learning_rate": 6.941513959211827e-06, + "loss": 0.2067, + "step": 84340 + }, + { + "epoch": 3.94, + "learning_rate": 6.94073017415704e-06, + "loss": 0.3755, + "step": 84345 + }, + { + "epoch": 3.94, + "learning_rate": 6.939946389102254e-06, + "loss": 0.0658, + "step": 84350 + }, + { + "epoch": 3.94, + "learning_rate": 6.939162604047467e-06, + "loss": 0.0162, + "step": 84355 + }, + { + "epoch": 3.94, + "learning_rate": 6.938378818992681e-06, + "loss": 0.0154, + "step": 84360 + }, + { + "epoch": 3.94, + "learning_rate": 6.937595033937894e-06, + "loss": 0.0647, + "step": 84365 + }, + { + "epoch": 3.94, + "learning_rate": 6.936811248883108e-06, + "loss": 0.0973, + "step": 84370 + }, + { + "epoch": 3.94, + "learning_rate": 6.93602746382832e-06, + "loss": 0.073, + "step": 84375 + }, + { + "epoch": 3.94, + "learning_rate": 6.935243678773534e-06, + "loss": 0.2244, + "step": 84380 + }, + { + "epoch": 3.94, + "learning_rate": 6.934459893718747e-06, + "loss": 0.0851, + "step": 84385 + }, + { + "epoch": 3.94, + "learning_rate": 6.933676108663961e-06, + "loss": 0.2985, + "step": 84390 + }, + { + "epoch": 3.94, + "learning_rate": 6.932892323609174e-06, + "loss": 0.2953, + "step": 84395 + }, + { + "epoch": 3.94, + "learning_rate": 6.932108538554388e-06, + "loss": 0.0488, + "step": 84400 + }, + { + "epoch": 3.94, + "learning_rate": 6.931324753499601e-06, + "loss": 0.0575, + "step": 84405 + }, + { + "epoch": 3.94, + "learning_rate": 6.930540968444815e-06, + "loss": 0.0392, + "step": 84410 + }, + { + "epoch": 3.94, + "learning_rate": 6.929757183390028e-06, + "loss": 0.0602, + "step": 84415 + }, + { + "epoch": 3.94, + "learning_rate": 6.928973398335242e-06, + "loss": 0.1123, + "step": 84420 + }, + { + "epoch": 3.94, + "learning_rate": 6.928189613280455e-06, + "loss": 0.0457, + "step": 84425 + }, + { + "epoch": 3.94, + "learning_rate": 6.9274058282256685e-06, + "loss": 0.0504, + "step": 84430 + }, + { + "epoch": 3.94, + "learning_rate": 6.926622043170882e-06, + "loss": 0.104, + "step": 84435 + }, + { + "epoch": 3.94, + "learning_rate": 6.925838258116095e-06, + "loss": 0.1921, + "step": 84440 + }, + { + "epoch": 3.94, + "learning_rate": 6.925054473061308e-06, + "loss": 0.4111, + "step": 84445 + }, + { + "epoch": 3.94, + "learning_rate": 6.924270688006522e-06, + "loss": 0.0485, + "step": 84450 + }, + { + "epoch": 3.94, + "learning_rate": 6.923486902951735e-06, + "loss": 0.0229, + "step": 84455 + }, + { + "epoch": 3.94, + "learning_rate": 6.922703117896949e-06, + "loss": 0.0297, + "step": 84460 + }, + { + "epoch": 3.94, + "learning_rate": 6.921919332842162e-06, + "loss": 0.0345, + "step": 84465 + }, + { + "epoch": 3.94, + "learning_rate": 6.9211355477873756e-06, + "loss": 0.1021, + "step": 84470 + }, + { + "epoch": 3.94, + "learning_rate": 6.920351762732589e-06, + "loss": 0.1215, + "step": 84475 + }, + { + "epoch": 3.94, + "learning_rate": 6.9195679776778025e-06, + "loss": 0.1225, + "step": 84480 + }, + { + "epoch": 3.94, + "learning_rate": 6.918784192623016e-06, + "loss": 0.1005, + "step": 84485 + }, + { + "epoch": 3.94, + "learning_rate": 6.9180004075682295e-06, + "loss": 0.2317, + "step": 84490 + }, + { + "epoch": 3.94, + "learning_rate": 6.9172166225134425e-06, + "loss": 0.3476, + "step": 84495 + }, + { + "epoch": 3.94, + "learning_rate": 6.9164328374586565e-06, + "loss": 0.052, + "step": 84500 + }, + { + "epoch": 3.94, + "learning_rate": 6.915649052403869e-06, + "loss": 0.0212, + "step": 84505 + }, + { + "epoch": 3.94, + "learning_rate": 6.9148652673490826e-06, + "loss": 0.0163, + "step": 84510 + }, + { + "epoch": 3.94, + "learning_rate": 6.914081482294296e-06, + "loss": 0.0298, + "step": 84515 + }, + { + "epoch": 3.94, + "learning_rate": 6.9132976972395095e-06, + "loss": 0.0641, + "step": 84520 + }, + { + "epoch": 3.94, + "learning_rate": 6.912513912184723e-06, + "loss": 0.0839, + "step": 84525 + }, + { + "epoch": 3.94, + "learning_rate": 6.9117301271299365e-06, + "loss": 0.0815, + "step": 84530 + }, + { + "epoch": 3.94, + "learning_rate": 6.9109463420751496e-06, + "loss": 0.1028, + "step": 84535 + }, + { + "epoch": 3.94, + "learning_rate": 6.9101625570203635e-06, + "loss": 0.2123, + "step": 84540 + }, + { + "epoch": 3.94, + "learning_rate": 6.9093787719655765e-06, + "loss": 0.2505, + "step": 84545 + }, + { + "epoch": 3.95, + "learning_rate": 6.9085949869107904e-06, + "loss": 0.0627, + "step": 84550 + }, + { + "epoch": 3.95, + "learning_rate": 6.907811201856004e-06, + "loss": 0.0441, + "step": 84555 + }, + { + "epoch": 3.95, + "learning_rate": 6.907027416801217e-06, + "loss": 0.0371, + "step": 84560 + }, + { + "epoch": 3.95, + "learning_rate": 6.906243631746431e-06, + "loss": 0.0606, + "step": 84565 + }, + { + "epoch": 3.95, + "learning_rate": 6.9054598466916435e-06, + "loss": 0.0634, + "step": 84570 + }, + { + "epoch": 3.95, + "learning_rate": 6.9046760616368566e-06, + "loss": 0.0775, + "step": 84575 + }, + { + "epoch": 3.95, + "learning_rate": 6.9038922765820705e-06, + "loss": 0.067, + "step": 84580 + }, + { + "epoch": 3.95, + "learning_rate": 6.9031084915272835e-06, + "loss": 0.1218, + "step": 84585 + }, + { + "epoch": 3.95, + "learning_rate": 6.9023247064724974e-06, + "loss": 0.2002, + "step": 84590 + }, + { + "epoch": 3.95, + "learning_rate": 6.901540921417711e-06, + "loss": 0.3358, + "step": 84595 + }, + { + "epoch": 3.95, + "learning_rate": 6.900757136362924e-06, + "loss": 0.1052, + "step": 84600 + }, + { + "epoch": 3.95, + "learning_rate": 6.899973351308138e-06, + "loss": 0.0229, + "step": 84605 + }, + { + "epoch": 3.95, + "learning_rate": 6.899189566253351e-06, + "loss": 0.0396, + "step": 84610 + }, + { + "epoch": 3.95, + "learning_rate": 6.898405781198565e-06, + "loss": 0.0759, + "step": 84615 + }, + { + "epoch": 3.95, + "learning_rate": 6.897621996143778e-06, + "loss": 0.0415, + "step": 84620 + }, + { + "epoch": 3.95, + "learning_rate": 6.896838211088992e-06, + "loss": 0.0713, + "step": 84625 + }, + { + "epoch": 3.95, + "learning_rate": 6.896054426034205e-06, + "loss": 0.0515, + "step": 84630 + }, + { + "epoch": 3.95, + "learning_rate": 6.8952706409794175e-06, + "loss": 0.1614, + "step": 84635 + }, + { + "epoch": 3.95, + "learning_rate": 6.894486855924631e-06, + "loss": 0.1235, + "step": 84640 + }, + { + "epoch": 3.95, + "learning_rate": 6.893703070869845e-06, + "loss": 0.2719, + "step": 84645 + }, + { + "epoch": 3.95, + "learning_rate": 6.892919285815058e-06, + "loss": 0.0453, + "step": 84650 + }, + { + "epoch": 3.95, + "learning_rate": 6.892135500760272e-06, + "loss": 0.0359, + "step": 84655 + }, + { + "epoch": 3.95, + "learning_rate": 6.891351715705485e-06, + "loss": 0.0588, + "step": 84660 + }, + { + "epoch": 3.95, + "learning_rate": 6.890567930650699e-06, + "loss": 0.0396, + "step": 84665 + }, + { + "epoch": 3.95, + "learning_rate": 6.889784145595912e-06, + "loss": 0.0536, + "step": 84670 + }, + { + "epoch": 3.95, + "learning_rate": 6.889000360541126e-06, + "loss": 0.0496, + "step": 84675 + }, + { + "epoch": 3.95, + "learning_rate": 6.888216575486339e-06, + "loss": 0.1221, + "step": 84680 + }, + { + "epoch": 3.95, + "learning_rate": 6.887432790431553e-06, + "loss": 0.1342, + "step": 84685 + }, + { + "epoch": 3.95, + "learning_rate": 6.886649005376766e-06, + "loss": 0.2477, + "step": 84690 + }, + { + "epoch": 3.95, + "learning_rate": 6.88586522032198e-06, + "loss": 0.2642, + "step": 84695 + }, + { + "epoch": 3.95, + "learning_rate": 6.885081435267192e-06, + "loss": 0.0678, + "step": 84700 + }, + { + "epoch": 3.95, + "learning_rate": 6.884297650212406e-06, + "loss": 0.0121, + "step": 84705 + }, + { + "epoch": 3.95, + "learning_rate": 6.883513865157619e-06, + "loss": 0.0479, + "step": 84710 + }, + { + "epoch": 3.95, + "learning_rate": 6.882730080102833e-06, + "loss": 0.0348, + "step": 84715 + }, + { + "epoch": 3.95, + "learning_rate": 6.881946295048046e-06, + "loss": 0.0813, + "step": 84720 + }, + { + "epoch": 3.95, + "learning_rate": 6.88116250999326e-06, + "loss": 0.0826, + "step": 84725 + }, + { + "epoch": 3.95, + "learning_rate": 6.880378724938473e-06, + "loss": 0.0925, + "step": 84730 + }, + { + "epoch": 3.95, + "learning_rate": 6.879594939883687e-06, + "loss": 0.1139, + "step": 84735 + }, + { + "epoch": 3.95, + "learning_rate": 6.8788111548289e-06, + "loss": 0.1557, + "step": 84740 + }, + { + "epoch": 3.95, + "learning_rate": 6.878027369774114e-06, + "loss": 0.2379, + "step": 84745 + }, + { + "epoch": 3.95, + "learning_rate": 6.877243584719327e-06, + "loss": 0.1039, + "step": 84750 + }, + { + "epoch": 3.95, + "learning_rate": 6.876459799664541e-06, + "loss": 0.0495, + "step": 84755 + }, + { + "epoch": 3.96, + "learning_rate": 6.875676014609754e-06, + "loss": 0.0733, + "step": 84760 + }, + { + "epoch": 3.96, + "learning_rate": 6.874892229554967e-06, + "loss": 0.1012, + "step": 84765 + }, + { + "epoch": 3.96, + "learning_rate": 6.87410844450018e-06, + "loss": 0.1164, + "step": 84770 + }, + { + "epoch": 3.96, + "learning_rate": 6.873324659445394e-06, + "loss": 0.098, + "step": 84775 + }, + { + "epoch": 3.96, + "learning_rate": 6.872540874390607e-06, + "loss": 0.1502, + "step": 84780 + }, + { + "epoch": 3.96, + "learning_rate": 6.871757089335821e-06, + "loss": 0.159, + "step": 84785 + }, + { + "epoch": 3.96, + "learning_rate": 6.870973304281034e-06, + "loss": 0.1656, + "step": 84790 + }, + { + "epoch": 3.96, + "learning_rate": 6.870189519226248e-06, + "loss": 0.2007, + "step": 84795 + }, + { + "epoch": 3.96, + "learning_rate": 6.869405734171461e-06, + "loss": 0.0573, + "step": 84800 + }, + { + "epoch": 3.96, + "learning_rate": 6.868621949116675e-06, + "loss": 0.0321, + "step": 84805 + }, + { + "epoch": 3.96, + "learning_rate": 6.867838164061888e-06, + "loss": 0.0407, + "step": 84810 + }, + { + "epoch": 3.96, + "learning_rate": 6.867054379007102e-06, + "loss": 0.0898, + "step": 84815 + }, + { + "epoch": 3.96, + "learning_rate": 6.866270593952316e-06, + "loss": 0.0688, + "step": 84820 + }, + { + "epoch": 3.96, + "learning_rate": 6.865486808897529e-06, + "loss": 0.0849, + "step": 84825 + }, + { + "epoch": 3.96, + "learning_rate": 6.864703023842741e-06, + "loss": 0.055, + "step": 84830 + }, + { + "epoch": 3.96, + "learning_rate": 6.863919238787955e-06, + "loss": 0.1568, + "step": 84835 + }, + { + "epoch": 3.96, + "learning_rate": 6.863135453733168e-06, + "loss": 0.1795, + "step": 84840 + }, + { + "epoch": 3.96, + "learning_rate": 6.862351668678382e-06, + "loss": 0.2184, + "step": 84845 + }, + { + "epoch": 3.96, + "learning_rate": 6.861567883623595e-06, + "loss": 0.0367, + "step": 84850 + }, + { + "epoch": 3.96, + "learning_rate": 6.860784098568809e-06, + "loss": 0.0136, + "step": 84855 + }, + { + "epoch": 3.96, + "learning_rate": 6.860000313514022e-06, + "loss": 0.05, + "step": 84860 + }, + { + "epoch": 3.96, + "learning_rate": 6.859216528459236e-06, + "loss": 0.0288, + "step": 84865 + }, + { + "epoch": 3.96, + "learning_rate": 6.85843274340445e-06, + "loss": 0.0863, + "step": 84870 + }, + { + "epoch": 3.96, + "learning_rate": 6.857648958349663e-06, + "loss": 0.0304, + "step": 84875 + }, + { + "epoch": 3.96, + "learning_rate": 6.856865173294877e-06, + "loss": 0.1366, + "step": 84880 + }, + { + "epoch": 3.96, + "learning_rate": 6.85608138824009e-06, + "loss": 0.1891, + "step": 84885 + }, + { + "epoch": 3.96, + "learning_rate": 6.855297603185304e-06, + "loss": 0.2073, + "step": 84890 + }, + { + "epoch": 3.96, + "learning_rate": 6.854513818130516e-06, + "loss": 0.3622, + "step": 84895 + }, + { + "epoch": 3.96, + "learning_rate": 6.853730033075729e-06, + "loss": 0.0635, + "step": 84900 + }, + { + "epoch": 3.96, + "learning_rate": 6.852946248020943e-06, + "loss": 0.005, + "step": 84905 + }, + { + "epoch": 3.96, + "learning_rate": 6.852162462966157e-06, + "loss": 0.0065, + "step": 84910 + }, + { + "epoch": 3.96, + "learning_rate": 6.85137867791137e-06, + "loss": 0.0524, + "step": 84915 + }, + { + "epoch": 3.96, + "learning_rate": 6.850594892856584e-06, + "loss": 0.0971, + "step": 84920 + }, + { + "epoch": 3.96, + "learning_rate": 6.849811107801797e-06, + "loss": 0.0614, + "step": 84925 + }, + { + "epoch": 3.96, + "learning_rate": 6.849027322747011e-06, + "loss": 0.1487, + "step": 84930 + }, + { + "epoch": 3.96, + "learning_rate": 6.848243537692224e-06, + "loss": 0.146, + "step": 84935 + }, + { + "epoch": 3.96, + "learning_rate": 6.847459752637438e-06, + "loss": 0.2943, + "step": 84940 + }, + { + "epoch": 3.96, + "learning_rate": 6.846675967582651e-06, + "loss": 0.3352, + "step": 84945 + }, + { + "epoch": 3.96, + "learning_rate": 6.845892182527865e-06, + "loss": 0.034, + "step": 84950 + }, + { + "epoch": 3.96, + "learning_rate": 6.845108397473078e-06, + "loss": 0.0656, + "step": 84955 + }, + { + "epoch": 3.96, + "learning_rate": 6.844324612418291e-06, + "loss": 0.0307, + "step": 84960 + }, + { + "epoch": 3.96, + "learning_rate": 6.843540827363504e-06, + "loss": 0.0868, + "step": 84965 + }, + { + "epoch": 3.96, + "learning_rate": 6.842757042308718e-06, + "loss": 0.0561, + "step": 84970 + }, + { + "epoch": 3.97, + "learning_rate": 6.841973257253931e-06, + "loss": 0.1256, + "step": 84975 + }, + { + "epoch": 3.97, + "learning_rate": 6.841189472199145e-06, + "loss": 0.0754, + "step": 84980 + }, + { + "epoch": 3.97, + "learning_rate": 6.840405687144358e-06, + "loss": 0.1401, + "step": 84985 + }, + { + "epoch": 3.97, + "learning_rate": 6.839621902089572e-06, + "loss": 0.135, + "step": 84990 + }, + { + "epoch": 3.97, + "learning_rate": 6.838838117034785e-06, + "loss": 0.2946, + "step": 84995 + }, + { + "epoch": 3.97, + "learning_rate": 6.838054331979999e-06, + "loss": 0.0858, + "step": 85000 + }, + { + "epoch": 3.97, + "learning_rate": 6.837270546925212e-06, + "loss": 0.0219, + "step": 85005 + }, + { + "epoch": 3.97, + "learning_rate": 6.836486761870426e-06, + "loss": 0.026, + "step": 85010 + }, + { + "epoch": 3.97, + "learning_rate": 6.835702976815639e-06, + "loss": 0.0867, + "step": 85015 + }, + { + "epoch": 3.97, + "learning_rate": 6.834919191760853e-06, + "loss": 0.045, + "step": 85020 + }, + { + "epoch": 3.97, + "learning_rate": 6.834135406706065e-06, + "loss": 0.0908, + "step": 85025 + }, + { + "epoch": 3.97, + "learning_rate": 6.833351621651279e-06, + "loss": 0.2008, + "step": 85030 + }, + { + "epoch": 3.97, + "learning_rate": 6.832567836596492e-06, + "loss": 0.1749, + "step": 85035 + }, + { + "epoch": 3.97, + "learning_rate": 6.831784051541706e-06, + "loss": 0.1238, + "step": 85040 + }, + { + "epoch": 3.97, + "learning_rate": 6.831000266486919e-06, + "loss": 0.284, + "step": 85045 + }, + { + "epoch": 3.97, + "learning_rate": 6.830216481432133e-06, + "loss": 0.1071, + "step": 85050 + }, + { + "epoch": 3.97, + "learning_rate": 6.829432696377346e-06, + "loss": 0.0221, + "step": 85055 + }, + { + "epoch": 3.97, + "learning_rate": 6.82864891132256e-06, + "loss": 0.0341, + "step": 85060 + }, + { + "epoch": 3.97, + "learning_rate": 6.827865126267773e-06, + "loss": 0.0256, + "step": 85065 + }, + { + "epoch": 3.97, + "learning_rate": 6.827081341212987e-06, + "loss": 0.0422, + "step": 85070 + }, + { + "epoch": 3.97, + "learning_rate": 6.8262975561582e-06, + "loss": 0.0906, + "step": 85075 + }, + { + "epoch": 3.97, + "learning_rate": 6.825513771103414e-06, + "loss": 0.1339, + "step": 85080 + }, + { + "epoch": 3.97, + "learning_rate": 6.8247299860486275e-06, + "loss": 0.1046, + "step": 85085 + }, + { + "epoch": 3.97, + "learning_rate": 6.82394620099384e-06, + "loss": 0.1263, + "step": 85090 + }, + { + "epoch": 3.97, + "learning_rate": 6.823162415939053e-06, + "loss": 0.2744, + "step": 85095 + }, + { + "epoch": 3.97, + "learning_rate": 6.822378630884267e-06, + "loss": 0.0645, + "step": 85100 + }, + { + "epoch": 3.97, + "learning_rate": 6.82159484582948e-06, + "loss": 0.016, + "step": 85105 + }, + { + "epoch": 3.97, + "learning_rate": 6.820811060774694e-06, + "loss": 0.0759, + "step": 85110 + }, + { + "epoch": 3.97, + "learning_rate": 6.820027275719907e-06, + "loss": 0.0534, + "step": 85115 + }, + { + "epoch": 3.97, + "learning_rate": 6.819243490665121e-06, + "loss": 0.0791, + "step": 85120 + }, + { + "epoch": 3.97, + "learning_rate": 6.818459705610334e-06, + "loss": 0.1052, + "step": 85125 + }, + { + "epoch": 3.97, + "learning_rate": 6.817675920555548e-06, + "loss": 0.0868, + "step": 85130 + }, + { + "epoch": 3.97, + "learning_rate": 6.8168921355007615e-06, + "loss": 0.0865, + "step": 85135 + }, + { + "epoch": 3.97, + "learning_rate": 6.8161083504459746e-06, + "loss": 0.2504, + "step": 85140 + }, + { + "epoch": 3.97, + "learning_rate": 6.8153245653911885e-06, + "loss": 0.2567, + "step": 85145 + }, + { + "epoch": 3.97, + "learning_rate": 6.8145407803364015e-06, + "loss": 0.0648, + "step": 85150 + }, + { + "epoch": 3.97, + "learning_rate": 6.813756995281614e-06, + "loss": 0.0277, + "step": 85155 + }, + { + "epoch": 3.97, + "learning_rate": 6.812973210226828e-06, + "loss": 0.0593, + "step": 85160 + }, + { + "epoch": 3.97, + "learning_rate": 6.812189425172041e-06, + "loss": 0.1007, + "step": 85165 + }, + { + "epoch": 3.97, + "learning_rate": 6.811405640117255e-06, + "loss": 0.1356, + "step": 85170 + }, + { + "epoch": 3.97, + "learning_rate": 6.8106218550624685e-06, + "loss": 0.0813, + "step": 85175 + }, + { + "epoch": 3.97, + "learning_rate": 6.8098380700076816e-06, + "loss": 0.1681, + "step": 85180 + }, + { + "epoch": 3.97, + "learning_rate": 6.8090542849528955e-06, + "loss": 0.1017, + "step": 85185 + }, + { + "epoch": 3.98, + "learning_rate": 6.8082704998981085e-06, + "loss": 0.1854, + "step": 85190 + }, + { + "epoch": 3.98, + "learning_rate": 6.8074867148433224e-06, + "loss": 0.3727, + "step": 85195 + }, + { + "epoch": 3.98, + "learning_rate": 6.8067029297885355e-06, + "loss": 0.0719, + "step": 85200 + }, + { + "epoch": 3.98, + "learning_rate": 6.805919144733749e-06, + "loss": 0.0192, + "step": 85205 + }, + { + "epoch": 3.98, + "learning_rate": 6.8051353596789625e-06, + "loss": 0.0716, + "step": 85210 + }, + { + "epoch": 3.98, + "learning_rate": 6.804351574624176e-06, + "loss": 0.0414, + "step": 85215 + }, + { + "epoch": 3.98, + "learning_rate": 6.803567789569389e-06, + "loss": 0.0351, + "step": 85220 + }, + { + "epoch": 3.98, + "learning_rate": 6.8027840045146025e-06, + "loss": 0.075, + "step": 85225 + }, + { + "epoch": 3.98, + "learning_rate": 6.8020002194598155e-06, + "loss": 0.2622, + "step": 85230 + }, + { + "epoch": 3.98, + "learning_rate": 6.8012164344050294e-06, + "loss": 0.0557, + "step": 85235 + }, + { + "epoch": 3.98, + "learning_rate": 6.8004326493502425e-06, + "loss": 0.1608, + "step": 85240 + }, + { + "epoch": 3.98, + "learning_rate": 6.799648864295456e-06, + "loss": 0.1829, + "step": 85245 + }, + { + "epoch": 3.98, + "learning_rate": 6.7988650792406695e-06, + "loss": 0.1448, + "step": 85250 + }, + { + "epoch": 3.98, + "learning_rate": 6.798081294185883e-06, + "loss": 0.0546, + "step": 85255 + }, + { + "epoch": 3.98, + "learning_rate": 6.7972975091310964e-06, + "loss": 0.032, + "step": 85260 + }, + { + "epoch": 3.98, + "learning_rate": 6.79651372407631e-06, + "loss": 0.0511, + "step": 85265 + }, + { + "epoch": 3.98, + "learning_rate": 6.795729939021523e-06, + "loss": 0.0677, + "step": 85270 + }, + { + "epoch": 3.98, + "learning_rate": 6.794946153966737e-06, + "loss": 0.0947, + "step": 85275 + }, + { + "epoch": 3.98, + "learning_rate": 6.79416236891195e-06, + "loss": 0.1098, + "step": 85280 + }, + { + "epoch": 3.98, + "learning_rate": 6.7933785838571634e-06, + "loss": 0.1172, + "step": 85285 + }, + { + "epoch": 3.98, + "learning_rate": 6.7925947988023765e-06, + "loss": 0.1582, + "step": 85290 + }, + { + "epoch": 3.98, + "learning_rate": 6.79181101374759e-06, + "loss": 0.2443, + "step": 85295 + }, + { + "epoch": 3.98, + "learning_rate": 6.7910272286928034e-06, + "loss": 0.0805, + "step": 85300 + }, + { + "epoch": 3.98, + "learning_rate": 6.790243443638017e-06, + "loss": 0.0288, + "step": 85305 + }, + { + "epoch": 3.98, + "learning_rate": 6.78945965858323e-06, + "loss": 0.083, + "step": 85310 + }, + { + "epoch": 3.98, + "learning_rate": 6.788675873528444e-06, + "loss": 0.0465, + "step": 85315 + }, + { + "epoch": 3.98, + "learning_rate": 6.787892088473657e-06, + "loss": 0.048, + "step": 85320 + }, + { + "epoch": 3.98, + "learning_rate": 6.787108303418871e-06, + "loss": 0.0728, + "step": 85325 + }, + { + "epoch": 3.98, + "learning_rate": 6.786324518364084e-06, + "loss": 0.1453, + "step": 85330 + }, + { + "epoch": 3.98, + "learning_rate": 6.785540733309298e-06, + "loss": 0.2035, + "step": 85335 + }, + { + "epoch": 3.98, + "learning_rate": 6.784756948254511e-06, + "loss": 0.2733, + "step": 85340 + }, + { + "epoch": 3.98, + "learning_rate": 6.783973163199725e-06, + "loss": 0.1481, + "step": 85345 + }, + { + "epoch": 3.98, + "learning_rate": 6.783189378144937e-06, + "loss": 0.0662, + "step": 85350 + }, + { + "epoch": 3.98, + "learning_rate": 6.782405593090151e-06, + "loss": 0.0241, + "step": 85355 + }, + { + "epoch": 3.98, + "learning_rate": 6.781621808035364e-06, + "loss": 0.0078, + "step": 85360 + }, + { + "epoch": 3.98, + "learning_rate": 6.780838022980578e-06, + "loss": 0.0335, + "step": 85365 + }, + { + "epoch": 3.98, + "learning_rate": 6.780054237925791e-06, + "loss": 0.0504, + "step": 85370 + }, + { + "epoch": 3.98, + "learning_rate": 6.779270452871005e-06, + "loss": 0.0439, + "step": 85375 + }, + { + "epoch": 3.98, + "learning_rate": 6.778486667816218e-06, + "loss": 0.0861, + "step": 85380 + }, + { + "epoch": 3.98, + "learning_rate": 6.777702882761432e-06, + "loss": 0.1036, + "step": 85385 + }, + { + "epoch": 3.98, + "learning_rate": 6.776919097706645e-06, + "loss": 0.0982, + "step": 85390 + }, + { + "epoch": 3.98, + "learning_rate": 6.776135312651859e-06, + "loss": 0.4021, + "step": 85395 + }, + { + "epoch": 3.98, + "learning_rate": 6.775351527597073e-06, + "loss": 0.0563, + "step": 85400 + }, + { + "epoch": 3.99, + "learning_rate": 6.774567742542286e-06, + "loss": 0.0224, + "step": 85405 + }, + { + "epoch": 3.99, + "learning_rate": 6.7737839574875e-06, + "loss": 0.0658, + "step": 85410 + }, + { + "epoch": 3.99, + "learning_rate": 6.773000172432712e-06, + "loss": 0.0374, + "step": 85415 + }, + { + "epoch": 3.99, + "learning_rate": 6.772216387377925e-06, + "loss": 0.0689, + "step": 85420 + }, + { + "epoch": 3.99, + "learning_rate": 6.771432602323139e-06, + "loss": 0.0196, + "step": 85425 + }, + { + "epoch": 3.99, + "learning_rate": 6.770648817268352e-06, + "loss": 0.0726, + "step": 85430 + }, + { + "epoch": 3.99, + "learning_rate": 6.769865032213566e-06, + "loss": 0.1081, + "step": 85435 + }, + { + "epoch": 3.99, + "learning_rate": 6.769081247158779e-06, + "loss": 0.23, + "step": 85440 + }, + { + "epoch": 3.99, + "learning_rate": 6.768297462103993e-06, + "loss": 0.1952, + "step": 85445 + }, + { + "epoch": 3.99, + "learning_rate": 6.767513677049207e-06, + "loss": 0.0612, + "step": 85450 + }, + { + "epoch": 3.99, + "learning_rate": 6.76672989199442e-06, + "loss": 0.0818, + "step": 85455 + }, + { + "epoch": 3.99, + "learning_rate": 6.765946106939634e-06, + "loss": 0.0122, + "step": 85460 + }, + { + "epoch": 3.99, + "learning_rate": 6.765162321884847e-06, + "loss": 0.0459, + "step": 85465 + }, + { + "epoch": 3.99, + "learning_rate": 6.764378536830061e-06, + "loss": 0.0668, + "step": 85470 + }, + { + "epoch": 3.99, + "learning_rate": 6.763594751775274e-06, + "loss": 0.0418, + "step": 85475 + }, + { + "epoch": 3.99, + "learning_rate": 6.762810966720486e-06, + "loss": 0.0965, + "step": 85480 + }, + { + "epoch": 3.99, + "learning_rate": 6.7620271816657e-06, + "loss": 0.1879, + "step": 85485 + }, + { + "epoch": 3.99, + "learning_rate": 6.761243396610914e-06, + "loss": 0.2135, + "step": 85490 + }, + { + "epoch": 3.99, + "learning_rate": 6.760459611556127e-06, + "loss": 0.1978, + "step": 85495 + }, + { + "epoch": 3.99, + "learning_rate": 6.759675826501341e-06, + "loss": 0.111, + "step": 85500 + }, + { + "epoch": 3.99, + "learning_rate": 6.758892041446554e-06, + "loss": 0.0223, + "step": 85505 + }, + { + "epoch": 3.99, + "learning_rate": 6.758108256391768e-06, + "loss": 0.0205, + "step": 85510 + }, + { + "epoch": 3.99, + "learning_rate": 6.757324471336981e-06, + "loss": 0.0967, + "step": 85515 + }, + { + "epoch": 3.99, + "learning_rate": 6.756540686282195e-06, + "loss": 0.0362, + "step": 85520 + }, + { + "epoch": 3.99, + "learning_rate": 6.755756901227408e-06, + "loss": 0.1377, + "step": 85525 + }, + { + "epoch": 3.99, + "learning_rate": 6.754973116172622e-06, + "loss": 0.0516, + "step": 85530 + }, + { + "epoch": 3.99, + "learning_rate": 6.754189331117835e-06, + "loss": 0.1818, + "step": 85535 + }, + { + "epoch": 3.99, + "learning_rate": 6.753405546063049e-06, + "loss": 0.1526, + "step": 85540 + }, + { + "epoch": 3.99, + "learning_rate": 6.752621761008261e-06, + "loss": 0.3196, + "step": 85545 + }, + { + "epoch": 3.99, + "learning_rate": 6.751837975953475e-06, + "loss": 0.0523, + "step": 85550 + }, + { + "epoch": 3.99, + "learning_rate": 6.751054190898688e-06, + "loss": 0.0336, + "step": 85555 + }, + { + "epoch": 3.99, + "learning_rate": 6.750270405843902e-06, + "loss": 0.0413, + "step": 85560 + }, + { + "epoch": 3.99, + "learning_rate": 6.749486620789115e-06, + "loss": 0.1544, + "step": 85565 + }, + { + "epoch": 3.99, + "learning_rate": 6.748702835734329e-06, + "loss": 0.0657, + "step": 85570 + }, + { + "epoch": 3.99, + "learning_rate": 6.747919050679542e-06, + "loss": 0.0705, + "step": 85575 + }, + { + "epoch": 3.99, + "learning_rate": 6.747135265624756e-06, + "loss": 0.1632, + "step": 85580 + }, + { + "epoch": 3.99, + "learning_rate": 6.746351480569969e-06, + "loss": 0.1428, + "step": 85585 + }, + { + "epoch": 3.99, + "learning_rate": 6.745567695515183e-06, + "loss": 0.1539, + "step": 85590 + }, + { + "epoch": 3.99, + "learning_rate": 6.744783910460396e-06, + "loss": 0.2851, + "step": 85595 + }, + { + "epoch": 3.99, + "learning_rate": 6.74400012540561e-06, + "loss": 0.1251, + "step": 85600 + }, + { + "epoch": 3.99, + "learning_rate": 6.743216340350823e-06, + "loss": 0.0252, + "step": 85605 + }, + { + "epoch": 3.99, + "learning_rate": 6.742432555296036e-06, + "loss": 0.0494, + "step": 85610 + }, + { + "epoch": 3.99, + "learning_rate": 6.741648770241249e-06, + "loss": 0.0426, + "step": 85615 + }, + { + "epoch": 4.0, + "learning_rate": 6.740864985186463e-06, + "loss": 0.1323, + "step": 85620 + }, + { + "epoch": 4.0, + "learning_rate": 6.740081200131676e-06, + "loss": 0.1494, + "step": 85625 + }, + { + "epoch": 4.0, + "learning_rate": 6.73929741507689e-06, + "loss": 0.1759, + "step": 85630 + }, + { + "epoch": 4.0, + "learning_rate": 6.738513630022103e-06, + "loss": 0.1965, + "step": 85635 + }, + { + "epoch": 4.0, + "learning_rate": 6.737886601978274e-06, + "loss": 0.2621, + "step": 85640 + }, + { + "epoch": 4.0, + "learning_rate": 6.737102816923487e-06, + "loss": 0.3035, + "step": 85645 + }, + { + "epoch": 4.0, + "learning_rate": 6.736319031868701e-06, + "loss": 0.0709, + "step": 85650 + }, + { + "epoch": 4.0, + "learning_rate": 6.735535246813914e-06, + "loss": 0.0263, + "step": 85655 + }, + { + "epoch": 4.0, + "learning_rate": 6.734751461759128e-06, + "loss": 0.0484, + "step": 85660 + }, + { + "epoch": 4.0, + "learning_rate": 6.733967676704341e-06, + "loss": 0.0793, + "step": 85665 + }, + { + "epoch": 4.0, + "learning_rate": 6.733183891649555e-06, + "loss": 0.0728, + "step": 85670 + }, + { + "epoch": 4.0, + "learning_rate": 6.732400106594767e-06, + "loss": 0.0888, + "step": 85675 + }, + { + "epoch": 4.0, + "learning_rate": 6.731616321539981e-06, + "loss": 0.0878, + "step": 85680 + }, + { + "epoch": 4.0, + "learning_rate": 6.730832536485194e-06, + "loss": 0.2161, + "step": 85685 + }, + { + "epoch": 4.0, + "learning_rate": 6.730048751430408e-06, + "loss": 0.1678, + "step": 85690 + }, + { + "epoch": 4.0, + "learning_rate": 6.729264966375621e-06, + "loss": 0.2515, + "step": 85695 + }, + { + "epoch": 4.0, + "learning_rate": 6.728481181320835e-06, + "loss": 0.0519, + "step": 85700 + }, + { + "epoch": 4.0, + "learning_rate": 6.727697396266048e-06, + "loss": 0.035, + "step": 85705 + }, + { + "epoch": 4.0, + "learning_rate": 6.726913611211262e-06, + "loss": 0.0771, + "step": 85710 + }, + { + "epoch": 4.0, + "learning_rate": 6.726129826156475e-06, + "loss": 0.0916, + "step": 85715 + }, + { + "epoch": 4.0, + "learning_rate": 6.725346041101689e-06, + "loss": 0.163, + "step": 85720 + }, + { + "epoch": 4.0, + "eval_cer": 0.01181715224228095, + "eval_loss": 0.08370912075042725, + "eval_runtime": 455.3512, + "eval_samples_per_second": 41.836, + "eval_steps_per_second": 5.231, + "eval_wer": 0.10025906735751296, + "step": 85724 + }, + { + "epoch": 4.0, + "learning_rate": 6.724562256046903e-06, + "loss": 0.3869, + "step": 85725 + }, + { + "epoch": 4.0, + "learning_rate": 6.723778470992116e-06, + "loss": 0.1034, + "step": 85730 + }, + { + "epoch": 4.0, + "learning_rate": 6.72299468593733e-06, + "loss": 0.0496, + "step": 85735 + }, + { + "epoch": 4.0, + "learning_rate": 6.722210900882542e-06, + "loss": 0.0717, + "step": 85740 + }, + { + "epoch": 4.0, + "learning_rate": 6.721427115827755e-06, + "loss": 0.0621, + "step": 85745 + }, + { + "epoch": 4.0, + "learning_rate": 6.720643330772969e-06, + "loss": 0.0433, + "step": 85750 + }, + { + "epoch": 4.0, + "learning_rate": 6.719859545718182e-06, + "loss": 0.0813, + "step": 85755 + }, + { + "epoch": 4.0, + "learning_rate": 6.719075760663396e-06, + "loss": 0.0747, + "step": 85760 + }, + { + "epoch": 4.0, + "learning_rate": 6.718291975608609e-06, + "loss": 0.1336, + "step": 85765 + }, + { + "epoch": 4.0, + "learning_rate": 6.717508190553823e-06, + "loss": 0.3293, + "step": 85770 + }, + { + "epoch": 4.0, + "learning_rate": 6.716724405499037e-06, + "loss": 0.2614, + "step": 85775 + }, + { + "epoch": 4.0, + "learning_rate": 6.71594062044425e-06, + "loss": 0.1055, + "step": 85780 + }, + { + "epoch": 4.0, + "learning_rate": 6.715156835389464e-06, + "loss": 0.0226, + "step": 85785 + }, + { + "epoch": 4.0, + "learning_rate": 6.714373050334677e-06, + "loss": 0.0676, + "step": 85790 + }, + { + "epoch": 4.0, + "learning_rate": 6.713589265279891e-06, + "loss": 0.034, + "step": 85795 + }, + { + "epoch": 4.0, + "learning_rate": 6.712805480225104e-06, + "loss": 0.0403, + "step": 85800 + }, + { + "epoch": 4.0, + "learning_rate": 6.712021695170316e-06, + "loss": 0.0654, + "step": 85805 + }, + { + "epoch": 4.0, + "learning_rate": 6.71123791011553e-06, + "loss": 0.1075, + "step": 85810 + }, + { + "epoch": 4.0, + "learning_rate": 6.710454125060743e-06, + "loss": 0.0859, + "step": 85815 + }, + { + "epoch": 4.0, + "learning_rate": 6.709670340005957e-06, + "loss": 0.1162, + "step": 85820 + }, + { + "epoch": 4.0, + "learning_rate": 6.708886554951171e-06, + "loss": 0.2354, + "step": 85825 + }, + { + "epoch": 4.0, + "learning_rate": 6.708102769896384e-06, + "loss": 0.0505, + "step": 85830 + }, + { + "epoch": 4.01, + "learning_rate": 6.707318984841598e-06, + "loss": 0.0303, + "step": 85835 + }, + { + "epoch": 4.01, + "learning_rate": 6.706535199786811e-06, + "loss": 0.0591, + "step": 85840 + }, + { + "epoch": 4.01, + "learning_rate": 6.705751414732025e-06, + "loss": 0.0535, + "step": 85845 + }, + { + "epoch": 4.01, + "learning_rate": 6.704967629677238e-06, + "loss": 0.0453, + "step": 85850 + }, + { + "epoch": 4.01, + "learning_rate": 6.704183844622452e-06, + "loss": 0.0624, + "step": 85855 + }, + { + "epoch": 4.01, + "learning_rate": 6.703400059567665e-06, + "loss": 0.1451, + "step": 85860 + }, + { + "epoch": 4.01, + "learning_rate": 6.702616274512879e-06, + "loss": 0.0887, + "step": 85865 + }, + { + "epoch": 4.01, + "learning_rate": 6.701832489458091e-06, + "loss": 0.1117, + "step": 85870 + }, + { + "epoch": 4.01, + "learning_rate": 6.701048704403305e-06, + "loss": 0.2585, + "step": 85875 + }, + { + "epoch": 4.01, + "learning_rate": 6.700264919348518e-06, + "loss": 0.1133, + "step": 85880 + }, + { + "epoch": 4.01, + "learning_rate": 6.699481134293732e-06, + "loss": 0.0398, + "step": 85885 + }, + { + "epoch": 4.01, + "learning_rate": 6.698697349238945e-06, + "loss": 0.0391, + "step": 85890 + }, + { + "epoch": 4.01, + "learning_rate": 6.697913564184159e-06, + "loss": 0.0312, + "step": 85895 + }, + { + "epoch": 4.01, + "learning_rate": 6.697129779129372e-06, + "loss": 0.0566, + "step": 85900 + }, + { + "epoch": 4.01, + "learning_rate": 6.696345994074586e-06, + "loss": 0.1049, + "step": 85905 + }, + { + "epoch": 4.01, + "learning_rate": 6.695562209019799e-06, + "loss": 0.0799, + "step": 85910 + }, + { + "epoch": 4.01, + "learning_rate": 6.694778423965013e-06, + "loss": 0.1532, + "step": 85915 + }, + { + "epoch": 4.01, + "learning_rate": 6.693994638910226e-06, + "loss": 0.2829, + "step": 85920 + }, + { + "epoch": 4.01, + "learning_rate": 6.69321085385544e-06, + "loss": 0.3848, + "step": 85925 + }, + { + "epoch": 4.01, + "learning_rate": 6.692427068800653e-06, + "loss": 0.1048, + "step": 85930 + }, + { + "epoch": 4.01, + "learning_rate": 6.691643283745866e-06, + "loss": 0.0416, + "step": 85935 + }, + { + "epoch": 4.01, + "learning_rate": 6.690859498691079e-06, + "loss": 0.0483, + "step": 85940 + }, + { + "epoch": 4.01, + "learning_rate": 6.690075713636293e-06, + "loss": 0.0257, + "step": 85945 + }, + { + "epoch": 4.01, + "learning_rate": 6.689291928581506e-06, + "loss": 0.0497, + "step": 85950 + }, + { + "epoch": 4.01, + "learning_rate": 6.68850814352672e-06, + "loss": 0.1049, + "step": 85955 + }, + { + "epoch": 4.01, + "learning_rate": 6.687724358471933e-06, + "loss": 0.0786, + "step": 85960 + }, + { + "epoch": 4.01, + "learning_rate": 6.686940573417147e-06, + "loss": 0.244, + "step": 85965 + }, + { + "epoch": 4.01, + "learning_rate": 6.68615678836236e-06, + "loss": 0.1713, + "step": 85970 + }, + { + "epoch": 4.01, + "learning_rate": 6.685373003307574e-06, + "loss": 0.3134, + "step": 85975 + }, + { + "epoch": 4.01, + "learning_rate": 6.684589218252787e-06, + "loss": 0.0779, + "step": 85980 + }, + { + "epoch": 4.01, + "learning_rate": 6.683805433198001e-06, + "loss": 0.0033, + "step": 85985 + }, + { + "epoch": 4.01, + "learning_rate": 6.683021648143215e-06, + "loss": 0.0437, + "step": 85990 + }, + { + "epoch": 4.01, + "learning_rate": 6.682237863088428e-06, + "loss": 0.0269, + "step": 85995 + }, + { + "epoch": 4.01, + "learning_rate": 6.68145407803364e-06, + "loss": 0.0517, + "step": 86000 + }, + { + "epoch": 4.01, + "learning_rate": 6.680670292978854e-06, + "loss": 0.0475, + "step": 86005 + }, + { + "epoch": 4.01, + "learning_rate": 6.679886507924067e-06, + "loss": 0.1424, + "step": 86010 + }, + { + "epoch": 4.01, + "learning_rate": 6.679102722869281e-06, + "loss": 0.1523, + "step": 86015 + }, + { + "epoch": 4.01, + "learning_rate": 6.678318937814494e-06, + "loss": 0.1225, + "step": 86020 + }, + { + "epoch": 4.01, + "learning_rate": 6.677535152759708e-06, + "loss": 0.3258, + "step": 86025 + }, + { + "epoch": 4.01, + "learning_rate": 6.676751367704921e-06, + "loss": 0.0924, + "step": 86030 + }, + { + "epoch": 4.01, + "learning_rate": 6.675967582650135e-06, + "loss": 0.008, + "step": 86035 + }, + { + "epoch": 4.01, + "learning_rate": 6.675183797595349e-06, + "loss": 0.0142, + "step": 86040 + }, + { + "epoch": 4.01, + "learning_rate": 6.674400012540562e-06, + "loss": 0.0669, + "step": 86045 + }, + { + "epoch": 4.02, + "learning_rate": 6.6736162274857756e-06, + "loss": 0.0549, + "step": 86050 + }, + { + "epoch": 4.02, + "learning_rate": 6.672832442430989e-06, + "loss": 0.1676, + "step": 86055 + }, + { + "epoch": 4.02, + "learning_rate": 6.6720486573762025e-06, + "loss": 0.1218, + "step": 86060 + }, + { + "epoch": 4.02, + "learning_rate": 6.671264872321415e-06, + "loss": 0.1531, + "step": 86065 + }, + { + "epoch": 4.02, + "learning_rate": 6.670481087266628e-06, + "loss": 0.2372, + "step": 86070 + }, + { + "epoch": 4.02, + "learning_rate": 6.669697302211842e-06, + "loss": 0.201, + "step": 86075 + }, + { + "epoch": 4.02, + "learning_rate": 6.668913517157055e-06, + "loss": 0.0905, + "step": 86080 + }, + { + "epoch": 4.02, + "learning_rate": 6.668129732102269e-06, + "loss": 0.0378, + "step": 86085 + }, + { + "epoch": 4.02, + "learning_rate": 6.6673459470474826e-06, + "loss": 0.0084, + "step": 86090 + }, + { + "epoch": 4.02, + "learning_rate": 6.666562161992696e-06, + "loss": 0.0831, + "step": 86095 + }, + { + "epoch": 4.02, + "learning_rate": 6.6657783769379095e-06, + "loss": 0.0376, + "step": 86100 + }, + { + "epoch": 4.02, + "learning_rate": 6.664994591883123e-06, + "loss": 0.0516, + "step": 86105 + }, + { + "epoch": 4.02, + "learning_rate": 6.6642108068283365e-06, + "loss": 0.0656, + "step": 86110 + }, + { + "epoch": 4.02, + "learning_rate": 6.6634270217735495e-06, + "loss": 0.1976, + "step": 86115 + }, + { + "epoch": 4.02, + "learning_rate": 6.6626432367187635e-06, + "loss": 0.0929, + "step": 86120 + }, + { + "epoch": 4.02, + "learning_rate": 6.6618594516639765e-06, + "loss": 0.2294, + "step": 86125 + }, + { + "epoch": 4.02, + "learning_rate": 6.661075666609189e-06, + "loss": 0.0865, + "step": 86130 + }, + { + "epoch": 4.02, + "learning_rate": 6.660291881554403e-06, + "loss": 0.0254, + "step": 86135 + }, + { + "epoch": 4.02, + "learning_rate": 6.6595080964996165e-06, + "loss": 0.0696, + "step": 86140 + }, + { + "epoch": 4.02, + "learning_rate": 6.65872431144483e-06, + "loss": 0.03, + "step": 86145 + }, + { + "epoch": 4.02, + "learning_rate": 6.6579405263900435e-06, + "loss": 0.0553, + "step": 86150 + }, + { + "epoch": 4.02, + "learning_rate": 6.6571567413352566e-06, + "loss": 0.052, + "step": 86155 + }, + { + "epoch": 4.02, + "learning_rate": 6.6563729562804705e-06, + "loss": 0.0559, + "step": 86160 + }, + { + "epoch": 4.02, + "learning_rate": 6.6555891712256835e-06, + "loss": 0.218, + "step": 86165 + }, + { + "epoch": 4.02, + "learning_rate": 6.6548053861708974e-06, + "loss": 0.1176, + "step": 86170 + }, + { + "epoch": 4.02, + "learning_rate": 6.6540216011161105e-06, + "loss": 0.2272, + "step": 86175 + }, + { + "epoch": 4.02, + "learning_rate": 6.653237816061324e-06, + "loss": 0.0639, + "step": 86180 + }, + { + "epoch": 4.02, + "learning_rate": 6.6524540310065375e-06, + "loss": 0.0154, + "step": 86185 + }, + { + "epoch": 4.02, + "learning_rate": 6.651670245951751e-06, + "loss": 0.0691, + "step": 86190 + }, + { + "epoch": 4.02, + "learning_rate": 6.6508864608969636e-06, + "loss": 0.0433, + "step": 86195 + }, + { + "epoch": 4.02, + "learning_rate": 6.6501026758421775e-06, + "loss": 0.0739, + "step": 86200 + }, + { + "epoch": 4.02, + "learning_rate": 6.6493188907873905e-06, + "loss": 0.1184, + "step": 86205 + }, + { + "epoch": 4.02, + "learning_rate": 6.6485351057326044e-06, + "loss": 0.0466, + "step": 86210 + }, + { + "epoch": 4.02, + "learning_rate": 6.6477513206778175e-06, + "loss": 0.1509, + "step": 86215 + }, + { + "epoch": 4.02, + "learning_rate": 6.646967535623031e-06, + "loss": 0.1283, + "step": 86220 + }, + { + "epoch": 4.02, + "learning_rate": 6.6461837505682445e-06, + "loss": 0.3265, + "step": 86225 + }, + { + "epoch": 4.02, + "learning_rate": 6.645399965513458e-06, + "loss": 0.1273, + "step": 86230 + }, + { + "epoch": 4.02, + "learning_rate": 6.6446161804586714e-06, + "loss": 0.098, + "step": 86235 + }, + { + "epoch": 4.02, + "learning_rate": 6.643832395403885e-06, + "loss": 0.0169, + "step": 86240 + }, + { + "epoch": 4.02, + "learning_rate": 6.643048610349098e-06, + "loss": 0.0439, + "step": 86245 + }, + { + "epoch": 4.02, + "learning_rate": 6.642264825294312e-06, + "loss": 0.1139, + "step": 86250 + }, + { + "epoch": 4.02, + "learning_rate": 6.641481040239525e-06, + "loss": 0.057, + "step": 86255 + }, + { + "epoch": 4.03, + "learning_rate": 6.640697255184738e-06, + "loss": 0.1377, + "step": 86260 + }, + { + "epoch": 4.03, + "learning_rate": 6.6399134701299515e-06, + "loss": 0.1528, + "step": 86265 + }, + { + "epoch": 4.03, + "learning_rate": 6.639129685075165e-06, + "loss": 0.1555, + "step": 86270 + }, + { + "epoch": 4.03, + "learning_rate": 6.6383459000203784e-06, + "loss": 0.2474, + "step": 86275 + }, + { + "epoch": 4.03, + "learning_rate": 6.637562114965592e-06, + "loss": 0.0938, + "step": 86280 + }, + { + "epoch": 4.03, + "learning_rate": 6.636778329910805e-06, + "loss": 0.0148, + "step": 86285 + }, + { + "epoch": 4.03, + "learning_rate": 6.635994544856019e-06, + "loss": 0.0507, + "step": 86290 + }, + { + "epoch": 4.03, + "learning_rate": 6.635210759801232e-06, + "loss": 0.0622, + "step": 86295 + }, + { + "epoch": 4.03, + "learning_rate": 6.634426974746446e-06, + "loss": 0.059, + "step": 86300 + }, + { + "epoch": 4.03, + "learning_rate": 6.63364318969166e-06, + "loss": 0.0548, + "step": 86305 + }, + { + "epoch": 4.03, + "learning_rate": 6.632859404636873e-06, + "loss": 0.1452, + "step": 86310 + }, + { + "epoch": 4.03, + "learning_rate": 6.632075619582087e-06, + "loss": 0.1255, + "step": 86315 + }, + { + "epoch": 4.03, + "learning_rate": 6.6312918345273e-06, + "loss": 0.13, + "step": 86320 + }, + { + "epoch": 4.03, + "learning_rate": 6.630508049472512e-06, + "loss": 0.2595, + "step": 86325 + }, + { + "epoch": 4.03, + "learning_rate": 6.629724264417726e-06, + "loss": 0.0796, + "step": 86330 + }, + { + "epoch": 4.03, + "learning_rate": 6.628940479362939e-06, + "loss": 0.0438, + "step": 86335 + }, + { + "epoch": 4.03, + "learning_rate": 6.628156694308153e-06, + "loss": 0.0639, + "step": 86340 + }, + { + "epoch": 4.03, + "learning_rate": 6.627372909253366e-06, + "loss": 0.0474, + "step": 86345 + }, + { + "epoch": 4.03, + "learning_rate": 6.62658912419858e-06, + "loss": 0.0273, + "step": 86350 + }, + { + "epoch": 4.03, + "learning_rate": 6.625805339143794e-06, + "loss": 0.0312, + "step": 86355 + }, + { + "epoch": 4.03, + "learning_rate": 6.625021554089007e-06, + "loss": 0.1291, + "step": 86360 + }, + { + "epoch": 4.03, + "learning_rate": 6.624237769034221e-06, + "loss": 0.1709, + "step": 86365 + }, + { + "epoch": 4.03, + "learning_rate": 6.623453983979434e-06, + "loss": 0.14, + "step": 86370 + }, + { + "epoch": 4.03, + "learning_rate": 6.622670198924648e-06, + "loss": 0.2679, + "step": 86375 + }, + { + "epoch": 4.03, + "learning_rate": 6.621886413869861e-06, + "loss": 0.0834, + "step": 86380 + }, + { + "epoch": 4.03, + "learning_rate": 6.621102628815075e-06, + "loss": 0.0173, + "step": 86385 + }, + { + "epoch": 4.03, + "learning_rate": 6.620318843760287e-06, + "loss": 0.0317, + "step": 86390 + }, + { + "epoch": 4.03, + "learning_rate": 6.6195350587055e-06, + "loss": 0.093, + "step": 86395 + }, + { + "epoch": 4.03, + "learning_rate": 6.618751273650714e-06, + "loss": 0.0668, + "step": 86400 + }, + { + "epoch": 4.03, + "learning_rate": 6.617967488595928e-06, + "loss": 0.1152, + "step": 86405 + }, + { + "epoch": 4.03, + "learning_rate": 6.617183703541141e-06, + "loss": 0.212, + "step": 86410 + }, + { + "epoch": 4.03, + "learning_rate": 6.616399918486355e-06, + "loss": 0.1631, + "step": 86415 + }, + { + "epoch": 4.03, + "learning_rate": 6.615616133431568e-06, + "loss": 0.2263, + "step": 86420 + }, + { + "epoch": 4.03, + "learning_rate": 6.614832348376782e-06, + "loss": 0.1845, + "step": 86425 + }, + { + "epoch": 4.03, + "learning_rate": 6.614048563321995e-06, + "loss": 0.0779, + "step": 86430 + }, + { + "epoch": 4.03, + "learning_rate": 6.613264778267209e-06, + "loss": 0.03, + "step": 86435 + }, + { + "epoch": 4.03, + "learning_rate": 6.612480993212422e-06, + "loss": 0.0334, + "step": 86440 + }, + { + "epoch": 4.03, + "learning_rate": 6.611697208157636e-06, + "loss": 0.0441, + "step": 86445 + }, + { + "epoch": 4.03, + "learning_rate": 6.610913423102849e-06, + "loss": 0.0852, + "step": 86450 + }, + { + "epoch": 4.03, + "learning_rate": 6.610129638048062e-06, + "loss": 0.1316, + "step": 86455 + }, + { + "epoch": 4.03, + "learning_rate": 6.609345852993275e-06, + "loss": 0.0496, + "step": 86460 + }, + { + "epoch": 4.03, + "learning_rate": 6.608562067938489e-06, + "loss": 0.1089, + "step": 86465 + }, + { + "epoch": 4.03, + "learning_rate": 6.607778282883702e-06, + "loss": 0.104, + "step": 86470 + }, + { + "epoch": 4.04, + "learning_rate": 6.606994497828916e-06, + "loss": 0.3546, + "step": 86475 + }, + { + "epoch": 4.04, + "learning_rate": 6.606210712774129e-06, + "loss": 0.0999, + "step": 86480 + }, + { + "epoch": 4.04, + "learning_rate": 6.605426927719343e-06, + "loss": 0.0555, + "step": 86485 + }, + { + "epoch": 4.04, + "learning_rate": 6.604643142664556e-06, + "loss": 0.0279, + "step": 86490 + }, + { + "epoch": 4.04, + "learning_rate": 6.60385935760977e-06, + "loss": 0.0759, + "step": 86495 + }, + { + "epoch": 4.04, + "learning_rate": 6.603075572554983e-06, + "loss": 0.084, + "step": 86500 + }, + { + "epoch": 4.04, + "learning_rate": 6.602291787500197e-06, + "loss": 0.0451, + "step": 86505 + }, + { + "epoch": 4.04, + "learning_rate": 6.60150800244541e-06, + "loss": 0.0754, + "step": 86510 + }, + { + "epoch": 4.04, + "learning_rate": 6.600724217390624e-06, + "loss": 0.0851, + "step": 86515 + }, + { + "epoch": 4.04, + "learning_rate": 6.599940432335836e-06, + "loss": 0.1036, + "step": 86520 + }, + { + "epoch": 4.04, + "learning_rate": 6.59915664728105e-06, + "loss": 0.278, + "step": 86525 + }, + { + "epoch": 4.04, + "learning_rate": 6.598372862226263e-06, + "loss": 0.0583, + "step": 86530 + }, + { + "epoch": 4.04, + "learning_rate": 6.597589077171477e-06, + "loss": 0.0428, + "step": 86535 + }, + { + "epoch": 4.04, + "learning_rate": 6.59680529211669e-06, + "loss": 0.0496, + "step": 86540 + }, + { + "epoch": 4.04, + "learning_rate": 6.596021507061904e-06, + "loss": 0.0367, + "step": 86545 + }, + { + "epoch": 4.04, + "learning_rate": 6.595237722007117e-06, + "loss": 0.0872, + "step": 86550 + }, + { + "epoch": 4.04, + "learning_rate": 6.594453936952331e-06, + "loss": 0.0676, + "step": 86555 + }, + { + "epoch": 4.04, + "learning_rate": 6.593670151897544e-06, + "loss": 0.1954, + "step": 86560 + }, + { + "epoch": 4.04, + "learning_rate": 6.592886366842758e-06, + "loss": 0.0746, + "step": 86565 + }, + { + "epoch": 4.04, + "learning_rate": 6.592102581787972e-06, + "loss": 0.1854, + "step": 86570 + }, + { + "epoch": 4.04, + "learning_rate": 6.591318796733185e-06, + "loss": 0.1785, + "step": 86575 + }, + { + "epoch": 4.04, + "learning_rate": 6.590535011678399e-06, + "loss": 0.0884, + "step": 86580 + }, + { + "epoch": 4.04, + "learning_rate": 6.589751226623611e-06, + "loss": 0.0615, + "step": 86585 + }, + { + "epoch": 4.04, + "learning_rate": 6.588967441568824e-06, + "loss": 0.0387, + "step": 86590 + }, + { + "epoch": 4.04, + "learning_rate": 6.588183656514038e-06, + "loss": 0.0626, + "step": 86595 + }, + { + "epoch": 4.04, + "learning_rate": 6.587399871459251e-06, + "loss": 0.045, + "step": 86600 + }, + { + "epoch": 4.04, + "learning_rate": 6.586616086404465e-06, + "loss": 0.0564, + "step": 86605 + }, + { + "epoch": 4.04, + "learning_rate": 6.585832301349678e-06, + "loss": 0.0815, + "step": 86610 + }, + { + "epoch": 4.04, + "learning_rate": 6.585048516294892e-06, + "loss": 0.1293, + "step": 86615 + }, + { + "epoch": 4.04, + "learning_rate": 6.584264731240106e-06, + "loss": 0.1797, + "step": 86620 + }, + { + "epoch": 4.04, + "learning_rate": 6.583480946185319e-06, + "loss": 0.2436, + "step": 86625 + }, + { + "epoch": 4.04, + "learning_rate": 6.582697161130533e-06, + "loss": 0.1078, + "step": 86630 + }, + { + "epoch": 4.04, + "learning_rate": 6.581913376075746e-06, + "loss": 0.0091, + "step": 86635 + }, + { + "epoch": 4.04, + "learning_rate": 6.58112959102096e-06, + "loss": 0.0221, + "step": 86640 + }, + { + "epoch": 4.04, + "learning_rate": 6.580345805966173e-06, + "loss": 0.07, + "step": 86645 + }, + { + "epoch": 4.04, + "learning_rate": 6.579562020911385e-06, + "loss": 0.0768, + "step": 86650 + }, + { + "epoch": 4.04, + "learning_rate": 6.578778235856599e-06, + "loss": 0.0561, + "step": 86655 + }, + { + "epoch": 4.04, + "learning_rate": 6.577994450801812e-06, + "loss": 0.098, + "step": 86660 + }, + { + "epoch": 4.04, + "learning_rate": 6.577210665747026e-06, + "loss": 0.0594, + "step": 86665 + }, + { + "epoch": 4.04, + "learning_rate": 6.57642688069224e-06, + "loss": 0.2244, + "step": 86670 + }, + { + "epoch": 4.04, + "learning_rate": 6.575643095637453e-06, + "loss": 0.3049, + "step": 86675 + }, + { + "epoch": 4.04, + "learning_rate": 6.574859310582667e-06, + "loss": 0.1069, + "step": 86680 + }, + { + "epoch": 4.04, + "learning_rate": 6.57407552552788e-06, + "loss": 0.0324, + "step": 86685 + }, + { + "epoch": 4.05, + "learning_rate": 6.573291740473094e-06, + "loss": 0.0757, + "step": 86690 + }, + { + "epoch": 4.05, + "learning_rate": 6.572507955418307e-06, + "loss": 0.0384, + "step": 86695 + }, + { + "epoch": 4.05, + "learning_rate": 6.571724170363521e-06, + "loss": 0.0515, + "step": 86700 + }, + { + "epoch": 4.05, + "learning_rate": 6.570940385308734e-06, + "loss": 0.1113, + "step": 86705 + }, + { + "epoch": 4.05, + "learning_rate": 6.570156600253948e-06, + "loss": 0.1069, + "step": 86710 + }, + { + "epoch": 4.05, + "learning_rate": 6.56937281519916e-06, + "loss": 0.1026, + "step": 86715 + }, + { + "epoch": 4.05, + "learning_rate": 6.568589030144374e-06, + "loss": 0.1806, + "step": 86720 + }, + { + "epoch": 4.05, + "learning_rate": 6.567805245089587e-06, + "loss": 0.2835, + "step": 86725 + }, + { + "epoch": 4.05, + "learning_rate": 6.567021460034801e-06, + "loss": 0.1147, + "step": 86730 + }, + { + "epoch": 4.05, + "learning_rate": 6.566237674980014e-06, + "loss": 0.0272, + "step": 86735 + }, + { + "epoch": 4.05, + "learning_rate": 6.565453889925228e-06, + "loss": 0.0532, + "step": 86740 + }, + { + "epoch": 4.05, + "learning_rate": 6.564670104870441e-06, + "loss": 0.0573, + "step": 86745 + }, + { + "epoch": 4.05, + "learning_rate": 6.563886319815655e-06, + "loss": 0.1069, + "step": 86750 + }, + { + "epoch": 4.05, + "learning_rate": 6.563102534760868e-06, + "loss": 0.1186, + "step": 86755 + }, + { + "epoch": 4.05, + "learning_rate": 6.5623187497060816e-06, + "loss": 0.1125, + "step": 86760 + }, + { + "epoch": 4.05, + "learning_rate": 6.561534964651295e-06, + "loss": 0.0842, + "step": 86765 + }, + { + "epoch": 4.05, + "learning_rate": 6.5607511795965085e-06, + "loss": 0.1723, + "step": 86770 + }, + { + "epoch": 4.05, + "learning_rate": 6.559967394541722e-06, + "loss": 0.2628, + "step": 86775 + }, + { + "epoch": 4.05, + "learning_rate": 6.559183609486935e-06, + "loss": 0.0976, + "step": 86780 + }, + { + "epoch": 4.05, + "learning_rate": 6.558399824432148e-06, + "loss": 0.025, + "step": 86785 + }, + { + "epoch": 4.05, + "learning_rate": 6.557616039377362e-06, + "loss": 0.0342, + "step": 86790 + }, + { + "epoch": 4.05, + "learning_rate": 6.556832254322575e-06, + "loss": 0.0495, + "step": 86795 + }, + { + "epoch": 4.05, + "learning_rate": 6.5560484692677886e-06, + "loss": 0.0585, + "step": 86800 + }, + { + "epoch": 4.05, + "learning_rate": 6.555264684213002e-06, + "loss": 0.0427, + "step": 86805 + }, + { + "epoch": 4.05, + "learning_rate": 6.5544808991582155e-06, + "loss": 0.1364, + "step": 86810 + }, + { + "epoch": 4.05, + "learning_rate": 6.553697114103429e-06, + "loss": 0.1192, + "step": 86815 + }, + { + "epoch": 4.05, + "learning_rate": 6.5529133290486425e-06, + "loss": 0.148, + "step": 86820 + }, + { + "epoch": 4.05, + "learning_rate": 6.5521295439938556e-06, + "loss": 0.2054, + "step": 86825 + }, + { + "epoch": 4.05, + "learning_rate": 6.5513457589390695e-06, + "loss": 0.1116, + "step": 86830 + }, + { + "epoch": 4.05, + "learning_rate": 6.5505619738842825e-06, + "loss": 0.0213, + "step": 86835 + }, + { + "epoch": 4.05, + "learning_rate": 6.5497781888294964e-06, + "loss": 0.0252, + "step": 86840 + }, + { + "epoch": 4.05, + "learning_rate": 6.548994403774709e-06, + "loss": 0.0363, + "step": 86845 + }, + { + "epoch": 4.05, + "learning_rate": 6.5482106187199225e-06, + "loss": 0.055, + "step": 86850 + }, + { + "epoch": 4.05, + "learning_rate": 6.547426833665136e-06, + "loss": 0.063, + "step": 86855 + }, + { + "epoch": 4.05, + "learning_rate": 6.5466430486103495e-06, + "loss": 0.0967, + "step": 86860 + }, + { + "epoch": 4.05, + "learning_rate": 6.5458592635555626e-06, + "loss": 0.091, + "step": 86865 + }, + { + "epoch": 4.05, + "learning_rate": 6.5450754785007765e-06, + "loss": 0.1131, + "step": 86870 + }, + { + "epoch": 4.05, + "learning_rate": 6.5442916934459895e-06, + "loss": 0.3417, + "step": 86875 + }, + { + "epoch": 4.05, + "learning_rate": 6.5435079083912034e-06, + "loss": 0.0783, + "step": 86880 + }, + { + "epoch": 4.05, + "learning_rate": 6.542724123336417e-06, + "loss": 0.027, + "step": 86885 + }, + { + "epoch": 4.05, + "learning_rate": 6.54194033828163e-06, + "loss": 0.0212, + "step": 86890 + }, + { + "epoch": 4.05, + "learning_rate": 6.541156553226844e-06, + "loss": 0.0443, + "step": 86895 + }, + { + "epoch": 4.05, + "learning_rate": 6.540372768172057e-06, + "loss": 0.0521, + "step": 86900 + }, + { + "epoch": 4.06, + "learning_rate": 6.539588983117271e-06, + "loss": 0.0464, + "step": 86905 + }, + { + "epoch": 4.06, + "learning_rate": 6.5388051980624835e-06, + "loss": 0.2171, + "step": 86910 + }, + { + "epoch": 4.06, + "learning_rate": 6.5380214130076965e-06, + "loss": 0.1469, + "step": 86915 + }, + { + "epoch": 4.06, + "learning_rate": 6.5372376279529105e-06, + "loss": 0.1103, + "step": 86920 + }, + { + "epoch": 4.06, + "learning_rate": 6.5364538428981235e-06, + "loss": 0.2553, + "step": 86925 + }, + { + "epoch": 4.06, + "learning_rate": 6.535670057843337e-06, + "loss": 0.0803, + "step": 86930 + }, + { + "epoch": 4.06, + "learning_rate": 6.534886272788551e-06, + "loss": 0.0526, + "step": 86935 + }, + { + "epoch": 4.06, + "learning_rate": 6.534102487733764e-06, + "loss": 0.0252, + "step": 86940 + }, + { + "epoch": 4.06, + "learning_rate": 6.533318702678978e-06, + "loss": 0.0656, + "step": 86945 + }, + { + "epoch": 4.06, + "learning_rate": 6.532534917624191e-06, + "loss": 0.0772, + "step": 86950 + }, + { + "epoch": 4.06, + "learning_rate": 6.531751132569405e-06, + "loss": 0.0528, + "step": 86955 + }, + { + "epoch": 4.06, + "learning_rate": 6.530967347514618e-06, + "loss": 0.1471, + "step": 86960 + }, + { + "epoch": 4.06, + "learning_rate": 6.530183562459832e-06, + "loss": 0.0885, + "step": 86965 + }, + { + "epoch": 4.06, + "learning_rate": 6.529399777405045e-06, + "loss": 0.2131, + "step": 86970 + }, + { + "epoch": 4.06, + "learning_rate": 6.5286159923502575e-06, + "loss": 0.235, + "step": 86975 + }, + { + "epoch": 4.06, + "learning_rate": 6.527832207295471e-06, + "loss": 0.0936, + "step": 86980 + }, + { + "epoch": 4.06, + "learning_rate": 6.527048422240685e-06, + "loss": 0.0433, + "step": 86985 + }, + { + "epoch": 4.06, + "learning_rate": 6.526264637185898e-06, + "loss": 0.0312, + "step": 86990 + }, + { + "epoch": 4.06, + "learning_rate": 6.525480852131112e-06, + "loss": 0.0286, + "step": 86995 + }, + { + "epoch": 4.06, + "learning_rate": 6.524697067076325e-06, + "loss": 0.1035, + "step": 87000 + }, + { + "epoch": 4.06, + "learning_rate": 6.523913282021539e-06, + "loss": 0.1216, + "step": 87005 + }, + { + "epoch": 4.06, + "learning_rate": 6.523129496966752e-06, + "loss": 0.1191, + "step": 87010 + }, + { + "epoch": 4.06, + "learning_rate": 6.522345711911966e-06, + "loss": 0.1446, + "step": 87015 + }, + { + "epoch": 4.06, + "learning_rate": 6.521561926857179e-06, + "loss": 0.1196, + "step": 87020 + }, + { + "epoch": 4.06, + "learning_rate": 6.520778141802393e-06, + "loss": 0.1584, + "step": 87025 + }, + { + "epoch": 4.06, + "learning_rate": 6.519994356747606e-06, + "loss": 0.0627, + "step": 87030 + }, + { + "epoch": 4.06, + "learning_rate": 6.51921057169282e-06, + "loss": 0.0391, + "step": 87035 + }, + { + "epoch": 4.06, + "learning_rate": 6.518426786638032e-06, + "loss": 0.0231, + "step": 87040 + }, + { + "epoch": 4.06, + "learning_rate": 6.517643001583246e-06, + "loss": 0.0221, + "step": 87045 + }, + { + "epoch": 4.06, + "learning_rate": 6.516859216528459e-06, + "loss": 0.0765, + "step": 87050 + }, + { + "epoch": 4.06, + "learning_rate": 6.516075431473673e-06, + "loss": 0.0949, + "step": 87055 + }, + { + "epoch": 4.06, + "learning_rate": 6.515291646418886e-06, + "loss": 0.073, + "step": 87060 + }, + { + "epoch": 4.06, + "learning_rate": 6.5145078613641e-06, + "loss": 0.0697, + "step": 87065 + }, + { + "epoch": 4.06, + "learning_rate": 6.513724076309313e-06, + "loss": 0.181, + "step": 87070 + }, + { + "epoch": 4.06, + "learning_rate": 6.512940291254527e-06, + "loss": 0.4061, + "step": 87075 + }, + { + "epoch": 4.06, + "learning_rate": 6.51215650619974e-06, + "loss": 0.0987, + "step": 87080 + }, + { + "epoch": 4.06, + "learning_rate": 6.511372721144954e-06, + "loss": 0.0311, + "step": 87085 + }, + { + "epoch": 4.06, + "learning_rate": 6.510588936090167e-06, + "loss": 0.0548, + "step": 87090 + }, + { + "epoch": 4.06, + "learning_rate": 6.509805151035381e-06, + "loss": 0.0441, + "step": 87095 + }, + { + "epoch": 4.06, + "learning_rate": 6.509021365980594e-06, + "loss": 0.0354, + "step": 87100 + }, + { + "epoch": 4.06, + "learning_rate": 6.508237580925807e-06, + "loss": 0.056, + "step": 87105 + }, + { + "epoch": 4.06, + "learning_rate": 6.50745379587102e-06, + "loss": 0.1044, + "step": 87110 + }, + { + "epoch": 4.06, + "learning_rate": 6.506670010816234e-06, + "loss": 0.1385, + "step": 87115 + }, + { + "epoch": 4.07, + "learning_rate": 6.505886225761447e-06, + "loss": 0.1795, + "step": 87120 + }, + { + "epoch": 4.07, + "learning_rate": 6.505102440706661e-06, + "loss": 0.1908, + "step": 87125 + }, + { + "epoch": 4.07, + "learning_rate": 6.504318655651874e-06, + "loss": 0.0907, + "step": 87130 + }, + { + "epoch": 4.07, + "learning_rate": 6.503534870597088e-06, + "loss": 0.0217, + "step": 87135 + }, + { + "epoch": 4.07, + "learning_rate": 6.502751085542301e-06, + "loss": 0.0208, + "step": 87140 + }, + { + "epoch": 4.07, + "learning_rate": 6.501967300487515e-06, + "loss": 0.0597, + "step": 87145 + }, + { + "epoch": 4.07, + "learning_rate": 6.501183515432729e-06, + "loss": 0.0216, + "step": 87150 + }, + { + "epoch": 4.07, + "learning_rate": 6.500399730377942e-06, + "loss": 0.0424, + "step": 87155 + }, + { + "epoch": 4.07, + "learning_rate": 6.499615945323156e-06, + "loss": 0.1451, + "step": 87160 + }, + { + "epoch": 4.07, + "learning_rate": 6.498832160268369e-06, + "loss": 0.1819, + "step": 87165 + }, + { + "epoch": 4.07, + "learning_rate": 6.498048375213581e-06, + "loss": 0.1446, + "step": 87170 + }, + { + "epoch": 4.07, + "learning_rate": 6.497264590158795e-06, + "loss": 0.3621, + "step": 87175 + }, + { + "epoch": 4.07, + "learning_rate": 6.496480805104008e-06, + "loss": 0.0368, + "step": 87180 + }, + { + "epoch": 4.07, + "learning_rate": 6.495697020049222e-06, + "loss": 0.0267, + "step": 87185 + }, + { + "epoch": 4.07, + "learning_rate": 6.494913234994435e-06, + "loss": 0.0279, + "step": 87190 + }, + { + "epoch": 4.07, + "learning_rate": 6.494129449939649e-06, + "loss": 0.0602, + "step": 87195 + }, + { + "epoch": 4.07, + "learning_rate": 6.493345664884863e-06, + "loss": 0.0937, + "step": 87200 + }, + { + "epoch": 4.07, + "learning_rate": 6.492561879830076e-06, + "loss": 0.0745, + "step": 87205 + }, + { + "epoch": 4.07, + "learning_rate": 6.49177809477529e-06, + "loss": 0.0735, + "step": 87210 + }, + { + "epoch": 4.07, + "learning_rate": 6.490994309720503e-06, + "loss": 0.1595, + "step": 87215 + }, + { + "epoch": 4.07, + "learning_rate": 6.490210524665717e-06, + "loss": 0.2627, + "step": 87220 + }, + { + "epoch": 4.07, + "learning_rate": 6.48942673961093e-06, + "loss": 0.2398, + "step": 87225 + }, + { + "epoch": 4.07, + "learning_rate": 6.488642954556144e-06, + "loss": 0.0896, + "step": 87230 + }, + { + "epoch": 4.07, + "learning_rate": 6.487859169501356e-06, + "loss": 0.0176, + "step": 87235 + }, + { + "epoch": 4.07, + "learning_rate": 6.487075384446569e-06, + "loss": 0.0297, + "step": 87240 + }, + { + "epoch": 4.07, + "learning_rate": 6.486291599391783e-06, + "loss": 0.0487, + "step": 87245 + }, + { + "epoch": 4.07, + "learning_rate": 6.485507814336997e-06, + "loss": 0.045, + "step": 87250 + }, + { + "epoch": 4.07, + "learning_rate": 6.48472402928221e-06, + "loss": 0.0454, + "step": 87255 + }, + { + "epoch": 4.07, + "learning_rate": 6.483940244227424e-06, + "loss": 0.1093, + "step": 87260 + }, + { + "epoch": 4.07, + "learning_rate": 6.483156459172637e-06, + "loss": 0.1986, + "step": 87265 + }, + { + "epoch": 4.07, + "learning_rate": 6.482372674117851e-06, + "loss": 0.1638, + "step": 87270 + }, + { + "epoch": 4.07, + "learning_rate": 6.481588889063064e-06, + "loss": 0.2735, + "step": 87275 + }, + { + "epoch": 4.07, + "learning_rate": 6.480805104008278e-06, + "loss": 0.0861, + "step": 87280 + }, + { + "epoch": 4.07, + "learning_rate": 6.480021318953491e-06, + "loss": 0.0302, + "step": 87285 + }, + { + "epoch": 4.07, + "learning_rate": 6.479237533898705e-06, + "loss": 0.0729, + "step": 87290 + }, + { + "epoch": 4.07, + "learning_rate": 6.478453748843918e-06, + "loss": 0.0527, + "step": 87295 + }, + { + "epoch": 4.07, + "learning_rate": 6.477669963789131e-06, + "loss": 0.058, + "step": 87300 + }, + { + "epoch": 4.07, + "learning_rate": 6.476886178734344e-06, + "loss": 0.0528, + "step": 87305 + }, + { + "epoch": 4.07, + "learning_rate": 6.476102393679558e-06, + "loss": 0.0929, + "step": 87310 + }, + { + "epoch": 4.07, + "learning_rate": 6.475318608624771e-06, + "loss": 0.0393, + "step": 87315 + }, + { + "epoch": 4.07, + "learning_rate": 6.474534823569985e-06, + "loss": 0.1798, + "step": 87320 + }, + { + "epoch": 4.07, + "learning_rate": 6.473751038515198e-06, + "loss": 0.2319, + "step": 87325 + }, + { + "epoch": 4.07, + "learning_rate": 6.472967253460412e-06, + "loss": 0.0834, + "step": 87330 + }, + { + "epoch": 4.08, + "learning_rate": 6.472183468405625e-06, + "loss": 0.0408, + "step": 87335 + }, + { + "epoch": 4.08, + "learning_rate": 6.471399683350839e-06, + "loss": 0.0341, + "step": 87340 + }, + { + "epoch": 4.08, + "learning_rate": 6.470615898296052e-06, + "loss": 0.0598, + "step": 87345 + }, + { + "epoch": 4.08, + "learning_rate": 6.469832113241266e-06, + "loss": 0.133, + "step": 87350 + }, + { + "epoch": 4.08, + "learning_rate": 6.469048328186479e-06, + "loss": 0.0457, + "step": 87355 + }, + { + "epoch": 4.08, + "learning_rate": 6.468264543131693e-06, + "loss": 0.1221, + "step": 87360 + }, + { + "epoch": 4.08, + "learning_rate": 6.467480758076905e-06, + "loss": 0.1247, + "step": 87365 + }, + { + "epoch": 4.08, + "learning_rate": 6.466696973022119e-06, + "loss": 0.1616, + "step": 87370 + }, + { + "epoch": 4.08, + "learning_rate": 6.465913187967332e-06, + "loss": 0.3098, + "step": 87375 + }, + { + "epoch": 4.08, + "learning_rate": 6.465129402912546e-06, + "loss": 0.1429, + "step": 87380 + }, + { + "epoch": 4.08, + "learning_rate": 6.464345617857759e-06, + "loss": 0.0121, + "step": 87385 + }, + { + "epoch": 4.08, + "learning_rate": 6.463561832802973e-06, + "loss": 0.0204, + "step": 87390 + }, + { + "epoch": 4.08, + "learning_rate": 6.462778047748186e-06, + "loss": 0.0238, + "step": 87395 + }, + { + "epoch": 4.08, + "learning_rate": 6.4619942626934e-06, + "loss": 0.0535, + "step": 87400 + }, + { + "epoch": 4.08, + "learning_rate": 6.461210477638613e-06, + "loss": 0.1136, + "step": 87405 + }, + { + "epoch": 4.08, + "learning_rate": 6.460426692583827e-06, + "loss": 0.0846, + "step": 87410 + }, + { + "epoch": 4.08, + "learning_rate": 6.45964290752904e-06, + "loss": 0.0988, + "step": 87415 + }, + { + "epoch": 4.08, + "learning_rate": 6.458859122474254e-06, + "loss": 0.2105, + "step": 87420 + }, + { + "epoch": 4.08, + "learning_rate": 6.4580753374194675e-06, + "loss": 0.3813, + "step": 87425 + }, + { + "epoch": 4.08, + "learning_rate": 6.45729155236468e-06, + "loss": 0.0747, + "step": 87430 + }, + { + "epoch": 4.08, + "learning_rate": 6.456507767309893e-06, + "loss": 0.0165, + "step": 87435 + }, + { + "epoch": 4.08, + "learning_rate": 6.455723982255107e-06, + "loss": 0.0097, + "step": 87440 + }, + { + "epoch": 4.08, + "learning_rate": 6.45494019720032e-06, + "loss": 0.0734, + "step": 87445 + }, + { + "epoch": 4.08, + "learning_rate": 6.454156412145534e-06, + "loss": 0.0889, + "step": 87450 + }, + { + "epoch": 4.08, + "learning_rate": 6.453372627090747e-06, + "loss": 0.0418, + "step": 87455 + }, + { + "epoch": 4.08, + "learning_rate": 6.452588842035961e-06, + "loss": 0.0242, + "step": 87460 + }, + { + "epoch": 4.08, + "learning_rate": 6.4518050569811745e-06, + "loss": 0.1215, + "step": 87465 + }, + { + "epoch": 4.08, + "learning_rate": 6.4510212719263876e-06, + "loss": 0.1715, + "step": 87470 + }, + { + "epoch": 4.08, + "learning_rate": 6.4502374868716015e-06, + "loss": 0.2123, + "step": 87475 + }, + { + "epoch": 4.08, + "learning_rate": 6.4494537018168145e-06, + "loss": 0.0803, + "step": 87480 + }, + { + "epoch": 4.08, + "learning_rate": 6.4486699167620284e-06, + "loss": 0.0692, + "step": 87485 + }, + { + "epoch": 4.08, + "learning_rate": 6.4478861317072415e-06, + "loss": 0.0068, + "step": 87490 + }, + { + "epoch": 4.08, + "learning_rate": 6.447102346652454e-06, + "loss": 0.039, + "step": 87495 + }, + { + "epoch": 4.08, + "learning_rate": 6.446318561597668e-06, + "loss": 0.0553, + "step": 87500 + }, + { + "epoch": 4.08, + "learning_rate": 6.445534776542881e-06, + "loss": 0.1083, + "step": 87505 + }, + { + "epoch": 4.08, + "learning_rate": 6.444750991488095e-06, + "loss": 0.1061, + "step": 87510 + }, + { + "epoch": 4.08, + "learning_rate": 6.4439672064333085e-06, + "loss": 0.0711, + "step": 87515 + }, + { + "epoch": 4.08, + "learning_rate": 6.4431834213785215e-06, + "loss": 0.118, + "step": 87520 + }, + { + "epoch": 4.08, + "learning_rate": 6.4423996363237355e-06, + "loss": 0.1431, + "step": 87525 + }, + { + "epoch": 4.08, + "learning_rate": 6.4416158512689485e-06, + "loss": 0.1181, + "step": 87530 + }, + { + "epoch": 4.08, + "learning_rate": 6.440832066214162e-06, + "loss": 0.013, + "step": 87535 + }, + { + "epoch": 4.08, + "learning_rate": 6.4400482811593755e-06, + "loss": 0.0418, + "step": 87540 + }, + { + "epoch": 4.08, + "learning_rate": 6.439264496104589e-06, + "loss": 0.0703, + "step": 87545 + }, + { + "epoch": 4.09, + "learning_rate": 6.4384807110498024e-06, + "loss": 0.088, + "step": 87550 + }, + { + "epoch": 4.09, + "learning_rate": 6.437696925995016e-06, + "loss": 0.0454, + "step": 87555 + }, + { + "epoch": 4.09, + "learning_rate": 6.4369131409402286e-06, + "loss": 0.0776, + "step": 87560 + }, + { + "epoch": 4.09, + "learning_rate": 6.4361293558854425e-06, + "loss": 0.076, + "step": 87565 + }, + { + "epoch": 4.09, + "learning_rate": 6.4353455708306555e-06, + "loss": 0.1337, + "step": 87570 + }, + { + "epoch": 4.09, + "learning_rate": 6.4345617857758694e-06, + "loss": 0.1474, + "step": 87575 + }, + { + "epoch": 4.09, + "learning_rate": 6.4337780007210825e-06, + "loss": 0.0905, + "step": 87580 + }, + { + "epoch": 4.09, + "learning_rate": 6.432994215666296e-06, + "loss": 0.0118, + "step": 87585 + }, + { + "epoch": 4.09, + "learning_rate": 6.4322104306115094e-06, + "loss": 0.1066, + "step": 87590 + }, + { + "epoch": 4.09, + "learning_rate": 6.431426645556723e-06, + "loss": 0.0728, + "step": 87595 + }, + { + "epoch": 4.09, + "learning_rate": 6.430642860501936e-06, + "loss": 0.0312, + "step": 87600 + }, + { + "epoch": 4.09, + "learning_rate": 6.42985907544715e-06, + "loss": 0.0588, + "step": 87605 + }, + { + "epoch": 4.09, + "learning_rate": 6.429075290392363e-06, + "loss": 0.1211, + "step": 87610 + }, + { + "epoch": 4.09, + "learning_rate": 6.428291505337577e-06, + "loss": 0.1271, + "step": 87615 + }, + { + "epoch": 4.09, + "learning_rate": 6.42750772028279e-06, + "loss": 0.1344, + "step": 87620 + }, + { + "epoch": 4.09, + "learning_rate": 6.426723935228003e-06, + "loss": 0.3591, + "step": 87625 + }, + { + "epoch": 4.09, + "learning_rate": 6.4259401501732165e-06, + "loss": 0.0625, + "step": 87630 + }, + { + "epoch": 4.09, + "learning_rate": 6.42515636511843e-06, + "loss": 0.0176, + "step": 87635 + }, + { + "epoch": 4.09, + "learning_rate": 6.4243725800636434e-06, + "loss": 0.0641, + "step": 87640 + }, + { + "epoch": 4.09, + "learning_rate": 6.423588795008857e-06, + "loss": 0.0652, + "step": 87645 + }, + { + "epoch": 4.09, + "learning_rate": 6.42280500995407e-06, + "loss": 0.0472, + "step": 87650 + }, + { + "epoch": 4.09, + "learning_rate": 6.422021224899284e-06, + "loss": 0.1405, + "step": 87655 + }, + { + "epoch": 4.09, + "learning_rate": 6.421237439844497e-06, + "loss": 0.1497, + "step": 87660 + }, + { + "epoch": 4.09, + "learning_rate": 6.420453654789711e-06, + "loss": 0.0665, + "step": 87665 + }, + { + "epoch": 4.09, + "learning_rate": 6.419669869734924e-06, + "loss": 0.1844, + "step": 87670 + }, + { + "epoch": 4.09, + "learning_rate": 6.418886084680138e-06, + "loss": 0.2518, + "step": 87675 + }, + { + "epoch": 4.09, + "learning_rate": 6.418102299625351e-06, + "loss": 0.1124, + "step": 87680 + }, + { + "epoch": 4.09, + "learning_rate": 6.417318514570565e-06, + "loss": 0.0288, + "step": 87685 + }, + { + "epoch": 4.09, + "learning_rate": 6.416534729515777e-06, + "loss": 0.0627, + "step": 87690 + }, + { + "epoch": 4.09, + "learning_rate": 6.415750944460991e-06, + "loss": 0.049, + "step": 87695 + }, + { + "epoch": 4.09, + "learning_rate": 6.414967159406204e-06, + "loss": 0.0789, + "step": 87700 + }, + { + "epoch": 4.09, + "learning_rate": 6.414183374351418e-06, + "loss": 0.1132, + "step": 87705 + }, + { + "epoch": 4.09, + "learning_rate": 6.413399589296631e-06, + "loss": 0.081, + "step": 87710 + }, + { + "epoch": 4.09, + "learning_rate": 6.412615804241845e-06, + "loss": 0.1379, + "step": 87715 + }, + { + "epoch": 4.09, + "learning_rate": 6.411832019187058e-06, + "loss": 0.1553, + "step": 87720 + }, + { + "epoch": 4.09, + "learning_rate": 6.411048234132272e-06, + "loss": 0.1935, + "step": 87725 + }, + { + "epoch": 4.09, + "learning_rate": 6.410264449077486e-06, + "loss": 0.0936, + "step": 87730 + }, + { + "epoch": 4.09, + "learning_rate": 6.409480664022699e-06, + "loss": 0.0153, + "step": 87735 + }, + { + "epoch": 4.09, + "learning_rate": 6.408696878967913e-06, + "loss": 0.0296, + "step": 87740 + }, + { + "epoch": 4.09, + "learning_rate": 6.407913093913126e-06, + "loss": 0.0789, + "step": 87745 + }, + { + "epoch": 4.09, + "learning_rate": 6.40712930885834e-06, + "loss": 0.0188, + "step": 87750 + }, + { + "epoch": 4.09, + "learning_rate": 6.406345523803552e-06, + "loss": 0.1239, + "step": 87755 + }, + { + "epoch": 4.1, + "learning_rate": 6.405561738748765e-06, + "loss": 0.098, + "step": 87760 + }, + { + "epoch": 4.1, + "learning_rate": 6.404777953693979e-06, + "loss": 0.118, + "step": 87765 + }, + { + "epoch": 4.1, + "learning_rate": 6.403994168639192e-06, + "loss": 0.1554, + "step": 87770 + }, + { + "epoch": 4.1, + "learning_rate": 6.403210383584406e-06, + "loss": 0.1592, + "step": 87775 + }, + { + "epoch": 4.1, + "learning_rate": 6.40242659852962e-06, + "loss": 0.0485, + "step": 87780 + }, + { + "epoch": 4.1, + "learning_rate": 6.401642813474833e-06, + "loss": 0.0476, + "step": 87785 + }, + { + "epoch": 4.1, + "learning_rate": 6.400859028420047e-06, + "loss": 0.0409, + "step": 87790 + }, + { + "epoch": 4.1, + "learning_rate": 6.40007524336526e-06, + "loss": 0.0595, + "step": 87795 + }, + { + "epoch": 4.1, + "learning_rate": 6.399291458310474e-06, + "loss": 0.0683, + "step": 87800 + }, + { + "epoch": 4.1, + "learning_rate": 6.398507673255687e-06, + "loss": 0.1037, + "step": 87805 + }, + { + "epoch": 4.1, + "learning_rate": 6.397723888200901e-06, + "loss": 0.0907, + "step": 87810 + }, + { + "epoch": 4.1, + "learning_rate": 6.396940103146114e-06, + "loss": 0.0964, + "step": 87815 + }, + { + "epoch": 4.1, + "learning_rate": 6.396156318091326e-06, + "loss": 0.2069, + "step": 87820 + }, + { + "epoch": 4.1, + "learning_rate": 6.39537253303654e-06, + "loss": 0.2369, + "step": 87825 + }, + { + "epoch": 4.1, + "learning_rate": 6.394588747981754e-06, + "loss": 0.074, + "step": 87830 + }, + { + "epoch": 4.1, + "learning_rate": 6.393804962926967e-06, + "loss": 0.0256, + "step": 87835 + }, + { + "epoch": 4.1, + "learning_rate": 6.393021177872181e-06, + "loss": 0.0736, + "step": 87840 + }, + { + "epoch": 4.1, + "learning_rate": 6.392237392817394e-06, + "loss": 0.0449, + "step": 87845 + }, + { + "epoch": 4.1, + "learning_rate": 6.391453607762608e-06, + "loss": 0.0345, + "step": 87850 + }, + { + "epoch": 4.1, + "learning_rate": 6.390669822707821e-06, + "loss": 0.0579, + "step": 87855 + }, + { + "epoch": 4.1, + "learning_rate": 6.389886037653035e-06, + "loss": 0.0877, + "step": 87860 + }, + { + "epoch": 4.1, + "learning_rate": 6.389102252598248e-06, + "loss": 0.1333, + "step": 87865 + }, + { + "epoch": 4.1, + "learning_rate": 6.388318467543462e-06, + "loss": 0.1365, + "step": 87870 + }, + { + "epoch": 4.1, + "learning_rate": 6.387534682488675e-06, + "loss": 0.5326, + "step": 87875 + }, + { + "epoch": 4.1, + "learning_rate": 6.386750897433889e-06, + "loss": 0.0746, + "step": 87880 + }, + { + "epoch": 4.1, + "learning_rate": 6.385967112379101e-06, + "loss": 0.0096, + "step": 87885 + }, + { + "epoch": 4.1, + "learning_rate": 6.385183327324315e-06, + "loss": 0.0508, + "step": 87890 + }, + { + "epoch": 4.1, + "learning_rate": 6.384399542269528e-06, + "loss": 0.0339, + "step": 87895 + }, + { + "epoch": 4.1, + "learning_rate": 6.383615757214742e-06, + "loss": 0.0654, + "step": 87900 + }, + { + "epoch": 4.1, + "learning_rate": 6.382831972159955e-06, + "loss": 0.0918, + "step": 87905 + }, + { + "epoch": 4.1, + "learning_rate": 6.382048187105169e-06, + "loss": 0.0296, + "step": 87910 + }, + { + "epoch": 4.1, + "learning_rate": 6.381264402050382e-06, + "loss": 0.1635, + "step": 87915 + }, + { + "epoch": 4.1, + "learning_rate": 6.380480616995596e-06, + "loss": 0.2078, + "step": 87920 + }, + { + "epoch": 4.1, + "learning_rate": 6.379696831940809e-06, + "loss": 0.444, + "step": 87925 + }, + { + "epoch": 4.1, + "learning_rate": 6.378913046886023e-06, + "loss": 0.1321, + "step": 87930 + }, + { + "epoch": 4.1, + "learning_rate": 6.378129261831236e-06, + "loss": 0.0388, + "step": 87935 + }, + { + "epoch": 4.1, + "learning_rate": 6.37734547677645e-06, + "loss": 0.025, + "step": 87940 + }, + { + "epoch": 4.1, + "learning_rate": 6.376561691721663e-06, + "loss": 0.0972, + "step": 87945 + }, + { + "epoch": 4.1, + "learning_rate": 6.375777906666876e-06, + "loss": 0.0553, + "step": 87950 + }, + { + "epoch": 4.1, + "learning_rate": 6.374994121612089e-06, + "loss": 0.0721, + "step": 87955 + }, + { + "epoch": 4.1, + "learning_rate": 6.374210336557303e-06, + "loss": 0.0768, + "step": 87960 + }, + { + "epoch": 4.1, + "learning_rate": 6.373426551502516e-06, + "loss": 0.1282, + "step": 87965 + }, + { + "epoch": 4.1, + "learning_rate": 6.37264276644773e-06, + "loss": 0.2805, + "step": 87970 + }, + { + "epoch": 4.11, + "learning_rate": 6.371858981392943e-06, + "loss": 0.3247, + "step": 87975 + }, + { + "epoch": 4.11, + "learning_rate": 6.371075196338157e-06, + "loss": 0.0896, + "step": 87980 + }, + { + "epoch": 4.11, + "learning_rate": 6.37029141128337e-06, + "loss": 0.0147, + "step": 87985 + }, + { + "epoch": 4.11, + "learning_rate": 6.369507626228584e-06, + "loss": 0.0398, + "step": 87990 + }, + { + "epoch": 4.11, + "learning_rate": 6.368723841173797e-06, + "loss": 0.0385, + "step": 87995 + }, + { + "epoch": 4.11, + "learning_rate": 6.367940056119011e-06, + "loss": 0.0444, + "step": 88000 + }, + { + "epoch": 4.11, + "learning_rate": 6.367156271064225e-06, + "loss": 0.0426, + "step": 88005 + }, + { + "epoch": 4.11, + "learning_rate": 6.366372486009438e-06, + "loss": 0.0861, + "step": 88010 + }, + { + "epoch": 4.11, + "learning_rate": 6.36558870095465e-06, + "loss": 0.1315, + "step": 88015 + }, + { + "epoch": 4.11, + "learning_rate": 6.364804915899864e-06, + "loss": 0.1505, + "step": 88020 + }, + { + "epoch": 4.11, + "learning_rate": 6.364021130845077e-06, + "loss": 0.2346, + "step": 88025 + }, + { + "epoch": 4.11, + "learning_rate": 6.363237345790291e-06, + "loss": 0.0561, + "step": 88030 + }, + { + "epoch": 4.11, + "learning_rate": 6.362453560735504e-06, + "loss": 0.0582, + "step": 88035 + }, + { + "epoch": 4.11, + "learning_rate": 6.361669775680718e-06, + "loss": 0.0745, + "step": 88040 + }, + { + "epoch": 4.11, + "learning_rate": 6.360885990625932e-06, + "loss": 0.0559, + "step": 88045 + }, + { + "epoch": 4.11, + "learning_rate": 6.360102205571145e-06, + "loss": 0.0377, + "step": 88050 + }, + { + "epoch": 4.11, + "learning_rate": 6.359318420516359e-06, + "loss": 0.0607, + "step": 88055 + }, + { + "epoch": 4.11, + "learning_rate": 6.358534635461572e-06, + "loss": 0.1128, + "step": 88060 + }, + { + "epoch": 4.11, + "learning_rate": 6.357750850406786e-06, + "loss": 0.1015, + "step": 88065 + }, + { + "epoch": 4.11, + "learning_rate": 6.356967065351999e-06, + "loss": 0.1739, + "step": 88070 + }, + { + "epoch": 4.11, + "learning_rate": 6.3561832802972126e-06, + "loss": 0.1732, + "step": 88075 + }, + { + "epoch": 4.11, + "learning_rate": 6.355399495242425e-06, + "loss": 0.0648, + "step": 88080 + }, + { + "epoch": 4.11, + "learning_rate": 6.354615710187638e-06, + "loss": 0.0636, + "step": 88085 + }, + { + "epoch": 4.11, + "learning_rate": 6.353831925132852e-06, + "loss": 0.0788, + "step": 88090 + }, + { + "epoch": 4.11, + "learning_rate": 6.353048140078066e-06, + "loss": 0.055, + "step": 88095 + }, + { + "epoch": 4.11, + "learning_rate": 6.352264355023279e-06, + "loss": 0.0447, + "step": 88100 + }, + { + "epoch": 4.11, + "learning_rate": 6.351480569968493e-06, + "loss": 0.082, + "step": 88105 + }, + { + "epoch": 4.11, + "learning_rate": 6.350696784913706e-06, + "loss": 0.116, + "step": 88110 + }, + { + "epoch": 4.11, + "learning_rate": 6.34991299985892e-06, + "loss": 0.1232, + "step": 88115 + }, + { + "epoch": 4.11, + "learning_rate": 6.349129214804133e-06, + "loss": 0.0898, + "step": 88120 + }, + { + "epoch": 4.11, + "learning_rate": 6.3483454297493465e-06, + "loss": 0.3035, + "step": 88125 + }, + { + "epoch": 4.11, + "learning_rate": 6.34756164469456e-06, + "loss": 0.0733, + "step": 88130 + }, + { + "epoch": 4.11, + "learning_rate": 6.3467778596397735e-06, + "loss": 0.027, + "step": 88135 + }, + { + "epoch": 4.11, + "learning_rate": 6.3459940745849866e-06, + "loss": 0.0534, + "step": 88140 + }, + { + "epoch": 4.11, + "learning_rate": 6.3452102895302e-06, + "loss": 0.0438, + "step": 88145 + }, + { + "epoch": 4.11, + "learning_rate": 6.344426504475413e-06, + "loss": 0.035, + "step": 88150 + }, + { + "epoch": 4.11, + "learning_rate": 6.343642719420627e-06, + "loss": 0.075, + "step": 88155 + }, + { + "epoch": 4.11, + "learning_rate": 6.34285893436584e-06, + "loss": 0.1387, + "step": 88160 + }, + { + "epoch": 4.11, + "learning_rate": 6.3420751493110536e-06, + "loss": 0.0755, + "step": 88165 + }, + { + "epoch": 4.11, + "learning_rate": 6.341291364256267e-06, + "loss": 0.2312, + "step": 88170 + }, + { + "epoch": 4.11, + "learning_rate": 6.3405075792014805e-06, + "loss": 0.1952, + "step": 88175 + }, + { + "epoch": 4.11, + "learning_rate": 6.339723794146694e-06, + "loss": 0.0571, + "step": 88180 + }, + { + "epoch": 4.11, + "learning_rate": 6.3389400090919075e-06, + "loss": 0.0349, + "step": 88185 + }, + { + "epoch": 4.12, + "learning_rate": 6.3381562240371205e-06, + "loss": 0.0373, + "step": 88190 + }, + { + "epoch": 4.12, + "learning_rate": 6.3373724389823344e-06, + "loss": 0.0718, + "step": 88195 + }, + { + "epoch": 4.12, + "learning_rate": 6.3365886539275475e-06, + "loss": 0.0581, + "step": 88200 + }, + { + "epoch": 4.12, + "learning_rate": 6.335804868872761e-06, + "loss": 0.0827, + "step": 88205 + }, + { + "epoch": 4.12, + "learning_rate": 6.335021083817974e-06, + "loss": 0.0851, + "step": 88210 + }, + { + "epoch": 4.12, + "learning_rate": 6.3342372987631875e-06, + "loss": 0.1056, + "step": 88215 + }, + { + "epoch": 4.12, + "learning_rate": 6.333453513708401e-06, + "loss": 0.0726, + "step": 88220 + }, + { + "epoch": 4.12, + "learning_rate": 6.3326697286536145e-06, + "loss": 0.211, + "step": 88225 + }, + { + "epoch": 4.12, + "learning_rate": 6.3318859435988276e-06, + "loss": 0.1066, + "step": 88230 + }, + { + "epoch": 4.12, + "learning_rate": 6.3311021585440415e-06, + "loss": 0.0176, + "step": 88235 + }, + { + "epoch": 4.12, + "learning_rate": 6.3303183734892545e-06, + "loss": 0.0497, + "step": 88240 + }, + { + "epoch": 4.12, + "learning_rate": 6.3295345884344684e-06, + "loss": 0.0497, + "step": 88245 + }, + { + "epoch": 4.12, + "learning_rate": 6.3287508033796815e-06, + "loss": 0.0895, + "step": 88250 + }, + { + "epoch": 4.12, + "learning_rate": 6.327967018324895e-06, + "loss": 0.0476, + "step": 88255 + }, + { + "epoch": 4.12, + "learning_rate": 6.3271832332701084e-06, + "loss": 0.028, + "step": 88260 + }, + { + "epoch": 4.12, + "learning_rate": 6.326399448215322e-06, + "loss": 0.1155, + "step": 88265 + }, + { + "epoch": 4.12, + "learning_rate": 6.325615663160536e-06, + "loss": 0.1764, + "step": 88270 + }, + { + "epoch": 4.12, + "learning_rate": 6.3248318781057485e-06, + "loss": 0.2684, + "step": 88275 + }, + { + "epoch": 4.12, + "learning_rate": 6.3240480930509615e-06, + "loss": 0.1124, + "step": 88280 + }, + { + "epoch": 4.12, + "learning_rate": 6.3232643079961754e-06, + "loss": 0.0361, + "step": 88285 + }, + { + "epoch": 4.12, + "learning_rate": 6.3224805229413885e-06, + "loss": 0.0578, + "step": 88290 + }, + { + "epoch": 4.12, + "learning_rate": 6.321696737886602e-06, + "loss": 0.0501, + "step": 88295 + }, + { + "epoch": 4.12, + "learning_rate": 6.3209129528318155e-06, + "loss": 0.0494, + "step": 88300 + }, + { + "epoch": 4.12, + "learning_rate": 6.320129167777029e-06, + "loss": 0.0669, + "step": 88305 + }, + { + "epoch": 4.12, + "learning_rate": 6.319345382722243e-06, + "loss": 0.0575, + "step": 88310 + }, + { + "epoch": 4.12, + "learning_rate": 6.318561597667456e-06, + "loss": 0.2526, + "step": 88315 + }, + { + "epoch": 4.12, + "learning_rate": 6.31777781261267e-06, + "loss": 0.1299, + "step": 88320 + }, + { + "epoch": 4.12, + "learning_rate": 6.316994027557883e-06, + "loss": 0.3254, + "step": 88325 + }, + { + "epoch": 4.12, + "learning_rate": 6.316210242503097e-06, + "loss": 0.1457, + "step": 88330 + }, + { + "epoch": 4.12, + "learning_rate": 6.31542645744831e-06, + "loss": 0.0232, + "step": 88335 + }, + { + "epoch": 4.12, + "learning_rate": 6.3146426723935225e-06, + "loss": 0.0495, + "step": 88340 + }, + { + "epoch": 4.12, + "learning_rate": 6.313858887338736e-06, + "loss": 0.0355, + "step": 88345 + }, + { + "epoch": 4.12, + "learning_rate": 6.3130751022839494e-06, + "loss": 0.1044, + "step": 88350 + }, + { + "epoch": 4.12, + "learning_rate": 6.312291317229163e-06, + "loss": 0.0989, + "step": 88355 + }, + { + "epoch": 4.12, + "learning_rate": 6.311507532174377e-06, + "loss": 0.0778, + "step": 88360 + }, + { + "epoch": 4.12, + "learning_rate": 6.31072374711959e-06, + "loss": 0.0731, + "step": 88365 + }, + { + "epoch": 4.12, + "learning_rate": 6.309939962064804e-06, + "loss": 0.1624, + "step": 88370 + }, + { + "epoch": 4.12, + "learning_rate": 6.309156177010017e-06, + "loss": 0.214, + "step": 88375 + }, + { + "epoch": 4.12, + "learning_rate": 6.308372391955231e-06, + "loss": 0.103, + "step": 88380 + }, + { + "epoch": 4.12, + "learning_rate": 6.307588606900444e-06, + "loss": 0.0078, + "step": 88385 + }, + { + "epoch": 4.12, + "learning_rate": 6.306804821845658e-06, + "loss": 0.0153, + "step": 88390 + }, + { + "epoch": 4.12, + "learning_rate": 6.306021036790871e-06, + "loss": 0.0967, + "step": 88395 + }, + { + "epoch": 4.12, + "learning_rate": 6.305237251736085e-06, + "loss": 0.0975, + "step": 88400 + }, + { + "epoch": 4.13, + "learning_rate": 6.304453466681297e-06, + "loss": 0.1028, + "step": 88405 + }, + { + "epoch": 4.13, + "learning_rate": 6.303669681626511e-06, + "loss": 0.0814, + "step": 88410 + }, + { + "epoch": 4.13, + "learning_rate": 6.302885896571724e-06, + "loss": 0.1753, + "step": 88415 + }, + { + "epoch": 4.13, + "learning_rate": 6.302102111516938e-06, + "loss": 0.1501, + "step": 88420 + }, + { + "epoch": 4.13, + "learning_rate": 6.301318326462151e-06, + "loss": 0.2274, + "step": 88425 + }, + { + "epoch": 4.13, + "learning_rate": 6.300534541407365e-06, + "loss": 0.0729, + "step": 88430 + }, + { + "epoch": 4.13, + "learning_rate": 6.299750756352578e-06, + "loss": 0.0138, + "step": 88435 + }, + { + "epoch": 4.13, + "learning_rate": 6.298966971297792e-06, + "loss": 0.0052, + "step": 88440 + }, + { + "epoch": 4.13, + "learning_rate": 6.298183186243005e-06, + "loss": 0.0546, + "step": 88445 + }, + { + "epoch": 4.13, + "learning_rate": 6.297399401188219e-06, + "loss": 0.063, + "step": 88450 + }, + { + "epoch": 4.13, + "learning_rate": 6.296615616133432e-06, + "loss": 0.0907, + "step": 88455 + }, + { + "epoch": 4.13, + "learning_rate": 6.295831831078646e-06, + "loss": 0.0612, + "step": 88460 + }, + { + "epoch": 4.13, + "learning_rate": 6.295048046023859e-06, + "loss": 0.1018, + "step": 88465 + }, + { + "epoch": 4.13, + "learning_rate": 6.294264260969072e-06, + "loss": 0.2148, + "step": 88470 + }, + { + "epoch": 4.13, + "learning_rate": 6.293480475914285e-06, + "loss": 0.4048, + "step": 88475 + }, + { + "epoch": 4.13, + "learning_rate": 6.292696690859499e-06, + "loss": 0.1049, + "step": 88480 + }, + { + "epoch": 4.13, + "learning_rate": 6.291912905804712e-06, + "loss": 0.0336, + "step": 88485 + }, + { + "epoch": 4.13, + "learning_rate": 6.291129120749926e-06, + "loss": 0.0259, + "step": 88490 + }, + { + "epoch": 4.13, + "learning_rate": 6.290345335695139e-06, + "loss": 0.0528, + "step": 88495 + }, + { + "epoch": 4.13, + "learning_rate": 6.289561550640353e-06, + "loss": 0.0865, + "step": 88500 + }, + { + "epoch": 4.13, + "learning_rate": 6.288777765585566e-06, + "loss": 0.1226, + "step": 88505 + }, + { + "epoch": 4.13, + "learning_rate": 6.28799398053078e-06, + "loss": 0.0952, + "step": 88510 + }, + { + "epoch": 4.13, + "learning_rate": 6.287210195475993e-06, + "loss": 0.2186, + "step": 88515 + }, + { + "epoch": 4.13, + "learning_rate": 6.286426410421207e-06, + "loss": 0.2144, + "step": 88520 + }, + { + "epoch": 4.13, + "learning_rate": 6.28564262536642e-06, + "loss": 0.3712, + "step": 88525 + }, + { + "epoch": 4.13, + "learning_rate": 6.284858840311634e-06, + "loss": 0.1564, + "step": 88530 + }, + { + "epoch": 4.13, + "learning_rate": 6.284075055256846e-06, + "loss": 0.065, + "step": 88535 + }, + { + "epoch": 4.13, + "learning_rate": 6.28329127020206e-06, + "loss": 0.0444, + "step": 88540 + }, + { + "epoch": 4.13, + "learning_rate": 6.282507485147273e-06, + "loss": 0.0389, + "step": 88545 + }, + { + "epoch": 4.13, + "learning_rate": 6.281723700092487e-06, + "loss": 0.0365, + "step": 88550 + }, + { + "epoch": 4.13, + "learning_rate": 6.2809399150377e-06, + "loss": 0.1061, + "step": 88555 + }, + { + "epoch": 4.13, + "learning_rate": 6.280156129982914e-06, + "loss": 0.1723, + "step": 88560 + }, + { + "epoch": 4.13, + "learning_rate": 6.279372344928127e-06, + "loss": 0.1295, + "step": 88565 + }, + { + "epoch": 4.13, + "learning_rate": 6.278588559873341e-06, + "loss": 0.2212, + "step": 88570 + }, + { + "epoch": 4.13, + "learning_rate": 6.277804774818554e-06, + "loss": 0.2194, + "step": 88575 + }, + { + "epoch": 4.13, + "learning_rate": 6.277020989763768e-06, + "loss": 0.0349, + "step": 88580 + }, + { + "epoch": 4.13, + "learning_rate": 6.276237204708982e-06, + "loss": 0.0238, + "step": 88585 + }, + { + "epoch": 4.13, + "learning_rate": 6.275453419654195e-06, + "loss": 0.0562, + "step": 88590 + }, + { + "epoch": 4.13, + "learning_rate": 6.274669634599409e-06, + "loss": 0.0424, + "step": 88595 + }, + { + "epoch": 4.13, + "learning_rate": 6.273885849544621e-06, + "loss": 0.0506, + "step": 88600 + }, + { + "epoch": 4.13, + "learning_rate": 6.273102064489834e-06, + "loss": 0.0527, + "step": 88605 + }, + { + "epoch": 4.13, + "learning_rate": 6.272318279435048e-06, + "loss": 0.077, + "step": 88610 + }, + { + "epoch": 4.13, + "learning_rate": 6.271534494380261e-06, + "loss": 0.1514, + "step": 88615 + }, + { + "epoch": 4.14, + "learning_rate": 6.270750709325475e-06, + "loss": 0.1529, + "step": 88620 + }, + { + "epoch": 4.14, + "learning_rate": 6.269966924270689e-06, + "loss": 0.2726, + "step": 88625 + }, + { + "epoch": 4.14, + "learning_rate": 6.269183139215902e-06, + "loss": 0.0565, + "step": 88630 + }, + { + "epoch": 4.14, + "learning_rate": 6.268399354161116e-06, + "loss": 0.0463, + "step": 88635 + }, + { + "epoch": 4.14, + "learning_rate": 6.267615569106329e-06, + "loss": 0.0227, + "step": 88640 + }, + { + "epoch": 4.14, + "learning_rate": 6.266831784051543e-06, + "loss": 0.0636, + "step": 88645 + }, + { + "epoch": 4.14, + "learning_rate": 6.266047998996756e-06, + "loss": 0.053, + "step": 88650 + }, + { + "epoch": 4.14, + "learning_rate": 6.26526421394197e-06, + "loss": 0.0368, + "step": 88655 + }, + { + "epoch": 4.14, + "learning_rate": 6.264480428887183e-06, + "loss": 0.0499, + "step": 88660 + }, + { + "epoch": 4.14, + "learning_rate": 6.263696643832395e-06, + "loss": 0.1441, + "step": 88665 + }, + { + "epoch": 4.14, + "learning_rate": 6.262912858777609e-06, + "loss": 0.1812, + "step": 88670 + }, + { + "epoch": 4.14, + "learning_rate": 6.262129073722823e-06, + "loss": 0.2325, + "step": 88675 + }, + { + "epoch": 4.14, + "learning_rate": 6.261345288668036e-06, + "loss": 0.0733, + "step": 88680 + }, + { + "epoch": 4.14, + "learning_rate": 6.26056150361325e-06, + "loss": 0.0314, + "step": 88685 + }, + { + "epoch": 4.14, + "learning_rate": 6.259777718558463e-06, + "loss": 0.0476, + "step": 88690 + }, + { + "epoch": 4.14, + "learning_rate": 6.258993933503677e-06, + "loss": 0.0603, + "step": 88695 + }, + { + "epoch": 4.14, + "learning_rate": 6.25821014844889e-06, + "loss": 0.0249, + "step": 88700 + }, + { + "epoch": 4.14, + "learning_rate": 6.257426363394104e-06, + "loss": 0.0983, + "step": 88705 + }, + { + "epoch": 4.14, + "learning_rate": 6.256642578339317e-06, + "loss": 0.2059, + "step": 88710 + }, + { + "epoch": 4.14, + "learning_rate": 6.255858793284531e-06, + "loss": 0.0781, + "step": 88715 + }, + { + "epoch": 4.14, + "learning_rate": 6.255075008229744e-06, + "loss": 0.1035, + "step": 88720 + }, + { + "epoch": 4.14, + "learning_rate": 6.254291223174958e-06, + "loss": 0.5021, + "step": 88725 + }, + { + "epoch": 4.14, + "learning_rate": 6.25350743812017e-06, + "loss": 0.0905, + "step": 88730 + }, + { + "epoch": 4.14, + "learning_rate": 6.252723653065384e-06, + "loss": 0.0015, + "step": 88735 + }, + { + "epoch": 4.14, + "learning_rate": 6.251939868010597e-06, + "loss": 0.0715, + "step": 88740 + }, + { + "epoch": 4.14, + "learning_rate": 6.251156082955811e-06, + "loss": 0.0508, + "step": 88745 + }, + { + "epoch": 4.14, + "learning_rate": 6.250372297901024e-06, + "loss": 0.0438, + "step": 88750 + }, + { + "epoch": 4.14, + "learning_rate": 6.249588512846238e-06, + "loss": 0.0913, + "step": 88755 + }, + { + "epoch": 4.14, + "learning_rate": 6.248804727791451e-06, + "loss": 0.0922, + "step": 88760 + }, + { + "epoch": 4.14, + "learning_rate": 6.248020942736665e-06, + "loss": 0.1287, + "step": 88765 + }, + { + "epoch": 4.14, + "learning_rate": 6.247237157681878e-06, + "loss": 0.1945, + "step": 88770 + }, + { + "epoch": 4.14, + "learning_rate": 6.246453372627092e-06, + "loss": 0.4309, + "step": 88775 + }, + { + "epoch": 4.14, + "learning_rate": 6.245669587572305e-06, + "loss": 0.0991, + "step": 88780 + }, + { + "epoch": 4.14, + "learning_rate": 6.244885802517519e-06, + "loss": 0.1138, + "step": 88785 + }, + { + "epoch": 4.14, + "learning_rate": 6.244102017462732e-06, + "loss": 0.0286, + "step": 88790 + }, + { + "epoch": 4.14, + "learning_rate": 6.243318232407945e-06, + "loss": 0.0282, + "step": 88795 + }, + { + "epoch": 4.14, + "learning_rate": 6.242534447353158e-06, + "loss": 0.0464, + "step": 88800 + }, + { + "epoch": 4.14, + "learning_rate": 6.241750662298372e-06, + "loss": 0.1241, + "step": 88805 + }, + { + "epoch": 4.14, + "learning_rate": 6.240966877243585e-06, + "loss": 0.0799, + "step": 88810 + }, + { + "epoch": 4.14, + "learning_rate": 6.240183092188799e-06, + "loss": 0.0658, + "step": 88815 + }, + { + "epoch": 4.14, + "learning_rate": 6.239399307134012e-06, + "loss": 0.1682, + "step": 88820 + }, + { + "epoch": 4.14, + "learning_rate": 6.238615522079226e-06, + "loss": 0.1947, + "step": 88825 + }, + { + "epoch": 4.14, + "learning_rate": 6.237831737024439e-06, + "loss": 0.109, + "step": 88830 + }, + { + "epoch": 4.15, + "learning_rate": 6.2370479519696526e-06, + "loss": 0.0184, + "step": 88835 + }, + { + "epoch": 4.15, + "learning_rate": 6.236264166914866e-06, + "loss": 0.0241, + "step": 88840 + }, + { + "epoch": 4.15, + "learning_rate": 6.2354803818600795e-06, + "loss": 0.0518, + "step": 88845 + }, + { + "epoch": 4.15, + "learning_rate": 6.2346965968052934e-06, + "loss": 0.0389, + "step": 88850 + }, + { + "epoch": 4.15, + "learning_rate": 6.2339128117505065e-06, + "loss": 0.0605, + "step": 88855 + }, + { + "epoch": 4.15, + "learning_rate": 6.233129026695719e-06, + "loss": 0.1007, + "step": 88860 + }, + { + "epoch": 4.15, + "learning_rate": 6.232345241640933e-06, + "loss": 0.1136, + "step": 88865 + }, + { + "epoch": 4.15, + "learning_rate": 6.231561456586146e-06, + "loss": 0.1695, + "step": 88870 + }, + { + "epoch": 4.15, + "learning_rate": 6.2307776715313596e-06, + "loss": 0.3442, + "step": 88875 + }, + { + "epoch": 4.15, + "learning_rate": 6.229993886476573e-06, + "loss": 0.13, + "step": 88880 + }, + { + "epoch": 4.15, + "learning_rate": 6.2292101014217865e-06, + "loss": 0.0032, + "step": 88885 + }, + { + "epoch": 4.15, + "learning_rate": 6.2284263163670004e-06, + "loss": 0.0323, + "step": 88890 + }, + { + "epoch": 4.15, + "learning_rate": 6.2276425313122135e-06, + "loss": 0.0368, + "step": 88895 + }, + { + "epoch": 4.15, + "learning_rate": 6.226858746257427e-06, + "loss": 0.0523, + "step": 88900 + }, + { + "epoch": 4.15, + "learning_rate": 6.2260749612026405e-06, + "loss": 0.0824, + "step": 88905 + }, + { + "epoch": 4.15, + "learning_rate": 6.225291176147854e-06, + "loss": 0.1514, + "step": 88910 + }, + { + "epoch": 4.15, + "learning_rate": 6.224507391093067e-06, + "loss": 0.1334, + "step": 88915 + }, + { + "epoch": 4.15, + "learning_rate": 6.223723606038281e-06, + "loss": 0.1909, + "step": 88920 + }, + { + "epoch": 4.15, + "learning_rate": 6.2229398209834935e-06, + "loss": 0.2378, + "step": 88925 + }, + { + "epoch": 4.15, + "learning_rate": 6.222156035928707e-06, + "loss": 0.1063, + "step": 88930 + }, + { + "epoch": 4.15, + "learning_rate": 6.2213722508739205e-06, + "loss": 0.0204, + "step": 88935 + }, + { + "epoch": 4.15, + "learning_rate": 6.220588465819134e-06, + "loss": 0.0123, + "step": 88940 + }, + { + "epoch": 4.15, + "learning_rate": 6.2198046807643475e-06, + "loss": 0.0628, + "step": 88945 + }, + { + "epoch": 4.15, + "learning_rate": 6.219020895709561e-06, + "loss": 0.02, + "step": 88950 + }, + { + "epoch": 4.15, + "learning_rate": 6.2182371106547744e-06, + "loss": 0.1009, + "step": 88955 + }, + { + "epoch": 4.15, + "learning_rate": 6.217453325599988e-06, + "loss": 0.071, + "step": 88960 + }, + { + "epoch": 4.15, + "learning_rate": 6.216669540545201e-06, + "loss": 0.08, + "step": 88965 + }, + { + "epoch": 4.15, + "learning_rate": 6.215885755490415e-06, + "loss": 0.0997, + "step": 88970 + }, + { + "epoch": 4.15, + "learning_rate": 6.215101970435628e-06, + "loss": 0.277, + "step": 88975 + }, + { + "epoch": 4.15, + "learning_rate": 6.214318185380842e-06, + "loss": 0.0999, + "step": 88980 + }, + { + "epoch": 4.15, + "learning_rate": 6.213534400326055e-06, + "loss": 0.0273, + "step": 88985 + }, + { + "epoch": 4.15, + "learning_rate": 6.212750615271268e-06, + "loss": 0.0476, + "step": 88990 + }, + { + "epoch": 4.15, + "learning_rate": 6.2119668302164814e-06, + "loss": 0.0303, + "step": 88995 + }, + { + "epoch": 4.15, + "learning_rate": 6.211183045161695e-06, + "loss": 0.0347, + "step": 89000 + }, + { + "epoch": 4.15, + "learning_rate": 6.210399260106908e-06, + "loss": 0.0411, + "step": 89005 + }, + { + "epoch": 4.15, + "learning_rate": 6.209615475052122e-06, + "loss": 0.0864, + "step": 89010 + }, + { + "epoch": 4.15, + "learning_rate": 6.208831689997335e-06, + "loss": 0.1301, + "step": 89015 + }, + { + "epoch": 4.15, + "learning_rate": 6.208047904942549e-06, + "loss": 0.1993, + "step": 89020 + }, + { + "epoch": 4.15, + "learning_rate": 6.207264119887762e-06, + "loss": 0.0947, + "step": 89025 + }, + { + "epoch": 4.15, + "learning_rate": 6.206480334832976e-06, + "loss": 0.078, + "step": 89030 + }, + { + "epoch": 4.15, + "learning_rate": 6.205696549778189e-06, + "loss": 0.0447, + "step": 89035 + }, + { + "epoch": 4.15, + "learning_rate": 6.204912764723403e-06, + "loss": 0.032, + "step": 89040 + }, + { + "epoch": 4.15, + "learning_rate": 6.204128979668616e-06, + "loss": 0.11, + "step": 89045 + }, + { + "epoch": 4.16, + "learning_rate": 6.20334519461383e-06, + "loss": 0.1048, + "step": 89050 + }, + { + "epoch": 4.16, + "learning_rate": 6.202561409559042e-06, + "loss": 0.0573, + "step": 89055 + }, + { + "epoch": 4.16, + "learning_rate": 6.201777624504256e-06, + "loss": 0.0839, + "step": 89060 + }, + { + "epoch": 4.16, + "learning_rate": 6.200993839449469e-06, + "loss": 0.1672, + "step": 89065 + }, + { + "epoch": 4.16, + "learning_rate": 6.200210054394683e-06, + "loss": 0.2303, + "step": 89070 + }, + { + "epoch": 4.16, + "learning_rate": 6.199426269339896e-06, + "loss": 0.4011, + "step": 89075 + }, + { + "epoch": 4.16, + "learning_rate": 6.19864248428511e-06, + "loss": 0.0853, + "step": 89080 + }, + { + "epoch": 4.16, + "learning_rate": 6.197858699230323e-06, + "loss": 0.0321, + "step": 89085 + }, + { + "epoch": 4.16, + "learning_rate": 6.197074914175537e-06, + "loss": 0.0147, + "step": 89090 + }, + { + "epoch": 4.16, + "learning_rate": 6.19629112912075e-06, + "loss": 0.051, + "step": 89095 + }, + { + "epoch": 4.16, + "learning_rate": 6.195507344065964e-06, + "loss": 0.0454, + "step": 89100 + }, + { + "epoch": 4.16, + "learning_rate": 6.194723559011177e-06, + "loss": 0.0369, + "step": 89105 + }, + { + "epoch": 4.16, + "learning_rate": 6.193939773956391e-06, + "loss": 0.1615, + "step": 89110 + }, + { + "epoch": 4.16, + "learning_rate": 6.193155988901605e-06, + "loss": 0.1234, + "step": 89115 + }, + { + "epoch": 4.16, + "learning_rate": 6.192372203846817e-06, + "loss": 0.1306, + "step": 89120 + }, + { + "epoch": 4.16, + "learning_rate": 6.19158841879203e-06, + "loss": 0.305, + "step": 89125 + }, + { + "epoch": 4.16, + "learning_rate": 6.190804633737244e-06, + "loss": 0.067, + "step": 89130 + }, + { + "epoch": 4.16, + "learning_rate": 6.190020848682457e-06, + "loss": 0.0449, + "step": 89135 + }, + { + "epoch": 4.16, + "learning_rate": 6.189237063627671e-06, + "loss": 0.0631, + "step": 89140 + }, + { + "epoch": 4.16, + "learning_rate": 6.188453278572884e-06, + "loss": 0.0649, + "step": 89145 + }, + { + "epoch": 4.16, + "learning_rate": 6.187669493518098e-06, + "loss": 0.0574, + "step": 89150 + }, + { + "epoch": 4.16, + "learning_rate": 6.186885708463311e-06, + "loss": 0.0685, + "step": 89155 + }, + { + "epoch": 4.16, + "learning_rate": 6.186101923408525e-06, + "loss": 0.0932, + "step": 89160 + }, + { + "epoch": 4.16, + "learning_rate": 6.185318138353739e-06, + "loss": 0.1932, + "step": 89165 + }, + { + "epoch": 4.16, + "learning_rate": 6.184534353298952e-06, + "loss": 0.2358, + "step": 89170 + }, + { + "epoch": 4.16, + "learning_rate": 6.183750568244166e-06, + "loss": 0.2233, + "step": 89175 + }, + { + "epoch": 4.16, + "learning_rate": 6.182966783189379e-06, + "loss": 0.0727, + "step": 89180 + }, + { + "epoch": 4.16, + "learning_rate": 6.182182998134591e-06, + "loss": 0.0821, + "step": 89185 + }, + { + "epoch": 4.16, + "learning_rate": 6.181399213079805e-06, + "loss": 0.0073, + "step": 89190 + }, + { + "epoch": 4.16, + "learning_rate": 6.180615428025018e-06, + "loss": 0.0236, + "step": 89195 + }, + { + "epoch": 4.16, + "learning_rate": 6.179831642970232e-06, + "loss": 0.0535, + "step": 89200 + }, + { + "epoch": 4.16, + "learning_rate": 6.179047857915446e-06, + "loss": 0.0256, + "step": 89205 + }, + { + "epoch": 4.16, + "learning_rate": 6.178264072860659e-06, + "loss": 0.0649, + "step": 89210 + }, + { + "epoch": 4.16, + "learning_rate": 6.177480287805873e-06, + "loss": 0.065, + "step": 89215 + }, + { + "epoch": 4.16, + "learning_rate": 6.176696502751086e-06, + "loss": 0.2857, + "step": 89220 + }, + { + "epoch": 4.16, + "learning_rate": 6.1759127176963e-06, + "loss": 0.2933, + "step": 89225 + }, + { + "epoch": 4.16, + "learning_rate": 6.175128932641513e-06, + "loss": 0.1056, + "step": 89230 + }, + { + "epoch": 4.16, + "learning_rate": 6.174345147586727e-06, + "loss": 0.0446, + "step": 89235 + }, + { + "epoch": 4.16, + "learning_rate": 6.17356136253194e-06, + "loss": 0.0522, + "step": 89240 + }, + { + "epoch": 4.16, + "learning_rate": 6.172777577477154e-06, + "loss": 0.0394, + "step": 89245 + }, + { + "epoch": 4.16, + "learning_rate": 6.171993792422366e-06, + "loss": 0.0233, + "step": 89250 + }, + { + "epoch": 4.16, + "learning_rate": 6.17121000736758e-06, + "loss": 0.0849, + "step": 89255 + }, + { + "epoch": 4.16, + "learning_rate": 6.170426222312793e-06, + "loss": 0.079, + "step": 89260 + }, + { + "epoch": 4.17, + "learning_rate": 6.169642437258007e-06, + "loss": 0.1592, + "step": 89265 + }, + { + "epoch": 4.17, + "learning_rate": 6.16885865220322e-06, + "loss": 0.0925, + "step": 89270 + }, + { + "epoch": 4.17, + "learning_rate": 6.168074867148434e-06, + "loss": 0.1958, + "step": 89275 + }, + { + "epoch": 4.17, + "learning_rate": 6.167291082093647e-06, + "loss": 0.1201, + "step": 89280 + }, + { + "epoch": 4.17, + "learning_rate": 6.166507297038861e-06, + "loss": 0.0116, + "step": 89285 + }, + { + "epoch": 4.17, + "learning_rate": 6.165723511984074e-06, + "loss": 0.0647, + "step": 89290 + }, + { + "epoch": 4.17, + "learning_rate": 6.164939726929288e-06, + "loss": 0.0438, + "step": 89295 + }, + { + "epoch": 4.17, + "learning_rate": 6.164155941874501e-06, + "loss": 0.0513, + "step": 89300 + }, + { + "epoch": 4.17, + "learning_rate": 6.163372156819715e-06, + "loss": 0.1133, + "step": 89305 + }, + { + "epoch": 4.17, + "learning_rate": 6.162588371764928e-06, + "loss": 0.1608, + "step": 89310 + }, + { + "epoch": 4.17, + "learning_rate": 6.161804586710141e-06, + "loss": 0.0674, + "step": 89315 + }, + { + "epoch": 4.17, + "learning_rate": 6.161020801655354e-06, + "loss": 0.1883, + "step": 89320 + }, + { + "epoch": 4.17, + "learning_rate": 6.160237016600568e-06, + "loss": 0.3976, + "step": 89325 + }, + { + "epoch": 4.17, + "learning_rate": 6.159453231545781e-06, + "loss": 0.0882, + "step": 89330 + }, + { + "epoch": 4.17, + "learning_rate": 6.158669446490995e-06, + "loss": 0.042, + "step": 89335 + }, + { + "epoch": 4.17, + "learning_rate": 6.157885661436208e-06, + "loss": 0.0464, + "step": 89340 + }, + { + "epoch": 4.17, + "learning_rate": 6.157101876381422e-06, + "loss": 0.0564, + "step": 89345 + }, + { + "epoch": 4.17, + "learning_rate": 6.156318091326635e-06, + "loss": 0.0426, + "step": 89350 + }, + { + "epoch": 4.17, + "learning_rate": 6.155534306271849e-06, + "loss": 0.0953, + "step": 89355 + }, + { + "epoch": 4.17, + "learning_rate": 6.154750521217062e-06, + "loss": 0.0831, + "step": 89360 + }, + { + "epoch": 4.17, + "learning_rate": 6.153966736162276e-06, + "loss": 0.1253, + "step": 89365 + }, + { + "epoch": 4.17, + "learning_rate": 6.153182951107489e-06, + "loss": 0.131, + "step": 89370 + }, + { + "epoch": 4.17, + "learning_rate": 6.152399166052703e-06, + "loss": 0.2002, + "step": 89375 + }, + { + "epoch": 4.17, + "learning_rate": 6.151615380997915e-06, + "loss": 0.0783, + "step": 89380 + }, + { + "epoch": 4.17, + "learning_rate": 6.150831595943129e-06, + "loss": 0.0061, + "step": 89385 + }, + { + "epoch": 4.17, + "learning_rate": 6.150047810888342e-06, + "loss": 0.012, + "step": 89390 + }, + { + "epoch": 4.17, + "learning_rate": 6.149264025833556e-06, + "loss": 0.0333, + "step": 89395 + }, + { + "epoch": 4.17, + "learning_rate": 6.148480240778769e-06, + "loss": 0.0697, + "step": 89400 + }, + { + "epoch": 4.17, + "learning_rate": 6.147696455723983e-06, + "loss": 0.0735, + "step": 89405 + }, + { + "epoch": 4.17, + "learning_rate": 6.146912670669196e-06, + "loss": 0.0776, + "step": 89410 + }, + { + "epoch": 4.17, + "learning_rate": 6.14612888561441e-06, + "loss": 0.172, + "step": 89415 + }, + { + "epoch": 4.17, + "learning_rate": 6.145345100559623e-06, + "loss": 0.1534, + "step": 89420 + }, + { + "epoch": 4.17, + "learning_rate": 6.144561315504837e-06, + "loss": 0.3467, + "step": 89425 + }, + { + "epoch": 4.17, + "learning_rate": 6.143777530450051e-06, + "loss": 0.0825, + "step": 89430 + }, + { + "epoch": 4.17, + "learning_rate": 6.142993745395264e-06, + "loss": 0.046, + "step": 89435 + }, + { + "epoch": 4.17, + "learning_rate": 6.1422099603404776e-06, + "loss": 0.0452, + "step": 89440 + }, + { + "epoch": 4.17, + "learning_rate": 6.14142617528569e-06, + "loss": 0.0702, + "step": 89445 + }, + { + "epoch": 4.17, + "learning_rate": 6.140642390230903e-06, + "loss": 0.0492, + "step": 89450 + }, + { + "epoch": 4.17, + "learning_rate": 6.139858605176117e-06, + "loss": 0.0686, + "step": 89455 + }, + { + "epoch": 4.17, + "learning_rate": 6.13907482012133e-06, + "loss": 0.0756, + "step": 89460 + }, + { + "epoch": 4.17, + "learning_rate": 6.138291035066544e-06, + "loss": 0.1233, + "step": 89465 + }, + { + "epoch": 4.17, + "learning_rate": 6.137507250011758e-06, + "loss": 0.1597, + "step": 89470 + }, + { + "epoch": 4.18, + "learning_rate": 6.136723464956971e-06, + "loss": 0.1574, + "step": 89475 + }, + { + "epoch": 4.18, + "learning_rate": 6.1359396799021846e-06, + "loss": 0.0584, + "step": 89480 + }, + { + "epoch": 4.18, + "learning_rate": 6.135155894847398e-06, + "loss": 0.0504, + "step": 89485 + }, + { + "epoch": 4.18, + "learning_rate": 6.1343721097926115e-06, + "loss": 0.0132, + "step": 89490 + }, + { + "epoch": 4.18, + "learning_rate": 6.133588324737825e-06, + "loss": 0.1002, + "step": 89495 + }, + { + "epoch": 4.18, + "learning_rate": 6.1328045396830385e-06, + "loss": 0.0326, + "step": 89500 + }, + { + "epoch": 4.18, + "learning_rate": 6.1320207546282516e-06, + "loss": 0.15, + "step": 89505 + }, + { + "epoch": 4.18, + "learning_rate": 6.131236969573464e-06, + "loss": 0.0347, + "step": 89510 + }, + { + "epoch": 4.18, + "learning_rate": 6.130453184518678e-06, + "loss": 0.1959, + "step": 89515 + }, + { + "epoch": 4.18, + "learning_rate": 6.1296693994638916e-06, + "loss": 0.2061, + "step": 89520 + }, + { + "epoch": 4.18, + "learning_rate": 6.128885614409105e-06, + "loss": 0.2217, + "step": 89525 + }, + { + "epoch": 4.18, + "learning_rate": 6.1281018293543185e-06, + "loss": 0.077, + "step": 89530 + }, + { + "epoch": 4.18, + "learning_rate": 6.127318044299532e-06, + "loss": 0.0137, + "step": 89535 + }, + { + "epoch": 4.18, + "learning_rate": 6.1265342592447455e-06, + "loss": 0.0232, + "step": 89540 + }, + { + "epoch": 4.18, + "learning_rate": 6.1257504741899586e-06, + "loss": 0.0565, + "step": 89545 + }, + { + "epoch": 4.18, + "learning_rate": 6.1249666891351725e-06, + "loss": 0.0814, + "step": 89550 + }, + { + "epoch": 4.18, + "learning_rate": 6.1241829040803855e-06, + "loss": 0.056, + "step": 89555 + }, + { + "epoch": 4.18, + "learning_rate": 6.1233991190255994e-06, + "loss": 0.1184, + "step": 89560 + }, + { + "epoch": 4.18, + "learning_rate": 6.1226153339708125e-06, + "loss": 0.0755, + "step": 89565 + }, + { + "epoch": 4.18, + "learning_rate": 6.121831548916026e-06, + "loss": 0.244, + "step": 89570 + }, + { + "epoch": 4.18, + "learning_rate": 6.121047763861239e-06, + "loss": 0.187, + "step": 89575 + }, + { + "epoch": 4.18, + "learning_rate": 6.1202639788064525e-06, + "loss": 0.104, + "step": 89580 + }, + { + "epoch": 4.18, + "learning_rate": 6.1194801937516656e-06, + "loss": 0.0128, + "step": 89585 + }, + { + "epoch": 4.18, + "learning_rate": 6.1186964086968795e-06, + "loss": 0.1322, + "step": 89590 + }, + { + "epoch": 4.18, + "learning_rate": 6.1179126236420925e-06, + "loss": 0.0376, + "step": 89595 + }, + { + "epoch": 4.18, + "learning_rate": 6.1171288385873064e-06, + "loss": 0.0948, + "step": 89600 + }, + { + "epoch": 4.18, + "learning_rate": 6.1163450535325195e-06, + "loss": 0.0404, + "step": 89605 + }, + { + "epoch": 4.18, + "learning_rate": 6.115561268477733e-06, + "loss": 0.0668, + "step": 89610 + }, + { + "epoch": 4.18, + "learning_rate": 6.1147774834229465e-06, + "loss": 0.1415, + "step": 89615 + }, + { + "epoch": 4.18, + "learning_rate": 6.11399369836816e-06, + "loss": 0.1557, + "step": 89620 + }, + { + "epoch": 4.18, + "learning_rate": 6.1132099133133734e-06, + "loss": 0.3609, + "step": 89625 + }, + { + "epoch": 4.18, + "learning_rate": 6.112426128258587e-06, + "loss": 0.1221, + "step": 89630 + }, + { + "epoch": 4.18, + "learning_rate": 6.1116423432038e-06, + "loss": 0.0591, + "step": 89635 + }, + { + "epoch": 4.18, + "learning_rate": 6.1108585581490135e-06, + "loss": 0.0227, + "step": 89640 + }, + { + "epoch": 4.18, + "learning_rate": 6.1100747730942265e-06, + "loss": 0.0375, + "step": 89645 + }, + { + "epoch": 4.18, + "learning_rate": 6.10929098803944e-06, + "loss": 0.0431, + "step": 89650 + }, + { + "epoch": 4.18, + "learning_rate": 6.1085072029846535e-06, + "loss": 0.063, + "step": 89655 + }, + { + "epoch": 4.18, + "learning_rate": 6.107723417929867e-06, + "loss": 0.0882, + "step": 89660 + }, + { + "epoch": 4.18, + "learning_rate": 6.1069396328750804e-06, + "loss": 0.1451, + "step": 89665 + }, + { + "epoch": 4.18, + "learning_rate": 6.106155847820294e-06, + "loss": 0.1139, + "step": 89670 + }, + { + "epoch": 4.18, + "learning_rate": 6.105372062765507e-06, + "loss": 0.2785, + "step": 89675 + }, + { + "epoch": 4.18, + "learning_rate": 6.104588277710721e-06, + "loss": 0.064, + "step": 89680 + }, + { + "epoch": 4.18, + "learning_rate": 6.103804492655934e-06, + "loss": 0.0145, + "step": 89685 + }, + { + "epoch": 4.19, + "learning_rate": 6.103020707601148e-06, + "loss": 0.017, + "step": 89690 + }, + { + "epoch": 4.19, + "learning_rate": 6.102236922546362e-06, + "loss": 0.0564, + "step": 89695 + }, + { + "epoch": 4.19, + "learning_rate": 6.101453137491575e-06, + "loss": 0.0964, + "step": 89700 + }, + { + "epoch": 4.19, + "learning_rate": 6.1006693524367875e-06, + "loss": 0.0644, + "step": 89705 + }, + { + "epoch": 4.19, + "learning_rate": 6.099885567382001e-06, + "loss": 0.0778, + "step": 89710 + }, + { + "epoch": 4.19, + "learning_rate": 6.099101782327214e-06, + "loss": 0.1623, + "step": 89715 + }, + { + "epoch": 4.19, + "learning_rate": 6.098317997272428e-06, + "loss": 0.2354, + "step": 89720 + }, + { + "epoch": 4.19, + "learning_rate": 6.097534212217641e-06, + "loss": 0.2151, + "step": 89725 + }, + { + "epoch": 4.19, + "learning_rate": 6.096750427162855e-06, + "loss": 0.1062, + "step": 89730 + }, + { + "epoch": 4.19, + "learning_rate": 6.095966642108068e-06, + "loss": 0.0041, + "step": 89735 + }, + { + "epoch": 4.19, + "learning_rate": 6.095182857053282e-06, + "loss": 0.0345, + "step": 89740 + }, + { + "epoch": 4.19, + "learning_rate": 6.094399071998496e-06, + "loss": 0.0483, + "step": 89745 + }, + { + "epoch": 4.19, + "learning_rate": 6.093615286943709e-06, + "loss": 0.0438, + "step": 89750 + }, + { + "epoch": 4.19, + "learning_rate": 6.092831501888923e-06, + "loss": 0.0892, + "step": 89755 + }, + { + "epoch": 4.19, + "learning_rate": 6.092047716834136e-06, + "loss": 0.0652, + "step": 89760 + }, + { + "epoch": 4.19, + "learning_rate": 6.09126393177935e-06, + "loss": 0.133, + "step": 89765 + }, + { + "epoch": 4.19, + "learning_rate": 6.090480146724562e-06, + "loss": 0.2187, + "step": 89770 + }, + { + "epoch": 4.19, + "learning_rate": 6.089696361669775e-06, + "loss": 0.2867, + "step": 89775 + }, + { + "epoch": 4.19, + "learning_rate": 6.088912576614989e-06, + "loss": 0.0944, + "step": 89780 + }, + { + "epoch": 4.19, + "learning_rate": 6.088128791560203e-06, + "loss": 0.0482, + "step": 89785 + }, + { + "epoch": 4.19, + "learning_rate": 6.087345006505416e-06, + "loss": 0.0895, + "step": 89790 + }, + { + "epoch": 4.19, + "learning_rate": 6.08656122145063e-06, + "loss": 0.0182, + "step": 89795 + }, + { + "epoch": 4.19, + "learning_rate": 6.085777436395843e-06, + "loss": 0.1007, + "step": 89800 + }, + { + "epoch": 4.19, + "learning_rate": 6.084993651341057e-06, + "loss": 0.054, + "step": 89805 + }, + { + "epoch": 4.19, + "learning_rate": 6.08420986628627e-06, + "loss": 0.0982, + "step": 89810 + }, + { + "epoch": 4.19, + "learning_rate": 6.083426081231484e-06, + "loss": 0.1718, + "step": 89815 + }, + { + "epoch": 4.19, + "learning_rate": 6.082642296176697e-06, + "loss": 0.1158, + "step": 89820 + }, + { + "epoch": 4.19, + "learning_rate": 6.081858511121911e-06, + "loss": 0.2076, + "step": 89825 + }, + { + "epoch": 4.19, + "learning_rate": 6.081074726067124e-06, + "loss": 0.108, + "step": 89830 + }, + { + "epoch": 4.19, + "learning_rate": 6.080290941012337e-06, + "loss": 0.0433, + "step": 89835 + }, + { + "epoch": 4.19, + "learning_rate": 6.07950715595755e-06, + "loss": 0.0423, + "step": 89840 + }, + { + "epoch": 4.19, + "learning_rate": 6.078723370902764e-06, + "loss": 0.049, + "step": 89845 + }, + { + "epoch": 4.19, + "learning_rate": 6.077939585847977e-06, + "loss": 0.0125, + "step": 89850 + }, + { + "epoch": 4.19, + "learning_rate": 6.077155800793191e-06, + "loss": 0.0529, + "step": 89855 + }, + { + "epoch": 4.19, + "learning_rate": 6.076372015738404e-06, + "loss": 0.4706, + "step": 89860 + }, + { + "epoch": 4.19, + "learning_rate": 6.075588230683618e-06, + "loss": 0.0698, + "step": 89865 + }, + { + "epoch": 4.19, + "learning_rate": 6.074804445628831e-06, + "loss": 0.1114, + "step": 89870 + }, + { + "epoch": 4.19, + "learning_rate": 6.074020660574045e-06, + "loss": 0.318, + "step": 89875 + }, + { + "epoch": 4.19, + "learning_rate": 6.073236875519258e-06, + "loss": 0.0609, + "step": 89880 + }, + { + "epoch": 4.19, + "learning_rate": 6.072453090464472e-06, + "loss": 0.0141, + "step": 89885 + }, + { + "epoch": 4.19, + "learning_rate": 6.071669305409685e-06, + "loss": 0.0518, + "step": 89890 + }, + { + "epoch": 4.19, + "learning_rate": 6.070885520354899e-06, + "loss": 0.0432, + "step": 89895 + }, + { + "epoch": 4.19, + "learning_rate": 6.070101735300111e-06, + "loss": 0.0527, + "step": 89900 + }, + { + "epoch": 4.2, + "learning_rate": 6.069317950245325e-06, + "loss": 0.0655, + "step": 89905 + }, + { + "epoch": 4.2, + "learning_rate": 6.068534165190538e-06, + "loss": 0.1085, + "step": 89910 + }, + { + "epoch": 4.2, + "learning_rate": 6.067750380135752e-06, + "loss": 0.1045, + "step": 89915 + }, + { + "epoch": 4.2, + "learning_rate": 6.066966595080965e-06, + "loss": 0.0901, + "step": 89920 + }, + { + "epoch": 4.2, + "learning_rate": 6.066182810026179e-06, + "loss": 0.2014, + "step": 89925 + }, + { + "epoch": 4.2, + "learning_rate": 6.065399024971392e-06, + "loss": 0.0804, + "step": 89930 + }, + { + "epoch": 4.2, + "learning_rate": 6.064615239916606e-06, + "loss": 0.0183, + "step": 89935 + }, + { + "epoch": 4.2, + "learning_rate": 6.063831454861819e-06, + "loss": 0.0362, + "step": 89940 + }, + { + "epoch": 4.2, + "learning_rate": 6.063047669807033e-06, + "loss": 0.034, + "step": 89945 + }, + { + "epoch": 4.2, + "learning_rate": 6.062263884752246e-06, + "loss": 0.0835, + "step": 89950 + }, + { + "epoch": 4.2, + "learning_rate": 6.06148009969746e-06, + "loss": 0.0713, + "step": 89955 + }, + { + "epoch": 4.2, + "learning_rate": 6.060696314642674e-06, + "loss": 0.123, + "step": 89960 + }, + { + "epoch": 4.2, + "learning_rate": 6.059912529587886e-06, + "loss": 0.049, + "step": 89965 + }, + { + "epoch": 4.2, + "learning_rate": 6.059128744533099e-06, + "loss": 0.1066, + "step": 89970 + }, + { + "epoch": 4.2, + "learning_rate": 6.058344959478313e-06, + "loss": 0.3859, + "step": 89975 + }, + { + "epoch": 4.2, + "learning_rate": 6.057561174423526e-06, + "loss": 0.0998, + "step": 89980 + }, + { + "epoch": 4.2, + "learning_rate": 6.05677738936874e-06, + "loss": 0.0131, + "step": 89985 + }, + { + "epoch": 4.2, + "learning_rate": 6.055993604313953e-06, + "loss": 0.0514, + "step": 89990 + }, + { + "epoch": 4.2, + "learning_rate": 6.055209819259167e-06, + "loss": 0.0365, + "step": 89995 + }, + { + "epoch": 4.2, + "learning_rate": 6.05442603420438e-06, + "loss": 0.0505, + "step": 90000 + }, + { + "epoch": 4.2, + "learning_rate": 6.053642249149594e-06, + "loss": 0.0449, + "step": 90005 + }, + { + "epoch": 4.2, + "learning_rate": 6.052858464094808e-06, + "loss": 0.0937, + "step": 90010 + }, + { + "epoch": 4.2, + "learning_rate": 6.052074679040021e-06, + "loss": 0.071, + "step": 90015 + }, + { + "epoch": 4.2, + "learning_rate": 6.051290893985235e-06, + "loss": 0.2377, + "step": 90020 + }, + { + "epoch": 4.2, + "learning_rate": 6.050507108930448e-06, + "loss": 0.3692, + "step": 90025 + }, + { + "epoch": 4.2, + "learning_rate": 6.04972332387566e-06, + "loss": 0.0788, + "step": 90030 + }, + { + "epoch": 4.2, + "learning_rate": 6.048939538820874e-06, + "loss": 0.0178, + "step": 90035 + }, + { + "epoch": 4.2, + "learning_rate": 6.048155753766087e-06, + "loss": 0.0402, + "step": 90040 + }, + { + "epoch": 4.2, + "learning_rate": 6.047371968711301e-06, + "loss": 0.0688, + "step": 90045 + }, + { + "epoch": 4.2, + "learning_rate": 6.046588183656515e-06, + "loss": 0.0793, + "step": 90050 + }, + { + "epoch": 4.2, + "learning_rate": 6.045804398601728e-06, + "loss": 0.0935, + "step": 90055 + }, + { + "epoch": 4.2, + "learning_rate": 6.045020613546942e-06, + "loss": 0.1183, + "step": 90060 + }, + { + "epoch": 4.2, + "learning_rate": 6.044236828492155e-06, + "loss": 0.1245, + "step": 90065 + }, + { + "epoch": 4.2, + "learning_rate": 6.043453043437369e-06, + "loss": 0.2181, + "step": 90070 + }, + { + "epoch": 4.2, + "learning_rate": 6.042669258382582e-06, + "loss": 0.1639, + "step": 90075 + }, + { + "epoch": 4.2, + "learning_rate": 6.041885473327796e-06, + "loss": 0.0822, + "step": 90080 + }, + { + "epoch": 4.2, + "learning_rate": 6.041101688273009e-06, + "loss": 0.0224, + "step": 90085 + }, + { + "epoch": 4.2, + "learning_rate": 6.040317903218223e-06, + "loss": 0.0694, + "step": 90090 + }, + { + "epoch": 4.2, + "learning_rate": 6.039534118163435e-06, + "loss": 0.0438, + "step": 90095 + }, + { + "epoch": 4.2, + "learning_rate": 6.038750333108649e-06, + "loss": 0.0986, + "step": 90100 + }, + { + "epoch": 4.2, + "learning_rate": 6.037966548053862e-06, + "loss": 0.0518, + "step": 90105 + }, + { + "epoch": 4.2, + "learning_rate": 6.037182762999076e-06, + "loss": 0.1075, + "step": 90110 + }, + { + "epoch": 4.2, + "learning_rate": 6.036398977944289e-06, + "loss": 0.0888, + "step": 90115 + }, + { + "epoch": 4.21, + "learning_rate": 6.035615192889503e-06, + "loss": 0.1886, + "step": 90120 + }, + { + "epoch": 4.21, + "learning_rate": 6.034831407834716e-06, + "loss": 0.242, + "step": 90125 + }, + { + "epoch": 4.21, + "learning_rate": 6.03404762277993e-06, + "loss": 0.0662, + "step": 90130 + }, + { + "epoch": 4.21, + "learning_rate": 6.033263837725143e-06, + "loss": 0.0289, + "step": 90135 + }, + { + "epoch": 4.21, + "learning_rate": 6.032480052670357e-06, + "loss": 0.0234, + "step": 90140 + }, + { + "epoch": 4.21, + "learning_rate": 6.03169626761557e-06, + "loss": 0.0416, + "step": 90145 + }, + { + "epoch": 4.21, + "learning_rate": 6.0309124825607836e-06, + "loss": 0.0352, + "step": 90150 + }, + { + "epoch": 4.21, + "learning_rate": 6.030128697505997e-06, + "loss": 0.0751, + "step": 90155 + }, + { + "epoch": 4.21, + "learning_rate": 6.02934491245121e-06, + "loss": 0.1329, + "step": 90160 + }, + { + "epoch": 4.21, + "learning_rate": 6.028561127396423e-06, + "loss": 0.1839, + "step": 90165 + }, + { + "epoch": 4.21, + "learning_rate": 6.027777342341637e-06, + "loss": 0.2593, + "step": 90170 + }, + { + "epoch": 4.21, + "learning_rate": 6.02699355728685e-06, + "loss": 0.284, + "step": 90175 + }, + { + "epoch": 4.21, + "learning_rate": 6.026209772232064e-06, + "loss": 0.0334, + "step": 90180 + }, + { + "epoch": 4.21, + "learning_rate": 6.025425987177277e-06, + "loss": 0.0346, + "step": 90185 + }, + { + "epoch": 4.21, + "learning_rate": 6.0246422021224906e-06, + "loss": 0.0354, + "step": 90190 + }, + { + "epoch": 4.21, + "learning_rate": 6.023858417067704e-06, + "loss": 0.0417, + "step": 90195 + }, + { + "epoch": 4.21, + "learning_rate": 6.0230746320129175e-06, + "loss": 0.1311, + "step": 90200 + }, + { + "epoch": 4.21, + "learning_rate": 6.022290846958131e-06, + "loss": 0.0548, + "step": 90205 + }, + { + "epoch": 4.21, + "learning_rate": 6.0215070619033445e-06, + "loss": 0.0819, + "step": 90210 + }, + { + "epoch": 4.21, + "learning_rate": 6.0207232768485576e-06, + "loss": 0.1508, + "step": 90215 + }, + { + "epoch": 4.21, + "learning_rate": 6.0199394917937715e-06, + "loss": 0.2033, + "step": 90220 + }, + { + "epoch": 4.21, + "learning_rate": 6.019155706738984e-06, + "loss": 0.2608, + "step": 90225 + }, + { + "epoch": 4.21, + "learning_rate": 6.018371921684198e-06, + "loss": 0.1337, + "step": 90230 + }, + { + "epoch": 4.21, + "learning_rate": 6.017588136629411e-06, + "loss": 0.0074, + "step": 90235 + }, + { + "epoch": 4.21, + "learning_rate": 6.0168043515746245e-06, + "loss": 0.0424, + "step": 90240 + }, + { + "epoch": 4.21, + "learning_rate": 6.016020566519838e-06, + "loss": 0.0388, + "step": 90245 + }, + { + "epoch": 4.21, + "learning_rate": 6.0152367814650515e-06, + "loss": 0.0768, + "step": 90250 + }, + { + "epoch": 4.21, + "learning_rate": 6.0144529964102646e-06, + "loss": 0.0497, + "step": 90255 + }, + { + "epoch": 4.21, + "learning_rate": 6.0136692113554785e-06, + "loss": 0.0285, + "step": 90260 + }, + { + "epoch": 4.21, + "learning_rate": 6.0128854263006915e-06, + "loss": 0.0737, + "step": 90265 + }, + { + "epoch": 4.21, + "learning_rate": 6.0121016412459054e-06, + "loss": 0.185, + "step": 90270 + }, + { + "epoch": 4.21, + "learning_rate": 6.011317856191119e-06, + "loss": 0.2338, + "step": 90275 + }, + { + "epoch": 4.21, + "learning_rate": 6.010534071136332e-06, + "loss": 0.058, + "step": 90280 + }, + { + "epoch": 4.21, + "learning_rate": 6.009750286081546e-06, + "loss": 0.0251, + "step": 90285 + }, + { + "epoch": 4.21, + "learning_rate": 6.0089665010267585e-06, + "loss": 0.0317, + "step": 90290 + }, + { + "epoch": 4.21, + "learning_rate": 6.008182715971972e-06, + "loss": 0.0508, + "step": 90295 + }, + { + "epoch": 4.21, + "learning_rate": 6.0073989309171855e-06, + "loss": 0.0738, + "step": 90300 + }, + { + "epoch": 4.21, + "learning_rate": 6.0066151458623985e-06, + "loss": 0.0557, + "step": 90305 + }, + { + "epoch": 4.21, + "learning_rate": 6.0058313608076125e-06, + "loss": 0.085, + "step": 90310 + }, + { + "epoch": 4.21, + "learning_rate": 6.0050475757528255e-06, + "loss": 0.1645, + "step": 90315 + }, + { + "epoch": 4.21, + "learning_rate": 6.004263790698039e-06, + "loss": 0.1144, + "step": 90320 + }, + { + "epoch": 4.21, + "learning_rate": 6.003480005643253e-06, + "loss": 0.2678, + "step": 90325 + }, + { + "epoch": 4.21, + "learning_rate": 6.002696220588466e-06, + "loss": 0.1039, + "step": 90330 + }, + { + "epoch": 4.22, + "learning_rate": 6.00191243553368e-06, + "loss": 0.0477, + "step": 90335 + }, + { + "epoch": 4.22, + "learning_rate": 6.001128650478893e-06, + "loss": 0.0343, + "step": 90340 + }, + { + "epoch": 4.22, + "learning_rate": 6.000344865424107e-06, + "loss": 0.0525, + "step": 90345 + }, + { + "epoch": 4.22, + "learning_rate": 5.99956108036932e-06, + "loss": 0.0651, + "step": 90350 + }, + { + "epoch": 4.22, + "learning_rate": 5.9987772953145325e-06, + "loss": 0.1345, + "step": 90355 + }, + { + "epoch": 4.22, + "learning_rate": 5.9979935102597464e-06, + "loss": 0.0969, + "step": 90360 + }, + { + "epoch": 4.22, + "learning_rate": 5.99720972520496e-06, + "loss": 0.1482, + "step": 90365 + }, + { + "epoch": 4.22, + "learning_rate": 5.996425940150173e-06, + "loss": 0.1354, + "step": 90370 + }, + { + "epoch": 4.22, + "learning_rate": 5.995642155095387e-06, + "loss": 0.3137, + "step": 90375 + }, + { + "epoch": 4.22, + "learning_rate": 5.9948583700406e-06, + "loss": 0.0598, + "step": 90380 + }, + { + "epoch": 4.22, + "learning_rate": 5.994074584985814e-06, + "loss": 0.0126, + "step": 90385 + }, + { + "epoch": 4.22, + "learning_rate": 5.993290799931027e-06, + "loss": 0.0265, + "step": 90390 + }, + { + "epoch": 4.22, + "learning_rate": 5.992507014876241e-06, + "loss": 0.0334, + "step": 90395 + }, + { + "epoch": 4.22, + "learning_rate": 5.991723229821454e-06, + "loss": 0.0125, + "step": 90400 + }, + { + "epoch": 4.22, + "learning_rate": 5.990939444766668e-06, + "loss": 0.0424, + "step": 90405 + }, + { + "epoch": 4.22, + "learning_rate": 5.990155659711881e-06, + "loss": 0.0839, + "step": 90410 + }, + { + "epoch": 4.22, + "learning_rate": 5.989371874657095e-06, + "loss": 0.0869, + "step": 90415 + }, + { + "epoch": 4.22, + "learning_rate": 5.988588089602307e-06, + "loss": 0.2141, + "step": 90420 + }, + { + "epoch": 4.22, + "learning_rate": 5.987804304547521e-06, + "loss": 0.284, + "step": 90425 + }, + { + "epoch": 4.22, + "learning_rate": 5.987020519492734e-06, + "loss": 0.1168, + "step": 90430 + }, + { + "epoch": 4.22, + "learning_rate": 5.986236734437948e-06, + "loss": 0.069, + "step": 90435 + }, + { + "epoch": 4.22, + "learning_rate": 5.985452949383161e-06, + "loss": 0.0413, + "step": 90440 + }, + { + "epoch": 4.22, + "learning_rate": 5.984669164328375e-06, + "loss": 0.0248, + "step": 90445 + }, + { + "epoch": 4.22, + "learning_rate": 5.983885379273588e-06, + "loss": 0.0796, + "step": 90450 + }, + { + "epoch": 4.22, + "learning_rate": 5.983101594218802e-06, + "loss": 0.1198, + "step": 90455 + }, + { + "epoch": 4.22, + "learning_rate": 5.982317809164015e-06, + "loss": 0.1314, + "step": 90460 + }, + { + "epoch": 4.22, + "learning_rate": 5.981534024109229e-06, + "loss": 0.1196, + "step": 90465 + }, + { + "epoch": 4.22, + "learning_rate": 5.980750239054442e-06, + "loss": 0.253, + "step": 90470 + }, + { + "epoch": 4.22, + "learning_rate": 5.979966453999656e-06, + "loss": 0.2472, + "step": 90475 + }, + { + "epoch": 4.22, + "learning_rate": 5.979182668944869e-06, + "loss": 0.13, + "step": 90480 + }, + { + "epoch": 4.22, + "learning_rate": 5.978398883890082e-06, + "loss": 0.008, + "step": 90485 + }, + { + "epoch": 4.22, + "learning_rate": 5.977615098835295e-06, + "loss": 0.0168, + "step": 90490 + }, + { + "epoch": 4.22, + "learning_rate": 5.976831313780509e-06, + "loss": 0.0762, + "step": 90495 + }, + { + "epoch": 4.22, + "learning_rate": 5.976047528725722e-06, + "loss": 0.0723, + "step": 90500 + }, + { + "epoch": 4.22, + "learning_rate": 5.975263743670936e-06, + "loss": 0.0596, + "step": 90505 + }, + { + "epoch": 4.22, + "learning_rate": 5.974479958616149e-06, + "loss": 0.1083, + "step": 90510 + }, + { + "epoch": 4.22, + "learning_rate": 5.973696173561363e-06, + "loss": 0.1273, + "step": 90515 + }, + { + "epoch": 4.22, + "learning_rate": 5.972912388506576e-06, + "loss": 0.1285, + "step": 90520 + }, + { + "epoch": 4.22, + "learning_rate": 5.97212860345179e-06, + "loss": 0.2257, + "step": 90525 + }, + { + "epoch": 4.22, + "learning_rate": 5.9715015754079605e-06, + "loss": 0.0868, + "step": 90530 + }, + { + "epoch": 4.22, + "learning_rate": 5.9707177903531744e-06, + "loss": 0.058, + "step": 90535 + }, + { + "epoch": 4.22, + "learning_rate": 5.9699340052983875e-06, + "loss": 0.0261, + "step": 90540 + }, + { + "epoch": 4.22, + "learning_rate": 5.969150220243601e-06, + "loss": 0.0416, + "step": 90545 + }, + { + "epoch": 4.23, + "learning_rate": 5.968366435188814e-06, + "loss": 0.0643, + "step": 90550 + }, + { + "epoch": 4.23, + "learning_rate": 5.9675826501340275e-06, + "loss": 0.0362, + "step": 90555 + }, + { + "epoch": 4.23, + "learning_rate": 5.9667988650792406e-06, + "loss": 0.1408, + "step": 90560 + }, + { + "epoch": 4.23, + "learning_rate": 5.9660150800244545e-06, + "loss": 0.1165, + "step": 90565 + }, + { + "epoch": 4.23, + "learning_rate": 5.9652312949696675e-06, + "loss": 0.0919, + "step": 90570 + }, + { + "epoch": 4.23, + "learning_rate": 5.9644475099148814e-06, + "loss": 0.2328, + "step": 90575 + }, + { + "epoch": 4.23, + "learning_rate": 5.9636637248600945e-06, + "loss": 0.1449, + "step": 90580 + }, + { + "epoch": 4.23, + "learning_rate": 5.962879939805308e-06, + "loss": 0.01, + "step": 90585 + }, + { + "epoch": 4.23, + "learning_rate": 5.9620961547505215e-06, + "loss": 0.0488, + "step": 90590 + }, + { + "epoch": 4.23, + "learning_rate": 5.961312369695735e-06, + "loss": 0.0404, + "step": 90595 + }, + { + "epoch": 4.23, + "learning_rate": 5.960528584640949e-06, + "loss": 0.0393, + "step": 90600 + }, + { + "epoch": 4.23, + "learning_rate": 5.959744799586162e-06, + "loss": 0.0907, + "step": 90605 + }, + { + "epoch": 4.23, + "learning_rate": 5.958961014531376e-06, + "loss": 0.0856, + "step": 90610 + }, + { + "epoch": 4.23, + "learning_rate": 5.9581772294765884e-06, + "loss": 0.0902, + "step": 90615 + }, + { + "epoch": 4.23, + "learning_rate": 5.9573934444218015e-06, + "loss": 0.1232, + "step": 90620 + }, + { + "epoch": 4.23, + "learning_rate": 5.956609659367015e-06, + "loss": 0.2334, + "step": 90625 + }, + { + "epoch": 4.23, + "learning_rate": 5.9558258743122285e-06, + "loss": 0.0768, + "step": 90630 + }, + { + "epoch": 4.23, + "learning_rate": 5.955042089257442e-06, + "loss": 0.0328, + "step": 90635 + }, + { + "epoch": 4.23, + "learning_rate": 5.9542583042026554e-06, + "loss": 0.0109, + "step": 90640 + }, + { + "epoch": 4.23, + "learning_rate": 5.953474519147869e-06, + "loss": 0.0723, + "step": 90645 + }, + { + "epoch": 4.23, + "learning_rate": 5.952690734093083e-06, + "loss": 0.1425, + "step": 90650 + }, + { + "epoch": 4.23, + "learning_rate": 5.951906949038296e-06, + "loss": 0.0614, + "step": 90655 + }, + { + "epoch": 4.23, + "learning_rate": 5.95112316398351e-06, + "loss": 0.1139, + "step": 90660 + }, + { + "epoch": 4.23, + "learning_rate": 5.950339378928723e-06, + "loss": 0.1506, + "step": 90665 + }, + { + "epoch": 4.23, + "learning_rate": 5.949555593873937e-06, + "loss": 0.1338, + "step": 90670 + }, + { + "epoch": 4.23, + "learning_rate": 5.94877180881915e-06, + "loss": 0.2628, + "step": 90675 + }, + { + "epoch": 4.23, + "learning_rate": 5.9479880237643624e-06, + "loss": 0.1299, + "step": 90680 + }, + { + "epoch": 4.23, + "learning_rate": 5.947204238709576e-06, + "loss": 0.0146, + "step": 90685 + }, + { + "epoch": 4.23, + "learning_rate": 5.946420453654789e-06, + "loss": 0.0128, + "step": 90690 + }, + { + "epoch": 4.23, + "learning_rate": 5.945636668600003e-06, + "loss": 0.0892, + "step": 90695 + }, + { + "epoch": 4.23, + "learning_rate": 5.944852883545217e-06, + "loss": 0.0325, + "step": 90700 + }, + { + "epoch": 4.23, + "learning_rate": 5.94406909849043e-06, + "loss": 0.0615, + "step": 90705 + }, + { + "epoch": 4.23, + "learning_rate": 5.943285313435644e-06, + "loss": 0.0741, + "step": 90710 + }, + { + "epoch": 4.23, + "learning_rate": 5.942501528380857e-06, + "loss": 0.0939, + "step": 90715 + }, + { + "epoch": 4.23, + "learning_rate": 5.941717743326071e-06, + "loss": 0.1592, + "step": 90720 + }, + { + "epoch": 4.23, + "learning_rate": 5.940933958271284e-06, + "loss": 0.2863, + "step": 90725 + }, + { + "epoch": 4.23, + "learning_rate": 5.940150173216498e-06, + "loss": 0.0575, + "step": 90730 + }, + { + "epoch": 4.23, + "learning_rate": 5.939366388161711e-06, + "loss": 0.017, + "step": 90735 + }, + { + "epoch": 4.23, + "learning_rate": 5.938582603106925e-06, + "loss": 0.0257, + "step": 90740 + }, + { + "epoch": 4.23, + "learning_rate": 5.937798818052137e-06, + "loss": 0.0217, + "step": 90745 + }, + { + "epoch": 4.23, + "learning_rate": 5.937015032997351e-06, + "loss": 0.0478, + "step": 90750 + }, + { + "epoch": 4.23, + "learning_rate": 5.936231247942564e-06, + "loss": 0.0328, + "step": 90755 + }, + { + "epoch": 4.23, + "learning_rate": 5.935447462887778e-06, + "loss": 0.1046, + "step": 90760 + }, + { + "epoch": 4.24, + "learning_rate": 5.934663677832991e-06, + "loss": 0.07, + "step": 90765 + }, + { + "epoch": 4.24, + "learning_rate": 5.933879892778205e-06, + "loss": 0.1482, + "step": 90770 + }, + { + "epoch": 4.24, + "learning_rate": 5.933096107723418e-06, + "loss": 0.312, + "step": 90775 + }, + { + "epoch": 4.24, + "learning_rate": 5.932312322668632e-06, + "loss": 0.0711, + "step": 90780 + }, + { + "epoch": 4.24, + "learning_rate": 5.931528537613845e-06, + "loss": 0.0331, + "step": 90785 + }, + { + "epoch": 4.24, + "learning_rate": 5.930744752559059e-06, + "loss": 0.0518, + "step": 90790 + }, + { + "epoch": 4.24, + "learning_rate": 5.929960967504272e-06, + "loss": 0.0243, + "step": 90795 + }, + { + "epoch": 4.24, + "learning_rate": 5.929177182449486e-06, + "loss": 0.0966, + "step": 90800 + }, + { + "epoch": 4.24, + "learning_rate": 5.928393397394699e-06, + "loss": 0.0981, + "step": 90805 + }, + { + "epoch": 4.24, + "learning_rate": 5.927609612339912e-06, + "loss": 0.1148, + "step": 90810 + }, + { + "epoch": 4.24, + "learning_rate": 5.926825827285125e-06, + "loss": 0.119, + "step": 90815 + }, + { + "epoch": 4.24, + "learning_rate": 5.926042042230339e-06, + "loss": 0.1786, + "step": 90820 + }, + { + "epoch": 4.24, + "learning_rate": 5.925258257175552e-06, + "loss": 0.3331, + "step": 90825 + }, + { + "epoch": 4.24, + "learning_rate": 5.924474472120766e-06, + "loss": 0.074, + "step": 90830 + }, + { + "epoch": 4.24, + "learning_rate": 5.923690687065979e-06, + "loss": 0.0332, + "step": 90835 + }, + { + "epoch": 4.24, + "learning_rate": 5.922906902011193e-06, + "loss": 0.0213, + "step": 90840 + }, + { + "epoch": 4.24, + "learning_rate": 5.922123116956406e-06, + "loss": 0.0228, + "step": 90845 + }, + { + "epoch": 4.24, + "learning_rate": 5.92133933190162e-06, + "loss": 0.0234, + "step": 90850 + }, + { + "epoch": 4.24, + "learning_rate": 5.920555546846833e-06, + "loss": 0.0954, + "step": 90855 + }, + { + "epoch": 4.24, + "learning_rate": 5.919771761792047e-06, + "loss": 0.0538, + "step": 90860 + }, + { + "epoch": 4.24, + "learning_rate": 5.918987976737261e-06, + "loss": 0.1178, + "step": 90865 + }, + { + "epoch": 4.24, + "learning_rate": 5.918204191682474e-06, + "loss": 0.1837, + "step": 90870 + }, + { + "epoch": 4.24, + "learning_rate": 5.917420406627686e-06, + "loss": 0.3523, + "step": 90875 + }, + { + "epoch": 4.24, + "learning_rate": 5.9166366215729e-06, + "loss": 0.078, + "step": 90880 + }, + { + "epoch": 4.24, + "learning_rate": 5.915852836518113e-06, + "loss": 0.0019, + "step": 90885 + }, + { + "epoch": 4.24, + "learning_rate": 5.915069051463327e-06, + "loss": 0.0377, + "step": 90890 + }, + { + "epoch": 4.24, + "learning_rate": 5.91428526640854e-06, + "loss": 0.0181, + "step": 90895 + }, + { + "epoch": 4.24, + "learning_rate": 5.913501481353754e-06, + "loss": 0.0752, + "step": 90900 + }, + { + "epoch": 4.24, + "learning_rate": 5.912717696298967e-06, + "loss": 0.1215, + "step": 90905 + }, + { + "epoch": 4.24, + "learning_rate": 5.911933911244181e-06, + "loss": 0.0927, + "step": 90910 + }, + { + "epoch": 4.24, + "learning_rate": 5.911150126189395e-06, + "loss": 0.1609, + "step": 90915 + }, + { + "epoch": 4.24, + "learning_rate": 5.910366341134608e-06, + "loss": 0.1373, + "step": 90920 + }, + { + "epoch": 4.24, + "learning_rate": 5.909582556079822e-06, + "loss": 0.167, + "step": 90925 + }, + { + "epoch": 4.24, + "learning_rate": 5.908798771025035e-06, + "loss": 0.0965, + "step": 90930 + }, + { + "epoch": 4.24, + "learning_rate": 5.908014985970249e-06, + "loss": 0.0219, + "step": 90935 + }, + { + "epoch": 4.24, + "learning_rate": 5.907231200915461e-06, + "loss": 0.0882, + "step": 90940 + }, + { + "epoch": 4.24, + "learning_rate": 5.906447415860674e-06, + "loss": 0.0619, + "step": 90945 + }, + { + "epoch": 4.24, + "learning_rate": 5.905663630805888e-06, + "loss": 0.0978, + "step": 90950 + }, + { + "epoch": 4.24, + "learning_rate": 5.904879845751101e-06, + "loss": 0.0855, + "step": 90955 + }, + { + "epoch": 4.24, + "learning_rate": 5.904096060696315e-06, + "loss": 0.1471, + "step": 90960 + }, + { + "epoch": 4.24, + "learning_rate": 5.903312275641529e-06, + "loss": 0.184, + "step": 90965 + }, + { + "epoch": 4.24, + "learning_rate": 5.902528490586742e-06, + "loss": 0.1576, + "step": 90970 + }, + { + "epoch": 4.25, + "learning_rate": 5.901744705531956e-06, + "loss": 0.2986, + "step": 90975 + }, + { + "epoch": 4.25, + "learning_rate": 5.900960920477169e-06, + "loss": 0.1072, + "step": 90980 + }, + { + "epoch": 4.25, + "learning_rate": 5.900177135422383e-06, + "loss": 0.0297, + "step": 90985 + }, + { + "epoch": 4.25, + "learning_rate": 5.899393350367596e-06, + "loss": 0.0241, + "step": 90990 + }, + { + "epoch": 4.25, + "learning_rate": 5.89860956531281e-06, + "loss": 0.11, + "step": 90995 + }, + { + "epoch": 4.25, + "learning_rate": 5.897825780258023e-06, + "loss": 0.0495, + "step": 91000 + }, + { + "epoch": 4.25, + "learning_rate": 5.897041995203235e-06, + "loss": 0.1527, + "step": 91005 + }, + { + "epoch": 4.25, + "learning_rate": 5.896258210148449e-06, + "loss": 0.1269, + "step": 91010 + }, + { + "epoch": 4.25, + "learning_rate": 5.895474425093663e-06, + "loss": 0.0705, + "step": 91015 + }, + { + "epoch": 4.25, + "learning_rate": 5.894690640038876e-06, + "loss": 0.1735, + "step": 91020 + }, + { + "epoch": 4.25, + "learning_rate": 5.89390685498409e-06, + "loss": 0.4046, + "step": 91025 + }, + { + "epoch": 4.25, + "learning_rate": 5.893123069929303e-06, + "loss": 0.0778, + "step": 91030 + }, + { + "epoch": 4.25, + "learning_rate": 5.892339284874517e-06, + "loss": 0.0062, + "step": 91035 + }, + { + "epoch": 4.25, + "learning_rate": 5.89155549981973e-06, + "loss": 0.0657, + "step": 91040 + }, + { + "epoch": 4.25, + "learning_rate": 5.890771714764944e-06, + "loss": 0.0449, + "step": 91045 + }, + { + "epoch": 4.25, + "learning_rate": 5.889987929710157e-06, + "loss": 0.0381, + "step": 91050 + }, + { + "epoch": 4.25, + "learning_rate": 5.889204144655371e-06, + "loss": 0.0849, + "step": 91055 + }, + { + "epoch": 4.25, + "learning_rate": 5.888420359600584e-06, + "loss": 0.1032, + "step": 91060 + }, + { + "epoch": 4.25, + "learning_rate": 5.887636574545798e-06, + "loss": 0.1318, + "step": 91065 + }, + { + "epoch": 4.25, + "learning_rate": 5.886852789491011e-06, + "loss": 0.2191, + "step": 91070 + }, + { + "epoch": 4.25, + "learning_rate": 5.886069004436224e-06, + "loss": 0.1968, + "step": 91075 + }, + { + "epoch": 4.25, + "learning_rate": 5.885285219381437e-06, + "loss": 0.0889, + "step": 91080 + }, + { + "epoch": 4.25, + "learning_rate": 5.884501434326651e-06, + "loss": 0.0098, + "step": 91085 + }, + { + "epoch": 4.25, + "learning_rate": 5.883717649271864e-06, + "loss": 0.0416, + "step": 91090 + }, + { + "epoch": 4.25, + "learning_rate": 5.882933864217078e-06, + "loss": 0.0278, + "step": 91095 + }, + { + "epoch": 4.25, + "learning_rate": 5.882150079162291e-06, + "loss": 0.1008, + "step": 91100 + }, + { + "epoch": 4.25, + "learning_rate": 5.881366294107505e-06, + "loss": 0.1075, + "step": 91105 + }, + { + "epoch": 4.25, + "learning_rate": 5.880582509052718e-06, + "loss": 0.1326, + "step": 91110 + }, + { + "epoch": 4.25, + "learning_rate": 5.879798723997932e-06, + "loss": 0.1041, + "step": 91115 + }, + { + "epoch": 4.25, + "learning_rate": 5.879014938943145e-06, + "loss": 0.1203, + "step": 91120 + }, + { + "epoch": 4.25, + "learning_rate": 5.8782311538883586e-06, + "loss": 0.3442, + "step": 91125 + }, + { + "epoch": 4.25, + "learning_rate": 5.877447368833572e-06, + "loss": 0.0813, + "step": 91130 + }, + { + "epoch": 4.25, + "learning_rate": 5.8766635837787855e-06, + "loss": 0.0219, + "step": 91135 + }, + { + "epoch": 4.25, + "learning_rate": 5.875879798723998e-06, + "loss": 0.0666, + "step": 91140 + }, + { + "epoch": 4.25, + "learning_rate": 5.875096013669212e-06, + "loss": 0.0523, + "step": 91145 + }, + { + "epoch": 4.25, + "learning_rate": 5.874312228614425e-06, + "loss": 0.0992, + "step": 91150 + }, + { + "epoch": 4.25, + "learning_rate": 5.873528443559639e-06, + "loss": 0.0499, + "step": 91155 + }, + { + "epoch": 4.25, + "learning_rate": 5.872744658504852e-06, + "loss": 0.1551, + "step": 91160 + }, + { + "epoch": 4.25, + "learning_rate": 5.8719608734500656e-06, + "loss": 0.1377, + "step": 91165 + }, + { + "epoch": 4.25, + "learning_rate": 5.871177088395279e-06, + "loss": 0.2001, + "step": 91170 + }, + { + "epoch": 4.25, + "learning_rate": 5.8703933033404925e-06, + "loss": 0.3385, + "step": 91175 + }, + { + "epoch": 4.25, + "learning_rate": 5.8696095182857064e-06, + "loss": 0.0546, + "step": 91180 + }, + { + "epoch": 4.25, + "learning_rate": 5.8688257332309195e-06, + "loss": 0.0133, + "step": 91185 + }, + { + "epoch": 4.26, + "learning_rate": 5.868041948176133e-06, + "loss": 0.0212, + "step": 91190 + }, + { + "epoch": 4.26, + "learning_rate": 5.8672581631213465e-06, + "loss": 0.1172, + "step": 91195 + }, + { + "epoch": 4.26, + "learning_rate": 5.86647437806656e-06, + "loss": 0.0705, + "step": 91200 + }, + { + "epoch": 4.26, + "learning_rate": 5.865690593011773e-06, + "loss": 0.039, + "step": 91205 + }, + { + "epoch": 4.26, + "learning_rate": 5.864906807956986e-06, + "loss": 0.0975, + "step": 91210 + }, + { + "epoch": 4.26, + "learning_rate": 5.8641230229021995e-06, + "loss": 0.1576, + "step": 91215 + }, + { + "epoch": 4.26, + "learning_rate": 5.863339237847413e-06, + "loss": 0.1465, + "step": 91220 + }, + { + "epoch": 4.26, + "learning_rate": 5.8625554527926265e-06, + "loss": 0.2648, + "step": 91225 + }, + { + "epoch": 4.26, + "learning_rate": 5.86177166773784e-06, + "loss": 0.0647, + "step": 91230 + }, + { + "epoch": 4.26, + "learning_rate": 5.8609878826830535e-06, + "loss": 0.0208, + "step": 91235 + }, + { + "epoch": 4.26, + "learning_rate": 5.860204097628267e-06, + "loss": 0.0095, + "step": 91240 + }, + { + "epoch": 4.26, + "learning_rate": 5.8594203125734804e-06, + "loss": 0.019, + "step": 91245 + }, + { + "epoch": 4.26, + "learning_rate": 5.858636527518694e-06, + "loss": 0.054, + "step": 91250 + }, + { + "epoch": 4.26, + "learning_rate": 5.857852742463907e-06, + "loss": 0.0704, + "step": 91255 + }, + { + "epoch": 4.26, + "learning_rate": 5.857068957409121e-06, + "loss": 0.0985, + "step": 91260 + }, + { + "epoch": 4.26, + "learning_rate": 5.856285172354334e-06, + "loss": 0.1225, + "step": 91265 + }, + { + "epoch": 4.26, + "learning_rate": 5.8555013872995466e-06, + "loss": 0.117, + "step": 91270 + }, + { + "epoch": 4.26, + "learning_rate": 5.8547176022447605e-06, + "loss": 0.2884, + "step": 91275 + }, + { + "epoch": 4.26, + "learning_rate": 5.853933817189974e-06, + "loss": 0.0266, + "step": 91280 + }, + { + "epoch": 4.26, + "learning_rate": 5.8531500321351874e-06, + "loss": 0.0104, + "step": 91285 + }, + { + "epoch": 4.26, + "learning_rate": 5.852366247080401e-06, + "loss": 0.0138, + "step": 91290 + }, + { + "epoch": 4.26, + "learning_rate": 5.851582462025614e-06, + "loss": 0.0621, + "step": 91295 + }, + { + "epoch": 4.26, + "learning_rate": 5.850798676970828e-06, + "loss": 0.0707, + "step": 91300 + }, + { + "epoch": 4.26, + "learning_rate": 5.850014891916041e-06, + "loss": 0.1172, + "step": 91305 + }, + { + "epoch": 4.26, + "learning_rate": 5.849231106861255e-06, + "loss": 0.0875, + "step": 91310 + }, + { + "epoch": 4.26, + "learning_rate": 5.848447321806468e-06, + "loss": 0.134, + "step": 91315 + }, + { + "epoch": 4.26, + "learning_rate": 5.847663536751682e-06, + "loss": 0.2569, + "step": 91320 + }, + { + "epoch": 4.26, + "learning_rate": 5.846879751696895e-06, + "loss": 0.3717, + "step": 91325 + }, + { + "epoch": 4.26, + "learning_rate": 5.846095966642109e-06, + "loss": 0.1126, + "step": 91330 + }, + { + "epoch": 4.26, + "learning_rate": 5.845312181587321e-06, + "loss": 0.023, + "step": 91335 + }, + { + "epoch": 4.26, + "learning_rate": 5.844528396532535e-06, + "loss": 0.0438, + "step": 91340 + }, + { + "epoch": 4.26, + "learning_rate": 5.843744611477748e-06, + "loss": 0.0307, + "step": 91345 + }, + { + "epoch": 4.26, + "learning_rate": 5.842960826422962e-06, + "loss": 0.0288, + "step": 91350 + }, + { + "epoch": 4.26, + "learning_rate": 5.842177041368175e-06, + "loss": 0.0755, + "step": 91355 + }, + { + "epoch": 4.26, + "learning_rate": 5.841393256313389e-06, + "loss": 0.121, + "step": 91360 + }, + { + "epoch": 4.26, + "learning_rate": 5.840609471258602e-06, + "loss": 0.077, + "step": 91365 + }, + { + "epoch": 4.26, + "learning_rate": 5.839825686203816e-06, + "loss": 0.2005, + "step": 91370 + }, + { + "epoch": 4.26, + "learning_rate": 5.839041901149029e-06, + "loss": 0.2854, + "step": 91375 + }, + { + "epoch": 4.26, + "learning_rate": 5.838258116094243e-06, + "loss": 0.061, + "step": 91380 + }, + { + "epoch": 4.26, + "learning_rate": 5.837474331039456e-06, + "loss": 0.0745, + "step": 91385 + }, + { + "epoch": 4.26, + "learning_rate": 5.83669054598467e-06, + "loss": 0.011, + "step": 91390 + }, + { + "epoch": 4.26, + "learning_rate": 5.835906760929883e-06, + "loss": 0.0118, + "step": 91395 + }, + { + "epoch": 4.26, + "learning_rate": 5.835122975875096e-06, + "loss": 0.0725, + "step": 91400 + }, + { + "epoch": 4.27, + "learning_rate": 5.834339190820309e-06, + "loss": 0.1007, + "step": 91405 + }, + { + "epoch": 4.27, + "learning_rate": 5.833555405765523e-06, + "loss": 0.1028, + "step": 91410 + }, + { + "epoch": 4.27, + "learning_rate": 5.832771620710736e-06, + "loss": 0.0941, + "step": 91415 + }, + { + "epoch": 4.27, + "learning_rate": 5.83198783565595e-06, + "loss": 0.1198, + "step": 91420 + }, + { + "epoch": 4.27, + "learning_rate": 5.831204050601163e-06, + "loss": 0.2649, + "step": 91425 + }, + { + "epoch": 4.27, + "learning_rate": 5.830420265546377e-06, + "loss": 0.0966, + "step": 91430 + }, + { + "epoch": 4.27, + "learning_rate": 5.82963648049159e-06, + "loss": 0.026, + "step": 91435 + }, + { + "epoch": 4.27, + "learning_rate": 5.828852695436804e-06, + "loss": 0.02, + "step": 91440 + }, + { + "epoch": 4.27, + "learning_rate": 5.828068910382018e-06, + "loss": 0.0755, + "step": 91445 + }, + { + "epoch": 4.27, + "learning_rate": 5.827285125327231e-06, + "loss": 0.085, + "step": 91450 + }, + { + "epoch": 4.27, + "learning_rate": 5.826501340272445e-06, + "loss": 0.1138, + "step": 91455 + }, + { + "epoch": 4.27, + "learning_rate": 5.825717555217658e-06, + "loss": 0.0904, + "step": 91460 + }, + { + "epoch": 4.27, + "learning_rate": 5.82493377016287e-06, + "loss": 0.148, + "step": 91465 + }, + { + "epoch": 4.27, + "learning_rate": 5.824149985108084e-06, + "loss": 0.2005, + "step": 91470 + }, + { + "epoch": 4.27, + "learning_rate": 5.823366200053297e-06, + "loss": 0.2441, + "step": 91475 + }, + { + "epoch": 4.27, + "learning_rate": 5.822582414998511e-06, + "loss": 0.1025, + "step": 91480 + }, + { + "epoch": 4.27, + "learning_rate": 5.821798629943724e-06, + "loss": 0.0482, + "step": 91485 + }, + { + "epoch": 4.27, + "learning_rate": 5.821014844888938e-06, + "loss": 0.0478, + "step": 91490 + }, + { + "epoch": 4.27, + "learning_rate": 5.820231059834152e-06, + "loss": 0.085, + "step": 91495 + }, + { + "epoch": 4.27, + "learning_rate": 5.819447274779365e-06, + "loss": 0.0428, + "step": 91500 + }, + { + "epoch": 4.27, + "learning_rate": 5.818663489724579e-06, + "loss": 0.0218, + "step": 91505 + }, + { + "epoch": 4.27, + "learning_rate": 5.817879704669792e-06, + "loss": 0.1121, + "step": 91510 + }, + { + "epoch": 4.27, + "learning_rate": 5.817095919615006e-06, + "loss": 0.1174, + "step": 91515 + }, + { + "epoch": 4.27, + "learning_rate": 5.816312134560219e-06, + "loss": 0.1658, + "step": 91520 + }, + { + "epoch": 4.27, + "learning_rate": 5.815528349505433e-06, + "loss": 0.2823, + "step": 91525 + }, + { + "epoch": 4.27, + "learning_rate": 5.814744564450645e-06, + "loss": 0.0966, + "step": 91530 + }, + { + "epoch": 4.27, + "learning_rate": 5.813960779395858e-06, + "loss": 0.0231, + "step": 91535 + }, + { + "epoch": 4.27, + "learning_rate": 5.813176994341072e-06, + "loss": 0.0423, + "step": 91540 + }, + { + "epoch": 4.27, + "learning_rate": 5.812393209286286e-06, + "loss": 0.0486, + "step": 91545 + }, + { + "epoch": 4.27, + "learning_rate": 5.811609424231499e-06, + "loss": 0.044, + "step": 91550 + }, + { + "epoch": 4.27, + "learning_rate": 5.810825639176713e-06, + "loss": 0.0484, + "step": 91555 + }, + { + "epoch": 4.27, + "learning_rate": 5.810041854121926e-06, + "loss": 0.1279, + "step": 91560 + }, + { + "epoch": 4.27, + "learning_rate": 5.80925806906714e-06, + "loss": 0.0939, + "step": 91565 + }, + { + "epoch": 4.27, + "learning_rate": 5.808474284012353e-06, + "loss": 0.1795, + "step": 91570 + }, + { + "epoch": 4.27, + "learning_rate": 5.807690498957567e-06, + "loss": 0.2573, + "step": 91575 + }, + { + "epoch": 4.27, + "learning_rate": 5.80690671390278e-06, + "loss": 0.0619, + "step": 91580 + }, + { + "epoch": 4.27, + "learning_rate": 5.806122928847994e-06, + "loss": 0.0259, + "step": 91585 + }, + { + "epoch": 4.27, + "learning_rate": 5.805339143793207e-06, + "loss": 0.0099, + "step": 91590 + }, + { + "epoch": 4.27, + "learning_rate": 5.80455535873842e-06, + "loss": 0.0308, + "step": 91595 + }, + { + "epoch": 4.27, + "learning_rate": 5.803771573683633e-06, + "loss": 0.049, + "step": 91600 + }, + { + "epoch": 4.27, + "learning_rate": 5.802987788628847e-06, + "loss": 0.0281, + "step": 91605 + }, + { + "epoch": 4.27, + "learning_rate": 5.80220400357406e-06, + "loss": 0.0662, + "step": 91610 + }, + { + "epoch": 4.27, + "learning_rate": 5.801420218519274e-06, + "loss": 0.1175, + "step": 91615 + }, + { + "epoch": 4.28, + "learning_rate": 5.800636433464487e-06, + "loss": 0.135, + "step": 91620 + }, + { + "epoch": 4.28, + "learning_rate": 5.799852648409701e-06, + "loss": 0.1982, + "step": 91625 + }, + { + "epoch": 4.28, + "learning_rate": 5.799068863354914e-06, + "loss": 0.0358, + "step": 91630 + }, + { + "epoch": 4.28, + "learning_rate": 5.798285078300128e-06, + "loss": 0.0624, + "step": 91635 + }, + { + "epoch": 4.28, + "learning_rate": 5.797501293245341e-06, + "loss": 0.1193, + "step": 91640 + }, + { + "epoch": 4.28, + "learning_rate": 5.796717508190555e-06, + "loss": 0.0967, + "step": 91645 + }, + { + "epoch": 4.28, + "learning_rate": 5.795933723135768e-06, + "loss": 0.0787, + "step": 91650 + }, + { + "epoch": 4.28, + "learning_rate": 5.795149938080982e-06, + "loss": 0.0629, + "step": 91655 + }, + { + "epoch": 4.28, + "learning_rate": 5.794366153026194e-06, + "loss": 0.1216, + "step": 91660 + }, + { + "epoch": 4.28, + "learning_rate": 5.793582367971408e-06, + "loss": 0.1739, + "step": 91665 + }, + { + "epoch": 4.28, + "learning_rate": 5.792798582916621e-06, + "loss": 0.1603, + "step": 91670 + }, + { + "epoch": 4.28, + "learning_rate": 5.792014797861835e-06, + "loss": 0.2816, + "step": 91675 + }, + { + "epoch": 4.28, + "learning_rate": 5.791231012807048e-06, + "loss": 0.0642, + "step": 91680 + }, + { + "epoch": 4.28, + "learning_rate": 5.790447227752262e-06, + "loss": 0.0517, + "step": 91685 + }, + { + "epoch": 4.28, + "learning_rate": 5.789663442697475e-06, + "loss": 0.0424, + "step": 91690 + }, + { + "epoch": 4.28, + "learning_rate": 5.788879657642689e-06, + "loss": 0.0467, + "step": 91695 + }, + { + "epoch": 4.28, + "learning_rate": 5.788095872587902e-06, + "loss": 0.1118, + "step": 91700 + }, + { + "epoch": 4.28, + "learning_rate": 5.787312087533116e-06, + "loss": 0.0512, + "step": 91705 + }, + { + "epoch": 4.28, + "learning_rate": 5.786528302478329e-06, + "loss": 0.0556, + "step": 91710 + }, + { + "epoch": 4.28, + "learning_rate": 5.785744517423543e-06, + "loss": 0.1616, + "step": 91715 + }, + { + "epoch": 4.28, + "learning_rate": 5.784960732368757e-06, + "loss": 0.1424, + "step": 91720 + }, + { + "epoch": 4.28, + "learning_rate": 5.784176947313969e-06, + "loss": 0.361, + "step": 91725 + }, + { + "epoch": 4.28, + "learning_rate": 5.783393162259182e-06, + "loss": 0.0721, + "step": 91730 + }, + { + "epoch": 4.28, + "learning_rate": 5.782609377204396e-06, + "loss": 0.0352, + "step": 91735 + }, + { + "epoch": 4.28, + "learning_rate": 5.781825592149609e-06, + "loss": 0.0332, + "step": 91740 + }, + { + "epoch": 4.28, + "learning_rate": 5.781041807094823e-06, + "loss": 0.0272, + "step": 91745 + }, + { + "epoch": 4.28, + "learning_rate": 5.780258022040036e-06, + "loss": 0.0199, + "step": 91750 + }, + { + "epoch": 4.28, + "learning_rate": 5.77947423698525e-06, + "loss": 0.1228, + "step": 91755 + }, + { + "epoch": 4.28, + "learning_rate": 5.778690451930464e-06, + "loss": 0.106, + "step": 91760 + }, + { + "epoch": 4.28, + "learning_rate": 5.777906666875677e-06, + "loss": 0.1563, + "step": 91765 + }, + { + "epoch": 4.28, + "learning_rate": 5.7771228818208906e-06, + "loss": 0.1254, + "step": 91770 + }, + { + "epoch": 4.28, + "learning_rate": 5.776339096766104e-06, + "loss": 0.2847, + "step": 91775 + }, + { + "epoch": 4.28, + "learning_rate": 5.7755553117113175e-06, + "loss": 0.1171, + "step": 91780 + }, + { + "epoch": 4.28, + "learning_rate": 5.774771526656531e-06, + "loss": 0.0268, + "step": 91785 + }, + { + "epoch": 4.28, + "learning_rate": 5.773987741601743e-06, + "loss": 0.0125, + "step": 91790 + }, + { + "epoch": 4.28, + "learning_rate": 5.773203956546957e-06, + "loss": 0.0511, + "step": 91795 + }, + { + "epoch": 4.28, + "learning_rate": 5.77242017149217e-06, + "loss": 0.0268, + "step": 91800 + }, + { + "epoch": 4.28, + "learning_rate": 5.771636386437384e-06, + "loss": 0.0658, + "step": 91805 + }, + { + "epoch": 4.28, + "learning_rate": 5.770852601382598e-06, + "loss": 0.0453, + "step": 91810 + }, + { + "epoch": 4.28, + "learning_rate": 5.770068816327811e-06, + "loss": 0.1421, + "step": 91815 + }, + { + "epoch": 4.28, + "learning_rate": 5.7692850312730245e-06, + "loss": 0.1635, + "step": 91820 + }, + { + "epoch": 4.28, + "learning_rate": 5.768501246218238e-06, + "loss": 0.3275, + "step": 91825 + }, + { + "epoch": 4.28, + "learning_rate": 5.7677174611634515e-06, + "loss": 0.0868, + "step": 91830 + }, + { + "epoch": 4.29, + "learning_rate": 5.7669336761086646e-06, + "loss": 0.0254, + "step": 91835 + }, + { + "epoch": 4.29, + "learning_rate": 5.7661498910538785e-06, + "loss": 0.0464, + "step": 91840 + }, + { + "epoch": 4.29, + "learning_rate": 5.7653661059990915e-06, + "loss": 0.0873, + "step": 91845 + }, + { + "epoch": 4.29, + "learning_rate": 5.7645823209443054e-06, + "loss": 0.1071, + "step": 91850 + }, + { + "epoch": 4.29, + "learning_rate": 5.763798535889518e-06, + "loss": 0.0801, + "step": 91855 + }, + { + "epoch": 4.29, + "learning_rate": 5.7630147508347316e-06, + "loss": 0.11, + "step": 91860 + }, + { + "epoch": 4.29, + "learning_rate": 5.762230965779945e-06, + "loss": 0.0467, + "step": 91865 + }, + { + "epoch": 4.29, + "learning_rate": 5.7614471807251585e-06, + "loss": 0.2116, + "step": 91870 + }, + { + "epoch": 4.29, + "learning_rate": 5.7606633956703716e-06, + "loss": 0.2985, + "step": 91875 + }, + { + "epoch": 4.29, + "learning_rate": 5.7598796106155855e-06, + "loss": 0.0908, + "step": 91880 + }, + { + "epoch": 4.29, + "learning_rate": 5.7590958255607985e-06, + "loss": 0.0278, + "step": 91885 + }, + { + "epoch": 4.29, + "learning_rate": 5.7583120405060124e-06, + "loss": 0.0273, + "step": 91890 + }, + { + "epoch": 4.29, + "learning_rate": 5.7575282554512255e-06, + "loss": 0.0394, + "step": 91895 + }, + { + "epoch": 4.29, + "learning_rate": 5.756744470396439e-06, + "loss": 0.0433, + "step": 91900 + }, + { + "epoch": 4.29, + "learning_rate": 5.7559606853416525e-06, + "loss": 0.0555, + "step": 91905 + }, + { + "epoch": 4.29, + "learning_rate": 5.755176900286866e-06, + "loss": 0.0859, + "step": 91910 + }, + { + "epoch": 4.29, + "learning_rate": 5.7543931152320794e-06, + "loss": 0.0759, + "step": 91915 + }, + { + "epoch": 4.29, + "learning_rate": 5.7536093301772925e-06, + "loss": 0.092, + "step": 91920 + }, + { + "epoch": 4.29, + "learning_rate": 5.7528255451225056e-06, + "loss": 0.3305, + "step": 91925 + }, + { + "epoch": 4.29, + "learning_rate": 5.7520417600677195e-06, + "loss": 0.0687, + "step": 91930 + }, + { + "epoch": 4.29, + "learning_rate": 5.7512579750129325e-06, + "loss": 0.0257, + "step": 91935 + }, + { + "epoch": 4.29, + "learning_rate": 5.750474189958146e-06, + "loss": 0.0409, + "step": 91940 + }, + { + "epoch": 4.29, + "learning_rate": 5.7496904049033595e-06, + "loss": 0.0293, + "step": 91945 + }, + { + "epoch": 4.29, + "learning_rate": 5.748906619848573e-06, + "loss": 0.0237, + "step": 91950 + }, + { + "epoch": 4.29, + "learning_rate": 5.7481228347937864e-06, + "loss": 0.1458, + "step": 91955 + }, + { + "epoch": 4.29, + "learning_rate": 5.747339049739e-06, + "loss": 0.0827, + "step": 91960 + }, + { + "epoch": 4.29, + "learning_rate": 5.746555264684213e-06, + "loss": 0.0964, + "step": 91965 + }, + { + "epoch": 4.29, + "learning_rate": 5.745771479629427e-06, + "loss": 0.2677, + "step": 91970 + }, + { + "epoch": 4.29, + "learning_rate": 5.74498769457464e-06, + "loss": 0.3508, + "step": 91975 + }, + { + "epoch": 4.29, + "learning_rate": 5.744203909519854e-06, + "loss": 0.0687, + "step": 91980 + }, + { + "epoch": 4.29, + "learning_rate": 5.7434201244650665e-06, + "loss": 0.0129, + "step": 91985 + }, + { + "epoch": 4.29, + "learning_rate": 5.74263633941028e-06, + "loss": 0.0404, + "step": 91990 + }, + { + "epoch": 4.29, + "learning_rate": 5.7418525543554935e-06, + "loss": 0.0596, + "step": 91995 + }, + { + "epoch": 4.29, + "learning_rate": 5.741068769300707e-06, + "loss": 0.0869, + "step": 92000 + }, + { + "epoch": 4.29, + "learning_rate": 5.74028498424592e-06, + "loss": 0.0776, + "step": 92005 + }, + { + "epoch": 4.29, + "learning_rate": 5.739501199191134e-06, + "loss": 0.1546, + "step": 92010 + }, + { + "epoch": 4.29, + "learning_rate": 5.738717414136347e-06, + "loss": 0.19, + "step": 92015 + }, + { + "epoch": 4.29, + "learning_rate": 5.737933629081561e-06, + "loss": 0.1823, + "step": 92020 + }, + { + "epoch": 4.29, + "learning_rate": 5.737149844026774e-06, + "loss": 0.3525, + "step": 92025 + }, + { + "epoch": 4.29, + "learning_rate": 5.736366058971988e-06, + "loss": 0.0582, + "step": 92030 + }, + { + "epoch": 4.29, + "learning_rate": 5.735582273917202e-06, + "loss": 0.014, + "step": 92035 + }, + { + "epoch": 4.29, + "learning_rate": 5.734798488862415e-06, + "loss": 0.0688, + "step": 92040 + }, + { + "epoch": 4.29, + "learning_rate": 5.734014703807629e-06, + "loss": 0.0203, + "step": 92045 + }, + { + "epoch": 4.3, + "learning_rate": 5.733230918752841e-06, + "loss": 0.0462, + "step": 92050 + }, + { + "epoch": 4.3, + "learning_rate": 5.732447133698054e-06, + "loss": 0.0837, + "step": 92055 + }, + { + "epoch": 4.3, + "learning_rate": 5.731663348643268e-06, + "loss": 0.0777, + "step": 92060 + }, + { + "epoch": 4.3, + "learning_rate": 5.730879563588481e-06, + "loss": 0.1023, + "step": 92065 + }, + { + "epoch": 4.3, + "learning_rate": 5.730095778533695e-06, + "loss": 0.3469, + "step": 92070 + }, + { + "epoch": 4.3, + "learning_rate": 5.729311993478909e-06, + "loss": 0.3225, + "step": 92075 + }, + { + "epoch": 4.3, + "learning_rate": 5.728528208424122e-06, + "loss": 0.1317, + "step": 92080 + }, + { + "epoch": 4.3, + "learning_rate": 5.727744423369336e-06, + "loss": 0.0505, + "step": 92085 + }, + { + "epoch": 4.3, + "learning_rate": 5.726960638314549e-06, + "loss": 0.0072, + "step": 92090 + }, + { + "epoch": 4.3, + "learning_rate": 5.726176853259763e-06, + "loss": 0.0432, + "step": 92095 + }, + { + "epoch": 4.3, + "learning_rate": 5.725393068204976e-06, + "loss": 0.0355, + "step": 92100 + }, + { + "epoch": 4.3, + "learning_rate": 5.72460928315019e-06, + "loss": 0.081, + "step": 92105 + }, + { + "epoch": 4.3, + "learning_rate": 5.723825498095403e-06, + "loss": 0.1141, + "step": 92110 + }, + { + "epoch": 4.3, + "learning_rate": 5.723041713040615e-06, + "loss": 0.0998, + "step": 92115 + }, + { + "epoch": 4.3, + "learning_rate": 5.722257927985829e-06, + "loss": 0.1343, + "step": 92120 + }, + { + "epoch": 4.3, + "learning_rate": 5.721474142931043e-06, + "loss": 0.2115, + "step": 92125 + }, + { + "epoch": 4.3, + "learning_rate": 5.720690357876256e-06, + "loss": 0.1211, + "step": 92130 + }, + { + "epoch": 4.3, + "learning_rate": 5.71990657282147e-06, + "loss": 0.0248, + "step": 92135 + }, + { + "epoch": 4.3, + "learning_rate": 5.719122787766683e-06, + "loss": 0.0224, + "step": 92140 + }, + { + "epoch": 4.3, + "learning_rate": 5.718339002711897e-06, + "loss": 0.02, + "step": 92145 + }, + { + "epoch": 4.3, + "learning_rate": 5.71755521765711e-06, + "loss": 0.0683, + "step": 92150 + }, + { + "epoch": 4.3, + "learning_rate": 5.716771432602324e-06, + "loss": 0.1205, + "step": 92155 + }, + { + "epoch": 4.3, + "learning_rate": 5.715987647547537e-06, + "loss": 0.0763, + "step": 92160 + }, + { + "epoch": 4.3, + "learning_rate": 5.715203862492751e-06, + "loss": 0.0742, + "step": 92165 + }, + { + "epoch": 4.3, + "learning_rate": 5.714420077437964e-06, + "loss": 0.1694, + "step": 92170 + }, + { + "epoch": 4.3, + "learning_rate": 5.713636292383178e-06, + "loss": 0.3072, + "step": 92175 + }, + { + "epoch": 4.3, + "learning_rate": 5.71285250732839e-06, + "loss": 0.0853, + "step": 92180 + }, + { + "epoch": 4.3, + "learning_rate": 5.712068722273604e-06, + "loss": 0.0238, + "step": 92185 + }, + { + "epoch": 4.3, + "learning_rate": 5.711284937218817e-06, + "loss": 0.0184, + "step": 92190 + }, + { + "epoch": 4.3, + "learning_rate": 5.710501152164031e-06, + "loss": 0.0385, + "step": 92195 + }, + { + "epoch": 4.3, + "learning_rate": 5.709717367109244e-06, + "loss": 0.0656, + "step": 92200 + }, + { + "epoch": 4.3, + "learning_rate": 5.708933582054458e-06, + "loss": 0.0727, + "step": 92205 + }, + { + "epoch": 4.3, + "learning_rate": 5.708149796999671e-06, + "loss": 0.0878, + "step": 92210 + }, + { + "epoch": 4.3, + "learning_rate": 5.707366011944885e-06, + "loss": 0.084, + "step": 92215 + }, + { + "epoch": 4.3, + "learning_rate": 5.706582226890098e-06, + "loss": 0.2025, + "step": 92220 + }, + { + "epoch": 4.3, + "learning_rate": 5.705798441835312e-06, + "loss": 0.3276, + "step": 92225 + }, + { + "epoch": 4.3, + "learning_rate": 5.705014656780525e-06, + "loss": 0.0611, + "step": 92230 + }, + { + "epoch": 4.3, + "learning_rate": 5.704230871725739e-06, + "loss": 0.1299, + "step": 92235 + }, + { + "epoch": 4.3, + "learning_rate": 5.703447086670952e-06, + "loss": 0.0497, + "step": 92240 + }, + { + "epoch": 4.3, + "learning_rate": 5.702663301616165e-06, + "loss": 0.0629, + "step": 92245 + }, + { + "epoch": 4.3, + "learning_rate": 5.701879516561378e-06, + "loss": 0.0587, + "step": 92250 + }, + { + "epoch": 4.3, + "learning_rate": 5.701095731506592e-06, + "loss": 0.0905, + "step": 92255 + }, + { + "epoch": 4.3, + "learning_rate": 5.700311946451805e-06, + "loss": 0.1049, + "step": 92260 + }, + { + "epoch": 4.31, + "learning_rate": 5.699528161397019e-06, + "loss": 0.1427, + "step": 92265 + }, + { + "epoch": 4.31, + "learning_rate": 5.698744376342232e-06, + "loss": 0.2616, + "step": 92270 + }, + { + "epoch": 4.31, + "learning_rate": 5.697960591287446e-06, + "loss": 0.1968, + "step": 92275 + }, + { + "epoch": 4.31, + "learning_rate": 5.697176806232659e-06, + "loss": 0.0754, + "step": 92280 + }, + { + "epoch": 4.31, + "learning_rate": 5.696393021177873e-06, + "loss": 0.0246, + "step": 92285 + }, + { + "epoch": 4.31, + "learning_rate": 5.695609236123086e-06, + "loss": 0.0334, + "step": 92290 + }, + { + "epoch": 4.31, + "learning_rate": 5.6948254510683e-06, + "loss": 0.0211, + "step": 92295 + }, + { + "epoch": 4.31, + "learning_rate": 5.694041666013514e-06, + "loss": 0.0471, + "step": 92300 + }, + { + "epoch": 4.31, + "learning_rate": 5.693257880958727e-06, + "loss": 0.1016, + "step": 92305 + }, + { + "epoch": 4.31, + "learning_rate": 5.692474095903939e-06, + "loss": 0.1201, + "step": 92310 + }, + { + "epoch": 4.31, + "learning_rate": 5.691690310849153e-06, + "loss": 0.0935, + "step": 92315 + }, + { + "epoch": 4.31, + "learning_rate": 5.690906525794366e-06, + "loss": 0.2127, + "step": 92320 + }, + { + "epoch": 4.31, + "learning_rate": 5.69012274073958e-06, + "loss": 0.299, + "step": 92325 + }, + { + "epoch": 4.31, + "learning_rate": 5.689338955684793e-06, + "loss": 0.089, + "step": 92330 + }, + { + "epoch": 4.31, + "learning_rate": 5.688555170630007e-06, + "loss": 0.0127, + "step": 92335 + }, + { + "epoch": 4.31, + "learning_rate": 5.687771385575221e-06, + "loss": 0.0356, + "step": 92340 + }, + { + "epoch": 4.31, + "learning_rate": 5.686987600520434e-06, + "loss": 0.0709, + "step": 92345 + }, + { + "epoch": 4.31, + "learning_rate": 5.686203815465648e-06, + "loss": 0.0501, + "step": 92350 + }, + { + "epoch": 4.31, + "learning_rate": 5.685420030410861e-06, + "loss": 0.0524, + "step": 92355 + }, + { + "epoch": 4.31, + "learning_rate": 5.684636245356075e-06, + "loss": 0.0441, + "step": 92360 + }, + { + "epoch": 4.31, + "learning_rate": 5.683852460301288e-06, + "loss": 0.1178, + "step": 92365 + }, + { + "epoch": 4.31, + "learning_rate": 5.683068675246502e-06, + "loss": 0.2904, + "step": 92370 + }, + { + "epoch": 4.31, + "learning_rate": 5.682284890191714e-06, + "loss": 0.2871, + "step": 92375 + }, + { + "epoch": 4.31, + "learning_rate": 5.681501105136927e-06, + "loss": 0.1098, + "step": 92380 + }, + { + "epoch": 4.31, + "learning_rate": 5.680717320082141e-06, + "loss": 0.0876, + "step": 92385 + }, + { + "epoch": 4.31, + "learning_rate": 5.679933535027355e-06, + "loss": 0.0149, + "step": 92390 + }, + { + "epoch": 4.31, + "learning_rate": 5.679149749972568e-06, + "loss": 0.036, + "step": 92395 + }, + { + "epoch": 4.31, + "learning_rate": 5.678365964917782e-06, + "loss": 0.0441, + "step": 92400 + }, + { + "epoch": 4.31, + "learning_rate": 5.677582179862995e-06, + "loss": 0.0723, + "step": 92405 + }, + { + "epoch": 4.31, + "learning_rate": 5.676798394808209e-06, + "loss": 0.0428, + "step": 92410 + }, + { + "epoch": 4.31, + "learning_rate": 5.676014609753422e-06, + "loss": 0.1119, + "step": 92415 + }, + { + "epoch": 4.31, + "learning_rate": 5.675230824698636e-06, + "loss": 0.0758, + "step": 92420 + }, + { + "epoch": 4.31, + "learning_rate": 5.674447039643849e-06, + "loss": 0.3472, + "step": 92425 + }, + { + "epoch": 4.31, + "learning_rate": 5.673663254589063e-06, + "loss": 0.0403, + "step": 92430 + }, + { + "epoch": 4.31, + "learning_rate": 5.672879469534276e-06, + "loss": 0.0232, + "step": 92435 + }, + { + "epoch": 4.31, + "learning_rate": 5.672095684479489e-06, + "loss": 0.015, + "step": 92440 + }, + { + "epoch": 4.31, + "learning_rate": 5.671311899424702e-06, + "loss": 0.0632, + "step": 92445 + }, + { + "epoch": 4.31, + "learning_rate": 5.670528114369916e-06, + "loss": 0.0359, + "step": 92450 + }, + { + "epoch": 4.31, + "learning_rate": 5.669744329315129e-06, + "loss": 0.0974, + "step": 92455 + }, + { + "epoch": 4.31, + "learning_rate": 5.668960544260343e-06, + "loss": 0.0389, + "step": 92460 + }, + { + "epoch": 4.31, + "learning_rate": 5.668176759205556e-06, + "loss": 0.1899, + "step": 92465 + }, + { + "epoch": 4.31, + "learning_rate": 5.66739297415077e-06, + "loss": 0.1237, + "step": 92470 + }, + { + "epoch": 4.32, + "learning_rate": 5.666609189095983e-06, + "loss": 0.4016, + "step": 92475 + }, + { + "epoch": 4.32, + "learning_rate": 5.6658254040411966e-06, + "loss": 0.0711, + "step": 92480 + }, + { + "epoch": 4.32, + "learning_rate": 5.66504161898641e-06, + "loss": 0.0164, + "step": 92485 + }, + { + "epoch": 4.32, + "learning_rate": 5.6642578339316235e-06, + "loss": 0.0291, + "step": 92490 + }, + { + "epoch": 4.32, + "learning_rate": 5.663474048876837e-06, + "loss": 0.0205, + "step": 92495 + }, + { + "epoch": 4.32, + "learning_rate": 5.6626902638220505e-06, + "loss": 0.1012, + "step": 92500 + }, + { + "epoch": 4.32, + "learning_rate": 5.661906478767263e-06, + "loss": 0.1101, + "step": 92505 + }, + { + "epoch": 4.32, + "learning_rate": 5.661122693712477e-06, + "loss": 0.1274, + "step": 92510 + }, + { + "epoch": 4.32, + "learning_rate": 5.66033890865769e-06, + "loss": 0.0668, + "step": 92515 + }, + { + "epoch": 4.32, + "learning_rate": 5.659555123602904e-06, + "loss": 0.1267, + "step": 92520 + }, + { + "epoch": 4.32, + "learning_rate": 5.658771338548117e-06, + "loss": 0.2288, + "step": 92525 + }, + { + "epoch": 4.32, + "learning_rate": 5.6579875534933306e-06, + "loss": 0.1182, + "step": 92530 + }, + { + "epoch": 4.32, + "learning_rate": 5.657203768438544e-06, + "loss": 0.0092, + "step": 92535 + }, + { + "epoch": 4.32, + "learning_rate": 5.6564199833837575e-06, + "loss": 0.0518, + "step": 92540 + }, + { + "epoch": 4.32, + "learning_rate": 5.6556361983289706e-06, + "loss": 0.0696, + "step": 92545 + }, + { + "epoch": 4.32, + "learning_rate": 5.6548524132741845e-06, + "loss": 0.0519, + "step": 92550 + }, + { + "epoch": 4.32, + "learning_rate": 5.6540686282193975e-06, + "loss": 0.0841, + "step": 92555 + }, + { + "epoch": 4.32, + "learning_rate": 5.6532848431646114e-06, + "loss": 0.0735, + "step": 92560 + }, + { + "epoch": 4.32, + "learning_rate": 5.652501058109825e-06, + "loss": 0.1308, + "step": 92565 + }, + { + "epoch": 4.32, + "learning_rate": 5.6517172730550376e-06, + "loss": 0.1185, + "step": 92570 + }, + { + "epoch": 4.32, + "learning_rate": 5.650933488000251e-06, + "loss": 0.2907, + "step": 92575 + }, + { + "epoch": 4.32, + "learning_rate": 5.6501497029454645e-06, + "loss": 0.0912, + "step": 92580 + }, + { + "epoch": 4.32, + "learning_rate": 5.649365917890678e-06, + "loss": 0.0116, + "step": 92585 + }, + { + "epoch": 4.32, + "learning_rate": 5.6485821328358915e-06, + "loss": 0.0385, + "step": 92590 + }, + { + "epoch": 4.32, + "learning_rate": 5.6477983477811045e-06, + "loss": 0.0395, + "step": 92595 + }, + { + "epoch": 4.32, + "learning_rate": 5.6470145627263185e-06, + "loss": 0.0793, + "step": 92600 + }, + { + "epoch": 4.32, + "learning_rate": 5.6462307776715315e-06, + "loss": 0.0814, + "step": 92605 + }, + { + "epoch": 4.32, + "learning_rate": 5.645446992616745e-06, + "loss": 0.0984, + "step": 92610 + }, + { + "epoch": 4.32, + "learning_rate": 5.644663207561959e-06, + "loss": 0.0662, + "step": 92615 + }, + { + "epoch": 4.32, + "learning_rate": 5.643879422507172e-06, + "loss": 0.2063, + "step": 92620 + }, + { + "epoch": 4.32, + "learning_rate": 5.643095637452386e-06, + "loss": 0.2029, + "step": 92625 + }, + { + "epoch": 4.32, + "learning_rate": 5.642311852397599e-06, + "loss": 0.0741, + "step": 92630 + }, + { + "epoch": 4.32, + "learning_rate": 5.6415280673428116e-06, + "loss": 0.0396, + "step": 92635 + }, + { + "epoch": 4.32, + "learning_rate": 5.6407442822880255e-06, + "loss": 0.0642, + "step": 92640 + }, + { + "epoch": 4.32, + "learning_rate": 5.6399604972332385e-06, + "loss": 0.0635, + "step": 92645 + }, + { + "epoch": 4.32, + "learning_rate": 5.6391767121784524e-06, + "loss": 0.0769, + "step": 92650 + }, + { + "epoch": 4.32, + "learning_rate": 5.638392927123666e-06, + "loss": 0.077, + "step": 92655 + }, + { + "epoch": 4.32, + "learning_rate": 5.637609142068879e-06, + "loss": 0.0887, + "step": 92660 + }, + { + "epoch": 4.32, + "learning_rate": 5.636825357014093e-06, + "loss": 0.0834, + "step": 92665 + }, + { + "epoch": 4.32, + "learning_rate": 5.636041571959306e-06, + "loss": 0.1668, + "step": 92670 + }, + { + "epoch": 4.32, + "learning_rate": 5.63525778690452e-06, + "loss": 0.122, + "step": 92675 + }, + { + "epoch": 4.32, + "learning_rate": 5.634474001849733e-06, + "loss": 0.1064, + "step": 92680 + }, + { + "epoch": 4.32, + "learning_rate": 5.633690216794947e-06, + "loss": 0.0488, + "step": 92685 + }, + { + "epoch": 4.33, + "learning_rate": 5.63290643174016e-06, + "loss": 0.017, + "step": 92690 + }, + { + "epoch": 4.33, + "learning_rate": 5.632122646685374e-06, + "loss": 0.0428, + "step": 92695 + }, + { + "epoch": 4.33, + "learning_rate": 5.631338861630586e-06, + "loss": 0.1279, + "step": 92700 + }, + { + "epoch": 4.33, + "learning_rate": 5.6305550765758e-06, + "loss": 0.029, + "step": 92705 + }, + { + "epoch": 4.33, + "learning_rate": 5.629771291521013e-06, + "loss": 0.078, + "step": 92710 + }, + { + "epoch": 4.33, + "learning_rate": 5.628987506466227e-06, + "loss": 0.0682, + "step": 92715 + }, + { + "epoch": 4.33, + "learning_rate": 5.62820372141144e-06, + "loss": 0.1448, + "step": 92720 + }, + { + "epoch": 4.33, + "learning_rate": 5.627419936356654e-06, + "loss": 0.2363, + "step": 92725 + }, + { + "epoch": 4.33, + "learning_rate": 5.626636151301867e-06, + "loss": 0.0887, + "step": 92730 + }, + { + "epoch": 4.33, + "learning_rate": 5.625852366247081e-06, + "loss": 0.0445, + "step": 92735 + }, + { + "epoch": 4.33, + "learning_rate": 5.625068581192294e-06, + "loss": 0.0443, + "step": 92740 + }, + { + "epoch": 4.33, + "learning_rate": 5.624284796137508e-06, + "loss": 0.0344, + "step": 92745 + }, + { + "epoch": 4.33, + "learning_rate": 5.623501011082721e-06, + "loss": 0.0571, + "step": 92750 + }, + { + "epoch": 4.33, + "learning_rate": 5.622717226027935e-06, + "loss": 0.0273, + "step": 92755 + }, + { + "epoch": 4.33, + "learning_rate": 5.621933440973148e-06, + "loss": 0.056, + "step": 92760 + }, + { + "epoch": 4.33, + "learning_rate": 5.621149655918361e-06, + "loss": 0.1013, + "step": 92765 + }, + { + "epoch": 4.33, + "learning_rate": 5.620365870863574e-06, + "loss": 0.2048, + "step": 92770 + }, + { + "epoch": 4.33, + "learning_rate": 5.619582085808788e-06, + "loss": 0.3133, + "step": 92775 + }, + { + "epoch": 4.33, + "learning_rate": 5.618798300754001e-06, + "loss": 0.1164, + "step": 92780 + }, + { + "epoch": 4.33, + "learning_rate": 5.618014515699215e-06, + "loss": 0.0282, + "step": 92785 + }, + { + "epoch": 4.33, + "learning_rate": 5.617230730644428e-06, + "loss": 0.0423, + "step": 92790 + }, + { + "epoch": 4.33, + "learning_rate": 5.616446945589642e-06, + "loss": 0.0698, + "step": 92795 + }, + { + "epoch": 4.33, + "learning_rate": 5.615663160534855e-06, + "loss": 0.0539, + "step": 92800 + }, + { + "epoch": 4.33, + "learning_rate": 5.614879375480069e-06, + "loss": 0.1238, + "step": 92805 + }, + { + "epoch": 4.33, + "learning_rate": 5.614095590425282e-06, + "loss": 0.0739, + "step": 92810 + }, + { + "epoch": 4.33, + "learning_rate": 5.613311805370496e-06, + "loss": 0.1163, + "step": 92815 + }, + { + "epoch": 4.33, + "learning_rate": 5.612528020315709e-06, + "loss": 0.1494, + "step": 92820 + }, + { + "epoch": 4.33, + "learning_rate": 5.611744235260923e-06, + "loss": 0.3012, + "step": 92825 + }, + { + "epoch": 4.33, + "learning_rate": 5.610960450206135e-06, + "loss": 0.1179, + "step": 92830 + }, + { + "epoch": 4.33, + "learning_rate": 5.610176665151349e-06, + "loss": 0.023, + "step": 92835 + }, + { + "epoch": 4.33, + "learning_rate": 5.609392880096562e-06, + "loss": 0.0438, + "step": 92840 + }, + { + "epoch": 4.33, + "learning_rate": 5.608609095041776e-06, + "loss": 0.0163, + "step": 92845 + }, + { + "epoch": 4.33, + "learning_rate": 5.607825309986989e-06, + "loss": 0.0502, + "step": 92850 + }, + { + "epoch": 4.33, + "learning_rate": 5.607041524932203e-06, + "loss": 0.1047, + "step": 92855 + }, + { + "epoch": 4.33, + "learning_rate": 5.606257739877416e-06, + "loss": 0.1257, + "step": 92860 + }, + { + "epoch": 4.33, + "learning_rate": 5.60547395482263e-06, + "loss": 0.1548, + "step": 92865 + }, + { + "epoch": 4.33, + "learning_rate": 5.604690169767843e-06, + "loss": 0.2065, + "step": 92870 + }, + { + "epoch": 4.33, + "learning_rate": 5.603906384713057e-06, + "loss": 0.3251, + "step": 92875 + }, + { + "epoch": 4.33, + "learning_rate": 5.603122599658271e-06, + "loss": 0.0702, + "step": 92880 + }, + { + "epoch": 4.33, + "learning_rate": 5.602338814603484e-06, + "loss": 0.0147, + "step": 92885 + }, + { + "epoch": 4.33, + "learning_rate": 5.601555029548698e-06, + "loss": 0.0529, + "step": 92890 + }, + { + "epoch": 4.33, + "learning_rate": 5.60077124449391e-06, + "loss": 0.0425, + "step": 92895 + }, + { + "epoch": 4.33, + "learning_rate": 5.599987459439123e-06, + "loss": 0.0575, + "step": 92900 + }, + { + "epoch": 4.34, + "learning_rate": 5.599203674384337e-06, + "loss": 0.0861, + "step": 92905 + }, + { + "epoch": 4.34, + "learning_rate": 5.59841988932955e-06, + "loss": 0.0507, + "step": 92910 + }, + { + "epoch": 4.34, + "learning_rate": 5.597636104274764e-06, + "loss": 0.0715, + "step": 92915 + }, + { + "epoch": 4.34, + "learning_rate": 5.596852319219978e-06, + "loss": 0.2377, + "step": 92920 + }, + { + "epoch": 4.34, + "learning_rate": 5.596068534165191e-06, + "loss": 0.3196, + "step": 92925 + }, + { + "epoch": 4.34, + "learning_rate": 5.595284749110405e-06, + "loss": 0.0734, + "step": 92930 + }, + { + "epoch": 4.34, + "learning_rate": 5.594500964055618e-06, + "loss": 0.0267, + "step": 92935 + }, + { + "epoch": 4.34, + "learning_rate": 5.593717179000832e-06, + "loss": 0.0639, + "step": 92940 + }, + { + "epoch": 4.34, + "learning_rate": 5.592933393946045e-06, + "loss": 0.0708, + "step": 92945 + }, + { + "epoch": 4.34, + "learning_rate": 5.592149608891259e-06, + "loss": 0.0917, + "step": 92950 + }, + { + "epoch": 4.34, + "learning_rate": 5.591365823836472e-06, + "loss": 0.0304, + "step": 92955 + }, + { + "epoch": 4.34, + "learning_rate": 5.590582038781684e-06, + "loss": 0.2022, + "step": 92960 + }, + { + "epoch": 4.34, + "learning_rate": 5.589798253726898e-06, + "loss": 0.1388, + "step": 92965 + }, + { + "epoch": 4.34, + "learning_rate": 5.589014468672112e-06, + "loss": 0.1263, + "step": 92970 + }, + { + "epoch": 4.34, + "learning_rate": 5.588230683617325e-06, + "loss": 0.2642, + "step": 92975 + }, + { + "epoch": 4.34, + "learning_rate": 5.587446898562539e-06, + "loss": 0.09, + "step": 92980 + }, + { + "epoch": 4.34, + "learning_rate": 5.586663113507752e-06, + "loss": 0.0289, + "step": 92985 + }, + { + "epoch": 4.34, + "learning_rate": 5.585879328452966e-06, + "loss": 0.0786, + "step": 92990 + }, + { + "epoch": 4.34, + "learning_rate": 5.585095543398179e-06, + "loss": 0.0513, + "step": 92995 + }, + { + "epoch": 4.34, + "learning_rate": 5.584311758343393e-06, + "loss": 0.0352, + "step": 93000 + }, + { + "epoch": 4.34, + "learning_rate": 5.583527973288606e-06, + "loss": 0.0673, + "step": 93005 + }, + { + "epoch": 4.34, + "learning_rate": 5.58274418823382e-06, + "loss": 0.0303, + "step": 93010 + }, + { + "epoch": 4.34, + "learning_rate": 5.581960403179033e-06, + "loss": 0.1573, + "step": 93015 + }, + { + "epoch": 4.34, + "learning_rate": 5.581176618124247e-06, + "loss": 0.1667, + "step": 93020 + }, + { + "epoch": 4.34, + "learning_rate": 5.580392833069459e-06, + "loss": 0.3154, + "step": 93025 + }, + { + "epoch": 4.34, + "learning_rate": 5.579609048014673e-06, + "loss": 0.0641, + "step": 93030 + }, + { + "epoch": 4.34, + "learning_rate": 5.578825262959886e-06, + "loss": 0.0214, + "step": 93035 + }, + { + "epoch": 4.34, + "learning_rate": 5.5780414779051e-06, + "loss": 0.0258, + "step": 93040 + }, + { + "epoch": 4.34, + "learning_rate": 5.577257692850313e-06, + "loss": 0.049, + "step": 93045 + }, + { + "epoch": 4.34, + "learning_rate": 5.576473907795527e-06, + "loss": 0.0479, + "step": 93050 + }, + { + "epoch": 4.34, + "learning_rate": 5.57569012274074e-06, + "loss": 0.0786, + "step": 93055 + }, + { + "epoch": 4.34, + "learning_rate": 5.574906337685954e-06, + "loss": 0.0668, + "step": 93060 + }, + { + "epoch": 4.34, + "learning_rate": 5.574122552631167e-06, + "loss": 0.1106, + "step": 93065 + }, + { + "epoch": 4.34, + "learning_rate": 5.573338767576381e-06, + "loss": 0.0765, + "step": 93070 + }, + { + "epoch": 4.34, + "learning_rate": 5.572554982521594e-06, + "loss": 0.2523, + "step": 93075 + }, + { + "epoch": 4.34, + "learning_rate": 5.571771197466808e-06, + "loss": 0.1044, + "step": 93080 + }, + { + "epoch": 4.34, + "learning_rate": 5.570987412412021e-06, + "loss": 0.036, + "step": 93085 + }, + { + "epoch": 4.34, + "learning_rate": 5.570203627357234e-06, + "loss": 0.0114, + "step": 93090 + }, + { + "epoch": 4.34, + "learning_rate": 5.569419842302447e-06, + "loss": 0.052, + "step": 93095 + }, + { + "epoch": 4.34, + "learning_rate": 5.568636057247661e-06, + "loss": 0.0453, + "step": 93100 + }, + { + "epoch": 4.34, + "learning_rate": 5.567852272192874e-06, + "loss": 0.0893, + "step": 93105 + }, + { + "epoch": 4.34, + "learning_rate": 5.567068487138088e-06, + "loss": 0.1599, + "step": 93110 + }, + { + "epoch": 4.34, + "learning_rate": 5.566284702083301e-06, + "loss": 0.1558, + "step": 93115 + }, + { + "epoch": 4.35, + "learning_rate": 5.565500917028515e-06, + "loss": 0.1103, + "step": 93120 + }, + { + "epoch": 4.35, + "learning_rate": 5.564717131973728e-06, + "loss": 0.2208, + "step": 93125 + }, + { + "epoch": 4.35, + "learning_rate": 5.563933346918942e-06, + "loss": 0.0891, + "step": 93130 + }, + { + "epoch": 4.35, + "learning_rate": 5.563149561864155e-06, + "loss": 0.0128, + "step": 93135 + }, + { + "epoch": 4.35, + "learning_rate": 5.562365776809369e-06, + "loss": 0.1125, + "step": 93140 + }, + { + "epoch": 4.35, + "learning_rate": 5.5615819917545825e-06, + "loss": 0.0648, + "step": 93145 + }, + { + "epoch": 4.35, + "learning_rate": 5.5607982066997956e-06, + "loss": 0.0401, + "step": 93150 + }, + { + "epoch": 4.35, + "learning_rate": 5.560014421645008e-06, + "loss": 0.0634, + "step": 93155 + }, + { + "epoch": 4.35, + "learning_rate": 5.559230636590222e-06, + "loss": 0.0621, + "step": 93160 + }, + { + "epoch": 4.35, + "learning_rate": 5.558446851535435e-06, + "loss": 0.1719, + "step": 93165 + }, + { + "epoch": 4.35, + "learning_rate": 5.557663066480649e-06, + "loss": 0.1685, + "step": 93170 + }, + { + "epoch": 4.35, + "learning_rate": 5.556879281425862e-06, + "loss": 0.289, + "step": 93175 + }, + { + "epoch": 4.35, + "learning_rate": 5.556095496371076e-06, + "loss": 0.0859, + "step": 93180 + }, + { + "epoch": 4.35, + "learning_rate": 5.555311711316289e-06, + "loss": 0.0215, + "step": 93185 + }, + { + "epoch": 4.35, + "learning_rate": 5.554527926261503e-06, + "loss": 0.0543, + "step": 93190 + }, + { + "epoch": 4.35, + "learning_rate": 5.5537441412067165e-06, + "loss": 0.0316, + "step": 93195 + }, + { + "epoch": 4.35, + "learning_rate": 5.5529603561519295e-06, + "loss": 0.0465, + "step": 93200 + }, + { + "epoch": 4.35, + "learning_rate": 5.5521765710971435e-06, + "loss": 0.0542, + "step": 93205 + }, + { + "epoch": 4.35, + "learning_rate": 5.5513927860423565e-06, + "loss": 0.063, + "step": 93210 + }, + { + "epoch": 4.35, + "learning_rate": 5.55060900098757e-06, + "loss": 0.1363, + "step": 93215 + }, + { + "epoch": 4.35, + "learning_rate": 5.549825215932783e-06, + "loss": 0.1544, + "step": 93220 + }, + { + "epoch": 4.35, + "learning_rate": 5.549041430877996e-06, + "loss": 0.2124, + "step": 93225 + }, + { + "epoch": 4.35, + "learning_rate": 5.54825764582321e-06, + "loss": 0.1124, + "step": 93230 + }, + { + "epoch": 4.35, + "learning_rate": 5.5474738607684235e-06, + "loss": 0.0105, + "step": 93235 + }, + { + "epoch": 4.35, + "learning_rate": 5.5466900757136366e-06, + "loss": 0.0801, + "step": 93240 + }, + { + "epoch": 4.35, + "learning_rate": 5.5459062906588505e-06, + "loss": 0.0604, + "step": 93245 + }, + { + "epoch": 4.35, + "learning_rate": 5.5451225056040635e-06, + "loss": 0.0618, + "step": 93250 + }, + { + "epoch": 4.35, + "learning_rate": 5.5443387205492774e-06, + "loss": 0.0735, + "step": 93255 + }, + { + "epoch": 4.35, + "learning_rate": 5.5435549354944905e-06, + "loss": 0.2767, + "step": 93260 + }, + { + "epoch": 4.35, + "learning_rate": 5.542771150439704e-06, + "loss": 0.0636, + "step": 93265 + }, + { + "epoch": 4.35, + "learning_rate": 5.5419873653849175e-06, + "loss": 0.2251, + "step": 93270 + }, + { + "epoch": 4.35, + "learning_rate": 5.541203580330131e-06, + "loss": 0.2216, + "step": 93275 + }, + { + "epoch": 4.35, + "learning_rate": 5.540419795275344e-06, + "loss": 0.064, + "step": 93280 + }, + { + "epoch": 4.35, + "learning_rate": 5.5396360102205575e-06, + "loss": 0.0138, + "step": 93285 + }, + { + "epoch": 4.35, + "learning_rate": 5.5388522251657705e-06, + "loss": 0.0296, + "step": 93290 + }, + { + "epoch": 4.35, + "learning_rate": 5.5380684401109844e-06, + "loss": 0.0228, + "step": 93295 + }, + { + "epoch": 4.35, + "learning_rate": 5.5372846550561975e-06, + "loss": 0.059, + "step": 93300 + }, + { + "epoch": 4.35, + "learning_rate": 5.536500870001411e-06, + "loss": 0.0708, + "step": 93305 + }, + { + "epoch": 4.35, + "learning_rate": 5.5357170849466245e-06, + "loss": 0.1692, + "step": 93310 + }, + { + "epoch": 4.35, + "learning_rate": 5.534933299891838e-06, + "loss": 0.1297, + "step": 93315 + }, + { + "epoch": 4.35, + "learning_rate": 5.5341495148370514e-06, + "loss": 0.1018, + "step": 93320 + }, + { + "epoch": 4.35, + "learning_rate": 5.533365729782265e-06, + "loss": 0.2941, + "step": 93325 + }, + { + "epoch": 4.35, + "learning_rate": 5.532581944727478e-06, + "loss": 0.0471, + "step": 93330 + }, + { + "epoch": 4.36, + "learning_rate": 5.531798159672692e-06, + "loss": 0.0068, + "step": 93335 + }, + { + "epoch": 4.36, + "learning_rate": 5.531014374617905e-06, + "loss": 0.0373, + "step": 93340 + }, + { + "epoch": 4.36, + "learning_rate": 5.530230589563119e-06, + "loss": 0.0521, + "step": 93345 + }, + { + "epoch": 4.36, + "learning_rate": 5.5294468045083315e-06, + "loss": 0.0578, + "step": 93350 + }, + { + "epoch": 4.36, + "learning_rate": 5.528663019453545e-06, + "loss": 0.0775, + "step": 93355 + }, + { + "epoch": 4.36, + "learning_rate": 5.5278792343987584e-06, + "loss": 0.0369, + "step": 93360 + }, + { + "epoch": 4.36, + "learning_rate": 5.527095449343972e-06, + "loss": 0.1059, + "step": 93365 + }, + { + "epoch": 4.36, + "learning_rate": 5.526311664289185e-06, + "loss": 0.1973, + "step": 93370 + }, + { + "epoch": 4.36, + "learning_rate": 5.525527879234399e-06, + "loss": 0.3005, + "step": 93375 + }, + { + "epoch": 4.36, + "learning_rate": 5.524744094179612e-06, + "loss": 0.0914, + "step": 93380 + }, + { + "epoch": 4.36, + "learning_rate": 5.523960309124826e-06, + "loss": 0.0168, + "step": 93385 + }, + { + "epoch": 4.36, + "learning_rate": 5.523176524070039e-06, + "loss": 0.04, + "step": 93390 + }, + { + "epoch": 4.36, + "learning_rate": 5.522392739015253e-06, + "loss": 0.0474, + "step": 93395 + }, + { + "epoch": 4.36, + "learning_rate": 5.521608953960466e-06, + "loss": 0.055, + "step": 93400 + }, + { + "epoch": 4.36, + "learning_rate": 5.52082516890568e-06, + "loss": 0.0748, + "step": 93405 + }, + { + "epoch": 4.36, + "learning_rate": 5.520041383850894e-06, + "loss": 0.0981, + "step": 93410 + }, + { + "epoch": 4.36, + "learning_rate": 5.519257598796106e-06, + "loss": 0.0634, + "step": 93415 + }, + { + "epoch": 4.36, + "learning_rate": 5.518473813741319e-06, + "loss": 0.1274, + "step": 93420 + }, + { + "epoch": 4.36, + "learning_rate": 5.517690028686533e-06, + "loss": 0.3517, + "step": 93425 + }, + { + "epoch": 4.36, + "learning_rate": 5.516906243631746e-06, + "loss": 0.1113, + "step": 93430 + }, + { + "epoch": 4.36, + "learning_rate": 5.51612245857696e-06, + "loss": 0.0349, + "step": 93435 + }, + { + "epoch": 4.36, + "learning_rate": 5.515338673522173e-06, + "loss": 0.0222, + "step": 93440 + }, + { + "epoch": 4.36, + "learning_rate": 5.514554888467387e-06, + "loss": 0.0488, + "step": 93445 + }, + { + "epoch": 4.36, + "learning_rate": 5.5137711034126e-06, + "loss": 0.0884, + "step": 93450 + }, + { + "epoch": 4.36, + "learning_rate": 5.512987318357814e-06, + "loss": 0.0643, + "step": 93455 + }, + { + "epoch": 4.36, + "learning_rate": 5.512203533303028e-06, + "loss": 0.0958, + "step": 93460 + }, + { + "epoch": 4.36, + "learning_rate": 5.511419748248241e-06, + "loss": 0.1162, + "step": 93465 + }, + { + "epoch": 4.36, + "learning_rate": 5.510635963193455e-06, + "loss": 0.1336, + "step": 93470 + }, + { + "epoch": 4.36, + "learning_rate": 5.509852178138668e-06, + "loss": 0.35, + "step": 93475 + }, + { + "epoch": 4.36, + "learning_rate": 5.50906839308388e-06, + "loss": 0.1198, + "step": 93480 + }, + { + "epoch": 4.36, + "learning_rate": 5.508284608029094e-06, + "loss": 0.0225, + "step": 93485 + }, + { + "epoch": 4.36, + "learning_rate": 5.507500822974307e-06, + "loss": 0.0169, + "step": 93490 + }, + { + "epoch": 4.36, + "learning_rate": 5.506717037919521e-06, + "loss": 0.0478, + "step": 93495 + }, + { + "epoch": 4.36, + "learning_rate": 5.505933252864735e-06, + "loss": 0.0886, + "step": 93500 + }, + { + "epoch": 4.36, + "learning_rate": 5.505149467809948e-06, + "loss": 0.0692, + "step": 93505 + }, + { + "epoch": 4.36, + "learning_rate": 5.504365682755162e-06, + "loss": 0.1396, + "step": 93510 + }, + { + "epoch": 4.36, + "learning_rate": 5.503581897700375e-06, + "loss": 0.1287, + "step": 93515 + }, + { + "epoch": 4.36, + "learning_rate": 5.502798112645589e-06, + "loss": 0.2369, + "step": 93520 + }, + { + "epoch": 4.36, + "learning_rate": 5.502014327590802e-06, + "loss": 0.2279, + "step": 93525 + }, + { + "epoch": 4.36, + "learning_rate": 5.501230542536016e-06, + "loss": 0.0846, + "step": 93530 + }, + { + "epoch": 4.36, + "learning_rate": 5.500446757481229e-06, + "loss": 0.0063, + "step": 93535 + }, + { + "epoch": 4.36, + "learning_rate": 5.499662972426443e-06, + "loss": 0.0303, + "step": 93540 + }, + { + "epoch": 4.36, + "learning_rate": 5.498879187371655e-06, + "loss": 0.0422, + "step": 93545 + }, + { + "epoch": 4.37, + "learning_rate": 5.498095402316869e-06, + "loss": 0.0655, + "step": 93550 + }, + { + "epoch": 4.37, + "learning_rate": 5.497311617262082e-06, + "loss": 0.0336, + "step": 93555 + }, + { + "epoch": 4.37, + "learning_rate": 5.496527832207296e-06, + "loss": 0.1132, + "step": 93560 + }, + { + "epoch": 4.37, + "learning_rate": 5.495744047152509e-06, + "loss": 0.1235, + "step": 93565 + }, + { + "epoch": 4.37, + "learning_rate": 5.494960262097723e-06, + "loss": 0.1276, + "step": 93570 + }, + { + "epoch": 4.37, + "learning_rate": 5.494176477042936e-06, + "loss": 0.2389, + "step": 93575 + }, + { + "epoch": 4.37, + "learning_rate": 5.49339269198815e-06, + "loss": 0.0394, + "step": 93580 + }, + { + "epoch": 4.37, + "learning_rate": 5.492608906933363e-06, + "loss": 0.0187, + "step": 93585 + }, + { + "epoch": 4.37, + "learning_rate": 5.491825121878577e-06, + "loss": 0.0364, + "step": 93590 + }, + { + "epoch": 4.37, + "learning_rate": 5.49104133682379e-06, + "loss": 0.0756, + "step": 93595 + }, + { + "epoch": 4.37, + "learning_rate": 5.490257551769004e-06, + "loss": 0.1331, + "step": 93600 + }, + { + "epoch": 4.37, + "learning_rate": 5.489473766714217e-06, + "loss": 0.0545, + "step": 93605 + }, + { + "epoch": 4.37, + "learning_rate": 5.48868998165943e-06, + "loss": 0.0692, + "step": 93610 + }, + { + "epoch": 4.37, + "learning_rate": 5.487906196604643e-06, + "loss": 0.0762, + "step": 93615 + }, + { + "epoch": 4.37, + "learning_rate": 5.487122411549857e-06, + "loss": 0.1696, + "step": 93620 + }, + { + "epoch": 4.37, + "learning_rate": 5.48633862649507e-06, + "loss": 0.2664, + "step": 93625 + }, + { + "epoch": 4.37, + "learning_rate": 5.485554841440284e-06, + "loss": 0.1063, + "step": 93630 + }, + { + "epoch": 4.37, + "learning_rate": 5.484771056385497e-06, + "loss": 0.0347, + "step": 93635 + }, + { + "epoch": 4.37, + "learning_rate": 5.483987271330711e-06, + "loss": 0.0382, + "step": 93640 + }, + { + "epoch": 4.37, + "learning_rate": 5.483203486275924e-06, + "loss": 0.0234, + "step": 93645 + }, + { + "epoch": 4.37, + "learning_rate": 5.482419701221138e-06, + "loss": 0.0562, + "step": 93650 + }, + { + "epoch": 4.37, + "learning_rate": 5.481635916166351e-06, + "loss": 0.1205, + "step": 93655 + }, + { + "epoch": 4.37, + "learning_rate": 5.480852131111565e-06, + "loss": 0.1197, + "step": 93660 + }, + { + "epoch": 4.37, + "learning_rate": 5.480068346056778e-06, + "loss": 0.1209, + "step": 93665 + }, + { + "epoch": 4.37, + "learning_rate": 5.479284561001992e-06, + "loss": 0.1612, + "step": 93670 + }, + { + "epoch": 4.37, + "learning_rate": 5.478500775947204e-06, + "loss": 0.263, + "step": 93675 + }, + { + "epoch": 4.37, + "learning_rate": 5.477716990892418e-06, + "loss": 0.071, + "step": 93680 + }, + { + "epoch": 4.37, + "learning_rate": 5.476933205837631e-06, + "loss": 0.0193, + "step": 93685 + }, + { + "epoch": 4.37, + "learning_rate": 5.476149420782845e-06, + "loss": 0.0398, + "step": 93690 + }, + { + "epoch": 4.37, + "learning_rate": 5.475365635728058e-06, + "loss": 0.0171, + "step": 93695 + }, + { + "epoch": 4.37, + "learning_rate": 5.474581850673272e-06, + "loss": 0.0671, + "step": 93700 + }, + { + "epoch": 4.37, + "learning_rate": 5.473798065618485e-06, + "loss": 0.0807, + "step": 93705 + }, + { + "epoch": 4.37, + "learning_rate": 5.473014280563699e-06, + "loss": 0.0761, + "step": 93710 + }, + { + "epoch": 4.37, + "learning_rate": 5.472230495508912e-06, + "loss": 0.0736, + "step": 93715 + }, + { + "epoch": 4.37, + "learning_rate": 5.471446710454126e-06, + "loss": 0.3259, + "step": 93720 + }, + { + "epoch": 4.37, + "learning_rate": 5.47066292539934e-06, + "loss": 0.3525, + "step": 93725 + }, + { + "epoch": 4.37, + "learning_rate": 5.469879140344553e-06, + "loss": 0.1093, + "step": 93730 + }, + { + "epoch": 4.37, + "learning_rate": 5.469095355289767e-06, + "loss": 0.0386, + "step": 93735 + }, + { + "epoch": 4.37, + "learning_rate": 5.468311570234979e-06, + "loss": 0.0511, + "step": 93740 + }, + { + "epoch": 4.37, + "learning_rate": 5.467527785180192e-06, + "loss": 0.0422, + "step": 93745 + }, + { + "epoch": 4.37, + "learning_rate": 5.466744000125406e-06, + "loss": 0.0714, + "step": 93750 + }, + { + "epoch": 4.37, + "learning_rate": 5.465960215070619e-06, + "loss": 0.021, + "step": 93755 + }, + { + "epoch": 4.37, + "learning_rate": 5.465176430015833e-06, + "loss": 0.074, + "step": 93760 + }, + { + "epoch": 4.38, + "learning_rate": 5.464549401972003e-06, + "loss": 0.1566, + "step": 93765 + }, + { + "epoch": 4.38, + "learning_rate": 5.463765616917217e-06, + "loss": 0.1308, + "step": 93770 + }, + { + "epoch": 4.38, + "learning_rate": 5.46298183186243e-06, + "loss": 0.2837, + "step": 93775 + }, + { + "epoch": 4.38, + "learning_rate": 5.462198046807644e-06, + "loss": 0.0954, + "step": 93780 + }, + { + "epoch": 4.38, + "learning_rate": 5.461414261752858e-06, + "loss": 0.0132, + "step": 93785 + }, + { + "epoch": 4.38, + "learning_rate": 5.460630476698071e-06, + "loss": 0.026, + "step": 93790 + }, + { + "epoch": 4.38, + "learning_rate": 5.459846691643285e-06, + "loss": 0.065, + "step": 93795 + }, + { + "epoch": 4.38, + "learning_rate": 5.459062906588498e-06, + "loss": 0.0837, + "step": 93800 + }, + { + "epoch": 4.38, + "learning_rate": 5.45827912153371e-06, + "loss": 0.1, + "step": 93805 + }, + { + "epoch": 4.38, + "learning_rate": 5.457495336478924e-06, + "loss": 0.0343, + "step": 93810 + }, + { + "epoch": 4.38, + "learning_rate": 5.456711551424137e-06, + "loss": 0.0407, + "step": 93815 + }, + { + "epoch": 4.38, + "learning_rate": 5.455927766369351e-06, + "loss": 0.1016, + "step": 93820 + }, + { + "epoch": 4.38, + "learning_rate": 5.455143981314564e-06, + "loss": 0.2205, + "step": 93825 + }, + { + "epoch": 4.38, + "learning_rate": 5.454360196259778e-06, + "loss": 0.0554, + "step": 93830 + }, + { + "epoch": 4.38, + "learning_rate": 5.453576411204992e-06, + "loss": 0.0252, + "step": 93835 + }, + { + "epoch": 4.38, + "learning_rate": 5.452792626150205e-06, + "loss": 0.0667, + "step": 93840 + }, + { + "epoch": 4.38, + "learning_rate": 5.452008841095419e-06, + "loss": 0.0099, + "step": 93845 + }, + { + "epoch": 4.38, + "learning_rate": 5.451225056040632e-06, + "loss": 0.0591, + "step": 93850 + }, + { + "epoch": 4.38, + "learning_rate": 5.450441270985846e-06, + "loss": 0.0771, + "step": 93855 + }, + { + "epoch": 4.38, + "learning_rate": 5.449657485931059e-06, + "loss": 0.0646, + "step": 93860 + }, + { + "epoch": 4.38, + "learning_rate": 5.448873700876273e-06, + "loss": 0.0979, + "step": 93865 + }, + { + "epoch": 4.38, + "learning_rate": 5.448089915821485e-06, + "loss": 0.1096, + "step": 93870 + }, + { + "epoch": 4.38, + "learning_rate": 5.447306130766698e-06, + "loss": 0.3933, + "step": 93875 + }, + { + "epoch": 4.38, + "learning_rate": 5.446522345711912e-06, + "loss": 0.1126, + "step": 93880 + }, + { + "epoch": 4.38, + "learning_rate": 5.445738560657126e-06, + "loss": 0.022, + "step": 93885 + }, + { + "epoch": 4.38, + "learning_rate": 5.444954775602339e-06, + "loss": 0.0044, + "step": 93890 + }, + { + "epoch": 4.38, + "learning_rate": 5.444170990547553e-06, + "loss": 0.0406, + "step": 93895 + }, + { + "epoch": 4.38, + "learning_rate": 5.443387205492766e-06, + "loss": 0.042, + "step": 93900 + }, + { + "epoch": 4.38, + "learning_rate": 5.44260342043798e-06, + "loss": 0.0792, + "step": 93905 + }, + { + "epoch": 4.38, + "learning_rate": 5.441819635383193e-06, + "loss": 0.0912, + "step": 93910 + }, + { + "epoch": 4.38, + "learning_rate": 5.441035850328407e-06, + "loss": 0.1177, + "step": 93915 + }, + { + "epoch": 4.38, + "learning_rate": 5.44025206527362e-06, + "loss": 0.2086, + "step": 93920 + }, + { + "epoch": 4.38, + "learning_rate": 5.439468280218834e-06, + "loss": 0.2436, + "step": 93925 + }, + { + "epoch": 4.38, + "learning_rate": 5.438684495164047e-06, + "loss": 0.0886, + "step": 93930 + }, + { + "epoch": 4.38, + "learning_rate": 5.43790071010926e-06, + "loss": 0.0233, + "step": 93935 + }, + { + "epoch": 4.38, + "learning_rate": 5.437116925054473e-06, + "loss": 0.0095, + "step": 93940 + }, + { + "epoch": 4.38, + "learning_rate": 5.436333139999687e-06, + "loss": 0.1044, + "step": 93945 + }, + { + "epoch": 4.38, + "learning_rate": 5.4355493549449e-06, + "loss": 0.0649, + "step": 93950 + }, + { + "epoch": 4.38, + "learning_rate": 5.434765569890114e-06, + "loss": 0.0504, + "step": 93955 + }, + { + "epoch": 4.38, + "learning_rate": 5.433981784835327e-06, + "loss": 0.0792, + "step": 93960 + }, + { + "epoch": 4.38, + "learning_rate": 5.433197999780541e-06, + "loss": 0.1823, + "step": 93965 + }, + { + "epoch": 4.38, + "learning_rate": 5.432414214725754e-06, + "loss": 0.14, + "step": 93970 + }, + { + "epoch": 4.39, + "learning_rate": 5.431630429670968e-06, + "loss": 0.2092, + "step": 93975 + }, + { + "epoch": 4.39, + "learning_rate": 5.430846644616181e-06, + "loss": 0.0764, + "step": 93980 + }, + { + "epoch": 4.39, + "learning_rate": 5.430062859561395e-06, + "loss": 0.0467, + "step": 93985 + }, + { + "epoch": 4.39, + "learning_rate": 5.429279074506608e-06, + "loss": 0.0643, + "step": 93990 + }, + { + "epoch": 4.39, + "learning_rate": 5.428495289451822e-06, + "loss": 0.042, + "step": 93995 + }, + { + "epoch": 4.39, + "learning_rate": 5.427711504397034e-06, + "loss": 0.1081, + "step": 94000 + }, + { + "epoch": 4.39, + "learning_rate": 5.426927719342248e-06, + "loss": 0.0642, + "step": 94005 + }, + { + "epoch": 4.39, + "learning_rate": 5.426143934287461e-06, + "loss": 0.1115, + "step": 94010 + }, + { + "epoch": 4.39, + "learning_rate": 5.425360149232675e-06, + "loss": 0.1697, + "step": 94015 + }, + { + "epoch": 4.39, + "learning_rate": 5.424576364177888e-06, + "loss": 0.0586, + "step": 94020 + }, + { + "epoch": 4.39, + "learning_rate": 5.423792579123102e-06, + "loss": 0.1889, + "step": 94025 + }, + { + "epoch": 4.39, + "learning_rate": 5.423008794068315e-06, + "loss": 0.0461, + "step": 94030 + }, + { + "epoch": 4.39, + "learning_rate": 5.422225009013529e-06, + "loss": 0.0606, + "step": 94035 + }, + { + "epoch": 4.39, + "learning_rate": 5.421441223958742e-06, + "loss": 0.0025, + "step": 94040 + }, + { + "epoch": 4.39, + "learning_rate": 5.420657438903956e-06, + "loss": 0.0234, + "step": 94045 + }, + { + "epoch": 4.39, + "learning_rate": 5.41987365384917e-06, + "loss": 0.0614, + "step": 94050 + }, + { + "epoch": 4.39, + "learning_rate": 5.419089868794383e-06, + "loss": 0.1099, + "step": 94055 + }, + { + "epoch": 4.39, + "learning_rate": 5.4183060837395966e-06, + "loss": 0.0848, + "step": 94060 + }, + { + "epoch": 4.39, + "learning_rate": 5.417522298684809e-06, + "loss": 0.1277, + "step": 94065 + }, + { + "epoch": 4.39, + "learning_rate": 5.416738513630022e-06, + "loss": 0.1046, + "step": 94070 + }, + { + "epoch": 4.39, + "learning_rate": 5.415954728575236e-06, + "loss": 0.278, + "step": 94075 + }, + { + "epoch": 4.39, + "learning_rate": 5.415170943520449e-06, + "loss": 0.056, + "step": 94080 + }, + { + "epoch": 4.39, + "learning_rate": 5.414387158465663e-06, + "loss": 0.0217, + "step": 94085 + }, + { + "epoch": 4.39, + "learning_rate": 5.413603373410876e-06, + "loss": 0.044, + "step": 94090 + }, + { + "epoch": 4.39, + "learning_rate": 5.41281958835609e-06, + "loss": 0.0647, + "step": 94095 + }, + { + "epoch": 4.39, + "learning_rate": 5.412035803301304e-06, + "loss": 0.0422, + "step": 94100 + }, + { + "epoch": 4.39, + "learning_rate": 5.411252018246517e-06, + "loss": 0.0544, + "step": 94105 + }, + { + "epoch": 4.39, + "learning_rate": 5.4104682331917305e-06, + "loss": 0.0294, + "step": 94110 + }, + { + "epoch": 4.39, + "learning_rate": 5.409684448136944e-06, + "loss": 0.098, + "step": 94115 + }, + { + "epoch": 4.39, + "learning_rate": 5.4089006630821575e-06, + "loss": 0.1774, + "step": 94120 + }, + { + "epoch": 4.39, + "learning_rate": 5.4081168780273706e-06, + "loss": 0.9083, + "step": 94125 + }, + { + "epoch": 4.39, + "learning_rate": 5.407333092972583e-06, + "loss": 0.0559, + "step": 94130 + }, + { + "epoch": 4.39, + "learning_rate": 5.406549307917797e-06, + "loss": 0.0424, + "step": 94135 + }, + { + "epoch": 4.39, + "learning_rate": 5.40576552286301e-06, + "loss": 0.0275, + "step": 94140 + }, + { + "epoch": 4.39, + "learning_rate": 5.404981737808224e-06, + "loss": 0.0261, + "step": 94145 + }, + { + "epoch": 4.39, + "learning_rate": 5.4041979527534376e-06, + "loss": 0.1056, + "step": 94150 + }, + { + "epoch": 4.39, + "learning_rate": 5.403414167698651e-06, + "loss": 0.0855, + "step": 94155 + }, + { + "epoch": 4.39, + "learning_rate": 5.4026303826438645e-06, + "loss": 0.0668, + "step": 94160 + }, + { + "epoch": 4.39, + "learning_rate": 5.401846597589078e-06, + "loss": 0.0595, + "step": 94165 + }, + { + "epoch": 4.39, + "learning_rate": 5.4010628125342915e-06, + "loss": 0.1655, + "step": 94170 + }, + { + "epoch": 4.39, + "learning_rate": 5.4002790274795045e-06, + "loss": 0.4077, + "step": 94175 + }, + { + "epoch": 4.39, + "learning_rate": 5.3994952424247184e-06, + "loss": 0.112, + "step": 94180 + }, + { + "epoch": 4.39, + "learning_rate": 5.3987114573699315e-06, + "loss": 0.0193, + "step": 94185 + }, + { + "epoch": 4.4, + "learning_rate": 5.397927672315145e-06, + "loss": 0.0298, + "step": 94190 + }, + { + "epoch": 4.4, + "learning_rate": 5.397143887260358e-06, + "loss": 0.1002, + "step": 94195 + }, + { + "epoch": 4.4, + "learning_rate": 5.3963601022055715e-06, + "loss": 0.1018, + "step": 94200 + }, + { + "epoch": 4.4, + "learning_rate": 5.395576317150785e-06, + "loss": 0.092, + "step": 94205 + }, + { + "epoch": 4.4, + "learning_rate": 5.3947925320959985e-06, + "loss": 0.0637, + "step": 94210 + }, + { + "epoch": 4.4, + "learning_rate": 5.3940087470412116e-06, + "loss": 0.0959, + "step": 94215 + }, + { + "epoch": 4.4, + "learning_rate": 5.3932249619864255e-06, + "loss": 0.3055, + "step": 94220 + }, + { + "epoch": 4.4, + "learning_rate": 5.3924411769316385e-06, + "loss": 0.2291, + "step": 94225 + }, + { + "epoch": 4.4, + "learning_rate": 5.391657391876852e-06, + "loss": 0.0576, + "step": 94230 + }, + { + "epoch": 4.4, + "learning_rate": 5.3908736068220655e-06, + "loss": 0.0308, + "step": 94235 + }, + { + "epoch": 4.4, + "learning_rate": 5.390089821767279e-06, + "loss": 0.0825, + "step": 94240 + }, + { + "epoch": 4.4, + "learning_rate": 5.3893060367124924e-06, + "loss": 0.0327, + "step": 94245 + }, + { + "epoch": 4.4, + "learning_rate": 5.388522251657706e-06, + "loss": 0.101, + "step": 94250 + }, + { + "epoch": 4.4, + "learning_rate": 5.387738466602919e-06, + "loss": 0.1118, + "step": 94255 + }, + { + "epoch": 4.4, + "learning_rate": 5.3869546815481325e-06, + "loss": 0.0906, + "step": 94260 + }, + { + "epoch": 4.4, + "learning_rate": 5.3861708964933455e-06, + "loss": 0.1445, + "step": 94265 + }, + { + "epoch": 4.4, + "learning_rate": 5.3853871114385594e-06, + "loss": 0.1794, + "step": 94270 + }, + { + "epoch": 4.4, + "learning_rate": 5.3846033263837725e-06, + "loss": 0.3127, + "step": 94275 + }, + { + "epoch": 4.4, + "learning_rate": 5.383819541328986e-06, + "loss": 0.1108, + "step": 94280 + }, + { + "epoch": 4.4, + "learning_rate": 5.3830357562741995e-06, + "loss": 0.0173, + "step": 94285 + }, + { + "epoch": 4.4, + "learning_rate": 5.382251971219413e-06, + "loss": 0.0147, + "step": 94290 + }, + { + "epoch": 4.4, + "learning_rate": 5.381468186164626e-06, + "loss": 0.0375, + "step": 94295 + }, + { + "epoch": 4.4, + "learning_rate": 5.38068440110984e-06, + "loss": 0.0357, + "step": 94300 + }, + { + "epoch": 4.4, + "learning_rate": 5.379900616055053e-06, + "loss": 0.0637, + "step": 94305 + }, + { + "epoch": 4.4, + "learning_rate": 5.379116831000267e-06, + "loss": 0.0859, + "step": 94310 + }, + { + "epoch": 4.4, + "learning_rate": 5.378333045945481e-06, + "loss": 0.0535, + "step": 94315 + }, + { + "epoch": 4.4, + "learning_rate": 5.377549260890694e-06, + "loss": 0.0934, + "step": 94320 + }, + { + "epoch": 4.4, + "learning_rate": 5.3767654758359065e-06, + "loss": 0.1851, + "step": 94325 + }, + { + "epoch": 4.4, + "learning_rate": 5.37598169078112e-06, + "loss": 0.0937, + "step": 94330 + }, + { + "epoch": 4.4, + "learning_rate": 5.3751979057263334e-06, + "loss": 0.0283, + "step": 94335 + }, + { + "epoch": 4.4, + "learning_rate": 5.374414120671547e-06, + "loss": 0.0502, + "step": 94340 + }, + { + "epoch": 4.4, + "learning_rate": 5.37363033561676e-06, + "loss": 0.0551, + "step": 94345 + }, + { + "epoch": 4.4, + "learning_rate": 5.372846550561974e-06, + "loss": 0.0547, + "step": 94350 + }, + { + "epoch": 4.4, + "learning_rate": 5.372062765507187e-06, + "loss": 0.0846, + "step": 94355 + }, + { + "epoch": 4.4, + "learning_rate": 5.371278980452401e-06, + "loss": 0.0917, + "step": 94360 + }, + { + "epoch": 4.4, + "learning_rate": 5.370495195397615e-06, + "loss": 0.0626, + "step": 94365 + }, + { + "epoch": 4.4, + "learning_rate": 5.369711410342828e-06, + "loss": 0.3187, + "step": 94370 + }, + { + "epoch": 4.4, + "learning_rate": 5.368927625288042e-06, + "loss": 0.3131, + "step": 94375 + }, + { + "epoch": 4.4, + "learning_rate": 5.368143840233255e-06, + "loss": 0.0509, + "step": 94380 + }, + { + "epoch": 4.4, + "learning_rate": 5.367360055178469e-06, + "loss": 0.0031, + "step": 94385 + }, + { + "epoch": 4.4, + "learning_rate": 5.366576270123681e-06, + "loss": 0.042, + "step": 94390 + }, + { + "epoch": 4.4, + "learning_rate": 5.365792485068894e-06, + "loss": 0.0337, + "step": 94395 + }, + { + "epoch": 4.4, + "learning_rate": 5.365008700014108e-06, + "loss": 0.0632, + "step": 94400 + }, + { + "epoch": 4.41, + "learning_rate": 5.364224914959321e-06, + "loss": 0.0857, + "step": 94405 + }, + { + "epoch": 4.41, + "learning_rate": 5.363441129904535e-06, + "loss": 0.1284, + "step": 94410 + }, + { + "epoch": 4.41, + "learning_rate": 5.362657344849749e-06, + "loss": 0.1608, + "step": 94415 + }, + { + "epoch": 4.41, + "learning_rate": 5.361873559794962e-06, + "loss": 0.2023, + "step": 94420 + }, + { + "epoch": 4.41, + "learning_rate": 5.361089774740176e-06, + "loss": 0.3012, + "step": 94425 + }, + { + "epoch": 4.41, + "learning_rate": 5.360305989685389e-06, + "loss": 0.0722, + "step": 94430 + }, + { + "epoch": 4.41, + "learning_rate": 5.359522204630603e-06, + "loss": 0.0794, + "step": 94435 + }, + { + "epoch": 4.41, + "learning_rate": 5.358738419575816e-06, + "loss": 0.0341, + "step": 94440 + }, + { + "epoch": 4.41, + "learning_rate": 5.35795463452103e-06, + "loss": 0.0394, + "step": 94445 + }, + { + "epoch": 4.41, + "learning_rate": 5.357170849466243e-06, + "loss": 0.0824, + "step": 94450 + }, + { + "epoch": 4.41, + "learning_rate": 5.356387064411455e-06, + "loss": 0.107, + "step": 94455 + }, + { + "epoch": 4.41, + "learning_rate": 5.355603279356669e-06, + "loss": 0.1359, + "step": 94460 + }, + { + "epoch": 4.41, + "learning_rate": 5.354819494301883e-06, + "loss": 0.0842, + "step": 94465 + }, + { + "epoch": 4.41, + "learning_rate": 5.354035709247096e-06, + "loss": 0.1772, + "step": 94470 + }, + { + "epoch": 4.41, + "learning_rate": 5.35325192419231e-06, + "loss": 0.2817, + "step": 94475 + }, + { + "epoch": 4.41, + "learning_rate": 5.352468139137523e-06, + "loss": 0.0653, + "step": 94480 + }, + { + "epoch": 4.41, + "learning_rate": 5.351684354082737e-06, + "loss": 0.0257, + "step": 94485 + }, + { + "epoch": 4.41, + "learning_rate": 5.35090056902795e-06, + "loss": 0.0314, + "step": 94490 + }, + { + "epoch": 4.41, + "learning_rate": 5.350116783973164e-06, + "loss": 0.03, + "step": 94495 + }, + { + "epoch": 4.41, + "learning_rate": 5.349332998918377e-06, + "loss": 0.0837, + "step": 94500 + }, + { + "epoch": 4.41, + "learning_rate": 5.348549213863591e-06, + "loss": 0.068, + "step": 94505 + }, + { + "epoch": 4.41, + "learning_rate": 5.347765428808804e-06, + "loss": 0.1035, + "step": 94510 + }, + { + "epoch": 4.41, + "learning_rate": 5.346981643754018e-06, + "loss": 0.1012, + "step": 94515 + }, + { + "epoch": 4.41, + "learning_rate": 5.34619785869923e-06, + "loss": 0.1492, + "step": 94520 + }, + { + "epoch": 4.41, + "learning_rate": 5.345414073644444e-06, + "loss": 0.3077, + "step": 94525 + }, + { + "epoch": 4.41, + "learning_rate": 5.344630288589657e-06, + "loss": 0.1079, + "step": 94530 + }, + { + "epoch": 4.41, + "learning_rate": 5.343846503534871e-06, + "loss": 0.0021, + "step": 94535 + }, + { + "epoch": 4.41, + "learning_rate": 5.343062718480084e-06, + "loss": 0.0187, + "step": 94540 + }, + { + "epoch": 4.41, + "learning_rate": 5.342278933425298e-06, + "loss": 0.0476, + "step": 94545 + }, + { + "epoch": 4.41, + "learning_rate": 5.341495148370511e-06, + "loss": 0.0735, + "step": 94550 + }, + { + "epoch": 4.41, + "learning_rate": 5.340711363315725e-06, + "loss": 0.0585, + "step": 94555 + }, + { + "epoch": 4.41, + "learning_rate": 5.339927578260938e-06, + "loss": 0.0936, + "step": 94560 + }, + { + "epoch": 4.41, + "learning_rate": 5.339143793206152e-06, + "loss": 0.1221, + "step": 94565 + }, + { + "epoch": 4.41, + "learning_rate": 5.338360008151365e-06, + "loss": 0.1027, + "step": 94570 + }, + { + "epoch": 4.41, + "learning_rate": 5.337576223096579e-06, + "loss": 0.3455, + "step": 94575 + }, + { + "epoch": 4.41, + "learning_rate": 5.336792438041792e-06, + "loss": 0.1167, + "step": 94580 + }, + { + "epoch": 4.41, + "learning_rate": 5.336008652987005e-06, + "loss": 0.0593, + "step": 94585 + }, + { + "epoch": 4.41, + "learning_rate": 5.335224867932218e-06, + "loss": 0.0618, + "step": 94590 + }, + { + "epoch": 4.41, + "learning_rate": 5.334441082877432e-06, + "loss": 0.0215, + "step": 94595 + }, + { + "epoch": 4.41, + "learning_rate": 5.333657297822645e-06, + "loss": 0.0478, + "step": 94600 + }, + { + "epoch": 4.41, + "learning_rate": 5.332873512767859e-06, + "loss": 0.0374, + "step": 94605 + }, + { + "epoch": 4.41, + "learning_rate": 5.332089727713072e-06, + "loss": 0.1203, + "step": 94610 + }, + { + "epoch": 4.41, + "learning_rate": 5.331305942658286e-06, + "loss": 0.096, + "step": 94615 + }, + { + "epoch": 4.42, + "learning_rate": 5.330522157603499e-06, + "loss": 0.1288, + "step": 94620 + }, + { + "epoch": 4.42, + "learning_rate": 5.329738372548713e-06, + "loss": 0.3298, + "step": 94625 + }, + { + "epoch": 4.42, + "learning_rate": 5.328954587493927e-06, + "loss": 0.0654, + "step": 94630 + }, + { + "epoch": 4.42, + "learning_rate": 5.32817080243914e-06, + "loss": 0.0372, + "step": 94635 + }, + { + "epoch": 4.42, + "learning_rate": 5.327387017384354e-06, + "loss": 0.0341, + "step": 94640 + }, + { + "epoch": 4.42, + "learning_rate": 5.326603232329567e-06, + "loss": 0.1115, + "step": 94645 + }, + { + "epoch": 4.42, + "learning_rate": 5.325819447274779e-06, + "loss": 0.1194, + "step": 94650 + }, + { + "epoch": 4.42, + "learning_rate": 5.325035662219993e-06, + "loss": 0.0683, + "step": 94655 + }, + { + "epoch": 4.42, + "learning_rate": 5.324251877165206e-06, + "loss": 0.125, + "step": 94660 + }, + { + "epoch": 4.42, + "learning_rate": 5.32346809211042e-06, + "loss": 0.1841, + "step": 94665 + }, + { + "epoch": 4.42, + "learning_rate": 5.322684307055633e-06, + "loss": 0.1564, + "step": 94670 + }, + { + "epoch": 4.42, + "learning_rate": 5.321900522000847e-06, + "loss": 0.2087, + "step": 94675 + }, + { + "epoch": 4.42, + "learning_rate": 5.321116736946061e-06, + "loss": 0.067, + "step": 94680 + }, + { + "epoch": 4.42, + "learning_rate": 5.320332951891274e-06, + "loss": 0.0378, + "step": 94685 + }, + { + "epoch": 4.42, + "learning_rate": 5.319549166836488e-06, + "loss": 0.0236, + "step": 94690 + }, + { + "epoch": 4.42, + "learning_rate": 5.318765381781701e-06, + "loss": 0.0267, + "step": 94695 + }, + { + "epoch": 4.42, + "learning_rate": 5.317981596726915e-06, + "loss": 0.0426, + "step": 94700 + }, + { + "epoch": 4.42, + "learning_rate": 5.317197811672128e-06, + "loss": 0.0339, + "step": 94705 + }, + { + "epoch": 4.42, + "learning_rate": 5.316414026617342e-06, + "loss": 0.086, + "step": 94710 + }, + { + "epoch": 4.42, + "learning_rate": 5.315630241562554e-06, + "loss": 0.1495, + "step": 94715 + }, + { + "epoch": 4.42, + "learning_rate": 5.314846456507767e-06, + "loss": 0.1409, + "step": 94720 + }, + { + "epoch": 4.42, + "learning_rate": 5.314062671452981e-06, + "loss": 0.2498, + "step": 94725 + }, + { + "epoch": 4.42, + "learning_rate": 5.313278886398195e-06, + "loss": 0.1208, + "step": 94730 + }, + { + "epoch": 4.42, + "learning_rate": 5.312495101343408e-06, + "loss": 0.0606, + "step": 94735 + }, + { + "epoch": 4.42, + "learning_rate": 5.311711316288622e-06, + "loss": 0.0261, + "step": 94740 + }, + { + "epoch": 4.42, + "learning_rate": 5.310927531233835e-06, + "loss": 0.1263, + "step": 94745 + }, + { + "epoch": 4.42, + "learning_rate": 5.310143746179049e-06, + "loss": 0.0717, + "step": 94750 + }, + { + "epoch": 4.42, + "learning_rate": 5.309359961124262e-06, + "loss": 0.0305, + "step": 94755 + }, + { + "epoch": 4.42, + "learning_rate": 5.308576176069476e-06, + "loss": 0.0571, + "step": 94760 + }, + { + "epoch": 4.42, + "learning_rate": 5.307792391014689e-06, + "loss": 0.2048, + "step": 94765 + }, + { + "epoch": 4.42, + "learning_rate": 5.307008605959903e-06, + "loss": 0.1052, + "step": 94770 + }, + { + "epoch": 4.42, + "learning_rate": 5.306224820905116e-06, + "loss": 0.3887, + "step": 94775 + }, + { + "epoch": 4.42, + "learning_rate": 5.305441035850329e-06, + "loss": 0.1179, + "step": 94780 + }, + { + "epoch": 4.42, + "learning_rate": 5.304657250795542e-06, + "loss": 0.0314, + "step": 94785 + }, + { + "epoch": 4.42, + "learning_rate": 5.303873465740756e-06, + "loss": 0.0317, + "step": 94790 + }, + { + "epoch": 4.42, + "learning_rate": 5.303089680685969e-06, + "loss": 0.0489, + "step": 94795 + }, + { + "epoch": 4.42, + "learning_rate": 5.302305895631183e-06, + "loss": 0.0492, + "step": 94800 + }, + { + "epoch": 4.42, + "learning_rate": 5.301522110576396e-06, + "loss": 0.0911, + "step": 94805 + }, + { + "epoch": 4.42, + "learning_rate": 5.30073832552161e-06, + "loss": 0.073, + "step": 94810 + }, + { + "epoch": 4.42, + "learning_rate": 5.299954540466823e-06, + "loss": 0.1565, + "step": 94815 + }, + { + "epoch": 4.42, + "learning_rate": 5.2991707554120366e-06, + "loss": 0.1232, + "step": 94820 + }, + { + "epoch": 4.42, + "learning_rate": 5.29838697035725e-06, + "loss": 0.4846, + "step": 94825 + }, + { + "epoch": 4.42, + "learning_rate": 5.2976031853024635e-06, + "loss": 0.0942, + "step": 94830 + }, + { + "epoch": 4.43, + "learning_rate": 5.2968194002476766e-06, + "loss": 0.0406, + "step": 94835 + }, + { + "epoch": 4.43, + "learning_rate": 5.2960356151928905e-06, + "loss": 0.0519, + "step": 94840 + }, + { + "epoch": 4.43, + "learning_rate": 5.295251830138103e-06, + "loss": 0.0323, + "step": 94845 + }, + { + "epoch": 4.43, + "learning_rate": 5.294468045083317e-06, + "loss": 0.0845, + "step": 94850 + }, + { + "epoch": 4.43, + "learning_rate": 5.29368426002853e-06, + "loss": 0.0372, + "step": 94855 + }, + { + "epoch": 4.43, + "learning_rate": 5.2929004749737436e-06, + "loss": 0.1241, + "step": 94860 + }, + { + "epoch": 4.43, + "learning_rate": 5.292116689918957e-06, + "loss": 0.0852, + "step": 94865 + }, + { + "epoch": 4.43, + "learning_rate": 5.2913329048641705e-06, + "loss": 0.1992, + "step": 94870 + }, + { + "epoch": 4.43, + "learning_rate": 5.290549119809384e-06, + "loss": 0.3362, + "step": 94875 + }, + { + "epoch": 4.43, + "learning_rate": 5.2897653347545975e-06, + "loss": 0.0508, + "step": 94880 + }, + { + "epoch": 4.43, + "learning_rate": 5.2889815496998106e-06, + "loss": 0.017, + "step": 94885 + }, + { + "epoch": 4.43, + "learning_rate": 5.2881977646450245e-06, + "loss": 0.0439, + "step": 94890 + }, + { + "epoch": 4.43, + "learning_rate": 5.287413979590238e-06, + "loss": 0.0687, + "step": 94895 + }, + { + "epoch": 4.43, + "learning_rate": 5.286630194535451e-06, + "loss": 0.0565, + "step": 94900 + }, + { + "epoch": 4.43, + "learning_rate": 5.285846409480665e-06, + "loss": 0.0778, + "step": 94905 + }, + { + "epoch": 4.43, + "learning_rate": 5.2850626244258775e-06, + "loss": 0.0566, + "step": 94910 + }, + { + "epoch": 4.43, + "learning_rate": 5.284278839371091e-06, + "loss": 0.1712, + "step": 94915 + }, + { + "epoch": 4.43, + "learning_rate": 5.2834950543163045e-06, + "loss": 0.2344, + "step": 94920 + }, + { + "epoch": 4.43, + "learning_rate": 5.2827112692615176e-06, + "loss": 0.3864, + "step": 94925 + }, + { + "epoch": 4.43, + "learning_rate": 5.2819274842067315e-06, + "loss": 0.0749, + "step": 94930 + }, + { + "epoch": 4.43, + "learning_rate": 5.2811436991519445e-06, + "loss": 0.0081, + "step": 94935 + }, + { + "epoch": 4.43, + "learning_rate": 5.2803599140971584e-06, + "loss": 0.0292, + "step": 94940 + }, + { + "epoch": 4.43, + "learning_rate": 5.279576129042372e-06, + "loss": 0.0565, + "step": 94945 + }, + { + "epoch": 4.43, + "learning_rate": 5.278792343987585e-06, + "loss": 0.0914, + "step": 94950 + }, + { + "epoch": 4.43, + "learning_rate": 5.278008558932799e-06, + "loss": 0.0806, + "step": 94955 + }, + { + "epoch": 4.43, + "learning_rate": 5.277224773878012e-06, + "loss": 0.1211, + "step": 94960 + }, + { + "epoch": 4.43, + "learning_rate": 5.276440988823226e-06, + "loss": 0.0831, + "step": 94965 + }, + { + "epoch": 4.43, + "learning_rate": 5.275657203768439e-06, + "loss": 0.1333, + "step": 94970 + }, + { + "epoch": 4.43, + "learning_rate": 5.2748734187136515e-06, + "loss": 0.3398, + "step": 94975 + }, + { + "epoch": 4.43, + "learning_rate": 5.2740896336588654e-06, + "loss": 0.0687, + "step": 94980 + }, + { + "epoch": 4.43, + "learning_rate": 5.2733058486040785e-06, + "loss": 0.023, + "step": 94985 + }, + { + "epoch": 4.43, + "learning_rate": 5.272522063549292e-06, + "loss": 0.0358, + "step": 94990 + }, + { + "epoch": 4.43, + "learning_rate": 5.271738278494506e-06, + "loss": 0.0276, + "step": 94995 + }, + { + "epoch": 4.43, + "learning_rate": 5.270954493439719e-06, + "loss": 0.0435, + "step": 95000 + }, + { + "epoch": 4.43, + "learning_rate": 5.270170708384933e-06, + "loss": 0.0563, + "step": 95005 + }, + { + "epoch": 4.43, + "learning_rate": 5.269386923330146e-06, + "loss": 0.0479, + "step": 95010 + }, + { + "epoch": 4.43, + "learning_rate": 5.26860313827536e-06, + "loss": 0.1242, + "step": 95015 + }, + { + "epoch": 4.43, + "learning_rate": 5.267819353220573e-06, + "loss": 0.1067, + "step": 95020 + }, + { + "epoch": 4.43, + "learning_rate": 5.267035568165787e-06, + "loss": 0.187, + "step": 95025 + }, + { + "epoch": 4.43, + "learning_rate": 5.266251783111e-06, + "loss": 0.1048, + "step": 95030 + }, + { + "epoch": 4.43, + "learning_rate": 5.265467998056214e-06, + "loss": 0.0121, + "step": 95035 + }, + { + "epoch": 4.43, + "learning_rate": 5.264684213001426e-06, + "loss": 0.0388, + "step": 95040 + }, + { + "epoch": 4.43, + "learning_rate": 5.26390042794664e-06, + "loss": 0.039, + "step": 95045 + }, + { + "epoch": 4.44, + "learning_rate": 5.263116642891853e-06, + "loss": 0.0672, + "step": 95050 + }, + { + "epoch": 4.44, + "learning_rate": 5.262332857837067e-06, + "loss": 0.0662, + "step": 95055 + }, + { + "epoch": 4.44, + "learning_rate": 5.26154907278228e-06, + "loss": 0.0824, + "step": 95060 + }, + { + "epoch": 4.44, + "learning_rate": 5.260765287727494e-06, + "loss": 0.1178, + "step": 95065 + }, + { + "epoch": 4.44, + "learning_rate": 5.259981502672707e-06, + "loss": 0.1222, + "step": 95070 + }, + { + "epoch": 4.44, + "learning_rate": 5.259197717617921e-06, + "loss": 0.2457, + "step": 95075 + }, + { + "epoch": 4.44, + "learning_rate": 5.258413932563134e-06, + "loss": 0.1489, + "step": 95080 + }, + { + "epoch": 4.44, + "learning_rate": 5.257630147508348e-06, + "loss": 0.0279, + "step": 95085 + }, + { + "epoch": 4.44, + "learning_rate": 5.256846362453561e-06, + "loss": 0.0245, + "step": 95090 + }, + { + "epoch": 4.44, + "learning_rate": 5.256062577398775e-06, + "loss": 0.0522, + "step": 95095 + }, + { + "epoch": 4.44, + "learning_rate": 5.255278792343988e-06, + "loss": 0.0658, + "step": 95100 + }, + { + "epoch": 4.44, + "learning_rate": 5.254495007289201e-06, + "loss": 0.0312, + "step": 95105 + }, + { + "epoch": 4.44, + "learning_rate": 5.253711222234414e-06, + "loss": 0.0863, + "step": 95110 + }, + { + "epoch": 4.44, + "learning_rate": 5.252927437179628e-06, + "loss": 0.1052, + "step": 95115 + }, + { + "epoch": 4.44, + "learning_rate": 5.252143652124841e-06, + "loss": 0.2086, + "step": 95120 + }, + { + "epoch": 4.44, + "learning_rate": 5.251359867070055e-06, + "loss": 0.3319, + "step": 95125 + }, + { + "epoch": 4.44, + "learning_rate": 5.250576082015268e-06, + "loss": 0.0817, + "step": 95130 + }, + { + "epoch": 4.44, + "learning_rate": 5.249792296960482e-06, + "loss": 0.033, + "step": 95135 + }, + { + "epoch": 4.44, + "learning_rate": 5.249008511905695e-06, + "loss": 0.0278, + "step": 95140 + }, + { + "epoch": 4.44, + "learning_rate": 5.248224726850909e-06, + "loss": 0.0582, + "step": 95145 + }, + { + "epoch": 4.44, + "learning_rate": 5.247440941796122e-06, + "loss": 0.0525, + "step": 95150 + }, + { + "epoch": 4.44, + "learning_rate": 5.246657156741336e-06, + "loss": 0.0768, + "step": 95155 + }, + { + "epoch": 4.44, + "learning_rate": 5.245873371686549e-06, + "loss": 0.0597, + "step": 95160 + }, + { + "epoch": 4.44, + "learning_rate": 5.245089586631763e-06, + "loss": 0.1307, + "step": 95165 + }, + { + "epoch": 4.44, + "learning_rate": 5.244305801576975e-06, + "loss": 0.298, + "step": 95170 + }, + { + "epoch": 4.44, + "learning_rate": 5.243522016522189e-06, + "loss": 0.2818, + "step": 95175 + }, + { + "epoch": 4.44, + "learning_rate": 5.242738231467402e-06, + "loss": 0.0809, + "step": 95180 + }, + { + "epoch": 4.44, + "learning_rate": 5.241954446412616e-06, + "loss": 0.0503, + "step": 95185 + }, + { + "epoch": 4.44, + "learning_rate": 5.241170661357829e-06, + "loss": 0.0217, + "step": 95190 + }, + { + "epoch": 4.44, + "learning_rate": 5.240386876303043e-06, + "loss": 0.0091, + "step": 95195 + }, + { + "epoch": 4.44, + "learning_rate": 5.239603091248256e-06, + "loss": 0.2524, + "step": 95200 + }, + { + "epoch": 4.44, + "learning_rate": 5.23881930619347e-06, + "loss": 0.1054, + "step": 95205 + }, + { + "epoch": 4.44, + "learning_rate": 5.238035521138684e-06, + "loss": 0.1351, + "step": 95210 + }, + { + "epoch": 4.44, + "learning_rate": 5.237251736083897e-06, + "loss": 0.1043, + "step": 95215 + }, + { + "epoch": 4.44, + "learning_rate": 5.236467951029111e-06, + "loss": 0.1674, + "step": 95220 + }, + { + "epoch": 4.44, + "learning_rate": 5.235684165974324e-06, + "loss": 0.2654, + "step": 95225 + }, + { + "epoch": 4.44, + "learning_rate": 5.234900380919538e-06, + "loss": 0.0535, + "step": 95230 + }, + { + "epoch": 4.44, + "learning_rate": 5.23411659586475e-06, + "loss": 0.0409, + "step": 95235 + }, + { + "epoch": 4.44, + "learning_rate": 5.233332810809963e-06, + "loss": 0.0328, + "step": 95240 + }, + { + "epoch": 4.44, + "learning_rate": 5.232549025755177e-06, + "loss": 0.0177, + "step": 95245 + }, + { + "epoch": 4.44, + "learning_rate": 5.23176524070039e-06, + "loss": 0.0562, + "step": 95250 + }, + { + "epoch": 4.44, + "learning_rate": 5.230981455645604e-06, + "loss": 0.0974, + "step": 95255 + }, + { + "epoch": 4.44, + "learning_rate": 5.230197670590818e-06, + "loss": 0.1391, + "step": 95260 + }, + { + "epoch": 4.45, + "learning_rate": 5.229413885536031e-06, + "loss": 0.1046, + "step": 95265 + }, + { + "epoch": 4.45, + "learning_rate": 5.228630100481245e-06, + "loss": 0.1406, + "step": 95270 + }, + { + "epoch": 4.45, + "learning_rate": 5.227846315426458e-06, + "loss": 0.3384, + "step": 95275 + }, + { + "epoch": 4.45, + "learning_rate": 5.227062530371672e-06, + "loss": 0.0687, + "step": 95280 + }, + { + "epoch": 4.45, + "learning_rate": 5.226278745316885e-06, + "loss": 0.0178, + "step": 95285 + }, + { + "epoch": 4.45, + "learning_rate": 5.225494960262099e-06, + "loss": 0.0261, + "step": 95290 + }, + { + "epoch": 4.45, + "learning_rate": 5.224711175207312e-06, + "loss": 0.0263, + "step": 95295 + }, + { + "epoch": 4.45, + "learning_rate": 5.223927390152524e-06, + "loss": 0.026, + "step": 95300 + }, + { + "epoch": 4.45, + "learning_rate": 5.223143605097738e-06, + "loss": 0.054, + "step": 95305 + }, + { + "epoch": 4.45, + "learning_rate": 5.222359820042952e-06, + "loss": 0.1107, + "step": 95310 + }, + { + "epoch": 4.45, + "learning_rate": 5.221576034988165e-06, + "loss": 0.1421, + "step": 95315 + }, + { + "epoch": 4.45, + "learning_rate": 5.220792249933379e-06, + "loss": 0.1066, + "step": 95320 + }, + { + "epoch": 4.45, + "learning_rate": 5.220008464878592e-06, + "loss": 0.3201, + "step": 95325 + }, + { + "epoch": 4.45, + "learning_rate": 5.219224679823806e-06, + "loss": 0.1401, + "step": 95330 + }, + { + "epoch": 4.45, + "learning_rate": 5.218440894769019e-06, + "loss": 0.0372, + "step": 95335 + }, + { + "epoch": 4.45, + "learning_rate": 5.217657109714233e-06, + "loss": 0.0276, + "step": 95340 + }, + { + "epoch": 4.45, + "learning_rate": 5.216873324659446e-06, + "loss": 0.055, + "step": 95345 + }, + { + "epoch": 4.45, + "learning_rate": 5.21608953960466e-06, + "loss": 0.0375, + "step": 95350 + }, + { + "epoch": 4.45, + "learning_rate": 5.215305754549873e-06, + "loss": 0.0857, + "step": 95355 + }, + { + "epoch": 4.45, + "learning_rate": 5.214521969495087e-06, + "loss": 0.0805, + "step": 95360 + }, + { + "epoch": 4.45, + "learning_rate": 5.213738184440299e-06, + "loss": 0.0871, + "step": 95365 + }, + { + "epoch": 4.45, + "learning_rate": 5.212954399385513e-06, + "loss": 0.1381, + "step": 95370 + }, + { + "epoch": 4.45, + "learning_rate": 5.212170614330726e-06, + "loss": 0.3392, + "step": 95375 + }, + { + "epoch": 4.45, + "learning_rate": 5.21138682927594e-06, + "loss": 0.0491, + "step": 95380 + }, + { + "epoch": 4.45, + "learning_rate": 5.210603044221153e-06, + "loss": 0.0252, + "step": 95385 + }, + { + "epoch": 4.45, + "learning_rate": 5.209819259166367e-06, + "loss": 0.0284, + "step": 95390 + }, + { + "epoch": 4.45, + "learning_rate": 5.20903547411158e-06, + "loss": 0.0535, + "step": 95395 + }, + { + "epoch": 4.45, + "learning_rate": 5.208251689056794e-06, + "loss": 0.0496, + "step": 95400 + }, + { + "epoch": 4.45, + "learning_rate": 5.207467904002007e-06, + "loss": 0.0431, + "step": 95405 + }, + { + "epoch": 4.45, + "learning_rate": 5.206684118947221e-06, + "loss": 0.0613, + "step": 95410 + }, + { + "epoch": 4.45, + "learning_rate": 5.205900333892434e-06, + "loss": 0.1329, + "step": 95415 + }, + { + "epoch": 4.45, + "learning_rate": 5.205116548837648e-06, + "loss": 0.2121, + "step": 95420 + }, + { + "epoch": 4.45, + "learning_rate": 5.204332763782861e-06, + "loss": 0.1938, + "step": 95425 + }, + { + "epoch": 4.45, + "learning_rate": 5.203548978728074e-06, + "loss": 0.1142, + "step": 95430 + }, + { + "epoch": 4.45, + "learning_rate": 5.202765193673287e-06, + "loss": 0.0108, + "step": 95435 + }, + { + "epoch": 4.45, + "learning_rate": 5.201981408618501e-06, + "loss": 0.0168, + "step": 95440 + }, + { + "epoch": 4.45, + "learning_rate": 5.201197623563714e-06, + "loss": 0.078, + "step": 95445 + }, + { + "epoch": 4.45, + "learning_rate": 5.200413838508928e-06, + "loss": 0.0936, + "step": 95450 + }, + { + "epoch": 4.45, + "learning_rate": 5.199630053454141e-06, + "loss": 0.0835, + "step": 95455 + }, + { + "epoch": 4.45, + "learning_rate": 5.198846268399355e-06, + "loss": 0.144, + "step": 95460 + }, + { + "epoch": 4.45, + "learning_rate": 5.198062483344568e-06, + "loss": 0.1013, + "step": 95465 + }, + { + "epoch": 4.45, + "learning_rate": 5.197278698289782e-06, + "loss": 0.1307, + "step": 95470 + }, + { + "epoch": 4.45, + "learning_rate": 5.1964949132349955e-06, + "loss": 0.2666, + "step": 95475 + }, + { + "epoch": 4.46, + "learning_rate": 5.195711128180209e-06, + "loss": 0.1471, + "step": 95480 + }, + { + "epoch": 4.46, + "learning_rate": 5.1949273431254225e-06, + "loss": 0.0119, + "step": 95485 + }, + { + "epoch": 4.46, + "learning_rate": 5.1941435580706356e-06, + "loss": 0.0086, + "step": 95490 + }, + { + "epoch": 4.46, + "learning_rate": 5.193359773015848e-06, + "loss": 0.0188, + "step": 95495 + }, + { + "epoch": 4.46, + "learning_rate": 5.192575987961062e-06, + "loss": 0.0327, + "step": 95500 + }, + { + "epoch": 4.46, + "learning_rate": 5.191792202906275e-06, + "loss": 0.0809, + "step": 95505 + }, + { + "epoch": 4.46, + "learning_rate": 5.191008417851489e-06, + "loss": 0.1441, + "step": 95510 + }, + { + "epoch": 4.46, + "learning_rate": 5.190224632796702e-06, + "loss": 0.1278, + "step": 95515 + }, + { + "epoch": 4.46, + "learning_rate": 5.189440847741916e-06, + "loss": 0.0984, + "step": 95520 + }, + { + "epoch": 4.46, + "learning_rate": 5.1886570626871295e-06, + "loss": 0.1737, + "step": 95525 + }, + { + "epoch": 4.46, + "learning_rate": 5.1878732776323426e-06, + "loss": 0.0491, + "step": 95530 + }, + { + "epoch": 4.46, + "learning_rate": 5.1870894925775565e-06, + "loss": 0.0195, + "step": 95535 + }, + { + "epoch": 4.46, + "learning_rate": 5.1863057075227695e-06, + "loss": 0.0383, + "step": 95540 + }, + { + "epoch": 4.46, + "learning_rate": 5.1855219224679834e-06, + "loss": 0.0671, + "step": 95545 + }, + { + "epoch": 4.46, + "learning_rate": 5.1847381374131965e-06, + "loss": 0.0446, + "step": 95550 + }, + { + "epoch": 4.46, + "learning_rate": 5.18395435235841e-06, + "loss": 0.0744, + "step": 95555 + }, + { + "epoch": 4.46, + "learning_rate": 5.183170567303623e-06, + "loss": 0.1538, + "step": 95560 + }, + { + "epoch": 4.46, + "learning_rate": 5.182386782248836e-06, + "loss": 0.1187, + "step": 95565 + }, + { + "epoch": 4.46, + "learning_rate": 5.1816029971940496e-06, + "loss": 0.1129, + "step": 95570 + }, + { + "epoch": 4.46, + "learning_rate": 5.1808192121392635e-06, + "loss": 0.2866, + "step": 95575 + }, + { + "epoch": 4.46, + "learning_rate": 5.1800354270844765e-06, + "loss": 0.0671, + "step": 95580 + }, + { + "epoch": 4.46, + "learning_rate": 5.1792516420296904e-06, + "loss": 0.0258, + "step": 95585 + }, + { + "epoch": 4.46, + "learning_rate": 5.1784678569749035e-06, + "loss": 0.0303, + "step": 95590 + }, + { + "epoch": 4.46, + "learning_rate": 5.177684071920117e-06, + "loss": 0.0669, + "step": 95595 + }, + { + "epoch": 4.46, + "learning_rate": 5.1769002868653305e-06, + "loss": 0.1529, + "step": 95600 + }, + { + "epoch": 4.46, + "learning_rate": 5.176116501810544e-06, + "loss": 0.096, + "step": 95605 + }, + { + "epoch": 4.46, + "learning_rate": 5.1753327167557574e-06, + "loss": 0.038, + "step": 95610 + }, + { + "epoch": 4.46, + "learning_rate": 5.174548931700971e-06, + "loss": 0.0692, + "step": 95615 + }, + { + "epoch": 4.46, + "learning_rate": 5.173765146646184e-06, + "loss": 0.1471, + "step": 95620 + }, + { + "epoch": 4.46, + "learning_rate": 5.1729813615913975e-06, + "loss": 0.2807, + "step": 95625 + }, + { + "epoch": 4.46, + "learning_rate": 5.1721975765366105e-06, + "loss": 0.066, + "step": 95630 + }, + { + "epoch": 4.46, + "learning_rate": 5.171413791481824e-06, + "loss": 0.0502, + "step": 95635 + }, + { + "epoch": 4.46, + "learning_rate": 5.1706300064270375e-06, + "loss": 0.0317, + "step": 95640 + }, + { + "epoch": 4.46, + "learning_rate": 5.169846221372251e-06, + "loss": 0.0687, + "step": 95645 + }, + { + "epoch": 4.46, + "learning_rate": 5.1690624363174644e-06, + "loss": 0.0326, + "step": 95650 + }, + { + "epoch": 4.46, + "learning_rate": 5.168278651262678e-06, + "loss": 0.047, + "step": 95655 + }, + { + "epoch": 4.46, + "learning_rate": 5.167494866207891e-06, + "loss": 0.1274, + "step": 95660 + }, + { + "epoch": 4.46, + "learning_rate": 5.166711081153105e-06, + "loss": 0.0978, + "step": 95665 + }, + { + "epoch": 4.46, + "learning_rate": 5.165927296098318e-06, + "loss": 0.2717, + "step": 95670 + }, + { + "epoch": 4.46, + "learning_rate": 5.165143511043532e-06, + "loss": 0.2618, + "step": 95675 + }, + { + "epoch": 4.46, + "learning_rate": 5.164359725988745e-06, + "loss": 0.1012, + "step": 95680 + }, + { + "epoch": 4.46, + "learning_rate": 5.163575940933959e-06, + "loss": 0.0164, + "step": 95685 + }, + { + "epoch": 4.47, + "learning_rate": 5.1627921558791715e-06, + "loss": 0.0476, + "step": 95690 + }, + { + "epoch": 4.47, + "learning_rate": 5.162008370824385e-06, + "loss": 0.0308, + "step": 95695 + }, + { + "epoch": 4.47, + "learning_rate": 5.161224585769598e-06, + "loss": 0.0313, + "step": 95700 + }, + { + "epoch": 4.47, + "learning_rate": 5.160440800714812e-06, + "loss": 0.0541, + "step": 95705 + }, + { + "epoch": 4.47, + "learning_rate": 5.159657015660025e-06, + "loss": 0.0743, + "step": 95710 + }, + { + "epoch": 4.47, + "learning_rate": 5.158873230605239e-06, + "loss": 0.0944, + "step": 95715 + }, + { + "epoch": 4.47, + "learning_rate": 5.158089445550452e-06, + "loss": 0.1476, + "step": 95720 + }, + { + "epoch": 4.47, + "learning_rate": 5.157305660495666e-06, + "loss": 0.5071, + "step": 95725 + }, + { + "epoch": 4.47, + "learning_rate": 5.156521875440879e-06, + "loss": 0.0948, + "step": 95730 + }, + { + "epoch": 4.47, + "learning_rate": 5.155738090386093e-06, + "loss": 0.0133, + "step": 95735 + }, + { + "epoch": 4.47, + "learning_rate": 5.154954305331306e-06, + "loss": 0.034, + "step": 95740 + }, + { + "epoch": 4.47, + "learning_rate": 5.15417052027652e-06, + "loss": 0.0286, + "step": 95745 + }, + { + "epoch": 4.47, + "learning_rate": 5.153386735221734e-06, + "loss": 0.0403, + "step": 95750 + }, + { + "epoch": 4.47, + "learning_rate": 5.152602950166946e-06, + "loss": 0.0946, + "step": 95755 + }, + { + "epoch": 4.47, + "learning_rate": 5.151819165112159e-06, + "loss": 0.0878, + "step": 95760 + }, + { + "epoch": 4.47, + "learning_rate": 5.151035380057373e-06, + "loss": 0.1599, + "step": 95765 + }, + { + "epoch": 4.47, + "learning_rate": 5.150251595002586e-06, + "loss": 0.1451, + "step": 95770 + }, + { + "epoch": 4.47, + "learning_rate": 5.1494678099478e-06, + "loss": 0.2104, + "step": 95775 + }, + { + "epoch": 4.47, + "learning_rate": 5.148684024893013e-06, + "loss": 0.063, + "step": 95780 + }, + { + "epoch": 4.47, + "learning_rate": 5.147900239838227e-06, + "loss": 0.0221, + "step": 95785 + }, + { + "epoch": 4.47, + "learning_rate": 5.147116454783441e-06, + "loss": 0.0417, + "step": 95790 + }, + { + "epoch": 4.47, + "learning_rate": 5.146332669728654e-06, + "loss": 0.082, + "step": 95795 + }, + { + "epoch": 4.47, + "learning_rate": 5.145548884673868e-06, + "loss": 0.0665, + "step": 95800 + }, + { + "epoch": 4.47, + "learning_rate": 5.144765099619081e-06, + "loss": 0.0728, + "step": 95805 + }, + { + "epoch": 4.47, + "learning_rate": 5.143981314564295e-06, + "loss": 0.1377, + "step": 95810 + }, + { + "epoch": 4.47, + "learning_rate": 5.143197529509508e-06, + "loss": 0.1398, + "step": 95815 + }, + { + "epoch": 4.47, + "learning_rate": 5.14241374445472e-06, + "loss": 0.101, + "step": 95820 + }, + { + "epoch": 4.47, + "learning_rate": 5.141629959399934e-06, + "loss": 0.3332, + "step": 95825 + }, + { + "epoch": 4.47, + "learning_rate": 5.140846174345147e-06, + "loss": 0.0265, + "step": 95830 + }, + { + "epoch": 4.47, + "learning_rate": 5.140062389290361e-06, + "loss": 0.0367, + "step": 95835 + }, + { + "epoch": 4.47, + "learning_rate": 5.139278604235575e-06, + "loss": 0.0642, + "step": 95840 + }, + { + "epoch": 4.47, + "learning_rate": 5.138494819180788e-06, + "loss": 0.0294, + "step": 95845 + }, + { + "epoch": 4.47, + "learning_rate": 5.137711034126002e-06, + "loss": 0.0645, + "step": 95850 + }, + { + "epoch": 4.47, + "learning_rate": 5.136927249071215e-06, + "loss": 0.0848, + "step": 95855 + }, + { + "epoch": 4.47, + "learning_rate": 5.136143464016429e-06, + "loss": 0.0672, + "step": 95860 + }, + { + "epoch": 4.47, + "learning_rate": 5.135359678961642e-06, + "loss": 0.0753, + "step": 95865 + }, + { + "epoch": 4.47, + "learning_rate": 5.134575893906856e-06, + "loss": 0.1993, + "step": 95870 + }, + { + "epoch": 4.47, + "learning_rate": 5.133792108852069e-06, + "loss": 0.3417, + "step": 95875 + }, + { + "epoch": 4.47, + "learning_rate": 5.133008323797283e-06, + "loss": 0.1209, + "step": 95880 + }, + { + "epoch": 4.47, + "learning_rate": 5.132224538742495e-06, + "loss": 0.0442, + "step": 95885 + }, + { + "epoch": 4.47, + "learning_rate": 5.131440753687709e-06, + "loss": 0.0433, + "step": 95890 + }, + { + "epoch": 4.47, + "learning_rate": 5.130656968632922e-06, + "loss": 0.04, + "step": 95895 + }, + { + "epoch": 4.47, + "learning_rate": 5.129873183578136e-06, + "loss": 0.0476, + "step": 95900 + }, + { + "epoch": 4.48, + "learning_rate": 5.129089398523349e-06, + "loss": 0.0567, + "step": 95905 + }, + { + "epoch": 4.48, + "learning_rate": 5.128305613468563e-06, + "loss": 0.1427, + "step": 95910 + }, + { + "epoch": 4.48, + "learning_rate": 5.127521828413776e-06, + "loss": 0.1675, + "step": 95915 + }, + { + "epoch": 4.48, + "learning_rate": 5.12673804335899e-06, + "loss": 0.1834, + "step": 95920 + }, + { + "epoch": 4.48, + "learning_rate": 5.125954258304203e-06, + "loss": 0.2382, + "step": 95925 + }, + { + "epoch": 4.48, + "learning_rate": 5.125170473249417e-06, + "loss": 0.0803, + "step": 95930 + }, + { + "epoch": 4.48, + "learning_rate": 5.12438668819463e-06, + "loss": 0.0404, + "step": 95935 + }, + { + "epoch": 4.48, + "learning_rate": 5.123602903139844e-06, + "loss": 0.0186, + "step": 95940 + }, + { + "epoch": 4.48, + "learning_rate": 5.122819118085057e-06, + "loss": 0.049, + "step": 95945 + }, + { + "epoch": 4.48, + "learning_rate": 5.12203533303027e-06, + "loss": 0.0433, + "step": 95950 + }, + { + "epoch": 4.48, + "learning_rate": 5.121251547975483e-06, + "loss": 0.0587, + "step": 95955 + }, + { + "epoch": 4.48, + "learning_rate": 5.120467762920697e-06, + "loss": 0.0541, + "step": 95960 + }, + { + "epoch": 4.48, + "learning_rate": 5.11968397786591e-06, + "loss": 0.1232, + "step": 95965 + }, + { + "epoch": 4.48, + "learning_rate": 5.118900192811124e-06, + "loss": 0.215, + "step": 95970 + }, + { + "epoch": 4.48, + "learning_rate": 5.118116407756337e-06, + "loss": 0.2574, + "step": 95975 + }, + { + "epoch": 4.48, + "learning_rate": 5.117332622701551e-06, + "loss": 0.0626, + "step": 95980 + }, + { + "epoch": 4.48, + "learning_rate": 5.116548837646764e-06, + "loss": 0.0274, + "step": 95985 + }, + { + "epoch": 4.48, + "learning_rate": 5.115765052591978e-06, + "loss": 0.0429, + "step": 95990 + }, + { + "epoch": 4.48, + "learning_rate": 5.114981267537191e-06, + "loss": 0.0308, + "step": 95995 + }, + { + "epoch": 4.48, + "learning_rate": 5.114197482482405e-06, + "loss": 0.0277, + "step": 96000 + }, + { + "epoch": 4.48, + "learning_rate": 5.113413697427618e-06, + "loss": 0.0708, + "step": 96005 + }, + { + "epoch": 4.48, + "learning_rate": 5.112629912372832e-06, + "loss": 0.0683, + "step": 96010 + }, + { + "epoch": 4.48, + "learning_rate": 5.111846127318044e-06, + "loss": 0.1439, + "step": 96015 + }, + { + "epoch": 4.48, + "learning_rate": 5.111062342263258e-06, + "loss": 0.2307, + "step": 96020 + }, + { + "epoch": 4.48, + "learning_rate": 5.110278557208471e-06, + "loss": 0.2271, + "step": 96025 + }, + { + "epoch": 4.48, + "learning_rate": 5.109494772153685e-06, + "loss": 0.0971, + "step": 96030 + }, + { + "epoch": 4.48, + "learning_rate": 5.108710987098898e-06, + "loss": 0.0569, + "step": 96035 + }, + { + "epoch": 4.48, + "learning_rate": 5.107927202044112e-06, + "loss": 0.0343, + "step": 96040 + }, + { + "epoch": 4.48, + "learning_rate": 5.107143416989325e-06, + "loss": 0.0377, + "step": 96045 + }, + { + "epoch": 4.48, + "learning_rate": 5.106359631934539e-06, + "loss": 0.0933, + "step": 96050 + }, + { + "epoch": 4.48, + "learning_rate": 5.105575846879753e-06, + "loss": 0.0564, + "step": 96055 + }, + { + "epoch": 4.48, + "learning_rate": 5.104792061824966e-06, + "loss": 0.0657, + "step": 96060 + }, + { + "epoch": 4.48, + "learning_rate": 5.10400827677018e-06, + "loss": 0.0747, + "step": 96065 + }, + { + "epoch": 4.48, + "learning_rate": 5.103224491715393e-06, + "loss": 0.1499, + "step": 96070 + }, + { + "epoch": 4.48, + "learning_rate": 5.102440706660607e-06, + "loss": 0.2215, + "step": 96075 + }, + { + "epoch": 4.48, + "learning_rate": 5.101656921605819e-06, + "loss": 0.0642, + "step": 96080 + }, + { + "epoch": 4.48, + "learning_rate": 5.100873136551032e-06, + "loss": 0.0254, + "step": 96085 + }, + { + "epoch": 4.48, + "learning_rate": 5.100089351496246e-06, + "loss": 0.0405, + "step": 96090 + }, + { + "epoch": 4.48, + "learning_rate": 5.099305566441459e-06, + "loss": 0.0456, + "step": 96095 + }, + { + "epoch": 4.48, + "learning_rate": 5.098521781386673e-06, + "loss": 0.0553, + "step": 96100 + }, + { + "epoch": 4.48, + "learning_rate": 5.097737996331887e-06, + "loss": 0.0686, + "step": 96105 + }, + { + "epoch": 4.48, + "learning_rate": 5.0969542112771e-06, + "loss": 0.1628, + "step": 96110 + }, + { + "epoch": 4.48, + "learning_rate": 5.096170426222314e-06, + "loss": 0.1578, + "step": 96115 + }, + { + "epoch": 4.49, + "learning_rate": 5.095386641167527e-06, + "loss": 0.1804, + "step": 96120 + }, + { + "epoch": 4.49, + "learning_rate": 5.094602856112741e-06, + "loss": 0.2932, + "step": 96125 + }, + { + "epoch": 4.49, + "learning_rate": 5.093819071057954e-06, + "loss": 0.0573, + "step": 96130 + }, + { + "epoch": 4.49, + "learning_rate": 5.0930352860031676e-06, + "loss": 0.0141, + "step": 96135 + }, + { + "epoch": 4.49, + "learning_rate": 5.092251500948381e-06, + "loss": 0.0573, + "step": 96140 + }, + { + "epoch": 4.49, + "learning_rate": 5.091467715893593e-06, + "loss": 0.0622, + "step": 96145 + }, + { + "epoch": 4.49, + "learning_rate": 5.090683930838807e-06, + "loss": 0.0876, + "step": 96150 + }, + { + "epoch": 4.49, + "learning_rate": 5.089900145784021e-06, + "loss": 0.091, + "step": 96155 + }, + { + "epoch": 4.49, + "learning_rate": 5.089116360729234e-06, + "loss": 0.064, + "step": 96160 + }, + { + "epoch": 4.49, + "learning_rate": 5.088332575674448e-06, + "loss": 0.1033, + "step": 96165 + }, + { + "epoch": 4.49, + "learning_rate": 5.087548790619661e-06, + "loss": 0.1584, + "step": 96170 + }, + { + "epoch": 4.49, + "learning_rate": 5.0867650055648746e-06, + "loss": 0.3323, + "step": 96175 + }, + { + "epoch": 4.49, + "learning_rate": 5.085981220510088e-06, + "loss": 0.0875, + "step": 96180 + }, + { + "epoch": 4.49, + "learning_rate": 5.0851974354553015e-06, + "loss": 0.0535, + "step": 96185 + }, + { + "epoch": 4.49, + "learning_rate": 5.084413650400515e-06, + "loss": 0.0159, + "step": 96190 + }, + { + "epoch": 4.49, + "learning_rate": 5.0836298653457285e-06, + "loss": 0.0927, + "step": 96195 + }, + { + "epoch": 4.49, + "learning_rate": 5.0828460802909416e-06, + "loss": 0.0372, + "step": 96200 + }, + { + "epoch": 4.49, + "learning_rate": 5.0820622952361555e-06, + "loss": 0.0431, + "step": 96205 + }, + { + "epoch": 4.49, + "learning_rate": 5.081278510181368e-06, + "loss": 0.0643, + "step": 96210 + }, + { + "epoch": 4.49, + "learning_rate": 5.080494725126582e-06, + "loss": 0.036, + "step": 96215 + }, + { + "epoch": 4.49, + "learning_rate": 5.079710940071795e-06, + "loss": 0.1155, + "step": 96220 + }, + { + "epoch": 4.49, + "learning_rate": 5.0789271550170085e-06, + "loss": 0.2546, + "step": 96225 + }, + { + "epoch": 4.49, + "learning_rate": 5.078143369962222e-06, + "loss": 0.0981, + "step": 96230 + }, + { + "epoch": 4.49, + "learning_rate": 5.0773595849074355e-06, + "loss": 0.0233, + "step": 96235 + }, + { + "epoch": 4.49, + "learning_rate": 5.0765757998526486e-06, + "loss": 0.022, + "step": 96240 + }, + { + "epoch": 4.49, + "learning_rate": 5.0757920147978625e-06, + "loss": 0.0384, + "step": 96245 + }, + { + "epoch": 4.49, + "learning_rate": 5.0750082297430755e-06, + "loss": 0.0542, + "step": 96250 + }, + { + "epoch": 4.49, + "learning_rate": 5.0742244446882894e-06, + "loss": 0.061, + "step": 96255 + }, + { + "epoch": 4.49, + "learning_rate": 5.0734406596335025e-06, + "loss": 0.1771, + "step": 96260 + }, + { + "epoch": 4.49, + "learning_rate": 5.072656874578716e-06, + "loss": 0.0665, + "step": 96265 + }, + { + "epoch": 4.49, + "learning_rate": 5.0718730895239295e-06, + "loss": 0.1996, + "step": 96270 + }, + { + "epoch": 4.49, + "learning_rate": 5.0710893044691425e-06, + "loss": 0.2324, + "step": 96275 + }, + { + "epoch": 4.49, + "learning_rate": 5.070305519414356e-06, + "loss": 0.0724, + "step": 96280 + }, + { + "epoch": 4.49, + "learning_rate": 5.0695217343595695e-06, + "loss": 0.0442, + "step": 96285 + }, + { + "epoch": 4.49, + "learning_rate": 5.0687379493047825e-06, + "loss": 0.0201, + "step": 96290 + }, + { + "epoch": 4.49, + "learning_rate": 5.0679541642499965e-06, + "loss": 0.0359, + "step": 96295 + }, + { + "epoch": 4.49, + "learning_rate": 5.0671703791952095e-06, + "loss": 0.0216, + "step": 96300 + }, + { + "epoch": 4.49, + "learning_rate": 5.066386594140423e-06, + "loss": 0.0556, + "step": 96305 + }, + { + "epoch": 4.49, + "learning_rate": 5.0656028090856365e-06, + "loss": 0.1117, + "step": 96310 + }, + { + "epoch": 4.49, + "learning_rate": 5.06481902403085e-06, + "loss": 0.0927, + "step": 96315 + }, + { + "epoch": 4.49, + "learning_rate": 5.0640352389760634e-06, + "loss": 0.1306, + "step": 96320 + }, + { + "epoch": 4.49, + "learning_rate": 5.063251453921277e-06, + "loss": 0.4051, + "step": 96325 + }, + { + "epoch": 4.49, + "learning_rate": 5.062467668866491e-06, + "loss": 0.0428, + "step": 96330 + }, + { + "epoch": 4.5, + "learning_rate": 5.061683883811704e-06, + "loss": 0.024, + "step": 96335 + }, + { + "epoch": 4.5, + "learning_rate": 5.0609000987569165e-06, + "loss": 0.067, + "step": 96340 + }, + { + "epoch": 4.5, + "learning_rate": 5.0601163137021304e-06, + "loss": 0.0682, + "step": 96345 + }, + { + "epoch": 4.5, + "learning_rate": 5.0593325286473435e-06, + "loss": 0.0241, + "step": 96350 + }, + { + "epoch": 4.5, + "learning_rate": 5.058548743592557e-06, + "loss": 0.0588, + "step": 96355 + }, + { + "epoch": 4.5, + "learning_rate": 5.0577649585377705e-06, + "loss": 0.0678, + "step": 96360 + }, + { + "epoch": 4.5, + "learning_rate": 5.056981173482984e-06, + "loss": 0.0742, + "step": 96365 + }, + { + "epoch": 4.5, + "learning_rate": 5.056197388428198e-06, + "loss": 0.2118, + "step": 96370 + }, + { + "epoch": 4.5, + "learning_rate": 5.055413603373411e-06, + "loss": 0.1926, + "step": 96375 + }, + { + "epoch": 4.5, + "learning_rate": 5.054629818318625e-06, + "loss": 0.1095, + "step": 96380 + }, + { + "epoch": 4.5, + "learning_rate": 5.053846033263838e-06, + "loss": 0.037, + "step": 96385 + }, + { + "epoch": 4.5, + "learning_rate": 5.053062248209052e-06, + "loss": 0.0292, + "step": 96390 + }, + { + "epoch": 4.5, + "learning_rate": 5.052278463154265e-06, + "loss": 0.0827, + "step": 96395 + }, + { + "epoch": 4.5, + "learning_rate": 5.051494678099479e-06, + "loss": 0.043, + "step": 96400 + }, + { + "epoch": 4.5, + "learning_rate": 5.050710893044691e-06, + "loss": 0.0861, + "step": 96405 + }, + { + "epoch": 4.5, + "learning_rate": 5.0499271079899044e-06, + "loss": 0.0493, + "step": 96410 + }, + { + "epoch": 4.5, + "learning_rate": 5.049143322935118e-06, + "loss": 0.0618, + "step": 96415 + }, + { + "epoch": 4.5, + "learning_rate": 5.048359537880332e-06, + "loss": 0.1525, + "step": 96420 + }, + { + "epoch": 4.5, + "learning_rate": 5.047575752825545e-06, + "loss": 0.3023, + "step": 96425 + }, + { + "epoch": 4.5, + "learning_rate": 5.046791967770759e-06, + "loss": 0.0925, + "step": 96430 + }, + { + "epoch": 4.5, + "learning_rate": 5.046008182715972e-06, + "loss": 0.0056, + "step": 96435 + }, + { + "epoch": 4.5, + "learning_rate": 5.045224397661186e-06, + "loss": 0.0344, + "step": 96440 + }, + { + "epoch": 4.5, + "learning_rate": 5.044440612606399e-06, + "loss": 0.0139, + "step": 96445 + }, + { + "epoch": 4.5, + "learning_rate": 5.043656827551613e-06, + "loss": 0.0358, + "step": 96450 + }, + { + "epoch": 4.5, + "learning_rate": 5.042873042496826e-06, + "loss": 0.0974, + "step": 96455 + }, + { + "epoch": 4.5, + "learning_rate": 5.04208925744204e-06, + "loss": 0.0716, + "step": 96460 + }, + { + "epoch": 4.5, + "learning_rate": 5.041305472387253e-06, + "loss": 0.0823, + "step": 96465 + }, + { + "epoch": 4.5, + "learning_rate": 5.040521687332466e-06, + "loss": 0.1722, + "step": 96470 + }, + { + "epoch": 4.5, + "learning_rate": 5.039737902277679e-06, + "loss": 0.3896, + "step": 96475 + }, + { + "epoch": 4.5, + "learning_rate": 5.038954117222893e-06, + "loss": 0.0872, + "step": 96480 + }, + { + "epoch": 4.5, + "learning_rate": 5.038170332168106e-06, + "loss": 0.016, + "step": 96485 + }, + { + "epoch": 4.5, + "learning_rate": 5.03738654711332e-06, + "loss": 0.0659, + "step": 96490 + }, + { + "epoch": 4.5, + "learning_rate": 5.036602762058533e-06, + "loss": 0.0786, + "step": 96495 + }, + { + "epoch": 4.5, + "learning_rate": 5.035818977003747e-06, + "loss": 0.0762, + "step": 96500 + }, + { + "epoch": 4.5, + "learning_rate": 5.03503519194896e-06, + "loss": 0.1367, + "step": 96505 + }, + { + "epoch": 4.5, + "learning_rate": 5.034251406894174e-06, + "loss": 0.0624, + "step": 96510 + }, + { + "epoch": 4.5, + "learning_rate": 5.033467621839387e-06, + "loss": 0.1344, + "step": 96515 + }, + { + "epoch": 4.5, + "learning_rate": 5.032683836784601e-06, + "loss": 0.1567, + "step": 96520 + }, + { + "epoch": 4.5, + "learning_rate": 5.031900051729814e-06, + "loss": 0.2945, + "step": 96525 + }, + { + "epoch": 4.5, + "learning_rate": 5.031116266675028e-06, + "loss": 0.1039, + "step": 96530 + }, + { + "epoch": 4.5, + "learning_rate": 5.03033248162024e-06, + "loss": 0.0075, + "step": 96535 + }, + { + "epoch": 4.5, + "learning_rate": 5.029548696565454e-06, + "loss": 0.0324, + "step": 96540 + }, + { + "epoch": 4.5, + "learning_rate": 5.028764911510667e-06, + "loss": 0.047, + "step": 96545 + }, + { + "epoch": 4.51, + "learning_rate": 5.027981126455881e-06, + "loss": 0.0376, + "step": 96550 + }, + { + "epoch": 4.51, + "learning_rate": 5.027197341401094e-06, + "loss": 0.058, + "step": 96555 + }, + { + "epoch": 4.51, + "learning_rate": 5.026413556346308e-06, + "loss": 0.145, + "step": 96560 + }, + { + "epoch": 4.51, + "learning_rate": 5.025629771291521e-06, + "loss": 0.0721, + "step": 96565 + }, + { + "epoch": 4.51, + "learning_rate": 5.024845986236735e-06, + "loss": 0.196, + "step": 96570 + }, + { + "epoch": 4.51, + "learning_rate": 5.024062201181948e-06, + "loss": 0.3162, + "step": 96575 + }, + { + "epoch": 4.51, + "learning_rate": 5.023278416127162e-06, + "loss": 0.0714, + "step": 96580 + }, + { + "epoch": 4.51, + "learning_rate": 5.022494631072375e-06, + "loss": 0.0322, + "step": 96585 + }, + { + "epoch": 4.51, + "learning_rate": 5.021710846017589e-06, + "loss": 0.0425, + "step": 96590 + }, + { + "epoch": 4.51, + "learning_rate": 5.020927060962803e-06, + "loss": 0.0744, + "step": 96595 + }, + { + "epoch": 4.51, + "learning_rate": 5.020143275908015e-06, + "loss": 0.0285, + "step": 96600 + }, + { + "epoch": 4.51, + "learning_rate": 5.019359490853228e-06, + "loss": 0.123, + "step": 96605 + }, + { + "epoch": 4.51, + "learning_rate": 5.018575705798442e-06, + "loss": 0.0877, + "step": 96610 + }, + { + "epoch": 4.51, + "learning_rate": 5.017791920743655e-06, + "loss": 0.0745, + "step": 96615 + }, + { + "epoch": 4.51, + "learning_rate": 5.017008135688869e-06, + "loss": 0.1371, + "step": 96620 + }, + { + "epoch": 4.51, + "learning_rate": 5.016224350634082e-06, + "loss": 0.2937, + "step": 96625 + }, + { + "epoch": 4.51, + "learning_rate": 5.015440565579296e-06, + "loss": 0.0385, + "step": 96630 + }, + { + "epoch": 4.51, + "learning_rate": 5.01465678052451e-06, + "loss": 0.0205, + "step": 96635 + }, + { + "epoch": 4.51, + "learning_rate": 5.013872995469723e-06, + "loss": 0.071, + "step": 96640 + }, + { + "epoch": 4.51, + "learning_rate": 5.013089210414937e-06, + "loss": 0.0438, + "step": 96645 + }, + { + "epoch": 4.51, + "learning_rate": 5.01230542536015e-06, + "loss": 0.2251, + "step": 96650 + }, + { + "epoch": 4.51, + "learning_rate": 5.011521640305364e-06, + "loss": 0.1259, + "step": 96655 + }, + { + "epoch": 4.51, + "learning_rate": 5.010737855250577e-06, + "loss": 0.0676, + "step": 96660 + }, + { + "epoch": 4.51, + "learning_rate": 5.009954070195789e-06, + "loss": 0.1151, + "step": 96665 + }, + { + "epoch": 4.51, + "learning_rate": 5.009170285141003e-06, + "loss": 0.1968, + "step": 96670 + }, + { + "epoch": 4.51, + "learning_rate": 5.008386500086216e-06, + "loss": 0.3049, + "step": 96675 + }, + { + "epoch": 4.51, + "learning_rate": 5.00760271503143e-06, + "loss": 0.0801, + "step": 96680 + }, + { + "epoch": 4.51, + "learning_rate": 5.006818929976644e-06, + "loss": 0.0246, + "step": 96685 + }, + { + "epoch": 4.51, + "learning_rate": 5.006035144921857e-06, + "loss": 0.0264, + "step": 96690 + }, + { + "epoch": 4.51, + "learning_rate": 5.005251359867071e-06, + "loss": 0.0641, + "step": 96695 + }, + { + "epoch": 4.51, + "learning_rate": 5.004467574812284e-06, + "loss": 0.0993, + "step": 96700 + }, + { + "epoch": 4.51, + "learning_rate": 5.003683789757498e-06, + "loss": 0.0571, + "step": 96705 + }, + { + "epoch": 4.51, + "learning_rate": 5.002900004702711e-06, + "loss": 0.0889, + "step": 96710 + }, + { + "epoch": 4.51, + "learning_rate": 5.002116219647925e-06, + "loss": 0.0814, + "step": 96715 + }, + { + "epoch": 4.51, + "learning_rate": 5.001332434593138e-06, + "loss": 0.2205, + "step": 96720 + }, + { + "epoch": 4.51, + "learning_rate": 5.000548649538352e-06, + "loss": 0.2633, + "step": 96725 + }, + { + "epoch": 4.51, + "learning_rate": 4.999764864483565e-06, + "loss": 0.1192, + "step": 96730 + }, + { + "epoch": 4.51, + "learning_rate": 4.998981079428778e-06, + "loss": 0.0091, + "step": 96735 + }, + { + "epoch": 4.51, + "learning_rate": 4.998197294373992e-06, + "loss": 0.0728, + "step": 96740 + }, + { + "epoch": 4.51, + "learning_rate": 4.997413509319205e-06, + "loss": 0.048, + "step": 96745 + }, + { + "epoch": 4.51, + "learning_rate": 4.996629724264418e-06, + "loss": 0.0502, + "step": 96750 + }, + { + "epoch": 4.51, + "learning_rate": 4.995845939209632e-06, + "loss": 0.1211, + "step": 96755 + }, + { + "epoch": 4.51, + "learning_rate": 4.995062154154845e-06, + "loss": 0.0787, + "step": 96760 + }, + { + "epoch": 4.52, + "learning_rate": 4.994278369100059e-06, + "loss": 0.1444, + "step": 96765 + }, + { + "epoch": 4.52, + "learning_rate": 4.993494584045272e-06, + "loss": 0.1242, + "step": 96770 + }, + { + "epoch": 4.52, + "learning_rate": 4.992710798990486e-06, + "loss": 0.313, + "step": 96775 + }, + { + "epoch": 4.52, + "learning_rate": 4.991927013935699e-06, + "loss": 0.0916, + "step": 96780 + }, + { + "epoch": 4.52, + "learning_rate": 4.991143228880912e-06, + "loss": 0.0141, + "step": 96785 + }, + { + "epoch": 4.52, + "learning_rate": 4.990359443826126e-06, + "loss": 0.0885, + "step": 96790 + }, + { + "epoch": 4.52, + "learning_rate": 4.989575658771339e-06, + "loss": 0.0595, + "step": 96795 + }, + { + "epoch": 4.52, + "learning_rate": 4.988791873716553e-06, + "loss": 0.0519, + "step": 96800 + }, + { + "epoch": 4.52, + "learning_rate": 4.988008088661766e-06, + "loss": 0.0709, + "step": 96805 + }, + { + "epoch": 4.52, + "learning_rate": 4.987224303606979e-06, + "loss": 0.0593, + "step": 96810 + }, + { + "epoch": 4.52, + "learning_rate": 4.986440518552193e-06, + "loss": 0.0647, + "step": 96815 + }, + { + "epoch": 4.52, + "learning_rate": 4.985656733497406e-06, + "loss": 0.1616, + "step": 96820 + }, + { + "epoch": 4.52, + "learning_rate": 4.98487294844262e-06, + "loss": 0.1433, + "step": 96825 + }, + { + "epoch": 4.52, + "learning_rate": 4.984089163387833e-06, + "loss": 0.0904, + "step": 96830 + }, + { + "epoch": 4.52, + "learning_rate": 4.983305378333047e-06, + "loss": 0.0042, + "step": 96835 + }, + { + "epoch": 4.52, + "learning_rate": 4.982678350289217e-06, + "loss": 0.0378, + "step": 96840 + }, + { + "epoch": 4.52, + "learning_rate": 4.98189456523443e-06, + "loss": 0.0806, + "step": 96845 + }, + { + "epoch": 4.52, + "learning_rate": 4.981110780179644e-06, + "loss": 0.0883, + "step": 96850 + }, + { + "epoch": 4.52, + "learning_rate": 4.980326995124857e-06, + "loss": 0.0702, + "step": 96855 + }, + { + "epoch": 4.52, + "learning_rate": 4.979543210070071e-06, + "loss": 0.0795, + "step": 96860 + }, + { + "epoch": 4.52, + "learning_rate": 4.978759425015284e-06, + "loss": 0.2138, + "step": 96865 + }, + { + "epoch": 4.52, + "learning_rate": 4.977975639960498e-06, + "loss": 0.0481, + "step": 96870 + }, + { + "epoch": 4.52, + "learning_rate": 4.977191854905711e-06, + "loss": 0.3093, + "step": 96875 + }, + { + "epoch": 4.52, + "learning_rate": 4.976408069850924e-06, + "loss": 0.0415, + "step": 96880 + }, + { + "epoch": 4.52, + "learning_rate": 4.975624284796138e-06, + "loss": 0.0263, + "step": 96885 + }, + { + "epoch": 4.52, + "learning_rate": 4.974840499741351e-06, + "loss": 0.0565, + "step": 96890 + }, + { + "epoch": 4.52, + "learning_rate": 4.974056714686565e-06, + "loss": 0.0206, + "step": 96895 + }, + { + "epoch": 4.52, + "learning_rate": 4.973272929631778e-06, + "loss": 0.0273, + "step": 96900 + }, + { + "epoch": 4.52, + "learning_rate": 4.972489144576992e-06, + "loss": 0.0562, + "step": 96905 + }, + { + "epoch": 4.52, + "learning_rate": 4.971705359522205e-06, + "loss": 0.1214, + "step": 96910 + }, + { + "epoch": 4.52, + "learning_rate": 4.970921574467418e-06, + "loss": 0.059, + "step": 96915 + }, + { + "epoch": 4.52, + "learning_rate": 4.970137789412632e-06, + "loss": 0.1313, + "step": 96920 + }, + { + "epoch": 4.52, + "learning_rate": 4.969354004357845e-06, + "loss": 0.1764, + "step": 96925 + }, + { + "epoch": 4.52, + "learning_rate": 4.968570219303059e-06, + "loss": 0.0834, + "step": 96930 + }, + { + "epoch": 4.52, + "learning_rate": 4.967786434248272e-06, + "loss": 0.0578, + "step": 96935 + }, + { + "epoch": 4.52, + "learning_rate": 4.967002649193486e-06, + "loss": 0.0122, + "step": 96940 + }, + { + "epoch": 4.52, + "learning_rate": 4.966218864138699e-06, + "loss": 0.0402, + "step": 96945 + }, + { + "epoch": 4.52, + "learning_rate": 4.965435079083912e-06, + "loss": 0.0754, + "step": 96950 + }, + { + "epoch": 4.52, + "learning_rate": 4.964651294029126e-06, + "loss": 0.1326, + "step": 96955 + }, + { + "epoch": 4.52, + "learning_rate": 4.963867508974339e-06, + "loss": 0.0854, + "step": 96960 + }, + { + "epoch": 4.52, + "learning_rate": 4.963083723919553e-06, + "loss": 0.1199, + "step": 96965 + }, + { + "epoch": 4.52, + "learning_rate": 4.962299938864767e-06, + "loss": 0.157, + "step": 96970 + }, + { + "epoch": 4.52, + "learning_rate": 4.961516153809979e-06, + "loss": 0.2244, + "step": 96975 + }, + { + "epoch": 4.53, + "learning_rate": 4.960732368755193e-06, + "loss": 0.0522, + "step": 96980 + }, + { + "epoch": 4.53, + "learning_rate": 4.959948583700406e-06, + "loss": 0.03, + "step": 96985 + }, + { + "epoch": 4.53, + "learning_rate": 4.95916479864562e-06, + "loss": 0.0559, + "step": 96990 + }, + { + "epoch": 4.53, + "learning_rate": 4.958381013590834e-06, + "loss": 0.029, + "step": 96995 + }, + { + "epoch": 4.53, + "learning_rate": 4.957597228536047e-06, + "loss": 0.017, + "step": 97000 + }, + { + "epoch": 4.53, + "learning_rate": 4.956813443481261e-06, + "loss": 0.0463, + "step": 97005 + }, + { + "epoch": 4.53, + "learning_rate": 4.956029658426473e-06, + "loss": 0.0514, + "step": 97010 + }, + { + "epoch": 4.53, + "learning_rate": 4.955245873371687e-06, + "loss": 0.1119, + "step": 97015 + }, + { + "epoch": 4.53, + "learning_rate": 4.954462088316901e-06, + "loss": 0.1411, + "step": 97020 + }, + { + "epoch": 4.53, + "learning_rate": 4.953678303262114e-06, + "loss": 0.4395, + "step": 97025 + }, + { + "epoch": 4.53, + "learning_rate": 4.952894518207328e-06, + "loss": 0.1218, + "step": 97030 + }, + { + "epoch": 4.53, + "learning_rate": 4.952110733152541e-06, + "loss": 0.0304, + "step": 97035 + }, + { + "epoch": 4.53, + "learning_rate": 4.951326948097754e-06, + "loss": 0.0219, + "step": 97040 + }, + { + "epoch": 4.53, + "learning_rate": 4.950543163042968e-06, + "loss": 0.0266, + "step": 97045 + }, + { + "epoch": 4.53, + "learning_rate": 4.949759377988181e-06, + "loss": 0.0438, + "step": 97050 + }, + { + "epoch": 4.53, + "learning_rate": 4.948975592933395e-06, + "loss": 0.0769, + "step": 97055 + }, + { + "epoch": 4.53, + "learning_rate": 4.948191807878608e-06, + "loss": 0.0346, + "step": 97060 + }, + { + "epoch": 4.53, + "learning_rate": 4.947408022823822e-06, + "loss": 0.1147, + "step": 97065 + }, + { + "epoch": 4.53, + "learning_rate": 4.946624237769035e-06, + "loss": 0.1371, + "step": 97070 + }, + { + "epoch": 4.53, + "learning_rate": 4.945840452714248e-06, + "loss": 0.3511, + "step": 97075 + }, + { + "epoch": 4.53, + "learning_rate": 4.945056667659462e-06, + "loss": 0.1057, + "step": 97080 + }, + { + "epoch": 4.53, + "learning_rate": 4.944272882604675e-06, + "loss": 0.0914, + "step": 97085 + }, + { + "epoch": 4.53, + "learning_rate": 4.943489097549889e-06, + "loss": 0.0389, + "step": 97090 + }, + { + "epoch": 4.53, + "learning_rate": 4.942705312495102e-06, + "loss": 0.0314, + "step": 97095 + }, + { + "epoch": 4.53, + "learning_rate": 4.941921527440316e-06, + "loss": 0.0409, + "step": 97100 + }, + { + "epoch": 4.53, + "learning_rate": 4.941137742385529e-06, + "loss": 0.0336, + "step": 97105 + }, + { + "epoch": 4.53, + "learning_rate": 4.940353957330742e-06, + "loss": 0.0996, + "step": 97110 + }, + { + "epoch": 4.53, + "learning_rate": 4.939570172275956e-06, + "loss": 0.1607, + "step": 97115 + }, + { + "epoch": 4.53, + "learning_rate": 4.938786387221169e-06, + "loss": 0.1231, + "step": 97120 + }, + { + "epoch": 4.53, + "learning_rate": 4.938002602166383e-06, + "loss": 0.3094, + "step": 97125 + }, + { + "epoch": 4.53, + "learning_rate": 4.937218817111596e-06, + "loss": 0.0607, + "step": 97130 + }, + { + "epoch": 4.53, + "learning_rate": 4.9364350320568095e-06, + "loss": 0.0167, + "step": 97135 + }, + { + "epoch": 4.53, + "learning_rate": 4.935651247002023e-06, + "loss": 0.0365, + "step": 97140 + }, + { + "epoch": 4.53, + "learning_rate": 4.934867461947236e-06, + "loss": 0.0406, + "step": 97145 + }, + { + "epoch": 4.53, + "learning_rate": 4.9340836768924496e-06, + "loss": 0.0798, + "step": 97150 + }, + { + "epoch": 4.53, + "learning_rate": 4.933299891837663e-06, + "loss": 0.0943, + "step": 97155 + }, + { + "epoch": 4.53, + "learning_rate": 4.9325161067828765e-06, + "loss": 0.1016, + "step": 97160 + }, + { + "epoch": 4.53, + "learning_rate": 4.93173232172809e-06, + "loss": 0.1101, + "step": 97165 + }, + { + "epoch": 4.53, + "learning_rate": 4.930948536673303e-06, + "loss": 0.1823, + "step": 97170 + }, + { + "epoch": 4.53, + "learning_rate": 4.9301647516185166e-06, + "loss": 0.3086, + "step": 97175 + }, + { + "epoch": 4.53, + "learning_rate": 4.92938096656373e-06, + "loss": 0.0975, + "step": 97180 + }, + { + "epoch": 4.53, + "learning_rate": 4.9285971815089435e-06, + "loss": 0.0212, + "step": 97185 + }, + { + "epoch": 4.54, + "learning_rate": 4.9278133964541566e-06, + "loss": 0.0369, + "step": 97190 + }, + { + "epoch": 4.54, + "learning_rate": 4.9270296113993705e-06, + "loss": 0.0369, + "step": 97195 + }, + { + "epoch": 4.54, + "learning_rate": 4.9262458263445835e-06, + "loss": 0.0732, + "step": 97200 + }, + { + "epoch": 4.54, + "learning_rate": 4.925462041289797e-06, + "loss": 0.0529, + "step": 97205 + }, + { + "epoch": 4.54, + "learning_rate": 4.9246782562350105e-06, + "loss": 0.1272, + "step": 97210 + }, + { + "epoch": 4.54, + "learning_rate": 4.9238944711802236e-06, + "loss": 0.0712, + "step": 97215 + }, + { + "epoch": 4.54, + "learning_rate": 4.9231106861254375e-06, + "loss": 0.1005, + "step": 97220 + }, + { + "epoch": 4.54, + "learning_rate": 4.9223269010706505e-06, + "loss": 0.2915, + "step": 97225 + }, + { + "epoch": 4.54, + "learning_rate": 4.9215431160158644e-06, + "loss": 0.1004, + "step": 97230 + }, + { + "epoch": 4.54, + "learning_rate": 4.9207593309610775e-06, + "loss": 0.0223, + "step": 97235 + }, + { + "epoch": 4.54, + "learning_rate": 4.9199755459062906e-06, + "loss": 0.0694, + "step": 97240 + }, + { + "epoch": 4.54, + "learning_rate": 4.9191917608515045e-06, + "loss": 0.0434, + "step": 97245 + }, + { + "epoch": 4.54, + "learning_rate": 4.9184079757967175e-06, + "loss": 0.0165, + "step": 97250 + }, + { + "epoch": 4.54, + "learning_rate": 4.917780947752889e-06, + "loss": 0.0741, + "step": 97255 + }, + { + "epoch": 4.54, + "learning_rate": 4.916997162698102e-06, + "loss": 0.0778, + "step": 97260 + }, + { + "epoch": 4.54, + "learning_rate": 4.916213377643316e-06, + "loss": 0.1341, + "step": 97265 + }, + { + "epoch": 4.54, + "learning_rate": 4.915429592588529e-06, + "loss": 0.1537, + "step": 97270 + }, + { + "epoch": 4.54, + "learning_rate": 4.914645807533742e-06, + "loss": 0.148, + "step": 97275 + }, + { + "epoch": 4.54, + "learning_rate": 4.913862022478956e-06, + "loss": 0.0608, + "step": 97280 + }, + { + "epoch": 4.54, + "learning_rate": 4.913078237424169e-06, + "loss": 0.0336, + "step": 97285 + }, + { + "epoch": 4.54, + "learning_rate": 4.912294452369383e-06, + "loss": 0.036, + "step": 97290 + }, + { + "epoch": 4.54, + "learning_rate": 4.911510667314597e-06, + "loss": 0.0348, + "step": 97295 + }, + { + "epoch": 4.54, + "learning_rate": 4.910726882259809e-06, + "loss": 0.02, + "step": 97300 + }, + { + "epoch": 4.54, + "learning_rate": 4.909943097205023e-06, + "loss": 0.045, + "step": 97305 + }, + { + "epoch": 4.54, + "learning_rate": 4.909159312150236e-06, + "loss": 0.0736, + "step": 97310 + }, + { + "epoch": 4.54, + "learning_rate": 4.90837552709545e-06, + "loss": 0.1461, + "step": 97315 + }, + { + "epoch": 4.54, + "learning_rate": 4.907591742040664e-06, + "loss": 0.1316, + "step": 97320 + }, + { + "epoch": 4.54, + "learning_rate": 4.906807956985877e-06, + "loss": 0.2203, + "step": 97325 + }, + { + "epoch": 4.54, + "learning_rate": 4.906024171931091e-06, + "loss": 0.1042, + "step": 97330 + }, + { + "epoch": 4.54, + "learning_rate": 4.905240386876303e-06, + "loss": 0.0091, + "step": 97335 + }, + { + "epoch": 4.54, + "learning_rate": 4.904456601821517e-06, + "loss": 0.0188, + "step": 97340 + }, + { + "epoch": 4.54, + "learning_rate": 4.903672816766731e-06, + "loss": 0.0192, + "step": 97345 + }, + { + "epoch": 4.54, + "learning_rate": 4.902889031711944e-06, + "loss": 0.0486, + "step": 97350 + }, + { + "epoch": 4.54, + "learning_rate": 4.902105246657158e-06, + "loss": 0.1115, + "step": 97355 + }, + { + "epoch": 4.54, + "learning_rate": 4.901321461602371e-06, + "loss": 0.1391, + "step": 97360 + }, + { + "epoch": 4.54, + "learning_rate": 4.900537676547584e-06, + "loss": 0.1454, + "step": 97365 + }, + { + "epoch": 4.54, + "learning_rate": 4.899753891492798e-06, + "loss": 0.0928, + "step": 97370 + }, + { + "epoch": 4.54, + "learning_rate": 4.898970106438011e-06, + "loss": 0.4642, + "step": 97375 + }, + { + "epoch": 4.54, + "learning_rate": 4.898186321383225e-06, + "loss": 0.1042, + "step": 97380 + }, + { + "epoch": 4.54, + "learning_rate": 4.897402536328438e-06, + "loss": 0.0262, + "step": 97385 + }, + { + "epoch": 4.54, + "learning_rate": 4.8966187512736516e-06, + "loss": 0.0667, + "step": 97390 + }, + { + "epoch": 4.54, + "learning_rate": 4.895834966218865e-06, + "loss": 0.0581, + "step": 97395 + }, + { + "epoch": 4.54, + "learning_rate": 4.895051181164078e-06, + "loss": 0.0437, + "step": 97400 + }, + { + "epoch": 4.55, + "learning_rate": 4.894267396109292e-06, + "loss": 0.0215, + "step": 97405 + }, + { + "epoch": 4.55, + "learning_rate": 4.893483611054505e-06, + "loss": 0.0662, + "step": 97410 + }, + { + "epoch": 4.55, + "learning_rate": 4.8926998259997185e-06, + "loss": 0.1121, + "step": 97415 + }, + { + "epoch": 4.55, + "learning_rate": 4.891916040944932e-06, + "loss": 0.1535, + "step": 97420 + }, + { + "epoch": 4.55, + "learning_rate": 4.8911322558901455e-06, + "loss": 0.3421, + "step": 97425 + }, + { + "epoch": 4.55, + "learning_rate": 4.8903484708353586e-06, + "loss": 0.0633, + "step": 97430 + }, + { + "epoch": 4.55, + "learning_rate": 4.889564685780572e-06, + "loss": 0.0489, + "step": 97435 + }, + { + "epoch": 4.55, + "learning_rate": 4.8887809007257855e-06, + "loss": 0.0765, + "step": 97440 + }, + { + "epoch": 4.55, + "learning_rate": 4.887997115670999e-06, + "loss": 0.0729, + "step": 97445 + }, + { + "epoch": 4.55, + "learning_rate": 4.8872133306162125e-06, + "loss": 0.0675, + "step": 97450 + }, + { + "epoch": 4.55, + "learning_rate": 4.8864295455614256e-06, + "loss": 0.0514, + "step": 97455 + }, + { + "epoch": 4.55, + "learning_rate": 4.8856457605066395e-06, + "loss": 0.0788, + "step": 97460 + }, + { + "epoch": 4.55, + "learning_rate": 4.8848619754518525e-06, + "loss": 0.042, + "step": 97465 + }, + { + "epoch": 4.55, + "learning_rate": 4.884078190397066e-06, + "loss": 0.1569, + "step": 97470 + }, + { + "epoch": 4.55, + "learning_rate": 4.8832944053422795e-06, + "loss": 0.3501, + "step": 97475 + }, + { + "epoch": 4.55, + "learning_rate": 4.8825106202874925e-06, + "loss": 0.098, + "step": 97480 + }, + { + "epoch": 4.55, + "learning_rate": 4.8817268352327065e-06, + "loss": 0.0277, + "step": 97485 + }, + { + "epoch": 4.55, + "learning_rate": 4.8809430501779195e-06, + "loss": 0.0588, + "step": 97490 + }, + { + "epoch": 4.55, + "learning_rate": 4.8801592651231326e-06, + "loss": 0.0777, + "step": 97495 + }, + { + "epoch": 4.55, + "learning_rate": 4.8793754800683465e-06, + "loss": 0.0718, + "step": 97500 + }, + { + "epoch": 4.55, + "learning_rate": 4.8785916950135595e-06, + "loss": 0.1042, + "step": 97505 + }, + { + "epoch": 4.55, + "learning_rate": 4.8778079099587734e-06, + "loss": 0.0908, + "step": 97510 + }, + { + "epoch": 4.55, + "learning_rate": 4.8770241249039865e-06, + "loss": 0.1177, + "step": 97515 + }, + { + "epoch": 4.55, + "learning_rate": 4.8762403398492e-06, + "loss": 0.1094, + "step": 97520 + }, + { + "epoch": 4.55, + "learning_rate": 4.8754565547944135e-06, + "loss": 0.3185, + "step": 97525 + }, + { + "epoch": 4.55, + "learning_rate": 4.8746727697396265e-06, + "loss": 0.0354, + "step": 97530 + }, + { + "epoch": 4.55, + "learning_rate": 4.8738889846848404e-06, + "loss": 0.0217, + "step": 97535 + }, + { + "epoch": 4.55, + "learning_rate": 4.8731051996300535e-06, + "loss": 0.0749, + "step": 97540 + }, + { + "epoch": 4.55, + "learning_rate": 4.872321414575267e-06, + "loss": 0.0125, + "step": 97545 + }, + { + "epoch": 4.55, + "learning_rate": 4.8715376295204805e-06, + "loss": 0.0637, + "step": 97550 + }, + { + "epoch": 4.55, + "learning_rate": 4.870753844465694e-06, + "loss": 0.0765, + "step": 97555 + }, + { + "epoch": 4.55, + "learning_rate": 4.869970059410907e-06, + "loss": 0.0634, + "step": 97560 + }, + { + "epoch": 4.55, + "learning_rate": 4.8691862743561205e-06, + "loss": 0.064, + "step": 97565 + }, + { + "epoch": 4.55, + "learning_rate": 4.868402489301334e-06, + "loss": 0.2301, + "step": 97570 + }, + { + "epoch": 4.55, + "learning_rate": 4.8676187042465474e-06, + "loss": 0.2494, + "step": 97575 + }, + { + "epoch": 4.55, + "learning_rate": 4.866834919191761e-06, + "loss": 0.1085, + "step": 97580 + }, + { + "epoch": 4.55, + "learning_rate": 4.866051134136975e-06, + "loss": 0.0254, + "step": 97585 + }, + { + "epoch": 4.55, + "learning_rate": 4.865267349082188e-06, + "loss": 0.0543, + "step": 97590 + }, + { + "epoch": 4.55, + "learning_rate": 4.864483564027401e-06, + "loss": 0.0431, + "step": 97595 + }, + { + "epoch": 4.55, + "learning_rate": 4.8636997789726144e-06, + "loss": 0.0635, + "step": 97600 + }, + { + "epoch": 4.55, + "learning_rate": 4.862915993917828e-06, + "loss": 0.0282, + "step": 97605 + }, + { + "epoch": 4.55, + "learning_rate": 4.862132208863042e-06, + "loss": 0.142, + "step": 97610 + }, + { + "epoch": 4.55, + "learning_rate": 4.861348423808255e-06, + "loss": 0.1611, + "step": 97615 + }, + { + "epoch": 4.56, + "learning_rate": 4.860564638753469e-06, + "loss": 0.1703, + "step": 97620 + }, + { + "epoch": 4.56, + "learning_rate": 4.859780853698681e-06, + "loss": 0.2248, + "step": 97625 + }, + { + "epoch": 4.56, + "learning_rate": 4.858997068643895e-06, + "loss": 0.0997, + "step": 97630 + }, + { + "epoch": 4.56, + "learning_rate": 4.858213283589109e-06, + "loss": 0.0345, + "step": 97635 + }, + { + "epoch": 4.56, + "learning_rate": 4.857429498534322e-06, + "loss": 0.0514, + "step": 97640 + }, + { + "epoch": 4.56, + "learning_rate": 4.856645713479536e-06, + "loss": 0.0553, + "step": 97645 + }, + { + "epoch": 4.56, + "learning_rate": 4.855861928424749e-06, + "loss": 0.0776, + "step": 97650 + }, + { + "epoch": 4.56, + "learning_rate": 4.855078143369963e-06, + "loss": 0.0566, + "step": 97655 + }, + { + "epoch": 4.56, + "learning_rate": 4.854294358315176e-06, + "loss": 0.1198, + "step": 97660 + }, + { + "epoch": 4.56, + "learning_rate": 4.853510573260389e-06, + "loss": 0.123, + "step": 97665 + }, + { + "epoch": 4.56, + "learning_rate": 4.852726788205603e-06, + "loss": 0.2919, + "step": 97670 + }, + { + "epoch": 4.56, + "learning_rate": 4.851943003150816e-06, + "loss": 0.1835, + "step": 97675 + }, + { + "epoch": 4.56, + "learning_rate": 4.85115921809603e-06, + "loss": 0.0758, + "step": 97680 + }, + { + "epoch": 4.56, + "learning_rate": 4.850375433041243e-06, + "loss": 0.0462, + "step": 97685 + }, + { + "epoch": 4.56, + "learning_rate": 4.849591647986456e-06, + "loss": 0.0409, + "step": 97690 + }, + { + "epoch": 4.56, + "learning_rate": 4.84880786293167e-06, + "loss": 0.0213, + "step": 97695 + }, + { + "epoch": 4.56, + "learning_rate": 4.848024077876883e-06, + "loss": 0.044, + "step": 97700 + }, + { + "epoch": 4.56, + "learning_rate": 4.847240292822097e-06, + "loss": 0.0829, + "step": 97705 + }, + { + "epoch": 4.56, + "learning_rate": 4.84645650776731e-06, + "loss": 0.113, + "step": 97710 + }, + { + "epoch": 4.56, + "learning_rate": 4.845672722712524e-06, + "loss": 0.1968, + "step": 97715 + }, + { + "epoch": 4.56, + "learning_rate": 4.844888937657737e-06, + "loss": 0.2569, + "step": 97720 + }, + { + "epoch": 4.56, + "learning_rate": 4.84410515260295e-06, + "loss": 0.3715, + "step": 97725 + }, + { + "epoch": 4.56, + "learning_rate": 4.843321367548164e-06, + "loss": 0.077, + "step": 97730 + }, + { + "epoch": 4.56, + "learning_rate": 4.842537582493377e-06, + "loss": 0.0426, + "step": 97735 + }, + { + "epoch": 4.56, + "learning_rate": 4.841753797438591e-06, + "loss": 0.0269, + "step": 97740 + }, + { + "epoch": 4.56, + "learning_rate": 4.840970012383804e-06, + "loss": 0.0554, + "step": 97745 + }, + { + "epoch": 4.56, + "learning_rate": 4.840186227329018e-06, + "loss": 0.1202, + "step": 97750 + }, + { + "epoch": 4.56, + "learning_rate": 4.839402442274231e-06, + "loss": 0.0734, + "step": 97755 + }, + { + "epoch": 4.56, + "learning_rate": 4.838618657219444e-06, + "loss": 0.1045, + "step": 97760 + }, + { + "epoch": 4.56, + "learning_rate": 4.837834872164658e-06, + "loss": 0.1338, + "step": 97765 + }, + { + "epoch": 4.56, + "learning_rate": 4.837051087109871e-06, + "loss": 0.1308, + "step": 97770 + }, + { + "epoch": 4.56, + "learning_rate": 4.836267302055085e-06, + "loss": 0.2176, + "step": 97775 + }, + { + "epoch": 4.56, + "learning_rate": 4.835483517000298e-06, + "loss": 0.0666, + "step": 97780 + }, + { + "epoch": 4.56, + "learning_rate": 4.834699731945512e-06, + "loss": 0.0196, + "step": 97785 + }, + { + "epoch": 4.56, + "learning_rate": 4.833915946890725e-06, + "loss": 0.0603, + "step": 97790 + }, + { + "epoch": 4.56, + "learning_rate": 4.833132161835938e-06, + "loss": 0.0616, + "step": 97795 + }, + { + "epoch": 4.56, + "learning_rate": 4.832348376781152e-06, + "loss": 0.0545, + "step": 97800 + }, + { + "epoch": 4.56, + "learning_rate": 4.831564591726365e-06, + "loss": 0.0742, + "step": 97805 + }, + { + "epoch": 4.56, + "learning_rate": 4.830780806671579e-06, + "loss": 0.065, + "step": 97810 + }, + { + "epoch": 4.56, + "learning_rate": 4.829997021616792e-06, + "loss": 0.1645, + "step": 97815 + }, + { + "epoch": 4.56, + "learning_rate": 4.829213236562005e-06, + "loss": 0.1676, + "step": 97820 + }, + { + "epoch": 4.56, + "learning_rate": 4.828429451507219e-06, + "loss": 0.2441, + "step": 97825 + }, + { + "epoch": 4.56, + "learning_rate": 4.827645666452432e-06, + "loss": 0.0652, + "step": 97830 + }, + { + "epoch": 4.57, + "learning_rate": 4.826861881397646e-06, + "loss": 0.0025, + "step": 97835 + }, + { + "epoch": 4.57, + "learning_rate": 4.826078096342859e-06, + "loss": 0.0241, + "step": 97840 + }, + { + "epoch": 4.57, + "learning_rate": 4.825294311288073e-06, + "loss": 0.0687, + "step": 97845 + }, + { + "epoch": 4.57, + "learning_rate": 4.824510526233286e-06, + "loss": 0.0281, + "step": 97850 + }, + { + "epoch": 4.57, + "learning_rate": 4.823726741178499e-06, + "loss": 0.0065, + "step": 97855 + }, + { + "epoch": 4.57, + "learning_rate": 4.822942956123713e-06, + "loss": 0.132, + "step": 97860 + }, + { + "epoch": 4.57, + "learning_rate": 4.822159171068926e-06, + "loss": 0.0904, + "step": 97865 + }, + { + "epoch": 4.57, + "learning_rate": 4.82137538601414e-06, + "loss": 0.133, + "step": 97870 + }, + { + "epoch": 4.57, + "learning_rate": 4.820591600959354e-06, + "loss": 0.308, + "step": 97875 + }, + { + "epoch": 4.57, + "learning_rate": 4.819807815904567e-06, + "loss": 0.1002, + "step": 97880 + }, + { + "epoch": 4.57, + "learning_rate": 4.81902403084978e-06, + "loss": 0.0092, + "step": 97885 + }, + { + "epoch": 4.57, + "learning_rate": 4.818240245794993e-06, + "loss": 0.0551, + "step": 97890 + }, + { + "epoch": 4.57, + "learning_rate": 4.817456460740207e-06, + "loss": 0.0431, + "step": 97895 + }, + { + "epoch": 4.57, + "learning_rate": 4.816672675685421e-06, + "loss": 0.0457, + "step": 97900 + }, + { + "epoch": 4.57, + "learning_rate": 4.815888890630634e-06, + "loss": 0.0607, + "step": 97905 + }, + { + "epoch": 4.57, + "learning_rate": 4.815105105575848e-06, + "loss": 0.1423, + "step": 97910 + }, + { + "epoch": 4.57, + "learning_rate": 4.814321320521061e-06, + "loss": 0.1044, + "step": 97915 + }, + { + "epoch": 4.57, + "learning_rate": 4.813537535466274e-06, + "loss": 0.1191, + "step": 97920 + }, + { + "epoch": 4.57, + "learning_rate": 4.812753750411488e-06, + "loss": 0.3935, + "step": 97925 + }, + { + "epoch": 4.57, + "learning_rate": 4.811969965356701e-06, + "loss": 0.1108, + "step": 97930 + }, + { + "epoch": 4.57, + "learning_rate": 4.811186180301915e-06, + "loss": 0.0128, + "step": 97935 + }, + { + "epoch": 4.57, + "learning_rate": 4.810402395247128e-06, + "loss": 0.0096, + "step": 97940 + }, + { + "epoch": 4.57, + "learning_rate": 4.809618610192342e-06, + "loss": 0.0065, + "step": 97945 + }, + { + "epoch": 4.57, + "learning_rate": 4.808834825137555e-06, + "loss": 0.0238, + "step": 97950 + }, + { + "epoch": 4.57, + "learning_rate": 4.808051040082768e-06, + "loss": 0.0254, + "step": 97955 + }, + { + "epoch": 4.57, + "learning_rate": 4.807267255027982e-06, + "loss": 0.0706, + "step": 97960 + }, + { + "epoch": 4.57, + "learning_rate": 4.806483469973195e-06, + "loss": 0.0878, + "step": 97965 + }, + { + "epoch": 4.57, + "learning_rate": 4.805699684918409e-06, + "loss": 0.1129, + "step": 97970 + }, + { + "epoch": 4.57, + "learning_rate": 4.804915899863622e-06, + "loss": 0.247, + "step": 97975 + }, + { + "epoch": 4.57, + "learning_rate": 4.804132114808836e-06, + "loss": 0.088, + "step": 97980 + }, + { + "epoch": 4.57, + "learning_rate": 4.803348329754049e-06, + "loss": 0.0146, + "step": 97985 + }, + { + "epoch": 4.57, + "learning_rate": 4.802564544699262e-06, + "loss": 0.0342, + "step": 97990 + }, + { + "epoch": 4.57, + "learning_rate": 4.801780759644476e-06, + "loss": 0.0343, + "step": 97995 + }, + { + "epoch": 4.57, + "learning_rate": 4.800996974589689e-06, + "loss": 0.0808, + "step": 98000 + }, + { + "epoch": 4.57, + "learning_rate": 4.800213189534903e-06, + "loss": 0.0295, + "step": 98005 + }, + { + "epoch": 4.57, + "learning_rate": 4.799429404480116e-06, + "loss": 0.0679, + "step": 98010 + }, + { + "epoch": 4.57, + "learning_rate": 4.798645619425329e-06, + "loss": 0.1157, + "step": 98015 + }, + { + "epoch": 4.57, + "learning_rate": 4.797861834370543e-06, + "loss": 0.224, + "step": 98020 + }, + { + "epoch": 4.57, + "learning_rate": 4.797078049315756e-06, + "loss": 0.1635, + "step": 98025 + }, + { + "epoch": 4.57, + "learning_rate": 4.79629426426097e-06, + "loss": 0.068, + "step": 98030 + }, + { + "epoch": 4.57, + "learning_rate": 4.795510479206183e-06, + "loss": 0.0249, + "step": 98035 + }, + { + "epoch": 4.57, + "learning_rate": 4.794726694151397e-06, + "loss": 0.0808, + "step": 98040 + }, + { + "epoch": 4.57, + "learning_rate": 4.79394290909661e-06, + "loss": 0.0745, + "step": 98045 + }, + { + "epoch": 4.58, + "learning_rate": 4.793159124041823e-06, + "loss": 0.0261, + "step": 98050 + }, + { + "epoch": 4.58, + "learning_rate": 4.792375338987037e-06, + "loss": 0.0553, + "step": 98055 + }, + { + "epoch": 4.58, + "learning_rate": 4.79159155393225e-06, + "loss": 0.0576, + "step": 98060 + }, + { + "epoch": 4.58, + "learning_rate": 4.790807768877464e-06, + "loss": 0.0739, + "step": 98065 + }, + { + "epoch": 4.58, + "learning_rate": 4.790023983822677e-06, + "loss": 0.2026, + "step": 98070 + }, + { + "epoch": 4.58, + "learning_rate": 4.789240198767891e-06, + "loss": 0.3587, + "step": 98075 + }, + { + "epoch": 4.58, + "learning_rate": 4.788456413713104e-06, + "loss": 0.1066, + "step": 98080 + }, + { + "epoch": 4.58, + "learning_rate": 4.787672628658317e-06, + "loss": 0.0227, + "step": 98085 + }, + { + "epoch": 4.58, + "learning_rate": 4.786888843603531e-06, + "loss": 0.0534, + "step": 98090 + }, + { + "epoch": 4.58, + "learning_rate": 4.786105058548744e-06, + "loss": 0.0241, + "step": 98095 + }, + { + "epoch": 4.58, + "learning_rate": 4.7853212734939576e-06, + "loss": 0.0361, + "step": 98100 + }, + { + "epoch": 4.58, + "learning_rate": 4.784537488439171e-06, + "loss": 0.1102, + "step": 98105 + }, + { + "epoch": 4.58, + "learning_rate": 4.7837537033843845e-06, + "loss": 0.0765, + "step": 98110 + }, + { + "epoch": 4.58, + "learning_rate": 4.782969918329598e-06, + "loss": 0.1188, + "step": 98115 + }, + { + "epoch": 4.58, + "learning_rate": 4.782186133274811e-06, + "loss": 0.1429, + "step": 98120 + }, + { + "epoch": 4.58, + "learning_rate": 4.7814023482200246e-06, + "loss": 0.3327, + "step": 98125 + }, + { + "epoch": 4.58, + "learning_rate": 4.780618563165238e-06, + "loss": 0.073, + "step": 98130 + }, + { + "epoch": 4.58, + "learning_rate": 4.7798347781104515e-06, + "loss": 0.0439, + "step": 98135 + }, + { + "epoch": 4.58, + "learning_rate": 4.779050993055665e-06, + "loss": 0.0206, + "step": 98140 + }, + { + "epoch": 4.58, + "learning_rate": 4.778267208000878e-06, + "loss": 0.0281, + "step": 98145 + }, + { + "epoch": 4.58, + "learning_rate": 4.7774834229460915e-06, + "loss": 0.0482, + "step": 98150 + }, + { + "epoch": 4.58, + "learning_rate": 4.776699637891305e-06, + "loss": 0.0913, + "step": 98155 + }, + { + "epoch": 4.58, + "learning_rate": 4.7759158528365185e-06, + "loss": 0.1045, + "step": 98160 + }, + { + "epoch": 4.58, + "learning_rate": 4.775132067781732e-06, + "loss": 0.0822, + "step": 98165 + }, + { + "epoch": 4.58, + "learning_rate": 4.7743482827269455e-06, + "loss": 0.1039, + "step": 98170 + }, + { + "epoch": 4.58, + "learning_rate": 4.773564497672159e-06, + "loss": 0.3043, + "step": 98175 + }, + { + "epoch": 4.58, + "learning_rate": 4.772780712617372e-06, + "loss": 0.0674, + "step": 98180 + }, + { + "epoch": 4.58, + "learning_rate": 4.7719969275625855e-06, + "loss": 0.0219, + "step": 98185 + }, + { + "epoch": 4.58, + "learning_rate": 4.771213142507799e-06, + "loss": 0.0163, + "step": 98190 + }, + { + "epoch": 4.58, + "learning_rate": 4.7704293574530125e-06, + "loss": 0.0547, + "step": 98195 + }, + { + "epoch": 4.58, + "learning_rate": 4.769645572398226e-06, + "loss": 0.0748, + "step": 98200 + }, + { + "epoch": 4.58, + "learning_rate": 4.7688617873434394e-06, + "loss": 0.0547, + "step": 98205 + }, + { + "epoch": 4.58, + "learning_rate": 4.7680780022886525e-06, + "loss": 0.1277, + "step": 98210 + }, + { + "epoch": 4.58, + "learning_rate": 4.767294217233866e-06, + "loss": 0.1327, + "step": 98215 + }, + { + "epoch": 4.58, + "learning_rate": 4.7665104321790795e-06, + "loss": 0.1201, + "step": 98220 + }, + { + "epoch": 4.58, + "learning_rate": 4.765726647124293e-06, + "loss": 0.3079, + "step": 98225 + }, + { + "epoch": 4.58, + "learning_rate": 4.764942862069506e-06, + "loss": 0.075, + "step": 98230 + }, + { + "epoch": 4.58, + "learning_rate": 4.76415907701472e-06, + "loss": 0.0479, + "step": 98235 + }, + { + "epoch": 4.58, + "learning_rate": 4.763375291959933e-06, + "loss": 0.0288, + "step": 98240 + }, + { + "epoch": 4.58, + "learning_rate": 4.7625915069051464e-06, + "loss": 0.0471, + "step": 98245 + }, + { + "epoch": 4.58, + "learning_rate": 4.76180772185036e-06, + "loss": 0.0723, + "step": 98250 + }, + { + "epoch": 4.58, + "learning_rate": 4.761023936795573e-06, + "loss": 0.0457, + "step": 98255 + }, + { + "epoch": 4.58, + "learning_rate": 4.760240151740787e-06, + "loss": 0.162, + "step": 98260 + }, + { + "epoch": 4.59, + "learning_rate": 4.759456366686e-06, + "loss": 0.164, + "step": 98265 + }, + { + "epoch": 4.59, + "learning_rate": 4.758672581631214e-06, + "loss": 0.0886, + "step": 98270 + }, + { + "epoch": 4.59, + "learning_rate": 4.757888796576427e-06, + "loss": 0.4929, + "step": 98275 + }, + { + "epoch": 4.59, + "learning_rate": 4.75710501152164e-06, + "loss": 0.0793, + "step": 98280 + }, + { + "epoch": 4.59, + "learning_rate": 4.756321226466854e-06, + "loss": 0.0243, + "step": 98285 + }, + { + "epoch": 4.59, + "learning_rate": 4.755537441412067e-06, + "loss": 0.0293, + "step": 98290 + }, + { + "epoch": 4.59, + "learning_rate": 4.754753656357281e-06, + "loss": 0.024, + "step": 98295 + }, + { + "epoch": 4.59, + "learning_rate": 4.753969871302494e-06, + "loss": 0.0699, + "step": 98300 + }, + { + "epoch": 4.59, + "learning_rate": 4.753186086247708e-06, + "loss": 0.0388, + "step": 98305 + }, + { + "epoch": 4.59, + "learning_rate": 4.752402301192921e-06, + "loss": 0.0463, + "step": 98310 + }, + { + "epoch": 4.59, + "learning_rate": 4.751618516138134e-06, + "loss": 0.0508, + "step": 98315 + }, + { + "epoch": 4.59, + "learning_rate": 4.750834731083348e-06, + "loss": 0.1005, + "step": 98320 + }, + { + "epoch": 4.59, + "learning_rate": 4.750050946028561e-06, + "loss": 0.1858, + "step": 98325 + }, + { + "epoch": 4.59, + "learning_rate": 4.749267160973775e-06, + "loss": 0.0884, + "step": 98330 + }, + { + "epoch": 4.59, + "learning_rate": 4.748483375918988e-06, + "loss": 0.0103, + "step": 98335 + }, + { + "epoch": 4.59, + "learning_rate": 4.747699590864201e-06, + "loss": 0.029, + "step": 98340 + }, + { + "epoch": 4.59, + "learning_rate": 4.746915805809415e-06, + "loss": 0.0599, + "step": 98345 + }, + { + "epoch": 4.59, + "learning_rate": 4.746132020754628e-06, + "loss": 0.093, + "step": 98350 + }, + { + "epoch": 4.59, + "learning_rate": 4.745348235699842e-06, + "loss": 0.056, + "step": 98355 + }, + { + "epoch": 4.59, + "learning_rate": 4.744564450645055e-06, + "loss": 0.0937, + "step": 98360 + }, + { + "epoch": 4.59, + "learning_rate": 4.743780665590269e-06, + "loss": 0.1793, + "step": 98365 + }, + { + "epoch": 4.59, + "learning_rate": 4.742996880535482e-06, + "loss": 0.1542, + "step": 98370 + }, + { + "epoch": 4.59, + "learning_rate": 4.742213095480695e-06, + "loss": 0.292, + "step": 98375 + }, + { + "epoch": 4.59, + "learning_rate": 4.741429310425909e-06, + "loss": 0.0954, + "step": 98380 + }, + { + "epoch": 4.59, + "learning_rate": 4.740645525371122e-06, + "loss": 0.0101, + "step": 98385 + }, + { + "epoch": 4.59, + "learning_rate": 4.739861740316336e-06, + "loss": 0.0294, + "step": 98390 + }, + { + "epoch": 4.59, + "learning_rate": 4.739077955261549e-06, + "loss": 0.0526, + "step": 98395 + }, + { + "epoch": 4.59, + "learning_rate": 4.738294170206763e-06, + "loss": 0.1069, + "step": 98400 + }, + { + "epoch": 4.59, + "learning_rate": 4.737510385151976e-06, + "loss": 0.07, + "step": 98405 + }, + { + "epoch": 4.59, + "learning_rate": 4.736726600097189e-06, + "loss": 0.083, + "step": 98410 + }, + { + "epoch": 4.59, + "learning_rate": 4.735942815042403e-06, + "loss": 0.114, + "step": 98415 + }, + { + "epoch": 4.59, + "learning_rate": 4.735159029987616e-06, + "loss": 0.1524, + "step": 98420 + }, + { + "epoch": 4.59, + "learning_rate": 4.73437524493283e-06, + "loss": 0.2291, + "step": 98425 + }, + { + "epoch": 4.59, + "learning_rate": 4.733591459878043e-06, + "loss": 0.0698, + "step": 98430 + }, + { + "epoch": 4.59, + "learning_rate": 4.732807674823257e-06, + "loss": 0.0594, + "step": 98435 + }, + { + "epoch": 4.59, + "learning_rate": 4.73202388976847e-06, + "loss": 0.0253, + "step": 98440 + }, + { + "epoch": 4.59, + "learning_rate": 4.731240104713683e-06, + "loss": 0.0151, + "step": 98445 + }, + { + "epoch": 4.59, + "learning_rate": 4.730456319658897e-06, + "loss": 0.0529, + "step": 98450 + }, + { + "epoch": 4.59, + "learning_rate": 4.729672534604111e-06, + "loss": 0.0582, + "step": 98455 + }, + { + "epoch": 4.59, + "learning_rate": 4.728888749549324e-06, + "loss": 0.1311, + "step": 98460 + }, + { + "epoch": 4.59, + "learning_rate": 4.728104964494538e-06, + "loss": 0.1393, + "step": 98465 + }, + { + "epoch": 4.59, + "learning_rate": 4.727321179439751e-06, + "loss": 0.1408, + "step": 98470 + }, + { + "epoch": 4.59, + "learning_rate": 4.726537394384964e-06, + "loss": 0.204, + "step": 98475 + }, + { + "epoch": 4.6, + "learning_rate": 4.725753609330178e-06, + "loss": 0.0507, + "step": 98480 + }, + { + "epoch": 4.6, + "learning_rate": 4.724969824275391e-06, + "loss": 0.0056, + "step": 98485 + }, + { + "epoch": 4.6, + "learning_rate": 4.724186039220605e-06, + "loss": 0.0239, + "step": 98490 + }, + { + "epoch": 4.6, + "learning_rate": 4.723402254165818e-06, + "loss": 0.0197, + "step": 98495 + }, + { + "epoch": 4.6, + "learning_rate": 4.722618469111032e-06, + "loss": 0.03, + "step": 98500 + }, + { + "epoch": 4.6, + "learning_rate": 4.721834684056245e-06, + "loss": 0.0459, + "step": 98505 + }, + { + "epoch": 4.6, + "learning_rate": 4.721050899001458e-06, + "loss": 0.1789, + "step": 98510 + }, + { + "epoch": 4.6, + "learning_rate": 4.720267113946672e-06, + "loss": 0.1291, + "step": 98515 + }, + { + "epoch": 4.6, + "learning_rate": 4.719483328891885e-06, + "loss": 0.1693, + "step": 98520 + }, + { + "epoch": 4.6, + "learning_rate": 4.718699543837099e-06, + "loss": 0.3627, + "step": 98525 + }, + { + "epoch": 4.6, + "learning_rate": 4.717915758782312e-06, + "loss": 0.0747, + "step": 98530 + }, + { + "epoch": 4.6, + "learning_rate": 4.717131973727526e-06, + "loss": 0.0521, + "step": 98535 + }, + { + "epoch": 4.6, + "learning_rate": 4.716348188672739e-06, + "loss": 0.0338, + "step": 98540 + }, + { + "epoch": 4.6, + "learning_rate": 4.715564403617952e-06, + "loss": 0.0316, + "step": 98545 + }, + { + "epoch": 4.6, + "learning_rate": 4.714780618563166e-06, + "loss": 0.0676, + "step": 98550 + }, + { + "epoch": 4.6, + "learning_rate": 4.713996833508379e-06, + "loss": 0.1055, + "step": 98555 + }, + { + "epoch": 4.6, + "learning_rate": 4.713213048453593e-06, + "loss": 0.111, + "step": 98560 + }, + { + "epoch": 4.6, + "learning_rate": 4.712429263398806e-06, + "loss": 0.1715, + "step": 98565 + }, + { + "epoch": 4.6, + "learning_rate": 4.711645478344019e-06, + "loss": 0.1806, + "step": 98570 + }, + { + "epoch": 4.6, + "learning_rate": 4.710861693289233e-06, + "loss": 0.2132, + "step": 98575 + }, + { + "epoch": 4.6, + "learning_rate": 4.710077908234446e-06, + "loss": 0.0975, + "step": 98580 + }, + { + "epoch": 4.6, + "learning_rate": 4.70929412317966e-06, + "loss": 0.0238, + "step": 98585 + }, + { + "epoch": 4.6, + "learning_rate": 4.708510338124873e-06, + "loss": 0.0189, + "step": 98590 + }, + { + "epoch": 4.6, + "learning_rate": 4.707726553070087e-06, + "loss": 0.0709, + "step": 98595 + }, + { + "epoch": 4.6, + "learning_rate": 4.7069427680153e-06, + "loss": 0.0812, + "step": 98600 + }, + { + "epoch": 4.6, + "learning_rate": 4.706158982960513e-06, + "loss": 0.0753, + "step": 98605 + }, + { + "epoch": 4.6, + "learning_rate": 4.705375197905727e-06, + "loss": 0.0977, + "step": 98610 + }, + { + "epoch": 4.6, + "learning_rate": 4.70459141285094e-06, + "loss": 0.0735, + "step": 98615 + }, + { + "epoch": 4.6, + "learning_rate": 4.703807627796154e-06, + "loss": 0.114, + "step": 98620 + }, + { + "epoch": 4.6, + "learning_rate": 4.703023842741367e-06, + "loss": 0.2105, + "step": 98625 + }, + { + "epoch": 4.6, + "learning_rate": 4.702240057686581e-06, + "loss": 0.09, + "step": 98630 + }, + { + "epoch": 4.6, + "learning_rate": 4.701456272631794e-06, + "loss": 0.028, + "step": 98635 + }, + { + "epoch": 4.6, + "learning_rate": 4.700672487577007e-06, + "loss": 0.0076, + "step": 98640 + }, + { + "epoch": 4.6, + "learning_rate": 4.699888702522221e-06, + "loss": 0.069, + "step": 98645 + }, + { + "epoch": 4.6, + "learning_rate": 4.699104917467434e-06, + "loss": 0.0845, + "step": 98650 + }, + { + "epoch": 4.6, + "learning_rate": 4.698321132412648e-06, + "loss": 0.1237, + "step": 98655 + }, + { + "epoch": 4.6, + "learning_rate": 4.697537347357861e-06, + "loss": 0.0693, + "step": 98660 + }, + { + "epoch": 4.6, + "learning_rate": 4.696753562303075e-06, + "loss": 0.0838, + "step": 98665 + }, + { + "epoch": 4.6, + "learning_rate": 4.695969777248288e-06, + "loss": 0.2664, + "step": 98670 + }, + { + "epoch": 4.6, + "learning_rate": 4.695185992193501e-06, + "loss": 0.3182, + "step": 98675 + }, + { + "epoch": 4.6, + "learning_rate": 4.694402207138715e-06, + "loss": 0.1123, + "step": 98680 + }, + { + "epoch": 4.6, + "learning_rate": 4.693618422083928e-06, + "loss": 0.0013, + "step": 98685 + }, + { + "epoch": 4.61, + "learning_rate": 4.692834637029142e-06, + "loss": 0.046, + "step": 98690 + }, + { + "epoch": 4.61, + "learning_rate": 4.692050851974355e-06, + "loss": 0.0534, + "step": 98695 + }, + { + "epoch": 4.61, + "learning_rate": 4.691267066919568e-06, + "loss": 0.068, + "step": 98700 + }, + { + "epoch": 4.61, + "learning_rate": 4.690483281864782e-06, + "loss": 0.0488, + "step": 98705 + }, + { + "epoch": 4.61, + "learning_rate": 4.689699496809995e-06, + "loss": 0.1138, + "step": 98710 + }, + { + "epoch": 4.61, + "learning_rate": 4.688915711755209e-06, + "loss": 0.0755, + "step": 98715 + }, + { + "epoch": 4.61, + "learning_rate": 4.688131926700422e-06, + "loss": 0.0916, + "step": 98720 + }, + { + "epoch": 4.61, + "learning_rate": 4.687348141645636e-06, + "loss": 0.2622, + "step": 98725 + }, + { + "epoch": 4.61, + "learning_rate": 4.6865643565908496e-06, + "loss": 0.1045, + "step": 98730 + }, + { + "epoch": 4.61, + "learning_rate": 4.685780571536062e-06, + "loss": 0.0511, + "step": 98735 + }, + { + "epoch": 4.61, + "learning_rate": 4.684996786481276e-06, + "loss": 0.0175, + "step": 98740 + }, + { + "epoch": 4.61, + "learning_rate": 4.68421300142649e-06, + "loss": 0.0457, + "step": 98745 + }, + { + "epoch": 4.61, + "learning_rate": 4.683429216371703e-06, + "loss": 0.0618, + "step": 98750 + }, + { + "epoch": 4.61, + "learning_rate": 4.6826454313169165e-06, + "loss": 0.0635, + "step": 98755 + }, + { + "epoch": 4.61, + "learning_rate": 4.68186164626213e-06, + "loss": 0.1034, + "step": 98760 + }, + { + "epoch": 4.61, + "learning_rate": 4.681077861207343e-06, + "loss": 0.1423, + "step": 98765 + }, + { + "epoch": 4.61, + "learning_rate": 4.6802940761525566e-06, + "loss": 0.0631, + "step": 98770 + }, + { + "epoch": 4.61, + "learning_rate": 4.67951029109777e-06, + "loss": 0.3452, + "step": 98775 + }, + { + "epoch": 4.61, + "learning_rate": 4.6787265060429835e-06, + "loss": 0.0838, + "step": 98780 + }, + { + "epoch": 4.61, + "learning_rate": 4.677942720988197e-06, + "loss": 0.0484, + "step": 98785 + }, + { + "epoch": 4.61, + "learning_rate": 4.6771589359334105e-06, + "loss": 0.0497, + "step": 98790 + }, + { + "epoch": 4.61, + "learning_rate": 4.6763751508786236e-06, + "loss": 0.1333, + "step": 98795 + }, + { + "epoch": 4.61, + "learning_rate": 4.675591365823837e-06, + "loss": 0.0566, + "step": 98800 + }, + { + "epoch": 4.61, + "learning_rate": 4.6748075807690505e-06, + "loss": 0.0735, + "step": 98805 + }, + { + "epoch": 4.61, + "learning_rate": 4.674023795714264e-06, + "loss": 0.0919, + "step": 98810 + }, + { + "epoch": 4.61, + "learning_rate": 4.6732400106594775e-06, + "loss": 0.0704, + "step": 98815 + }, + { + "epoch": 4.61, + "learning_rate": 4.6724562256046905e-06, + "loss": 0.1036, + "step": 98820 + }, + { + "epoch": 4.61, + "learning_rate": 4.6716724405499044e-06, + "loss": 0.3106, + "step": 98825 + }, + { + "epoch": 4.61, + "learning_rate": 4.6708886554951175e-06, + "loss": 0.1045, + "step": 98830 + }, + { + "epoch": 4.61, + "learning_rate": 4.6701048704403306e-06, + "loss": 0.0328, + "step": 98835 + }, + { + "epoch": 4.61, + "learning_rate": 4.6693210853855445e-06, + "loss": 0.0274, + "step": 98840 + }, + { + "epoch": 4.61, + "learning_rate": 4.6685373003307575e-06, + "loss": 0.0643, + "step": 98845 + }, + { + "epoch": 4.61, + "learning_rate": 4.6677535152759714e-06, + "loss": 0.046, + "step": 98850 + }, + { + "epoch": 4.61, + "learning_rate": 4.6669697302211845e-06, + "loss": 0.085, + "step": 98855 + }, + { + "epoch": 4.61, + "learning_rate": 4.666185945166398e-06, + "loss": 0.1397, + "step": 98860 + }, + { + "epoch": 4.61, + "learning_rate": 4.6654021601116115e-06, + "loss": 0.1457, + "step": 98865 + }, + { + "epoch": 4.61, + "learning_rate": 4.6646183750568245e-06, + "loss": 0.2372, + "step": 98870 + }, + { + "epoch": 4.61, + "learning_rate": 4.6638345900020384e-06, + "loss": 0.2352, + "step": 98875 + }, + { + "epoch": 4.61, + "learning_rate": 4.6630508049472515e-06, + "loss": 0.0608, + "step": 98880 + }, + { + "epoch": 4.61, + "learning_rate": 4.662267019892465e-06, + "loss": 0.0154, + "step": 98885 + }, + { + "epoch": 4.61, + "learning_rate": 4.6614832348376784e-06, + "loss": 0.0452, + "step": 98890 + }, + { + "epoch": 4.61, + "learning_rate": 4.6606994497828915e-06, + "loss": 0.0785, + "step": 98895 + }, + { + "epoch": 4.61, + "learning_rate": 4.659915664728105e-06, + "loss": 0.0461, + "step": 98900 + }, + { + "epoch": 4.62, + "learning_rate": 4.6591318796733185e-06, + "loss": 0.0656, + "step": 98905 + }, + { + "epoch": 4.62, + "learning_rate": 4.658348094618532e-06, + "loss": 0.0561, + "step": 98910 + }, + { + "epoch": 4.62, + "learning_rate": 4.6575643095637454e-06, + "loss": 0.1176, + "step": 98915 + }, + { + "epoch": 4.62, + "learning_rate": 4.656780524508959e-06, + "loss": 0.2113, + "step": 98920 + }, + { + "epoch": 4.62, + "learning_rate": 4.655996739454172e-06, + "loss": 0.2794, + "step": 98925 + }, + { + "epoch": 4.62, + "learning_rate": 4.6552129543993855e-06, + "loss": 0.0784, + "step": 98930 + }, + { + "epoch": 4.62, + "learning_rate": 4.654429169344599e-06, + "loss": 0.0445, + "step": 98935 + }, + { + "epoch": 4.62, + "learning_rate": 4.6536453842898124e-06, + "loss": 0.0491, + "step": 98940 + }, + { + "epoch": 4.62, + "learning_rate": 4.652861599235026e-06, + "loss": 0.0283, + "step": 98945 + }, + { + "epoch": 4.62, + "learning_rate": 4.652077814180239e-06, + "loss": 0.0677, + "step": 98950 + }, + { + "epoch": 4.62, + "learning_rate": 4.651294029125453e-06, + "loss": 0.0318, + "step": 98955 + }, + { + "epoch": 4.62, + "learning_rate": 4.650510244070666e-06, + "loss": 0.0479, + "step": 98960 + }, + { + "epoch": 4.62, + "learning_rate": 4.649726459015879e-06, + "loss": 0.0969, + "step": 98965 + }, + { + "epoch": 4.62, + "learning_rate": 4.648942673961093e-06, + "loss": 0.1436, + "step": 98970 + }, + { + "epoch": 4.62, + "learning_rate": 4.648158888906306e-06, + "loss": 0.4882, + "step": 98975 + }, + { + "epoch": 4.62, + "learning_rate": 4.64737510385152e-06, + "loss": 0.0895, + "step": 98980 + }, + { + "epoch": 4.62, + "learning_rate": 4.646591318796733e-06, + "loss": 0.0144, + "step": 98985 + }, + { + "epoch": 4.62, + "learning_rate": 4.645807533741947e-06, + "loss": 0.0107, + "step": 98990 + }, + { + "epoch": 4.62, + "learning_rate": 4.64502374868716e-06, + "loss": 0.0203, + "step": 98995 + }, + { + "epoch": 4.62, + "learning_rate": 4.644239963632373e-06, + "loss": 0.0975, + "step": 99000 + }, + { + "epoch": 4.62, + "learning_rate": 4.643456178577587e-06, + "loss": 0.0675, + "step": 99005 + }, + { + "epoch": 4.62, + "learning_rate": 4.6426723935228e-06, + "loss": 0.105, + "step": 99010 + }, + { + "epoch": 4.62, + "learning_rate": 4.641888608468014e-06, + "loss": 0.1148, + "step": 99015 + }, + { + "epoch": 4.62, + "learning_rate": 4.641104823413228e-06, + "loss": 0.1345, + "step": 99020 + }, + { + "epoch": 4.62, + "learning_rate": 4.64032103835844e-06, + "loss": 0.2658, + "step": 99025 + }, + { + "epoch": 4.62, + "learning_rate": 4.639537253303654e-06, + "loss": 0.0614, + "step": 99030 + }, + { + "epoch": 4.62, + "learning_rate": 4.638753468248868e-06, + "loss": 0.0439, + "step": 99035 + }, + { + "epoch": 4.62, + "learning_rate": 4.637969683194081e-06, + "loss": 0.0631, + "step": 99040 + }, + { + "epoch": 4.62, + "learning_rate": 4.637185898139295e-06, + "loss": 0.031, + "step": 99045 + }, + { + "epoch": 4.62, + "learning_rate": 4.636402113084508e-06, + "loss": 0.0375, + "step": 99050 + }, + { + "epoch": 4.62, + "learning_rate": 4.635618328029722e-06, + "loss": 0.1032, + "step": 99055 + }, + { + "epoch": 4.62, + "learning_rate": 4.634834542974935e-06, + "loss": 0.1198, + "step": 99060 + }, + { + "epoch": 4.62, + "learning_rate": 4.634050757920148e-06, + "loss": 0.158, + "step": 99065 + }, + { + "epoch": 4.62, + "learning_rate": 4.633266972865362e-06, + "loss": 0.1609, + "step": 99070 + }, + { + "epoch": 4.62, + "learning_rate": 4.632483187810575e-06, + "loss": 0.3688, + "step": 99075 + }, + { + "epoch": 4.62, + "learning_rate": 4.631699402755789e-06, + "loss": 0.1057, + "step": 99080 + }, + { + "epoch": 4.62, + "learning_rate": 4.630915617701002e-06, + "loss": 0.0403, + "step": 99085 + }, + { + "epoch": 4.62, + "learning_rate": 4.630131832646215e-06, + "loss": 0.0434, + "step": 99090 + }, + { + "epoch": 4.62, + "learning_rate": 4.629348047591429e-06, + "loss": 0.0496, + "step": 99095 + }, + { + "epoch": 4.62, + "learning_rate": 4.628564262536642e-06, + "loss": 0.0234, + "step": 99100 + }, + { + "epoch": 4.62, + "learning_rate": 4.627780477481856e-06, + "loss": 0.0994, + "step": 99105 + }, + { + "epoch": 4.62, + "learning_rate": 4.626996692427069e-06, + "loss": 0.0785, + "step": 99110 + }, + { + "epoch": 4.62, + "learning_rate": 4.626212907372283e-06, + "loss": 0.155, + "step": 99115 + }, + { + "epoch": 4.63, + "learning_rate": 4.625429122317496e-06, + "loss": 0.1887, + "step": 99120 + }, + { + "epoch": 4.63, + "learning_rate": 4.624645337262709e-06, + "loss": 0.2303, + "step": 99125 + }, + { + "epoch": 4.63, + "learning_rate": 4.623861552207923e-06, + "loss": 0.0835, + "step": 99130 + }, + { + "epoch": 4.63, + "learning_rate": 4.623077767153136e-06, + "loss": 0.0357, + "step": 99135 + }, + { + "epoch": 4.63, + "learning_rate": 4.62229398209835e-06, + "loss": 0.0441, + "step": 99140 + }, + { + "epoch": 4.63, + "learning_rate": 4.621510197043563e-06, + "loss": 0.008, + "step": 99145 + }, + { + "epoch": 4.63, + "learning_rate": 4.620726411988777e-06, + "loss": 0.0457, + "step": 99150 + }, + { + "epoch": 4.63, + "learning_rate": 4.61994262693399e-06, + "loss": 0.0527, + "step": 99155 + }, + { + "epoch": 4.63, + "learning_rate": 4.619158841879203e-06, + "loss": 0.0727, + "step": 99160 + }, + { + "epoch": 4.63, + "learning_rate": 4.618375056824417e-06, + "loss": 0.1954, + "step": 99165 + }, + { + "epoch": 4.63, + "learning_rate": 4.61759127176963e-06, + "loss": 0.1393, + "step": 99170 + }, + { + "epoch": 4.63, + "learning_rate": 4.616807486714844e-06, + "loss": 0.2573, + "step": 99175 + }, + { + "epoch": 4.63, + "learning_rate": 4.616023701660057e-06, + "loss": 0.1002, + "step": 99180 + }, + { + "epoch": 4.63, + "learning_rate": 4.615239916605271e-06, + "loss": 0.0111, + "step": 99185 + }, + { + "epoch": 4.63, + "learning_rate": 4.614456131550484e-06, + "loss": 0.0747, + "step": 99190 + }, + { + "epoch": 4.63, + "learning_rate": 4.613672346495697e-06, + "loss": 0.0535, + "step": 99195 + }, + { + "epoch": 4.63, + "learning_rate": 4.612888561440911e-06, + "loss": 0.0439, + "step": 99200 + }, + { + "epoch": 4.63, + "learning_rate": 4.612104776386124e-06, + "loss": 0.068, + "step": 99205 + }, + { + "epoch": 4.63, + "learning_rate": 4.611320991331338e-06, + "loss": 0.085, + "step": 99210 + }, + { + "epoch": 4.63, + "learning_rate": 4.610537206276551e-06, + "loss": 0.1352, + "step": 99215 + }, + { + "epoch": 4.63, + "learning_rate": 4.609753421221764e-06, + "loss": 0.1039, + "step": 99220 + }, + { + "epoch": 4.63, + "learning_rate": 4.608969636166978e-06, + "loss": 0.296, + "step": 99225 + }, + { + "epoch": 4.63, + "learning_rate": 4.608185851112191e-06, + "loss": 0.0724, + "step": 99230 + }, + { + "epoch": 4.63, + "learning_rate": 4.607402066057405e-06, + "loss": 0.008, + "step": 99235 + }, + { + "epoch": 4.63, + "learning_rate": 4.606618281002618e-06, + "loss": 0.0358, + "step": 99240 + }, + { + "epoch": 4.63, + "learning_rate": 4.605834495947832e-06, + "loss": 0.0265, + "step": 99245 + }, + { + "epoch": 4.63, + "learning_rate": 4.605050710893045e-06, + "loss": 0.0432, + "step": 99250 + }, + { + "epoch": 4.63, + "learning_rate": 4.604266925838258e-06, + "loss": 0.0298, + "step": 99255 + }, + { + "epoch": 4.63, + "learning_rate": 4.603483140783472e-06, + "loss": 0.101, + "step": 99260 + }, + { + "epoch": 4.63, + "learning_rate": 4.602699355728685e-06, + "loss": 0.1657, + "step": 99265 + }, + { + "epoch": 4.63, + "learning_rate": 4.601915570673899e-06, + "loss": 0.2017, + "step": 99270 + }, + { + "epoch": 4.63, + "learning_rate": 4.601131785619112e-06, + "loss": 0.2428, + "step": 99275 + }, + { + "epoch": 4.63, + "learning_rate": 4.600348000564326e-06, + "loss": 0.0889, + "step": 99280 + }, + { + "epoch": 4.63, + "learning_rate": 4.599564215509539e-06, + "loss": 0.0376, + "step": 99285 + }, + { + "epoch": 4.63, + "learning_rate": 4.598780430454752e-06, + "loss": 0.0571, + "step": 99290 + }, + { + "epoch": 4.63, + "learning_rate": 4.597996645399966e-06, + "loss": 0.0663, + "step": 99295 + }, + { + "epoch": 4.63, + "learning_rate": 4.597212860345179e-06, + "loss": 0.0245, + "step": 99300 + }, + { + "epoch": 4.63, + "learning_rate": 4.596429075290393e-06, + "loss": 0.084, + "step": 99305 + }, + { + "epoch": 4.63, + "learning_rate": 4.595645290235607e-06, + "loss": 0.1281, + "step": 99310 + }, + { + "epoch": 4.63, + "learning_rate": 4.59486150518082e-06, + "loss": 0.2, + "step": 99315 + }, + { + "epoch": 4.63, + "learning_rate": 4.594077720126033e-06, + "loss": 0.0928, + "step": 99320 + }, + { + "epoch": 4.63, + "learning_rate": 4.593293935071247e-06, + "loss": 0.2263, + "step": 99325 + }, + { + "epoch": 4.63, + "learning_rate": 4.59251015001646e-06, + "loss": 0.0963, + "step": 99330 + }, + { + "epoch": 4.64, + "learning_rate": 4.591726364961674e-06, + "loss": 0.0292, + "step": 99335 + }, + { + "epoch": 4.64, + "learning_rate": 4.590942579906887e-06, + "loss": 0.0582, + "step": 99340 + }, + { + "epoch": 4.64, + "learning_rate": 4.590158794852101e-06, + "loss": 0.046, + "step": 99345 + }, + { + "epoch": 4.64, + "learning_rate": 4.589375009797314e-06, + "loss": 0.043, + "step": 99350 + }, + { + "epoch": 4.64, + "learning_rate": 4.588591224742527e-06, + "loss": 0.0467, + "step": 99355 + }, + { + "epoch": 4.64, + "learning_rate": 4.587807439687741e-06, + "loss": 0.1622, + "step": 99360 + }, + { + "epoch": 4.64, + "learning_rate": 4.587023654632954e-06, + "loss": 0.0681, + "step": 99365 + }, + { + "epoch": 4.64, + "learning_rate": 4.586239869578168e-06, + "loss": 0.1997, + "step": 99370 + }, + { + "epoch": 4.64, + "learning_rate": 4.585456084523381e-06, + "loss": 0.2206, + "step": 99375 + }, + { + "epoch": 4.64, + "learning_rate": 4.584672299468595e-06, + "loss": 0.0906, + "step": 99380 + }, + { + "epoch": 4.64, + "learning_rate": 4.583888514413808e-06, + "loss": 0.0321, + "step": 99385 + }, + { + "epoch": 4.64, + "learning_rate": 4.583104729359021e-06, + "loss": 0.0637, + "step": 99390 + }, + { + "epoch": 4.64, + "learning_rate": 4.582320944304235e-06, + "loss": 0.0361, + "step": 99395 + }, + { + "epoch": 4.64, + "learning_rate": 4.581537159249448e-06, + "loss": 0.0721, + "step": 99400 + }, + { + "epoch": 4.64, + "learning_rate": 4.580753374194662e-06, + "loss": 0.1105, + "step": 99405 + }, + { + "epoch": 4.64, + "learning_rate": 4.579969589139875e-06, + "loss": 0.0972, + "step": 99410 + }, + { + "epoch": 4.64, + "learning_rate": 4.579185804085088e-06, + "loss": 0.1117, + "step": 99415 + }, + { + "epoch": 4.64, + "learning_rate": 4.578402019030302e-06, + "loss": 0.165, + "step": 99420 + }, + { + "epoch": 4.64, + "learning_rate": 4.577618233975515e-06, + "loss": 0.1939, + "step": 99425 + }, + { + "epoch": 4.64, + "learning_rate": 4.576834448920729e-06, + "loss": 0.0918, + "step": 99430 + }, + { + "epoch": 4.64, + "learning_rate": 4.576050663865942e-06, + "loss": 0.0138, + "step": 99435 + }, + { + "epoch": 4.64, + "learning_rate": 4.5752668788111556e-06, + "loss": 0.0605, + "step": 99440 + }, + { + "epoch": 4.64, + "learning_rate": 4.574483093756369e-06, + "loss": 0.0318, + "step": 99445 + }, + { + "epoch": 4.64, + "learning_rate": 4.573699308701582e-06, + "loss": 0.0423, + "step": 99450 + }, + { + "epoch": 4.64, + "learning_rate": 4.572915523646796e-06, + "loss": 0.0584, + "step": 99455 + }, + { + "epoch": 4.64, + "learning_rate": 4.572131738592009e-06, + "loss": 0.0923, + "step": 99460 + }, + { + "epoch": 4.64, + "learning_rate": 4.5713479535372226e-06, + "loss": 0.1019, + "step": 99465 + }, + { + "epoch": 4.64, + "learning_rate": 4.570564168482436e-06, + "loss": 0.2771, + "step": 99470 + }, + { + "epoch": 4.64, + "learning_rate": 4.5697803834276495e-06, + "loss": 0.4815, + "step": 99475 + }, + { + "epoch": 4.64, + "learning_rate": 4.568996598372863e-06, + "loss": 0.0602, + "step": 99480 + }, + { + "epoch": 4.64, + "learning_rate": 4.568212813318076e-06, + "loss": 0.0252, + "step": 99485 + }, + { + "epoch": 4.64, + "learning_rate": 4.5674290282632895e-06, + "loss": 0.0183, + "step": 99490 + }, + { + "epoch": 4.64, + "learning_rate": 4.566645243208503e-06, + "loss": 0.0262, + "step": 99495 + }, + { + "epoch": 4.64, + "learning_rate": 4.5658614581537165e-06, + "loss": 0.0902, + "step": 99500 + }, + { + "epoch": 4.64, + "learning_rate": 4.5650776730989296e-06, + "loss": 0.099, + "step": 99505 + }, + { + "epoch": 4.64, + "learning_rate": 4.5642938880441435e-06, + "loss": 0.0742, + "step": 99510 + }, + { + "epoch": 4.64, + "learning_rate": 4.5635101029893565e-06, + "loss": 0.1073, + "step": 99515 + }, + { + "epoch": 4.64, + "learning_rate": 4.56272631793457e-06, + "loss": 0.2168, + "step": 99520 + }, + { + "epoch": 4.64, + "learning_rate": 4.5619425328797835e-06, + "loss": 0.3789, + "step": 99525 + }, + { + "epoch": 4.64, + "learning_rate": 4.5611587478249966e-06, + "loss": 0.0901, + "step": 99530 + }, + { + "epoch": 4.64, + "learning_rate": 4.5603749627702105e-06, + "loss": 0.0096, + "step": 99535 + }, + { + "epoch": 4.64, + "learning_rate": 4.5595911777154235e-06, + "loss": 0.0438, + "step": 99540 + }, + { + "epoch": 4.64, + "learning_rate": 4.558807392660637e-06, + "loss": 0.0671, + "step": 99545 + }, + { + "epoch": 4.65, + "learning_rate": 4.5580236076058505e-06, + "loss": 0.0396, + "step": 99550 + }, + { + "epoch": 4.65, + "learning_rate": 4.5572398225510635e-06, + "loss": 0.0452, + "step": 99555 + }, + { + "epoch": 4.65, + "learning_rate": 4.5564560374962774e-06, + "loss": 0.0448, + "step": 99560 + }, + { + "epoch": 4.65, + "learning_rate": 4.5556722524414905e-06, + "loss": 0.1305, + "step": 99565 + }, + { + "epoch": 4.65, + "learning_rate": 4.554888467386704e-06, + "loss": 0.1631, + "step": 99570 + }, + { + "epoch": 4.65, + "learning_rate": 4.554104682331918e-06, + "loss": 0.2078, + "step": 99575 + }, + { + "epoch": 4.65, + "learning_rate": 4.5533208972771305e-06, + "loss": 0.0835, + "step": 99580 + }, + { + "epoch": 4.65, + "learning_rate": 4.5525371122223444e-06, + "loss": 0.0182, + "step": 99585 + }, + { + "epoch": 4.65, + "learning_rate": 4.5517533271675575e-06, + "loss": 0.0456, + "step": 99590 + }, + { + "epoch": 4.65, + "learning_rate": 4.550969542112771e-06, + "loss": 0.0753, + "step": 99595 + }, + { + "epoch": 4.65, + "learning_rate": 4.550185757057985e-06, + "loss": 0.0423, + "step": 99600 + }, + { + "epoch": 4.65, + "learning_rate": 4.549401972003198e-06, + "loss": 0.1024, + "step": 99605 + }, + { + "epoch": 4.65, + "learning_rate": 4.548618186948411e-06, + "loss": 0.0633, + "step": 99610 + }, + { + "epoch": 4.65, + "learning_rate": 4.547834401893625e-06, + "loss": 0.1306, + "step": 99615 + }, + { + "epoch": 4.65, + "learning_rate": 4.547050616838838e-06, + "loss": 0.0914, + "step": 99620 + }, + { + "epoch": 4.65, + "learning_rate": 4.546266831784052e-06, + "loss": 0.3363, + "step": 99625 + }, + { + "epoch": 4.65, + "learning_rate": 4.545483046729265e-06, + "loss": 0.1153, + "step": 99630 + }, + { + "epoch": 4.65, + "learning_rate": 4.544699261674479e-06, + "loss": 0.0315, + "step": 99635 + }, + { + "epoch": 4.65, + "learning_rate": 4.543915476619692e-06, + "loss": 0.0346, + "step": 99640 + }, + { + "epoch": 4.65, + "learning_rate": 4.543131691564905e-06, + "loss": 0.0418, + "step": 99645 + }, + { + "epoch": 4.65, + "learning_rate": 4.542347906510119e-06, + "loss": 0.0681, + "step": 99650 + }, + { + "epoch": 4.65, + "learning_rate": 4.541564121455332e-06, + "loss": 0.0773, + "step": 99655 + }, + { + "epoch": 4.65, + "learning_rate": 4.540780336400546e-06, + "loss": 0.1295, + "step": 99660 + }, + { + "epoch": 4.65, + "learning_rate": 4.539996551345759e-06, + "loss": 0.1052, + "step": 99665 + }, + { + "epoch": 4.65, + "learning_rate": 4.539212766290973e-06, + "loss": 0.1512, + "step": 99670 + }, + { + "epoch": 4.65, + "learning_rate": 4.538428981236186e-06, + "loss": 0.1871, + "step": 99675 + }, + { + "epoch": 4.65, + "learning_rate": 4.537645196181399e-06, + "loss": 0.0809, + "step": 99680 + }, + { + "epoch": 4.65, + "learning_rate": 4.536861411126613e-06, + "loss": 0.0047, + "step": 99685 + }, + { + "epoch": 4.65, + "learning_rate": 4.536077626071826e-06, + "loss": 0.0064, + "step": 99690 + }, + { + "epoch": 4.65, + "learning_rate": 4.53529384101704e-06, + "loss": 0.0203, + "step": 99695 + }, + { + "epoch": 4.65, + "learning_rate": 4.534510055962253e-06, + "loss": 0.0274, + "step": 99700 + }, + { + "epoch": 4.65, + "learning_rate": 4.533726270907467e-06, + "loss": 0.056, + "step": 99705 + }, + { + "epoch": 4.65, + "learning_rate": 4.53294248585268e-06, + "loss": 0.0571, + "step": 99710 + }, + { + "epoch": 4.65, + "learning_rate": 4.532158700797893e-06, + "loss": 0.0781, + "step": 99715 + }, + { + "epoch": 4.65, + "learning_rate": 4.531374915743107e-06, + "loss": 0.2368, + "step": 99720 + }, + { + "epoch": 4.65, + "learning_rate": 4.53059113068832e-06, + "loss": 0.2704, + "step": 99725 + }, + { + "epoch": 4.65, + "learning_rate": 4.529807345633534e-06, + "loss": 0.0933, + "step": 99730 + }, + { + "epoch": 4.65, + "learning_rate": 4.529023560578747e-06, + "loss": 0.0763, + "step": 99735 + }, + { + "epoch": 4.65, + "learning_rate": 4.52823977552396e-06, + "loss": 0.0332, + "step": 99740 + }, + { + "epoch": 4.65, + "learning_rate": 4.527455990469174e-06, + "loss": 0.0182, + "step": 99745 + }, + { + "epoch": 4.65, + "learning_rate": 4.526672205414387e-06, + "loss": 0.0203, + "step": 99750 + }, + { + "epoch": 4.65, + "learning_rate": 4.525888420359601e-06, + "loss": 0.0483, + "step": 99755 + }, + { + "epoch": 4.65, + "learning_rate": 4.525104635304814e-06, + "loss": 0.0245, + "step": 99760 + }, + { + "epoch": 4.66, + "learning_rate": 4.524320850250028e-06, + "loss": 0.1421, + "step": 99765 + }, + { + "epoch": 4.66, + "learning_rate": 4.523537065195241e-06, + "loss": 0.1428, + "step": 99770 + }, + { + "epoch": 4.66, + "learning_rate": 4.522753280140454e-06, + "loss": 0.3552, + "step": 99775 + }, + { + "epoch": 4.66, + "learning_rate": 4.521969495085668e-06, + "loss": 0.0537, + "step": 99780 + }, + { + "epoch": 4.66, + "learning_rate": 4.521185710030881e-06, + "loss": 0.0148, + "step": 99785 + }, + { + "epoch": 4.66, + "learning_rate": 4.520401924976095e-06, + "loss": 0.0154, + "step": 99790 + }, + { + "epoch": 4.66, + "learning_rate": 4.519618139921308e-06, + "loss": 0.028, + "step": 99795 + }, + { + "epoch": 4.66, + "learning_rate": 4.518834354866522e-06, + "loss": 0.0268, + "step": 99800 + }, + { + "epoch": 4.66, + "learning_rate": 4.518050569811735e-06, + "loss": 0.0685, + "step": 99805 + }, + { + "epoch": 4.66, + "learning_rate": 4.517266784756948e-06, + "loss": 0.1018, + "step": 99810 + }, + { + "epoch": 4.66, + "learning_rate": 4.516482999702162e-06, + "loss": 0.1029, + "step": 99815 + }, + { + "epoch": 4.66, + "learning_rate": 4.515699214647375e-06, + "loss": 0.1975, + "step": 99820 + }, + { + "epoch": 4.66, + "learning_rate": 4.514915429592589e-06, + "loss": 0.3415, + "step": 99825 + }, + { + "epoch": 4.66, + "learning_rate": 4.514131644537802e-06, + "loss": 0.0967, + "step": 99830 + }, + { + "epoch": 4.66, + "learning_rate": 4.513347859483016e-06, + "loss": 0.0452, + "step": 99835 + }, + { + "epoch": 4.66, + "learning_rate": 4.512564074428229e-06, + "loss": 0.0264, + "step": 99840 + }, + { + "epoch": 4.66, + "learning_rate": 4.511780289373442e-06, + "loss": 0.0325, + "step": 99845 + }, + { + "epoch": 4.66, + "learning_rate": 4.510996504318656e-06, + "loss": 0.079, + "step": 99850 + }, + { + "epoch": 4.66, + "learning_rate": 4.510212719263869e-06, + "loss": 0.0827, + "step": 99855 + }, + { + "epoch": 4.66, + "learning_rate": 4.509428934209083e-06, + "loss": 0.1163, + "step": 99860 + }, + { + "epoch": 4.66, + "learning_rate": 4.508645149154297e-06, + "loss": 0.0894, + "step": 99865 + }, + { + "epoch": 4.66, + "learning_rate": 4.507861364099509e-06, + "loss": 0.1934, + "step": 99870 + }, + { + "epoch": 4.66, + "learning_rate": 4.507077579044723e-06, + "loss": 0.4402, + "step": 99875 + }, + { + "epoch": 4.66, + "learning_rate": 4.506293793989936e-06, + "loss": 0.0838, + "step": 99880 + }, + { + "epoch": 4.66, + "learning_rate": 4.50551000893515e-06, + "loss": 0.0098, + "step": 99885 + }, + { + "epoch": 4.66, + "learning_rate": 4.504726223880364e-06, + "loss": 0.0539, + "step": 99890 + }, + { + "epoch": 4.66, + "learning_rate": 4.503942438825577e-06, + "loss": 0.0893, + "step": 99895 + }, + { + "epoch": 4.66, + "learning_rate": 4.503158653770791e-06, + "loss": 0.0316, + "step": 99900 + }, + { + "epoch": 4.66, + "learning_rate": 4.502374868716004e-06, + "loss": 0.0923, + "step": 99905 + }, + { + "epoch": 4.66, + "learning_rate": 4.501591083661217e-06, + "loss": 0.1255, + "step": 99910 + }, + { + "epoch": 4.66, + "learning_rate": 4.500807298606431e-06, + "loss": 0.065, + "step": 99915 + }, + { + "epoch": 4.66, + "learning_rate": 4.500023513551644e-06, + "loss": 0.1271, + "step": 99920 + }, + { + "epoch": 4.66, + "learning_rate": 4.499239728496858e-06, + "loss": 0.3634, + "step": 99925 + }, + { + "epoch": 4.66, + "learning_rate": 4.498455943442071e-06, + "loss": 0.1126, + "step": 99930 + }, + { + "epoch": 4.66, + "learning_rate": 4.497672158387284e-06, + "loss": 0.0364, + "step": 99935 + }, + { + "epoch": 4.66, + "learning_rate": 4.496888373332498e-06, + "loss": 0.018, + "step": 99940 + }, + { + "epoch": 4.66, + "learning_rate": 4.496104588277711e-06, + "loss": 0.016, + "step": 99945 + }, + { + "epoch": 4.66, + "learning_rate": 4.495320803222925e-06, + "loss": 0.0291, + "step": 99950 + }, + { + "epoch": 4.66, + "learning_rate": 4.494537018168138e-06, + "loss": 0.0379, + "step": 99955 + }, + { + "epoch": 4.66, + "learning_rate": 4.493753233113352e-06, + "loss": 0.0958, + "step": 99960 + }, + { + "epoch": 4.66, + "learning_rate": 4.492969448058565e-06, + "loss": 0.0772, + "step": 99965 + }, + { + "epoch": 4.66, + "learning_rate": 4.492185663003778e-06, + "loss": 0.1524, + "step": 99970 + }, + { + "epoch": 4.66, + "learning_rate": 4.491401877948992e-06, + "loss": 0.1936, + "step": 99975 + }, + { + "epoch": 4.67, + "learning_rate": 4.490618092894205e-06, + "loss": 0.0644, + "step": 99980 + }, + { + "epoch": 4.67, + "learning_rate": 4.489834307839419e-06, + "loss": 0.0495, + "step": 99985 + }, + { + "epoch": 4.67, + "learning_rate": 4.489050522784632e-06, + "loss": 0.0511, + "step": 99990 + }, + { + "epoch": 4.67, + "learning_rate": 4.488266737729846e-06, + "loss": 0.043, + "step": 99995 + }, + { + "epoch": 4.67, + "learning_rate": 4.487482952675059e-06, + "loss": 0.0653, + "step": 100000 + }, + { + "epoch": 4.67, + "learning_rate": 4.486699167620272e-06, + "loss": 0.1064, + "step": 100005 + }, + { + "epoch": 4.67, + "learning_rate": 4.485915382565486e-06, + "loss": 0.1228, + "step": 100010 + }, + { + "epoch": 4.67, + "learning_rate": 4.485131597510699e-06, + "loss": 0.0842, + "step": 100015 + }, + { + "epoch": 4.67, + "learning_rate": 4.484347812455913e-06, + "loss": 0.163, + "step": 100020 + }, + { + "epoch": 4.67, + "learning_rate": 4.483564027401126e-06, + "loss": 0.2458, + "step": 100025 + }, + { + "epoch": 4.67, + "learning_rate": 4.48278024234634e-06, + "loss": 0.0753, + "step": 100030 + }, + { + "epoch": 4.67, + "learning_rate": 4.481996457291553e-06, + "loss": 0.0374, + "step": 100035 + }, + { + "epoch": 4.67, + "learning_rate": 4.481212672236766e-06, + "loss": 0.0364, + "step": 100040 + }, + { + "epoch": 4.67, + "learning_rate": 4.48042888718198e-06, + "loss": 0.0259, + "step": 100045 + }, + { + "epoch": 4.67, + "learning_rate": 4.479645102127193e-06, + "loss": 0.0265, + "step": 100050 + }, + { + "epoch": 4.67, + "learning_rate": 4.478861317072407e-06, + "loss": 0.0263, + "step": 100055 + }, + { + "epoch": 4.67, + "learning_rate": 4.47807753201762e-06, + "loss": 0.085, + "step": 100060 + }, + { + "epoch": 4.67, + "learning_rate": 4.477293746962833e-06, + "loss": 0.1516, + "step": 100065 + }, + { + "epoch": 4.67, + "learning_rate": 4.476509961908047e-06, + "loss": 0.1031, + "step": 100070 + }, + { + "epoch": 4.67, + "learning_rate": 4.47572617685326e-06, + "loss": 0.2501, + "step": 100075 + }, + { + "epoch": 4.67, + "learning_rate": 4.474942391798474e-06, + "loss": 0.0514, + "step": 100080 + }, + { + "epoch": 4.67, + "learning_rate": 4.474158606743687e-06, + "loss": 0.0301, + "step": 100085 + }, + { + "epoch": 4.67, + "learning_rate": 4.473374821688901e-06, + "loss": 0.0145, + "step": 100090 + }, + { + "epoch": 4.67, + "learning_rate": 4.472591036634114e-06, + "loss": 0.0621, + "step": 100095 + }, + { + "epoch": 4.67, + "learning_rate": 4.471807251579327e-06, + "loss": 0.0611, + "step": 100100 + }, + { + "epoch": 4.67, + "learning_rate": 4.471023466524541e-06, + "loss": 0.1244, + "step": 100105 + }, + { + "epoch": 4.67, + "learning_rate": 4.470239681469754e-06, + "loss": 0.066, + "step": 100110 + }, + { + "epoch": 4.67, + "learning_rate": 4.469455896414968e-06, + "loss": 0.1101, + "step": 100115 + }, + { + "epoch": 4.67, + "learning_rate": 4.468672111360181e-06, + "loss": 0.1427, + "step": 100120 + }, + { + "epoch": 4.67, + "learning_rate": 4.467888326305395e-06, + "loss": 0.4447, + "step": 100125 + }, + { + "epoch": 4.67, + "learning_rate": 4.467104541250608e-06, + "loss": 0.0871, + "step": 100130 + }, + { + "epoch": 4.67, + "learning_rate": 4.466320756195821e-06, + "loss": 0.0325, + "step": 100135 + }, + { + "epoch": 4.67, + "learning_rate": 4.465536971141035e-06, + "loss": 0.0313, + "step": 100140 + }, + { + "epoch": 4.67, + "learning_rate": 4.464753186086248e-06, + "loss": 0.1006, + "step": 100145 + }, + { + "epoch": 4.67, + "learning_rate": 4.463969401031462e-06, + "loss": 0.0335, + "step": 100150 + }, + { + "epoch": 4.67, + "learning_rate": 4.4631856159766755e-06, + "loss": 0.0333, + "step": 100155 + }, + { + "epoch": 4.67, + "learning_rate": 4.4624018309218885e-06, + "loss": 0.1575, + "step": 100160 + }, + { + "epoch": 4.67, + "learning_rate": 4.461618045867102e-06, + "loss": 0.1068, + "step": 100165 + }, + { + "epoch": 4.67, + "learning_rate": 4.460834260812315e-06, + "loss": 0.1535, + "step": 100170 + }, + { + "epoch": 4.67, + "learning_rate": 4.4600504757575286e-06, + "loss": 0.3579, + "step": 100175 + }, + { + "epoch": 4.67, + "learning_rate": 4.4592666907027425e-06, + "loss": 0.0549, + "step": 100180 + }, + { + "epoch": 4.67, + "learning_rate": 4.4584829056479555e-06, + "loss": 0.029, + "step": 100185 + }, + { + "epoch": 4.68, + "learning_rate": 4.4576991205931694e-06, + "loss": 0.0116, + "step": 100190 + }, + { + "epoch": 4.68, + "learning_rate": 4.4569153355383825e-06, + "loss": 0.0231, + "step": 100195 + }, + { + "epoch": 4.68, + "learning_rate": 4.4561315504835956e-06, + "loss": 0.0429, + "step": 100200 + }, + { + "epoch": 4.68, + "learning_rate": 4.4553477654288095e-06, + "loss": 0.05, + "step": 100205 + }, + { + "epoch": 4.68, + "learning_rate": 4.4545639803740225e-06, + "loss": 0.1529, + "step": 100210 + }, + { + "epoch": 4.68, + "learning_rate": 4.453780195319236e-06, + "loss": 0.1274, + "step": 100215 + }, + { + "epoch": 4.68, + "learning_rate": 4.4529964102644495e-06, + "loss": 0.1877, + "step": 100220 + }, + { + "epoch": 4.68, + "learning_rate": 4.452212625209663e-06, + "loss": 0.4301, + "step": 100225 + }, + { + "epoch": 4.68, + "learning_rate": 4.4514288401548764e-06, + "loss": 0.0988, + "step": 100230 + }, + { + "epoch": 4.68, + "learning_rate": 4.4506450551000895e-06, + "loss": 0.0515, + "step": 100235 + }, + { + "epoch": 4.68, + "learning_rate": 4.449861270045303e-06, + "loss": 0.0358, + "step": 100240 + }, + { + "epoch": 4.68, + "learning_rate": 4.4490774849905165e-06, + "loss": 0.0721, + "step": 100245 + }, + { + "epoch": 4.68, + "learning_rate": 4.44829369993573e-06, + "loss": 0.0414, + "step": 100250 + }, + { + "epoch": 4.68, + "learning_rate": 4.4475099148809434e-06, + "loss": 0.0563, + "step": 100255 + }, + { + "epoch": 4.68, + "learning_rate": 4.4467261298261565e-06, + "loss": 0.1743, + "step": 100260 + }, + { + "epoch": 4.68, + "learning_rate": 4.44594234477137e-06, + "loss": 0.0877, + "step": 100265 + }, + { + "epoch": 4.68, + "learning_rate": 4.4451585597165835e-06, + "loss": 0.1266, + "step": 100270 + }, + { + "epoch": 4.68, + "learning_rate": 4.444374774661797e-06, + "loss": 0.287, + "step": 100275 + }, + { + "epoch": 4.68, + "learning_rate": 4.44359098960701e-06, + "loss": 0.1152, + "step": 100280 + }, + { + "epoch": 4.68, + "learning_rate": 4.442807204552224e-06, + "loss": 0.0285, + "step": 100285 + }, + { + "epoch": 4.68, + "learning_rate": 4.442023419497437e-06, + "loss": 0.0665, + "step": 100290 + }, + { + "epoch": 4.68, + "learning_rate": 4.4412396344426504e-06, + "loss": 0.0313, + "step": 100295 + }, + { + "epoch": 4.68, + "learning_rate": 4.440455849387864e-06, + "loss": 0.052, + "step": 100300 + }, + { + "epoch": 4.68, + "learning_rate": 4.439672064333077e-06, + "loss": 0.0535, + "step": 100305 + }, + { + "epoch": 4.68, + "learning_rate": 4.438888279278291e-06, + "loss": 0.0936, + "step": 100310 + }, + { + "epoch": 4.68, + "learning_rate": 4.438104494223504e-06, + "loss": 0.0685, + "step": 100315 + }, + { + "epoch": 4.68, + "learning_rate": 4.437320709168718e-06, + "loss": 0.2202, + "step": 100320 + }, + { + "epoch": 4.68, + "learning_rate": 4.436536924113931e-06, + "loss": 0.1813, + "step": 100325 + }, + { + "epoch": 4.68, + "learning_rate": 4.435753139059144e-06, + "loss": 0.0361, + "step": 100330 + }, + { + "epoch": 4.68, + "learning_rate": 4.434969354004358e-06, + "loss": 0.0256, + "step": 100335 + }, + { + "epoch": 4.68, + "learning_rate": 4.434185568949571e-06, + "loss": 0.0306, + "step": 100340 + }, + { + "epoch": 4.68, + "learning_rate": 4.433401783894785e-06, + "loss": 0.0158, + "step": 100345 + }, + { + "epoch": 4.68, + "learning_rate": 4.432617998839998e-06, + "loss": 0.0229, + "step": 100350 + }, + { + "epoch": 4.68, + "learning_rate": 4.431834213785212e-06, + "loss": 0.081, + "step": 100355 + }, + { + "epoch": 4.68, + "learning_rate": 4.431050428730425e-06, + "loss": 0.1585, + "step": 100360 + }, + { + "epoch": 4.68, + "learning_rate": 4.430266643675638e-06, + "loss": 0.1321, + "step": 100365 + }, + { + "epoch": 4.68, + "learning_rate": 4.429482858620852e-06, + "loss": 0.1093, + "step": 100370 + }, + { + "epoch": 4.68, + "learning_rate": 4.428699073566065e-06, + "loss": 0.1172, + "step": 100375 + }, + { + "epoch": 4.68, + "learning_rate": 4.427915288511279e-06, + "loss": 0.0643, + "step": 100380 + }, + { + "epoch": 4.68, + "learning_rate": 4.427131503456492e-06, + "loss": 0.0622, + "step": 100385 + }, + { + "epoch": 4.68, + "learning_rate": 4.426347718401705e-06, + "loss": 0.0311, + "step": 100390 + }, + { + "epoch": 4.68, + "learning_rate": 4.425563933346919e-06, + "loss": 0.0394, + "step": 100395 + }, + { + "epoch": 4.68, + "learning_rate": 4.424780148292132e-06, + "loss": 0.0623, + "step": 100400 + }, + { + "epoch": 4.69, + "learning_rate": 4.423996363237346e-06, + "loss": 0.0637, + "step": 100405 + }, + { + "epoch": 4.69, + "learning_rate": 4.423212578182559e-06, + "loss": 0.0639, + "step": 100410 + }, + { + "epoch": 4.69, + "learning_rate": 4.422428793127773e-06, + "loss": 0.145, + "step": 100415 + }, + { + "epoch": 4.69, + "learning_rate": 4.421645008072987e-06, + "loss": 0.2184, + "step": 100420 + }, + { + "epoch": 4.69, + "learning_rate": 4.420861223018199e-06, + "loss": 0.3487, + "step": 100425 + }, + { + "epoch": 4.69, + "learning_rate": 4.420077437963413e-06, + "loss": 0.0608, + "step": 100430 + }, + { + "epoch": 4.69, + "learning_rate": 4.419293652908626e-06, + "loss": 0.0113, + "step": 100435 + }, + { + "epoch": 4.69, + "learning_rate": 4.41850986785384e-06, + "loss": 0.0447, + "step": 100440 + }, + { + "epoch": 4.69, + "learning_rate": 4.417726082799054e-06, + "loss": 0.0929, + "step": 100445 + }, + { + "epoch": 4.69, + "learning_rate": 4.416942297744267e-06, + "loss": 0.0528, + "step": 100450 + }, + { + "epoch": 4.69, + "learning_rate": 4.41615851268948e-06, + "loss": 0.0348, + "step": 100455 + }, + { + "epoch": 4.69, + "learning_rate": 4.415374727634693e-06, + "loss": 0.0728, + "step": 100460 + }, + { + "epoch": 4.69, + "learning_rate": 4.414590942579907e-06, + "loss": 0.0971, + "step": 100465 + }, + { + "epoch": 4.69, + "learning_rate": 4.413807157525121e-06, + "loss": 0.1023, + "step": 100470 + }, + { + "epoch": 4.69, + "learning_rate": 4.413023372470334e-06, + "loss": 0.3496, + "step": 100475 + }, + { + "epoch": 4.69, + "learning_rate": 4.412239587415548e-06, + "loss": 0.0878, + "step": 100480 + }, + { + "epoch": 4.69, + "learning_rate": 4.411455802360761e-06, + "loss": 0.0398, + "step": 100485 + }, + { + "epoch": 4.69, + "learning_rate": 4.410672017305974e-06, + "loss": 0.053, + "step": 100490 + }, + { + "epoch": 4.69, + "learning_rate": 4.409888232251188e-06, + "loss": 0.0338, + "step": 100495 + }, + { + "epoch": 4.69, + "learning_rate": 4.409104447196401e-06, + "loss": 0.0303, + "step": 100500 + }, + { + "epoch": 4.69, + "learning_rate": 4.408320662141615e-06, + "loss": 0.0656, + "step": 100505 + }, + { + "epoch": 4.69, + "learning_rate": 4.407536877086828e-06, + "loss": 0.046, + "step": 100510 + }, + { + "epoch": 4.69, + "learning_rate": 4.406753092032042e-06, + "loss": 0.0531, + "step": 100515 + }, + { + "epoch": 4.69, + "learning_rate": 4.405969306977255e-06, + "loss": 0.2252, + "step": 100520 + }, + { + "epoch": 4.69, + "learning_rate": 4.405185521922468e-06, + "loss": 0.2124, + "step": 100525 + }, + { + "epoch": 4.69, + "learning_rate": 4.404401736867682e-06, + "loss": 0.0876, + "step": 100530 + }, + { + "epoch": 4.69, + "learning_rate": 4.403617951812895e-06, + "loss": 0.0197, + "step": 100535 + }, + { + "epoch": 4.69, + "learning_rate": 4.402834166758109e-06, + "loss": 0.0438, + "step": 100540 + }, + { + "epoch": 4.69, + "learning_rate": 4.402050381703322e-06, + "loss": 0.0313, + "step": 100545 + }, + { + "epoch": 4.69, + "learning_rate": 4.401266596648536e-06, + "loss": 0.0481, + "step": 100550 + }, + { + "epoch": 4.69, + "learning_rate": 4.400482811593749e-06, + "loss": 0.0594, + "step": 100555 + }, + { + "epoch": 4.69, + "learning_rate": 4.399699026538962e-06, + "loss": 0.1185, + "step": 100560 + }, + { + "epoch": 4.69, + "learning_rate": 4.398915241484176e-06, + "loss": 0.129, + "step": 100565 + }, + { + "epoch": 4.69, + "learning_rate": 4.398131456429389e-06, + "loss": 0.1099, + "step": 100570 + }, + { + "epoch": 4.69, + "learning_rate": 4.397347671374603e-06, + "loss": 0.3222, + "step": 100575 + }, + { + "epoch": 4.69, + "learning_rate": 4.396563886319816e-06, + "loss": 0.0973, + "step": 100580 + }, + { + "epoch": 4.69, + "learning_rate": 4.395780101265029e-06, + "loss": 0.0292, + "step": 100585 + }, + { + "epoch": 4.69, + "learning_rate": 4.394996316210243e-06, + "loss": 0.0147, + "step": 100590 + }, + { + "epoch": 4.69, + "learning_rate": 4.394212531155456e-06, + "loss": 0.0115, + "step": 100595 + }, + { + "epoch": 4.69, + "learning_rate": 4.39342874610067e-06, + "loss": 0.0616, + "step": 100600 + }, + { + "epoch": 4.69, + "learning_rate": 4.392644961045883e-06, + "loss": 0.0233, + "step": 100605 + }, + { + "epoch": 4.69, + "learning_rate": 4.391861175991097e-06, + "loss": 0.105, + "step": 100610 + }, + { + "epoch": 4.69, + "learning_rate": 4.39107739093631e-06, + "loss": 0.1102, + "step": 100615 + }, + { + "epoch": 4.7, + "learning_rate": 4.390293605881523e-06, + "loss": 0.1404, + "step": 100620 + }, + { + "epoch": 4.7, + "learning_rate": 4.389509820826737e-06, + "loss": 0.2775, + "step": 100625 + }, + { + "epoch": 4.7, + "learning_rate": 4.38872603577195e-06, + "loss": 0.0757, + "step": 100630 + }, + { + "epoch": 4.7, + "learning_rate": 4.387942250717164e-06, + "loss": 0.0186, + "step": 100635 + }, + { + "epoch": 4.7, + "learning_rate": 4.387158465662377e-06, + "loss": 0.0139, + "step": 100640 + }, + { + "epoch": 4.7, + "learning_rate": 4.386374680607591e-06, + "loss": 0.0193, + "step": 100645 + }, + { + "epoch": 4.7, + "learning_rate": 4.385590895552804e-06, + "loss": 0.0621, + "step": 100650 + }, + { + "epoch": 4.7, + "learning_rate": 4.384807110498017e-06, + "loss": 0.1055, + "step": 100655 + }, + { + "epoch": 4.7, + "learning_rate": 4.384023325443231e-06, + "loss": 0.0997, + "step": 100660 + }, + { + "epoch": 4.7, + "learning_rate": 4.383239540388444e-06, + "loss": 0.0955, + "step": 100665 + }, + { + "epoch": 4.7, + "learning_rate": 4.382455755333658e-06, + "loss": 0.0834, + "step": 100670 + }, + { + "epoch": 4.7, + "learning_rate": 4.381671970278871e-06, + "loss": 0.1876, + "step": 100675 + }, + { + "epoch": 4.7, + "learning_rate": 4.380888185224085e-06, + "loss": 0.1019, + "step": 100680 + }, + { + "epoch": 4.7, + "learning_rate": 4.380104400169298e-06, + "loss": 0.0129, + "step": 100685 + }, + { + "epoch": 4.7, + "learning_rate": 4.379320615114511e-06, + "loss": 0.0639, + "step": 100690 + }, + { + "epoch": 4.7, + "learning_rate": 4.378536830059725e-06, + "loss": 0.036, + "step": 100695 + }, + { + "epoch": 4.7, + "learning_rate": 4.377753045004938e-06, + "loss": 0.0129, + "step": 100700 + }, + { + "epoch": 4.7, + "learning_rate": 4.376969259950152e-06, + "loss": 0.0503, + "step": 100705 + }, + { + "epoch": 4.7, + "learning_rate": 4.376185474895366e-06, + "loss": 0.1166, + "step": 100710 + }, + { + "epoch": 4.7, + "learning_rate": 4.375401689840578e-06, + "loss": 0.0856, + "step": 100715 + }, + { + "epoch": 4.7, + "learning_rate": 4.374617904785792e-06, + "loss": 0.1412, + "step": 100720 + }, + { + "epoch": 4.7, + "learning_rate": 4.373834119731005e-06, + "loss": 0.2159, + "step": 100725 + }, + { + "epoch": 4.7, + "learning_rate": 4.373050334676219e-06, + "loss": 0.1446, + "step": 100730 + }, + { + "epoch": 4.7, + "learning_rate": 4.372266549621433e-06, + "loss": 0.0103, + "step": 100735 + }, + { + "epoch": 4.7, + "learning_rate": 4.371482764566646e-06, + "loss": 0.0469, + "step": 100740 + }, + { + "epoch": 4.7, + "learning_rate": 4.37069897951186e-06, + "loss": 0.0531, + "step": 100745 + }, + { + "epoch": 4.7, + "learning_rate": 4.369915194457072e-06, + "loss": 0.0338, + "step": 100750 + }, + { + "epoch": 4.7, + "learning_rate": 4.369131409402286e-06, + "loss": 0.0647, + "step": 100755 + }, + { + "epoch": 4.7, + "learning_rate": 4.3683476243475e-06, + "loss": 0.0474, + "step": 100760 + }, + { + "epoch": 4.7, + "learning_rate": 4.367563839292713e-06, + "loss": 0.135, + "step": 100765 + }, + { + "epoch": 4.7, + "learning_rate": 4.366780054237927e-06, + "loss": 0.1614, + "step": 100770 + }, + { + "epoch": 4.7, + "learning_rate": 4.36599626918314e-06, + "loss": 0.4187, + "step": 100775 + }, + { + "epoch": 4.7, + "learning_rate": 4.365212484128353e-06, + "loss": 0.0599, + "step": 100780 + }, + { + "epoch": 4.7, + "learning_rate": 4.364428699073567e-06, + "loss": 0.0413, + "step": 100785 + }, + { + "epoch": 4.7, + "learning_rate": 4.36364491401878e-06, + "loss": 0.0776, + "step": 100790 + }, + { + "epoch": 4.7, + "learning_rate": 4.362861128963994e-06, + "loss": 0.1015, + "step": 100795 + }, + { + "epoch": 4.7, + "learning_rate": 4.362077343909207e-06, + "loss": 0.0761, + "step": 100800 + }, + { + "epoch": 4.7, + "learning_rate": 4.3612935588544206e-06, + "loss": 0.0833, + "step": 100805 + }, + { + "epoch": 4.7, + "learning_rate": 4.360509773799634e-06, + "loss": 0.0517, + "step": 100810 + }, + { + "epoch": 4.7, + "learning_rate": 4.359725988744847e-06, + "loss": 0.1294, + "step": 100815 + }, + { + "epoch": 4.7, + "learning_rate": 4.3589422036900606e-06, + "loss": 0.1531, + "step": 100820 + }, + { + "epoch": 4.7, + "learning_rate": 4.358158418635274e-06, + "loss": 0.2448, + "step": 100825 + }, + { + "epoch": 4.7, + "learning_rate": 4.3573746335804875e-06, + "loss": 0.071, + "step": 100830 + }, + { + "epoch": 4.71, + "learning_rate": 4.356590848525701e-06, + "loss": 0.0241, + "step": 100835 + }, + { + "epoch": 4.71, + "learning_rate": 4.3558070634709145e-06, + "loss": 0.0162, + "step": 100840 + }, + { + "epoch": 4.71, + "learning_rate": 4.3550232784161276e-06, + "loss": 0.0181, + "step": 100845 + }, + { + "epoch": 4.71, + "learning_rate": 4.354239493361341e-06, + "loss": 0.0426, + "step": 100850 + }, + { + "epoch": 4.71, + "learning_rate": 4.3534557083065545e-06, + "loss": 0.1073, + "step": 100855 + }, + { + "epoch": 4.71, + "learning_rate": 4.352671923251768e-06, + "loss": 0.1044, + "step": 100860 + }, + { + "epoch": 4.71, + "learning_rate": 4.3518881381969815e-06, + "loss": 0.1284, + "step": 100865 + }, + { + "epoch": 4.71, + "learning_rate": 4.3511043531421946e-06, + "loss": 0.2058, + "step": 100870 + }, + { + "epoch": 4.71, + "learning_rate": 4.3503205680874085e-06, + "loss": 0.3124, + "step": 100875 + }, + { + "epoch": 4.71, + "learning_rate": 4.3495367830326215e-06, + "loss": 0.1156, + "step": 100880 + }, + { + "epoch": 4.71, + "learning_rate": 4.3487529979778346e-06, + "loss": 0.0418, + "step": 100885 + }, + { + "epoch": 4.71, + "learning_rate": 4.3479692129230485e-06, + "loss": 0.0096, + "step": 100890 + }, + { + "epoch": 4.71, + "learning_rate": 4.3471854278682615e-06, + "loss": 0.036, + "step": 100895 + }, + { + "epoch": 4.71, + "learning_rate": 4.3464016428134754e-06, + "loss": 0.0427, + "step": 100900 + }, + { + "epoch": 4.71, + "learning_rate": 4.3456178577586885e-06, + "loss": 0.0462, + "step": 100905 + }, + { + "epoch": 4.71, + "learning_rate": 4.3448340727039016e-06, + "loss": 0.0838, + "step": 100910 + }, + { + "epoch": 4.71, + "learning_rate": 4.3440502876491155e-06, + "loss": 0.0962, + "step": 100915 + }, + { + "epoch": 4.71, + "learning_rate": 4.3432665025943285e-06, + "loss": 0.1006, + "step": 100920 + }, + { + "epoch": 4.71, + "learning_rate": 4.3424827175395424e-06, + "loss": 0.2515, + "step": 100925 + }, + { + "epoch": 4.71, + "learning_rate": 4.3416989324847555e-06, + "loss": 0.085, + "step": 100930 + }, + { + "epoch": 4.71, + "learning_rate": 4.340915147429969e-06, + "loss": 0.0464, + "step": 100935 + }, + { + "epoch": 4.71, + "learning_rate": 4.3401313623751825e-06, + "loss": 0.0113, + "step": 100940 + }, + { + "epoch": 4.71, + "learning_rate": 4.3393475773203955e-06, + "loss": 0.0565, + "step": 100945 + }, + { + "epoch": 4.71, + "learning_rate": 4.338563792265609e-06, + "loss": 0.0574, + "step": 100950 + }, + { + "epoch": 4.71, + "learning_rate": 4.3377800072108225e-06, + "loss": 0.0767, + "step": 100955 + }, + { + "epoch": 4.71, + "learning_rate": 4.336996222156036e-06, + "loss": 0.148, + "step": 100960 + }, + { + "epoch": 4.71, + "learning_rate": 4.3362124371012494e-06, + "loss": 0.0429, + "step": 100965 + }, + { + "epoch": 4.71, + "learning_rate": 4.335428652046463e-06, + "loss": 0.1585, + "step": 100970 + }, + { + "epoch": 4.71, + "learning_rate": 4.334644866991676e-06, + "loss": 0.3714, + "step": 100975 + }, + { + "epoch": 4.71, + "learning_rate": 4.3338610819368895e-06, + "loss": 0.1246, + "step": 100980 + }, + { + "epoch": 4.71, + "learning_rate": 4.333077296882103e-06, + "loss": 0.0045, + "step": 100985 + }, + { + "epoch": 4.71, + "learning_rate": 4.3322935118273164e-06, + "loss": 0.0094, + "step": 100990 + }, + { + "epoch": 4.71, + "learning_rate": 4.33150972677253e-06, + "loss": 0.1085, + "step": 100995 + }, + { + "epoch": 4.71, + "learning_rate": 4.330725941717744e-06, + "loss": 0.0222, + "step": 101000 + }, + { + "epoch": 4.71, + "learning_rate": 4.329942156662957e-06, + "loss": 0.0283, + "step": 101005 + }, + { + "epoch": 4.71, + "learning_rate": 4.32915837160817e-06, + "loss": 0.0531, + "step": 101010 + }, + { + "epoch": 4.71, + "learning_rate": 4.328374586553383e-06, + "loss": 0.1308, + "step": 101015 + }, + { + "epoch": 4.71, + "learning_rate": 4.327590801498597e-06, + "loss": 0.122, + "step": 101020 + }, + { + "epoch": 4.71, + "learning_rate": 4.326807016443811e-06, + "loss": 0.2022, + "step": 101025 + }, + { + "epoch": 4.71, + "learning_rate": 4.326023231389024e-06, + "loss": 0.0853, + "step": 101030 + }, + { + "epoch": 4.71, + "learning_rate": 4.325239446334238e-06, + "loss": 0.0304, + "step": 101035 + }, + { + "epoch": 4.71, + "learning_rate": 4.32445566127945e-06, + "loss": 0.0448, + "step": 101040 + }, + { + "epoch": 4.71, + "learning_rate": 4.323671876224664e-06, + "loss": 0.0421, + "step": 101045 + }, + { + "epoch": 4.72, + "learning_rate": 4.322888091169878e-06, + "loss": 0.0299, + "step": 101050 + }, + { + "epoch": 4.72, + "learning_rate": 4.322104306115091e-06, + "loss": 0.1093, + "step": 101055 + }, + { + "epoch": 4.72, + "learning_rate": 4.321320521060305e-06, + "loss": 0.1019, + "step": 101060 + }, + { + "epoch": 4.72, + "learning_rate": 4.320536736005518e-06, + "loss": 0.1684, + "step": 101065 + }, + { + "epoch": 4.72, + "learning_rate": 4.319752950950732e-06, + "loss": 0.1416, + "step": 101070 + }, + { + "epoch": 4.72, + "learning_rate": 4.318969165895945e-06, + "loss": 0.1617, + "step": 101075 + }, + { + "epoch": 4.72, + "learning_rate": 4.318185380841158e-06, + "loss": 0.0798, + "step": 101080 + }, + { + "epoch": 4.72, + "learning_rate": 4.317401595786372e-06, + "loss": 0.0269, + "step": 101085 + }, + { + "epoch": 4.72, + "learning_rate": 4.316617810731585e-06, + "loss": 0.0215, + "step": 101090 + }, + { + "epoch": 4.72, + "learning_rate": 4.315834025676799e-06, + "loss": 0.0319, + "step": 101095 + }, + { + "epoch": 4.72, + "learning_rate": 4.315050240622012e-06, + "loss": 0.1137, + "step": 101100 + }, + { + "epoch": 4.72, + "learning_rate": 4.314266455567225e-06, + "loss": 0.0468, + "step": 101105 + }, + { + "epoch": 4.72, + "learning_rate": 4.313482670512439e-06, + "loss": 0.1588, + "step": 101110 + }, + { + "epoch": 4.72, + "learning_rate": 4.312698885457652e-06, + "loss": 0.1657, + "step": 101115 + }, + { + "epoch": 4.72, + "learning_rate": 4.311915100402866e-06, + "loss": 0.1466, + "step": 101120 + }, + { + "epoch": 4.72, + "learning_rate": 4.311131315348079e-06, + "loss": 0.2507, + "step": 101125 + }, + { + "epoch": 4.72, + "learning_rate": 4.310347530293293e-06, + "loss": 0.1321, + "step": 101130 + }, + { + "epoch": 4.72, + "learning_rate": 4.309563745238506e-06, + "loss": 0.0487, + "step": 101135 + }, + { + "epoch": 4.72, + "learning_rate": 4.308779960183719e-06, + "loss": 0.0144, + "step": 101140 + }, + { + "epoch": 4.72, + "learning_rate": 4.307996175128933e-06, + "loss": 0.0329, + "step": 101145 + }, + { + "epoch": 4.72, + "learning_rate": 4.307212390074146e-06, + "loss": 0.0198, + "step": 101150 + }, + { + "epoch": 4.72, + "learning_rate": 4.30642860501936e-06, + "loss": 0.0716, + "step": 101155 + }, + { + "epoch": 4.72, + "learning_rate": 4.305644819964573e-06, + "loss": 0.0512, + "step": 101160 + }, + { + "epoch": 4.72, + "learning_rate": 4.304861034909787e-06, + "loss": 0.1013, + "step": 101165 + }, + { + "epoch": 4.72, + "learning_rate": 4.304077249855e-06, + "loss": 0.1608, + "step": 101170 + }, + { + "epoch": 4.72, + "learning_rate": 4.303293464800213e-06, + "loss": 0.2271, + "step": 101175 + }, + { + "epoch": 4.72, + "learning_rate": 4.302509679745427e-06, + "loss": 0.1106, + "step": 101180 + }, + { + "epoch": 4.72, + "learning_rate": 4.30172589469064e-06, + "loss": 0.0343, + "step": 101185 + }, + { + "epoch": 4.72, + "learning_rate": 4.300942109635854e-06, + "loss": 0.0277, + "step": 101190 + }, + { + "epoch": 4.72, + "learning_rate": 4.300158324581067e-06, + "loss": 0.0313, + "step": 101195 + }, + { + "epoch": 4.72, + "learning_rate": 4.299374539526281e-06, + "loss": 0.0511, + "step": 101200 + }, + { + "epoch": 4.72, + "learning_rate": 4.298590754471494e-06, + "loss": 0.0606, + "step": 101205 + }, + { + "epoch": 4.72, + "learning_rate": 4.297806969416707e-06, + "loss": 0.0386, + "step": 101210 + }, + { + "epoch": 4.72, + "learning_rate": 4.297023184361921e-06, + "loss": 0.0875, + "step": 101215 + }, + { + "epoch": 4.72, + "learning_rate": 4.296239399307134e-06, + "loss": 0.1837, + "step": 101220 + }, + { + "epoch": 4.72, + "learning_rate": 4.295455614252348e-06, + "loss": 0.3151, + "step": 101225 + }, + { + "epoch": 4.72, + "learning_rate": 4.294671829197561e-06, + "loss": 0.1033, + "step": 101230 + }, + { + "epoch": 4.72, + "learning_rate": 4.293888044142774e-06, + "loss": 0.0009, + "step": 101235 + }, + { + "epoch": 4.72, + "learning_rate": 4.293104259087988e-06, + "loss": 0.0203, + "step": 101240 + }, + { + "epoch": 4.72, + "learning_rate": 4.292320474033201e-06, + "loss": 0.0396, + "step": 101245 + }, + { + "epoch": 4.72, + "learning_rate": 4.291536688978415e-06, + "loss": 0.0467, + "step": 101250 + }, + { + "epoch": 4.72, + "learning_rate": 4.290752903923628e-06, + "loss": 0.0414, + "step": 101255 + }, + { + "epoch": 4.72, + "learning_rate": 4.289969118868842e-06, + "loss": 0.0745, + "step": 101260 + }, + { + "epoch": 4.73, + "learning_rate": 4.289185333814056e-06, + "loss": 0.1038, + "step": 101265 + }, + { + "epoch": 4.73, + "learning_rate": 4.288401548759268e-06, + "loss": 0.2039, + "step": 101270 + }, + { + "epoch": 4.73, + "learning_rate": 4.287617763704482e-06, + "loss": 0.2246, + "step": 101275 + }, + { + "epoch": 4.73, + "learning_rate": 4.286833978649695e-06, + "loss": 0.1132, + "step": 101280 + }, + { + "epoch": 4.73, + "learning_rate": 4.286050193594909e-06, + "loss": 0.0273, + "step": 101285 + }, + { + "epoch": 4.73, + "learning_rate": 4.285266408540123e-06, + "loss": 0.0254, + "step": 101290 + }, + { + "epoch": 4.73, + "learning_rate": 4.284482623485336e-06, + "loss": 0.0892, + "step": 101295 + }, + { + "epoch": 4.73, + "learning_rate": 4.283698838430549e-06, + "loss": 0.0356, + "step": 101300 + }, + { + "epoch": 4.73, + "learning_rate": 4.282915053375762e-06, + "loss": 0.0393, + "step": 101305 + }, + { + "epoch": 4.73, + "learning_rate": 4.282131268320976e-06, + "loss": 0.08, + "step": 101310 + }, + { + "epoch": 4.73, + "learning_rate": 4.28134748326619e-06, + "loss": 0.1312, + "step": 101315 + }, + { + "epoch": 4.73, + "learning_rate": 4.280563698211403e-06, + "loss": 0.213, + "step": 101320 + }, + { + "epoch": 4.73, + "learning_rate": 4.279779913156617e-06, + "loss": 0.2586, + "step": 101325 + }, + { + "epoch": 4.73, + "learning_rate": 4.27899612810183e-06, + "loss": 0.0852, + "step": 101330 + }, + { + "epoch": 4.73, + "learning_rate": 4.278212343047043e-06, + "loss": 0.0176, + "step": 101335 + }, + { + "epoch": 4.73, + "learning_rate": 4.277428557992257e-06, + "loss": 0.0589, + "step": 101340 + }, + { + "epoch": 4.73, + "learning_rate": 4.27664477293747e-06, + "loss": 0.0514, + "step": 101345 + }, + { + "epoch": 4.73, + "learning_rate": 4.275860987882684e-06, + "loss": 0.0651, + "step": 101350 + }, + { + "epoch": 4.73, + "learning_rate": 4.275077202827897e-06, + "loss": 0.0596, + "step": 101355 + }, + { + "epoch": 4.73, + "learning_rate": 4.274293417773111e-06, + "loss": 0.1311, + "step": 101360 + }, + { + "epoch": 4.73, + "learning_rate": 4.273509632718324e-06, + "loss": 0.0907, + "step": 101365 + }, + { + "epoch": 4.73, + "learning_rate": 4.272725847663537e-06, + "loss": 0.0822, + "step": 101370 + }, + { + "epoch": 4.73, + "learning_rate": 4.271942062608751e-06, + "loss": 0.3338, + "step": 101375 + }, + { + "epoch": 4.73, + "learning_rate": 4.271158277553964e-06, + "loss": 0.0661, + "step": 101380 + }, + { + "epoch": 4.73, + "learning_rate": 4.270374492499178e-06, + "loss": 0.0185, + "step": 101385 + }, + { + "epoch": 4.73, + "learning_rate": 4.269590707444391e-06, + "loss": 0.0155, + "step": 101390 + }, + { + "epoch": 4.73, + "learning_rate": 4.268806922389605e-06, + "loss": 0.0548, + "step": 101395 + }, + { + "epoch": 4.73, + "learning_rate": 4.268023137334818e-06, + "loss": 0.112, + "step": 101400 + }, + { + "epoch": 4.73, + "learning_rate": 4.267239352280031e-06, + "loss": 0.0483, + "step": 101405 + }, + { + "epoch": 4.73, + "learning_rate": 4.266455567225245e-06, + "loss": 0.0991, + "step": 101410 + }, + { + "epoch": 4.73, + "learning_rate": 4.265671782170458e-06, + "loss": 0.0935, + "step": 101415 + }, + { + "epoch": 4.73, + "learning_rate": 4.264887997115672e-06, + "loss": 0.1664, + "step": 101420 + }, + { + "epoch": 4.73, + "learning_rate": 4.264104212060885e-06, + "loss": 0.3062, + "step": 101425 + }, + { + "epoch": 4.73, + "learning_rate": 4.263320427006098e-06, + "loss": 0.0956, + "step": 101430 + }, + { + "epoch": 4.73, + "learning_rate": 4.262536641951312e-06, + "loss": 0.0355, + "step": 101435 + }, + { + "epoch": 4.73, + "learning_rate": 4.261752856896525e-06, + "loss": 0.0291, + "step": 101440 + }, + { + "epoch": 4.73, + "learning_rate": 4.260969071841739e-06, + "loss": 0.0411, + "step": 101445 + }, + { + "epoch": 4.73, + "learning_rate": 4.260185286786952e-06, + "loss": 0.0863, + "step": 101450 + }, + { + "epoch": 4.73, + "learning_rate": 4.259401501732166e-06, + "loss": 0.128, + "step": 101455 + }, + { + "epoch": 4.73, + "learning_rate": 4.258617716677379e-06, + "loss": 0.0704, + "step": 101460 + }, + { + "epoch": 4.73, + "learning_rate": 4.257833931622592e-06, + "loss": 0.0494, + "step": 101465 + }, + { + "epoch": 4.73, + "learning_rate": 4.257050146567806e-06, + "loss": 0.1606, + "step": 101470 + }, + { + "epoch": 4.73, + "learning_rate": 4.256266361513019e-06, + "loss": 0.4529, + "step": 101475 + }, + { + "epoch": 4.74, + "learning_rate": 4.255482576458233e-06, + "loss": 0.075, + "step": 101480 + }, + { + "epoch": 4.74, + "learning_rate": 4.254698791403446e-06, + "loss": 0.0068, + "step": 101485 + }, + { + "epoch": 4.74, + "learning_rate": 4.2539150063486596e-06, + "loss": 0.3438, + "step": 101490 + }, + { + "epoch": 4.74, + "learning_rate": 4.253131221293873e-06, + "loss": 0.0544, + "step": 101495 + }, + { + "epoch": 4.74, + "learning_rate": 4.252347436239086e-06, + "loss": 0.0778, + "step": 101500 + }, + { + "epoch": 4.74, + "learning_rate": 4.2515636511843e-06, + "loss": 0.0615, + "step": 101505 + }, + { + "epoch": 4.74, + "learning_rate": 4.250779866129513e-06, + "loss": 0.0813, + "step": 101510 + }, + { + "epoch": 4.74, + "learning_rate": 4.2499960810747266e-06, + "loss": 0.0782, + "step": 101515 + }, + { + "epoch": 4.74, + "learning_rate": 4.24921229601994e-06, + "loss": 0.1386, + "step": 101520 + }, + { + "epoch": 4.74, + "learning_rate": 4.2484285109651535e-06, + "loss": 0.1643, + "step": 101525 + }, + { + "epoch": 4.74, + "learning_rate": 4.247644725910367e-06, + "loss": 0.0699, + "step": 101530 + }, + { + "epoch": 4.74, + "learning_rate": 4.24686094085558e-06, + "loss": 0.001, + "step": 101535 + }, + { + "epoch": 4.74, + "learning_rate": 4.2460771558007935e-06, + "loss": 0.0392, + "step": 101540 + }, + { + "epoch": 4.74, + "learning_rate": 4.245293370746007e-06, + "loss": 0.037, + "step": 101545 + }, + { + "epoch": 4.74, + "learning_rate": 4.2445095856912205e-06, + "loss": 0.0647, + "step": 101550 + }, + { + "epoch": 4.74, + "learning_rate": 4.243725800636434e-06, + "loss": 0.1063, + "step": 101555 + }, + { + "epoch": 4.74, + "learning_rate": 4.242942015581647e-06, + "loss": 0.101, + "step": 101560 + }, + { + "epoch": 4.74, + "learning_rate": 4.2421582305268605e-06, + "loss": 0.0777, + "step": 101565 + }, + { + "epoch": 4.74, + "learning_rate": 4.241374445472074e-06, + "loss": 0.2107, + "step": 101570 + }, + { + "epoch": 4.74, + "learning_rate": 4.2405906604172875e-06, + "loss": 0.3052, + "step": 101575 + }, + { + "epoch": 4.74, + "learning_rate": 4.239806875362501e-06, + "loss": 0.1206, + "step": 101580 + }, + { + "epoch": 4.74, + "learning_rate": 4.2390230903077145e-06, + "loss": 0.0267, + "step": 101585 + }, + { + "epoch": 4.74, + "learning_rate": 4.238239305252928e-06, + "loss": 0.0488, + "step": 101590 + }, + { + "epoch": 4.74, + "learning_rate": 4.237455520198141e-06, + "loss": 0.0565, + "step": 101595 + }, + { + "epoch": 4.74, + "learning_rate": 4.2366717351433545e-06, + "loss": 0.078, + "step": 101600 + }, + { + "epoch": 4.74, + "learning_rate": 4.235887950088568e-06, + "loss": 0.0374, + "step": 101605 + }, + { + "epoch": 4.74, + "learning_rate": 4.2351041650337815e-06, + "loss": 0.0958, + "step": 101610 + }, + { + "epoch": 4.74, + "learning_rate": 4.234320379978995e-06, + "loss": 0.1608, + "step": 101615 + }, + { + "epoch": 4.74, + "learning_rate": 4.233536594924208e-06, + "loss": 0.2207, + "step": 101620 + }, + { + "epoch": 4.74, + "learning_rate": 4.2327528098694215e-06, + "loss": 0.3258, + "step": 101625 + }, + { + "epoch": 4.74, + "learning_rate": 4.231969024814635e-06, + "loss": 0.0515, + "step": 101630 + }, + { + "epoch": 4.74, + "learning_rate": 4.2311852397598484e-06, + "loss": 0.0243, + "step": 101635 + }, + { + "epoch": 4.74, + "learning_rate": 4.230401454705062e-06, + "loss": 0.0392, + "step": 101640 + }, + { + "epoch": 4.74, + "learning_rate": 4.229617669650275e-06, + "loss": 0.012, + "step": 101645 + }, + { + "epoch": 4.74, + "learning_rate": 4.228833884595489e-06, + "loss": 0.0937, + "step": 101650 + }, + { + "epoch": 4.74, + "learning_rate": 4.228050099540702e-06, + "loss": 0.0467, + "step": 101655 + }, + { + "epoch": 4.74, + "learning_rate": 4.2272663144859154e-06, + "loss": 0.1344, + "step": 101660 + }, + { + "epoch": 4.74, + "learning_rate": 4.226482529431129e-06, + "loss": 0.0932, + "step": 101665 + }, + { + "epoch": 4.74, + "learning_rate": 4.225698744376342e-06, + "loss": 0.2049, + "step": 101670 + }, + { + "epoch": 4.74, + "learning_rate": 4.224914959321556e-06, + "loss": 0.2129, + "step": 101675 + }, + { + "epoch": 4.74, + "learning_rate": 4.224131174266769e-06, + "loss": 0.1162, + "step": 101680 + }, + { + "epoch": 4.74, + "learning_rate": 4.223347389211983e-06, + "loss": 0.0149, + "step": 101685 + }, + { + "epoch": 4.74, + "learning_rate": 4.222563604157196e-06, + "loss": 0.0502, + "step": 101690 + }, + { + "epoch": 4.75, + "learning_rate": 4.221779819102409e-06, + "loss": 0.0724, + "step": 101695 + }, + { + "epoch": 4.75, + "learning_rate": 4.220996034047623e-06, + "loss": 0.0126, + "step": 101700 + }, + { + "epoch": 4.75, + "learning_rate": 4.220212248992836e-06, + "loss": 0.0404, + "step": 101705 + }, + { + "epoch": 4.75, + "learning_rate": 4.21942846393805e-06, + "loss": 0.0577, + "step": 101710 + }, + { + "epoch": 4.75, + "learning_rate": 4.218644678883263e-06, + "loss": 0.0525, + "step": 101715 + }, + { + "epoch": 4.75, + "learning_rate": 4.217860893828477e-06, + "loss": 0.1736, + "step": 101720 + }, + { + "epoch": 4.75, + "learning_rate": 4.21707710877369e-06, + "loss": 0.1916, + "step": 101725 + }, + { + "epoch": 4.75, + "learning_rate": 4.216293323718903e-06, + "loss": 0.1016, + "step": 101730 + }, + { + "epoch": 4.75, + "learning_rate": 4.215509538664117e-06, + "loss": 0.0209, + "step": 101735 + }, + { + "epoch": 4.75, + "learning_rate": 4.21472575360933e-06, + "loss": 0.0245, + "step": 101740 + }, + { + "epoch": 4.75, + "learning_rate": 4.213941968554544e-06, + "loss": 0.0573, + "step": 101745 + }, + { + "epoch": 4.75, + "learning_rate": 4.213158183499757e-06, + "loss": 0.0473, + "step": 101750 + }, + { + "epoch": 4.75, + "learning_rate": 4.21237439844497e-06, + "loss": 0.0722, + "step": 101755 + }, + { + "epoch": 4.75, + "learning_rate": 4.211590613390184e-06, + "loss": 0.0564, + "step": 101760 + }, + { + "epoch": 4.75, + "learning_rate": 4.210806828335397e-06, + "loss": 0.1332, + "step": 101765 + }, + { + "epoch": 4.75, + "learning_rate": 4.210023043280611e-06, + "loss": 0.0761, + "step": 101770 + }, + { + "epoch": 4.75, + "learning_rate": 4.209239258225824e-06, + "loss": 0.2551, + "step": 101775 + }, + { + "epoch": 4.75, + "learning_rate": 4.208455473171038e-06, + "loss": 0.0988, + "step": 101780 + }, + { + "epoch": 4.75, + "learning_rate": 4.207671688116251e-06, + "loss": 0.0175, + "step": 101785 + }, + { + "epoch": 4.75, + "learning_rate": 4.206887903061464e-06, + "loss": 0.0135, + "step": 101790 + }, + { + "epoch": 4.75, + "learning_rate": 4.206104118006678e-06, + "loss": 0.0462, + "step": 101795 + }, + { + "epoch": 4.75, + "learning_rate": 4.205320332951891e-06, + "loss": 0.0332, + "step": 101800 + }, + { + "epoch": 4.75, + "learning_rate": 4.204536547897105e-06, + "loss": 0.068, + "step": 101805 + }, + { + "epoch": 4.75, + "learning_rate": 4.203752762842318e-06, + "loss": 0.1954, + "step": 101810 + }, + { + "epoch": 4.75, + "learning_rate": 4.202968977787532e-06, + "loss": 0.1114, + "step": 101815 + }, + { + "epoch": 4.75, + "learning_rate": 4.202185192732745e-06, + "loss": 0.1719, + "step": 101820 + }, + { + "epoch": 4.75, + "learning_rate": 4.201401407677958e-06, + "loss": 0.2613, + "step": 101825 + }, + { + "epoch": 4.75, + "learning_rate": 4.200617622623172e-06, + "loss": 0.0916, + "step": 101830 + }, + { + "epoch": 4.75, + "learning_rate": 4.199833837568385e-06, + "loss": 0.0624, + "step": 101835 + }, + { + "epoch": 4.75, + "learning_rate": 4.199050052513599e-06, + "loss": 0.0462, + "step": 101840 + }, + { + "epoch": 4.75, + "learning_rate": 4.198266267458813e-06, + "loss": 0.0621, + "step": 101845 + }, + { + "epoch": 4.75, + "learning_rate": 4.197482482404026e-06, + "loss": 0.0872, + "step": 101850 + }, + { + "epoch": 4.75, + "learning_rate": 4.196698697349239e-06, + "loss": 0.0472, + "step": 101855 + }, + { + "epoch": 4.75, + "learning_rate": 4.195914912294452e-06, + "loss": 0.073, + "step": 101860 + }, + { + "epoch": 4.75, + "learning_rate": 4.195131127239666e-06, + "loss": 0.0993, + "step": 101865 + }, + { + "epoch": 4.75, + "learning_rate": 4.19434734218488e-06, + "loss": 0.1161, + "step": 101870 + }, + { + "epoch": 4.75, + "learning_rate": 4.193563557130093e-06, + "loss": 0.3303, + "step": 101875 + }, + { + "epoch": 4.75, + "learning_rate": 4.192779772075307e-06, + "loss": 0.0925, + "step": 101880 + }, + { + "epoch": 4.75, + "learning_rate": 4.191995987020519e-06, + "loss": 0.0127, + "step": 101885 + }, + { + "epoch": 4.75, + "learning_rate": 4.191212201965733e-06, + "loss": 0.0241, + "step": 101890 + }, + { + "epoch": 4.75, + "learning_rate": 4.190428416910947e-06, + "loss": 0.009, + "step": 101895 + }, + { + "epoch": 4.75, + "learning_rate": 4.18964463185616e-06, + "loss": 0.0566, + "step": 101900 + }, + { + "epoch": 4.76, + "learning_rate": 4.188860846801374e-06, + "loss": 0.037, + "step": 101905 + }, + { + "epoch": 4.76, + "learning_rate": 4.188077061746587e-06, + "loss": 0.0406, + "step": 101910 + }, + { + "epoch": 4.76, + "learning_rate": 4.187293276691801e-06, + "loss": 0.069, + "step": 101915 + }, + { + "epoch": 4.76, + "learning_rate": 4.186509491637014e-06, + "loss": 0.1592, + "step": 101920 + }, + { + "epoch": 4.76, + "learning_rate": 4.185725706582227e-06, + "loss": 0.3098, + "step": 101925 + }, + { + "epoch": 4.76, + "learning_rate": 4.184941921527441e-06, + "loss": 0.1094, + "step": 101930 + }, + { + "epoch": 4.76, + "learning_rate": 4.184158136472654e-06, + "loss": 0.0389, + "step": 101935 + }, + { + "epoch": 4.76, + "learning_rate": 4.183374351417868e-06, + "loss": 0.0345, + "step": 101940 + }, + { + "epoch": 4.76, + "learning_rate": 4.182590566363081e-06, + "loss": 0.0811, + "step": 101945 + }, + { + "epoch": 4.76, + "learning_rate": 4.181806781308294e-06, + "loss": 0.0345, + "step": 101950 + }, + { + "epoch": 4.76, + "learning_rate": 4.181022996253508e-06, + "loss": 0.0336, + "step": 101955 + }, + { + "epoch": 4.76, + "learning_rate": 4.180239211198721e-06, + "loss": 0.09, + "step": 101960 + }, + { + "epoch": 4.76, + "learning_rate": 4.179455426143935e-06, + "loss": 0.0354, + "step": 101965 + }, + { + "epoch": 4.76, + "learning_rate": 4.178671641089148e-06, + "loss": 0.157, + "step": 101970 + }, + { + "epoch": 4.76, + "learning_rate": 4.177887856034362e-06, + "loss": 0.2399, + "step": 101975 + }, + { + "epoch": 4.76, + "learning_rate": 4.177104070979575e-06, + "loss": 0.0876, + "step": 101980 + }, + { + "epoch": 4.76, + "learning_rate": 4.176320285924788e-06, + "loss": 0.0391, + "step": 101985 + }, + { + "epoch": 4.76, + "learning_rate": 4.175536500870002e-06, + "loss": 0.0371, + "step": 101990 + }, + { + "epoch": 4.76, + "learning_rate": 4.174752715815215e-06, + "loss": 0.0353, + "step": 101995 + }, + { + "epoch": 4.76, + "learning_rate": 4.173968930760429e-06, + "loss": 0.0632, + "step": 102000 + }, + { + "epoch": 4.76, + "learning_rate": 4.173185145705642e-06, + "loss": 0.0878, + "step": 102005 + }, + { + "epoch": 4.76, + "learning_rate": 4.172401360650856e-06, + "loss": 0.0843, + "step": 102010 + }, + { + "epoch": 4.76, + "learning_rate": 4.171617575596069e-06, + "loss": 0.0658, + "step": 102015 + }, + { + "epoch": 4.76, + "learning_rate": 4.170833790541282e-06, + "loss": 0.0957, + "step": 102020 + }, + { + "epoch": 4.76, + "learning_rate": 4.170050005486496e-06, + "loss": 0.1669, + "step": 102025 + }, + { + "epoch": 4.76, + "learning_rate": 4.169266220431709e-06, + "loss": 0.0512, + "step": 102030 + }, + { + "epoch": 4.76, + "learning_rate": 4.168482435376923e-06, + "loss": 0.0405, + "step": 102035 + }, + { + "epoch": 4.76, + "learning_rate": 4.167698650322136e-06, + "loss": 0.0266, + "step": 102040 + }, + { + "epoch": 4.76, + "learning_rate": 4.16691486526735e-06, + "loss": 0.0463, + "step": 102045 + }, + { + "epoch": 4.76, + "learning_rate": 4.166131080212563e-06, + "loss": 0.0861, + "step": 102050 + }, + { + "epoch": 4.76, + "learning_rate": 4.165347295157776e-06, + "loss": 0.0911, + "step": 102055 + }, + { + "epoch": 4.76, + "learning_rate": 4.16456351010299e-06, + "loss": 0.0714, + "step": 102060 + }, + { + "epoch": 4.76, + "learning_rate": 4.163779725048203e-06, + "loss": 0.0743, + "step": 102065 + }, + { + "epoch": 4.76, + "learning_rate": 4.162995939993417e-06, + "loss": 0.2581, + "step": 102070 + }, + { + "epoch": 4.76, + "learning_rate": 4.16221215493863e-06, + "loss": 0.3153, + "step": 102075 + }, + { + "epoch": 4.76, + "learning_rate": 4.161428369883843e-06, + "loss": 0.0794, + "step": 102080 + }, + { + "epoch": 4.76, + "learning_rate": 4.160644584829057e-06, + "loss": 0.0138, + "step": 102085 + }, + { + "epoch": 4.76, + "learning_rate": 4.15986079977427e-06, + "loss": 0.0238, + "step": 102090 + }, + { + "epoch": 4.76, + "learning_rate": 4.159077014719484e-06, + "loss": 0.0386, + "step": 102095 + }, + { + "epoch": 4.76, + "learning_rate": 4.158293229664697e-06, + "loss": 0.0344, + "step": 102100 + }, + { + "epoch": 4.76, + "learning_rate": 4.157509444609911e-06, + "loss": 0.0561, + "step": 102105 + }, + { + "epoch": 4.76, + "learning_rate": 4.156725659555125e-06, + "loss": 0.052, + "step": 102110 + }, + { + "epoch": 4.76, + "learning_rate": 4.155941874500337e-06, + "loss": 0.135, + "step": 102115 + }, + { + "epoch": 4.77, + "learning_rate": 4.155158089445551e-06, + "loss": 0.0471, + "step": 102120 + }, + { + "epoch": 4.77, + "learning_rate": 4.154374304390764e-06, + "loss": 0.2026, + "step": 102125 + }, + { + "epoch": 4.77, + "learning_rate": 4.153590519335978e-06, + "loss": 0.0847, + "step": 102130 + }, + { + "epoch": 4.77, + "learning_rate": 4.152806734281192e-06, + "loss": 0.0033, + "step": 102135 + }, + { + "epoch": 4.77, + "learning_rate": 4.152022949226405e-06, + "loss": 0.0082, + "step": 102140 + }, + { + "epoch": 4.77, + "learning_rate": 4.151239164171618e-06, + "loss": 0.0512, + "step": 102145 + }, + { + "epoch": 4.77, + "learning_rate": 4.150455379116831e-06, + "loss": 0.0657, + "step": 102150 + }, + { + "epoch": 4.77, + "learning_rate": 4.149671594062045e-06, + "loss": 0.0595, + "step": 102155 + }, + { + "epoch": 4.77, + "learning_rate": 4.1488878090072586e-06, + "loss": 0.1565, + "step": 102160 + }, + { + "epoch": 4.77, + "learning_rate": 4.148104023952472e-06, + "loss": 0.0536, + "step": 102165 + }, + { + "epoch": 4.77, + "learning_rate": 4.1473202388976855e-06, + "loss": 0.1117, + "step": 102170 + }, + { + "epoch": 4.77, + "learning_rate": 4.146536453842899e-06, + "loss": 0.2592, + "step": 102175 + }, + { + "epoch": 4.77, + "learning_rate": 4.145752668788112e-06, + "loss": 0.1001, + "step": 102180 + }, + { + "epoch": 4.77, + "learning_rate": 4.1449688837333256e-06, + "loss": 0.0175, + "step": 102185 + }, + { + "epoch": 4.77, + "learning_rate": 4.144185098678539e-06, + "loss": 0.0481, + "step": 102190 + }, + { + "epoch": 4.77, + "learning_rate": 4.1434013136237525e-06, + "loss": 0.1071, + "step": 102195 + }, + { + "epoch": 4.77, + "learning_rate": 4.142617528568966e-06, + "loss": 0.0326, + "step": 102200 + }, + { + "epoch": 4.77, + "learning_rate": 4.1418337435141795e-06, + "loss": 0.037, + "step": 102205 + }, + { + "epoch": 4.77, + "learning_rate": 4.1410499584593925e-06, + "loss": 0.1006, + "step": 102210 + }, + { + "epoch": 4.77, + "learning_rate": 4.140266173404606e-06, + "loss": 0.1192, + "step": 102215 + }, + { + "epoch": 4.77, + "learning_rate": 4.1394823883498195e-06, + "loss": 0.3034, + "step": 102220 + }, + { + "epoch": 4.77, + "learning_rate": 4.1386986032950326e-06, + "loss": 0.2872, + "step": 102225 + }, + { + "epoch": 4.77, + "learning_rate": 4.1379148182402465e-06, + "loss": 0.0789, + "step": 102230 + }, + { + "epoch": 4.77, + "learning_rate": 4.1371310331854595e-06, + "loss": 0.0159, + "step": 102235 + }, + { + "epoch": 4.77, + "learning_rate": 4.1363472481306734e-06, + "loss": 0.0175, + "step": 102240 + }, + { + "epoch": 4.77, + "learning_rate": 4.1355634630758865e-06, + "loss": 0.0292, + "step": 102245 + }, + { + "epoch": 4.77, + "learning_rate": 4.1347796780210996e-06, + "loss": 0.0747, + "step": 102250 + }, + { + "epoch": 4.77, + "learning_rate": 4.1339958929663135e-06, + "loss": 0.0835, + "step": 102255 + }, + { + "epoch": 4.77, + "learning_rate": 4.1332121079115265e-06, + "loss": 0.0788, + "step": 102260 + }, + { + "epoch": 4.77, + "learning_rate": 4.1324283228567404e-06, + "loss": 0.0587, + "step": 102265 + }, + { + "epoch": 4.77, + "learning_rate": 4.1316445378019535e-06, + "loss": 0.122, + "step": 102270 + }, + { + "epoch": 4.77, + "learning_rate": 4.1308607527471665e-06, + "loss": 0.3336, + "step": 102275 + }, + { + "epoch": 4.77, + "learning_rate": 4.1300769676923805e-06, + "loss": 0.0666, + "step": 102280 + }, + { + "epoch": 4.77, + "learning_rate": 4.1292931826375935e-06, + "loss": 0.0359, + "step": 102285 + }, + { + "epoch": 4.77, + "learning_rate": 4.128509397582807e-06, + "loss": 0.0399, + "step": 102290 + }, + { + "epoch": 4.77, + "learning_rate": 4.1277256125280205e-06, + "loss": 0.0313, + "step": 102295 + }, + { + "epoch": 4.77, + "learning_rate": 4.126941827473234e-06, + "loss": 0.0549, + "step": 102300 + }, + { + "epoch": 4.77, + "learning_rate": 4.1261580424184474e-06, + "loss": 0.0659, + "step": 102305 + }, + { + "epoch": 4.77, + "learning_rate": 4.1253742573636605e-06, + "loss": 0.0922, + "step": 102310 + }, + { + "epoch": 4.77, + "learning_rate": 4.124590472308874e-06, + "loss": 0.0788, + "step": 102315 + }, + { + "epoch": 4.77, + "learning_rate": 4.1238066872540875e-06, + "loss": 0.099, + "step": 102320 + }, + { + "epoch": 4.77, + "learning_rate": 4.123022902199301e-06, + "loss": 0.26, + "step": 102325 + }, + { + "epoch": 4.77, + "learning_rate": 4.1222391171445144e-06, + "loss": 0.0631, + "step": 102330 + }, + { + "epoch": 4.78, + "learning_rate": 4.121455332089728e-06, + "loss": 0.0161, + "step": 102335 + }, + { + "epoch": 4.78, + "learning_rate": 4.120671547034941e-06, + "loss": 0.0277, + "step": 102340 + }, + { + "epoch": 4.78, + "learning_rate": 4.1198877619801545e-06, + "loss": 0.0392, + "step": 102345 + }, + { + "epoch": 4.78, + "learning_rate": 4.119103976925368e-06, + "loss": 0.0096, + "step": 102350 + }, + { + "epoch": 4.78, + "learning_rate": 4.118320191870581e-06, + "loss": 0.0451, + "step": 102355 + }, + { + "epoch": 4.78, + "learning_rate": 4.117536406815795e-06, + "loss": 0.0639, + "step": 102360 + }, + { + "epoch": 4.78, + "learning_rate": 4.116752621761008e-06, + "loss": 0.0636, + "step": 102365 + }, + { + "epoch": 4.78, + "learning_rate": 4.115968836706222e-06, + "loss": 0.2446, + "step": 102370 + }, + { + "epoch": 4.78, + "learning_rate": 4.115185051651435e-06, + "loss": 0.2221, + "step": 102375 + }, + { + "epoch": 4.78, + "learning_rate": 4.114401266596648e-06, + "loss": 0.0824, + "step": 102380 + }, + { + "epoch": 4.78, + "learning_rate": 4.113617481541862e-06, + "loss": 0.0295, + "step": 102385 + }, + { + "epoch": 4.78, + "learning_rate": 4.112833696487075e-06, + "loss": 0.0207, + "step": 102390 + }, + { + "epoch": 4.78, + "learning_rate": 4.112049911432289e-06, + "loss": 0.0631, + "step": 102395 + }, + { + "epoch": 4.78, + "learning_rate": 4.111266126377503e-06, + "loss": 0.102, + "step": 102400 + }, + { + "epoch": 4.78, + "learning_rate": 4.110482341322715e-06, + "loss": 0.0641, + "step": 102405 + }, + { + "epoch": 4.78, + "learning_rate": 4.109698556267929e-06, + "loss": 0.0622, + "step": 102410 + }, + { + "epoch": 4.78, + "learning_rate": 4.108914771213142e-06, + "loss": 0.0673, + "step": 102415 + }, + { + "epoch": 4.78, + "learning_rate": 4.108130986158356e-06, + "loss": 0.131, + "step": 102420 + }, + { + "epoch": 4.78, + "learning_rate": 4.10734720110357e-06, + "loss": 0.2828, + "step": 102425 + }, + { + "epoch": 4.78, + "learning_rate": 4.106563416048783e-06, + "loss": 0.0347, + "step": 102430 + }, + { + "epoch": 4.78, + "learning_rate": 4.105779630993997e-06, + "loss": 0.0513, + "step": 102435 + }, + { + "epoch": 4.78, + "learning_rate": 4.104995845939209e-06, + "loss": 0.0369, + "step": 102440 + }, + { + "epoch": 4.78, + "learning_rate": 4.104212060884423e-06, + "loss": 0.0293, + "step": 102445 + }, + { + "epoch": 4.78, + "learning_rate": 4.103428275829637e-06, + "loss": 0.0855, + "step": 102450 + }, + { + "epoch": 4.78, + "learning_rate": 4.10264449077485e-06, + "loss": 0.0622, + "step": 102455 + }, + { + "epoch": 4.78, + "learning_rate": 4.101860705720064e-06, + "loss": 0.0408, + "step": 102460 + }, + { + "epoch": 4.78, + "learning_rate": 4.101076920665277e-06, + "loss": 0.0885, + "step": 102465 + }, + { + "epoch": 4.78, + "learning_rate": 4.10029313561049e-06, + "loss": 0.129, + "step": 102470 + }, + { + "epoch": 4.78, + "learning_rate": 4.099509350555704e-06, + "loss": 0.1714, + "step": 102475 + }, + { + "epoch": 4.78, + "learning_rate": 4.098725565500917e-06, + "loss": 0.0854, + "step": 102480 + }, + { + "epoch": 4.78, + "learning_rate": 4.097941780446131e-06, + "loss": 0.0391, + "step": 102485 + }, + { + "epoch": 4.78, + "learning_rate": 4.097157995391344e-06, + "loss": 0.005, + "step": 102490 + }, + { + "epoch": 4.78, + "learning_rate": 4.096374210336558e-06, + "loss": 0.103, + "step": 102495 + }, + { + "epoch": 4.78, + "learning_rate": 4.095590425281771e-06, + "loss": 0.0286, + "step": 102500 + }, + { + "epoch": 4.78, + "learning_rate": 4.094806640226984e-06, + "loss": 0.0226, + "step": 102505 + }, + { + "epoch": 4.78, + "learning_rate": 4.094022855172198e-06, + "loss": 0.0568, + "step": 102510 + }, + { + "epoch": 4.78, + "learning_rate": 4.093239070117411e-06, + "loss": 0.1216, + "step": 102515 + }, + { + "epoch": 4.78, + "learning_rate": 4.092455285062625e-06, + "loss": 0.2009, + "step": 102520 + }, + { + "epoch": 4.78, + "learning_rate": 4.091671500007838e-06, + "loss": 0.2164, + "step": 102525 + }, + { + "epoch": 4.78, + "learning_rate": 4.090887714953052e-06, + "loss": 0.0642, + "step": 102530 + }, + { + "epoch": 4.78, + "learning_rate": 4.090103929898265e-06, + "loss": 0.0415, + "step": 102535 + }, + { + "epoch": 4.78, + "learning_rate": 4.089320144843478e-06, + "loss": 0.0613, + "step": 102540 + }, + { + "epoch": 4.78, + "learning_rate": 4.088536359788692e-06, + "loss": 0.0675, + "step": 102545 + }, + { + "epoch": 4.79, + "learning_rate": 4.087752574733905e-06, + "loss": 0.1069, + "step": 102550 + }, + { + "epoch": 4.79, + "learning_rate": 4.086968789679119e-06, + "loss": 0.0902, + "step": 102555 + }, + { + "epoch": 4.79, + "learning_rate": 4.086185004624332e-06, + "loss": 0.0892, + "step": 102560 + }, + { + "epoch": 4.79, + "learning_rate": 4.085401219569546e-06, + "loss": 0.1369, + "step": 102565 + }, + { + "epoch": 4.79, + "learning_rate": 4.084617434514759e-06, + "loss": 0.137, + "step": 102570 + }, + { + "epoch": 4.79, + "learning_rate": 4.083833649459972e-06, + "loss": 0.389, + "step": 102575 + }, + { + "epoch": 4.79, + "learning_rate": 4.083049864405186e-06, + "loss": 0.0436, + "step": 102580 + }, + { + "epoch": 4.79, + "learning_rate": 4.082266079350399e-06, + "loss": 0.0341, + "step": 102585 + }, + { + "epoch": 4.79, + "learning_rate": 4.081482294295613e-06, + "loss": 0.0508, + "step": 102590 + }, + { + "epoch": 4.79, + "learning_rate": 4.080698509240826e-06, + "loss": 0.0604, + "step": 102595 + }, + { + "epoch": 4.79, + "learning_rate": 4.079914724186039e-06, + "loss": 0.0976, + "step": 102600 + }, + { + "epoch": 4.79, + "learning_rate": 4.079130939131253e-06, + "loss": 0.1002, + "step": 102605 + }, + { + "epoch": 4.79, + "learning_rate": 4.078347154076466e-06, + "loss": 0.0617, + "step": 102610 + }, + { + "epoch": 4.79, + "learning_rate": 4.07756336902168e-06, + "loss": 0.0887, + "step": 102615 + }, + { + "epoch": 4.79, + "learning_rate": 4.076779583966893e-06, + "loss": 0.115, + "step": 102620 + }, + { + "epoch": 4.79, + "learning_rate": 4.075995798912107e-06, + "loss": 0.3442, + "step": 102625 + }, + { + "epoch": 4.79, + "learning_rate": 4.07521201385732e-06, + "loss": 0.0787, + "step": 102630 + }, + { + "epoch": 4.79, + "learning_rate": 4.074428228802533e-06, + "loss": 0.0072, + "step": 102635 + }, + { + "epoch": 4.79, + "learning_rate": 4.073644443747747e-06, + "loss": 0.0509, + "step": 102640 + }, + { + "epoch": 4.79, + "learning_rate": 4.07286065869296e-06, + "loss": 0.0265, + "step": 102645 + }, + { + "epoch": 4.79, + "learning_rate": 4.072076873638174e-06, + "loss": 0.065, + "step": 102650 + }, + { + "epoch": 4.79, + "learning_rate": 4.071293088583387e-06, + "loss": 0.044, + "step": 102655 + }, + { + "epoch": 4.79, + "learning_rate": 4.070509303528601e-06, + "loss": 0.1027, + "step": 102660 + }, + { + "epoch": 4.79, + "learning_rate": 4.069725518473814e-06, + "loss": 0.0917, + "step": 102665 + }, + { + "epoch": 4.79, + "learning_rate": 4.068941733419027e-06, + "loss": 0.1766, + "step": 102670 + }, + { + "epoch": 4.79, + "learning_rate": 4.068157948364241e-06, + "loss": 0.2478, + "step": 102675 + }, + { + "epoch": 4.79, + "learning_rate": 4.067374163309454e-06, + "loss": 0.1261, + "step": 102680 + }, + { + "epoch": 4.79, + "learning_rate": 4.066590378254668e-06, + "loss": 0.0209, + "step": 102685 + }, + { + "epoch": 4.79, + "learning_rate": 4.065806593199882e-06, + "loss": 0.0438, + "step": 102690 + }, + { + "epoch": 4.79, + "learning_rate": 4.065022808145095e-06, + "loss": 0.0216, + "step": 102695 + }, + { + "epoch": 4.79, + "learning_rate": 4.064239023090308e-06, + "loss": 0.0541, + "step": 102700 + }, + { + "epoch": 4.79, + "learning_rate": 4.063455238035521e-06, + "loss": 0.0367, + "step": 102705 + }, + { + "epoch": 4.79, + "learning_rate": 4.062671452980735e-06, + "loss": 0.1052, + "step": 102710 + }, + { + "epoch": 4.79, + "learning_rate": 4.061887667925949e-06, + "loss": 0.1206, + "step": 102715 + }, + { + "epoch": 4.79, + "learning_rate": 4.061103882871162e-06, + "loss": 0.1606, + "step": 102720 + }, + { + "epoch": 4.79, + "learning_rate": 4.060320097816376e-06, + "loss": 0.347, + "step": 102725 + }, + { + "epoch": 4.79, + "learning_rate": 4.059536312761588e-06, + "loss": 0.0909, + "step": 102730 + }, + { + "epoch": 4.79, + "learning_rate": 4.058752527706802e-06, + "loss": 0.046, + "step": 102735 + }, + { + "epoch": 4.79, + "learning_rate": 4.057968742652016e-06, + "loss": 0.0274, + "step": 102740 + }, + { + "epoch": 4.79, + "learning_rate": 4.057184957597229e-06, + "loss": 0.0535, + "step": 102745 + }, + { + "epoch": 4.79, + "learning_rate": 4.056401172542443e-06, + "loss": 0.0331, + "step": 102750 + }, + { + "epoch": 4.79, + "learning_rate": 4.055617387487656e-06, + "loss": 0.1107, + "step": 102755 + }, + { + "epoch": 4.79, + "learning_rate": 4.05483360243287e-06, + "loss": 0.0586, + "step": 102760 + }, + { + "epoch": 4.8, + "learning_rate": 4.054049817378083e-06, + "loss": 0.1788, + "step": 102765 + }, + { + "epoch": 4.8, + "learning_rate": 4.053266032323296e-06, + "loss": 0.1341, + "step": 102770 + }, + { + "epoch": 4.8, + "learning_rate": 4.05248224726851e-06, + "loss": 0.2509, + "step": 102775 + }, + { + "epoch": 4.8, + "learning_rate": 4.051698462213723e-06, + "loss": 0.1412, + "step": 102780 + }, + { + "epoch": 4.8, + "learning_rate": 4.050914677158937e-06, + "loss": 0.0198, + "step": 102785 + }, + { + "epoch": 4.8, + "learning_rate": 4.05013089210415e-06, + "loss": 0.0228, + "step": 102790 + }, + { + "epoch": 4.8, + "learning_rate": 4.049347107049363e-06, + "loss": 0.0901, + "step": 102795 + }, + { + "epoch": 4.8, + "learning_rate": 4.048563321994577e-06, + "loss": 0.0139, + "step": 102800 + }, + { + "epoch": 4.8, + "learning_rate": 4.04777953693979e-06, + "loss": 0.0793, + "step": 102805 + }, + { + "epoch": 4.8, + "learning_rate": 4.046995751885004e-06, + "loss": 0.0542, + "step": 102810 + }, + { + "epoch": 4.8, + "learning_rate": 4.046211966830217e-06, + "loss": 0.0913, + "step": 102815 + }, + { + "epoch": 4.8, + "learning_rate": 4.045428181775431e-06, + "loss": 0.1549, + "step": 102820 + }, + { + "epoch": 4.8, + "learning_rate": 4.044644396720644e-06, + "loss": 0.4346, + "step": 102825 + }, + { + "epoch": 4.8, + "learning_rate": 4.043860611665857e-06, + "loss": 0.0802, + "step": 102830 + }, + { + "epoch": 4.8, + "learning_rate": 4.043076826611071e-06, + "loss": 0.0731, + "step": 102835 + }, + { + "epoch": 4.8, + "learning_rate": 4.042293041556284e-06, + "loss": 0.0238, + "step": 102840 + }, + { + "epoch": 4.8, + "learning_rate": 4.041509256501498e-06, + "loss": 0.021, + "step": 102845 + }, + { + "epoch": 4.8, + "learning_rate": 4.040725471446711e-06, + "loss": 0.0786, + "step": 102850 + }, + { + "epoch": 4.8, + "learning_rate": 4.0399416863919246e-06, + "loss": 0.0648, + "step": 102855 + }, + { + "epoch": 4.8, + "learning_rate": 4.039157901337138e-06, + "loss": 0.0493, + "step": 102860 + }, + { + "epoch": 4.8, + "learning_rate": 4.038374116282351e-06, + "loss": 0.0754, + "step": 102865 + }, + { + "epoch": 4.8, + "learning_rate": 4.037590331227565e-06, + "loss": 0.123, + "step": 102870 + }, + { + "epoch": 4.8, + "learning_rate": 4.036806546172778e-06, + "loss": 0.3846, + "step": 102875 + }, + { + "epoch": 4.8, + "learning_rate": 4.0360227611179915e-06, + "loss": 0.1, + "step": 102880 + }, + { + "epoch": 4.8, + "learning_rate": 4.035238976063205e-06, + "loss": 0.0153, + "step": 102885 + }, + { + "epoch": 4.8, + "learning_rate": 4.0344551910084185e-06, + "loss": 0.0395, + "step": 102890 + }, + { + "epoch": 4.8, + "learning_rate": 4.0336714059536316e-06, + "loss": 0.0692, + "step": 102895 + }, + { + "epoch": 4.8, + "learning_rate": 4.032887620898845e-06, + "loss": 0.0354, + "step": 102900 + }, + { + "epoch": 4.8, + "learning_rate": 4.0321038358440585e-06, + "loss": 0.0385, + "step": 102905 + }, + { + "epoch": 4.8, + "learning_rate": 4.031320050789272e-06, + "loss": 0.137, + "step": 102910 + }, + { + "epoch": 4.8, + "learning_rate": 4.0305362657344855e-06, + "loss": 0.0659, + "step": 102915 + }, + { + "epoch": 4.8, + "learning_rate": 4.0297524806796986e-06, + "loss": 0.1472, + "step": 102920 + }, + { + "epoch": 4.8, + "learning_rate": 4.028968695624912e-06, + "loss": 0.2286, + "step": 102925 + }, + { + "epoch": 4.8, + "learning_rate": 4.0281849105701255e-06, + "loss": 0.0944, + "step": 102930 + }, + { + "epoch": 4.8, + "learning_rate": 4.027401125515339e-06, + "loss": 0.0156, + "step": 102935 + }, + { + "epoch": 4.8, + "learning_rate": 4.02677409747151e-06, + "loss": 0.0433, + "step": 102940 + }, + { + "epoch": 4.8, + "learning_rate": 4.025990312416723e-06, + "loss": 0.0392, + "step": 102945 + }, + { + "epoch": 4.8, + "learning_rate": 4.025206527361937e-06, + "loss": 0.104, + "step": 102950 + }, + { + "epoch": 4.8, + "learning_rate": 4.02442274230715e-06, + "loss": 0.0597, + "step": 102955 + }, + { + "epoch": 4.8, + "learning_rate": 4.023638957252363e-06, + "loss": 0.0833, + "step": 102960 + }, + { + "epoch": 4.8, + "learning_rate": 4.022855172197577e-06, + "loss": 0.0934, + "step": 102965 + }, + { + "epoch": 4.8, + "learning_rate": 4.02207138714279e-06, + "loss": 0.1016, + "step": 102970 + }, + { + "epoch": 4.8, + "learning_rate": 4.021287602088004e-06, + "loss": 0.1621, + "step": 102975 + }, + { + "epoch": 4.81, + "learning_rate": 4.020503817033217e-06, + "loss": 0.1038, + "step": 102980 + }, + { + "epoch": 4.81, + "learning_rate": 4.019720031978431e-06, + "loss": 0.0445, + "step": 102985 + }, + { + "epoch": 4.81, + "learning_rate": 4.018936246923644e-06, + "loss": 0.0221, + "step": 102990 + }, + { + "epoch": 4.81, + "learning_rate": 4.018152461868857e-06, + "loss": 0.013, + "step": 102995 + }, + { + "epoch": 4.81, + "learning_rate": 4.017368676814071e-06, + "loss": 0.0599, + "step": 103000 + }, + { + "epoch": 4.81, + "learning_rate": 4.016584891759284e-06, + "loss": 0.0751, + "step": 103005 + }, + { + "epoch": 4.81, + "learning_rate": 4.015801106704498e-06, + "loss": 0.0242, + "step": 103010 + }, + { + "epoch": 4.81, + "learning_rate": 4.015017321649711e-06, + "loss": 0.0305, + "step": 103015 + }, + { + "epoch": 4.81, + "learning_rate": 4.014233536594925e-06, + "loss": 0.0854, + "step": 103020 + }, + { + "epoch": 4.81, + "learning_rate": 4.013449751540138e-06, + "loss": 0.2494, + "step": 103025 + }, + { + "epoch": 4.81, + "learning_rate": 4.012665966485351e-06, + "loss": 0.101, + "step": 103030 + }, + { + "epoch": 4.81, + "learning_rate": 4.011882181430565e-06, + "loss": 0.015, + "step": 103035 + }, + { + "epoch": 4.81, + "learning_rate": 4.011098396375778e-06, + "loss": 0.0303, + "step": 103040 + }, + { + "epoch": 4.81, + "learning_rate": 4.010314611320992e-06, + "loss": 0.0303, + "step": 103045 + }, + { + "epoch": 4.81, + "learning_rate": 4.009530826266206e-06, + "loss": 0.0611, + "step": 103050 + }, + { + "epoch": 4.81, + "learning_rate": 4.008747041211419e-06, + "loss": 0.0483, + "step": 103055 + }, + { + "epoch": 4.81, + "learning_rate": 4.007963256156632e-06, + "loss": 0.0349, + "step": 103060 + }, + { + "epoch": 4.81, + "learning_rate": 4.007179471101846e-06, + "loss": 0.0967, + "step": 103065 + }, + { + "epoch": 4.81, + "learning_rate": 4.006395686047059e-06, + "loss": 0.1971, + "step": 103070 + }, + { + "epoch": 4.81, + "learning_rate": 4.005611900992273e-06, + "loss": 0.304, + "step": 103075 + }, + { + "epoch": 4.81, + "learning_rate": 4.004828115937486e-06, + "loss": 0.0572, + "step": 103080 + }, + { + "epoch": 4.81, + "learning_rate": 4.0040443308827e-06, + "loss": 0.0324, + "step": 103085 + }, + { + "epoch": 4.81, + "learning_rate": 4.003260545827913e-06, + "loss": 0.0645, + "step": 103090 + }, + { + "epoch": 4.81, + "learning_rate": 4.002476760773126e-06, + "loss": 0.0743, + "step": 103095 + }, + { + "epoch": 4.81, + "learning_rate": 4.00169297571834e-06, + "loss": 0.0575, + "step": 103100 + }, + { + "epoch": 4.81, + "learning_rate": 4.000909190663553e-06, + "loss": 0.058, + "step": 103105 + }, + { + "epoch": 4.81, + "learning_rate": 4.000125405608767e-06, + "loss": 0.0634, + "step": 103110 + }, + { + "epoch": 4.81, + "learning_rate": 3.99934162055398e-06, + "loss": 0.1741, + "step": 103115 + }, + { + "epoch": 4.81, + "learning_rate": 3.9985578354991935e-06, + "loss": 0.2418, + "step": 103120 + }, + { + "epoch": 4.81, + "learning_rate": 3.997774050444407e-06, + "loss": 0.2416, + "step": 103125 + }, + { + "epoch": 4.81, + "learning_rate": 3.99699026538962e-06, + "loss": 0.0668, + "step": 103130 + }, + { + "epoch": 4.81, + "learning_rate": 3.9962064803348336e-06, + "loss": 0.0078, + "step": 103135 + }, + { + "epoch": 4.81, + "learning_rate": 3.995422695280047e-06, + "loss": 0.0146, + "step": 103140 + }, + { + "epoch": 4.81, + "learning_rate": 3.9946389102252605e-06, + "loss": 0.0574, + "step": 103145 + }, + { + "epoch": 4.81, + "learning_rate": 3.993855125170474e-06, + "loss": 0.0532, + "step": 103150 + }, + { + "epoch": 4.81, + "learning_rate": 3.993071340115687e-06, + "loss": 0.1054, + "step": 103155 + }, + { + "epoch": 4.81, + "learning_rate": 3.9922875550609006e-06, + "loss": 0.0364, + "step": 103160 + }, + { + "epoch": 4.81, + "learning_rate": 3.991503770006114e-06, + "loss": 0.1254, + "step": 103165 + }, + { + "epoch": 4.81, + "learning_rate": 3.9907199849513275e-06, + "loss": 0.1064, + "step": 103170 + }, + { + "epoch": 4.81, + "learning_rate": 3.9899361998965406e-06, + "loss": 0.1906, + "step": 103175 + }, + { + "epoch": 4.81, + "learning_rate": 3.9891524148417545e-06, + "loss": 0.1021, + "step": 103180 + }, + { + "epoch": 4.81, + "learning_rate": 3.9883686297869675e-06, + "loss": 0.0421, + "step": 103185 + }, + { + "epoch": 4.81, + "learning_rate": 3.987584844732181e-06, + "loss": 0.0177, + "step": 103190 + }, + { + "epoch": 4.82, + "learning_rate": 3.9868010596773945e-06, + "loss": 0.0091, + "step": 103195 + }, + { + "epoch": 4.82, + "learning_rate": 3.9860172746226076e-06, + "loss": 0.0355, + "step": 103200 + }, + { + "epoch": 4.82, + "learning_rate": 3.9852334895678215e-06, + "loss": 0.1109, + "step": 103205 + }, + { + "epoch": 4.82, + "learning_rate": 3.9844497045130345e-06, + "loss": 0.096, + "step": 103210 + }, + { + "epoch": 4.82, + "learning_rate": 3.9836659194582484e-06, + "loss": 0.1427, + "step": 103215 + }, + { + "epoch": 4.82, + "learning_rate": 3.9828821344034615e-06, + "loss": 0.1414, + "step": 103220 + }, + { + "epoch": 4.82, + "learning_rate": 3.9820983493486746e-06, + "loss": 0.3227, + "step": 103225 + }, + { + "epoch": 4.82, + "learning_rate": 3.9813145642938885e-06, + "loss": 0.1128, + "step": 103230 + }, + { + "epoch": 4.82, + "learning_rate": 3.9805307792391015e-06, + "loss": 0.037, + "step": 103235 + }, + { + "epoch": 4.82, + "learning_rate": 3.979746994184315e-06, + "loss": 0.0061, + "step": 103240 + }, + { + "epoch": 4.82, + "learning_rate": 3.9789632091295285e-06, + "loss": 0.0445, + "step": 103245 + }, + { + "epoch": 4.82, + "learning_rate": 3.978179424074742e-06, + "loss": 0.0726, + "step": 103250 + }, + { + "epoch": 4.82, + "learning_rate": 3.9773956390199554e-06, + "loss": 0.0325, + "step": 103255 + }, + { + "epoch": 4.82, + "learning_rate": 3.9766118539651685e-06, + "loss": 0.0483, + "step": 103260 + }, + { + "epoch": 4.82, + "learning_rate": 3.975828068910382e-06, + "loss": 0.1595, + "step": 103265 + }, + { + "epoch": 4.82, + "learning_rate": 3.9750442838555955e-06, + "loss": 0.1825, + "step": 103270 + }, + { + "epoch": 4.82, + "learning_rate": 3.974260498800809e-06, + "loss": 0.2561, + "step": 103275 + }, + { + "epoch": 4.82, + "learning_rate": 3.9734767137460224e-06, + "loss": 0.0649, + "step": 103280 + }, + { + "epoch": 4.82, + "learning_rate": 3.9726929286912355e-06, + "loss": 0.0169, + "step": 103285 + }, + { + "epoch": 4.82, + "learning_rate": 3.971909143636449e-06, + "loss": 0.0352, + "step": 103290 + }, + { + "epoch": 4.82, + "learning_rate": 3.9711253585816625e-06, + "loss": 0.0377, + "step": 103295 + }, + { + "epoch": 4.82, + "learning_rate": 3.970341573526876e-06, + "loss": 0.107, + "step": 103300 + }, + { + "epoch": 4.82, + "learning_rate": 3.969557788472089e-06, + "loss": 0.0849, + "step": 103305 + }, + { + "epoch": 4.82, + "learning_rate": 3.968774003417303e-06, + "loss": 0.0782, + "step": 103310 + }, + { + "epoch": 4.82, + "learning_rate": 3.967990218362517e-06, + "loss": 0.048, + "step": 103315 + }, + { + "epoch": 4.82, + "learning_rate": 3.9672064333077294e-06, + "loss": 0.1307, + "step": 103320 + }, + { + "epoch": 4.82, + "learning_rate": 3.966422648252943e-06, + "loss": 0.311, + "step": 103325 + }, + { + "epoch": 4.82, + "learning_rate": 3.965638863198156e-06, + "loss": 0.0546, + "step": 103330 + }, + { + "epoch": 4.82, + "learning_rate": 3.96485507814337e-06, + "loss": 0.0187, + "step": 103335 + }, + { + "epoch": 4.82, + "learning_rate": 3.964071293088584e-06, + "loss": 0.0921, + "step": 103340 + }, + { + "epoch": 4.82, + "learning_rate": 3.963287508033797e-06, + "loss": 0.0385, + "step": 103345 + }, + { + "epoch": 4.82, + "learning_rate": 3.96250372297901e-06, + "loss": 0.0431, + "step": 103350 + }, + { + "epoch": 4.82, + "learning_rate": 3.961719937924224e-06, + "loss": 0.0755, + "step": 103355 + }, + { + "epoch": 4.82, + "learning_rate": 3.960936152869437e-06, + "loss": 0.1237, + "step": 103360 + }, + { + "epoch": 4.82, + "learning_rate": 3.960152367814651e-06, + "loss": 0.1876, + "step": 103365 + }, + { + "epoch": 4.82, + "learning_rate": 3.959368582759864e-06, + "loss": 0.2078, + "step": 103370 + }, + { + "epoch": 4.82, + "learning_rate": 3.958584797705078e-06, + "loss": 0.3256, + "step": 103375 + }, + { + "epoch": 4.82, + "learning_rate": 3.957801012650291e-06, + "loss": 0.1022, + "step": 103380 + }, + { + "epoch": 4.82, + "learning_rate": 3.957017227595504e-06, + "loss": 0.0328, + "step": 103385 + }, + { + "epoch": 4.82, + "learning_rate": 3.956233442540718e-06, + "loss": 0.0335, + "step": 103390 + }, + { + "epoch": 4.82, + "learning_rate": 3.955449657485931e-06, + "loss": 0.0269, + "step": 103395 + }, + { + "epoch": 4.82, + "learning_rate": 3.954665872431145e-06, + "loss": 0.0909, + "step": 103400 + }, + { + "epoch": 4.83, + "learning_rate": 3.953882087376358e-06, + "loss": 0.0272, + "step": 103405 + }, + { + "epoch": 4.83, + "learning_rate": 3.953098302321572e-06, + "loss": 0.1165, + "step": 103410 + }, + { + "epoch": 4.83, + "learning_rate": 3.952314517266785e-06, + "loss": 0.0829, + "step": 103415 + }, + { + "epoch": 4.83, + "learning_rate": 3.951530732211998e-06, + "loss": 0.1673, + "step": 103420 + }, + { + "epoch": 4.83, + "learning_rate": 3.950746947157212e-06, + "loss": 0.3563, + "step": 103425 + }, + { + "epoch": 4.83, + "learning_rate": 3.949963162102425e-06, + "loss": 0.1074, + "step": 103430 + }, + { + "epoch": 4.83, + "learning_rate": 3.949179377047639e-06, + "loss": 0.0321, + "step": 103435 + }, + { + "epoch": 4.83, + "learning_rate": 3.948395591992852e-06, + "loss": 0.0144, + "step": 103440 + }, + { + "epoch": 4.83, + "learning_rate": 3.947611806938066e-06, + "loss": 0.0367, + "step": 103445 + }, + { + "epoch": 4.83, + "learning_rate": 3.946828021883279e-06, + "loss": 0.0579, + "step": 103450 + }, + { + "epoch": 4.83, + "learning_rate": 3.946044236828492e-06, + "loss": 0.086, + "step": 103455 + }, + { + "epoch": 4.83, + "learning_rate": 3.945260451773706e-06, + "loss": 0.1298, + "step": 103460 + }, + { + "epoch": 4.83, + "learning_rate": 3.944476666718919e-06, + "loss": 0.1572, + "step": 103465 + }, + { + "epoch": 4.83, + "learning_rate": 3.943692881664133e-06, + "loss": 0.1239, + "step": 103470 + }, + { + "epoch": 4.83, + "learning_rate": 3.942909096609346e-06, + "loss": 0.3083, + "step": 103475 + }, + { + "epoch": 4.83, + "learning_rate": 3.942125311554559e-06, + "loss": 0.1227, + "step": 103480 + }, + { + "epoch": 4.83, + "learning_rate": 3.941341526499773e-06, + "loss": 0.043, + "step": 103485 + }, + { + "epoch": 4.83, + "learning_rate": 3.940557741444986e-06, + "loss": 0.0544, + "step": 103490 + }, + { + "epoch": 4.83, + "learning_rate": 3.9397739563902e-06, + "loss": 0.0572, + "step": 103495 + }, + { + "epoch": 4.83, + "learning_rate": 3.938990171335413e-06, + "loss": 0.0971, + "step": 103500 + }, + { + "epoch": 4.83, + "learning_rate": 3.938206386280627e-06, + "loss": 0.0853, + "step": 103505 + }, + { + "epoch": 4.83, + "learning_rate": 3.93742260122584e-06, + "loss": 0.107, + "step": 103510 + }, + { + "epoch": 4.83, + "learning_rate": 3.936638816171053e-06, + "loss": 0.0928, + "step": 103515 + }, + { + "epoch": 4.83, + "learning_rate": 3.935855031116267e-06, + "loss": 0.0694, + "step": 103520 + }, + { + "epoch": 4.83, + "learning_rate": 3.93507124606148e-06, + "loss": 0.2613, + "step": 103525 + }, + { + "epoch": 4.83, + "learning_rate": 3.934287461006694e-06, + "loss": 0.064, + "step": 103530 + }, + { + "epoch": 4.83, + "learning_rate": 3.933503675951907e-06, + "loss": 0.0321, + "step": 103535 + }, + { + "epoch": 4.83, + "learning_rate": 3.932719890897121e-06, + "loss": 0.0268, + "step": 103540 + }, + { + "epoch": 4.83, + "learning_rate": 3.931936105842334e-06, + "loss": 0.0935, + "step": 103545 + }, + { + "epoch": 4.83, + "learning_rate": 3.931152320787547e-06, + "loss": 0.0853, + "step": 103550 + }, + { + "epoch": 4.83, + "learning_rate": 3.930368535732761e-06, + "loss": 0.0589, + "step": 103555 + }, + { + "epoch": 4.83, + "learning_rate": 3.929584750677974e-06, + "loss": 0.0774, + "step": 103560 + }, + { + "epoch": 4.83, + "learning_rate": 3.928800965623188e-06, + "loss": 0.1165, + "step": 103565 + }, + { + "epoch": 4.83, + "learning_rate": 3.928017180568401e-06, + "loss": 0.1892, + "step": 103570 + }, + { + "epoch": 4.83, + "learning_rate": 3.927233395513615e-06, + "loss": 0.2343, + "step": 103575 + }, + { + "epoch": 4.83, + "learning_rate": 3.926449610458828e-06, + "loss": 0.114, + "step": 103580 + }, + { + "epoch": 4.83, + "learning_rate": 3.925665825404041e-06, + "loss": 0.0522, + "step": 103585 + }, + { + "epoch": 4.83, + "learning_rate": 3.924882040349255e-06, + "loss": 0.0052, + "step": 103590 + }, + { + "epoch": 4.83, + "learning_rate": 3.924098255294468e-06, + "loss": 0.0324, + "step": 103595 + }, + { + "epoch": 4.83, + "learning_rate": 3.923314470239682e-06, + "loss": 0.0281, + "step": 103600 + }, + { + "epoch": 4.83, + "learning_rate": 3.922530685184896e-06, + "loss": 0.0895, + "step": 103605 + }, + { + "epoch": 4.83, + "learning_rate": 3.921746900130108e-06, + "loss": 0.0671, + "step": 103610 + }, + { + "epoch": 4.83, + "learning_rate": 3.920963115075322e-06, + "loss": 0.1254, + "step": 103615 + }, + { + "epoch": 4.84, + "learning_rate": 3.920179330020535e-06, + "loss": 0.1376, + "step": 103620 + }, + { + "epoch": 4.84, + "learning_rate": 3.919395544965749e-06, + "loss": 0.2706, + "step": 103625 + }, + { + "epoch": 4.84, + "learning_rate": 3.918611759910963e-06, + "loss": 0.0861, + "step": 103630 + }, + { + "epoch": 4.84, + "learning_rate": 3.917827974856176e-06, + "loss": 0.023, + "step": 103635 + }, + { + "epoch": 4.84, + "learning_rate": 3.91704418980139e-06, + "loss": 0.0176, + "step": 103640 + }, + { + "epoch": 4.84, + "learning_rate": 3.916260404746603e-06, + "loss": 0.0464, + "step": 103645 + }, + { + "epoch": 4.84, + "learning_rate": 3.915476619691816e-06, + "loss": 0.0682, + "step": 103650 + }, + { + "epoch": 4.84, + "learning_rate": 3.91469283463703e-06, + "loss": 0.0477, + "step": 103655 + }, + { + "epoch": 4.84, + "learning_rate": 3.913909049582243e-06, + "loss": 0.1352, + "step": 103660 + }, + { + "epoch": 4.84, + "learning_rate": 3.913125264527457e-06, + "loss": 0.0775, + "step": 103665 + }, + { + "epoch": 4.84, + "learning_rate": 3.91234147947267e-06, + "loss": 0.1178, + "step": 103670 + }, + { + "epoch": 4.84, + "learning_rate": 3.911557694417883e-06, + "loss": 0.2288, + "step": 103675 + }, + { + "epoch": 4.84, + "learning_rate": 3.910773909363097e-06, + "loss": 0.1158, + "step": 103680 + }, + { + "epoch": 4.84, + "learning_rate": 3.90999012430831e-06, + "loss": 0.0239, + "step": 103685 + }, + { + "epoch": 4.84, + "learning_rate": 3.909206339253524e-06, + "loss": 0.0458, + "step": 103690 + }, + { + "epoch": 4.84, + "learning_rate": 3.908422554198737e-06, + "loss": 0.0558, + "step": 103695 + }, + { + "epoch": 4.84, + "learning_rate": 3.907638769143951e-06, + "loss": 0.051, + "step": 103700 + }, + { + "epoch": 4.84, + "learning_rate": 3.906854984089164e-06, + "loss": 0.0805, + "step": 103705 + }, + { + "epoch": 4.84, + "learning_rate": 3.906071199034377e-06, + "loss": 0.1299, + "step": 103710 + }, + { + "epoch": 4.84, + "learning_rate": 3.905287413979591e-06, + "loss": 0.1771, + "step": 103715 + }, + { + "epoch": 4.84, + "learning_rate": 3.904503628924804e-06, + "loss": 0.1707, + "step": 103720 + }, + { + "epoch": 4.84, + "learning_rate": 3.903719843870018e-06, + "loss": 0.3106, + "step": 103725 + }, + { + "epoch": 4.84, + "learning_rate": 3.902936058815231e-06, + "loss": 0.0489, + "step": 103730 + }, + { + "epoch": 4.84, + "learning_rate": 3.902152273760445e-06, + "loss": 0.0107, + "step": 103735 + }, + { + "epoch": 4.84, + "learning_rate": 3.901368488705658e-06, + "loss": 0.0066, + "step": 103740 + }, + { + "epoch": 4.84, + "learning_rate": 3.900584703650871e-06, + "loss": 0.0215, + "step": 103745 + }, + { + "epoch": 4.84, + "learning_rate": 3.899800918596085e-06, + "loss": 0.0477, + "step": 103750 + }, + { + "epoch": 4.84, + "learning_rate": 3.899017133541298e-06, + "loss": 0.0937, + "step": 103755 + }, + { + "epoch": 4.84, + "learning_rate": 3.898233348486512e-06, + "loss": 0.1393, + "step": 103760 + }, + { + "epoch": 4.84, + "learning_rate": 3.897449563431725e-06, + "loss": 0.1036, + "step": 103765 + }, + { + "epoch": 4.84, + "learning_rate": 3.896665778376939e-06, + "loss": 0.1716, + "step": 103770 + }, + { + "epoch": 4.84, + "learning_rate": 3.895881993322152e-06, + "loss": 0.3847, + "step": 103775 + }, + { + "epoch": 4.84, + "learning_rate": 3.895098208267365e-06, + "loss": 0.0385, + "step": 103780 + }, + { + "epoch": 4.84, + "learning_rate": 3.894314423212579e-06, + "loss": 0.0286, + "step": 103785 + }, + { + "epoch": 4.84, + "learning_rate": 3.893530638157792e-06, + "loss": 0.0342, + "step": 103790 + }, + { + "epoch": 4.84, + "learning_rate": 3.892746853103006e-06, + "loss": 0.0366, + "step": 103795 + }, + { + "epoch": 4.84, + "learning_rate": 3.891963068048219e-06, + "loss": 0.0518, + "step": 103800 + }, + { + "epoch": 4.84, + "learning_rate": 3.891179282993432e-06, + "loss": 0.0312, + "step": 103805 + }, + { + "epoch": 4.84, + "learning_rate": 3.890395497938646e-06, + "loss": 0.1374, + "step": 103810 + }, + { + "epoch": 4.84, + "learning_rate": 3.889611712883859e-06, + "loss": 0.6932, + "step": 103815 + }, + { + "epoch": 4.84, + "learning_rate": 3.888827927829073e-06, + "loss": 0.1461, + "step": 103820 + }, + { + "epoch": 4.84, + "learning_rate": 3.888044142774286e-06, + "loss": 0.1767, + "step": 103825 + }, + { + "epoch": 4.84, + "learning_rate": 3.8872603577194996e-06, + "loss": 0.0822, + "step": 103830 + }, + { + "epoch": 4.85, + "learning_rate": 3.886476572664713e-06, + "loss": 0.0385, + "step": 103835 + }, + { + "epoch": 4.85, + "learning_rate": 3.885692787609926e-06, + "loss": 0.0192, + "step": 103840 + }, + { + "epoch": 4.85, + "learning_rate": 3.8849090025551396e-06, + "loss": 0.0313, + "step": 103845 + }, + { + "epoch": 4.85, + "learning_rate": 3.884125217500353e-06, + "loss": 0.0084, + "step": 103850 + }, + { + "epoch": 4.85, + "learning_rate": 3.8833414324455665e-06, + "loss": 0.0516, + "step": 103855 + }, + { + "epoch": 4.85, + "learning_rate": 3.88255764739078e-06, + "loss": 0.1076, + "step": 103860 + }, + { + "epoch": 4.85, + "learning_rate": 3.8817738623359935e-06, + "loss": 0.1215, + "step": 103865 + }, + { + "epoch": 4.85, + "learning_rate": 3.8809900772812066e-06, + "loss": 0.1948, + "step": 103870 + }, + { + "epoch": 4.85, + "learning_rate": 3.88020629222642e-06, + "loss": 0.2432, + "step": 103875 + }, + { + "epoch": 4.85, + "learning_rate": 3.8794225071716335e-06, + "loss": 0.0687, + "step": 103880 + }, + { + "epoch": 4.85, + "learning_rate": 3.878638722116847e-06, + "loss": 0.0108, + "step": 103885 + }, + { + "epoch": 4.85, + "learning_rate": 3.8778549370620605e-06, + "loss": 0.0307, + "step": 103890 + }, + { + "epoch": 4.85, + "learning_rate": 3.877071152007274e-06, + "loss": 0.0369, + "step": 103895 + }, + { + "epoch": 4.85, + "learning_rate": 3.8762873669524875e-06, + "loss": 0.0361, + "step": 103900 + }, + { + "epoch": 4.85, + "learning_rate": 3.8755035818977005e-06, + "loss": 0.1103, + "step": 103905 + }, + { + "epoch": 4.85, + "learning_rate": 3.8747197968429136e-06, + "loss": 0.0409, + "step": 103910 + }, + { + "epoch": 4.85, + "learning_rate": 3.8739360117881275e-06, + "loss": 0.2798, + "step": 103915 + }, + { + "epoch": 4.85, + "learning_rate": 3.873152226733341e-06, + "loss": 0.1021, + "step": 103920 + }, + { + "epoch": 4.85, + "learning_rate": 3.8723684416785544e-06, + "loss": 0.2691, + "step": 103925 + }, + { + "epoch": 4.85, + "learning_rate": 3.871584656623768e-06, + "loss": 0.0768, + "step": 103930 + }, + { + "epoch": 4.85, + "learning_rate": 3.870800871568981e-06, + "loss": 0.0344, + "step": 103935 + }, + { + "epoch": 4.85, + "learning_rate": 3.8700170865141945e-06, + "loss": 0.035, + "step": 103940 + }, + { + "epoch": 4.85, + "learning_rate": 3.869233301459408e-06, + "loss": 0.013, + "step": 103945 + }, + { + "epoch": 4.85, + "learning_rate": 3.8684495164046214e-06, + "loss": 0.0583, + "step": 103950 + }, + { + "epoch": 4.85, + "learning_rate": 3.867665731349835e-06, + "loss": 0.0442, + "step": 103955 + }, + { + "epoch": 4.85, + "learning_rate": 3.866881946295048e-06, + "loss": 0.0508, + "step": 103960 + }, + { + "epoch": 4.85, + "learning_rate": 3.866098161240262e-06, + "loss": 0.2123, + "step": 103965 + }, + { + "epoch": 4.85, + "learning_rate": 3.865314376185475e-06, + "loss": 0.1634, + "step": 103970 + }, + { + "epoch": 4.85, + "learning_rate": 3.864530591130688e-06, + "loss": 0.3576, + "step": 103975 + }, + { + "epoch": 4.85, + "learning_rate": 3.863746806075902e-06, + "loss": 0.0756, + "step": 103980 + }, + { + "epoch": 4.85, + "learning_rate": 3.862963021021115e-06, + "loss": 0.0478, + "step": 103985 + }, + { + "epoch": 4.85, + "learning_rate": 3.862179235966329e-06, + "loss": 0.022, + "step": 103990 + }, + { + "epoch": 4.85, + "learning_rate": 3.861395450911542e-06, + "loss": 0.0396, + "step": 103995 + }, + { + "epoch": 4.85, + "learning_rate": 3.860611665856755e-06, + "loss": 0.0329, + "step": 104000 + }, + { + "epoch": 4.85, + "learning_rate": 3.859827880801969e-06, + "loss": 0.0604, + "step": 104005 + }, + { + "epoch": 4.85, + "learning_rate": 3.859044095747182e-06, + "loss": 0.0841, + "step": 104010 + }, + { + "epoch": 4.85, + "learning_rate": 3.858260310692396e-06, + "loss": 0.1389, + "step": 104015 + }, + { + "epoch": 4.85, + "learning_rate": 3.857476525637609e-06, + "loss": 0.1726, + "step": 104020 + }, + { + "epoch": 4.85, + "learning_rate": 3.856692740582823e-06, + "loss": 0.1992, + "step": 104025 + }, + { + "epoch": 4.85, + "learning_rate": 3.855908955528036e-06, + "loss": 0.0709, + "step": 104030 + }, + { + "epoch": 4.85, + "learning_rate": 3.855125170473249e-06, + "loss": 0.0322, + "step": 104035 + }, + { + "epoch": 4.85, + "learning_rate": 3.854341385418463e-06, + "loss": 0.0241, + "step": 104040 + }, + { + "epoch": 4.85, + "learning_rate": 3.853557600363676e-06, + "loss": 0.0664, + "step": 104045 + }, + { + "epoch": 4.86, + "learning_rate": 3.85277381530889e-06, + "loss": 0.0412, + "step": 104050 + }, + { + "epoch": 4.86, + "learning_rate": 3.851990030254103e-06, + "loss": 0.0373, + "step": 104055 + }, + { + "epoch": 4.86, + "learning_rate": 3.851206245199317e-06, + "loss": 0.101, + "step": 104060 + }, + { + "epoch": 4.86, + "learning_rate": 3.85042246014453e-06, + "loss": 0.0951, + "step": 104065 + }, + { + "epoch": 4.86, + "learning_rate": 3.849638675089743e-06, + "loss": 0.1652, + "step": 104070 + }, + { + "epoch": 4.86, + "learning_rate": 3.848854890034957e-06, + "loss": 0.2592, + "step": 104075 + }, + { + "epoch": 4.86, + "learning_rate": 3.84807110498017e-06, + "loss": 0.1164, + "step": 104080 + }, + { + "epoch": 4.86, + "learning_rate": 3.847287319925384e-06, + "loss": 0.0214, + "step": 104085 + }, + { + "epoch": 4.86, + "learning_rate": 3.846503534870597e-06, + "loss": 0.022, + "step": 104090 + }, + { + "epoch": 4.86, + "learning_rate": 3.845719749815811e-06, + "loss": 0.06, + "step": 104095 + }, + { + "epoch": 4.86, + "learning_rate": 3.844935964761024e-06, + "loss": 0.039, + "step": 104100 + }, + { + "epoch": 4.86, + "learning_rate": 3.844152179706237e-06, + "loss": 0.0806, + "step": 104105 + }, + { + "epoch": 4.86, + "learning_rate": 3.843368394651451e-06, + "loss": 0.1003, + "step": 104110 + }, + { + "epoch": 4.86, + "learning_rate": 3.842584609596664e-06, + "loss": 0.1216, + "step": 104115 + }, + { + "epoch": 4.86, + "learning_rate": 3.841800824541878e-06, + "loss": 0.1058, + "step": 104120 + }, + { + "epoch": 4.86, + "learning_rate": 3.841017039487091e-06, + "loss": 0.3209, + "step": 104125 + }, + { + "epoch": 4.86, + "learning_rate": 3.840233254432304e-06, + "loss": 0.063, + "step": 104130 + }, + { + "epoch": 4.86, + "learning_rate": 3.839449469377518e-06, + "loss": 0.0431, + "step": 104135 + }, + { + "epoch": 4.86, + "learning_rate": 3.838665684322731e-06, + "loss": 0.0974, + "step": 104140 + }, + { + "epoch": 4.86, + "learning_rate": 3.837881899267945e-06, + "loss": 0.097, + "step": 104145 + }, + { + "epoch": 4.86, + "learning_rate": 3.837098114213158e-06, + "loss": 0.0729, + "step": 104150 + }, + { + "epoch": 4.86, + "learning_rate": 3.836314329158372e-06, + "loss": 0.1119, + "step": 104155 + }, + { + "epoch": 4.86, + "learning_rate": 3.8356873011145425e-06, + "loss": 0.0985, + "step": 104160 + }, + { + "epoch": 4.86, + "learning_rate": 3.834903516059756e-06, + "loss": 0.1325, + "step": 104165 + }, + { + "epoch": 4.86, + "learning_rate": 3.8341197310049695e-06, + "loss": 0.1151, + "step": 104170 + }, + { + "epoch": 4.86, + "learning_rate": 3.8333359459501826e-06, + "loss": 0.2696, + "step": 104175 + }, + { + "epoch": 4.86, + "learning_rate": 3.8325521608953965e-06, + "loss": 0.0508, + "step": 104180 + }, + { + "epoch": 4.86, + "learning_rate": 3.8317683758406095e-06, + "loss": 0.0219, + "step": 104185 + }, + { + "epoch": 4.86, + "learning_rate": 3.8309845907858234e-06, + "loss": 0.0272, + "step": 104190 + }, + { + "epoch": 4.86, + "learning_rate": 3.8302008057310365e-06, + "loss": 0.0218, + "step": 104195 + }, + { + "epoch": 4.86, + "learning_rate": 3.8294170206762495e-06, + "loss": 0.031, + "step": 104200 + }, + { + "epoch": 4.86, + "learning_rate": 3.8286332356214634e-06, + "loss": 0.0838, + "step": 104205 + }, + { + "epoch": 4.86, + "learning_rate": 3.8278494505666765e-06, + "loss": 0.115, + "step": 104210 + }, + { + "epoch": 4.86, + "learning_rate": 3.82706566551189e-06, + "loss": 0.1272, + "step": 104215 + }, + { + "epoch": 4.86, + "learning_rate": 3.826281880457104e-06, + "loss": 0.0836, + "step": 104220 + }, + { + "epoch": 4.86, + "learning_rate": 3.825498095402317e-06, + "loss": 0.2306, + "step": 104225 + }, + { + "epoch": 4.86, + "learning_rate": 3.8247143103475304e-06, + "loss": 0.0771, + "step": 104230 + }, + { + "epoch": 4.86, + "learning_rate": 3.8239305252927435e-06, + "loss": 0.0475, + "step": 104235 + }, + { + "epoch": 4.86, + "learning_rate": 3.823146740237957e-06, + "loss": 0.0571, + "step": 104240 + }, + { + "epoch": 4.86, + "learning_rate": 3.822362955183171e-06, + "loss": 0.0217, + "step": 104245 + }, + { + "epoch": 4.86, + "learning_rate": 3.821579170128384e-06, + "loss": 0.0567, + "step": 104250 + }, + { + "epoch": 4.86, + "learning_rate": 3.820795385073598e-06, + "loss": 0.0462, + "step": 104255 + }, + { + "epoch": 4.86, + "learning_rate": 3.8200116000188105e-06, + "loss": 0.0543, + "step": 104260 + }, + { + "epoch": 4.87, + "learning_rate": 3.819227814964024e-06, + "loss": 0.0734, + "step": 104265 + }, + { + "epoch": 4.87, + "learning_rate": 3.818444029909238e-06, + "loss": 0.1393, + "step": 104270 + }, + { + "epoch": 4.87, + "learning_rate": 3.817660244854451e-06, + "loss": 0.4331, + "step": 104275 + }, + { + "epoch": 4.87, + "learning_rate": 3.816876459799665e-06, + "loss": 0.0945, + "step": 104280 + }, + { + "epoch": 4.87, + "learning_rate": 3.816092674744878e-06, + "loss": 0.0238, + "step": 104285 + }, + { + "epoch": 4.87, + "learning_rate": 3.815308889690092e-06, + "loss": 0.0089, + "step": 104290 + }, + { + "epoch": 4.87, + "learning_rate": 3.814525104635305e-06, + "loss": 0.0413, + "step": 104295 + }, + { + "epoch": 4.87, + "learning_rate": 3.8137413195805183e-06, + "loss": 0.0926, + "step": 104300 + }, + { + "epoch": 4.87, + "learning_rate": 3.812957534525732e-06, + "loss": 0.0629, + "step": 104305 + }, + { + "epoch": 4.87, + "learning_rate": 3.8121737494709453e-06, + "loss": 0.0692, + "step": 104310 + }, + { + "epoch": 4.87, + "learning_rate": 3.8113899644161588e-06, + "loss": 0.1042, + "step": 104315 + }, + { + "epoch": 4.87, + "learning_rate": 3.8106061793613723e-06, + "loss": 0.2014, + "step": 104320 + }, + { + "epoch": 4.87, + "learning_rate": 3.8098223943065853e-06, + "loss": 0.3619, + "step": 104325 + }, + { + "epoch": 4.87, + "learning_rate": 3.809038609251799e-06, + "loss": 0.0675, + "step": 104330 + }, + { + "epoch": 4.87, + "learning_rate": 3.8082548241970123e-06, + "loss": 0.0589, + "step": 104335 + }, + { + "epoch": 4.87, + "learning_rate": 3.8074710391422258e-06, + "loss": 0.0188, + "step": 104340 + }, + { + "epoch": 4.87, + "learning_rate": 3.8066872540874393e-06, + "loss": 0.0493, + "step": 104345 + }, + { + "epoch": 4.87, + "learning_rate": 3.805903469032653e-06, + "loss": 0.0553, + "step": 104350 + }, + { + "epoch": 4.87, + "learning_rate": 3.8051196839778666e-06, + "loss": 0.0499, + "step": 104355 + }, + { + "epoch": 4.87, + "learning_rate": 3.8043358989230793e-06, + "loss": 0.1176, + "step": 104360 + }, + { + "epoch": 4.87, + "learning_rate": 3.8035521138682928e-06, + "loss": 0.1711, + "step": 104365 + }, + { + "epoch": 4.87, + "learning_rate": 3.8027683288135062e-06, + "loss": 0.1206, + "step": 104370 + }, + { + "epoch": 4.87, + "learning_rate": 3.80198454375872e-06, + "loss": 0.3171, + "step": 104375 + }, + { + "epoch": 4.87, + "learning_rate": 3.8012007587039336e-06, + "loss": 0.1069, + "step": 104380 + }, + { + "epoch": 4.87, + "learning_rate": 3.800416973649147e-06, + "loss": 0.0294, + "step": 104385 + }, + { + "epoch": 4.87, + "learning_rate": 3.7996331885943598e-06, + "loss": 0.0244, + "step": 104390 + }, + { + "epoch": 4.87, + "learning_rate": 3.7988494035395732e-06, + "loss": 0.0496, + "step": 104395 + }, + { + "epoch": 4.87, + "learning_rate": 3.798065618484787e-06, + "loss": 0.0659, + "step": 104400 + }, + { + "epoch": 4.87, + "learning_rate": 3.7972818334300006e-06, + "loss": 0.0906, + "step": 104405 + }, + { + "epoch": 4.87, + "learning_rate": 3.796498048375214e-06, + "loss": 0.0464, + "step": 104410 + }, + { + "epoch": 4.87, + "learning_rate": 3.7957142633204276e-06, + "loss": 0.074, + "step": 104415 + }, + { + "epoch": 4.87, + "learning_rate": 3.794930478265641e-06, + "loss": 0.1876, + "step": 104420 + }, + { + "epoch": 4.87, + "learning_rate": 3.794146693210854e-06, + "loss": 0.1694, + "step": 104425 + }, + { + "epoch": 4.87, + "learning_rate": 3.7933629081560676e-06, + "loss": 0.0543, + "step": 104430 + }, + { + "epoch": 4.87, + "learning_rate": 3.792579123101281e-06, + "loss": 0.0542, + "step": 104435 + }, + { + "epoch": 4.87, + "learning_rate": 3.7917953380464946e-06, + "loss": 0.0331, + "step": 104440 + }, + { + "epoch": 4.87, + "learning_rate": 3.791011552991708e-06, + "loss": 0.0342, + "step": 104445 + }, + { + "epoch": 4.87, + "learning_rate": 3.7902277679369215e-06, + "loss": 0.0512, + "step": 104450 + }, + { + "epoch": 4.87, + "learning_rate": 3.7894439828821346e-06, + "loss": 0.0409, + "step": 104455 + }, + { + "epoch": 4.87, + "learning_rate": 3.788660197827348e-06, + "loss": 0.0698, + "step": 104460 + }, + { + "epoch": 4.87, + "learning_rate": 3.7878764127725616e-06, + "loss": 0.1367, + "step": 104465 + }, + { + "epoch": 4.87, + "learning_rate": 3.787092627717775e-06, + "loss": 0.0767, + "step": 104470 + }, + { + "epoch": 4.87, + "learning_rate": 3.7863088426629885e-06, + "loss": 0.2956, + "step": 104475 + }, + { + "epoch": 4.88, + "learning_rate": 3.785525057608202e-06, + "loss": 0.1179, + "step": 104480 + }, + { + "epoch": 4.88, + "learning_rate": 3.7847412725534155e-06, + "loss": 0.0188, + "step": 104485 + }, + { + "epoch": 4.88, + "learning_rate": 3.7839574874986285e-06, + "loss": 0.0236, + "step": 104490 + }, + { + "epoch": 4.88, + "learning_rate": 3.783173702443842e-06, + "loss": 0.0645, + "step": 104495 + }, + { + "epoch": 4.88, + "learning_rate": 3.7823899173890555e-06, + "loss": 0.0343, + "step": 104500 + }, + { + "epoch": 4.88, + "learning_rate": 3.781606132334269e-06, + "loss": 0.0502, + "step": 104505 + }, + { + "epoch": 4.88, + "learning_rate": 3.7808223472794825e-06, + "loss": 0.0718, + "step": 104510 + }, + { + "epoch": 4.88, + "learning_rate": 3.780038562224696e-06, + "loss": 0.1089, + "step": 104515 + }, + { + "epoch": 4.88, + "learning_rate": 3.779254777169909e-06, + "loss": 0.1572, + "step": 104520 + }, + { + "epoch": 4.88, + "learning_rate": 3.7784709921151225e-06, + "loss": 0.2072, + "step": 104525 + }, + { + "epoch": 4.88, + "learning_rate": 3.777687207060336e-06, + "loss": 0.0979, + "step": 104530 + }, + { + "epoch": 4.88, + "learning_rate": 3.7769034220055495e-06, + "loss": 0.0279, + "step": 104535 + }, + { + "epoch": 4.88, + "learning_rate": 3.776119636950763e-06, + "loss": 0.0278, + "step": 104540 + }, + { + "epoch": 4.88, + "learning_rate": 3.7753358518959764e-06, + "loss": 0.0386, + "step": 104545 + }, + { + "epoch": 4.88, + "learning_rate": 3.77455206684119e-06, + "loss": 0.091, + "step": 104550 + }, + { + "epoch": 4.88, + "learning_rate": 3.773768281786403e-06, + "loss": 0.0301, + "step": 104555 + }, + { + "epoch": 4.88, + "learning_rate": 3.7729844967316165e-06, + "loss": 0.0991, + "step": 104560 + }, + { + "epoch": 4.88, + "learning_rate": 3.77220071167683e-06, + "loss": 0.1363, + "step": 104565 + }, + { + "epoch": 4.88, + "learning_rate": 3.7714169266220434e-06, + "loss": 0.1163, + "step": 104570 + }, + { + "epoch": 4.88, + "learning_rate": 3.770633141567257e-06, + "loss": 0.1723, + "step": 104575 + }, + { + "epoch": 4.88, + "learning_rate": 3.7698493565124704e-06, + "loss": 0.0797, + "step": 104580 + }, + { + "epoch": 4.88, + "learning_rate": 3.7690655714576834e-06, + "loss": 0.039, + "step": 104585 + }, + { + "epoch": 4.88, + "learning_rate": 3.768281786402897e-06, + "loss": 0.0169, + "step": 104590 + }, + { + "epoch": 4.88, + "learning_rate": 3.7674980013481104e-06, + "loss": 0.0126, + "step": 104595 + }, + { + "epoch": 4.88, + "learning_rate": 3.766714216293324e-06, + "loss": 0.0455, + "step": 104600 + }, + { + "epoch": 4.88, + "learning_rate": 3.7659304312385374e-06, + "loss": 0.1075, + "step": 104605 + }, + { + "epoch": 4.88, + "learning_rate": 3.765146646183751e-06, + "loss": 0.0571, + "step": 104610 + }, + { + "epoch": 4.88, + "learning_rate": 3.7643628611289643e-06, + "loss": 0.0888, + "step": 104615 + }, + { + "epoch": 4.88, + "learning_rate": 3.7635790760741774e-06, + "loss": 0.2116, + "step": 104620 + }, + { + "epoch": 4.88, + "learning_rate": 3.762795291019391e-06, + "loss": 0.2732, + "step": 104625 + }, + { + "epoch": 4.88, + "learning_rate": 3.7620115059646044e-06, + "loss": 0.0612, + "step": 104630 + }, + { + "epoch": 4.88, + "learning_rate": 3.761227720909818e-06, + "loss": 0.0124, + "step": 104635 + }, + { + "epoch": 4.88, + "learning_rate": 3.7604439358550317e-06, + "loss": 0.0435, + "step": 104640 + }, + { + "epoch": 4.88, + "learning_rate": 3.7596601508002452e-06, + "loss": 0.0506, + "step": 104645 + }, + { + "epoch": 4.88, + "learning_rate": 3.758876365745458e-06, + "loss": 0.0708, + "step": 104650 + }, + { + "epoch": 4.88, + "learning_rate": 3.7580925806906713e-06, + "loss": 0.1099, + "step": 104655 + }, + { + "epoch": 4.88, + "learning_rate": 3.757308795635885e-06, + "loss": 0.0849, + "step": 104660 + }, + { + "epoch": 4.88, + "learning_rate": 3.7565250105810987e-06, + "loss": 0.081, + "step": 104665 + }, + { + "epoch": 4.88, + "learning_rate": 3.7557412255263122e-06, + "loss": 0.1532, + "step": 104670 + }, + { + "epoch": 4.88, + "learning_rate": 3.7549574404715257e-06, + "loss": 0.2863, + "step": 104675 + }, + { + "epoch": 4.88, + "learning_rate": 3.754173655416739e-06, + "loss": 0.0662, + "step": 104680 + }, + { + "epoch": 4.88, + "learning_rate": 3.753389870361952e-06, + "loss": 0.0086, + "step": 104685 + }, + { + "epoch": 4.88, + "learning_rate": 3.7526060853071657e-06, + "loss": 0.0236, + "step": 104690 + }, + { + "epoch": 4.89, + "learning_rate": 3.751822300252379e-06, + "loss": 0.0273, + "step": 104695 + }, + { + "epoch": 4.89, + "learning_rate": 3.7510385151975927e-06, + "loss": 0.0747, + "step": 104700 + }, + { + "epoch": 4.89, + "learning_rate": 3.750254730142806e-06, + "loss": 0.0386, + "step": 104705 + }, + { + "epoch": 4.89, + "learning_rate": 3.7494709450880197e-06, + "loss": 0.0544, + "step": 104710 + }, + { + "epoch": 4.89, + "learning_rate": 3.7486871600332327e-06, + "loss": 0.0868, + "step": 104715 + }, + { + "epoch": 4.89, + "learning_rate": 3.747903374978446e-06, + "loss": 0.1007, + "step": 104720 + }, + { + "epoch": 4.89, + "learning_rate": 3.7471195899236597e-06, + "loss": 0.2702, + "step": 104725 + }, + { + "epoch": 4.89, + "learning_rate": 3.746335804868873e-06, + "loss": 0.0489, + "step": 104730 + }, + { + "epoch": 4.89, + "learning_rate": 3.7455520198140866e-06, + "loss": 0.0162, + "step": 104735 + }, + { + "epoch": 4.89, + "learning_rate": 3.7447682347593e-06, + "loss": 0.0074, + "step": 104740 + }, + { + "epoch": 4.89, + "learning_rate": 3.7439844497045136e-06, + "loss": 0.0208, + "step": 104745 + }, + { + "epoch": 4.89, + "learning_rate": 3.7432006646497267e-06, + "loss": 0.0309, + "step": 104750 + }, + { + "epoch": 4.89, + "learning_rate": 3.74241687959494e-06, + "loss": 0.1404, + "step": 104755 + }, + { + "epoch": 4.89, + "learning_rate": 3.7416330945401536e-06, + "loss": 0.0762, + "step": 104760 + }, + { + "epoch": 4.89, + "learning_rate": 3.740849309485367e-06, + "loss": 0.0931, + "step": 104765 + }, + { + "epoch": 4.89, + "learning_rate": 3.7400655244305806e-06, + "loss": 0.1815, + "step": 104770 + }, + { + "epoch": 4.89, + "learning_rate": 3.739281739375794e-06, + "loss": 0.204, + "step": 104775 + }, + { + "epoch": 4.89, + "learning_rate": 3.738497954321007e-06, + "loss": 0.0989, + "step": 104780 + }, + { + "epoch": 4.89, + "learning_rate": 3.7377141692662206e-06, + "loss": 0.0097, + "step": 104785 + }, + { + "epoch": 4.89, + "learning_rate": 3.736930384211434e-06, + "loss": 0.0749, + "step": 104790 + }, + { + "epoch": 4.89, + "learning_rate": 3.7361465991566476e-06, + "loss": 0.054, + "step": 104795 + }, + { + "epoch": 4.89, + "learning_rate": 3.735362814101861e-06, + "loss": 0.1055, + "step": 104800 + }, + { + "epoch": 4.89, + "learning_rate": 3.7345790290470745e-06, + "loss": 0.1324, + "step": 104805 + }, + { + "epoch": 4.89, + "learning_rate": 3.733795243992288e-06, + "loss": 0.117, + "step": 104810 + }, + { + "epoch": 4.89, + "learning_rate": 3.733011458937501e-06, + "loss": 0.1579, + "step": 104815 + }, + { + "epoch": 4.89, + "learning_rate": 3.7322276738827146e-06, + "loss": 0.267, + "step": 104820 + }, + { + "epoch": 4.89, + "learning_rate": 3.731443888827928e-06, + "loss": 0.3815, + "step": 104825 + }, + { + "epoch": 4.89, + "learning_rate": 3.7306601037731415e-06, + "loss": 0.0846, + "step": 104830 + }, + { + "epoch": 4.89, + "learning_rate": 3.729876318718355e-06, + "loss": 0.0701, + "step": 104835 + }, + { + "epoch": 4.89, + "learning_rate": 3.7290925336635685e-06, + "loss": 0.0461, + "step": 104840 + }, + { + "epoch": 4.89, + "learning_rate": 3.7283087486087816e-06, + "loss": 0.1035, + "step": 104845 + }, + { + "epoch": 4.89, + "learning_rate": 3.727524963553995e-06, + "loss": 0.0412, + "step": 104850 + }, + { + "epoch": 4.89, + "learning_rate": 3.7267411784992085e-06, + "loss": 0.0638, + "step": 104855 + }, + { + "epoch": 4.89, + "learning_rate": 3.725957393444422e-06, + "loss": 0.0766, + "step": 104860 + }, + { + "epoch": 4.89, + "learning_rate": 3.7251736083896355e-06, + "loss": 0.126, + "step": 104865 + }, + { + "epoch": 4.89, + "learning_rate": 3.724389823334849e-06, + "loss": 0.2201, + "step": 104870 + }, + { + "epoch": 4.89, + "learning_rate": 3.7236060382800624e-06, + "loss": 0.2927, + "step": 104875 + }, + { + "epoch": 4.89, + "learning_rate": 3.7228222532252755e-06, + "loss": 0.0888, + "step": 104880 + }, + { + "epoch": 4.89, + "learning_rate": 3.722038468170489e-06, + "loss": 0.0045, + "step": 104885 + }, + { + "epoch": 4.89, + "learning_rate": 3.7212546831157025e-06, + "loss": 0.0513, + "step": 104890 + }, + { + "epoch": 4.89, + "learning_rate": 3.720470898060916e-06, + "loss": 0.0512, + "step": 104895 + }, + { + "epoch": 4.89, + "learning_rate": 3.7196871130061294e-06, + "loss": 0.0232, + "step": 104900 + }, + { + "epoch": 4.9, + "learning_rate": 3.718903327951343e-06, + "loss": 0.1024, + "step": 104905 + }, + { + "epoch": 4.9, + "learning_rate": 3.718119542896556e-06, + "loss": 0.0991, + "step": 104910 + }, + { + "epoch": 4.9, + "learning_rate": 3.7173357578417695e-06, + "loss": 0.1334, + "step": 104915 + }, + { + "epoch": 4.9, + "learning_rate": 3.716551972786983e-06, + "loss": 0.1207, + "step": 104920 + }, + { + "epoch": 4.9, + "learning_rate": 3.7157681877321964e-06, + "loss": 0.1867, + "step": 104925 + }, + { + "epoch": 4.9, + "learning_rate": 3.7149844026774103e-06, + "loss": 0.1111, + "step": 104930 + }, + { + "epoch": 4.9, + "learning_rate": 3.714200617622624e-06, + "loss": 0.0232, + "step": 104935 + }, + { + "epoch": 4.9, + "learning_rate": 3.7134168325678373e-06, + "loss": 0.0331, + "step": 104940 + }, + { + "epoch": 4.9, + "learning_rate": 3.71263304751305e-06, + "loss": 0.0229, + "step": 104945 + }, + { + "epoch": 4.9, + "learning_rate": 3.7118492624582634e-06, + "loss": 0.0565, + "step": 104950 + }, + { + "epoch": 4.9, + "learning_rate": 3.7110654774034773e-06, + "loss": 0.0266, + "step": 104955 + }, + { + "epoch": 4.9, + "learning_rate": 3.710281692348691e-06, + "loss": 0.0631, + "step": 104960 + }, + { + "epoch": 4.9, + "learning_rate": 3.7094979072939043e-06, + "loss": 0.0721, + "step": 104965 + }, + { + "epoch": 4.9, + "learning_rate": 3.7087141222391178e-06, + "loss": 0.1649, + "step": 104970 + }, + { + "epoch": 4.9, + "learning_rate": 3.7079303371843304e-06, + "loss": 0.2464, + "step": 104975 + }, + { + "epoch": 4.9, + "learning_rate": 3.7071465521295443e-06, + "loss": 0.0854, + "step": 104980 + }, + { + "epoch": 4.9, + "learning_rate": 3.7063627670747578e-06, + "loss": 0.0231, + "step": 104985 + }, + { + "epoch": 4.9, + "learning_rate": 3.7055789820199713e-06, + "loss": 0.0248, + "step": 104990 + }, + { + "epoch": 4.9, + "learning_rate": 3.7047951969651848e-06, + "loss": 0.0292, + "step": 104995 + }, + { + "epoch": 4.9, + "learning_rate": 3.7040114119103982e-06, + "loss": 0.0154, + "step": 105000 + }, + { + "epoch": 4.9, + "learning_rate": 3.7032276268556117e-06, + "loss": 0.0512, + "step": 105005 + }, + { + "epoch": 4.9, + "learning_rate": 3.7024438418008248e-06, + "loss": 0.0861, + "step": 105010 + }, + { + "epoch": 4.9, + "learning_rate": 3.7016600567460383e-06, + "loss": 0.1469, + "step": 105015 + }, + { + "epoch": 4.9, + "learning_rate": 3.7008762716912517e-06, + "loss": 0.1815, + "step": 105020 + }, + { + "epoch": 4.9, + "learning_rate": 3.7000924866364652e-06, + "loss": 0.2861, + "step": 105025 + }, + { + "epoch": 4.9, + "learning_rate": 3.6993087015816787e-06, + "loss": 0.1016, + "step": 105030 + }, + { + "epoch": 4.9, + "learning_rate": 3.698524916526892e-06, + "loss": 0.0331, + "step": 105035 + }, + { + "epoch": 4.9, + "learning_rate": 3.6977411314721052e-06, + "loss": 0.0587, + "step": 105040 + }, + { + "epoch": 4.9, + "learning_rate": 3.6969573464173187e-06, + "loss": 0.0411, + "step": 105045 + }, + { + "epoch": 4.9, + "learning_rate": 3.696173561362532e-06, + "loss": 0.0557, + "step": 105050 + }, + { + "epoch": 4.9, + "learning_rate": 3.6953897763077457e-06, + "loss": 0.0695, + "step": 105055 + }, + { + "epoch": 4.9, + "learning_rate": 3.694605991252959e-06, + "loss": 0.0538, + "step": 105060 + }, + { + "epoch": 4.9, + "learning_rate": 3.6938222061981727e-06, + "loss": 0.1193, + "step": 105065 + }, + { + "epoch": 4.9, + "learning_rate": 3.693038421143386e-06, + "loss": 0.2479, + "step": 105070 + }, + { + "epoch": 4.9, + "learning_rate": 3.692254636088599e-06, + "loss": 0.1821, + "step": 105075 + }, + { + "epoch": 4.9, + "learning_rate": 3.6914708510338127e-06, + "loss": 0.0823, + "step": 105080 + }, + { + "epoch": 4.9, + "learning_rate": 3.690687065979026e-06, + "loss": 0.0265, + "step": 105085 + }, + { + "epoch": 4.9, + "learning_rate": 3.6899032809242396e-06, + "loss": 0.038, + "step": 105090 + }, + { + "epoch": 4.9, + "learning_rate": 3.689119495869453e-06, + "loss": 0.0093, + "step": 105095 + }, + { + "epoch": 4.9, + "learning_rate": 3.6883357108146666e-06, + "loss": 0.0537, + "step": 105100 + }, + { + "epoch": 4.9, + "learning_rate": 3.6875519257598797e-06, + "loss": 0.0959, + "step": 105105 + }, + { + "epoch": 4.9, + "learning_rate": 3.686768140705093e-06, + "loss": 0.1264, + "step": 105110 + }, + { + "epoch": 4.9, + "learning_rate": 3.6859843556503066e-06, + "loss": 0.071, + "step": 105115 + }, + { + "epoch": 4.91, + "learning_rate": 3.68520057059552e-06, + "loss": 0.1434, + "step": 105120 + }, + { + "epoch": 4.91, + "learning_rate": 3.6844167855407336e-06, + "loss": 0.564, + "step": 105125 + }, + { + "epoch": 4.91, + "learning_rate": 3.683633000485947e-06, + "loss": 0.0735, + "step": 105130 + }, + { + "epoch": 4.91, + "learning_rate": 3.6828492154311606e-06, + "loss": 0.0766, + "step": 105135 + }, + { + "epoch": 4.91, + "learning_rate": 3.6820654303763736e-06, + "loss": 0.0182, + "step": 105140 + }, + { + "epoch": 4.91, + "learning_rate": 3.681281645321587e-06, + "loss": 0.0329, + "step": 105145 + }, + { + "epoch": 4.91, + "learning_rate": 3.6804978602668006e-06, + "loss": 0.0504, + "step": 105150 + }, + { + "epoch": 4.91, + "learning_rate": 3.679714075212014e-06, + "loss": 0.0693, + "step": 105155 + }, + { + "epoch": 4.91, + "learning_rate": 3.6789302901572275e-06, + "loss": 0.1481, + "step": 105160 + }, + { + "epoch": 4.91, + "learning_rate": 3.678146505102441e-06, + "loss": 0.1619, + "step": 105165 + }, + { + "epoch": 4.91, + "learning_rate": 3.677362720047654e-06, + "loss": 0.197, + "step": 105170 + }, + { + "epoch": 4.91, + "learning_rate": 3.6765789349928676e-06, + "loss": 0.2148, + "step": 105175 + }, + { + "epoch": 4.91, + "learning_rate": 3.675795149938081e-06, + "loss": 0.0953, + "step": 105180 + }, + { + "epoch": 4.91, + "learning_rate": 3.6750113648832945e-06, + "loss": 0.0041, + "step": 105185 + }, + { + "epoch": 4.91, + "learning_rate": 3.674227579828508e-06, + "loss": 0.028, + "step": 105190 + }, + { + "epoch": 4.91, + "learning_rate": 3.6734437947737215e-06, + "loss": 0.0801, + "step": 105195 + }, + { + "epoch": 4.91, + "learning_rate": 3.6726600097189354e-06, + "loss": 0.0918, + "step": 105200 + }, + { + "epoch": 4.91, + "learning_rate": 3.671876224664148e-06, + "loss": 0.1148, + "step": 105205 + }, + { + "epoch": 4.91, + "learning_rate": 3.6710924396093615e-06, + "loss": 0.0428, + "step": 105210 + }, + { + "epoch": 4.91, + "learning_rate": 3.670308654554575e-06, + "loss": 0.1085, + "step": 105215 + }, + { + "epoch": 4.91, + "learning_rate": 3.669524869499789e-06, + "loss": 0.1252, + "step": 105220 + }, + { + "epoch": 4.91, + "learning_rate": 3.6687410844450024e-06, + "loss": 0.2745, + "step": 105225 + }, + { + "epoch": 4.91, + "learning_rate": 3.667957299390216e-06, + "loss": 0.0748, + "step": 105230 + }, + { + "epoch": 4.91, + "learning_rate": 3.6671735143354285e-06, + "loss": 0.0161, + "step": 105235 + }, + { + "epoch": 4.91, + "learning_rate": 3.666389729280642e-06, + "loss": 0.0287, + "step": 105240 + }, + { + "epoch": 4.91, + "learning_rate": 3.665605944225856e-06, + "loss": 0.0626, + "step": 105245 + }, + { + "epoch": 4.91, + "learning_rate": 3.6648221591710694e-06, + "loss": 0.1104, + "step": 105250 + }, + { + "epoch": 4.91, + "learning_rate": 3.664038374116283e-06, + "loss": 0.0796, + "step": 105255 + }, + { + "epoch": 4.91, + "learning_rate": 3.6632545890614963e-06, + "loss": 0.1188, + "step": 105260 + }, + { + "epoch": 4.91, + "learning_rate": 3.66247080400671e-06, + "loss": 0.1062, + "step": 105265 + }, + { + "epoch": 4.91, + "learning_rate": 3.661687018951923e-06, + "loss": 0.1849, + "step": 105270 + }, + { + "epoch": 4.91, + "learning_rate": 3.6609032338971364e-06, + "loss": 0.3491, + "step": 105275 + }, + { + "epoch": 4.91, + "learning_rate": 3.66011944884235e-06, + "loss": 0.1149, + "step": 105280 + }, + { + "epoch": 4.91, + "learning_rate": 3.6593356637875633e-06, + "loss": 0.0833, + "step": 105285 + }, + { + "epoch": 4.91, + "learning_rate": 3.658551878732777e-06, + "loss": 0.0371, + "step": 105290 + }, + { + "epoch": 4.91, + "learning_rate": 3.6577680936779903e-06, + "loss": 0.0682, + "step": 105295 + }, + { + "epoch": 4.91, + "learning_rate": 3.6569843086232038e-06, + "loss": 0.0367, + "step": 105300 + }, + { + "epoch": 4.91, + "learning_rate": 3.656200523568417e-06, + "loss": 0.0784, + "step": 105305 + }, + { + "epoch": 4.91, + "learning_rate": 3.6554167385136303e-06, + "loss": 0.1564, + "step": 105310 + }, + { + "epoch": 4.91, + "learning_rate": 3.654632953458844e-06, + "loss": 0.1572, + "step": 105315 + }, + { + "epoch": 4.91, + "learning_rate": 3.6538491684040573e-06, + "loss": 0.1604, + "step": 105320 + }, + { + "epoch": 4.91, + "learning_rate": 3.6530653833492708e-06, + "loss": 0.2611, + "step": 105325 + }, + { + "epoch": 4.91, + "learning_rate": 3.6522815982944843e-06, + "loss": 0.1115, + "step": 105330 + }, + { + "epoch": 4.92, + "learning_rate": 3.6514978132396973e-06, + "loss": 0.0122, + "step": 105335 + }, + { + "epoch": 4.92, + "learning_rate": 3.650714028184911e-06, + "loss": 0.0497, + "step": 105340 + }, + { + "epoch": 4.92, + "learning_rate": 3.6499302431301243e-06, + "loss": 0.0518, + "step": 105345 + }, + { + "epoch": 4.92, + "learning_rate": 3.6491464580753378e-06, + "loss": 0.0271, + "step": 105350 + }, + { + "epoch": 4.92, + "learning_rate": 3.6483626730205512e-06, + "loss": 0.1048, + "step": 105355 + }, + { + "epoch": 4.92, + "learning_rate": 3.6475788879657647e-06, + "loss": 0.1923, + "step": 105360 + }, + { + "epoch": 4.92, + "learning_rate": 3.646795102910978e-06, + "loss": 0.1279, + "step": 105365 + }, + { + "epoch": 4.92, + "learning_rate": 3.6460113178561913e-06, + "loss": 0.1805, + "step": 105370 + }, + { + "epoch": 4.92, + "learning_rate": 3.6452275328014047e-06, + "loss": 0.3541, + "step": 105375 + }, + { + "epoch": 4.92, + "learning_rate": 3.6444437477466182e-06, + "loss": 0.0504, + "step": 105380 + }, + { + "epoch": 4.92, + "learning_rate": 3.6436599626918317e-06, + "loss": 0.0187, + "step": 105385 + }, + { + "epoch": 4.92, + "learning_rate": 3.642876177637045e-06, + "loss": 0.0183, + "step": 105390 + }, + { + "epoch": 4.92, + "learning_rate": 3.6420923925822587e-06, + "loss": 0.0423, + "step": 105395 + }, + { + "epoch": 4.92, + "learning_rate": 3.6413086075274717e-06, + "loss": 0.0516, + "step": 105400 + }, + { + "epoch": 4.92, + "learning_rate": 3.6405248224726852e-06, + "loss": 0.0368, + "step": 105405 + }, + { + "epoch": 4.92, + "learning_rate": 3.6397410374178987e-06, + "loss": 0.1547, + "step": 105410 + }, + { + "epoch": 4.92, + "learning_rate": 3.638957252363112e-06, + "loss": 0.085, + "step": 105415 + }, + { + "epoch": 4.92, + "learning_rate": 3.6381734673083257e-06, + "loss": 0.2139, + "step": 105420 + }, + { + "epoch": 4.92, + "learning_rate": 3.637389682253539e-06, + "loss": 0.3813, + "step": 105425 + }, + { + "epoch": 4.92, + "learning_rate": 3.6366058971987526e-06, + "loss": 0.0819, + "step": 105430 + }, + { + "epoch": 4.92, + "learning_rate": 3.6358221121439657e-06, + "loss": 0.0529, + "step": 105435 + }, + { + "epoch": 4.92, + "learning_rate": 3.635038327089179e-06, + "loss": 0.0524, + "step": 105440 + }, + { + "epoch": 4.92, + "learning_rate": 3.6342545420343926e-06, + "loss": 0.041, + "step": 105445 + }, + { + "epoch": 4.92, + "learning_rate": 3.633470756979606e-06, + "loss": 0.0679, + "step": 105450 + }, + { + "epoch": 4.92, + "learning_rate": 3.6326869719248196e-06, + "loss": 0.0766, + "step": 105455 + }, + { + "epoch": 4.92, + "learning_rate": 3.631903186870033e-06, + "loss": 0.1548, + "step": 105460 + }, + { + "epoch": 4.92, + "learning_rate": 3.631119401815246e-06, + "loss": 0.1758, + "step": 105465 + }, + { + "epoch": 4.92, + "learning_rate": 3.6303356167604596e-06, + "loss": 0.1583, + "step": 105470 + }, + { + "epoch": 4.92, + "learning_rate": 3.629551831705673e-06, + "loss": 0.2218, + "step": 105475 + }, + { + "epoch": 4.92, + "learning_rate": 3.6287680466508866e-06, + "loss": 0.056, + "step": 105480 + }, + { + "epoch": 4.92, + "learning_rate": 3.6279842615961e-06, + "loss": 0.03, + "step": 105485 + }, + { + "epoch": 4.92, + "learning_rate": 3.627200476541314e-06, + "loss": 0.0319, + "step": 105490 + }, + { + "epoch": 4.92, + "learning_rate": 3.6264166914865275e-06, + "loss": 0.0156, + "step": 105495 + }, + { + "epoch": 4.92, + "learning_rate": 3.62563290643174e-06, + "loss": 0.0721, + "step": 105500 + }, + { + "epoch": 4.92, + "learning_rate": 3.6248491213769536e-06, + "loss": 0.0434, + "step": 105505 + }, + { + "epoch": 4.92, + "learning_rate": 3.6240653363221675e-06, + "loss": 0.0501, + "step": 105510 + }, + { + "epoch": 4.92, + "learning_rate": 3.623281551267381e-06, + "loss": 0.1003, + "step": 105515 + }, + { + "epoch": 4.92, + "learning_rate": 3.6224977662125945e-06, + "loss": 0.1763, + "step": 105520 + }, + { + "epoch": 4.92, + "learning_rate": 3.621713981157808e-06, + "loss": 0.1113, + "step": 105525 + }, + { + "epoch": 4.92, + "learning_rate": 3.6209301961030206e-06, + "loss": 0.1485, + "step": 105530 + }, + { + "epoch": 4.92, + "learning_rate": 3.6201464110482345e-06, + "loss": 0.0027, + "step": 105535 + }, + { + "epoch": 4.92, + "learning_rate": 3.619362625993448e-06, + "loss": 0.0314, + "step": 105540 + }, + { + "epoch": 4.92, + "learning_rate": 3.6185788409386614e-06, + "loss": 0.0288, + "step": 105545 + }, + { + "epoch": 4.93, + "learning_rate": 3.617795055883875e-06, + "loss": 0.0805, + "step": 105550 + }, + { + "epoch": 4.93, + "learning_rate": 3.6170112708290884e-06, + "loss": 0.0234, + "step": 105555 + }, + { + "epoch": 4.93, + "learning_rate": 3.616227485774302e-06, + "loss": 0.0523, + "step": 105560 + }, + { + "epoch": 4.93, + "learning_rate": 3.615443700719515e-06, + "loss": 0.1578, + "step": 105565 + }, + { + "epoch": 4.93, + "learning_rate": 3.6146599156647284e-06, + "loss": 0.1157, + "step": 105570 + }, + { + "epoch": 4.93, + "learning_rate": 3.613876130609942e-06, + "loss": 0.1026, + "step": 105575 + }, + { + "epoch": 4.93, + "learning_rate": 3.6130923455551554e-06, + "loss": 0.0726, + "step": 105580 + }, + { + "epoch": 4.93, + "learning_rate": 3.612308560500369e-06, + "loss": 0.0271, + "step": 105585 + }, + { + "epoch": 4.93, + "learning_rate": 3.6115247754455824e-06, + "loss": 0.03, + "step": 105590 + }, + { + "epoch": 4.93, + "learning_rate": 3.6107409903907954e-06, + "loss": 0.0297, + "step": 105595 + }, + { + "epoch": 4.93, + "learning_rate": 3.609957205336009e-06, + "loss": 0.0933, + "step": 105600 + }, + { + "epoch": 4.93, + "learning_rate": 3.6091734202812224e-06, + "loss": 0.0728, + "step": 105605 + }, + { + "epoch": 4.93, + "learning_rate": 3.608389635226436e-06, + "loss": 0.0844, + "step": 105610 + }, + { + "epoch": 4.93, + "learning_rate": 3.6076058501716494e-06, + "loss": 0.0902, + "step": 105615 + }, + { + "epoch": 4.93, + "learning_rate": 3.606822065116863e-06, + "loss": 0.1571, + "step": 105620 + }, + { + "epoch": 4.93, + "learning_rate": 3.6060382800620763e-06, + "loss": 0.3609, + "step": 105625 + }, + { + "epoch": 4.93, + "learning_rate": 3.6052544950072894e-06, + "loss": 0.1032, + "step": 105630 + }, + { + "epoch": 4.93, + "learning_rate": 3.604470709952503e-06, + "loss": 0.0413, + "step": 105635 + }, + { + "epoch": 4.93, + "learning_rate": 3.6036869248977163e-06, + "loss": 0.0128, + "step": 105640 + }, + { + "epoch": 4.93, + "learning_rate": 3.60290313984293e-06, + "loss": 0.0673, + "step": 105645 + }, + { + "epoch": 4.93, + "learning_rate": 3.6021193547881433e-06, + "loss": 0.0585, + "step": 105650 + }, + { + "epoch": 4.93, + "learning_rate": 3.6013355697333568e-06, + "loss": 0.0956, + "step": 105655 + }, + { + "epoch": 4.93, + "learning_rate": 3.60055178467857e-06, + "loss": 0.1386, + "step": 105660 + }, + { + "epoch": 4.93, + "learning_rate": 3.5997679996237833e-06, + "loss": 0.0706, + "step": 105665 + }, + { + "epoch": 4.93, + "learning_rate": 3.598984214568997e-06, + "loss": 0.0991, + "step": 105670 + }, + { + "epoch": 4.93, + "learning_rate": 3.5982004295142103e-06, + "loss": 0.2258, + "step": 105675 + }, + { + "epoch": 4.93, + "learning_rate": 3.5974166444594238e-06, + "loss": 0.0855, + "step": 105680 + }, + { + "epoch": 4.93, + "learning_rate": 3.5966328594046373e-06, + "loss": 0.0263, + "step": 105685 + }, + { + "epoch": 4.93, + "learning_rate": 3.5958490743498507e-06, + "loss": 0.0155, + "step": 105690 + }, + { + "epoch": 4.93, + "learning_rate": 3.595065289295064e-06, + "loss": 0.0519, + "step": 105695 + }, + { + "epoch": 4.93, + "learning_rate": 3.5942815042402773e-06, + "loss": 0.0827, + "step": 105700 + }, + { + "epoch": 4.93, + "learning_rate": 3.5934977191854908e-06, + "loss": 0.0672, + "step": 105705 + }, + { + "epoch": 4.93, + "learning_rate": 3.5927139341307042e-06, + "loss": 0.105, + "step": 105710 + }, + { + "epoch": 4.93, + "learning_rate": 3.5919301490759177e-06, + "loss": 0.0844, + "step": 105715 + }, + { + "epoch": 4.93, + "learning_rate": 3.591146364021131e-06, + "loss": 0.1419, + "step": 105720 + }, + { + "epoch": 4.93, + "learning_rate": 3.5903625789663443e-06, + "loss": 0.3696, + "step": 105725 + }, + { + "epoch": 4.93, + "learning_rate": 3.5895787939115577e-06, + "loss": 0.077, + "step": 105730 + }, + { + "epoch": 4.93, + "learning_rate": 3.5887950088567712e-06, + "loss": 0.0292, + "step": 105735 + }, + { + "epoch": 4.93, + "learning_rate": 3.5880112238019847e-06, + "loss": 0.0378, + "step": 105740 + }, + { + "epoch": 4.93, + "learning_rate": 3.587227438747198e-06, + "loss": 0.0408, + "step": 105745 + }, + { + "epoch": 4.93, + "learning_rate": 3.5864436536924117e-06, + "loss": 0.0388, + "step": 105750 + }, + { + "epoch": 4.93, + "learning_rate": 3.5856598686376256e-06, + "loss": 0.0425, + "step": 105755 + }, + { + "epoch": 4.93, + "learning_rate": 3.5848760835828382e-06, + "loss": 0.0547, + "step": 105760 + }, + { + "epoch": 4.94, + "learning_rate": 3.5840922985280517e-06, + "loss": 0.1345, + "step": 105765 + }, + { + "epoch": 4.94, + "learning_rate": 3.583308513473265e-06, + "loss": 0.1975, + "step": 105770 + }, + { + "epoch": 4.94, + "learning_rate": 3.5825247284184787e-06, + "loss": 0.2356, + "step": 105775 + }, + { + "epoch": 4.94, + "learning_rate": 3.5817409433636926e-06, + "loss": 0.0878, + "step": 105780 + }, + { + "epoch": 4.94, + "learning_rate": 3.580957158308906e-06, + "loss": 0.0163, + "step": 105785 + }, + { + "epoch": 4.94, + "learning_rate": 3.5801733732541187e-06, + "loss": 0.0549, + "step": 105790 + }, + { + "epoch": 4.94, + "learning_rate": 3.579389588199332e-06, + "loss": 0.0457, + "step": 105795 + }, + { + "epoch": 4.94, + "learning_rate": 3.578605803144546e-06, + "loss": 0.022, + "step": 105800 + }, + { + "epoch": 4.94, + "learning_rate": 3.5778220180897596e-06, + "loss": 0.0487, + "step": 105805 + }, + { + "epoch": 4.94, + "learning_rate": 3.577038233034973e-06, + "loss": 0.0787, + "step": 105810 + }, + { + "epoch": 4.94, + "learning_rate": 3.5762544479801865e-06, + "loss": 0.104, + "step": 105815 + }, + { + "epoch": 4.94, + "learning_rate": 3.5754706629254e-06, + "loss": 0.0664, + "step": 105820 + }, + { + "epoch": 4.94, + "learning_rate": 3.574686877870613e-06, + "loss": 0.2142, + "step": 105825 + }, + { + "epoch": 4.94, + "learning_rate": 3.5739030928158265e-06, + "loss": 0.0669, + "step": 105830 + }, + { + "epoch": 4.94, + "learning_rate": 3.57311930776104e-06, + "loss": 0.0148, + "step": 105835 + }, + { + "epoch": 4.94, + "learning_rate": 3.5723355227062535e-06, + "loss": 0.0472, + "step": 105840 + }, + { + "epoch": 4.94, + "learning_rate": 3.571551737651467e-06, + "loss": 0.0227, + "step": 105845 + }, + { + "epoch": 4.94, + "learning_rate": 3.5707679525966805e-06, + "loss": 0.0485, + "step": 105850 + }, + { + "epoch": 4.94, + "learning_rate": 3.5699841675418935e-06, + "loss": 0.052, + "step": 105855 + }, + { + "epoch": 4.94, + "learning_rate": 3.569200382487107e-06, + "loss": 0.0552, + "step": 105860 + }, + { + "epoch": 4.94, + "learning_rate": 3.5684165974323205e-06, + "loss": 0.1724, + "step": 105865 + }, + { + "epoch": 4.94, + "learning_rate": 3.567632812377534e-06, + "loss": 0.2415, + "step": 105870 + }, + { + "epoch": 4.94, + "learning_rate": 3.5668490273227475e-06, + "loss": 0.252, + "step": 105875 + }, + { + "epoch": 4.94, + "learning_rate": 3.566065242267961e-06, + "loss": 0.0695, + "step": 105880 + }, + { + "epoch": 4.94, + "learning_rate": 3.5652814572131744e-06, + "loss": 0.0181, + "step": 105885 + }, + { + "epoch": 4.94, + "learning_rate": 3.5644976721583875e-06, + "loss": 0.0542, + "step": 105890 + }, + { + "epoch": 4.94, + "learning_rate": 3.563713887103601e-06, + "loss": 0.0415, + "step": 105895 + }, + { + "epoch": 4.94, + "learning_rate": 3.5629301020488145e-06, + "loss": 0.0706, + "step": 105900 + }, + { + "epoch": 4.94, + "learning_rate": 3.562146316994028e-06, + "loss": 0.1483, + "step": 105905 + }, + { + "epoch": 4.94, + "learning_rate": 3.5613625319392414e-06, + "loss": 0.0904, + "step": 105910 + }, + { + "epoch": 4.94, + "learning_rate": 3.560578746884455e-06, + "loss": 0.1684, + "step": 105915 + }, + { + "epoch": 4.94, + "learning_rate": 3.559794961829668e-06, + "loss": 0.2194, + "step": 105920 + }, + { + "epoch": 4.94, + "learning_rate": 3.5590111767748814e-06, + "loss": 0.2801, + "step": 105925 + }, + { + "epoch": 4.94, + "learning_rate": 3.558227391720095e-06, + "loss": 0.1085, + "step": 105930 + }, + { + "epoch": 4.94, + "learning_rate": 3.5574436066653084e-06, + "loss": 0.026, + "step": 105935 + }, + { + "epoch": 4.94, + "learning_rate": 3.556659821610522e-06, + "loss": 0.0574, + "step": 105940 + }, + { + "epoch": 4.94, + "learning_rate": 3.5558760365557354e-06, + "loss": 0.0476, + "step": 105945 + }, + { + "epoch": 4.94, + "learning_rate": 3.555092251500949e-06, + "loss": 0.0264, + "step": 105950 + }, + { + "epoch": 4.94, + "learning_rate": 3.554308466446162e-06, + "loss": 0.1067, + "step": 105955 + }, + { + "epoch": 4.94, + "learning_rate": 3.5535246813913754e-06, + "loss": 0.2285, + "step": 105960 + }, + { + "epoch": 4.94, + "learning_rate": 3.552740896336589e-06, + "loss": 0.0687, + "step": 105965 + }, + { + "epoch": 4.94, + "learning_rate": 3.5519571112818024e-06, + "loss": 0.169, + "step": 105970 + }, + { + "epoch": 4.94, + "learning_rate": 3.551173326227016e-06, + "loss": 0.2477, + "step": 105975 + }, + { + "epoch": 4.95, + "learning_rate": 3.5503895411722293e-06, + "loss": 0.1724, + "step": 105980 + }, + { + "epoch": 4.95, + "learning_rate": 3.5496057561174424e-06, + "loss": 0.0145, + "step": 105985 + }, + { + "epoch": 4.95, + "learning_rate": 3.548821971062656e-06, + "loss": 0.0335, + "step": 105990 + }, + { + "epoch": 4.95, + "learning_rate": 3.5480381860078693e-06, + "loss": 0.0486, + "step": 105995 + }, + { + "epoch": 4.95, + "learning_rate": 3.547254400953083e-06, + "loss": 0.0692, + "step": 106000 + }, + { + "epoch": 4.95, + "learning_rate": 3.5464706158982963e-06, + "loss": 0.0605, + "step": 106005 + }, + { + "epoch": 4.95, + "learning_rate": 3.54568683084351e-06, + "loss": 0.0902, + "step": 106010 + }, + { + "epoch": 4.95, + "learning_rate": 3.5449030457887233e-06, + "loss": 0.0785, + "step": 106015 + }, + { + "epoch": 4.95, + "learning_rate": 3.5441192607339363e-06, + "loss": 0.2275, + "step": 106020 + }, + { + "epoch": 4.95, + "learning_rate": 3.54333547567915e-06, + "loss": 0.2043, + "step": 106025 + }, + { + "epoch": 4.95, + "learning_rate": 3.5425516906243633e-06, + "loss": 0.1101, + "step": 106030 + }, + { + "epoch": 4.95, + "learning_rate": 3.5417679055695768e-06, + "loss": 0.025, + "step": 106035 + }, + { + "epoch": 4.95, + "learning_rate": 3.5409841205147903e-06, + "loss": 0.0647, + "step": 106040 + }, + { + "epoch": 4.95, + "learning_rate": 3.540200335460004e-06, + "loss": 0.0437, + "step": 106045 + }, + { + "epoch": 4.95, + "learning_rate": 3.539416550405217e-06, + "loss": 0.0448, + "step": 106050 + }, + { + "epoch": 4.95, + "learning_rate": 3.5386327653504303e-06, + "loss": 0.0725, + "step": 106055 + }, + { + "epoch": 4.95, + "learning_rate": 3.5378489802956438e-06, + "loss": 0.227, + "step": 106060 + }, + { + "epoch": 4.95, + "learning_rate": 3.5370651952408572e-06, + "loss": 0.1955, + "step": 106065 + }, + { + "epoch": 4.95, + "learning_rate": 3.536281410186071e-06, + "loss": 0.081, + "step": 106070 + }, + { + "epoch": 4.95, + "learning_rate": 3.5354976251312846e-06, + "loss": 0.254, + "step": 106075 + }, + { + "epoch": 4.95, + "learning_rate": 3.534713840076498e-06, + "loss": 0.0661, + "step": 106080 + }, + { + "epoch": 4.95, + "learning_rate": 3.5339300550217108e-06, + "loss": 0.0277, + "step": 106085 + }, + { + "epoch": 4.95, + "learning_rate": 3.5331462699669242e-06, + "loss": 0.0263, + "step": 106090 + }, + { + "epoch": 4.95, + "learning_rate": 3.532362484912138e-06, + "loss": 0.032, + "step": 106095 + }, + { + "epoch": 4.95, + "learning_rate": 3.5315786998573516e-06, + "loss": 0.0333, + "step": 106100 + }, + { + "epoch": 4.95, + "learning_rate": 3.530794914802565e-06, + "loss": 0.0421, + "step": 106105 + }, + { + "epoch": 4.95, + "learning_rate": 3.5300111297477786e-06, + "loss": 0.0675, + "step": 106110 + }, + { + "epoch": 4.95, + "learning_rate": 3.5292273446929916e-06, + "loss": 0.4258, + "step": 106115 + }, + { + "epoch": 4.95, + "learning_rate": 3.528443559638205e-06, + "loss": 0.161, + "step": 106120 + }, + { + "epoch": 4.95, + "learning_rate": 3.5276597745834186e-06, + "loss": 0.278, + "step": 106125 + }, + { + "epoch": 4.95, + "learning_rate": 3.526875989528632e-06, + "loss": 0.0798, + "step": 106130 + }, + { + "epoch": 4.95, + "learning_rate": 3.5260922044738456e-06, + "loss": 0.0228, + "step": 106135 + }, + { + "epoch": 4.95, + "learning_rate": 3.525308419419059e-06, + "loss": 0.0544, + "step": 106140 + }, + { + "epoch": 4.95, + "learning_rate": 3.5245246343642725e-06, + "loss": 0.0382, + "step": 106145 + }, + { + "epoch": 4.95, + "learning_rate": 3.5237408493094856e-06, + "loss": 0.0646, + "step": 106150 + }, + { + "epoch": 4.95, + "learning_rate": 3.522957064254699e-06, + "loss": 0.0415, + "step": 106155 + }, + { + "epoch": 4.95, + "learning_rate": 3.5221732791999126e-06, + "loss": 0.0916, + "step": 106160 + }, + { + "epoch": 4.95, + "learning_rate": 3.521389494145126e-06, + "loss": 0.107, + "step": 106165 + }, + { + "epoch": 4.95, + "learning_rate": 3.5206057090903395e-06, + "loss": 0.0806, + "step": 106170 + }, + { + "epoch": 4.95, + "learning_rate": 3.519821924035553e-06, + "loss": 0.1467, + "step": 106175 + }, + { + "epoch": 4.95, + "learning_rate": 3.519038138980766e-06, + "loss": 0.0809, + "step": 106180 + }, + { + "epoch": 4.95, + "learning_rate": 3.5182543539259796e-06, + "loss": 0.0925, + "step": 106185 + }, + { + "epoch": 4.95, + "learning_rate": 3.517470568871193e-06, + "loss": 0.0654, + "step": 106190 + }, + { + "epoch": 4.96, + "learning_rate": 3.5166867838164065e-06, + "loss": 0.043, + "step": 106195 + }, + { + "epoch": 4.96, + "learning_rate": 3.51590299876162e-06, + "loss": 0.1209, + "step": 106200 + }, + { + "epoch": 4.96, + "learning_rate": 3.5151192137068335e-06, + "loss": 0.0499, + "step": 106205 + }, + { + "epoch": 4.96, + "learning_rate": 3.514335428652047e-06, + "loss": 0.0794, + "step": 106210 + }, + { + "epoch": 4.96, + "learning_rate": 3.51355164359726e-06, + "loss": 0.091, + "step": 106215 + }, + { + "epoch": 4.96, + "learning_rate": 3.5127678585424735e-06, + "loss": 0.0741, + "step": 106220 + }, + { + "epoch": 4.96, + "learning_rate": 3.511984073487687e-06, + "loss": 0.2565, + "step": 106225 + }, + { + "epoch": 4.96, + "learning_rate": 3.5112002884329005e-06, + "loss": 0.0434, + "step": 106230 + }, + { + "epoch": 4.96, + "learning_rate": 3.510416503378114e-06, + "loss": 0.0221, + "step": 106235 + }, + { + "epoch": 4.96, + "learning_rate": 3.5096327183233274e-06, + "loss": 0.0363, + "step": 106240 + }, + { + "epoch": 4.96, + "learning_rate": 3.5088489332685405e-06, + "loss": 0.0437, + "step": 106245 + }, + { + "epoch": 4.96, + "learning_rate": 3.508065148213754e-06, + "loss": 0.0652, + "step": 106250 + }, + { + "epoch": 4.96, + "learning_rate": 3.5072813631589675e-06, + "loss": 0.0989, + "step": 106255 + }, + { + "epoch": 4.96, + "learning_rate": 3.506497578104181e-06, + "loss": 0.1147, + "step": 106260 + }, + { + "epoch": 4.96, + "learning_rate": 3.5057137930493944e-06, + "loss": 0.1318, + "step": 106265 + }, + { + "epoch": 4.96, + "learning_rate": 3.504930007994608e-06, + "loss": 0.157, + "step": 106270 + }, + { + "epoch": 4.96, + "learning_rate": 3.5041462229398214e-06, + "loss": 0.4112, + "step": 106275 + }, + { + "epoch": 4.96, + "learning_rate": 3.5033624378850344e-06, + "loss": 0.0757, + "step": 106280 + }, + { + "epoch": 4.96, + "learning_rate": 3.502578652830248e-06, + "loss": 0.0156, + "step": 106285 + }, + { + "epoch": 4.96, + "learning_rate": 3.5017948677754614e-06, + "loss": 0.0737, + "step": 106290 + }, + { + "epoch": 4.96, + "learning_rate": 3.501011082720675e-06, + "loss": 0.0418, + "step": 106295 + }, + { + "epoch": 4.96, + "learning_rate": 3.5002272976658884e-06, + "loss": 0.0456, + "step": 106300 + }, + { + "epoch": 4.96, + "learning_rate": 3.499443512611102e-06, + "loss": 0.0979, + "step": 106305 + }, + { + "epoch": 4.96, + "learning_rate": 3.498659727556315e-06, + "loss": 0.0544, + "step": 106310 + }, + { + "epoch": 4.96, + "learning_rate": 3.4978759425015284e-06, + "loss": 0.1414, + "step": 106315 + }, + { + "epoch": 4.96, + "learning_rate": 3.497092157446742e-06, + "loss": 0.0642, + "step": 106320 + }, + { + "epoch": 4.96, + "learning_rate": 3.4963083723919554e-06, + "loss": 0.2217, + "step": 106325 + }, + { + "epoch": 4.96, + "learning_rate": 3.495524587337169e-06, + "loss": 0.075, + "step": 106330 + }, + { + "epoch": 4.96, + "learning_rate": 3.4947408022823827e-06, + "loss": 0.0124, + "step": 106335 + }, + { + "epoch": 4.96, + "learning_rate": 3.4939570172275962e-06, + "loss": 0.0152, + "step": 106340 + }, + { + "epoch": 4.96, + "learning_rate": 3.493173232172809e-06, + "loss": 0.0246, + "step": 106345 + }, + { + "epoch": 4.96, + "learning_rate": 3.4923894471180223e-06, + "loss": 0.0479, + "step": 106350 + }, + { + "epoch": 4.96, + "learning_rate": 3.491605662063236e-06, + "loss": 0.0742, + "step": 106355 + }, + { + "epoch": 4.96, + "learning_rate": 3.4908218770084497e-06, + "loss": 0.0471, + "step": 106360 + }, + { + "epoch": 4.96, + "learning_rate": 3.4900380919536632e-06, + "loss": 0.1336, + "step": 106365 + }, + { + "epoch": 4.96, + "learning_rate": 3.4892543068988767e-06, + "loss": 0.1388, + "step": 106370 + }, + { + "epoch": 4.96, + "learning_rate": 3.4884705218440893e-06, + "loss": 0.3218, + "step": 106375 + }, + { + "epoch": 4.96, + "learning_rate": 3.487686736789303e-06, + "loss": 0.0743, + "step": 106380 + }, + { + "epoch": 4.96, + "learning_rate": 3.4869029517345167e-06, + "loss": 0.0092, + "step": 106385 + }, + { + "epoch": 4.96, + "learning_rate": 3.48611916667973e-06, + "loss": 0.0394, + "step": 106390 + }, + { + "epoch": 4.96, + "learning_rate": 3.4853353816249437e-06, + "loss": 0.0466, + "step": 106395 + }, + { + "epoch": 4.96, + "learning_rate": 3.484551596570157e-06, + "loss": 0.0722, + "step": 106400 + }, + { + "epoch": 4.97, + "learning_rate": 3.4837678115153707e-06, + "loss": 0.0559, + "step": 106405 + }, + { + "epoch": 4.97, + "learning_rate": 3.4829840264605837e-06, + "loss": 0.1109, + "step": 106410 + }, + { + "epoch": 4.97, + "learning_rate": 3.482200241405797e-06, + "loss": 0.1401, + "step": 106415 + }, + { + "epoch": 4.97, + "learning_rate": 3.4814164563510107e-06, + "loss": 0.128, + "step": 106420 + }, + { + "epoch": 4.97, + "learning_rate": 3.480632671296224e-06, + "loss": 0.3628, + "step": 106425 + }, + { + "epoch": 4.97, + "learning_rate": 3.4798488862414376e-06, + "loss": 0.1111, + "step": 106430 + }, + { + "epoch": 4.97, + "learning_rate": 3.479065101186651e-06, + "loss": 0.0278, + "step": 106435 + }, + { + "epoch": 4.97, + "learning_rate": 3.478281316131864e-06, + "loss": 0.0346, + "step": 106440 + }, + { + "epoch": 4.97, + "learning_rate": 3.4774975310770777e-06, + "loss": 0.0373, + "step": 106445 + }, + { + "epoch": 4.97, + "learning_rate": 3.476713746022291e-06, + "loss": 0.0616, + "step": 106450 + }, + { + "epoch": 4.97, + "learning_rate": 3.4759299609675046e-06, + "loss": 0.0689, + "step": 106455 + }, + { + "epoch": 4.97, + "learning_rate": 3.475146175912718e-06, + "loss": 0.0737, + "step": 106460 + }, + { + "epoch": 4.97, + "learning_rate": 3.4743623908579316e-06, + "loss": 0.1977, + "step": 106465 + }, + { + "epoch": 4.97, + "learning_rate": 3.473578605803145e-06, + "loss": 0.1243, + "step": 106470 + }, + { + "epoch": 4.97, + "learning_rate": 3.472794820748358e-06, + "loss": 0.2419, + "step": 106475 + }, + { + "epoch": 4.97, + "learning_rate": 3.4720110356935716e-06, + "loss": 0.1013, + "step": 106480 + }, + { + "epoch": 4.97, + "learning_rate": 3.471227250638785e-06, + "loss": 0.0313, + "step": 106485 + }, + { + "epoch": 4.97, + "learning_rate": 3.4704434655839986e-06, + "loss": 0.0174, + "step": 106490 + }, + { + "epoch": 4.97, + "learning_rate": 3.469659680529212e-06, + "loss": 0.0356, + "step": 106495 + }, + { + "epoch": 4.97, + "learning_rate": 3.4688758954744255e-06, + "loss": 0.0787, + "step": 106500 + }, + { + "epoch": 4.97, + "learning_rate": 3.4680921104196386e-06, + "loss": 0.05, + "step": 106505 + }, + { + "epoch": 4.97, + "learning_rate": 3.467308325364852e-06, + "loss": 0.1218, + "step": 106510 + }, + { + "epoch": 4.97, + "learning_rate": 3.4665245403100656e-06, + "loss": 0.0471, + "step": 106515 + }, + { + "epoch": 4.97, + "learning_rate": 3.465740755255279e-06, + "loss": 0.2304, + "step": 106520 + }, + { + "epoch": 4.97, + "learning_rate": 3.4649569702004925e-06, + "loss": 0.3126, + "step": 106525 + }, + { + "epoch": 4.97, + "learning_rate": 3.464173185145706e-06, + "loss": 0.1508, + "step": 106530 + }, + { + "epoch": 4.97, + "learning_rate": 3.4633894000909195e-06, + "loss": 0.0197, + "step": 106535 + }, + { + "epoch": 4.97, + "learning_rate": 3.4626056150361326e-06, + "loss": 0.0725, + "step": 106540 + }, + { + "epoch": 4.97, + "learning_rate": 3.461821829981346e-06, + "loss": 0.0503, + "step": 106545 + }, + { + "epoch": 4.97, + "learning_rate": 3.4610380449265595e-06, + "loss": 0.0553, + "step": 106550 + }, + { + "epoch": 4.97, + "learning_rate": 3.460254259871773e-06, + "loss": 0.0367, + "step": 106555 + }, + { + "epoch": 4.97, + "learning_rate": 3.4594704748169865e-06, + "loss": 0.0992, + "step": 106560 + }, + { + "epoch": 4.97, + "learning_rate": 3.4586866897622e-06, + "loss": 0.0794, + "step": 106565 + }, + { + "epoch": 4.97, + "learning_rate": 3.457902904707413e-06, + "loss": 0.193, + "step": 106570 + }, + { + "epoch": 4.97, + "learning_rate": 3.4571191196526265e-06, + "loss": 0.2616, + "step": 106575 + }, + { + "epoch": 4.97, + "learning_rate": 3.45633533459784e-06, + "loss": 0.1108, + "step": 106580 + }, + { + "epoch": 4.97, + "learning_rate": 3.4555515495430535e-06, + "loss": 0.0291, + "step": 106585 + }, + { + "epoch": 4.97, + "learning_rate": 3.454767764488267e-06, + "loss": 0.0615, + "step": 106590 + }, + { + "epoch": 4.97, + "learning_rate": 3.4539839794334804e-06, + "loss": 0.0407, + "step": 106595 + }, + { + "epoch": 4.97, + "learning_rate": 3.4532001943786943e-06, + "loss": 0.0377, + "step": 106600 + }, + { + "epoch": 4.97, + "learning_rate": 3.452416409323907e-06, + "loss": 0.0444, + "step": 106605 + }, + { + "epoch": 4.97, + "learning_rate": 3.4516326242691205e-06, + "loss": 0.0721, + "step": 106610 + }, + { + "epoch": 4.97, + "learning_rate": 3.450848839214334e-06, + "loss": 0.0693, + "step": 106615 + }, + { + "epoch": 4.98, + "learning_rate": 3.4500650541595474e-06, + "loss": 0.2011, + "step": 106620 + }, + { + "epoch": 4.98, + "learning_rate": 3.4492812691047613e-06, + "loss": 0.3003, + "step": 106625 + }, + { + "epoch": 4.98, + "learning_rate": 3.448497484049975e-06, + "loss": 0.1171, + "step": 106630 + }, + { + "epoch": 4.98, + "learning_rate": 3.4477136989951874e-06, + "loss": 0.0382, + "step": 106635 + }, + { + "epoch": 4.98, + "learning_rate": 3.446929913940401e-06, + "loss": 0.0392, + "step": 106640 + }, + { + "epoch": 4.98, + "learning_rate": 3.4461461288856144e-06, + "loss": 0.0206, + "step": 106645 + }, + { + "epoch": 4.98, + "learning_rate": 3.4453623438308283e-06, + "loss": 0.0474, + "step": 106650 + }, + { + "epoch": 4.98, + "learning_rate": 3.444578558776042e-06, + "loss": 0.0398, + "step": 106655 + }, + { + "epoch": 4.98, + "learning_rate": 3.4437947737212553e-06, + "loss": 0.1376, + "step": 106660 + }, + { + "epoch": 4.98, + "learning_rate": 3.4430109886664688e-06, + "loss": 0.0836, + "step": 106665 + }, + { + "epoch": 4.98, + "learning_rate": 3.4422272036116814e-06, + "loss": 0.168, + "step": 106670 + }, + { + "epoch": 4.98, + "learning_rate": 3.4414434185568953e-06, + "loss": 0.2874, + "step": 106675 + }, + { + "epoch": 4.98, + "learning_rate": 3.4406596335021088e-06, + "loss": 0.0857, + "step": 106680 + }, + { + "epoch": 4.98, + "learning_rate": 3.4398758484473223e-06, + "loss": 0.0456, + "step": 106685 + }, + { + "epoch": 4.98, + "learning_rate": 3.4390920633925358e-06, + "loss": 0.0468, + "step": 106690 + }, + { + "epoch": 4.98, + "learning_rate": 3.4383082783377492e-06, + "loss": 0.0476, + "step": 106695 + }, + { + "epoch": 4.98, + "learning_rate": 3.4375244932829623e-06, + "loss": 0.0507, + "step": 106700 + }, + { + "epoch": 4.98, + "learning_rate": 3.4367407082281758e-06, + "loss": 0.041, + "step": 106705 + }, + { + "epoch": 4.98, + "learning_rate": 3.4359569231733893e-06, + "loss": 0.0779, + "step": 106710 + }, + { + "epoch": 4.98, + "learning_rate": 3.4351731381186027e-06, + "loss": 0.1492, + "step": 106715 + }, + { + "epoch": 4.98, + "learning_rate": 3.4343893530638162e-06, + "loss": 0.1987, + "step": 106720 + }, + { + "epoch": 4.98, + "learning_rate": 3.4336055680090297e-06, + "loss": 0.1688, + "step": 106725 + }, + { + "epoch": 4.98, + "learning_rate": 3.432821782954243e-06, + "loss": 0.1182, + "step": 106730 + }, + { + "epoch": 4.98, + "learning_rate": 3.4320379978994562e-06, + "loss": 0.0452, + "step": 106735 + }, + { + "epoch": 4.98, + "learning_rate": 3.4312542128446697e-06, + "loss": 0.026, + "step": 106740 + }, + { + "epoch": 4.98, + "learning_rate": 3.430470427789883e-06, + "loss": 0.0615, + "step": 106745 + }, + { + "epoch": 4.98, + "learning_rate": 3.4296866427350967e-06, + "loss": 0.054, + "step": 106750 + }, + { + "epoch": 4.98, + "learning_rate": 3.42890285768031e-06, + "loss": 0.0355, + "step": 106755 + }, + { + "epoch": 4.98, + "learning_rate": 3.4281190726255237e-06, + "loss": 0.0659, + "step": 106760 + }, + { + "epoch": 4.98, + "learning_rate": 3.4273352875707367e-06, + "loss": 0.1147, + "step": 106765 + }, + { + "epoch": 4.98, + "learning_rate": 3.42655150251595e-06, + "loss": 0.0859, + "step": 106770 + }, + { + "epoch": 4.98, + "learning_rate": 3.4257677174611637e-06, + "loss": 0.1798, + "step": 106775 + }, + { + "epoch": 4.98, + "learning_rate": 3.424983932406377e-06, + "loss": 0.0941, + "step": 106780 + }, + { + "epoch": 4.98, + "learning_rate": 3.4242001473515906e-06, + "loss": 0.0168, + "step": 106785 + }, + { + "epoch": 4.98, + "learning_rate": 3.423416362296804e-06, + "loss": 0.0317, + "step": 106790 + }, + { + "epoch": 4.98, + "learning_rate": 3.4226325772420176e-06, + "loss": 0.0486, + "step": 106795 + }, + { + "epoch": 4.98, + "learning_rate": 3.4218487921872307e-06, + "loss": 0.0382, + "step": 106800 + }, + { + "epoch": 4.98, + "learning_rate": 3.421065007132444e-06, + "loss": 0.0989, + "step": 106805 + }, + { + "epoch": 4.98, + "learning_rate": 3.4202812220776576e-06, + "loss": 0.0674, + "step": 106810 + }, + { + "epoch": 4.98, + "learning_rate": 3.419497437022871e-06, + "loss": 0.0986, + "step": 106815 + }, + { + "epoch": 4.98, + "learning_rate": 3.4187136519680846e-06, + "loss": 0.1039, + "step": 106820 + }, + { + "epoch": 4.98, + "learning_rate": 3.417929866913298e-06, + "loss": 0.2248, + "step": 106825 + }, + { + "epoch": 4.98, + "learning_rate": 3.417146081858511e-06, + "loss": 0.1063, + "step": 106830 + }, + { + "epoch": 4.99, + "learning_rate": 3.4163622968037246e-06, + "loss": 0.0143, + "step": 106835 + }, + { + "epoch": 4.99, + "learning_rate": 3.415578511748938e-06, + "loss": 0.035, + "step": 106840 + }, + { + "epoch": 4.99, + "learning_rate": 3.4147947266941516e-06, + "loss": 0.0637, + "step": 106845 + }, + { + "epoch": 4.99, + "learning_rate": 3.414010941639365e-06, + "loss": 0.1092, + "step": 106850 + }, + { + "epoch": 4.99, + "learning_rate": 3.4132271565845785e-06, + "loss": 0.1324, + "step": 106855 + }, + { + "epoch": 4.99, + "learning_rate": 3.412443371529792e-06, + "loss": 0.0607, + "step": 106860 + }, + { + "epoch": 4.99, + "learning_rate": 3.411659586475005e-06, + "loss": 0.1144, + "step": 106865 + }, + { + "epoch": 4.99, + "learning_rate": 3.4108758014202186e-06, + "loss": 0.1065, + "step": 106870 + }, + { + "epoch": 4.99, + "learning_rate": 3.410092016365432e-06, + "loss": 0.2655, + "step": 106875 + }, + { + "epoch": 4.99, + "learning_rate": 3.4093082313106455e-06, + "loss": 0.082, + "step": 106880 + }, + { + "epoch": 4.99, + "learning_rate": 3.408524446255859e-06, + "loss": 0.0192, + "step": 106885 + }, + { + "epoch": 4.99, + "learning_rate": 3.407740661201073e-06, + "loss": 0.0386, + "step": 106890 + }, + { + "epoch": 4.99, + "learning_rate": 3.4069568761462856e-06, + "loss": 0.086, + "step": 106895 + }, + { + "epoch": 4.99, + "learning_rate": 3.406173091091499e-06, + "loss": 0.0492, + "step": 106900 + }, + { + "epoch": 4.99, + "learning_rate": 3.4053893060367125e-06, + "loss": 0.1052, + "step": 106905 + }, + { + "epoch": 4.99, + "learning_rate": 3.404605520981926e-06, + "loss": 0.1199, + "step": 106910 + }, + { + "epoch": 4.99, + "learning_rate": 3.40382173592714e-06, + "loss": 0.1372, + "step": 106915 + }, + { + "epoch": 4.99, + "learning_rate": 3.4030379508723534e-06, + "loss": 0.1377, + "step": 106920 + }, + { + "epoch": 4.99, + "learning_rate": 3.402254165817567e-06, + "loss": 0.4274, + "step": 106925 + }, + { + "epoch": 4.99, + "learning_rate": 3.4014703807627795e-06, + "loss": 0.0971, + "step": 106930 + }, + { + "epoch": 4.99, + "learning_rate": 3.400686595707993e-06, + "loss": 0.0229, + "step": 106935 + }, + { + "epoch": 4.99, + "learning_rate": 3.399902810653207e-06, + "loss": 0.0343, + "step": 106940 + }, + { + "epoch": 4.99, + "learning_rate": 3.3991190255984204e-06, + "loss": 0.1156, + "step": 106945 + }, + { + "epoch": 4.99, + "learning_rate": 3.398335240543634e-06, + "loss": 0.0992, + "step": 106950 + }, + { + "epoch": 4.99, + "learning_rate": 3.3975514554888473e-06, + "loss": 0.0567, + "step": 106955 + }, + { + "epoch": 4.99, + "learning_rate": 3.39676767043406e-06, + "loss": 0.1088, + "step": 106960 + }, + { + "epoch": 4.99, + "learning_rate": 3.395983885379274e-06, + "loss": 0.1391, + "step": 106965 + }, + { + "epoch": 4.99, + "learning_rate": 3.3952001003244874e-06, + "loss": 0.2341, + "step": 106970 + }, + { + "epoch": 4.99, + "learning_rate": 3.394416315269701e-06, + "loss": 0.2564, + "step": 106975 + }, + { + "epoch": 4.99, + "learning_rate": 3.3936325302149143e-06, + "loss": 0.0925, + "step": 106980 + }, + { + "epoch": 4.99, + "learning_rate": 3.392848745160128e-06, + "loss": 0.0425, + "step": 106985 + }, + { + "epoch": 4.99, + "learning_rate": 3.3920649601053413e-06, + "loss": 0.0186, + "step": 106990 + }, + { + "epoch": 4.99, + "learning_rate": 3.3912811750505544e-06, + "loss": 0.0554, + "step": 106995 + }, + { + "epoch": 4.99, + "learning_rate": 3.390497389995768e-06, + "loss": 0.0929, + "step": 107000 + }, + { + "epoch": 4.99, + "learning_rate": 3.3897136049409813e-06, + "loss": 0.0481, + "step": 107005 + }, + { + "epoch": 4.99, + "learning_rate": 3.388929819886195e-06, + "loss": 0.1231, + "step": 107010 + }, + { + "epoch": 4.99, + "learning_rate": 3.3881460348314083e-06, + "loss": 0.1097, + "step": 107015 + }, + { + "epoch": 4.99, + "learning_rate": 3.3873622497766218e-06, + "loss": 0.1566, + "step": 107020 + }, + { + "epoch": 4.99, + "learning_rate": 3.386578464721835e-06, + "loss": 0.3127, + "step": 107025 + }, + { + "epoch": 4.99, + "learning_rate": 3.3857946796670483e-06, + "loss": 0.0991, + "step": 107030 + }, + { + "epoch": 4.99, + "learning_rate": 3.385010894612262e-06, + "loss": 0.0376, + "step": 107035 + }, + { + "epoch": 4.99, + "learning_rate": 3.3842271095574753e-06, + "loss": 0.0389, + "step": 107040 + }, + { + "epoch": 4.99, + "learning_rate": 3.3834433245026888e-06, + "loss": 0.0523, + "step": 107045 + }, + { + "epoch": 5.0, + "learning_rate": 3.3826595394479022e-06, + "loss": 0.0397, + "step": 107050 + }, + { + "epoch": 5.0, + "learning_rate": 3.3818757543931157e-06, + "loss": 0.0767, + "step": 107055 + }, + { + "epoch": 5.0, + "learning_rate": 3.3810919693383288e-06, + "loss": 0.0496, + "step": 107060 + }, + { + "epoch": 5.0, + "learning_rate": 3.3803081842835423e-06, + "loss": 0.1208, + "step": 107065 + }, + { + "epoch": 5.0, + "learning_rate": 3.3795243992287557e-06, + "loss": 0.1989, + "step": 107070 + }, + { + "epoch": 5.0, + "learning_rate": 3.3787406141739692e-06, + "loss": 0.1885, + "step": 107075 + }, + { + "epoch": 5.0, + "learning_rate": 3.3779568291191827e-06, + "loss": 0.1041, + "step": 107080 + }, + { + "epoch": 5.0, + "learning_rate": 3.377173044064396e-06, + "loss": 0.0277, + "step": 107085 + }, + { + "epoch": 5.0, + "learning_rate": 3.3763892590096093e-06, + "loss": 0.0263, + "step": 107090 + }, + { + "epoch": 5.0, + "learning_rate": 3.3756054739548227e-06, + "loss": 0.0636, + "step": 107095 + }, + { + "epoch": 5.0, + "learning_rate": 3.3748216889000362e-06, + "loss": 0.1094, + "step": 107100 + }, + { + "epoch": 5.0, + "learning_rate": 3.3740379038452497e-06, + "loss": 0.0627, + "step": 107105 + }, + { + "epoch": 5.0, + "learning_rate": 3.373254118790463e-06, + "loss": 0.1205, + "step": 107110 + }, + { + "epoch": 5.0, + "learning_rate": 3.3724703337356767e-06, + "loss": 0.0908, + "step": 107115 + }, + { + "epoch": 5.0, + "learning_rate": 3.37168654868089e-06, + "loss": 0.109, + "step": 107120 + }, + { + "epoch": 5.0, + "learning_rate": 3.370902763626103e-06, + "loss": 0.157, + "step": 107125 + }, + { + "epoch": 5.0, + "learning_rate": 3.3701189785713167e-06, + "loss": 0.0523, + "step": 107130 + }, + { + "epoch": 5.0, + "learning_rate": 3.36933519351653e-06, + "loss": 0.0538, + "step": 107135 + }, + { + "epoch": 5.0, + "learning_rate": 3.3685514084617436e-06, + "loss": 0.0309, + "step": 107140 + }, + { + "epoch": 5.0, + "learning_rate": 3.367767623406957e-06, + "loss": 0.1201, + "step": 107145 + }, + { + "epoch": 5.0, + "learning_rate": 3.3669838383521706e-06, + "loss": 0.0899, + "step": 107150 + }, + { + "epoch": 5.0, + "learning_rate": 3.3662000532973837e-06, + "loss": 0.3519, + "step": 107155 + }, + { + "epoch": 5.0, + "eval_cer": 0.011018513087304345, + "eval_loss": 0.06931844353675842, + "eval_runtime": 470.9171, + "eval_samples_per_second": 40.453, + "eval_steps_per_second": 5.058, + "eval_wer": 0.09341228719467061, + "step": 107155 + }, + { + "epoch": 5.0, + "learning_rate": 3.365416268242597e-06, + "loss": 0.1422, + "step": 107160 + }, + { + "epoch": 5.0, + "learning_rate": 3.3646324831878106e-06, + "loss": 0.0562, + "step": 107165 + }, + { + "epoch": 5.0, + "learning_rate": 3.363848698133024e-06, + "loss": 0.0277, + "step": 107170 + }, + { + "epoch": 5.0, + "learning_rate": 3.3630649130782376e-06, + "loss": 0.0679, + "step": 107175 + }, + { + "epoch": 5.0, + "learning_rate": 3.3622811280234515e-06, + "loss": 0.066, + "step": 107180 + }, + { + "epoch": 5.0, + "learning_rate": 3.361497342968665e-06, + "loss": 0.1016, + "step": 107185 + }, + { + "epoch": 5.0, + "learning_rate": 3.3607135579138776e-06, + "loss": 0.0465, + "step": 107190 + }, + { + "epoch": 5.0, + "learning_rate": 3.359929772859091e-06, + "loss": 0.0587, + "step": 107195 + }, + { + "epoch": 5.0, + "learning_rate": 3.3591459878043046e-06, + "loss": 0.1986, + "step": 107200 + }, + { + "epoch": 5.0, + "learning_rate": 3.3583622027495185e-06, + "loss": 0.1939, + "step": 107205 + }, + { + "epoch": 5.0, + "learning_rate": 3.357578417694732e-06, + "loss": 0.1557, + "step": 107210 + }, + { + "epoch": 5.0, + "learning_rate": 3.3567946326399455e-06, + "loss": 0.0271, + "step": 107215 + }, + { + "epoch": 5.0, + "learning_rate": 3.356010847585158e-06, + "loss": 0.0215, + "step": 107220 + }, + { + "epoch": 5.0, + "learning_rate": 3.3552270625303716e-06, + "loss": 0.0608, + "step": 107225 + }, + { + "epoch": 5.0, + "learning_rate": 3.3544432774755855e-06, + "loss": 0.1111, + "step": 107230 + }, + { + "epoch": 5.0, + "learning_rate": 3.353659492420799e-06, + "loss": 0.0185, + "step": 107235 + }, + { + "epoch": 5.0, + "learning_rate": 3.3528757073660124e-06, + "loss": 0.0908, + "step": 107240 + }, + { + "epoch": 5.0, + "learning_rate": 3.352091922311226e-06, + "loss": 0.0968, + "step": 107245 + }, + { + "epoch": 5.0, + "learning_rate": 3.3513081372564394e-06, + "loss": 0.1501, + "step": 107250 + }, + { + "epoch": 5.0, + "learning_rate": 3.3505243522016525e-06, + "loss": 0.1508, + "step": 107255 + }, + { + "epoch": 5.0, + "learning_rate": 3.349740567146866e-06, + "loss": 0.0717, + "step": 107260 + }, + { + "epoch": 5.01, + "learning_rate": 3.3489567820920794e-06, + "loss": 0.0202, + "step": 107265 + }, + { + "epoch": 5.01, + "learning_rate": 3.348172997037293e-06, + "loss": 0.0303, + "step": 107270 + }, + { + "epoch": 5.01, + "learning_rate": 3.3473892119825064e-06, + "loss": 0.0496, + "step": 107275 + }, + { + "epoch": 5.01, + "learning_rate": 3.34660542692772e-06, + "loss": 0.0155, + "step": 107280 + }, + { + "epoch": 5.01, + "learning_rate": 3.345821641872933e-06, + "loss": 0.0375, + "step": 107285 + }, + { + "epoch": 5.01, + "learning_rate": 3.3450378568181464e-06, + "loss": 0.0909, + "step": 107290 + }, + { + "epoch": 5.01, + "learning_rate": 3.34425407176336e-06, + "loss": 0.164, + "step": 107295 + }, + { + "epoch": 5.01, + "learning_rate": 3.3434702867085734e-06, + "loss": 0.1214, + "step": 107300 + }, + { + "epoch": 5.01, + "learning_rate": 3.342686501653787e-06, + "loss": 0.2073, + "step": 107305 + }, + { + "epoch": 5.01, + "learning_rate": 3.3419027165990004e-06, + "loss": 0.1256, + "step": 107310 + }, + { + "epoch": 5.01, + "learning_rate": 3.341118931544214e-06, + "loss": 0.0329, + "step": 107315 + }, + { + "epoch": 5.01, + "learning_rate": 3.340335146489427e-06, + "loss": 0.0709, + "step": 107320 + }, + { + "epoch": 5.01, + "learning_rate": 3.3395513614346404e-06, + "loss": 0.0664, + "step": 107325 + }, + { + "epoch": 5.01, + "learning_rate": 3.338767576379854e-06, + "loss": 0.042, + "step": 107330 + }, + { + "epoch": 5.01, + "learning_rate": 3.3379837913250673e-06, + "loss": 0.0727, + "step": 107335 + }, + { + "epoch": 5.01, + "learning_rate": 3.337200006270281e-06, + "loss": 0.0737, + "step": 107340 + }, + { + "epoch": 5.01, + "learning_rate": 3.3364162212154943e-06, + "loss": 0.1233, + "step": 107345 + }, + { + "epoch": 5.01, + "learning_rate": 3.3356324361607074e-06, + "loss": 0.1083, + "step": 107350 + }, + { + "epoch": 5.01, + "learning_rate": 3.334848651105921e-06, + "loss": 0.3747, + "step": 107355 + }, + { + "epoch": 5.01, + "learning_rate": 3.3340648660511343e-06, + "loss": 0.1091, + "step": 107360 + }, + { + "epoch": 5.01, + "learning_rate": 3.333281080996348e-06, + "loss": 0.0053, + "step": 107365 + }, + { + "epoch": 5.01, + "learning_rate": 3.3324972959415613e-06, + "loss": 0.004, + "step": 107370 + }, + { + "epoch": 5.01, + "learning_rate": 3.3317135108867748e-06, + "loss": 0.0219, + "step": 107375 + }, + { + "epoch": 5.01, + "learning_rate": 3.3309297258319883e-06, + "loss": 0.065, + "step": 107380 + }, + { + "epoch": 5.01, + "learning_rate": 3.3301459407772013e-06, + "loss": 0.0182, + "step": 107385 + }, + { + "epoch": 5.01, + "learning_rate": 3.329362155722415e-06, + "loss": 0.0865, + "step": 107390 + }, + { + "epoch": 5.01, + "learning_rate": 3.3285783706676283e-06, + "loss": 0.0667, + "step": 107395 + }, + { + "epoch": 5.01, + "learning_rate": 3.3277945856128418e-06, + "loss": 0.1567, + "step": 107400 + }, + { + "epoch": 5.01, + "learning_rate": 3.3270108005580552e-06, + "loss": 0.3945, + "step": 107405 + }, + { + "epoch": 5.01, + "learning_rate": 3.3262270155032687e-06, + "loss": 0.0794, + "step": 107410 + }, + { + "epoch": 5.01, + "learning_rate": 3.3254432304484818e-06, + "loss": 0.0433, + "step": 107415 + }, + { + "epoch": 5.01, + "learning_rate": 3.3246594453936953e-06, + "loss": 0.0414, + "step": 107420 + }, + { + "epoch": 5.01, + "learning_rate": 3.3238756603389087e-06, + "loss": 0.0213, + "step": 107425 + }, + { + "epoch": 5.01, + "learning_rate": 3.3230918752841222e-06, + "loss": 0.0535, + "step": 107430 + }, + { + "epoch": 5.01, + "learning_rate": 3.3223080902293357e-06, + "loss": 0.1511, + "step": 107435 + }, + { + "epoch": 5.01, + "learning_rate": 3.321524305174549e-06, + "loss": 0.0589, + "step": 107440 + }, + { + "epoch": 5.01, + "learning_rate": 3.3207405201197627e-06, + "loss": 0.1411, + "step": 107445 + }, + { + "epoch": 5.01, + "learning_rate": 3.3199567350649757e-06, + "loss": 0.1361, + "step": 107450 + }, + { + "epoch": 5.01, + "learning_rate": 3.3191729500101892e-06, + "loss": 0.3542, + "step": 107455 + }, + { + "epoch": 5.01, + "learning_rate": 3.3183891649554027e-06, + "loss": 0.1307, + "step": 107460 + }, + { + "epoch": 5.01, + "learning_rate": 3.317605379900616e-06, + "loss": 0.0451, + "step": 107465 + }, + { + "epoch": 5.01, + "learning_rate": 3.31682159484583e-06, + "loss": 0.0223, + "step": 107470 + }, + { + "epoch": 5.01, + "learning_rate": 3.3160378097910436e-06, + "loss": 0.0564, + "step": 107475 + }, + { + "epoch": 5.02, + "learning_rate": 3.315254024736256e-06, + "loss": 0.0352, + "step": 107480 + }, + { + "epoch": 5.02, + "learning_rate": 3.3144702396814697e-06, + "loss": 0.0854, + "step": 107485 + }, + { + "epoch": 5.02, + "learning_rate": 3.313686454626683e-06, + "loss": 0.0946, + "step": 107490 + }, + { + "epoch": 5.02, + "learning_rate": 3.312902669571897e-06, + "loss": 0.1038, + "step": 107495 + }, + { + "epoch": 5.02, + "learning_rate": 3.3121188845171106e-06, + "loss": 0.2178, + "step": 107500 + }, + { + "epoch": 5.02, + "learning_rate": 3.311335099462324e-06, + "loss": 0.29, + "step": 107505 + }, + { + "epoch": 5.02, + "learning_rate": 3.3105513144075375e-06, + "loss": 0.0829, + "step": 107510 + }, + { + "epoch": 5.02, + "learning_rate": 3.30976752935275e-06, + "loss": 0.0584, + "step": 107515 + }, + { + "epoch": 5.02, + "learning_rate": 3.308983744297964e-06, + "loss": 0.076, + "step": 107520 + }, + { + "epoch": 5.02, + "learning_rate": 3.3081999592431775e-06, + "loss": 0.0636, + "step": 107525 + }, + { + "epoch": 5.02, + "learning_rate": 3.307416174188391e-06, + "loss": 0.0525, + "step": 107530 + }, + { + "epoch": 5.02, + "learning_rate": 3.3066323891336045e-06, + "loss": 0.051, + "step": 107535 + }, + { + "epoch": 5.02, + "learning_rate": 3.305848604078818e-06, + "loss": 0.1244, + "step": 107540 + }, + { + "epoch": 5.02, + "learning_rate": 3.305064819024031e-06, + "loss": 0.0477, + "step": 107545 + }, + { + "epoch": 5.02, + "learning_rate": 3.3042810339692445e-06, + "loss": 0.1156, + "step": 107550 + }, + { + "epoch": 5.02, + "learning_rate": 3.303497248914458e-06, + "loss": 0.3324, + "step": 107555 + }, + { + "epoch": 5.02, + "learning_rate": 3.3027134638596715e-06, + "loss": 0.1291, + "step": 107560 + }, + { + "epoch": 5.02, + "learning_rate": 3.301929678804885e-06, + "loss": 0.0271, + "step": 107565 + }, + { + "epoch": 5.02, + "learning_rate": 3.3011458937500985e-06, + "loss": 0.0287, + "step": 107570 + }, + { + "epoch": 5.02, + "learning_rate": 3.300362108695312e-06, + "loss": 0.029, + "step": 107575 + }, + { + "epoch": 5.02, + "learning_rate": 3.299578323640525e-06, + "loss": 0.0577, + "step": 107580 + }, + { + "epoch": 5.02, + "learning_rate": 3.2987945385857385e-06, + "loss": 0.0766, + "step": 107585 + }, + { + "epoch": 5.02, + "learning_rate": 3.298010753530952e-06, + "loss": 0.0514, + "step": 107590 + }, + { + "epoch": 5.02, + "learning_rate": 3.2972269684761655e-06, + "loss": 0.073, + "step": 107595 + }, + { + "epoch": 5.02, + "learning_rate": 3.296443183421379e-06, + "loss": 0.1746, + "step": 107600 + }, + { + "epoch": 5.02, + "learning_rate": 3.2956593983665924e-06, + "loss": 0.1658, + "step": 107605 + }, + { + "epoch": 5.02, + "learning_rate": 3.2948756133118055e-06, + "loss": 0.1045, + "step": 107610 + }, + { + "epoch": 5.02, + "learning_rate": 3.294091828257019e-06, + "loss": 0.0505, + "step": 107615 + }, + { + "epoch": 5.02, + "learning_rate": 3.2933080432022324e-06, + "loss": 0.0382, + "step": 107620 + }, + { + "epoch": 5.02, + "learning_rate": 3.292524258147446e-06, + "loss": 0.0314, + "step": 107625 + }, + { + "epoch": 5.02, + "learning_rate": 3.2917404730926594e-06, + "loss": 0.0587, + "step": 107630 + }, + { + "epoch": 5.02, + "learning_rate": 3.290956688037873e-06, + "loss": 0.0886, + "step": 107635 + }, + { + "epoch": 5.02, + "learning_rate": 3.2901729029830864e-06, + "loss": 0.0612, + "step": 107640 + }, + { + "epoch": 5.02, + "learning_rate": 3.2893891179282994e-06, + "loss": 0.0624, + "step": 107645 + }, + { + "epoch": 5.02, + "learning_rate": 3.288605332873513e-06, + "loss": 0.11, + "step": 107650 + }, + { + "epoch": 5.02, + "learning_rate": 3.2878215478187264e-06, + "loss": 0.2971, + "step": 107655 + }, + { + "epoch": 5.02, + "learning_rate": 3.28703776276394e-06, + "loss": 0.1275, + "step": 107660 + }, + { + "epoch": 5.02, + "learning_rate": 3.2862539777091534e-06, + "loss": 0.0327, + "step": 107665 + }, + { + "epoch": 5.02, + "learning_rate": 3.285470192654367e-06, + "loss": 0.032, + "step": 107670 + }, + { + "epoch": 5.02, + "learning_rate": 3.28468640759958e-06, + "loss": 0.0244, + "step": 107675 + }, + { + "epoch": 5.02, + "learning_rate": 3.2839026225447934e-06, + "loss": 0.0464, + "step": 107680 + }, + { + "epoch": 5.02, + "learning_rate": 3.283118837490007e-06, + "loss": 0.037, + "step": 107685 + }, + { + "epoch": 5.02, + "learning_rate": 3.2823350524352203e-06, + "loss": 0.1062, + "step": 107690 + }, + { + "epoch": 5.03, + "learning_rate": 3.281551267380434e-06, + "loss": 0.2397, + "step": 107695 + }, + { + "epoch": 5.03, + "learning_rate": 3.2807674823256473e-06, + "loss": 0.1325, + "step": 107700 + }, + { + "epoch": 5.03, + "learning_rate": 3.279983697270861e-06, + "loss": 0.3151, + "step": 107705 + }, + { + "epoch": 5.03, + "learning_rate": 3.279199912216074e-06, + "loss": 0.1135, + "step": 107710 + }, + { + "epoch": 5.03, + "learning_rate": 3.2784161271612873e-06, + "loss": 0.0135, + "step": 107715 + }, + { + "epoch": 5.03, + "learning_rate": 3.277632342106501e-06, + "loss": 0.0186, + "step": 107720 + }, + { + "epoch": 5.03, + "learning_rate": 3.2768485570517143e-06, + "loss": 0.0561, + "step": 107725 + }, + { + "epoch": 5.03, + "learning_rate": 3.2760647719969278e-06, + "loss": 0.063, + "step": 107730 + }, + { + "epoch": 5.03, + "learning_rate": 3.2752809869421413e-06, + "loss": 0.0903, + "step": 107735 + }, + { + "epoch": 5.03, + "learning_rate": 3.2744972018873543e-06, + "loss": 0.1117, + "step": 107740 + }, + { + "epoch": 5.03, + "learning_rate": 3.273713416832568e-06, + "loss": 0.1376, + "step": 107745 + }, + { + "epoch": 5.03, + "learning_rate": 3.2729296317777813e-06, + "loss": 0.1156, + "step": 107750 + }, + { + "epoch": 5.03, + "learning_rate": 3.2721458467229948e-06, + "loss": 0.2507, + "step": 107755 + }, + { + "epoch": 5.03, + "learning_rate": 3.2713620616682087e-06, + "loss": 0.1059, + "step": 107760 + }, + { + "epoch": 5.03, + "learning_rate": 3.270578276613422e-06, + "loss": 0.0255, + "step": 107765 + }, + { + "epoch": 5.03, + "learning_rate": 3.2697944915586356e-06, + "loss": 0.0273, + "step": 107770 + }, + { + "epoch": 5.03, + "learning_rate": 3.2690107065038483e-06, + "loss": 0.0612, + "step": 107775 + }, + { + "epoch": 5.03, + "learning_rate": 3.2682269214490618e-06, + "loss": 0.0479, + "step": 107780 + }, + { + "epoch": 5.03, + "learning_rate": 3.2674431363942757e-06, + "loss": 0.0702, + "step": 107785 + }, + { + "epoch": 5.03, + "learning_rate": 3.266659351339489e-06, + "loss": 0.0244, + "step": 107790 + }, + { + "epoch": 5.03, + "learning_rate": 3.2658755662847026e-06, + "loss": 0.0575, + "step": 107795 + }, + { + "epoch": 5.03, + "learning_rate": 3.265091781229916e-06, + "loss": 0.2072, + "step": 107800 + }, + { + "epoch": 5.03, + "learning_rate": 3.2643079961751287e-06, + "loss": 0.2973, + "step": 107805 + }, + { + "epoch": 5.03, + "learning_rate": 3.2635242111203426e-06, + "loss": 0.1217, + "step": 107810 + }, + { + "epoch": 5.03, + "learning_rate": 3.262740426065556e-06, + "loss": 0.0461, + "step": 107815 + }, + { + "epoch": 5.03, + "learning_rate": 3.2619566410107696e-06, + "loss": 0.0387, + "step": 107820 + }, + { + "epoch": 5.03, + "learning_rate": 3.261172855955983e-06, + "loss": 0.0489, + "step": 107825 + }, + { + "epoch": 5.03, + "learning_rate": 3.2603890709011966e-06, + "loss": 0.0881, + "step": 107830 + }, + { + "epoch": 5.03, + "learning_rate": 3.25960528584641e-06, + "loss": 0.0156, + "step": 107835 + }, + { + "epoch": 5.03, + "learning_rate": 3.258821500791623e-06, + "loss": 0.0707, + "step": 107840 + }, + { + "epoch": 5.03, + "learning_rate": 3.2580377157368366e-06, + "loss": 0.1257, + "step": 107845 + }, + { + "epoch": 5.03, + "learning_rate": 3.25725393068205e-06, + "loss": 0.1345, + "step": 107850 + }, + { + "epoch": 5.03, + "learning_rate": 3.2564701456272636e-06, + "loss": 0.3686, + "step": 107855 + }, + { + "epoch": 5.03, + "learning_rate": 3.255686360572477e-06, + "loss": 0.1119, + "step": 107860 + }, + { + "epoch": 5.03, + "learning_rate": 3.2549025755176905e-06, + "loss": 0.0195, + "step": 107865 + }, + { + "epoch": 5.03, + "learning_rate": 3.2541187904629036e-06, + "loss": 0.0149, + "step": 107870 + }, + { + "epoch": 5.03, + "learning_rate": 3.253335005408117e-06, + "loss": 0.0408, + "step": 107875 + }, + { + "epoch": 5.03, + "learning_rate": 3.2525512203533306e-06, + "loss": 0.0847, + "step": 107880 + }, + { + "epoch": 5.03, + "learning_rate": 3.251767435298544e-06, + "loss": 0.0889, + "step": 107885 + }, + { + "epoch": 5.03, + "learning_rate": 3.2509836502437575e-06, + "loss": 0.1235, + "step": 107890 + }, + { + "epoch": 5.03, + "learning_rate": 3.250199865188971e-06, + "loss": 0.0459, + "step": 107895 + }, + { + "epoch": 5.03, + "learning_rate": 3.2494160801341845e-06, + "loss": 0.2144, + "step": 107900 + }, + { + "epoch": 5.03, + "learning_rate": 3.2486322950793975e-06, + "loss": 0.5062, + "step": 107905 + }, + { + "epoch": 5.04, + "learning_rate": 3.247848510024611e-06, + "loss": 0.112, + "step": 107910 + }, + { + "epoch": 5.04, + "learning_rate": 3.2470647249698245e-06, + "loss": 0.0134, + "step": 107915 + }, + { + "epoch": 5.04, + "learning_rate": 3.246280939915038e-06, + "loss": 0.0631, + "step": 107920 + }, + { + "epoch": 5.04, + "learning_rate": 3.2454971548602515e-06, + "loss": 0.0568, + "step": 107925 + }, + { + "epoch": 5.04, + "learning_rate": 3.244713369805465e-06, + "loss": 0.0085, + "step": 107930 + }, + { + "epoch": 5.04, + "learning_rate": 3.243929584750678e-06, + "loss": 0.1647, + "step": 107935 + }, + { + "epoch": 5.04, + "learning_rate": 3.2431457996958915e-06, + "loss": 0.067, + "step": 107940 + }, + { + "epoch": 5.04, + "learning_rate": 3.242362014641105e-06, + "loss": 0.0793, + "step": 107945 + }, + { + "epoch": 5.04, + "learning_rate": 3.2415782295863185e-06, + "loss": 0.2082, + "step": 107950 + }, + { + "epoch": 5.04, + "learning_rate": 3.240794444531532e-06, + "loss": 0.3051, + "step": 107955 + }, + { + "epoch": 5.04, + "learning_rate": 3.2400106594767454e-06, + "loss": 0.1199, + "step": 107960 + }, + { + "epoch": 5.04, + "learning_rate": 3.239226874421959e-06, + "loss": 0.0123, + "step": 107965 + }, + { + "epoch": 5.04, + "learning_rate": 3.238443089367172e-06, + "loss": 0.0344, + "step": 107970 + }, + { + "epoch": 5.04, + "learning_rate": 3.2376593043123854e-06, + "loss": 0.0364, + "step": 107975 + }, + { + "epoch": 5.04, + "learning_rate": 3.236875519257599e-06, + "loss": 0.072, + "step": 107980 + }, + { + "epoch": 5.04, + "learning_rate": 3.2360917342028124e-06, + "loss": 0.0398, + "step": 107985 + }, + { + "epoch": 5.04, + "learning_rate": 3.235307949148026e-06, + "loss": 0.0835, + "step": 107990 + }, + { + "epoch": 5.04, + "learning_rate": 3.2345241640932394e-06, + "loss": 0.1001, + "step": 107995 + }, + { + "epoch": 5.04, + "learning_rate": 3.2337403790384524e-06, + "loss": 0.0513, + "step": 108000 + }, + { + "epoch": 5.04, + "learning_rate": 3.232956593983666e-06, + "loss": 0.3597, + "step": 108005 + }, + { + "epoch": 5.04, + "learning_rate": 3.2321728089288794e-06, + "loss": 0.1407, + "step": 108010 + }, + { + "epoch": 5.04, + "learning_rate": 3.231389023874093e-06, + "loss": 0.015, + "step": 108015 + }, + { + "epoch": 5.04, + "learning_rate": 3.2306052388193064e-06, + "loss": 0.0237, + "step": 108020 + }, + { + "epoch": 5.04, + "learning_rate": 3.22982145376452e-06, + "loss": 0.0726, + "step": 108025 + }, + { + "epoch": 5.04, + "learning_rate": 3.2290376687097337e-06, + "loss": 0.1018, + "step": 108030 + }, + { + "epoch": 5.04, + "learning_rate": 3.2282538836549464e-06, + "loss": 0.0648, + "step": 108035 + }, + { + "epoch": 5.04, + "learning_rate": 3.22747009860016e-06, + "loss": 0.0267, + "step": 108040 + }, + { + "epoch": 5.04, + "learning_rate": 3.2266863135453733e-06, + "loss": 0.0763, + "step": 108045 + }, + { + "epoch": 5.04, + "learning_rate": 3.2259025284905873e-06, + "loss": 0.1733, + "step": 108050 + }, + { + "epoch": 5.04, + "learning_rate": 3.2251187434358007e-06, + "loss": 0.3107, + "step": 108055 + }, + { + "epoch": 5.04, + "learning_rate": 3.2243349583810142e-06, + "loss": 0.1072, + "step": 108060 + }, + { + "epoch": 5.04, + "learning_rate": 3.223551173326227e-06, + "loss": 0.0162, + "step": 108065 + }, + { + "epoch": 5.04, + "learning_rate": 3.2227673882714403e-06, + "loss": 0.0682, + "step": 108070 + }, + { + "epoch": 5.04, + "learning_rate": 3.2219836032166542e-06, + "loss": 0.027, + "step": 108075 + }, + { + "epoch": 5.04, + "learning_rate": 3.2211998181618677e-06, + "loss": 0.0383, + "step": 108080 + }, + { + "epoch": 5.04, + "learning_rate": 3.220416033107081e-06, + "loss": 0.0755, + "step": 108085 + }, + { + "epoch": 5.04, + "learning_rate": 3.2196322480522947e-06, + "loss": 0.0593, + "step": 108090 + }, + { + "epoch": 5.04, + "learning_rate": 3.218848462997508e-06, + "loss": 0.0942, + "step": 108095 + }, + { + "epoch": 5.04, + "learning_rate": 3.2180646779427212e-06, + "loss": 0.1365, + "step": 108100 + }, + { + "epoch": 5.04, + "learning_rate": 3.2172808928879347e-06, + "loss": 0.1773, + "step": 108105 + }, + { + "epoch": 5.04, + "learning_rate": 3.216497107833148e-06, + "loss": 0.0859, + "step": 108110 + }, + { + "epoch": 5.04, + "learning_rate": 3.2157133227783617e-06, + "loss": 0.0657, + "step": 108115 + }, + { + "epoch": 5.05, + "learning_rate": 3.214929537723575e-06, + "loss": 0.0102, + "step": 108120 + }, + { + "epoch": 5.05, + "learning_rate": 3.2141457526687886e-06, + "loss": 0.0469, + "step": 108125 + }, + { + "epoch": 5.05, + "learning_rate": 3.2133619676140017e-06, + "loss": 0.0511, + "step": 108130 + }, + { + "epoch": 5.05, + "learning_rate": 3.212578182559215e-06, + "loss": 0.103, + "step": 108135 + }, + { + "epoch": 5.05, + "learning_rate": 3.2117943975044287e-06, + "loss": 0.0373, + "step": 108140 + }, + { + "epoch": 5.05, + "learning_rate": 3.211010612449642e-06, + "loss": 0.0914, + "step": 108145 + }, + { + "epoch": 5.05, + "learning_rate": 3.2102268273948556e-06, + "loss": 0.0742, + "step": 108150 + }, + { + "epoch": 5.05, + "learning_rate": 3.209443042340069e-06, + "loss": 0.5008, + "step": 108155 + }, + { + "epoch": 5.05, + "learning_rate": 3.2086592572852826e-06, + "loss": 0.112, + "step": 108160 + }, + { + "epoch": 5.05, + "learning_rate": 3.2078754722304957e-06, + "loss": 0.0364, + "step": 108165 + }, + { + "epoch": 5.05, + "learning_rate": 3.207091687175709e-06, + "loss": 0.0154, + "step": 108170 + }, + { + "epoch": 5.05, + "learning_rate": 3.2063079021209226e-06, + "loss": 0.033, + "step": 108175 + }, + { + "epoch": 5.05, + "learning_rate": 3.205524117066136e-06, + "loss": 0.0478, + "step": 108180 + }, + { + "epoch": 5.05, + "learning_rate": 3.2047403320113496e-06, + "loss": 0.0508, + "step": 108185 + }, + { + "epoch": 5.05, + "learning_rate": 3.203956546956563e-06, + "loss": 0.0542, + "step": 108190 + }, + { + "epoch": 5.05, + "learning_rate": 3.203172761901776e-06, + "loss": 0.1701, + "step": 108195 + }, + { + "epoch": 5.05, + "learning_rate": 3.2023889768469896e-06, + "loss": 0.0875, + "step": 108200 + }, + { + "epoch": 5.05, + "learning_rate": 3.201605191792203e-06, + "loss": 0.1873, + "step": 108205 + }, + { + "epoch": 5.05, + "learning_rate": 3.2008214067374166e-06, + "loss": 0.1173, + "step": 108210 + }, + { + "epoch": 5.05, + "learning_rate": 3.20003762168263e-06, + "loss": 0.0514, + "step": 108215 + }, + { + "epoch": 5.05, + "learning_rate": 3.1992538366278435e-06, + "loss": 0.0613, + "step": 108220 + }, + { + "epoch": 5.05, + "learning_rate": 3.198470051573057e-06, + "loss": 0.0274, + "step": 108225 + }, + { + "epoch": 5.05, + "learning_rate": 3.19768626651827e-06, + "loss": 0.0689, + "step": 108230 + }, + { + "epoch": 5.05, + "learning_rate": 3.1969024814634836e-06, + "loss": 0.0306, + "step": 108235 + }, + { + "epoch": 5.05, + "learning_rate": 3.196118696408697e-06, + "loss": 0.1183, + "step": 108240 + }, + { + "epoch": 5.05, + "learning_rate": 3.1953349113539105e-06, + "loss": 0.0792, + "step": 108245 + }, + { + "epoch": 5.05, + "learning_rate": 3.194551126299124e-06, + "loss": 0.2155, + "step": 108250 + }, + { + "epoch": 5.05, + "learning_rate": 3.1937673412443375e-06, + "loss": 0.2556, + "step": 108255 + }, + { + "epoch": 5.05, + "learning_rate": 3.1929835561895505e-06, + "loss": 0.1195, + "step": 108260 + }, + { + "epoch": 5.05, + "learning_rate": 3.192199771134764e-06, + "loss": 0.0133, + "step": 108265 + }, + { + "epoch": 5.05, + "learning_rate": 3.1914159860799775e-06, + "loss": 0.0214, + "step": 108270 + }, + { + "epoch": 5.05, + "learning_rate": 3.190632201025191e-06, + "loss": 0.0183, + "step": 108275 + }, + { + "epoch": 5.05, + "learning_rate": 3.1898484159704045e-06, + "loss": 0.0193, + "step": 108280 + }, + { + "epoch": 5.05, + "learning_rate": 3.189064630915618e-06, + "loss": 0.0453, + "step": 108285 + }, + { + "epoch": 5.05, + "learning_rate": 3.1882808458608314e-06, + "loss": 0.0818, + "step": 108290 + }, + { + "epoch": 5.05, + "learning_rate": 3.1874970608060445e-06, + "loss": 0.0368, + "step": 108295 + }, + { + "epoch": 5.05, + "learning_rate": 3.186713275751258e-06, + "loss": 0.1105, + "step": 108300 + }, + { + "epoch": 5.05, + "learning_rate": 3.1859294906964715e-06, + "loss": 0.262, + "step": 108305 + }, + { + "epoch": 5.05, + "learning_rate": 3.185145705641685e-06, + "loss": 0.1415, + "step": 108310 + }, + { + "epoch": 5.05, + "learning_rate": 3.1843619205868984e-06, + "loss": 0.0008, + "step": 108315 + }, + { + "epoch": 5.05, + "learning_rate": 3.1835781355321123e-06, + "loss": 0.0241, + "step": 108320 + }, + { + "epoch": 5.05, + "learning_rate": 3.182794350477325e-06, + "loss": 0.0698, + "step": 108325 + }, + { + "epoch": 5.05, + "learning_rate": 3.1820105654225384e-06, + "loss": 0.0514, + "step": 108330 + }, + { + "epoch": 5.06, + "learning_rate": 3.181226780367752e-06, + "loss": 0.0421, + "step": 108335 + }, + { + "epoch": 5.06, + "learning_rate": 3.180442995312966e-06, + "loss": 0.0826, + "step": 108340 + }, + { + "epoch": 5.06, + "learning_rate": 3.1796592102581793e-06, + "loss": 0.0493, + "step": 108345 + }, + { + "epoch": 5.06, + "learning_rate": 3.178875425203393e-06, + "loss": 0.1459, + "step": 108350 + }, + { + "epoch": 5.06, + "learning_rate": 3.1780916401486063e-06, + "loss": 0.2562, + "step": 108355 + }, + { + "epoch": 5.06, + "learning_rate": 3.177307855093819e-06, + "loss": 0.1059, + "step": 108360 + }, + { + "epoch": 5.06, + "learning_rate": 3.176524070039033e-06, + "loss": 0.0204, + "step": 108365 + }, + { + "epoch": 5.06, + "learning_rate": 3.1757402849842463e-06, + "loss": 0.0238, + "step": 108370 + }, + { + "epoch": 5.06, + "learning_rate": 3.17495649992946e-06, + "loss": 0.0399, + "step": 108375 + }, + { + "epoch": 5.06, + "learning_rate": 3.1741727148746733e-06, + "loss": 0.0353, + "step": 108380 + }, + { + "epoch": 5.06, + "learning_rate": 3.1733889298198868e-06, + "loss": 0.0493, + "step": 108385 + }, + { + "epoch": 5.06, + "learning_rate": 3.1726051447651e-06, + "loss": 0.1781, + "step": 108390 + }, + { + "epoch": 5.06, + "learning_rate": 3.1718213597103133e-06, + "loss": 0.1154, + "step": 108395 + }, + { + "epoch": 5.06, + "learning_rate": 3.1710375746555268e-06, + "loss": 0.1813, + "step": 108400 + }, + { + "epoch": 5.06, + "learning_rate": 3.1702537896007403e-06, + "loss": 0.2548, + "step": 108405 + }, + { + "epoch": 5.06, + "learning_rate": 3.1694700045459537e-06, + "loss": 0.0545, + "step": 108410 + }, + { + "epoch": 5.06, + "learning_rate": 3.1686862194911672e-06, + "loss": 0.0042, + "step": 108415 + }, + { + "epoch": 5.06, + "learning_rate": 3.1679024344363807e-06, + "loss": 0.0295, + "step": 108420 + }, + { + "epoch": 5.06, + "learning_rate": 3.1671186493815938e-06, + "loss": 0.0693, + "step": 108425 + }, + { + "epoch": 5.06, + "learning_rate": 3.1663348643268072e-06, + "loss": 0.0483, + "step": 108430 + }, + { + "epoch": 5.06, + "learning_rate": 3.1655510792720207e-06, + "loss": 0.0562, + "step": 108435 + }, + { + "epoch": 5.06, + "learning_rate": 3.1647672942172342e-06, + "loss": 0.0283, + "step": 108440 + }, + { + "epoch": 5.06, + "learning_rate": 3.1639835091624477e-06, + "loss": 0.0865, + "step": 108445 + }, + { + "epoch": 5.06, + "learning_rate": 3.163199724107661e-06, + "loss": 0.117, + "step": 108450 + }, + { + "epoch": 5.06, + "learning_rate": 3.1624159390528742e-06, + "loss": 0.2528, + "step": 108455 + }, + { + "epoch": 5.06, + "learning_rate": 3.1616321539980877e-06, + "loss": 0.0903, + "step": 108460 + }, + { + "epoch": 5.06, + "learning_rate": 3.160848368943301e-06, + "loss": 0.0147, + "step": 108465 + }, + { + "epoch": 5.06, + "learning_rate": 3.1600645838885147e-06, + "loss": 0.0784, + "step": 108470 + }, + { + "epoch": 5.06, + "learning_rate": 3.159280798833728e-06, + "loss": 0.0281, + "step": 108475 + }, + { + "epoch": 5.06, + "learning_rate": 3.1584970137789416e-06, + "loss": 0.0347, + "step": 108480 + }, + { + "epoch": 5.06, + "learning_rate": 3.157713228724155e-06, + "loss": 0.0399, + "step": 108485 + }, + { + "epoch": 5.06, + "learning_rate": 3.156929443669368e-06, + "loss": 0.1013, + "step": 108490 + }, + { + "epoch": 5.06, + "learning_rate": 3.1561456586145817e-06, + "loss": 0.0987, + "step": 108495 + }, + { + "epoch": 5.06, + "learning_rate": 3.155361873559795e-06, + "loss": 0.1921, + "step": 108500 + }, + { + "epoch": 5.06, + "learning_rate": 3.1545780885050086e-06, + "loss": 0.1726, + "step": 108505 + }, + { + "epoch": 5.06, + "learning_rate": 3.153794303450222e-06, + "loss": 0.0748, + "step": 108510 + }, + { + "epoch": 5.06, + "learning_rate": 3.1530105183954356e-06, + "loss": 0.0237, + "step": 108515 + }, + { + "epoch": 5.06, + "learning_rate": 3.1522267333406487e-06, + "loss": 0.0345, + "step": 108520 + }, + { + "epoch": 5.06, + "learning_rate": 3.151442948285862e-06, + "loss": 0.0248, + "step": 108525 + }, + { + "epoch": 5.06, + "learning_rate": 3.1506591632310756e-06, + "loss": 0.051, + "step": 108530 + }, + { + "epoch": 5.06, + "learning_rate": 3.149875378176289e-06, + "loss": 0.0411, + "step": 108535 + }, + { + "epoch": 5.06, + "learning_rate": 3.1490915931215026e-06, + "loss": 0.0782, + "step": 108540 + }, + { + "epoch": 5.06, + "learning_rate": 3.148307808066716e-06, + "loss": 0.1336, + "step": 108545 + }, + { + "epoch": 5.07, + "learning_rate": 3.1475240230119296e-06, + "loss": 0.1177, + "step": 108550 + }, + { + "epoch": 5.07, + "learning_rate": 3.1467402379571426e-06, + "loss": 0.3517, + "step": 108555 + }, + { + "epoch": 5.07, + "learning_rate": 3.145956452902356e-06, + "loss": 0.1216, + "step": 108560 + }, + { + "epoch": 5.07, + "learning_rate": 3.1451726678475696e-06, + "loss": 0.0147, + "step": 108565 + }, + { + "epoch": 5.07, + "learning_rate": 3.144388882792783e-06, + "loss": 0.0304, + "step": 108570 + }, + { + "epoch": 5.07, + "learning_rate": 3.1436050977379965e-06, + "loss": 0.031, + "step": 108575 + }, + { + "epoch": 5.07, + "learning_rate": 3.14282131268321e-06, + "loss": 0.0745, + "step": 108580 + }, + { + "epoch": 5.07, + "learning_rate": 3.142037527628423e-06, + "loss": 0.0765, + "step": 108585 + }, + { + "epoch": 5.07, + "learning_rate": 3.1412537425736366e-06, + "loss": 0.0899, + "step": 108590 + }, + { + "epoch": 5.07, + "learning_rate": 3.14046995751885e-06, + "loss": 0.0818, + "step": 108595 + }, + { + "epoch": 5.07, + "learning_rate": 3.1396861724640635e-06, + "loss": 0.139, + "step": 108600 + }, + { + "epoch": 5.07, + "learning_rate": 3.138902387409277e-06, + "loss": 0.32, + "step": 108605 + }, + { + "epoch": 5.07, + "learning_rate": 3.138118602354491e-06, + "loss": 0.0856, + "step": 108610 + }, + { + "epoch": 5.07, + "learning_rate": 3.1373348172997044e-06, + "loss": 0.0122, + "step": 108615 + }, + { + "epoch": 5.07, + "learning_rate": 3.136551032244917e-06, + "loss": 0.0178, + "step": 108620 + }, + { + "epoch": 5.07, + "learning_rate": 3.1357672471901305e-06, + "loss": 0.0298, + "step": 108625 + }, + { + "epoch": 5.07, + "learning_rate": 3.1349834621353444e-06, + "loss": 0.0345, + "step": 108630 + }, + { + "epoch": 5.07, + "learning_rate": 3.134199677080558e-06, + "loss": 0.0533, + "step": 108635 + }, + { + "epoch": 5.07, + "learning_rate": 3.1334158920257714e-06, + "loss": 0.0551, + "step": 108640 + }, + { + "epoch": 5.07, + "learning_rate": 3.132632106970985e-06, + "loss": 0.1127, + "step": 108645 + }, + { + "epoch": 5.07, + "learning_rate": 3.1318483219161975e-06, + "loss": 0.1152, + "step": 108650 + }, + { + "epoch": 5.07, + "learning_rate": 3.1310645368614114e-06, + "loss": 0.3671, + "step": 108655 + }, + { + "epoch": 5.07, + "learning_rate": 3.130280751806625e-06, + "loss": 0.0803, + "step": 108660 + }, + { + "epoch": 5.07, + "learning_rate": 3.1294969667518384e-06, + "loss": 0.0161, + "step": 108665 + }, + { + "epoch": 5.07, + "learning_rate": 3.128713181697052e-06, + "loss": 0.0339, + "step": 108670 + }, + { + "epoch": 5.07, + "learning_rate": 3.1279293966422653e-06, + "loss": 0.0282, + "step": 108675 + }, + { + "epoch": 5.07, + "learning_rate": 3.127145611587479e-06, + "loss": 0.1044, + "step": 108680 + }, + { + "epoch": 5.07, + "learning_rate": 3.126361826532692e-06, + "loss": 0.0387, + "step": 108685 + }, + { + "epoch": 5.07, + "learning_rate": 3.1255780414779054e-06, + "loss": 0.1301, + "step": 108690 + }, + { + "epoch": 5.07, + "learning_rate": 3.124794256423119e-06, + "loss": 0.0328, + "step": 108695 + }, + { + "epoch": 5.07, + "learning_rate": 3.1240104713683323e-06, + "loss": 0.094, + "step": 108700 + }, + { + "epoch": 5.07, + "learning_rate": 3.123226686313546e-06, + "loss": 0.3349, + "step": 108705 + }, + { + "epoch": 5.07, + "learning_rate": 3.1224429012587593e-06, + "loss": 0.1208, + "step": 108710 + }, + { + "epoch": 5.07, + "learning_rate": 3.1216591162039723e-06, + "loss": 0.0371, + "step": 108715 + }, + { + "epoch": 5.07, + "learning_rate": 3.120875331149186e-06, + "loss": 0.0333, + "step": 108720 + }, + { + "epoch": 5.07, + "learning_rate": 3.1200915460943993e-06, + "loss": 0.0303, + "step": 108725 + }, + { + "epoch": 5.07, + "learning_rate": 3.119307761039613e-06, + "loss": 0.0473, + "step": 108730 + }, + { + "epoch": 5.07, + "learning_rate": 3.1185239759848263e-06, + "loss": 0.0617, + "step": 108735 + }, + { + "epoch": 5.07, + "learning_rate": 3.1177401909300398e-06, + "loss": 0.0783, + "step": 108740 + }, + { + "epoch": 5.07, + "learning_rate": 3.1169564058752532e-06, + "loss": 0.1141, + "step": 108745 + }, + { + "epoch": 5.07, + "learning_rate": 3.1161726208204663e-06, + "loss": 0.1789, + "step": 108750 + }, + { + "epoch": 5.07, + "learning_rate": 3.1153888357656798e-06, + "loss": 0.2666, + "step": 108755 + }, + { + "epoch": 5.07, + "learning_rate": 3.1146050507108933e-06, + "loss": 0.092, + "step": 108760 + }, + { + "epoch": 5.08, + "learning_rate": 3.1138212656561067e-06, + "loss": 0.0099, + "step": 108765 + }, + { + "epoch": 5.08, + "learning_rate": 3.1130374806013202e-06, + "loss": 0.0448, + "step": 108770 + }, + { + "epoch": 5.08, + "learning_rate": 3.1122536955465337e-06, + "loss": 0.0757, + "step": 108775 + }, + { + "epoch": 5.08, + "learning_rate": 3.1114699104917468e-06, + "loss": 0.0831, + "step": 108780 + }, + { + "epoch": 5.08, + "learning_rate": 3.1106861254369603e-06, + "loss": 0.0407, + "step": 108785 + }, + { + "epoch": 5.08, + "learning_rate": 3.1099023403821737e-06, + "loss": 0.0778, + "step": 108790 + }, + { + "epoch": 5.08, + "learning_rate": 3.1091185553273872e-06, + "loss": 0.0238, + "step": 108795 + }, + { + "epoch": 5.08, + "learning_rate": 3.1083347702726007e-06, + "loss": 0.1482, + "step": 108800 + }, + { + "epoch": 5.08, + "learning_rate": 3.107550985217814e-06, + "loss": 0.2287, + "step": 108805 + }, + { + "epoch": 5.08, + "learning_rate": 3.1067672001630277e-06, + "loss": 0.0884, + "step": 108810 + }, + { + "epoch": 5.08, + "learning_rate": 3.1059834151082407e-06, + "loss": 0.0241, + "step": 108815 + }, + { + "epoch": 5.08, + "learning_rate": 3.105199630053454e-06, + "loss": 0.0143, + "step": 108820 + }, + { + "epoch": 5.08, + "learning_rate": 3.1044158449986677e-06, + "loss": 0.0285, + "step": 108825 + }, + { + "epoch": 5.08, + "learning_rate": 3.103632059943881e-06, + "loss": 0.0392, + "step": 108830 + }, + { + "epoch": 5.08, + "learning_rate": 3.1028482748890947e-06, + "loss": 0.042, + "step": 108835 + }, + { + "epoch": 5.08, + "learning_rate": 3.102064489834308e-06, + "loss": 0.0351, + "step": 108840 + }, + { + "epoch": 5.08, + "learning_rate": 3.101280704779521e-06, + "loss": 0.1297, + "step": 108845 + }, + { + "epoch": 5.08, + "learning_rate": 3.1004969197247347e-06, + "loss": 0.1745, + "step": 108850 + }, + { + "epoch": 5.08, + "learning_rate": 3.099713134669948e-06, + "loss": 0.1901, + "step": 108855 + }, + { + "epoch": 5.08, + "learning_rate": 3.0989293496151616e-06, + "loss": 0.1214, + "step": 108860 + }, + { + "epoch": 5.08, + "learning_rate": 3.098145564560375e-06, + "loss": 0.0247, + "step": 108865 + }, + { + "epoch": 5.08, + "learning_rate": 3.0973617795055886e-06, + "loss": 0.0526, + "step": 108870 + }, + { + "epoch": 5.08, + "learning_rate": 3.0965779944508025e-06, + "loss": 0.0206, + "step": 108875 + }, + { + "epoch": 5.08, + "learning_rate": 3.095794209396015e-06, + "loss": 0.0145, + "step": 108880 + }, + { + "epoch": 5.08, + "learning_rate": 3.0950104243412286e-06, + "loss": 0.0554, + "step": 108885 + }, + { + "epoch": 5.08, + "learning_rate": 3.094226639286442e-06, + "loss": 0.1846, + "step": 108890 + }, + { + "epoch": 5.08, + "learning_rate": 3.0934428542316556e-06, + "loss": 0.2037, + "step": 108895 + }, + { + "epoch": 5.08, + "learning_rate": 3.0926590691768695e-06, + "loss": 0.1475, + "step": 108900 + }, + { + "epoch": 5.08, + "learning_rate": 3.091875284122083e-06, + "loss": 0.2563, + "step": 108905 + }, + { + "epoch": 5.08, + "learning_rate": 3.0910914990672956e-06, + "loss": 0.126, + "step": 108910 + }, + { + "epoch": 5.08, + "learning_rate": 3.090307714012509e-06, + "loss": 0.0177, + "step": 108915 + }, + { + "epoch": 5.08, + "learning_rate": 3.089523928957723e-06, + "loss": 0.0091, + "step": 108920 + }, + { + "epoch": 5.08, + "learning_rate": 3.0887401439029365e-06, + "loss": 0.0842, + "step": 108925 + }, + { + "epoch": 5.08, + "learning_rate": 3.08795635884815e-06, + "loss": 0.114, + "step": 108930 + }, + { + "epoch": 5.08, + "learning_rate": 3.0871725737933634e-06, + "loss": 0.0896, + "step": 108935 + }, + { + "epoch": 5.08, + "learning_rate": 3.086388788738577e-06, + "loss": 0.0764, + "step": 108940 + }, + { + "epoch": 5.08, + "learning_rate": 3.08560500368379e-06, + "loss": 0.2, + "step": 108945 + }, + { + "epoch": 5.08, + "learning_rate": 3.0848212186290035e-06, + "loss": 0.1154, + "step": 108950 + }, + { + "epoch": 5.08, + "learning_rate": 3.084037433574217e-06, + "loss": 0.2358, + "step": 108955 + }, + { + "epoch": 5.08, + "learning_rate": 3.0832536485194304e-06, + "loss": 0.1253, + "step": 108960 + }, + { + "epoch": 5.08, + "learning_rate": 3.082469863464644e-06, + "loss": 0.0503, + "step": 108965 + }, + { + "epoch": 5.08, + "learning_rate": 3.0816860784098574e-06, + "loss": 0.0303, + "step": 108970 + }, + { + "epoch": 5.08, + "learning_rate": 3.0809022933550705e-06, + "loss": 0.0137, + "step": 108975 + }, + { + "epoch": 5.09, + "learning_rate": 3.080118508300284e-06, + "loss": 0.0243, + "step": 108980 + }, + { + "epoch": 5.09, + "learning_rate": 3.0793347232454974e-06, + "loss": 0.0402, + "step": 108985 + }, + { + "epoch": 5.09, + "learning_rate": 3.078550938190711e-06, + "loss": 0.0642, + "step": 108990 + }, + { + "epoch": 5.09, + "learning_rate": 3.0777671531359244e-06, + "loss": 0.0397, + "step": 108995 + }, + { + "epoch": 5.09, + "learning_rate": 3.076983368081138e-06, + "loss": 0.1656, + "step": 109000 + }, + { + "epoch": 5.09, + "learning_rate": 3.0761995830263514e-06, + "loss": 0.3368, + "step": 109005 + }, + { + "epoch": 5.09, + "learning_rate": 3.0754157979715644e-06, + "loss": 0.0882, + "step": 109010 + }, + { + "epoch": 5.09, + "learning_rate": 3.074632012916778e-06, + "loss": 0.0076, + "step": 109015 + }, + { + "epoch": 5.09, + "learning_rate": 3.0738482278619914e-06, + "loss": 0.0077, + "step": 109020 + }, + { + "epoch": 5.09, + "learning_rate": 3.073064442807205e-06, + "loss": 0.0411, + "step": 109025 + }, + { + "epoch": 5.09, + "learning_rate": 3.0722806577524183e-06, + "loss": 0.044, + "step": 109030 + }, + { + "epoch": 5.09, + "learning_rate": 3.071496872697632e-06, + "loss": 0.0475, + "step": 109035 + }, + { + "epoch": 5.09, + "learning_rate": 3.070713087642845e-06, + "loss": 0.0899, + "step": 109040 + }, + { + "epoch": 5.09, + "learning_rate": 3.0699293025880584e-06, + "loss": 0.1165, + "step": 109045 + }, + { + "epoch": 5.09, + "learning_rate": 3.069145517533272e-06, + "loss": 0.2288, + "step": 109050 + }, + { + "epoch": 5.09, + "learning_rate": 3.0683617324784853e-06, + "loss": 0.217, + "step": 109055 + }, + { + "epoch": 5.09, + "learning_rate": 3.067577947423699e-06, + "loss": 0.1103, + "step": 109060 + }, + { + "epoch": 5.09, + "learning_rate": 3.0667941623689123e-06, + "loss": 0.0215, + "step": 109065 + }, + { + "epoch": 5.09, + "learning_rate": 3.0660103773141258e-06, + "loss": 0.0227, + "step": 109070 + }, + { + "epoch": 5.09, + "learning_rate": 3.065226592259339e-06, + "loss": 0.0254, + "step": 109075 + }, + { + "epoch": 5.09, + "learning_rate": 3.0644428072045523e-06, + "loss": 0.026, + "step": 109080 + }, + { + "epoch": 5.09, + "learning_rate": 3.063659022149766e-06, + "loss": 0.0432, + "step": 109085 + }, + { + "epoch": 5.09, + "learning_rate": 3.0628752370949793e-06, + "loss": 0.0691, + "step": 109090 + }, + { + "epoch": 5.09, + "learning_rate": 3.0620914520401928e-06, + "loss": 0.0711, + "step": 109095 + }, + { + "epoch": 5.09, + "learning_rate": 3.0613076669854062e-06, + "loss": 0.1133, + "step": 109100 + }, + { + "epoch": 5.09, + "learning_rate": 3.0605238819306193e-06, + "loss": 0.3968, + "step": 109105 + }, + { + "epoch": 5.09, + "learning_rate": 3.0597400968758328e-06, + "loss": 0.1226, + "step": 109110 + }, + { + "epoch": 5.09, + "learning_rate": 3.0589563118210463e-06, + "loss": 0.0235, + "step": 109115 + }, + { + "epoch": 5.09, + "learning_rate": 3.0581725267662598e-06, + "loss": 0.0248, + "step": 109120 + }, + { + "epoch": 5.09, + "learning_rate": 3.0573887417114732e-06, + "loss": 0.0422, + "step": 109125 + }, + { + "epoch": 5.09, + "learning_rate": 3.0566049566566867e-06, + "loss": 0.0903, + "step": 109130 + }, + { + "epoch": 5.09, + "learning_rate": 3.0558211716019e-06, + "loss": 0.0814, + "step": 109135 + }, + { + "epoch": 5.09, + "learning_rate": 3.0550373865471133e-06, + "loss": 0.0624, + "step": 109140 + }, + { + "epoch": 5.09, + "learning_rate": 3.0542536014923267e-06, + "loss": 0.0963, + "step": 109145 + }, + { + "epoch": 5.09, + "learning_rate": 3.0534698164375402e-06, + "loss": 0.061, + "step": 109150 + }, + { + "epoch": 5.09, + "learning_rate": 3.0526860313827537e-06, + "loss": 0.3272, + "step": 109155 + }, + { + "epoch": 5.09, + "learning_rate": 3.051902246327967e-06, + "loss": 0.0942, + "step": 109160 + }, + { + "epoch": 5.09, + "learning_rate": 3.051118461273181e-06, + "loss": 0.0315, + "step": 109165 + }, + { + "epoch": 5.09, + "learning_rate": 3.0503346762183937e-06, + "loss": 0.0237, + "step": 109170 + }, + { + "epoch": 5.09, + "learning_rate": 3.049550891163607e-06, + "loss": 0.0603, + "step": 109175 + }, + { + "epoch": 5.09, + "learning_rate": 3.0487671061088207e-06, + "loss": 0.0589, + "step": 109180 + }, + { + "epoch": 5.09, + "learning_rate": 3.047983321054034e-06, + "loss": 0.1033, + "step": 109185 + }, + { + "epoch": 5.09, + "learning_rate": 3.047199535999248e-06, + "loss": 0.0501, + "step": 109190 + }, + { + "epoch": 5.1, + "learning_rate": 3.0464157509444616e-06, + "loss": 0.0479, + "step": 109195 + }, + { + "epoch": 5.1, + "learning_rate": 3.045631965889675e-06, + "loss": 0.1008, + "step": 109200 + }, + { + "epoch": 5.1, + "learning_rate": 3.0448481808348877e-06, + "loss": 0.2205, + "step": 109205 + }, + { + "epoch": 5.1, + "learning_rate": 3.0440643957801016e-06, + "loss": 0.0888, + "step": 109210 + }, + { + "epoch": 5.1, + "learning_rate": 3.043280610725315e-06, + "loss": 0.0933, + "step": 109215 + }, + { + "epoch": 5.1, + "learning_rate": 3.0424968256705285e-06, + "loss": 0.0607, + "step": 109220 + }, + { + "epoch": 5.1, + "learning_rate": 3.041713040615742e-06, + "loss": 0.0276, + "step": 109225 + }, + { + "epoch": 5.1, + "learning_rate": 3.0409292555609555e-06, + "loss": 0.0648, + "step": 109230 + }, + { + "epoch": 5.1, + "learning_rate": 3.0401454705061686e-06, + "loss": 0.0271, + "step": 109235 + }, + { + "epoch": 5.1, + "learning_rate": 3.039361685451382e-06, + "loss": 0.0328, + "step": 109240 + }, + { + "epoch": 5.1, + "learning_rate": 3.0385779003965955e-06, + "loss": 0.0817, + "step": 109245 + }, + { + "epoch": 5.1, + "learning_rate": 3.037794115341809e-06, + "loss": 0.1338, + "step": 109250 + }, + { + "epoch": 5.1, + "learning_rate": 3.0370103302870225e-06, + "loss": 0.322, + "step": 109255 + }, + { + "epoch": 5.1, + "learning_rate": 3.036226545232236e-06, + "loss": 0.0959, + "step": 109260 + }, + { + "epoch": 5.1, + "learning_rate": 3.0354427601774495e-06, + "loss": 0.0177, + "step": 109265 + }, + { + "epoch": 5.1, + "learning_rate": 3.0346589751226625e-06, + "loss": 0.0612, + "step": 109270 + }, + { + "epoch": 5.1, + "learning_rate": 3.033875190067876e-06, + "loss": 0.0862, + "step": 109275 + }, + { + "epoch": 5.1, + "learning_rate": 3.0330914050130895e-06, + "loss": 0.0352, + "step": 109280 + }, + { + "epoch": 5.1, + "learning_rate": 3.032307619958303e-06, + "loss": 0.0241, + "step": 109285 + }, + { + "epoch": 5.1, + "learning_rate": 3.0315238349035165e-06, + "loss": 0.0974, + "step": 109290 + }, + { + "epoch": 5.1, + "learning_rate": 3.03074004984873e-06, + "loss": 0.0794, + "step": 109295 + }, + { + "epoch": 5.1, + "learning_rate": 3.029956264793943e-06, + "loss": 0.1266, + "step": 109300 + }, + { + "epoch": 5.1, + "learning_rate": 3.0291724797391565e-06, + "loss": 0.2129, + "step": 109305 + }, + { + "epoch": 5.1, + "learning_rate": 3.02838869468437e-06, + "loss": 0.1213, + "step": 109310 + }, + { + "epoch": 5.1, + "learning_rate": 3.0276049096295834e-06, + "loss": 0.0227, + "step": 109315 + }, + { + "epoch": 5.1, + "learning_rate": 3.026821124574797e-06, + "loss": 0.019, + "step": 109320 + }, + { + "epoch": 5.1, + "learning_rate": 3.0260373395200104e-06, + "loss": 0.0308, + "step": 109325 + }, + { + "epoch": 5.1, + "learning_rate": 3.025253554465224e-06, + "loss": 0.091, + "step": 109330 + }, + { + "epoch": 5.1, + "learning_rate": 3.024469769410437e-06, + "loss": 0.0614, + "step": 109335 + }, + { + "epoch": 5.1, + "learning_rate": 3.0236859843556504e-06, + "loss": 0.0869, + "step": 109340 + }, + { + "epoch": 5.1, + "learning_rate": 3.022902199300864e-06, + "loss": 0.0439, + "step": 109345 + }, + { + "epoch": 5.1, + "learning_rate": 3.0221184142460774e-06, + "loss": 0.1748, + "step": 109350 + }, + { + "epoch": 5.1, + "learning_rate": 3.021334629191291e-06, + "loss": 0.2627, + "step": 109355 + }, + { + "epoch": 5.1, + "learning_rate": 3.0205508441365044e-06, + "loss": 0.113, + "step": 109360 + }, + { + "epoch": 5.1, + "learning_rate": 3.0197670590817174e-06, + "loss": 0.0324, + "step": 109365 + }, + { + "epoch": 5.1, + "learning_rate": 3.018983274026931e-06, + "loss": 0.0451, + "step": 109370 + }, + { + "epoch": 5.1, + "learning_rate": 3.0181994889721444e-06, + "loss": 0.0254, + "step": 109375 + }, + { + "epoch": 5.1, + "learning_rate": 3.017415703917358e-06, + "loss": 0.0326, + "step": 109380 + }, + { + "epoch": 5.1, + "learning_rate": 3.0166319188625713e-06, + "loss": 0.076, + "step": 109385 + }, + { + "epoch": 5.1, + "learning_rate": 3.015848133807785e-06, + "loss": 0.1163, + "step": 109390 + }, + { + "epoch": 5.1, + "learning_rate": 3.0150643487529983e-06, + "loss": 0.1397, + "step": 109395 + }, + { + "epoch": 5.1, + "learning_rate": 3.0142805636982114e-06, + "loss": 0.1042, + "step": 109400 + }, + { + "epoch": 5.1, + "learning_rate": 3.013496778643425e-06, + "loss": 0.1983, + "step": 109405 + }, + { + "epoch": 5.11, + "learning_rate": 3.0127129935886383e-06, + "loss": 0.1144, + "step": 109410 + }, + { + "epoch": 5.11, + "learning_rate": 3.011929208533852e-06, + "loss": 0.0275, + "step": 109415 + }, + { + "epoch": 5.11, + "learning_rate": 3.0111454234790653e-06, + "loss": 0.0287, + "step": 109420 + }, + { + "epoch": 5.11, + "learning_rate": 3.0103616384242788e-06, + "loss": 0.0411, + "step": 109425 + }, + { + "epoch": 5.11, + "learning_rate": 3.009577853369492e-06, + "loss": 0.0587, + "step": 109430 + }, + { + "epoch": 5.11, + "learning_rate": 3.0087940683147053e-06, + "loss": 0.0785, + "step": 109435 + }, + { + "epoch": 5.11, + "learning_rate": 3.008010283259919e-06, + "loss": 0.0662, + "step": 109440 + }, + { + "epoch": 5.11, + "learning_rate": 3.0072264982051323e-06, + "loss": 0.0824, + "step": 109445 + }, + { + "epoch": 5.11, + "learning_rate": 3.0064427131503458e-06, + "loss": 0.1646, + "step": 109450 + }, + { + "epoch": 5.11, + "learning_rate": 3.0058156851065166e-06, + "loss": 0.2444, + "step": 109455 + }, + { + "epoch": 5.11, + "learning_rate": 3.00503190005173e-06, + "loss": 0.1169, + "step": 109460 + }, + { + "epoch": 5.11, + "learning_rate": 3.004248114996943e-06, + "loss": 0.0347, + "step": 109465 + }, + { + "epoch": 5.11, + "learning_rate": 3.0034643299421567e-06, + "loss": 0.0452, + "step": 109470 + }, + { + "epoch": 5.11, + "learning_rate": 3.00268054488737e-06, + "loss": 0.0272, + "step": 109475 + }, + { + "epoch": 5.11, + "learning_rate": 3.0018967598325836e-06, + "loss": 0.0618, + "step": 109480 + }, + { + "epoch": 5.11, + "learning_rate": 3.001112974777797e-06, + "loss": 0.0832, + "step": 109485 + }, + { + "epoch": 5.11, + "learning_rate": 3.0003291897230106e-06, + "loss": 0.06, + "step": 109490 + }, + { + "epoch": 5.11, + "learning_rate": 2.9995454046682245e-06, + "loss": 0.1029, + "step": 109495 + }, + { + "epoch": 5.11, + "learning_rate": 2.998761619613437e-06, + "loss": 0.1437, + "step": 109500 + }, + { + "epoch": 5.11, + "learning_rate": 2.9979778345586506e-06, + "loss": 0.3037, + "step": 109505 + }, + { + "epoch": 5.11, + "learning_rate": 2.997194049503864e-06, + "loss": 0.1295, + "step": 109510 + }, + { + "epoch": 5.11, + "learning_rate": 2.9964102644490776e-06, + "loss": 0.0208, + "step": 109515 + }, + { + "epoch": 5.11, + "learning_rate": 2.9956264793942915e-06, + "loss": 0.0875, + "step": 109520 + }, + { + "epoch": 5.11, + "learning_rate": 2.994842694339505e-06, + "loss": 0.0659, + "step": 109525 + }, + { + "epoch": 5.11, + "learning_rate": 2.9940589092847176e-06, + "loss": 0.0814, + "step": 109530 + }, + { + "epoch": 5.11, + "learning_rate": 2.993275124229931e-06, + "loss": 0.0154, + "step": 109535 + }, + { + "epoch": 5.11, + "learning_rate": 2.992491339175145e-06, + "loss": 0.0495, + "step": 109540 + }, + { + "epoch": 5.11, + "learning_rate": 2.9917075541203585e-06, + "loss": 0.0837, + "step": 109545 + }, + { + "epoch": 5.11, + "learning_rate": 2.990923769065572e-06, + "loss": 0.0725, + "step": 109550 + }, + { + "epoch": 5.11, + "learning_rate": 2.9901399840107854e-06, + "loss": 0.2416, + "step": 109555 + }, + { + "epoch": 5.11, + "learning_rate": 2.989356198955999e-06, + "loss": 0.1064, + "step": 109560 + }, + { + "epoch": 5.11, + "learning_rate": 2.988572413901212e-06, + "loss": 0.0053, + "step": 109565 + }, + { + "epoch": 5.11, + "learning_rate": 2.9877886288464255e-06, + "loss": 0.0263, + "step": 109570 + }, + { + "epoch": 5.11, + "learning_rate": 2.987004843791639e-06, + "loss": 0.0083, + "step": 109575 + }, + { + "epoch": 5.11, + "learning_rate": 2.9862210587368524e-06, + "loss": 0.0307, + "step": 109580 + }, + { + "epoch": 5.11, + "learning_rate": 2.985437273682066e-06, + "loss": 0.1051, + "step": 109585 + }, + { + "epoch": 5.11, + "learning_rate": 2.9846534886272794e-06, + "loss": 0.0576, + "step": 109590 + }, + { + "epoch": 5.11, + "learning_rate": 2.9838697035724924e-06, + "loss": 0.1378, + "step": 109595 + }, + { + "epoch": 5.11, + "learning_rate": 2.983085918517706e-06, + "loss": 0.1131, + "step": 109600 + }, + { + "epoch": 5.11, + "learning_rate": 2.9823021334629194e-06, + "loss": 0.193, + "step": 109605 + }, + { + "epoch": 5.11, + "learning_rate": 2.981518348408133e-06, + "loss": 0.1186, + "step": 109610 + }, + { + "epoch": 5.11, + "learning_rate": 2.9807345633533464e-06, + "loss": 0.0163, + "step": 109615 + }, + { + "epoch": 5.12, + "learning_rate": 2.97995077829856e-06, + "loss": 0.0132, + "step": 109620 + }, + { + "epoch": 5.12, + "learning_rate": 2.9791669932437733e-06, + "loss": 0.0478, + "step": 109625 + }, + { + "epoch": 5.12, + "learning_rate": 2.9783832081889864e-06, + "loss": 0.0182, + "step": 109630 + }, + { + "epoch": 5.12, + "learning_rate": 2.9775994231342e-06, + "loss": 0.0693, + "step": 109635 + }, + { + "epoch": 5.12, + "learning_rate": 2.9768156380794134e-06, + "loss": 0.0355, + "step": 109640 + }, + { + "epoch": 5.12, + "learning_rate": 2.976031853024627e-06, + "loss": 0.081, + "step": 109645 + }, + { + "epoch": 5.12, + "learning_rate": 2.9752480679698403e-06, + "loss": 0.1178, + "step": 109650 + }, + { + "epoch": 5.12, + "learning_rate": 2.974464282915054e-06, + "loss": 0.2502, + "step": 109655 + }, + { + "epoch": 5.12, + "learning_rate": 2.973680497860267e-06, + "loss": 0.0586, + "step": 109660 + }, + { + "epoch": 5.12, + "learning_rate": 2.9728967128054804e-06, + "loss": 0.0152, + "step": 109665 + }, + { + "epoch": 5.12, + "learning_rate": 2.972112927750694e-06, + "loss": 0.2016, + "step": 109670 + }, + { + "epoch": 5.12, + "learning_rate": 2.9713291426959073e-06, + "loss": 0.062, + "step": 109675 + }, + { + "epoch": 5.12, + "learning_rate": 2.970545357641121e-06, + "loss": 0.036, + "step": 109680 + }, + { + "epoch": 5.12, + "learning_rate": 2.9697615725863343e-06, + "loss": 0.0847, + "step": 109685 + }, + { + "epoch": 5.12, + "learning_rate": 2.9689777875315478e-06, + "loss": 0.0873, + "step": 109690 + }, + { + "epoch": 5.12, + "learning_rate": 2.968194002476761e-06, + "loss": 0.0534, + "step": 109695 + }, + { + "epoch": 5.12, + "learning_rate": 2.9674102174219743e-06, + "loss": 0.0872, + "step": 109700 + }, + { + "epoch": 5.12, + "learning_rate": 2.9666264323671878e-06, + "loss": 0.3284, + "step": 109705 + }, + { + "epoch": 5.12, + "learning_rate": 2.9658426473124013e-06, + "loss": 0.1, + "step": 109710 + }, + { + "epoch": 5.12, + "learning_rate": 2.9650588622576148e-06, + "loss": 0.0315, + "step": 109715 + }, + { + "epoch": 5.12, + "learning_rate": 2.9642750772028282e-06, + "loss": 0.036, + "step": 109720 + }, + { + "epoch": 5.12, + "learning_rate": 2.9634912921480413e-06, + "loss": 0.0219, + "step": 109725 + }, + { + "epoch": 5.12, + "learning_rate": 2.9627075070932548e-06, + "loss": 0.0624, + "step": 109730 + }, + { + "epoch": 5.12, + "learning_rate": 2.9619237220384683e-06, + "loss": 0.073, + "step": 109735 + }, + { + "epoch": 5.12, + "learning_rate": 2.9611399369836817e-06, + "loss": 0.0663, + "step": 109740 + }, + { + "epoch": 5.12, + "learning_rate": 2.9603561519288952e-06, + "loss": 0.1624, + "step": 109745 + }, + { + "epoch": 5.12, + "learning_rate": 2.9595723668741087e-06, + "loss": 0.1061, + "step": 109750 + }, + { + "epoch": 5.12, + "learning_rate": 2.958788581819322e-06, + "loss": 0.2459, + "step": 109755 + }, + { + "epoch": 5.12, + "learning_rate": 2.9580047967645352e-06, + "loss": 0.1499, + "step": 109760 + }, + { + "epoch": 5.12, + "learning_rate": 2.9572210117097487e-06, + "loss": 0.0087, + "step": 109765 + }, + { + "epoch": 5.12, + "learning_rate": 2.956437226654962e-06, + "loss": 0.0151, + "step": 109770 + }, + { + "epoch": 5.12, + "learning_rate": 2.9556534416001757e-06, + "loss": 0.0055, + "step": 109775 + }, + { + "epoch": 5.12, + "learning_rate": 2.954869656545389e-06, + "loss": 0.0309, + "step": 109780 + }, + { + "epoch": 5.12, + "learning_rate": 2.954085871490603e-06, + "loss": 0.0355, + "step": 109785 + }, + { + "epoch": 5.12, + "learning_rate": 2.9533020864358157e-06, + "loss": 0.0631, + "step": 109790 + }, + { + "epoch": 5.12, + "learning_rate": 2.952518301381029e-06, + "loss": 0.1262, + "step": 109795 + }, + { + "epoch": 5.12, + "learning_rate": 2.9517345163262427e-06, + "loss": 0.0788, + "step": 109800 + }, + { + "epoch": 5.12, + "learning_rate": 2.950950731271456e-06, + "loss": 0.2242, + "step": 109805 + }, + { + "epoch": 5.12, + "learning_rate": 2.95016694621667e-06, + "loss": 0.1869, + "step": 109810 + }, + { + "epoch": 5.12, + "learning_rate": 2.9493831611618835e-06, + "loss": 0.0294, + "step": 109815 + }, + { + "epoch": 5.12, + "learning_rate": 2.948599376107097e-06, + "loss": 0.0186, + "step": 109820 + }, + { + "epoch": 5.12, + "learning_rate": 2.9478155910523097e-06, + "loss": 0.007, + "step": 109825 + }, + { + "epoch": 5.12, + "learning_rate": 2.9470318059975236e-06, + "loss": 0.0537, + "step": 109830 + }, + { + "epoch": 5.13, + "learning_rate": 2.946248020942737e-06, + "loss": 0.064, + "step": 109835 + }, + { + "epoch": 5.13, + "learning_rate": 2.9454642358879505e-06, + "loss": 0.0366, + "step": 109840 + }, + { + "epoch": 5.13, + "learning_rate": 2.944680450833164e-06, + "loss": 0.0651, + "step": 109845 + }, + { + "epoch": 5.13, + "learning_rate": 2.9438966657783775e-06, + "loss": 0.118, + "step": 109850 + }, + { + "epoch": 5.13, + "learning_rate": 2.9431128807235906e-06, + "loss": 0.1959, + "step": 109855 + }, + { + "epoch": 5.13, + "learning_rate": 2.942329095668804e-06, + "loss": 0.117, + "step": 109860 + }, + { + "epoch": 5.13, + "learning_rate": 2.9415453106140175e-06, + "loss": 0.002, + "step": 109865 + }, + { + "epoch": 5.13, + "learning_rate": 2.940761525559231e-06, + "loss": 0.0134, + "step": 109870 + }, + { + "epoch": 5.13, + "learning_rate": 2.9399777405044445e-06, + "loss": 0.0329, + "step": 109875 + }, + { + "epoch": 5.13, + "learning_rate": 2.939193955449658e-06, + "loss": 0.0411, + "step": 109880 + }, + { + "epoch": 5.13, + "learning_rate": 2.9384101703948715e-06, + "loss": 0.0449, + "step": 109885 + }, + { + "epoch": 5.13, + "learning_rate": 2.9376263853400845e-06, + "loss": 0.0558, + "step": 109890 + }, + { + "epoch": 5.13, + "learning_rate": 2.936842600285298e-06, + "loss": 0.1279, + "step": 109895 + }, + { + "epoch": 5.13, + "learning_rate": 2.9360588152305115e-06, + "loss": 0.0998, + "step": 109900 + }, + { + "epoch": 5.13, + "learning_rate": 2.935275030175725e-06, + "loss": 0.2597, + "step": 109905 + }, + { + "epoch": 5.13, + "learning_rate": 2.9344912451209384e-06, + "loss": 0.1405, + "step": 109910 + }, + { + "epoch": 5.13, + "learning_rate": 2.933707460066152e-06, + "loss": 0.0211, + "step": 109915 + }, + { + "epoch": 5.13, + "learning_rate": 2.932923675011365e-06, + "loss": 0.0207, + "step": 109920 + }, + { + "epoch": 5.13, + "learning_rate": 2.9321398899565785e-06, + "loss": 0.0642, + "step": 109925 + }, + { + "epoch": 5.13, + "learning_rate": 2.931356104901792e-06, + "loss": 0.0413, + "step": 109930 + }, + { + "epoch": 5.13, + "learning_rate": 2.9305723198470054e-06, + "loss": 0.0165, + "step": 109935 + }, + { + "epoch": 5.13, + "learning_rate": 2.929788534792219e-06, + "loss": 0.1093, + "step": 109940 + }, + { + "epoch": 5.13, + "learning_rate": 2.9290047497374324e-06, + "loss": 0.0971, + "step": 109945 + }, + { + "epoch": 5.13, + "learning_rate": 2.928220964682646e-06, + "loss": 0.0756, + "step": 109950 + }, + { + "epoch": 5.13, + "learning_rate": 2.927437179627859e-06, + "loss": 0.2205, + "step": 109955 + }, + { + "epoch": 5.13, + "learning_rate": 2.9266533945730724e-06, + "loss": 0.1388, + "step": 109960 + }, + { + "epoch": 5.13, + "learning_rate": 2.925869609518286e-06, + "loss": 0.021, + "step": 109965 + }, + { + "epoch": 5.13, + "learning_rate": 2.9250858244634994e-06, + "loss": 0.0115, + "step": 109970 + }, + { + "epoch": 5.13, + "learning_rate": 2.924302039408713e-06, + "loss": 0.0675, + "step": 109975 + }, + { + "epoch": 5.13, + "learning_rate": 2.9235182543539263e-06, + "loss": 0.0713, + "step": 109980 + }, + { + "epoch": 5.13, + "learning_rate": 2.9227344692991394e-06, + "loss": 0.0616, + "step": 109985 + }, + { + "epoch": 5.13, + "learning_rate": 2.921950684244353e-06, + "loss": 0.0719, + "step": 109990 + }, + { + "epoch": 5.13, + "learning_rate": 2.9211668991895664e-06, + "loss": 0.1247, + "step": 109995 + }, + { + "epoch": 5.13, + "learning_rate": 2.92038311413478e-06, + "loss": 0.0793, + "step": 110000 + }, + { + "epoch": 5.13, + "learning_rate": 2.9195993290799933e-06, + "loss": 0.1546, + "step": 110005 + }, + { + "epoch": 5.13, + "learning_rate": 2.918815544025207e-06, + "loss": 0.0756, + "step": 110010 + }, + { + "epoch": 5.13, + "learning_rate": 2.9180317589704203e-06, + "loss": 0.0188, + "step": 110015 + }, + { + "epoch": 5.13, + "learning_rate": 2.9172479739156334e-06, + "loss": 0.003, + "step": 110020 + }, + { + "epoch": 5.13, + "learning_rate": 2.916464188860847e-06, + "loss": 0.0208, + "step": 110025 + }, + { + "epoch": 5.13, + "learning_rate": 2.9156804038060603e-06, + "loss": 0.0301, + "step": 110030 + }, + { + "epoch": 5.13, + "learning_rate": 2.914896618751274e-06, + "loss": 0.0485, + "step": 110035 + }, + { + "epoch": 5.13, + "learning_rate": 2.9141128336964873e-06, + "loss": 0.0895, + "step": 110040 + }, + { + "epoch": 5.13, + "learning_rate": 2.9133290486417008e-06, + "loss": 0.084, + "step": 110045 + }, + { + "epoch": 5.14, + "learning_rate": 2.912545263586914e-06, + "loss": 0.0461, + "step": 110050 + }, + { + "epoch": 5.14, + "learning_rate": 2.9117614785321273e-06, + "loss": 0.2127, + "step": 110055 + }, + { + "epoch": 5.14, + "learning_rate": 2.910977693477341e-06, + "loss": 0.1009, + "step": 110060 + }, + { + "epoch": 5.14, + "learning_rate": 2.9101939084225543e-06, + "loss": 0.0244, + "step": 110065 + }, + { + "epoch": 5.14, + "learning_rate": 2.9094101233677678e-06, + "loss": 0.0275, + "step": 110070 + }, + { + "epoch": 5.14, + "learning_rate": 2.9086263383129817e-06, + "loss": 0.0398, + "step": 110075 + }, + { + "epoch": 5.14, + "learning_rate": 2.907842553258195e-06, + "loss": 0.0678, + "step": 110080 + }, + { + "epoch": 5.14, + "learning_rate": 2.9070587682034078e-06, + "loss": 0.0541, + "step": 110085 + }, + { + "epoch": 5.14, + "learning_rate": 2.9062749831486213e-06, + "loss": 0.0625, + "step": 110090 + }, + { + "epoch": 5.14, + "learning_rate": 2.9054911980938347e-06, + "loss": 0.0482, + "step": 110095 + }, + { + "epoch": 5.14, + "learning_rate": 2.9047074130390486e-06, + "loss": 0.0621, + "step": 110100 + }, + { + "epoch": 5.14, + "learning_rate": 2.903923627984262e-06, + "loss": 0.2166, + "step": 110105 + }, + { + "epoch": 5.14, + "learning_rate": 2.9031398429294756e-06, + "loss": 0.0828, + "step": 110110 + }, + { + "epoch": 5.14, + "learning_rate": 2.9023560578746882e-06, + "loss": 0.0434, + "step": 110115 + }, + { + "epoch": 5.14, + "learning_rate": 2.901572272819902e-06, + "loss": 0.0287, + "step": 110120 + }, + { + "epoch": 5.14, + "learning_rate": 2.9007884877651156e-06, + "loss": 0.0693, + "step": 110125 + }, + { + "epoch": 5.14, + "learning_rate": 2.900004702710329e-06, + "loss": 0.0757, + "step": 110130 + }, + { + "epoch": 5.14, + "learning_rate": 2.8992209176555426e-06, + "loss": 0.0416, + "step": 110135 + }, + { + "epoch": 5.14, + "learning_rate": 2.898437132600756e-06, + "loss": 0.126, + "step": 110140 + }, + { + "epoch": 5.14, + "learning_rate": 2.8976533475459696e-06, + "loss": 0.0551, + "step": 110145 + }, + { + "epoch": 5.14, + "learning_rate": 2.8968695624911826e-06, + "loss": 0.1697, + "step": 110150 + }, + { + "epoch": 5.14, + "learning_rate": 2.896085777436396e-06, + "loss": 0.1895, + "step": 110155 + }, + { + "epoch": 5.14, + "learning_rate": 2.8953019923816096e-06, + "loss": 0.0676, + "step": 110160 + }, + { + "epoch": 5.14, + "learning_rate": 2.894518207326823e-06, + "loss": 0.0277, + "step": 110165 + }, + { + "epoch": 5.14, + "learning_rate": 2.8937344222720366e-06, + "loss": 0.0697, + "step": 110170 + }, + { + "epoch": 5.14, + "learning_rate": 2.89295063721725e-06, + "loss": 0.0967, + "step": 110175 + }, + { + "epoch": 5.14, + "learning_rate": 2.892166852162463e-06, + "loss": 0.0717, + "step": 110180 + }, + { + "epoch": 5.14, + "learning_rate": 2.8913830671076766e-06, + "loss": 0.0476, + "step": 110185 + }, + { + "epoch": 5.14, + "learning_rate": 2.89059928205289e-06, + "loss": 0.0721, + "step": 110190 + }, + { + "epoch": 5.14, + "learning_rate": 2.8898154969981035e-06, + "loss": 0.1644, + "step": 110195 + }, + { + "epoch": 5.14, + "learning_rate": 2.889031711943317e-06, + "loss": 0.1193, + "step": 110200 + }, + { + "epoch": 5.14, + "learning_rate": 2.8882479268885305e-06, + "loss": 0.3526, + "step": 110205 + }, + { + "epoch": 5.14, + "learning_rate": 2.887464141833744e-06, + "loss": 0.1362, + "step": 110210 + }, + { + "epoch": 5.14, + "learning_rate": 2.886680356778957e-06, + "loss": 0.021, + "step": 110215 + }, + { + "epoch": 5.14, + "learning_rate": 2.8858965717241705e-06, + "loss": 0.0532, + "step": 110220 + }, + { + "epoch": 5.14, + "learning_rate": 2.885112786669384e-06, + "loss": 0.0698, + "step": 110225 + }, + { + "epoch": 5.14, + "learning_rate": 2.8843290016145975e-06, + "loss": 0.0433, + "step": 110230 + }, + { + "epoch": 5.14, + "learning_rate": 2.883545216559811e-06, + "loss": 0.0936, + "step": 110235 + }, + { + "epoch": 5.14, + "learning_rate": 2.8827614315050245e-06, + "loss": 0.1025, + "step": 110240 + }, + { + "epoch": 5.14, + "learning_rate": 2.8819776464502375e-06, + "loss": 0.1063, + "step": 110245 + }, + { + "epoch": 5.14, + "learning_rate": 2.881193861395451e-06, + "loss": 0.1011, + "step": 110250 + }, + { + "epoch": 5.14, + "learning_rate": 2.8804100763406645e-06, + "loss": 0.2958, + "step": 110255 + }, + { + "epoch": 5.14, + "learning_rate": 2.879626291285878e-06, + "loss": 0.1005, + "step": 110260 + }, + { + "epoch": 5.15, + "learning_rate": 2.8788425062310914e-06, + "loss": 0.0161, + "step": 110265 + }, + { + "epoch": 5.15, + "learning_rate": 2.878058721176305e-06, + "loss": 0.0144, + "step": 110270 + }, + { + "epoch": 5.15, + "learning_rate": 2.8772749361215184e-06, + "loss": 0.032, + "step": 110275 + }, + { + "epoch": 5.15, + "learning_rate": 2.8764911510667315e-06, + "loss": 0.0751, + "step": 110280 + }, + { + "epoch": 5.15, + "learning_rate": 2.875707366011945e-06, + "loss": 0.0407, + "step": 110285 + }, + { + "epoch": 5.15, + "learning_rate": 2.8749235809571584e-06, + "loss": 0.12, + "step": 110290 + }, + { + "epoch": 5.15, + "learning_rate": 2.874139795902372e-06, + "loss": 0.0719, + "step": 110295 + }, + { + "epoch": 5.15, + "learning_rate": 2.8733560108475854e-06, + "loss": 0.0626, + "step": 110300 + }, + { + "epoch": 5.15, + "learning_rate": 2.872572225792799e-06, + "loss": 0.2607, + "step": 110305 + }, + { + "epoch": 5.15, + "learning_rate": 2.871788440738012e-06, + "loss": 0.1495, + "step": 110310 + }, + { + "epoch": 5.15, + "learning_rate": 2.8710046556832254e-06, + "loss": 0.0177, + "step": 110315 + }, + { + "epoch": 5.15, + "learning_rate": 2.870220870628439e-06, + "loss": 0.0309, + "step": 110320 + }, + { + "epoch": 5.15, + "learning_rate": 2.8694370855736524e-06, + "loss": 0.0777, + "step": 110325 + }, + { + "epoch": 5.15, + "learning_rate": 2.868653300518866e-06, + "loss": 0.0482, + "step": 110330 + }, + { + "epoch": 5.15, + "learning_rate": 2.8678695154640794e-06, + "loss": 0.0604, + "step": 110335 + }, + { + "epoch": 5.15, + "learning_rate": 2.8670857304092933e-06, + "loss": 0.0974, + "step": 110340 + }, + { + "epoch": 5.15, + "learning_rate": 2.866301945354506e-06, + "loss": 0.0613, + "step": 110345 + }, + { + "epoch": 5.15, + "learning_rate": 2.8655181602997194e-06, + "loss": 0.1618, + "step": 110350 + }, + { + "epoch": 5.15, + "learning_rate": 2.864734375244933e-06, + "loss": 0.2705, + "step": 110355 + }, + { + "epoch": 5.15, + "learning_rate": 2.8639505901901463e-06, + "loss": 0.1574, + "step": 110360 + }, + { + "epoch": 5.15, + "learning_rate": 2.8631668051353602e-06, + "loss": 0.0143, + "step": 110365 + }, + { + "epoch": 5.15, + "learning_rate": 2.8623830200805737e-06, + "loss": 0.0486, + "step": 110370 + }, + { + "epoch": 5.15, + "learning_rate": 2.8615992350257864e-06, + "loss": 0.0473, + "step": 110375 + }, + { + "epoch": 5.15, + "learning_rate": 2.860815449971e-06, + "loss": 0.0384, + "step": 110380 + }, + { + "epoch": 5.15, + "learning_rate": 2.8600316649162133e-06, + "loss": 0.0316, + "step": 110385 + }, + { + "epoch": 5.15, + "learning_rate": 2.8592478798614272e-06, + "loss": 0.0565, + "step": 110390 + }, + { + "epoch": 5.15, + "learning_rate": 2.8584640948066407e-06, + "loss": 0.109, + "step": 110395 + }, + { + "epoch": 5.15, + "learning_rate": 2.857680309751854e-06, + "loss": 0.1209, + "step": 110400 + }, + { + "epoch": 5.15, + "learning_rate": 2.8568965246970677e-06, + "loss": 0.2363, + "step": 110405 + }, + { + "epoch": 5.15, + "learning_rate": 2.8561127396422807e-06, + "loss": 0.1161, + "step": 110410 + }, + { + "epoch": 5.15, + "learning_rate": 2.8553289545874942e-06, + "loss": 0.0243, + "step": 110415 + }, + { + "epoch": 5.15, + "learning_rate": 2.8545451695327077e-06, + "loss": 0.0396, + "step": 110420 + }, + { + "epoch": 5.15, + "learning_rate": 2.853761384477921e-06, + "loss": 0.0399, + "step": 110425 + }, + { + "epoch": 5.15, + "learning_rate": 2.8529775994231347e-06, + "loss": 0.0544, + "step": 110430 + }, + { + "epoch": 5.15, + "learning_rate": 2.852193814368348e-06, + "loss": 0.1116, + "step": 110435 + }, + { + "epoch": 5.15, + "learning_rate": 2.851410029313561e-06, + "loss": 0.1346, + "step": 110440 + }, + { + "epoch": 5.15, + "learning_rate": 2.8506262442587747e-06, + "loss": 0.0918, + "step": 110445 + }, + { + "epoch": 5.15, + "learning_rate": 2.849842459203988e-06, + "loss": 0.1715, + "step": 110450 + }, + { + "epoch": 5.15, + "learning_rate": 2.8490586741492017e-06, + "loss": 0.2296, + "step": 110455 + }, + { + "epoch": 5.15, + "learning_rate": 2.848274889094415e-06, + "loss": 0.0733, + "step": 110460 + }, + { + "epoch": 5.15, + "learning_rate": 2.8474911040396286e-06, + "loss": 0.0271, + "step": 110465 + }, + { + "epoch": 5.15, + "learning_rate": 2.846707318984842e-06, + "loss": 0.0547, + "step": 110470 + }, + { + "epoch": 5.15, + "learning_rate": 2.845923533930055e-06, + "loss": 0.0504, + "step": 110475 + }, + { + "epoch": 5.16, + "learning_rate": 2.8451397488752686e-06, + "loss": 0.0243, + "step": 110480 + }, + { + "epoch": 5.16, + "learning_rate": 2.844355963820482e-06, + "loss": 0.0742, + "step": 110485 + }, + { + "epoch": 5.16, + "learning_rate": 2.8435721787656956e-06, + "loss": 0.0256, + "step": 110490 + }, + { + "epoch": 5.16, + "learning_rate": 2.842788393710909e-06, + "loss": 0.0845, + "step": 110495 + }, + { + "epoch": 5.16, + "learning_rate": 2.8420046086561226e-06, + "loss": 0.1336, + "step": 110500 + }, + { + "epoch": 5.16, + "learning_rate": 2.8412208236013356e-06, + "loss": 0.2542, + "step": 110505 + }, + { + "epoch": 5.16, + "learning_rate": 2.840437038546549e-06, + "loss": 0.1028, + "step": 110510 + }, + { + "epoch": 5.16, + "learning_rate": 2.8396532534917626e-06, + "loss": 0.045, + "step": 110515 + }, + { + "epoch": 5.16, + "learning_rate": 2.838869468436976e-06, + "loss": 0.0251, + "step": 110520 + }, + { + "epoch": 5.16, + "learning_rate": 2.8380856833821896e-06, + "loss": 0.0987, + "step": 110525 + }, + { + "epoch": 5.16, + "learning_rate": 2.837301898327403e-06, + "loss": 0.0203, + "step": 110530 + }, + { + "epoch": 5.16, + "learning_rate": 2.8365181132726165e-06, + "loss": 0.059, + "step": 110535 + }, + { + "epoch": 5.16, + "learning_rate": 2.8357343282178296e-06, + "loss": 0.089, + "step": 110540 + }, + { + "epoch": 5.16, + "learning_rate": 2.834950543163043e-06, + "loss": 0.0852, + "step": 110545 + }, + { + "epoch": 5.16, + "learning_rate": 2.8341667581082565e-06, + "loss": 0.2171, + "step": 110550 + }, + { + "epoch": 5.16, + "learning_rate": 2.83338297305347e-06, + "loss": 0.2243, + "step": 110555 + }, + { + "epoch": 5.16, + "learning_rate": 2.8325991879986835e-06, + "loss": 0.1048, + "step": 110560 + }, + { + "epoch": 5.16, + "learning_rate": 2.831815402943897e-06, + "loss": 0.0299, + "step": 110565 + }, + { + "epoch": 5.16, + "learning_rate": 2.83103161788911e-06, + "loss": 0.0335, + "step": 110570 + }, + { + "epoch": 5.16, + "learning_rate": 2.8302478328343235e-06, + "loss": 0.0347, + "step": 110575 + }, + { + "epoch": 5.16, + "learning_rate": 2.829464047779537e-06, + "loss": 0.0404, + "step": 110580 + }, + { + "epoch": 5.16, + "learning_rate": 2.8286802627247505e-06, + "loss": 0.0475, + "step": 110585 + }, + { + "epoch": 5.16, + "learning_rate": 2.827896477669964e-06, + "loss": 0.0479, + "step": 110590 + }, + { + "epoch": 5.16, + "learning_rate": 2.8271126926151775e-06, + "loss": 0.1873, + "step": 110595 + }, + { + "epoch": 5.16, + "learning_rate": 2.826328907560391e-06, + "loss": 0.1729, + "step": 110600 + }, + { + "epoch": 5.16, + "learning_rate": 2.825545122505604e-06, + "loss": 0.2594, + "step": 110605 + }, + { + "epoch": 5.16, + "learning_rate": 2.8247613374508175e-06, + "loss": 0.1163, + "step": 110610 + }, + { + "epoch": 5.16, + "learning_rate": 2.823977552396031e-06, + "loss": 0.0277, + "step": 110615 + }, + { + "epoch": 5.16, + "learning_rate": 2.8231937673412445e-06, + "loss": 0.0298, + "step": 110620 + }, + { + "epoch": 5.16, + "learning_rate": 2.822409982286458e-06, + "loss": 0.0248, + "step": 110625 + }, + { + "epoch": 5.16, + "learning_rate": 2.821626197231672e-06, + "loss": 0.0549, + "step": 110630 + }, + { + "epoch": 5.16, + "learning_rate": 2.8208424121768845e-06, + "loss": 0.1356, + "step": 110635 + }, + { + "epoch": 5.16, + "learning_rate": 2.820058627122098e-06, + "loss": 0.0816, + "step": 110640 + }, + { + "epoch": 5.16, + "learning_rate": 2.8192748420673114e-06, + "loss": 0.1013, + "step": 110645 + }, + { + "epoch": 5.16, + "learning_rate": 2.818491057012525e-06, + "loss": 0.0936, + "step": 110650 + }, + { + "epoch": 5.16, + "learning_rate": 2.817707271957739e-06, + "loss": 0.2366, + "step": 110655 + }, + { + "epoch": 5.16, + "learning_rate": 2.8169234869029523e-06, + "loss": 0.0688, + "step": 110660 + }, + { + "epoch": 5.16, + "learning_rate": 2.816139701848166e-06, + "loss": 0.0202, + "step": 110665 + }, + { + "epoch": 5.16, + "learning_rate": 2.8153559167933784e-06, + "loss": 0.0291, + "step": 110670 + }, + { + "epoch": 5.16, + "learning_rate": 2.814572131738592e-06, + "loss": 0.0128, + "step": 110675 + }, + { + "epoch": 5.16, + "learning_rate": 2.813788346683806e-06, + "loss": 0.0661, + "step": 110680 + }, + { + "epoch": 5.16, + "learning_rate": 2.8130045616290193e-06, + "loss": 0.0514, + "step": 110685 + }, + { + "epoch": 5.16, + "learning_rate": 2.8122207765742328e-06, + "loss": 0.1246, + "step": 110690 + }, + { + "epoch": 5.17, + "learning_rate": 2.8114369915194463e-06, + "loss": 0.0571, + "step": 110695 + }, + { + "epoch": 5.17, + "learning_rate": 2.8106532064646593e-06, + "loss": 0.1807, + "step": 110700 + }, + { + "epoch": 5.17, + "learning_rate": 2.809869421409873e-06, + "loss": 0.3077, + "step": 110705 + }, + { + "epoch": 5.17, + "learning_rate": 2.8090856363550863e-06, + "loss": 0.1025, + "step": 110710 + }, + { + "epoch": 5.17, + "learning_rate": 2.8083018513002998e-06, + "loss": 0.0255, + "step": 110715 + }, + { + "epoch": 5.17, + "learning_rate": 2.8075180662455132e-06, + "loss": 0.0559, + "step": 110720 + }, + { + "epoch": 5.17, + "learning_rate": 2.8067342811907267e-06, + "loss": 0.0272, + "step": 110725 + }, + { + "epoch": 5.17, + "learning_rate": 2.8059504961359402e-06, + "loss": 0.069, + "step": 110730 + }, + { + "epoch": 5.17, + "learning_rate": 2.8051667110811533e-06, + "loss": 0.0205, + "step": 110735 + }, + { + "epoch": 5.17, + "learning_rate": 2.8043829260263668e-06, + "loss": 0.0962, + "step": 110740 + }, + { + "epoch": 5.17, + "learning_rate": 2.8035991409715802e-06, + "loss": 0.0834, + "step": 110745 + }, + { + "epoch": 5.17, + "learning_rate": 2.8028153559167937e-06, + "loss": 0.0996, + "step": 110750 + }, + { + "epoch": 5.17, + "learning_rate": 2.802031570862007e-06, + "loss": 0.2966, + "step": 110755 + }, + { + "epoch": 5.17, + "learning_rate": 2.8012477858072207e-06, + "loss": 0.1497, + "step": 110760 + }, + { + "epoch": 5.17, + "learning_rate": 2.8004640007524337e-06, + "loss": 0.0827, + "step": 110765 + }, + { + "epoch": 5.17, + "learning_rate": 2.7996802156976472e-06, + "loss": 0.0403, + "step": 110770 + }, + { + "epoch": 5.17, + "learning_rate": 2.7988964306428607e-06, + "loss": 0.0145, + "step": 110775 + }, + { + "epoch": 5.17, + "learning_rate": 2.798112645588074e-06, + "loss": 0.0734, + "step": 110780 + }, + { + "epoch": 5.17, + "learning_rate": 2.7973288605332877e-06, + "loss": 0.0669, + "step": 110785 + }, + { + "epoch": 5.17, + "learning_rate": 2.796545075478501e-06, + "loss": 0.1167, + "step": 110790 + }, + { + "epoch": 5.17, + "learning_rate": 2.7957612904237146e-06, + "loss": 0.0927, + "step": 110795 + }, + { + "epoch": 5.17, + "learning_rate": 2.7949775053689277e-06, + "loss": 0.106, + "step": 110800 + }, + { + "epoch": 5.17, + "learning_rate": 2.794193720314141e-06, + "loss": 0.2813, + "step": 110805 + }, + { + "epoch": 5.17, + "learning_rate": 2.7934099352593547e-06, + "loss": 0.1391, + "step": 110810 + }, + { + "epoch": 5.17, + "learning_rate": 2.792626150204568e-06, + "loss": 0.0288, + "step": 110815 + }, + { + "epoch": 5.17, + "learning_rate": 2.7918423651497816e-06, + "loss": 0.0225, + "step": 110820 + }, + { + "epoch": 5.17, + "learning_rate": 2.791058580094995e-06, + "loss": 0.0183, + "step": 110825 + }, + { + "epoch": 5.17, + "learning_rate": 2.790274795040208e-06, + "loss": 0.0173, + "step": 110830 + }, + { + "epoch": 5.17, + "learning_rate": 2.7894910099854216e-06, + "loss": 0.0522, + "step": 110835 + }, + { + "epoch": 5.17, + "learning_rate": 2.788707224930635e-06, + "loss": 0.1165, + "step": 110840 + }, + { + "epoch": 5.17, + "learning_rate": 2.7879234398758486e-06, + "loss": 0.042, + "step": 110845 + }, + { + "epoch": 5.17, + "learning_rate": 2.787139654821062e-06, + "loss": 0.0959, + "step": 110850 + }, + { + "epoch": 5.17, + "learning_rate": 2.7863558697662756e-06, + "loss": 0.1952, + "step": 110855 + }, + { + "epoch": 5.17, + "learning_rate": 2.785572084711489e-06, + "loss": 0.1404, + "step": 110860 + }, + { + "epoch": 5.17, + "learning_rate": 2.784788299656702e-06, + "loss": 0.0014, + "step": 110865 + }, + { + "epoch": 5.17, + "learning_rate": 2.7840045146019156e-06, + "loss": 0.0329, + "step": 110870 + }, + { + "epoch": 5.17, + "learning_rate": 2.783220729547129e-06, + "loss": 0.0459, + "step": 110875 + }, + { + "epoch": 5.17, + "learning_rate": 2.7824369444923426e-06, + "loss": 0.0251, + "step": 110880 + }, + { + "epoch": 5.17, + "learning_rate": 2.781653159437556e-06, + "loss": 0.0509, + "step": 110885 + }, + { + "epoch": 5.17, + "learning_rate": 2.7808693743827695e-06, + "loss": 0.1095, + "step": 110890 + }, + { + "epoch": 5.17, + "learning_rate": 2.7800855893279826e-06, + "loss": 0.0784, + "step": 110895 + }, + { + "epoch": 5.17, + "learning_rate": 2.779301804273196e-06, + "loss": 0.2027, + "step": 110900 + }, + { + "epoch": 5.17, + "learning_rate": 2.7785180192184096e-06, + "loss": 0.1482, + "step": 110905 + }, + { + "epoch": 5.18, + "learning_rate": 2.777734234163623e-06, + "loss": 0.0851, + "step": 110910 + }, + { + "epoch": 5.18, + "learning_rate": 2.7769504491088365e-06, + "loss": 0.0212, + "step": 110915 + }, + { + "epoch": 5.18, + "learning_rate": 2.7761666640540504e-06, + "loss": 0.0183, + "step": 110920 + }, + { + "epoch": 5.18, + "learning_rate": 2.775382878999264e-06, + "loss": 0.0269, + "step": 110925 + }, + { + "epoch": 5.18, + "learning_rate": 2.7745990939444765e-06, + "loss": 0.0704, + "step": 110930 + }, + { + "epoch": 5.18, + "learning_rate": 2.77381530888969e-06, + "loss": 0.061, + "step": 110935 + }, + { + "epoch": 5.18, + "learning_rate": 2.7730315238349035e-06, + "loss": 0.1017, + "step": 110940 + }, + { + "epoch": 5.18, + "learning_rate": 2.7722477387801174e-06, + "loss": 0.1158, + "step": 110945 + }, + { + "epoch": 5.18, + "learning_rate": 2.771463953725331e-06, + "loss": 0.1232, + "step": 110950 + }, + { + "epoch": 5.18, + "learning_rate": 2.7706801686705444e-06, + "loss": 0.3087, + "step": 110955 + }, + { + "epoch": 5.18, + "learning_rate": 2.769896383615757e-06, + "loss": 0.0677, + "step": 110960 + }, + { + "epoch": 5.18, + "learning_rate": 2.7691125985609705e-06, + "loss": 0.0355, + "step": 110965 + }, + { + "epoch": 5.18, + "learning_rate": 2.7683288135061844e-06, + "loss": 0.0226, + "step": 110970 + }, + { + "epoch": 5.18, + "learning_rate": 2.767545028451398e-06, + "loss": 0.0628, + "step": 110975 + }, + { + "epoch": 5.18, + "learning_rate": 2.7667612433966114e-06, + "loss": 0.0186, + "step": 110980 + }, + { + "epoch": 5.18, + "learning_rate": 2.765977458341825e-06, + "loss": 0.0557, + "step": 110985 + }, + { + "epoch": 5.18, + "learning_rate": 2.7651936732870383e-06, + "loss": 0.0903, + "step": 110990 + }, + { + "epoch": 5.18, + "learning_rate": 2.7644098882322514e-06, + "loss": 0.0838, + "step": 110995 + }, + { + "epoch": 5.18, + "learning_rate": 2.763626103177465e-06, + "loss": 0.1017, + "step": 111000 + }, + { + "epoch": 5.18, + "learning_rate": 2.7628423181226783e-06, + "loss": 0.2797, + "step": 111005 + }, + { + "epoch": 5.18, + "learning_rate": 2.762058533067892e-06, + "loss": 0.0916, + "step": 111010 + }, + { + "epoch": 5.18, + "learning_rate": 2.7612747480131053e-06, + "loss": 0.0106, + "step": 111015 + }, + { + "epoch": 5.18, + "learning_rate": 2.760490962958319e-06, + "loss": 0.041, + "step": 111020 + }, + { + "epoch": 5.18, + "learning_rate": 2.759707177903532e-06, + "loss": 0.0526, + "step": 111025 + }, + { + "epoch": 5.18, + "learning_rate": 2.7589233928487453e-06, + "loss": 0.0514, + "step": 111030 + }, + { + "epoch": 5.18, + "learning_rate": 2.758139607793959e-06, + "loss": 0.1262, + "step": 111035 + }, + { + "epoch": 5.18, + "learning_rate": 2.7573558227391723e-06, + "loss": 0.0835, + "step": 111040 + }, + { + "epoch": 5.18, + "learning_rate": 2.7565720376843858e-06, + "loss": 0.1002, + "step": 111045 + }, + { + "epoch": 5.18, + "learning_rate": 2.7557882526295993e-06, + "loss": 0.1225, + "step": 111050 + }, + { + "epoch": 5.18, + "learning_rate": 2.7550044675748127e-06, + "loss": 0.2417, + "step": 111055 + }, + { + "epoch": 5.18, + "learning_rate": 2.754220682520026e-06, + "loss": 0.1295, + "step": 111060 + }, + { + "epoch": 5.18, + "learning_rate": 2.7534368974652393e-06, + "loss": 0.0459, + "step": 111065 + }, + { + "epoch": 5.18, + "learning_rate": 2.7526531124104528e-06, + "loss": 0.0046, + "step": 111070 + }, + { + "epoch": 5.18, + "learning_rate": 2.7518693273556663e-06, + "loss": 0.016, + "step": 111075 + }, + { + "epoch": 5.18, + "learning_rate": 2.7510855423008797e-06, + "loss": 0.0461, + "step": 111080 + }, + { + "epoch": 5.18, + "learning_rate": 2.7503017572460932e-06, + "loss": 0.0475, + "step": 111085 + }, + { + "epoch": 5.18, + "learning_rate": 2.7495179721913063e-06, + "loss": 0.0837, + "step": 111090 + }, + { + "epoch": 5.18, + "learning_rate": 2.7487341871365198e-06, + "loss": 0.0662, + "step": 111095 + }, + { + "epoch": 5.18, + "learning_rate": 2.7479504020817332e-06, + "loss": 0.1958, + "step": 111100 + }, + { + "epoch": 5.18, + "learning_rate": 2.7471666170269467e-06, + "loss": 0.192, + "step": 111105 + }, + { + "epoch": 5.18, + "learning_rate": 2.74638283197216e-06, + "loss": 0.1028, + "step": 111110 + }, + { + "epoch": 5.18, + "learning_rate": 2.7455990469173737e-06, + "loss": 0.0511, + "step": 111115 + }, + { + "epoch": 5.19, + "learning_rate": 2.744815261862587e-06, + "loss": 0.0271, + "step": 111120 + }, + { + "epoch": 5.19, + "learning_rate": 2.7440314768078002e-06, + "loss": 0.004, + "step": 111125 + }, + { + "epoch": 5.19, + "learning_rate": 2.7432476917530137e-06, + "loss": 0.0606, + "step": 111130 + }, + { + "epoch": 5.19, + "learning_rate": 2.742463906698227e-06, + "loss": 0.0727, + "step": 111135 + }, + { + "epoch": 5.19, + "learning_rate": 2.7416801216434407e-06, + "loss": 0.0376, + "step": 111140 + }, + { + "epoch": 5.19, + "learning_rate": 2.740896336588654e-06, + "loss": 0.1519, + "step": 111145 + }, + { + "epoch": 5.19, + "learning_rate": 2.7401125515338676e-06, + "loss": 0.1033, + "step": 111150 + }, + { + "epoch": 5.19, + "learning_rate": 2.7393287664790807e-06, + "loss": 0.2459, + "step": 111155 + }, + { + "epoch": 5.19, + "learning_rate": 2.738544981424294e-06, + "loss": 0.0999, + "step": 111160 + }, + { + "epoch": 5.19, + "learning_rate": 2.7377611963695077e-06, + "loss": 0.0213, + "step": 111165 + }, + { + "epoch": 5.19, + "learning_rate": 2.736977411314721e-06, + "loss": 0.033, + "step": 111170 + }, + { + "epoch": 5.19, + "learning_rate": 2.7361936262599346e-06, + "loss": 0.1261, + "step": 111175 + }, + { + "epoch": 5.19, + "learning_rate": 2.735409841205148e-06, + "loss": 0.0718, + "step": 111180 + }, + { + "epoch": 5.19, + "learning_rate": 2.734626056150362e-06, + "loss": 0.0463, + "step": 111185 + }, + { + "epoch": 5.19, + "learning_rate": 2.7338422710955747e-06, + "loss": 0.1141, + "step": 111190 + }, + { + "epoch": 5.19, + "learning_rate": 2.733058486040788e-06, + "loss": 0.1329, + "step": 111195 + }, + { + "epoch": 5.19, + "learning_rate": 2.7322747009860016e-06, + "loss": 0.1476, + "step": 111200 + }, + { + "epoch": 5.19, + "learning_rate": 2.731490915931215e-06, + "loss": 0.3631, + "step": 111205 + }, + { + "epoch": 5.19, + "learning_rate": 2.730707130876429e-06, + "loss": 0.1358, + "step": 111210 + }, + { + "epoch": 5.19, + "learning_rate": 2.7299233458216425e-06, + "loss": 0.0159, + "step": 111215 + }, + { + "epoch": 5.19, + "learning_rate": 2.729139560766855e-06, + "loss": 0.0519, + "step": 111220 + }, + { + "epoch": 5.19, + "learning_rate": 2.7283557757120686e-06, + "loss": 0.0753, + "step": 111225 + }, + { + "epoch": 5.19, + "learning_rate": 2.727571990657282e-06, + "loss": 0.0479, + "step": 111230 + }, + { + "epoch": 5.19, + "learning_rate": 2.726788205602496e-06, + "loss": 0.0356, + "step": 111235 + }, + { + "epoch": 5.19, + "learning_rate": 2.7260044205477095e-06, + "loss": 0.1165, + "step": 111240 + }, + { + "epoch": 5.19, + "learning_rate": 2.725220635492923e-06, + "loss": 0.1931, + "step": 111245 + }, + { + "epoch": 5.19, + "learning_rate": 2.7244368504381364e-06, + "loss": 0.1391, + "step": 111250 + }, + { + "epoch": 5.19, + "learning_rate": 2.723653065383349e-06, + "loss": 0.2356, + "step": 111255 + }, + { + "epoch": 5.19, + "learning_rate": 2.722869280328563e-06, + "loss": 0.1007, + "step": 111260 + }, + { + "epoch": 5.19, + "learning_rate": 2.7220854952737765e-06, + "loss": 0.0425, + "step": 111265 + }, + { + "epoch": 5.19, + "learning_rate": 2.72130171021899e-06, + "loss": 0.0438, + "step": 111270 + }, + { + "epoch": 5.19, + "learning_rate": 2.7205179251642034e-06, + "loss": 0.0386, + "step": 111275 + }, + { + "epoch": 5.19, + "learning_rate": 2.719734140109417e-06, + "loss": 0.0225, + "step": 111280 + }, + { + "epoch": 5.19, + "learning_rate": 2.71895035505463e-06, + "loss": 0.1099, + "step": 111285 + }, + { + "epoch": 5.19, + "learning_rate": 2.7181665699998434e-06, + "loss": 0.0546, + "step": 111290 + }, + { + "epoch": 5.19, + "learning_rate": 2.717382784945057e-06, + "loss": 0.1142, + "step": 111295 + }, + { + "epoch": 5.19, + "learning_rate": 2.7165989998902704e-06, + "loss": 0.1371, + "step": 111300 + }, + { + "epoch": 5.19, + "learning_rate": 2.715815214835484e-06, + "loss": 0.324, + "step": 111305 + }, + { + "epoch": 5.19, + "learning_rate": 2.7150314297806974e-06, + "loss": 0.1101, + "step": 111310 + }, + { + "epoch": 5.19, + "learning_rate": 2.714247644725911e-06, + "loss": 0.0208, + "step": 111315 + }, + { + "epoch": 5.19, + "learning_rate": 2.713463859671124e-06, + "loss": 0.0509, + "step": 111320 + }, + { + "epoch": 5.19, + "learning_rate": 2.7126800746163374e-06, + "loss": 0.0477, + "step": 111325 + }, + { + "epoch": 5.19, + "learning_rate": 2.711896289561551e-06, + "loss": 0.0569, + "step": 111330 + }, + { + "epoch": 5.2, + "learning_rate": 2.7111125045067644e-06, + "loss": 0.0232, + "step": 111335 + }, + { + "epoch": 5.2, + "learning_rate": 2.710328719451978e-06, + "loss": 0.0674, + "step": 111340 + }, + { + "epoch": 5.2, + "learning_rate": 2.7095449343971913e-06, + "loss": 0.0825, + "step": 111345 + }, + { + "epoch": 5.2, + "learning_rate": 2.7087611493424044e-06, + "loss": 0.2367, + "step": 111350 + }, + { + "epoch": 5.2, + "learning_rate": 2.707977364287618e-06, + "loss": 0.2218, + "step": 111355 + }, + { + "epoch": 5.2, + "learning_rate": 2.7071935792328314e-06, + "loss": 0.0874, + "step": 111360 + }, + { + "epoch": 5.2, + "learning_rate": 2.706409794178045e-06, + "loss": 0.0277, + "step": 111365 + }, + { + "epoch": 5.2, + "learning_rate": 2.7056260091232583e-06, + "loss": 0.0317, + "step": 111370 + }, + { + "epoch": 5.2, + "learning_rate": 2.704842224068472e-06, + "loss": 0.0857, + "step": 111375 + }, + { + "epoch": 5.2, + "learning_rate": 2.7040584390136853e-06, + "loss": 0.0327, + "step": 111380 + }, + { + "epoch": 5.2, + "learning_rate": 2.7032746539588983e-06, + "loss": 0.0667, + "step": 111385 + }, + { + "epoch": 5.2, + "learning_rate": 2.702490868904112e-06, + "loss": 0.1215, + "step": 111390 + }, + { + "epoch": 5.2, + "learning_rate": 2.7017070838493253e-06, + "loss": 0.1587, + "step": 111395 + }, + { + "epoch": 5.2, + "learning_rate": 2.700923298794539e-06, + "loss": 0.127, + "step": 111400 + }, + { + "epoch": 5.2, + "learning_rate": 2.7001395137397523e-06, + "loss": 0.2394, + "step": 111405 + }, + { + "epoch": 5.2, + "learning_rate": 2.6993557286849658e-06, + "loss": 0.1071, + "step": 111410 + }, + { + "epoch": 5.2, + "learning_rate": 2.698571943630179e-06, + "loss": 0.0391, + "step": 111415 + }, + { + "epoch": 5.2, + "learning_rate": 2.6977881585753923e-06, + "loss": 0.0585, + "step": 111420 + }, + { + "epoch": 5.2, + "learning_rate": 2.6970043735206058e-06, + "loss": 0.0087, + "step": 111425 + }, + { + "epoch": 5.2, + "learning_rate": 2.6962205884658193e-06, + "loss": 0.0296, + "step": 111430 + }, + { + "epoch": 5.2, + "learning_rate": 2.6954368034110327e-06, + "loss": 0.0319, + "step": 111435 + }, + { + "epoch": 5.2, + "learning_rate": 2.6946530183562462e-06, + "loss": 0.0824, + "step": 111440 + }, + { + "epoch": 5.2, + "learning_rate": 2.6938692333014597e-06, + "loss": 0.1059, + "step": 111445 + }, + { + "epoch": 5.2, + "learning_rate": 2.6930854482466728e-06, + "loss": 0.1318, + "step": 111450 + }, + { + "epoch": 5.2, + "learning_rate": 2.6923016631918862e-06, + "loss": 0.2099, + "step": 111455 + }, + { + "epoch": 5.2, + "learning_rate": 2.6915178781370997e-06, + "loss": 0.0967, + "step": 111460 + }, + { + "epoch": 5.2, + "learning_rate": 2.690734093082313e-06, + "loss": 0.0286, + "step": 111465 + }, + { + "epoch": 5.2, + "learning_rate": 2.6899503080275267e-06, + "loss": 0.0132, + "step": 111470 + }, + { + "epoch": 5.2, + "learning_rate": 2.6891665229727406e-06, + "loss": 0.015, + "step": 111475 + }, + { + "epoch": 5.2, + "learning_rate": 2.6883827379179532e-06, + "loss": 0.0583, + "step": 111480 + }, + { + "epoch": 5.2, + "learning_rate": 2.6875989528631667e-06, + "loss": 0.0456, + "step": 111485 + }, + { + "epoch": 5.2, + "learning_rate": 2.68681516780838e-06, + "loss": 0.0796, + "step": 111490 + }, + { + "epoch": 5.2, + "learning_rate": 2.6860313827535937e-06, + "loss": 0.1397, + "step": 111495 + }, + { + "epoch": 5.2, + "learning_rate": 2.6852475976988076e-06, + "loss": 0.1901, + "step": 111500 + }, + { + "epoch": 5.2, + "learning_rate": 2.684463812644021e-06, + "loss": 0.2873, + "step": 111505 + }, + { + "epoch": 5.2, + "learning_rate": 2.6836800275892346e-06, + "loss": 0.094, + "step": 111510 + }, + { + "epoch": 5.2, + "learning_rate": 2.682896242534447e-06, + "loss": 0.0125, + "step": 111515 + }, + { + "epoch": 5.2, + "learning_rate": 2.6821124574796607e-06, + "loss": 0.0153, + "step": 111520 + }, + { + "epoch": 5.2, + "learning_rate": 2.6813286724248746e-06, + "loss": 0.0554, + "step": 111525 + }, + { + "epoch": 5.2, + "learning_rate": 2.680544887370088e-06, + "loss": 0.0562, + "step": 111530 + }, + { + "epoch": 5.2, + "learning_rate": 2.6797611023153015e-06, + "loss": 0.0578, + "step": 111535 + }, + { + "epoch": 5.2, + "learning_rate": 2.678977317260515e-06, + "loss": 0.0769, + "step": 111540 + }, + { + "epoch": 5.2, + "learning_rate": 2.6781935322057277e-06, + "loss": 0.0818, + "step": 111545 + }, + { + "epoch": 5.21, + "learning_rate": 2.6774097471509416e-06, + "loss": 0.1068, + "step": 111550 + }, + { + "epoch": 5.21, + "learning_rate": 2.676625962096155e-06, + "loss": 0.3222, + "step": 111555 + }, + { + "epoch": 5.21, + "learning_rate": 2.6758421770413685e-06, + "loss": 0.1048, + "step": 111560 + }, + { + "epoch": 5.21, + "learning_rate": 2.675058391986582e-06, + "loss": 0.0697, + "step": 111565 + }, + { + "epoch": 5.21, + "learning_rate": 2.6742746069317955e-06, + "loss": 0.0452, + "step": 111570 + }, + { + "epoch": 5.21, + "learning_rate": 2.673490821877009e-06, + "loss": 0.0178, + "step": 111575 + }, + { + "epoch": 5.21, + "learning_rate": 2.672707036822222e-06, + "loss": 0.0659, + "step": 111580 + }, + { + "epoch": 5.21, + "learning_rate": 2.6719232517674355e-06, + "loss": 0.086, + "step": 111585 + }, + { + "epoch": 5.21, + "learning_rate": 2.671139466712649e-06, + "loss": 0.0831, + "step": 111590 + }, + { + "epoch": 5.21, + "learning_rate": 2.6703556816578625e-06, + "loss": 0.1485, + "step": 111595 + }, + { + "epoch": 5.21, + "learning_rate": 2.669571896603076e-06, + "loss": 0.131, + "step": 111600 + }, + { + "epoch": 5.21, + "learning_rate": 2.6687881115482894e-06, + "loss": 0.2683, + "step": 111605 + }, + { + "epoch": 5.21, + "learning_rate": 2.6680043264935025e-06, + "loss": 0.0803, + "step": 111610 + }, + { + "epoch": 5.21, + "learning_rate": 2.667220541438716e-06, + "loss": 0.0115, + "step": 111615 + }, + { + "epoch": 5.21, + "learning_rate": 2.6664367563839295e-06, + "loss": 0.046, + "step": 111620 + }, + { + "epoch": 5.21, + "learning_rate": 2.665652971329143e-06, + "loss": 0.0238, + "step": 111625 + }, + { + "epoch": 5.21, + "learning_rate": 2.6648691862743564e-06, + "loss": 0.0679, + "step": 111630 + }, + { + "epoch": 5.21, + "learning_rate": 2.66408540121957e-06, + "loss": 0.0195, + "step": 111635 + }, + { + "epoch": 5.21, + "learning_rate": 2.6633016161647834e-06, + "loss": 0.0404, + "step": 111640 + }, + { + "epoch": 5.21, + "learning_rate": 2.6625178311099965e-06, + "loss": 0.0838, + "step": 111645 + }, + { + "epoch": 5.21, + "learning_rate": 2.66173404605521e-06, + "loss": 0.1376, + "step": 111650 + }, + { + "epoch": 5.21, + "learning_rate": 2.6609502610004234e-06, + "loss": 0.3248, + "step": 111655 + }, + { + "epoch": 5.21, + "learning_rate": 2.660166475945637e-06, + "loss": 0.0769, + "step": 111660 + }, + { + "epoch": 5.21, + "learning_rate": 2.6593826908908504e-06, + "loss": 0.0074, + "step": 111665 + }, + { + "epoch": 5.21, + "learning_rate": 2.658598905836064e-06, + "loss": 0.0267, + "step": 111670 + }, + { + "epoch": 5.21, + "learning_rate": 2.657815120781277e-06, + "loss": 0.0812, + "step": 111675 + }, + { + "epoch": 5.21, + "learning_rate": 2.6570313357264904e-06, + "loss": 0.0331, + "step": 111680 + }, + { + "epoch": 5.21, + "learning_rate": 2.656247550671704e-06, + "loss": 0.0812, + "step": 111685 + }, + { + "epoch": 5.21, + "learning_rate": 2.6554637656169174e-06, + "loss": 0.1129, + "step": 111690 + }, + { + "epoch": 5.21, + "learning_rate": 2.654679980562131e-06, + "loss": 0.1313, + "step": 111695 + }, + { + "epoch": 5.21, + "learning_rate": 2.6538961955073443e-06, + "loss": 0.1197, + "step": 111700 + }, + { + "epoch": 5.21, + "learning_rate": 2.653112410452558e-06, + "loss": 0.3903, + "step": 111705 + }, + { + "epoch": 5.21, + "learning_rate": 2.652328625397771e-06, + "loss": 0.1518, + "step": 111710 + }, + { + "epoch": 5.21, + "learning_rate": 2.6515448403429844e-06, + "loss": 0.0086, + "step": 111715 + }, + { + "epoch": 5.21, + "learning_rate": 2.650761055288198e-06, + "loss": 0.0308, + "step": 111720 + }, + { + "epoch": 5.21, + "learning_rate": 2.6499772702334113e-06, + "loss": 0.0161, + "step": 111725 + }, + { + "epoch": 5.21, + "learning_rate": 2.649193485178625e-06, + "loss": 0.0413, + "step": 111730 + }, + { + "epoch": 5.21, + "learning_rate": 2.6484097001238383e-06, + "loss": 0.0759, + "step": 111735 + }, + { + "epoch": 5.21, + "learning_rate": 2.6476259150690513e-06, + "loss": 0.0635, + "step": 111740 + }, + { + "epoch": 5.21, + "learning_rate": 2.646842130014265e-06, + "loss": 0.0655, + "step": 111745 + }, + { + "epoch": 5.21, + "learning_rate": 2.6460583449594783e-06, + "loss": 0.2303, + "step": 111750 + }, + { + "epoch": 5.21, + "learning_rate": 2.645274559904692e-06, + "loss": 0.1643, + "step": 111755 + }, + { + "epoch": 5.21, + "learning_rate": 2.6444907748499053e-06, + "loss": 0.0899, + "step": 111760 + }, + { + "epoch": 5.22, + "learning_rate": 2.643706989795119e-06, + "loss": 0.0201, + "step": 111765 + }, + { + "epoch": 5.22, + "learning_rate": 2.6429232047403327e-06, + "loss": 0.0349, + "step": 111770 + }, + { + "epoch": 5.22, + "learning_rate": 2.6421394196855453e-06, + "loss": 0.0446, + "step": 111775 + }, + { + "epoch": 5.22, + "learning_rate": 2.6413556346307588e-06, + "loss": 0.0339, + "step": 111780 + }, + { + "epoch": 5.22, + "learning_rate": 2.6405718495759723e-06, + "loss": 0.0642, + "step": 111785 + }, + { + "epoch": 5.22, + "learning_rate": 2.639788064521186e-06, + "loss": 0.0585, + "step": 111790 + }, + { + "epoch": 5.22, + "learning_rate": 2.6390042794663997e-06, + "loss": 0.0451, + "step": 111795 + }, + { + "epoch": 5.22, + "learning_rate": 2.638220494411613e-06, + "loss": 0.1086, + "step": 111800 + }, + { + "epoch": 5.22, + "learning_rate": 2.6374367093568258e-06, + "loss": 0.3316, + "step": 111805 + }, + { + "epoch": 5.22, + "learning_rate": 2.6366529243020393e-06, + "loss": 0.0979, + "step": 111810 + }, + { + "epoch": 5.22, + "learning_rate": 2.635869139247253e-06, + "loss": 0.0238, + "step": 111815 + }, + { + "epoch": 5.22, + "learning_rate": 2.6350853541924666e-06, + "loss": 0.041, + "step": 111820 + }, + { + "epoch": 5.22, + "learning_rate": 2.63430156913768e-06, + "loss": 0.0969, + "step": 111825 + }, + { + "epoch": 5.22, + "learning_rate": 2.6335177840828936e-06, + "loss": 0.0661, + "step": 111830 + }, + { + "epoch": 5.22, + "learning_rate": 2.632733999028107e-06, + "loss": 0.1171, + "step": 111835 + }, + { + "epoch": 5.22, + "learning_rate": 2.63195021397332e-06, + "loss": 0.0743, + "step": 111840 + }, + { + "epoch": 5.22, + "learning_rate": 2.6311664289185336e-06, + "loss": 0.1666, + "step": 111845 + }, + { + "epoch": 5.22, + "learning_rate": 2.630382643863747e-06, + "loss": 0.2137, + "step": 111850 + }, + { + "epoch": 5.22, + "learning_rate": 2.6295988588089606e-06, + "loss": 0.3169, + "step": 111855 + }, + { + "epoch": 5.22, + "learning_rate": 2.628815073754174e-06, + "loss": 0.0905, + "step": 111860 + }, + { + "epoch": 5.22, + "learning_rate": 2.6280312886993876e-06, + "loss": 0.0098, + "step": 111865 + }, + { + "epoch": 5.22, + "learning_rate": 2.6272475036446006e-06, + "loss": 0.0096, + "step": 111870 + }, + { + "epoch": 5.22, + "learning_rate": 2.626463718589814e-06, + "loss": 0.0619, + "step": 111875 + }, + { + "epoch": 5.22, + "learning_rate": 2.6256799335350276e-06, + "loss": 0.0595, + "step": 111880 + }, + { + "epoch": 5.22, + "learning_rate": 2.624896148480241e-06, + "loss": 0.043, + "step": 111885 + }, + { + "epoch": 5.22, + "learning_rate": 2.6241123634254545e-06, + "loss": 0.0627, + "step": 111890 + }, + { + "epoch": 5.22, + "learning_rate": 2.623328578370668e-06, + "loss": 0.0607, + "step": 111895 + }, + { + "epoch": 5.22, + "learning_rate": 2.6225447933158815e-06, + "loss": 0.0592, + "step": 111900 + }, + { + "epoch": 5.22, + "learning_rate": 2.6217610082610946e-06, + "loss": 0.3254, + "step": 111905 + }, + { + "epoch": 5.22, + "learning_rate": 2.620977223206308e-06, + "loss": 0.1125, + "step": 111910 + }, + { + "epoch": 5.22, + "learning_rate": 2.6201934381515215e-06, + "loss": 0.0114, + "step": 111915 + }, + { + "epoch": 5.22, + "learning_rate": 2.619409653096735e-06, + "loss": 0.006, + "step": 111920 + }, + { + "epoch": 5.22, + "learning_rate": 2.6186258680419485e-06, + "loss": 0.0435, + "step": 111925 + }, + { + "epoch": 5.22, + "learning_rate": 2.617842082987162e-06, + "loss": 0.0387, + "step": 111930 + }, + { + "epoch": 5.22, + "learning_rate": 2.617058297932375e-06, + "loss": 0.0557, + "step": 111935 + }, + { + "epoch": 5.22, + "learning_rate": 2.6162745128775885e-06, + "loss": 0.0485, + "step": 111940 + }, + { + "epoch": 5.22, + "learning_rate": 2.615490727822802e-06, + "loss": 0.1323, + "step": 111945 + }, + { + "epoch": 5.22, + "learning_rate": 2.6147069427680155e-06, + "loss": 0.1549, + "step": 111950 + }, + { + "epoch": 5.22, + "learning_rate": 2.613923157713229e-06, + "loss": 0.1458, + "step": 111955 + }, + { + "epoch": 5.22, + "learning_rate": 2.6131393726584424e-06, + "loss": 0.0834, + "step": 111960 + }, + { + "epoch": 5.22, + "learning_rate": 2.612355587603656e-06, + "loss": 0.0286, + "step": 111965 + }, + { + "epoch": 5.22, + "learning_rate": 2.611571802548869e-06, + "loss": 0.0198, + "step": 111970 + }, + { + "epoch": 5.22, + "learning_rate": 2.6107880174940825e-06, + "loss": 0.0252, + "step": 111975 + }, + { + "epoch": 5.23, + "learning_rate": 2.610004232439296e-06, + "loss": 0.0231, + "step": 111980 + }, + { + "epoch": 5.23, + "learning_rate": 2.6092204473845094e-06, + "loss": 0.1476, + "step": 111985 + }, + { + "epoch": 5.23, + "learning_rate": 2.608436662329723e-06, + "loss": 0.0478, + "step": 111990 + }, + { + "epoch": 5.23, + "learning_rate": 2.6076528772749364e-06, + "loss": 0.108, + "step": 111995 + }, + { + "epoch": 5.23, + "learning_rate": 2.6068690922201495e-06, + "loss": 0.1081, + "step": 112000 + }, + { + "epoch": 5.23, + "learning_rate": 2.606085307165363e-06, + "loss": 0.28, + "step": 112005 + }, + { + "epoch": 5.23, + "learning_rate": 2.6053015221105764e-06, + "loss": 0.0771, + "step": 112010 + }, + { + "epoch": 5.23, + "learning_rate": 2.60451773705579e-06, + "loss": 0.0055, + "step": 112015 + }, + { + "epoch": 5.23, + "learning_rate": 2.6037339520010034e-06, + "loss": 0.0252, + "step": 112020 + }, + { + "epoch": 5.23, + "learning_rate": 2.602950166946217e-06, + "loss": 0.0317, + "step": 112025 + }, + { + "epoch": 5.23, + "learning_rate": 2.6021663818914304e-06, + "loss": 0.0313, + "step": 112030 + }, + { + "epoch": 5.23, + "learning_rate": 2.6013825968366434e-06, + "loss": 0.0517, + "step": 112035 + }, + { + "epoch": 5.23, + "learning_rate": 2.600598811781857e-06, + "loss": 0.083, + "step": 112040 + }, + { + "epoch": 5.23, + "learning_rate": 2.5998150267270704e-06, + "loss": 0.0908, + "step": 112045 + }, + { + "epoch": 5.23, + "learning_rate": 2.599031241672284e-06, + "loss": 0.1609, + "step": 112050 + }, + { + "epoch": 5.23, + "learning_rate": 2.5982474566174978e-06, + "loss": 0.2076, + "step": 112055 + }, + { + "epoch": 5.23, + "learning_rate": 2.5974636715627112e-06, + "loss": 0.0759, + "step": 112060 + }, + { + "epoch": 5.23, + "learning_rate": 2.596679886507924e-06, + "loss": 0.0008, + "step": 112065 + }, + { + "epoch": 5.23, + "learning_rate": 2.5958961014531374e-06, + "loss": 0.0306, + "step": 112070 + }, + { + "epoch": 5.23, + "learning_rate": 2.595112316398351e-06, + "loss": 0.0309, + "step": 112075 + }, + { + "epoch": 5.23, + "learning_rate": 2.5943285313435648e-06, + "loss": 0.0405, + "step": 112080 + }, + { + "epoch": 5.23, + "learning_rate": 2.5935447462887782e-06, + "loss": 0.0828, + "step": 112085 + }, + { + "epoch": 5.23, + "learning_rate": 2.5927609612339917e-06, + "loss": 0.0481, + "step": 112090 + }, + { + "epoch": 5.23, + "learning_rate": 2.591977176179205e-06, + "loss": 0.0954, + "step": 112095 + }, + { + "epoch": 5.23, + "learning_rate": 2.591193391124418e-06, + "loss": 0.171, + "step": 112100 + }, + { + "epoch": 5.23, + "learning_rate": 2.5904096060696317e-06, + "loss": 0.358, + "step": 112105 + }, + { + "epoch": 5.23, + "learning_rate": 2.5896258210148452e-06, + "loss": 0.1094, + "step": 112110 + }, + { + "epoch": 5.23, + "learning_rate": 2.5888420359600587e-06, + "loss": 0.0701, + "step": 112115 + }, + { + "epoch": 5.23, + "learning_rate": 2.588058250905272e-06, + "loss": 0.0271, + "step": 112120 + }, + { + "epoch": 5.23, + "learning_rate": 2.5872744658504857e-06, + "loss": 0.0694, + "step": 112125 + }, + { + "epoch": 5.23, + "learning_rate": 2.5864906807956987e-06, + "loss": 0.0899, + "step": 112130 + }, + { + "epoch": 5.23, + "learning_rate": 2.585706895740912e-06, + "loss": 0.054, + "step": 112135 + }, + { + "epoch": 5.23, + "learning_rate": 2.5849231106861257e-06, + "loss": 0.1205, + "step": 112140 + }, + { + "epoch": 5.23, + "learning_rate": 2.584139325631339e-06, + "loss": 0.1443, + "step": 112145 + }, + { + "epoch": 5.23, + "learning_rate": 2.5833555405765527e-06, + "loss": 0.1196, + "step": 112150 + }, + { + "epoch": 5.23, + "learning_rate": 2.582571755521766e-06, + "loss": 0.3816, + "step": 112155 + }, + { + "epoch": 5.23, + "learning_rate": 2.5817879704669796e-06, + "loss": 0.0807, + "step": 112160 + }, + { + "epoch": 5.23, + "learning_rate": 2.5810041854121927e-06, + "loss": 0.0064, + "step": 112165 + }, + { + "epoch": 5.23, + "learning_rate": 2.580220400357406e-06, + "loss": 0.0135, + "step": 112170 + }, + { + "epoch": 5.23, + "learning_rate": 2.5794366153026196e-06, + "loss": 0.03, + "step": 112175 + }, + { + "epoch": 5.23, + "learning_rate": 2.578652830247833e-06, + "loss": 0.0668, + "step": 112180 + }, + { + "epoch": 5.23, + "learning_rate": 2.5778690451930466e-06, + "loss": 0.0728, + "step": 112185 + }, + { + "epoch": 5.23, + "learning_rate": 2.57708526013826e-06, + "loss": 0.0899, + "step": 112190 + }, + { + "epoch": 5.24, + "learning_rate": 2.576301475083473e-06, + "loss": 0.0916, + "step": 112195 + }, + { + "epoch": 5.24, + "learning_rate": 2.5755176900286866e-06, + "loss": 0.0987, + "step": 112200 + }, + { + "epoch": 5.24, + "learning_rate": 2.5747339049739e-06, + "loss": 0.3148, + "step": 112205 + }, + { + "epoch": 5.24, + "learning_rate": 2.5739501199191136e-06, + "loss": 0.1306, + "step": 112210 + }, + { + "epoch": 5.24, + "learning_rate": 2.573166334864327e-06, + "loss": 0.0141, + "step": 112215 + }, + { + "epoch": 5.24, + "learning_rate": 2.5723825498095406e-06, + "loss": 0.0091, + "step": 112220 + }, + { + "epoch": 5.24, + "learning_rate": 2.571598764754754e-06, + "loss": 0.0264, + "step": 112225 + }, + { + "epoch": 5.24, + "learning_rate": 2.570814979699967e-06, + "loss": 0.0335, + "step": 112230 + }, + { + "epoch": 5.24, + "learning_rate": 2.5700311946451806e-06, + "loss": 0.1011, + "step": 112235 + }, + { + "epoch": 5.24, + "learning_rate": 2.569247409590394e-06, + "loss": 0.0354, + "step": 112240 + }, + { + "epoch": 5.24, + "learning_rate": 2.5684636245356075e-06, + "loss": 0.0845, + "step": 112245 + }, + { + "epoch": 5.24, + "learning_rate": 2.567679839480821e-06, + "loss": 0.1885, + "step": 112250 + }, + { + "epoch": 5.24, + "learning_rate": 2.5668960544260345e-06, + "loss": 0.2966, + "step": 112255 + }, + { + "epoch": 5.24, + "learning_rate": 2.5661122693712476e-06, + "loss": 0.0775, + "step": 112260 + }, + { + "epoch": 5.24, + "learning_rate": 2.565328484316461e-06, + "loss": 0.0151, + "step": 112265 + }, + { + "epoch": 5.24, + "learning_rate": 2.5645446992616745e-06, + "loss": 0.0315, + "step": 112270 + }, + { + "epoch": 5.24, + "learning_rate": 2.563760914206888e-06, + "loss": 0.0583, + "step": 112275 + }, + { + "epoch": 5.24, + "learning_rate": 2.5629771291521015e-06, + "loss": 0.0253, + "step": 112280 + }, + { + "epoch": 5.24, + "learning_rate": 2.562193344097315e-06, + "loss": 0.0832, + "step": 112285 + }, + { + "epoch": 5.24, + "learning_rate": 2.5614095590425285e-06, + "loss": 0.0872, + "step": 112290 + }, + { + "epoch": 5.24, + "learning_rate": 2.5606257739877415e-06, + "loss": 0.0892, + "step": 112295 + }, + { + "epoch": 5.24, + "learning_rate": 2.559841988932955e-06, + "loss": 0.1163, + "step": 112300 + }, + { + "epoch": 5.24, + "learning_rate": 2.5590582038781685e-06, + "loss": 0.1923, + "step": 112305 + }, + { + "epoch": 5.24, + "learning_rate": 2.558274418823382e-06, + "loss": 0.0841, + "step": 112310 + }, + { + "epoch": 5.24, + "learning_rate": 2.5574906337685955e-06, + "loss": 0.015, + "step": 112315 + }, + { + "epoch": 5.24, + "learning_rate": 2.556706848713809e-06, + "loss": 0.0141, + "step": 112320 + }, + { + "epoch": 5.24, + "learning_rate": 2.555923063659022e-06, + "loss": 0.0573, + "step": 112325 + }, + { + "epoch": 5.24, + "learning_rate": 2.5551392786042355e-06, + "loss": 0.0698, + "step": 112330 + }, + { + "epoch": 5.24, + "learning_rate": 2.554355493549449e-06, + "loss": 0.0665, + "step": 112335 + }, + { + "epoch": 5.24, + "learning_rate": 2.5535717084946624e-06, + "loss": 0.0405, + "step": 112340 + }, + { + "epoch": 5.24, + "learning_rate": 2.5527879234398763e-06, + "loss": 0.0946, + "step": 112345 + }, + { + "epoch": 5.24, + "learning_rate": 2.55200413838509e-06, + "loss": 0.1517, + "step": 112350 + }, + { + "epoch": 5.24, + "learning_rate": 2.5512203533303033e-06, + "loss": 0.3531, + "step": 112355 + }, + { + "epoch": 5.24, + "learning_rate": 2.550436568275516e-06, + "loss": 0.0766, + "step": 112360 + }, + { + "epoch": 5.24, + "learning_rate": 2.5496527832207294e-06, + "loss": 0.018, + "step": 112365 + }, + { + "epoch": 5.24, + "learning_rate": 2.5488689981659433e-06, + "loss": 0.0209, + "step": 112370 + }, + { + "epoch": 5.24, + "learning_rate": 2.548085213111157e-06, + "loss": 0.0137, + "step": 112375 + }, + { + "epoch": 5.24, + "learning_rate": 2.5473014280563703e-06, + "loss": 0.0455, + "step": 112380 + }, + { + "epoch": 5.24, + "learning_rate": 2.5465176430015838e-06, + "loss": 0.0336, + "step": 112385 + }, + { + "epoch": 5.24, + "learning_rate": 2.5457338579467964e-06, + "loss": 0.0969, + "step": 112390 + }, + { + "epoch": 5.24, + "learning_rate": 2.5449500728920103e-06, + "loss": 0.1565, + "step": 112395 + }, + { + "epoch": 5.24, + "learning_rate": 2.544166287837224e-06, + "loss": 0.1084, + "step": 112400 + }, + { + "epoch": 5.24, + "learning_rate": 2.5433825027824373e-06, + "loss": 0.2, + "step": 112405 + }, + { + "epoch": 5.25, + "learning_rate": 2.5425987177276508e-06, + "loss": 0.1441, + "step": 112410 + }, + { + "epoch": 5.25, + "learning_rate": 2.5418149326728643e-06, + "loss": 0.0244, + "step": 112415 + }, + { + "epoch": 5.25, + "learning_rate": 2.5410311476180777e-06, + "loss": 0.0386, + "step": 112420 + }, + { + "epoch": 5.25, + "learning_rate": 2.540247362563291e-06, + "loss": 0.0368, + "step": 112425 + }, + { + "epoch": 5.25, + "learning_rate": 2.5394635775085043e-06, + "loss": 0.0462, + "step": 112430 + }, + { + "epoch": 5.25, + "learning_rate": 2.5386797924537178e-06, + "loss": 0.0928, + "step": 112435 + }, + { + "epoch": 5.25, + "learning_rate": 2.5378960073989312e-06, + "loss": 0.0417, + "step": 112440 + }, + { + "epoch": 5.25, + "learning_rate": 2.5371122223441447e-06, + "loss": 0.094, + "step": 112445 + }, + { + "epoch": 5.25, + "learning_rate": 2.536328437289358e-06, + "loss": 0.1817, + "step": 112450 + }, + { + "epoch": 5.25, + "learning_rate": 2.5355446522345713e-06, + "loss": 0.4067, + "step": 112455 + }, + { + "epoch": 5.25, + "learning_rate": 2.5347608671797847e-06, + "loss": 0.0658, + "step": 112460 + }, + { + "epoch": 5.25, + "learning_rate": 2.5339770821249982e-06, + "loss": 0.0201, + "step": 112465 + }, + { + "epoch": 5.25, + "learning_rate": 2.5331932970702117e-06, + "loss": 0.026, + "step": 112470 + }, + { + "epoch": 5.25, + "learning_rate": 2.532409512015425e-06, + "loss": 0.0453, + "step": 112475 + }, + { + "epoch": 5.25, + "learning_rate": 2.5316257269606387e-06, + "loss": 0.0705, + "step": 112480 + }, + { + "epoch": 5.25, + "learning_rate": 2.530841941905852e-06, + "loss": 0.0589, + "step": 112485 + }, + { + "epoch": 5.25, + "learning_rate": 2.5300581568510652e-06, + "loss": 0.1003, + "step": 112490 + }, + { + "epoch": 5.25, + "learning_rate": 2.5292743717962787e-06, + "loss": 0.1141, + "step": 112495 + }, + { + "epoch": 5.25, + "learning_rate": 2.528490586741492e-06, + "loss": 0.2016, + "step": 112500 + }, + { + "epoch": 5.25, + "learning_rate": 2.5277068016867057e-06, + "loss": 0.2471, + "step": 112505 + }, + { + "epoch": 5.25, + "learning_rate": 2.526923016631919e-06, + "loss": 0.0656, + "step": 112510 + }, + { + "epoch": 5.25, + "learning_rate": 2.5261392315771326e-06, + "loss": 0.0264, + "step": 112515 + }, + { + "epoch": 5.25, + "learning_rate": 2.5253554465223457e-06, + "loss": 0.0256, + "step": 112520 + }, + { + "epoch": 5.25, + "learning_rate": 2.524571661467559e-06, + "loss": 0.0449, + "step": 112525 + }, + { + "epoch": 5.25, + "learning_rate": 2.5237878764127726e-06, + "loss": 0.0665, + "step": 112530 + }, + { + "epoch": 5.25, + "learning_rate": 2.523004091357986e-06, + "loss": 0.1005, + "step": 112535 + }, + { + "epoch": 5.25, + "learning_rate": 2.5222203063031996e-06, + "loss": 0.0226, + "step": 112540 + }, + { + "epoch": 5.25, + "learning_rate": 2.5215932782593705e-06, + "loss": 0.0917, + "step": 112545 + }, + { + "epoch": 5.25, + "learning_rate": 2.520809493204584e-06, + "loss": 0.1316, + "step": 112550 + }, + { + "epoch": 5.25, + "learning_rate": 2.520025708149797e-06, + "loss": 0.131, + "step": 112555 + }, + { + "epoch": 5.25, + "learning_rate": 2.5192419230950105e-06, + "loss": 0.1093, + "step": 112560 + }, + { + "epoch": 5.25, + "learning_rate": 2.518458138040224e-06, + "loss": 0.0275, + "step": 112565 + }, + { + "epoch": 5.25, + "learning_rate": 2.5176743529854375e-06, + "loss": 0.0192, + "step": 112570 + }, + { + "epoch": 5.25, + "learning_rate": 2.516890567930651e-06, + "loss": 0.0694, + "step": 112575 + }, + { + "epoch": 5.25, + "learning_rate": 2.5161067828758644e-06, + "loss": 0.0353, + "step": 112580 + }, + { + "epoch": 5.25, + "learning_rate": 2.515322997821078e-06, + "loss": 0.045, + "step": 112585 + }, + { + "epoch": 5.25, + "learning_rate": 2.514539212766291e-06, + "loss": 0.0539, + "step": 112590 + }, + { + "epoch": 5.25, + "learning_rate": 2.5137554277115045e-06, + "loss": 0.1378, + "step": 112595 + }, + { + "epoch": 5.25, + "learning_rate": 2.512971642656718e-06, + "loss": 0.1072, + "step": 112600 + }, + { + "epoch": 5.25, + "learning_rate": 2.5121878576019314e-06, + "loss": 0.3234, + "step": 112605 + }, + { + "epoch": 5.25, + "learning_rate": 2.511404072547145e-06, + "loss": 0.1097, + "step": 112610 + }, + { + "epoch": 5.25, + "learning_rate": 2.5106202874923584e-06, + "loss": 0.0166, + "step": 112615 + }, + { + "epoch": 5.26, + "learning_rate": 2.5098365024375714e-06, + "loss": 0.0173, + "step": 112620 + }, + { + "epoch": 5.26, + "learning_rate": 2.509052717382785e-06, + "loss": 0.048, + "step": 112625 + }, + { + "epoch": 5.26, + "learning_rate": 2.5082689323279984e-06, + "loss": 0.0635, + "step": 112630 + }, + { + "epoch": 5.26, + "learning_rate": 2.507485147273212e-06, + "loss": 0.0436, + "step": 112635 + }, + { + "epoch": 5.26, + "learning_rate": 2.5067013622184254e-06, + "loss": 0.1164, + "step": 112640 + }, + { + "epoch": 5.26, + "learning_rate": 2.505917577163639e-06, + "loss": 0.1422, + "step": 112645 + }, + { + "epoch": 5.26, + "learning_rate": 2.5051337921088523e-06, + "loss": 0.1208, + "step": 112650 + }, + { + "epoch": 5.26, + "learning_rate": 2.5043500070540654e-06, + "loss": 0.3539, + "step": 112655 + }, + { + "epoch": 5.26, + "learning_rate": 2.503566221999279e-06, + "loss": 0.0913, + "step": 112660 + }, + { + "epoch": 5.26, + "learning_rate": 2.5027824369444924e-06, + "loss": 0.0849, + "step": 112665 + }, + { + "epoch": 5.26, + "learning_rate": 2.501998651889706e-06, + "loss": 0.044, + "step": 112670 + }, + { + "epoch": 5.26, + "learning_rate": 2.5012148668349198e-06, + "loss": 0.0241, + "step": 112675 + }, + { + "epoch": 5.26, + "learning_rate": 2.5004310817801332e-06, + "loss": 0.0365, + "step": 112680 + }, + { + "epoch": 5.26, + "learning_rate": 2.4996472967253463e-06, + "loss": 0.0746, + "step": 112685 + }, + { + "epoch": 5.26, + "learning_rate": 2.4988635116705598e-06, + "loss": 0.0586, + "step": 112690 + }, + { + "epoch": 5.26, + "learning_rate": 2.498079726615773e-06, + "loss": 0.1057, + "step": 112695 + }, + { + "epoch": 5.26, + "learning_rate": 2.4972959415609867e-06, + "loss": 0.1193, + "step": 112700 + }, + { + "epoch": 5.26, + "learning_rate": 2.4965121565062002e-06, + "loss": 0.1961, + "step": 112705 + }, + { + "epoch": 5.26, + "learning_rate": 2.4957283714514133e-06, + "loss": 0.1052, + "step": 112710 + }, + { + "epoch": 5.26, + "learning_rate": 2.4949445863966268e-06, + "loss": 0.0204, + "step": 112715 + }, + { + "epoch": 5.26, + "learning_rate": 2.4941608013418402e-06, + "loss": 0.0866, + "step": 112720 + }, + { + "epoch": 5.26, + "learning_rate": 2.4933770162870537e-06, + "loss": 0.0377, + "step": 112725 + }, + { + "epoch": 5.26, + "learning_rate": 2.492593231232267e-06, + "loss": 0.0411, + "step": 112730 + }, + { + "epoch": 5.26, + "learning_rate": 2.4918094461774807e-06, + "loss": 0.0852, + "step": 112735 + }, + { + "epoch": 5.26, + "learning_rate": 2.4910256611226938e-06, + "loss": 0.0665, + "step": 112740 + }, + { + "epoch": 5.26, + "learning_rate": 2.4902418760679072e-06, + "loss": 0.0834, + "step": 112745 + }, + { + "epoch": 5.26, + "learning_rate": 2.4894580910131207e-06, + "loss": 0.0985, + "step": 112750 + }, + { + "epoch": 5.26, + "learning_rate": 2.488674305958334e-06, + "loss": 0.1815, + "step": 112755 + }, + { + "epoch": 5.26, + "learning_rate": 2.4878905209035477e-06, + "loss": 0.0701, + "step": 112760 + }, + { + "epoch": 5.26, + "learning_rate": 2.487106735848761e-06, + "loss": 0.007, + "step": 112765 + }, + { + "epoch": 5.26, + "learning_rate": 2.4863229507939746e-06, + "loss": 0.0276, + "step": 112770 + }, + { + "epoch": 5.26, + "learning_rate": 2.4855391657391877e-06, + "loss": 0.0431, + "step": 112775 + }, + { + "epoch": 5.26, + "learning_rate": 2.484755380684401e-06, + "loss": 0.0454, + "step": 112780 + }, + { + "epoch": 5.26, + "learning_rate": 2.4839715956296147e-06, + "loss": 0.1059, + "step": 112785 + }, + { + "epoch": 5.26, + "learning_rate": 2.483187810574828e-06, + "loss": 0.0786, + "step": 112790 + }, + { + "epoch": 5.26, + "learning_rate": 2.4824040255200416e-06, + "loss": 0.0746, + "step": 112795 + }, + { + "epoch": 5.26, + "learning_rate": 2.481620240465255e-06, + "loss": 0.1368, + "step": 112800 + }, + { + "epoch": 5.26, + "learning_rate": 2.480836455410468e-06, + "loss": 0.1438, + "step": 112805 + }, + { + "epoch": 5.26, + "learning_rate": 2.4800526703556817e-06, + "loss": 0.0979, + "step": 112810 + }, + { + "epoch": 5.26, + "learning_rate": 2.479268885300895e-06, + "loss": 0.0378, + "step": 112815 + }, + { + "epoch": 5.26, + "learning_rate": 2.478485100246109e-06, + "loss": 0.0095, + "step": 112820 + }, + { + "epoch": 5.26, + "learning_rate": 2.477701315191322e-06, + "loss": 0.0227, + "step": 112825 + }, + { + "epoch": 5.26, + "learning_rate": 2.4769175301365356e-06, + "loss": 0.0596, + "step": 112830 + }, + { + "epoch": 5.27, + "learning_rate": 2.476133745081749e-06, + "loss": 0.0528, + "step": 112835 + }, + { + "epoch": 5.27, + "learning_rate": 2.475349960026962e-06, + "loss": 0.0915, + "step": 112840 + }, + { + "epoch": 5.27, + "learning_rate": 2.474566174972176e-06, + "loss": 0.1002, + "step": 112845 + }, + { + "epoch": 5.27, + "learning_rate": 2.4737823899173895e-06, + "loss": 0.139, + "step": 112850 + }, + { + "epoch": 5.27, + "learning_rate": 2.4729986048626026e-06, + "loss": 0.2503, + "step": 112855 + }, + { + "epoch": 5.27, + "learning_rate": 2.472214819807816e-06, + "loss": 0.0893, + "step": 112860 + }, + { + "epoch": 5.27, + "learning_rate": 2.4714310347530295e-06, + "loss": 0.0103, + "step": 112865 + }, + { + "epoch": 5.27, + "learning_rate": 2.470647249698243e-06, + "loss": 0.0341, + "step": 112870 + }, + { + "epoch": 5.27, + "learning_rate": 2.4698634646434565e-06, + "loss": 0.0358, + "step": 112875 + }, + { + "epoch": 5.27, + "learning_rate": 2.46907967958867e-06, + "loss": 0.0422, + "step": 112880 + }, + { + "epoch": 5.27, + "learning_rate": 2.4682958945338835e-06, + "loss": 0.0265, + "step": 112885 + }, + { + "epoch": 5.27, + "learning_rate": 2.4675121094790965e-06, + "loss": 0.0708, + "step": 112890 + }, + { + "epoch": 5.27, + "learning_rate": 2.46672832442431e-06, + "loss": 0.0952, + "step": 112895 + }, + { + "epoch": 5.27, + "learning_rate": 2.4659445393695235e-06, + "loss": 0.0831, + "step": 112900 + }, + { + "epoch": 5.27, + "learning_rate": 2.465160754314737e-06, + "loss": 0.2088, + "step": 112905 + }, + { + "epoch": 5.27, + "learning_rate": 2.4643769692599505e-06, + "loss": 0.0972, + "step": 112910 + }, + { + "epoch": 5.27, + "learning_rate": 2.463593184205164e-06, + "loss": 0.023, + "step": 112915 + }, + { + "epoch": 5.27, + "learning_rate": 2.462809399150377e-06, + "loss": 0.0408, + "step": 112920 + }, + { + "epoch": 5.27, + "learning_rate": 2.4620256140955905e-06, + "loss": 0.0212, + "step": 112925 + }, + { + "epoch": 5.27, + "learning_rate": 2.461241829040804e-06, + "loss": 0.0824, + "step": 112930 + }, + { + "epoch": 5.27, + "learning_rate": 2.4604580439860174e-06, + "loss": 0.0626, + "step": 112935 + }, + { + "epoch": 5.27, + "learning_rate": 2.459674258931231e-06, + "loss": 0.0567, + "step": 112940 + }, + { + "epoch": 5.27, + "learning_rate": 2.4588904738764444e-06, + "loss": 0.0994, + "step": 112945 + }, + { + "epoch": 5.27, + "learning_rate": 2.458106688821658e-06, + "loss": 0.1075, + "step": 112950 + }, + { + "epoch": 5.27, + "learning_rate": 2.457322903766871e-06, + "loss": 0.2899, + "step": 112955 + }, + { + "epoch": 5.27, + "learning_rate": 2.4565391187120844e-06, + "loss": 0.0978, + "step": 112960 + }, + { + "epoch": 5.27, + "learning_rate": 2.4557553336572983e-06, + "loss": 0.0173, + "step": 112965 + }, + { + "epoch": 5.27, + "learning_rate": 2.4549715486025114e-06, + "loss": 0.0307, + "step": 112970 + }, + { + "epoch": 5.27, + "learning_rate": 2.454187763547725e-06, + "loss": 0.0342, + "step": 112975 + }, + { + "epoch": 5.27, + "learning_rate": 2.4534039784929384e-06, + "loss": 0.048, + "step": 112980 + }, + { + "epoch": 5.27, + "learning_rate": 2.4526201934381514e-06, + "loss": 0.0667, + "step": 112985 + }, + { + "epoch": 5.27, + "learning_rate": 2.4518364083833653e-06, + "loss": 0.0892, + "step": 112990 + }, + { + "epoch": 5.27, + "learning_rate": 2.451052623328579e-06, + "loss": 0.0977, + "step": 112995 + }, + { + "epoch": 5.27, + "learning_rate": 2.450268838273792e-06, + "loss": 0.1665, + "step": 113000 + }, + { + "epoch": 5.27, + "learning_rate": 2.4494850532190053e-06, + "loss": 0.2567, + "step": 113005 + }, + { + "epoch": 5.27, + "learning_rate": 2.448701268164219e-06, + "loss": 0.1079, + "step": 113010 + }, + { + "epoch": 5.27, + "learning_rate": 2.4479174831094323e-06, + "loss": 0.0151, + "step": 113015 + }, + { + "epoch": 5.27, + "learning_rate": 2.447133698054646e-06, + "loss": 0.0609, + "step": 113020 + }, + { + "epoch": 5.27, + "learning_rate": 2.4463499129998593e-06, + "loss": 0.091, + "step": 113025 + }, + { + "epoch": 5.27, + "learning_rate": 2.4455661279450728e-06, + "loss": 0.0559, + "step": 113030 + }, + { + "epoch": 5.27, + "learning_rate": 2.444782342890286e-06, + "loss": 0.0264, + "step": 113035 + }, + { + "epoch": 5.27, + "learning_rate": 2.4439985578354993e-06, + "loss": 0.0718, + "step": 113040 + }, + { + "epoch": 5.27, + "learning_rate": 2.4432147727807128e-06, + "loss": 0.0673, + "step": 113045 + }, + { + "epoch": 5.28, + "learning_rate": 2.4424309877259263e-06, + "loss": 0.0738, + "step": 113050 + }, + { + "epoch": 5.28, + "learning_rate": 2.4416472026711397e-06, + "loss": 0.0716, + "step": 113055 + }, + { + "epoch": 5.28, + "learning_rate": 2.4408634176163532e-06, + "loss": 0.0933, + "step": 113060 + }, + { + "epoch": 5.28, + "learning_rate": 2.4400796325615663e-06, + "loss": 0.0033, + "step": 113065 + }, + { + "epoch": 5.28, + "learning_rate": 2.4392958475067798e-06, + "loss": 0.0447, + "step": 113070 + }, + { + "epoch": 5.28, + "learning_rate": 2.4385120624519932e-06, + "loss": 0.0356, + "step": 113075 + }, + { + "epoch": 5.28, + "learning_rate": 2.4377282773972067e-06, + "loss": 0.0301, + "step": 113080 + }, + { + "epoch": 5.28, + "learning_rate": 2.4369444923424202e-06, + "loss": 0.0846, + "step": 113085 + }, + { + "epoch": 5.28, + "learning_rate": 2.4361607072876337e-06, + "loss": 0.1028, + "step": 113090 + }, + { + "epoch": 5.28, + "learning_rate": 2.435376922232847e-06, + "loss": 0.072, + "step": 113095 + }, + { + "epoch": 5.28, + "learning_rate": 2.4345931371780602e-06, + "loss": 0.1479, + "step": 113100 + }, + { + "epoch": 5.28, + "learning_rate": 2.4338093521232737e-06, + "loss": 0.4138, + "step": 113105 + }, + { + "epoch": 5.28, + "learning_rate": 2.4330255670684876e-06, + "loss": 0.1084, + "step": 113110 + }, + { + "epoch": 5.28, + "learning_rate": 2.4322417820137007e-06, + "loss": 0.0189, + "step": 113115 + }, + { + "epoch": 5.28, + "learning_rate": 2.431457996958914e-06, + "loss": 0.033, + "step": 113120 + }, + { + "epoch": 5.28, + "learning_rate": 2.4306742119041276e-06, + "loss": 0.0629, + "step": 113125 + }, + { + "epoch": 5.28, + "learning_rate": 2.4298904268493407e-06, + "loss": 0.1082, + "step": 113130 + }, + { + "epoch": 5.28, + "learning_rate": 2.4291066417945546e-06, + "loss": 0.0841, + "step": 113135 + }, + { + "epoch": 5.28, + "learning_rate": 2.428322856739768e-06, + "loss": 0.1255, + "step": 113140 + }, + { + "epoch": 5.28, + "learning_rate": 2.4275390716849816e-06, + "loss": 0.0508, + "step": 113145 + }, + { + "epoch": 5.28, + "learning_rate": 2.4267552866301946e-06, + "loss": 0.0736, + "step": 113150 + }, + { + "epoch": 5.28, + "learning_rate": 2.425971501575408e-06, + "loss": 0.2079, + "step": 113155 + }, + { + "epoch": 5.28, + "learning_rate": 2.4251877165206216e-06, + "loss": 0.0867, + "step": 113160 + }, + { + "epoch": 5.28, + "learning_rate": 2.424403931465835e-06, + "loss": 0.0257, + "step": 113165 + }, + { + "epoch": 5.28, + "learning_rate": 2.4236201464110486e-06, + "loss": 0.0089, + "step": 113170 + }, + { + "epoch": 5.28, + "learning_rate": 2.422836361356262e-06, + "loss": 0.0359, + "step": 113175 + }, + { + "epoch": 5.28, + "learning_rate": 2.422052576301475e-06, + "loss": 0.0617, + "step": 113180 + }, + { + "epoch": 5.28, + "learning_rate": 2.4212687912466886e-06, + "loss": 0.0709, + "step": 113185 + }, + { + "epoch": 5.28, + "learning_rate": 2.420485006191902e-06, + "loss": 0.0541, + "step": 113190 + }, + { + "epoch": 5.28, + "learning_rate": 2.4197012211371156e-06, + "loss": 0.1182, + "step": 113195 + }, + { + "epoch": 5.28, + "learning_rate": 2.418917436082329e-06, + "loss": 0.1367, + "step": 113200 + }, + { + "epoch": 5.28, + "learning_rate": 2.4181336510275425e-06, + "loss": 0.2053, + "step": 113205 + }, + { + "epoch": 5.28, + "learning_rate": 2.417349865972756e-06, + "loss": 0.082, + "step": 113210 + }, + { + "epoch": 5.28, + "learning_rate": 2.416566080917969e-06, + "loss": 0.0134, + "step": 113215 + }, + { + "epoch": 5.28, + "learning_rate": 2.4157822958631825e-06, + "loss": 0.0311, + "step": 113220 + }, + { + "epoch": 5.28, + "learning_rate": 2.414998510808396e-06, + "loss": 0.0239, + "step": 113225 + }, + { + "epoch": 5.28, + "learning_rate": 2.4142147257536095e-06, + "loss": 0.0335, + "step": 113230 + }, + { + "epoch": 5.28, + "learning_rate": 2.413430940698823e-06, + "loss": 0.0714, + "step": 113235 + }, + { + "epoch": 5.28, + "learning_rate": 2.4126471556440365e-06, + "loss": 0.0554, + "step": 113240 + }, + { + "epoch": 5.28, + "learning_rate": 2.4118633705892495e-06, + "loss": 0.1198, + "step": 113245 + }, + { + "epoch": 5.28, + "learning_rate": 2.411079585534463e-06, + "loss": 0.2333, + "step": 113250 + }, + { + "epoch": 5.28, + "learning_rate": 2.410295800479677e-06, + "loss": 0.23, + "step": 113255 + }, + { + "epoch": 5.28, + "learning_rate": 2.40951201542489e-06, + "loss": 0.0826, + "step": 113260 + }, + { + "epoch": 5.29, + "learning_rate": 2.4087282303701035e-06, + "loss": 0.0385, + "step": 113265 + }, + { + "epoch": 5.29, + "learning_rate": 2.407944445315317e-06, + "loss": 0.044, + "step": 113270 + }, + { + "epoch": 5.29, + "learning_rate": 2.4071606602605304e-06, + "loss": 0.0543, + "step": 113275 + }, + { + "epoch": 5.29, + "learning_rate": 2.406376875205744e-06, + "loss": 0.0598, + "step": 113280 + }, + { + "epoch": 5.29, + "learning_rate": 2.4055930901509574e-06, + "loss": 0.0529, + "step": 113285 + }, + { + "epoch": 5.29, + "learning_rate": 2.404809305096171e-06, + "loss": 0.1198, + "step": 113290 + }, + { + "epoch": 5.29, + "learning_rate": 2.404025520041384e-06, + "loss": 0.1394, + "step": 113295 + }, + { + "epoch": 5.29, + "learning_rate": 2.4032417349865974e-06, + "loss": 0.1923, + "step": 113300 + }, + { + "epoch": 5.29, + "learning_rate": 2.402457949931811e-06, + "loss": 0.2237, + "step": 113305 + }, + { + "epoch": 5.29, + "learning_rate": 2.4016741648770244e-06, + "loss": 0.1613, + "step": 113310 + }, + { + "epoch": 5.29, + "learning_rate": 2.400890379822238e-06, + "loss": 0.0315, + "step": 113315 + }, + { + "epoch": 5.29, + "learning_rate": 2.4001065947674513e-06, + "loss": 0.0256, + "step": 113320 + }, + { + "epoch": 5.29, + "learning_rate": 2.3993228097126644e-06, + "loss": 0.037, + "step": 113325 + }, + { + "epoch": 5.29, + "learning_rate": 2.398539024657878e-06, + "loss": 0.0664, + "step": 113330 + }, + { + "epoch": 5.29, + "learning_rate": 2.3977552396030914e-06, + "loss": 0.033, + "step": 113335 + }, + { + "epoch": 5.29, + "learning_rate": 2.396971454548305e-06, + "loss": 0.0454, + "step": 113340 + }, + { + "epoch": 5.29, + "learning_rate": 2.3961876694935183e-06, + "loss": 0.1043, + "step": 113345 + }, + { + "epoch": 5.29, + "learning_rate": 2.395403884438732e-06, + "loss": 0.1337, + "step": 113350 + }, + { + "epoch": 5.29, + "learning_rate": 2.3946200993839453e-06, + "loss": 0.1989, + "step": 113355 + }, + { + "epoch": 5.29, + "learning_rate": 2.3938363143291583e-06, + "loss": 0.0843, + "step": 113360 + }, + { + "epoch": 5.29, + "learning_rate": 2.393052529274372e-06, + "loss": 0.0348, + "step": 113365 + }, + { + "epoch": 5.29, + "learning_rate": 2.3922687442195853e-06, + "loss": 0.0327, + "step": 113370 + }, + { + "epoch": 5.29, + "learning_rate": 2.391484959164799e-06, + "loss": 0.0634, + "step": 113375 + }, + { + "epoch": 5.29, + "learning_rate": 2.3907011741100123e-06, + "loss": 0.053, + "step": 113380 + }, + { + "epoch": 5.29, + "learning_rate": 2.3899173890552258e-06, + "loss": 0.062, + "step": 113385 + }, + { + "epoch": 5.29, + "learning_rate": 2.389133604000439e-06, + "loss": 0.04, + "step": 113390 + }, + { + "epoch": 5.29, + "learning_rate": 2.3883498189456523e-06, + "loss": 0.1191, + "step": 113395 + }, + { + "epoch": 5.29, + "learning_rate": 2.387566033890866e-06, + "loss": 0.0481, + "step": 113400 + }, + { + "epoch": 5.29, + "learning_rate": 2.3867822488360797e-06, + "loss": 0.2566, + "step": 113405 + }, + { + "epoch": 5.29, + "learning_rate": 2.3859984637812927e-06, + "loss": 0.1366, + "step": 113410 + }, + { + "epoch": 5.29, + "learning_rate": 2.3852146787265062e-06, + "loss": 0.0411, + "step": 113415 + }, + { + "epoch": 5.29, + "learning_rate": 2.3844308936717197e-06, + "loss": 0.0358, + "step": 113420 + }, + { + "epoch": 5.29, + "learning_rate": 2.383647108616933e-06, + "loss": 0.0213, + "step": 113425 + }, + { + "epoch": 5.29, + "learning_rate": 2.3828633235621467e-06, + "loss": 0.0303, + "step": 113430 + }, + { + "epoch": 5.29, + "learning_rate": 2.38207953850736e-06, + "loss": 0.0556, + "step": 113435 + }, + { + "epoch": 5.29, + "learning_rate": 2.3812957534525732e-06, + "loss": 0.0827, + "step": 113440 + }, + { + "epoch": 5.29, + "learning_rate": 2.3805119683977867e-06, + "loss": 0.1041, + "step": 113445 + }, + { + "epoch": 5.29, + "learning_rate": 2.379728183343e-06, + "loss": 0.1295, + "step": 113450 + }, + { + "epoch": 5.29, + "learning_rate": 2.3789443982882137e-06, + "loss": 0.2279, + "step": 113455 + }, + { + "epoch": 5.29, + "learning_rate": 2.378160613233427e-06, + "loss": 0.1159, + "step": 113460 + }, + { + "epoch": 5.29, + "learning_rate": 2.3773768281786406e-06, + "loss": 0.015, + "step": 113465 + }, + { + "epoch": 5.29, + "learning_rate": 2.376593043123854e-06, + "loss": 0.0227, + "step": 113470 + }, + { + "epoch": 5.29, + "learning_rate": 2.375809258069067e-06, + "loss": 0.03, + "step": 113475 + }, + { + "epoch": 5.3, + "learning_rate": 2.3750254730142807e-06, + "loss": 0.2252, + "step": 113480 + }, + { + "epoch": 5.3, + "learning_rate": 2.374241687959494e-06, + "loss": 0.0647, + "step": 113485 + }, + { + "epoch": 5.3, + "learning_rate": 2.3734579029047076e-06, + "loss": 0.0869, + "step": 113490 + }, + { + "epoch": 5.3, + "learning_rate": 2.372674117849921e-06, + "loss": 0.0719, + "step": 113495 + }, + { + "epoch": 5.3, + "learning_rate": 2.3718903327951346e-06, + "loss": 0.1789, + "step": 113500 + }, + { + "epoch": 5.3, + "learning_rate": 2.3711065477403476e-06, + "loss": 0.2984, + "step": 113505 + }, + { + "epoch": 5.3, + "learning_rate": 2.370322762685561e-06, + "loss": 0.1039, + "step": 113510 + }, + { + "epoch": 5.3, + "learning_rate": 2.3695389776307746e-06, + "loss": 0.0223, + "step": 113515 + }, + { + "epoch": 5.3, + "learning_rate": 2.368755192575988e-06, + "loss": 0.002, + "step": 113520 + }, + { + "epoch": 5.3, + "learning_rate": 2.3679714075212016e-06, + "loss": 0.015, + "step": 113525 + }, + { + "epoch": 5.3, + "learning_rate": 2.367187622466415e-06, + "loss": 0.0337, + "step": 113530 + }, + { + "epoch": 5.3, + "learning_rate": 2.3664038374116285e-06, + "loss": 0.0779, + "step": 113535 + }, + { + "epoch": 5.3, + "learning_rate": 2.3656200523568416e-06, + "loss": 0.1013, + "step": 113540 + }, + { + "epoch": 5.3, + "learning_rate": 2.3648362673020555e-06, + "loss": 0.1683, + "step": 113545 + }, + { + "epoch": 5.3, + "learning_rate": 2.364052482247269e-06, + "loss": 0.194, + "step": 113550 + }, + { + "epoch": 5.3, + "learning_rate": 2.363268697192482e-06, + "loss": 0.2535, + "step": 113555 + }, + { + "epoch": 5.3, + "learning_rate": 2.3624849121376955e-06, + "loss": 0.1042, + "step": 113560 + }, + { + "epoch": 5.3, + "learning_rate": 2.361701127082909e-06, + "loss": 0.0622, + "step": 113565 + }, + { + "epoch": 5.3, + "learning_rate": 2.3609173420281225e-06, + "loss": 0.0146, + "step": 113570 + }, + { + "epoch": 5.3, + "learning_rate": 2.360133556973336e-06, + "loss": 0.0617, + "step": 113575 + }, + { + "epoch": 5.3, + "learning_rate": 2.3593497719185495e-06, + "loss": 0.0191, + "step": 113580 + }, + { + "epoch": 5.3, + "learning_rate": 2.358565986863763e-06, + "loss": 0.0487, + "step": 113585 + }, + { + "epoch": 5.3, + "learning_rate": 2.357782201808976e-06, + "loss": 0.095, + "step": 113590 + }, + { + "epoch": 5.3, + "learning_rate": 2.3569984167541895e-06, + "loss": 0.1199, + "step": 113595 + }, + { + "epoch": 5.3, + "learning_rate": 2.356214631699403e-06, + "loss": 0.0844, + "step": 113600 + }, + { + "epoch": 5.3, + "learning_rate": 2.3554308466446164e-06, + "loss": 0.1909, + "step": 113605 + }, + { + "epoch": 5.3, + "learning_rate": 2.35464706158983e-06, + "loss": 0.1466, + "step": 113610 + }, + { + "epoch": 5.3, + "learning_rate": 2.3538632765350434e-06, + "loss": 0.0373, + "step": 113615 + }, + { + "epoch": 5.3, + "learning_rate": 2.3530794914802565e-06, + "loss": 0.0447, + "step": 113620 + }, + { + "epoch": 5.3, + "learning_rate": 2.35229570642547e-06, + "loss": 0.0589, + "step": 113625 + }, + { + "epoch": 5.3, + "learning_rate": 2.3515119213706834e-06, + "loss": 0.0794, + "step": 113630 + }, + { + "epoch": 5.3, + "learning_rate": 2.350728136315897e-06, + "loss": 0.0388, + "step": 113635 + }, + { + "epoch": 5.3, + "learning_rate": 2.3499443512611104e-06, + "loss": 0.0728, + "step": 113640 + }, + { + "epoch": 5.3, + "learning_rate": 2.349160566206324e-06, + "loss": 0.1119, + "step": 113645 + }, + { + "epoch": 5.3, + "learning_rate": 2.3483767811515374e-06, + "loss": 0.1342, + "step": 113650 + }, + { + "epoch": 5.3, + "learning_rate": 2.3475929960967504e-06, + "loss": 0.2651, + "step": 113655 + }, + { + "epoch": 5.3, + "learning_rate": 2.346809211041964e-06, + "loss": 0.0827, + "step": 113660 + }, + { + "epoch": 5.3, + "learning_rate": 2.3460254259871774e-06, + "loss": 0.0179, + "step": 113665 + }, + { + "epoch": 5.3, + "learning_rate": 2.345241640932391e-06, + "loss": 0.0506, + "step": 113670 + }, + { + "epoch": 5.3, + "learning_rate": 2.3444578558776043e-06, + "loss": 0.0673, + "step": 113675 + }, + { + "epoch": 5.3, + "learning_rate": 2.343674070822818e-06, + "loss": 0.0445, + "step": 113680 + }, + { + "epoch": 5.3, + "learning_rate": 2.342890285768031e-06, + "loss": 0.0816, + "step": 113685 + }, + { + "epoch": 5.3, + "learning_rate": 2.342106500713245e-06, + "loss": 0.0454, + "step": 113690 + }, + { + "epoch": 5.31, + "learning_rate": 2.3413227156584583e-06, + "loss": 0.0801, + "step": 113695 + }, + { + "epoch": 5.31, + "learning_rate": 2.3405389306036713e-06, + "loss": 0.1662, + "step": 113700 + }, + { + "epoch": 5.31, + "learning_rate": 2.339755145548885e-06, + "loss": 0.1692, + "step": 113705 + }, + { + "epoch": 5.31, + "learning_rate": 2.3389713604940983e-06, + "loss": 0.1204, + "step": 113710 + }, + { + "epoch": 5.31, + "learning_rate": 2.3381875754393118e-06, + "loss": 0.0048, + "step": 113715 + }, + { + "epoch": 5.31, + "learning_rate": 2.3374037903845253e-06, + "loss": 0.0215, + "step": 113720 + }, + { + "epoch": 5.31, + "learning_rate": 2.3366200053297387e-06, + "loss": 0.0991, + "step": 113725 + }, + { + "epoch": 5.31, + "learning_rate": 2.3358362202749522e-06, + "loss": 0.0659, + "step": 113730 + }, + { + "epoch": 5.31, + "learning_rate": 2.3350524352201653e-06, + "loss": 0.1019, + "step": 113735 + }, + { + "epoch": 5.31, + "learning_rate": 2.3342686501653788e-06, + "loss": 0.0758, + "step": 113740 + }, + { + "epoch": 5.31, + "learning_rate": 2.3334848651105922e-06, + "loss": 0.1558, + "step": 113745 + }, + { + "epoch": 5.31, + "learning_rate": 2.3327010800558057e-06, + "loss": 0.1101, + "step": 113750 + }, + { + "epoch": 5.31, + "learning_rate": 2.3319172950010192e-06, + "loss": 0.1442, + "step": 113755 + }, + { + "epoch": 5.31, + "learning_rate": 2.3311335099462327e-06, + "loss": 0.0883, + "step": 113760 + }, + { + "epoch": 5.31, + "learning_rate": 2.3303497248914458e-06, + "loss": 0.0048, + "step": 113765 + }, + { + "epoch": 5.31, + "learning_rate": 2.3295659398366592e-06, + "loss": 0.0235, + "step": 113770 + }, + { + "epoch": 5.31, + "learning_rate": 2.3287821547818727e-06, + "loss": 0.0166, + "step": 113775 + }, + { + "epoch": 5.31, + "learning_rate": 2.327998369727086e-06, + "loss": 0.0351, + "step": 113780 + }, + { + "epoch": 5.31, + "learning_rate": 2.3272145846722997e-06, + "loss": 0.0433, + "step": 113785 + }, + { + "epoch": 5.31, + "learning_rate": 2.326430799617513e-06, + "loss": 0.1951, + "step": 113790 + }, + { + "epoch": 5.31, + "learning_rate": 2.3256470145627266e-06, + "loss": 0.1054, + "step": 113795 + }, + { + "epoch": 5.31, + "learning_rate": 2.3248632295079397e-06, + "loss": 0.1329, + "step": 113800 + }, + { + "epoch": 5.31, + "learning_rate": 2.324079444453153e-06, + "loss": 0.2805, + "step": 113805 + }, + { + "epoch": 5.31, + "learning_rate": 2.3232956593983667e-06, + "loss": 0.0937, + "step": 113810 + }, + { + "epoch": 5.31, + "learning_rate": 2.32251187434358e-06, + "loss": 0.0124, + "step": 113815 + }, + { + "epoch": 5.31, + "learning_rate": 2.3217280892887936e-06, + "loss": 0.0824, + "step": 113820 + }, + { + "epoch": 5.31, + "learning_rate": 2.320944304234007e-06, + "loss": 0.0302, + "step": 113825 + }, + { + "epoch": 5.31, + "learning_rate": 2.32016051917922e-06, + "loss": 0.0166, + "step": 113830 + }, + { + "epoch": 5.31, + "learning_rate": 2.319376734124434e-06, + "loss": 0.072, + "step": 113835 + }, + { + "epoch": 5.31, + "learning_rate": 2.3185929490696476e-06, + "loss": 0.0861, + "step": 113840 + }, + { + "epoch": 5.31, + "learning_rate": 2.317809164014861e-06, + "loss": 0.0804, + "step": 113845 + }, + { + "epoch": 5.31, + "learning_rate": 2.317025378960074e-06, + "loss": 0.0584, + "step": 113850 + }, + { + "epoch": 5.31, + "learning_rate": 2.3162415939052876e-06, + "loss": 0.4127, + "step": 113855 + }, + { + "epoch": 5.31, + "learning_rate": 2.315457808850501e-06, + "loss": 0.1546, + "step": 113860 + }, + { + "epoch": 5.31, + "learning_rate": 2.3146740237957146e-06, + "loss": 0.0175, + "step": 113865 + }, + { + "epoch": 5.31, + "learning_rate": 2.313890238740928e-06, + "loss": 0.0304, + "step": 113870 + }, + { + "epoch": 5.31, + "learning_rate": 2.3131064536861415e-06, + "loss": 0.0643, + "step": 113875 + }, + { + "epoch": 5.31, + "learning_rate": 2.3123226686313546e-06, + "loss": 0.0303, + "step": 113880 + }, + { + "epoch": 5.31, + "learning_rate": 2.311538883576568e-06, + "loss": 0.0213, + "step": 113885 + }, + { + "epoch": 5.31, + "learning_rate": 2.3107550985217815e-06, + "loss": 0.0421, + "step": 113890 + }, + { + "epoch": 5.31, + "learning_rate": 2.309971313466995e-06, + "loss": 0.0726, + "step": 113895 + }, + { + "epoch": 5.31, + "learning_rate": 2.3091875284122085e-06, + "loss": 0.1626, + "step": 113900 + }, + { + "epoch": 5.31, + "learning_rate": 2.308403743357422e-06, + "loss": 0.2512, + "step": 113905 + }, + { + "epoch": 5.32, + "learning_rate": 2.3076199583026355e-06, + "loss": 0.0851, + "step": 113910 + }, + { + "epoch": 5.32, + "learning_rate": 2.3068361732478485e-06, + "loss": 0.0084, + "step": 113915 + }, + { + "epoch": 5.32, + "learning_rate": 2.306052388193062e-06, + "loss": 0.044, + "step": 113920 + }, + { + "epoch": 5.32, + "learning_rate": 2.3052686031382755e-06, + "loss": 0.1108, + "step": 113925 + }, + { + "epoch": 5.32, + "learning_rate": 2.304484818083489e-06, + "loss": 0.0596, + "step": 113930 + }, + { + "epoch": 5.32, + "learning_rate": 2.3037010330287025e-06, + "loss": 0.0255, + "step": 113935 + }, + { + "epoch": 5.32, + "learning_rate": 2.302917247973916e-06, + "loss": 0.0579, + "step": 113940 + }, + { + "epoch": 5.32, + "learning_rate": 2.302133462919129e-06, + "loss": 0.108, + "step": 113945 + }, + { + "epoch": 5.32, + "learning_rate": 2.3013496778643425e-06, + "loss": 0.1503, + "step": 113950 + }, + { + "epoch": 5.32, + "learning_rate": 2.300565892809556e-06, + "loss": 0.3383, + "step": 113955 + }, + { + "epoch": 5.32, + "learning_rate": 2.2997821077547694e-06, + "loss": 0.0982, + "step": 113960 + }, + { + "epoch": 5.32, + "learning_rate": 2.298998322699983e-06, + "loss": 0.0189, + "step": 113965 + }, + { + "epoch": 5.32, + "learning_rate": 2.2982145376451964e-06, + "loss": 0.0402, + "step": 113970 + }, + { + "epoch": 5.32, + "learning_rate": 2.29743075259041e-06, + "loss": 0.1427, + "step": 113975 + }, + { + "epoch": 5.32, + "learning_rate": 2.2966469675356234e-06, + "loss": 0.0326, + "step": 113980 + }, + { + "epoch": 5.32, + "learning_rate": 2.295863182480837e-06, + "loss": 0.1383, + "step": 113985 + }, + { + "epoch": 5.32, + "learning_rate": 2.2950793974260503e-06, + "loss": 0.0744, + "step": 113990 + }, + { + "epoch": 5.32, + "learning_rate": 2.2942956123712634e-06, + "loss": 0.0695, + "step": 113995 + }, + { + "epoch": 5.32, + "learning_rate": 2.293511827316477e-06, + "loss": 0.1357, + "step": 114000 + }, + { + "epoch": 5.32, + "learning_rate": 2.2928847992726477e-06, + "loss": 0.223, + "step": 114005 + }, + { + "epoch": 5.32, + "learning_rate": 2.292101014217861e-06, + "loss": 0.1576, + "step": 114010 + }, + { + "epoch": 5.32, + "learning_rate": 2.2913172291630743e-06, + "loss": 0.0256, + "step": 114015 + }, + { + "epoch": 5.32, + "learning_rate": 2.290533444108288e-06, + "loss": 0.0279, + "step": 114020 + }, + { + "epoch": 5.32, + "learning_rate": 2.2897496590535013e-06, + "loss": 0.0396, + "step": 114025 + }, + { + "epoch": 5.32, + "learning_rate": 2.2889658739987147e-06, + "loss": 0.0544, + "step": 114030 + }, + { + "epoch": 5.32, + "learning_rate": 2.2881820889439282e-06, + "loss": 0.0474, + "step": 114035 + }, + { + "epoch": 5.32, + "learning_rate": 2.2873983038891417e-06, + "loss": 0.0998, + "step": 114040 + }, + { + "epoch": 5.32, + "learning_rate": 2.286614518834355e-06, + "loss": 0.0857, + "step": 114045 + }, + { + "epoch": 5.32, + "learning_rate": 2.2858307337795687e-06, + "loss": 0.1684, + "step": 114050 + }, + { + "epoch": 5.32, + "learning_rate": 2.285046948724782e-06, + "loss": 0.2541, + "step": 114055 + }, + { + "epoch": 5.32, + "learning_rate": 2.284263163669995e-06, + "loss": 0.0926, + "step": 114060 + }, + { + "epoch": 5.32, + "learning_rate": 2.2834793786152087e-06, + "loss": 0.0034, + "step": 114065 + }, + { + "epoch": 5.32, + "learning_rate": 2.282695593560422e-06, + "loss": 0.0067, + "step": 114070 + }, + { + "epoch": 5.32, + "learning_rate": 2.2819118085056357e-06, + "loss": 0.1003, + "step": 114075 + }, + { + "epoch": 5.32, + "learning_rate": 2.281128023450849e-06, + "loss": 0.0739, + "step": 114080 + }, + { + "epoch": 5.32, + "learning_rate": 2.2803442383960626e-06, + "loss": 0.0441, + "step": 114085 + }, + { + "epoch": 5.32, + "learning_rate": 2.2795604533412757e-06, + "loss": 0.0595, + "step": 114090 + }, + { + "epoch": 5.32, + "learning_rate": 2.278776668286489e-06, + "loss": 0.1018, + "step": 114095 + }, + { + "epoch": 5.32, + "learning_rate": 2.2779928832317026e-06, + "loss": 0.1779, + "step": 114100 + }, + { + "epoch": 5.32, + "learning_rate": 2.277209098176916e-06, + "loss": 0.2514, + "step": 114105 + }, + { + "epoch": 5.32, + "learning_rate": 2.2764253131221296e-06, + "loss": 0.1196, + "step": 114110 + }, + { + "epoch": 5.32, + "learning_rate": 2.275641528067343e-06, + "loss": 0.0096, + "step": 114115 + }, + { + "epoch": 5.32, + "learning_rate": 2.2748577430125566e-06, + "loss": 0.0427, + "step": 114120 + }, + { + "epoch": 5.33, + "learning_rate": 2.2740739579577696e-06, + "loss": 0.0777, + "step": 114125 + }, + { + "epoch": 5.33, + "learning_rate": 2.273290172902983e-06, + "loss": 0.0301, + "step": 114130 + }, + { + "epoch": 5.33, + "learning_rate": 2.2725063878481966e-06, + "loss": 0.0508, + "step": 114135 + }, + { + "epoch": 5.33, + "learning_rate": 2.27172260279341e-06, + "loss": 0.1026, + "step": 114140 + }, + { + "epoch": 5.33, + "learning_rate": 2.2709388177386236e-06, + "loss": 0.0801, + "step": 114145 + }, + { + "epoch": 5.33, + "learning_rate": 2.270155032683837e-06, + "loss": 0.1183, + "step": 114150 + }, + { + "epoch": 5.33, + "learning_rate": 2.2693712476290505e-06, + "loss": 0.2837, + "step": 114155 + }, + { + "epoch": 5.33, + "learning_rate": 2.2685874625742636e-06, + "loss": 0.141, + "step": 114160 + }, + { + "epoch": 5.33, + "learning_rate": 2.2678036775194775e-06, + "loss": 0.0136, + "step": 114165 + }, + { + "epoch": 5.33, + "learning_rate": 2.267019892464691e-06, + "loss": 0.0342, + "step": 114170 + }, + { + "epoch": 5.33, + "learning_rate": 2.266236107409904e-06, + "loss": 0.0624, + "step": 114175 + }, + { + "epoch": 5.33, + "learning_rate": 2.2654523223551175e-06, + "loss": 0.0273, + "step": 114180 + }, + { + "epoch": 5.33, + "learning_rate": 2.264668537300331e-06, + "loss": 0.0666, + "step": 114185 + }, + { + "epoch": 5.33, + "learning_rate": 2.2638847522455445e-06, + "loss": 0.0603, + "step": 114190 + }, + { + "epoch": 5.33, + "learning_rate": 2.263100967190758e-06, + "loss": 0.1554, + "step": 114195 + }, + { + "epoch": 5.33, + "learning_rate": 2.2623171821359714e-06, + "loss": 0.1507, + "step": 114200 + }, + { + "epoch": 5.33, + "learning_rate": 2.2615333970811845e-06, + "loss": 0.2124, + "step": 114205 + }, + { + "epoch": 5.33, + "learning_rate": 2.260749612026398e-06, + "loss": 0.1127, + "step": 114210 + }, + { + "epoch": 5.33, + "learning_rate": 2.2599658269716115e-06, + "loss": 0.0124, + "step": 114215 + }, + { + "epoch": 5.33, + "learning_rate": 2.259182041916825e-06, + "loss": 0.029, + "step": 114220 + }, + { + "epoch": 5.33, + "learning_rate": 2.2583982568620384e-06, + "loss": 0.0215, + "step": 114225 + }, + { + "epoch": 5.33, + "learning_rate": 2.257614471807252e-06, + "loss": 0.0876, + "step": 114230 + }, + { + "epoch": 5.33, + "learning_rate": 2.2568306867524654e-06, + "loss": 0.069, + "step": 114235 + }, + { + "epoch": 5.33, + "learning_rate": 2.2560469016976785e-06, + "loss": 0.1046, + "step": 114240 + }, + { + "epoch": 5.33, + "learning_rate": 2.255263116642892e-06, + "loss": 0.0707, + "step": 114245 + }, + { + "epoch": 5.33, + "learning_rate": 2.2544793315881054e-06, + "loss": 0.2633, + "step": 114250 + }, + { + "epoch": 5.33, + "learning_rate": 2.253695546533319e-06, + "loss": 0.2161, + "step": 114255 + }, + { + "epoch": 5.33, + "learning_rate": 2.2529117614785324e-06, + "loss": 0.1, + "step": 114260 + }, + { + "epoch": 5.33, + "learning_rate": 2.252127976423746e-06, + "loss": 0.0584, + "step": 114265 + }, + { + "epoch": 5.33, + "learning_rate": 2.251344191368959e-06, + "loss": 0.0843, + "step": 114270 + }, + { + "epoch": 5.33, + "learning_rate": 2.2505604063141724e-06, + "loss": 0.0296, + "step": 114275 + }, + { + "epoch": 5.33, + "learning_rate": 2.249776621259386e-06, + "loss": 0.0639, + "step": 114280 + }, + { + "epoch": 5.33, + "learning_rate": 2.2489928362045994e-06, + "loss": 0.0724, + "step": 114285 + }, + { + "epoch": 5.33, + "learning_rate": 2.248209051149813e-06, + "loss": 0.0458, + "step": 114290 + }, + { + "epoch": 5.33, + "learning_rate": 2.2474252660950263e-06, + "loss": 0.0836, + "step": 114295 + }, + { + "epoch": 5.33, + "learning_rate": 2.24664148104024e-06, + "loss": 0.0736, + "step": 114300 + }, + { + "epoch": 5.33, + "learning_rate": 2.245857695985453e-06, + "loss": 0.3309, + "step": 114305 + }, + { + "epoch": 5.33, + "learning_rate": 2.2450739109306668e-06, + "loss": 0.0765, + "step": 114310 + }, + { + "epoch": 5.33, + "learning_rate": 2.2442901258758803e-06, + "loss": 0.0115, + "step": 114315 + }, + { + "epoch": 5.33, + "learning_rate": 2.2435063408210933e-06, + "loss": 0.0271, + "step": 114320 + }, + { + "epoch": 5.33, + "learning_rate": 2.242722555766307e-06, + "loss": 0.0165, + "step": 114325 + }, + { + "epoch": 5.33, + "learning_rate": 2.2419387707115203e-06, + "loss": 0.0243, + "step": 114330 + }, + { + "epoch": 5.34, + "learning_rate": 2.2411549856567338e-06, + "loss": 0.0466, + "step": 114335 + }, + { + "epoch": 5.34, + "learning_rate": 2.2403712006019472e-06, + "loss": 0.072, + "step": 114340 + }, + { + "epoch": 5.34, + "learning_rate": 2.2395874155471607e-06, + "loss": 0.0845, + "step": 114345 + }, + { + "epoch": 5.34, + "learning_rate": 2.2388036304923742e-06, + "loss": 0.152, + "step": 114350 + }, + { + "epoch": 5.34, + "learning_rate": 2.2380198454375873e-06, + "loss": 0.3009, + "step": 114355 + }, + { + "epoch": 5.34, + "learning_rate": 2.2372360603828008e-06, + "loss": 0.1086, + "step": 114360 + }, + { + "epoch": 5.34, + "learning_rate": 2.2364522753280142e-06, + "loss": 0.0129, + "step": 114365 + }, + { + "epoch": 5.34, + "learning_rate": 2.2356684902732277e-06, + "loss": 0.008, + "step": 114370 + }, + { + "epoch": 5.34, + "learning_rate": 2.234884705218441e-06, + "loss": 0.0284, + "step": 114375 + }, + { + "epoch": 5.34, + "learning_rate": 2.2341009201636547e-06, + "loss": 0.0518, + "step": 114380 + }, + { + "epoch": 5.34, + "learning_rate": 2.2333171351088677e-06, + "loss": 0.0962, + "step": 114385 + }, + { + "epoch": 5.34, + "learning_rate": 2.2325333500540812e-06, + "loss": 0.0958, + "step": 114390 + }, + { + "epoch": 5.34, + "learning_rate": 2.2317495649992947e-06, + "loss": 0.0896, + "step": 114395 + }, + { + "epoch": 5.34, + "learning_rate": 2.230965779944508e-06, + "loss": 0.0748, + "step": 114400 + }, + { + "epoch": 5.34, + "learning_rate": 2.2301819948897217e-06, + "loss": 0.3203, + "step": 114405 + }, + { + "epoch": 5.34, + "learning_rate": 2.229398209834935e-06, + "loss": 0.1206, + "step": 114410 + }, + { + "epoch": 5.34, + "learning_rate": 2.2286144247801486e-06, + "loss": 0.0109, + "step": 114415 + }, + { + "epoch": 5.34, + "learning_rate": 2.2278306397253617e-06, + "loss": 0.02, + "step": 114420 + }, + { + "epoch": 5.34, + "learning_rate": 2.227046854670575e-06, + "loss": 0.0615, + "step": 114425 + }, + { + "epoch": 5.34, + "learning_rate": 2.2262630696157887e-06, + "loss": 0.0564, + "step": 114430 + }, + { + "epoch": 5.34, + "learning_rate": 2.225479284561002e-06, + "loss": 0.0257, + "step": 114435 + }, + { + "epoch": 5.34, + "learning_rate": 2.2246954995062156e-06, + "loss": 0.0867, + "step": 114440 + }, + { + "epoch": 5.34, + "learning_rate": 2.223911714451429e-06, + "loss": 0.0626, + "step": 114445 + }, + { + "epoch": 5.34, + "learning_rate": 2.223127929396642e-06, + "loss": 0.1104, + "step": 114450 + }, + { + "epoch": 5.34, + "learning_rate": 2.222344144341856e-06, + "loss": 0.1552, + "step": 114455 + }, + { + "epoch": 5.34, + "learning_rate": 2.2215603592870696e-06, + "loss": 0.0643, + "step": 114460 + }, + { + "epoch": 5.34, + "learning_rate": 2.2207765742322826e-06, + "loss": 0.0186, + "step": 114465 + }, + { + "epoch": 5.34, + "learning_rate": 2.219992789177496e-06, + "loss": 0.0092, + "step": 114470 + }, + { + "epoch": 5.34, + "learning_rate": 2.2192090041227096e-06, + "loss": 0.036, + "step": 114475 + }, + { + "epoch": 5.34, + "learning_rate": 2.218425219067923e-06, + "loss": 0.0764, + "step": 114480 + }, + { + "epoch": 5.34, + "learning_rate": 2.2176414340131365e-06, + "loss": 0.0664, + "step": 114485 + }, + { + "epoch": 5.34, + "learning_rate": 2.21685764895835e-06, + "loss": 0.0917, + "step": 114490 + }, + { + "epoch": 5.34, + "learning_rate": 2.2160738639035635e-06, + "loss": 0.0601, + "step": 114495 + }, + { + "epoch": 5.34, + "learning_rate": 2.2152900788487766e-06, + "loss": 0.0829, + "step": 114500 + }, + { + "epoch": 5.34, + "learning_rate": 2.21450629379399e-06, + "loss": 0.1625, + "step": 114505 + }, + { + "epoch": 5.34, + "learning_rate": 2.2137225087392035e-06, + "loss": 0.1355, + "step": 114510 + }, + { + "epoch": 5.34, + "learning_rate": 2.212938723684417e-06, + "loss": 0.0046, + "step": 114515 + }, + { + "epoch": 5.34, + "learning_rate": 2.2121549386296305e-06, + "loss": 0.0217, + "step": 114520 + }, + { + "epoch": 5.34, + "learning_rate": 2.211371153574844e-06, + "loss": 0.089, + "step": 114525 + }, + { + "epoch": 5.34, + "learning_rate": 2.210587368520057e-06, + "loss": 0.0531, + "step": 114530 + }, + { + "epoch": 5.34, + "learning_rate": 2.2098035834652705e-06, + "loss": 0.0689, + "step": 114535 + }, + { + "epoch": 5.34, + "learning_rate": 2.209019798410484e-06, + "loss": 0.0361, + "step": 114540 + }, + { + "epoch": 5.34, + "learning_rate": 2.2082360133556975e-06, + "loss": 0.1226, + "step": 114545 + }, + { + "epoch": 5.35, + "learning_rate": 2.207452228300911e-06, + "loss": 0.0929, + "step": 114550 + }, + { + "epoch": 5.35, + "learning_rate": 2.2066684432461244e-06, + "loss": 0.3044, + "step": 114555 + }, + { + "epoch": 5.35, + "learning_rate": 2.205884658191338e-06, + "loss": 0.0909, + "step": 114560 + }, + { + "epoch": 5.35, + "learning_rate": 2.205100873136551e-06, + "loss": 0.021, + "step": 114565 + }, + { + "epoch": 5.35, + "learning_rate": 2.2043170880817645e-06, + "loss": 0.0193, + "step": 114570 + }, + { + "epoch": 5.35, + "learning_rate": 2.203533303026978e-06, + "loss": 0.089, + "step": 114575 + }, + { + "epoch": 5.35, + "learning_rate": 2.2027495179721914e-06, + "loss": 0.0621, + "step": 114580 + }, + { + "epoch": 5.35, + "learning_rate": 2.201965732917405e-06, + "loss": 0.0491, + "step": 114585 + }, + { + "epoch": 5.35, + "learning_rate": 2.2011819478626184e-06, + "loss": 0.0844, + "step": 114590 + }, + { + "epoch": 5.35, + "learning_rate": 2.2003981628078315e-06, + "loss": 0.0694, + "step": 114595 + }, + { + "epoch": 5.35, + "learning_rate": 2.1996143777530454e-06, + "loss": 0.1194, + "step": 114600 + }, + { + "epoch": 5.35, + "learning_rate": 2.198830592698259e-06, + "loss": 0.1884, + "step": 114605 + }, + { + "epoch": 5.35, + "learning_rate": 2.1980468076434723e-06, + "loss": 0.0773, + "step": 114610 + }, + { + "epoch": 5.35, + "learning_rate": 2.1972630225886854e-06, + "loss": 0.0458, + "step": 114615 + }, + { + "epoch": 5.35, + "learning_rate": 2.196479237533899e-06, + "loss": 0.0124, + "step": 114620 + }, + { + "epoch": 5.35, + "learning_rate": 2.1956954524791123e-06, + "loss": 0.0186, + "step": 114625 + }, + { + "epoch": 5.35, + "learning_rate": 2.194911667424326e-06, + "loss": 0.0312, + "step": 114630 + }, + { + "epoch": 5.35, + "learning_rate": 2.1941278823695393e-06, + "loss": 0.069, + "step": 114635 + }, + { + "epoch": 5.35, + "learning_rate": 2.193344097314753e-06, + "loss": 0.0984, + "step": 114640 + }, + { + "epoch": 5.35, + "learning_rate": 2.192560312259966e-06, + "loss": 0.1613, + "step": 114645 + }, + { + "epoch": 5.35, + "learning_rate": 2.1917765272051793e-06, + "loss": 0.1512, + "step": 114650 + }, + { + "epoch": 5.35, + "learning_rate": 2.190992742150393e-06, + "loss": 0.4084, + "step": 114655 + }, + { + "epoch": 5.35, + "learning_rate": 2.1902089570956063e-06, + "loss": 0.0739, + "step": 114660 + }, + { + "epoch": 5.35, + "learning_rate": 2.1894251720408198e-06, + "loss": 0.0183, + "step": 114665 + }, + { + "epoch": 5.35, + "learning_rate": 2.1886413869860333e-06, + "loss": 0.0144, + "step": 114670 + }, + { + "epoch": 5.35, + "learning_rate": 2.1878576019312467e-06, + "loss": 0.0382, + "step": 114675 + }, + { + "epoch": 5.35, + "learning_rate": 2.18707381687646e-06, + "loss": 0.0849, + "step": 114680 + }, + { + "epoch": 5.35, + "learning_rate": 2.1862900318216733e-06, + "loss": 0.0995, + "step": 114685 + }, + { + "epoch": 5.35, + "learning_rate": 2.1855062467668868e-06, + "loss": 0.0731, + "step": 114690 + }, + { + "epoch": 5.35, + "learning_rate": 2.1847224617121003e-06, + "loss": 0.1009, + "step": 114695 + }, + { + "epoch": 5.35, + "learning_rate": 2.1839386766573137e-06, + "loss": 0.1344, + "step": 114700 + }, + { + "epoch": 5.35, + "learning_rate": 2.1831548916025272e-06, + "loss": 0.1975, + "step": 114705 + }, + { + "epoch": 5.35, + "learning_rate": 2.1823711065477403e-06, + "loss": 0.1205, + "step": 114710 + }, + { + "epoch": 5.35, + "learning_rate": 2.1815873214929538e-06, + "loss": 0.0319, + "step": 114715 + }, + { + "epoch": 5.35, + "learning_rate": 2.1808035364381672e-06, + "loss": 0.0209, + "step": 114720 + }, + { + "epoch": 5.35, + "learning_rate": 2.1800197513833807e-06, + "loss": 0.082, + "step": 114725 + }, + { + "epoch": 5.35, + "learning_rate": 2.179235966328594e-06, + "loss": 0.0191, + "step": 114730 + }, + { + "epoch": 5.35, + "learning_rate": 2.1784521812738077e-06, + "loss": 0.0293, + "step": 114735 + }, + { + "epoch": 5.35, + "learning_rate": 2.177668396219021e-06, + "loss": 0.0258, + "step": 114740 + }, + { + "epoch": 5.35, + "learning_rate": 2.1768846111642347e-06, + "loss": 0.0885, + "step": 114745 + }, + { + "epoch": 5.35, + "learning_rate": 2.176100826109448e-06, + "loss": 0.148, + "step": 114750 + }, + { + "epoch": 5.35, + "learning_rate": 2.1753170410546616e-06, + "loss": 0.3726, + "step": 114755 + }, + { + "epoch": 5.35, + "learning_rate": 2.1745332559998747e-06, + "loss": 0.0919, + "step": 114760 + }, + { + "epoch": 5.36, + "learning_rate": 2.173749470945088e-06, + "loss": 0.0117, + "step": 114765 + }, + { + "epoch": 5.36, + "learning_rate": 2.1729656858903016e-06, + "loss": 0.0439, + "step": 114770 + }, + { + "epoch": 5.36, + "learning_rate": 2.172181900835515e-06, + "loss": 0.0115, + "step": 114775 + }, + { + "epoch": 5.36, + "learning_rate": 2.1713981157807286e-06, + "loss": 0.0409, + "step": 114780 + }, + { + "epoch": 5.36, + "learning_rate": 2.170614330725942e-06, + "loss": 0.0427, + "step": 114785 + }, + { + "epoch": 5.36, + "learning_rate": 2.169830545671155e-06, + "loss": 0.0486, + "step": 114790 + }, + { + "epoch": 5.36, + "learning_rate": 2.1690467606163686e-06, + "loss": 0.1149, + "step": 114795 + }, + { + "epoch": 5.36, + "learning_rate": 2.168262975561582e-06, + "loss": 0.1438, + "step": 114800 + }, + { + "epoch": 5.36, + "learning_rate": 2.1674791905067956e-06, + "loss": 0.3771, + "step": 114805 + }, + { + "epoch": 5.36, + "learning_rate": 2.166695405452009e-06, + "loss": 0.1444, + "step": 114810 + }, + { + "epoch": 5.36, + "learning_rate": 2.1659116203972226e-06, + "loss": 0.0099, + "step": 114815 + }, + { + "epoch": 5.36, + "learning_rate": 2.165127835342436e-06, + "loss": 0.0017, + "step": 114820 + }, + { + "epoch": 5.36, + "learning_rate": 2.164344050287649e-06, + "loss": 0.0077, + "step": 114825 + }, + { + "epoch": 5.36, + "learning_rate": 2.1635602652328626e-06, + "loss": 0.035, + "step": 114830 + }, + { + "epoch": 5.36, + "learning_rate": 2.162776480178076e-06, + "loss": 0.0789, + "step": 114835 + }, + { + "epoch": 5.36, + "learning_rate": 2.1619926951232895e-06, + "loss": 0.0469, + "step": 114840 + }, + { + "epoch": 5.36, + "learning_rate": 2.161208910068503e-06, + "loss": 0.1061, + "step": 114845 + }, + { + "epoch": 5.36, + "learning_rate": 2.1604251250137165e-06, + "loss": 0.1729, + "step": 114850 + }, + { + "epoch": 5.36, + "learning_rate": 2.1596413399589296e-06, + "loss": 0.2633, + "step": 114855 + }, + { + "epoch": 5.36, + "learning_rate": 2.158857554904143e-06, + "loss": 0.1098, + "step": 114860 + }, + { + "epoch": 5.36, + "learning_rate": 2.1580737698493565e-06, + "loss": 0.0058, + "step": 114865 + }, + { + "epoch": 5.36, + "learning_rate": 2.1572899847945704e-06, + "loss": 0.0135, + "step": 114870 + }, + { + "epoch": 5.36, + "learning_rate": 2.1565061997397835e-06, + "loss": 0.0544, + "step": 114875 + }, + { + "epoch": 5.36, + "learning_rate": 2.155722414684997e-06, + "loss": 0.058, + "step": 114880 + }, + { + "epoch": 5.36, + "learning_rate": 2.1549386296302105e-06, + "loss": 0.0646, + "step": 114885 + }, + { + "epoch": 5.36, + "learning_rate": 2.154154844575424e-06, + "loss": 0.0431, + "step": 114890 + }, + { + "epoch": 5.36, + "learning_rate": 2.1533710595206374e-06, + "loss": 0.0901, + "step": 114895 + }, + { + "epoch": 5.36, + "learning_rate": 2.152587274465851e-06, + "loss": 0.188, + "step": 114900 + }, + { + "epoch": 5.36, + "learning_rate": 2.151803489411064e-06, + "loss": 0.3369, + "step": 114905 + }, + { + "epoch": 5.36, + "learning_rate": 2.1510197043562774e-06, + "loss": 0.1229, + "step": 114910 + }, + { + "epoch": 5.36, + "learning_rate": 2.150235919301491e-06, + "loss": 0.0155, + "step": 114915 + }, + { + "epoch": 5.36, + "learning_rate": 2.1494521342467044e-06, + "loss": 0.0344, + "step": 114920 + }, + { + "epoch": 5.36, + "learning_rate": 2.148668349191918e-06, + "loss": 0.0608, + "step": 114925 + }, + { + "epoch": 5.36, + "learning_rate": 2.1478845641371314e-06, + "loss": 0.0337, + "step": 114930 + }, + { + "epoch": 5.36, + "learning_rate": 2.147100779082345e-06, + "loss": 0.0555, + "step": 114935 + }, + { + "epoch": 5.36, + "learning_rate": 2.146316994027558e-06, + "loss": 0.0975, + "step": 114940 + }, + { + "epoch": 5.36, + "learning_rate": 2.1455332089727714e-06, + "loss": 0.1196, + "step": 114945 + }, + { + "epoch": 5.36, + "learning_rate": 2.144749423917985e-06, + "loss": 0.066, + "step": 114950 + }, + { + "epoch": 5.36, + "learning_rate": 2.1439656388631984e-06, + "loss": 0.3219, + "step": 114955 + }, + { + "epoch": 5.36, + "learning_rate": 2.143181853808412e-06, + "loss": 0.1043, + "step": 114960 + }, + { + "epoch": 5.36, + "learning_rate": 2.1423980687536253e-06, + "loss": 0.0174, + "step": 114965 + }, + { + "epoch": 5.36, + "learning_rate": 2.1416142836988384e-06, + "loss": 0.0921, + "step": 114970 + }, + { + "epoch": 5.36, + "learning_rate": 2.140830498644052e-06, + "loss": 0.0171, + "step": 114975 + }, + { + "epoch": 5.37, + "learning_rate": 2.1400467135892654e-06, + "loss": 0.1261, + "step": 114980 + }, + { + "epoch": 5.37, + "learning_rate": 2.139262928534479e-06, + "loss": 0.0336, + "step": 114985 + }, + { + "epoch": 5.37, + "learning_rate": 2.1384791434796923e-06, + "loss": 0.0852, + "step": 114990 + }, + { + "epoch": 5.37, + "learning_rate": 2.137695358424906e-06, + "loss": 0.0988, + "step": 114995 + }, + { + "epoch": 5.37, + "learning_rate": 2.1369115733701193e-06, + "loss": 0.2128, + "step": 115000 + }, + { + "epoch": 5.37, + "learning_rate": 2.1361277883153323e-06, + "loss": 0.3928, + "step": 115005 + }, + { + "epoch": 5.37, + "learning_rate": 2.135344003260546e-06, + "loss": 0.1253, + "step": 115010 + }, + { + "epoch": 5.37, + "learning_rate": 2.1345602182057597e-06, + "loss": 0.0161, + "step": 115015 + }, + { + "epoch": 5.37, + "learning_rate": 2.1337764331509728e-06, + "loss": 0.0984, + "step": 115020 + }, + { + "epoch": 5.37, + "learning_rate": 2.1329926480961863e-06, + "loss": 0.0342, + "step": 115025 + }, + { + "epoch": 5.37, + "learning_rate": 2.1322088630413998e-06, + "loss": 0.0933, + "step": 115030 + }, + { + "epoch": 5.37, + "learning_rate": 2.1314250779866132e-06, + "loss": 0.0739, + "step": 115035 + }, + { + "epoch": 5.37, + "learning_rate": 2.1306412929318267e-06, + "loss": 0.0832, + "step": 115040 + }, + { + "epoch": 5.37, + "learning_rate": 2.12985750787704e-06, + "loss": 0.0653, + "step": 115045 + }, + { + "epoch": 5.37, + "learning_rate": 2.1290737228222533e-06, + "loss": 0.1685, + "step": 115050 + }, + { + "epoch": 5.37, + "learning_rate": 2.1282899377674667e-06, + "loss": 0.2922, + "step": 115055 + }, + { + "epoch": 5.37, + "learning_rate": 2.1275061527126802e-06, + "loss": 0.0904, + "step": 115060 + }, + { + "epoch": 5.37, + "learning_rate": 2.1267223676578937e-06, + "loss": 0.0089, + "step": 115065 + }, + { + "epoch": 5.37, + "learning_rate": 2.125938582603107e-06, + "loss": 0.0185, + "step": 115070 + }, + { + "epoch": 5.37, + "learning_rate": 2.1251547975483207e-06, + "loss": 0.0418, + "step": 115075 + }, + { + "epoch": 5.37, + "learning_rate": 2.124371012493534e-06, + "loss": 0.0594, + "step": 115080 + }, + { + "epoch": 5.37, + "learning_rate": 2.123587227438747e-06, + "loss": 0.0448, + "step": 115085 + }, + { + "epoch": 5.37, + "learning_rate": 2.1228034423839607e-06, + "loss": 0.0513, + "step": 115090 + }, + { + "epoch": 5.37, + "learning_rate": 2.122019657329174e-06, + "loss": 0.0629, + "step": 115095 + }, + { + "epoch": 5.37, + "learning_rate": 2.1212358722743877e-06, + "loss": 0.1338, + "step": 115100 + }, + { + "epoch": 5.37, + "learning_rate": 2.120452087219601e-06, + "loss": 0.1898, + "step": 115105 + }, + { + "epoch": 5.37, + "learning_rate": 2.1196683021648146e-06, + "loss": 0.1236, + "step": 115110 + }, + { + "epoch": 5.37, + "learning_rate": 2.1188845171100277e-06, + "loss": 0.0048, + "step": 115115 + }, + { + "epoch": 5.37, + "learning_rate": 2.118100732055241e-06, + "loss": 0.0219, + "step": 115120 + }, + { + "epoch": 5.37, + "learning_rate": 2.1173169470004546e-06, + "loss": 0.1011, + "step": 115125 + }, + { + "epoch": 5.37, + "learning_rate": 2.116533161945668e-06, + "loss": 0.032, + "step": 115130 + }, + { + "epoch": 5.37, + "learning_rate": 2.1157493768908816e-06, + "loss": 0.0325, + "step": 115135 + }, + { + "epoch": 5.37, + "learning_rate": 2.114965591836095e-06, + "loss": 0.0806, + "step": 115140 + }, + { + "epoch": 5.37, + "learning_rate": 2.1141818067813086e-06, + "loss": 0.1579, + "step": 115145 + }, + { + "epoch": 5.37, + "learning_rate": 2.1133980217265216e-06, + "loss": 0.1171, + "step": 115150 + }, + { + "epoch": 5.37, + "learning_rate": 2.112614236671735e-06, + "loss": 0.3076, + "step": 115155 + }, + { + "epoch": 5.37, + "learning_rate": 2.111830451616949e-06, + "loss": 0.0763, + "step": 115160 + }, + { + "epoch": 5.37, + "learning_rate": 2.111046666562162e-06, + "loss": 0.0312, + "step": 115165 + }, + { + "epoch": 5.37, + "learning_rate": 2.1102628815073756e-06, + "loss": 0.0175, + "step": 115170 + }, + { + "epoch": 5.37, + "learning_rate": 2.109479096452589e-06, + "loss": 0.0124, + "step": 115175 + }, + { + "epoch": 5.37, + "learning_rate": 2.108695311397802e-06, + "loss": 0.0467, + "step": 115180 + }, + { + "epoch": 5.37, + "learning_rate": 2.107911526343016e-06, + "loss": 0.0458, + "step": 115185 + }, + { + "epoch": 5.37, + "learning_rate": 2.1071277412882295e-06, + "loss": 0.1046, + "step": 115190 + }, + { + "epoch": 5.38, + "learning_rate": 2.106343956233443e-06, + "loss": 0.0754, + "step": 115195 + }, + { + "epoch": 5.38, + "learning_rate": 2.105560171178656e-06, + "loss": 0.0779, + "step": 115200 + }, + { + "epoch": 5.38, + "learning_rate": 2.1047763861238695e-06, + "loss": 0.2468, + "step": 115205 + }, + { + "epoch": 5.38, + "learning_rate": 2.103992601069083e-06, + "loss": 0.1224, + "step": 115210 + }, + { + "epoch": 5.38, + "learning_rate": 2.1032088160142965e-06, + "loss": 0.0156, + "step": 115215 + }, + { + "epoch": 5.38, + "learning_rate": 2.10242503095951e-06, + "loss": 0.0122, + "step": 115220 + }, + { + "epoch": 5.38, + "learning_rate": 2.1016412459047234e-06, + "loss": 0.0375, + "step": 115225 + }, + { + "epoch": 5.38, + "learning_rate": 2.1008574608499365e-06, + "loss": 0.0562, + "step": 115230 + }, + { + "epoch": 5.38, + "learning_rate": 2.10007367579515e-06, + "loss": 0.0365, + "step": 115235 + }, + { + "epoch": 5.38, + "learning_rate": 2.0992898907403635e-06, + "loss": 0.1328, + "step": 115240 + }, + { + "epoch": 5.38, + "learning_rate": 2.098506105685577e-06, + "loss": 0.1437, + "step": 115245 + }, + { + "epoch": 5.38, + "learning_rate": 2.0977223206307904e-06, + "loss": 0.1406, + "step": 115250 + }, + { + "epoch": 5.38, + "learning_rate": 2.096938535576004e-06, + "loss": 0.2885, + "step": 115255 + }, + { + "epoch": 5.38, + "learning_rate": 2.0961547505212174e-06, + "loss": 0.0861, + "step": 115260 + }, + { + "epoch": 5.38, + "learning_rate": 2.0953709654664305e-06, + "loss": 0.0025, + "step": 115265 + }, + { + "epoch": 5.38, + "learning_rate": 2.094587180411644e-06, + "loss": 0.0125, + "step": 115270 + }, + { + "epoch": 5.38, + "learning_rate": 2.0938033953568574e-06, + "loss": 0.0191, + "step": 115275 + }, + { + "epoch": 5.38, + "learning_rate": 2.093019610302071e-06, + "loss": 0.0712, + "step": 115280 + }, + { + "epoch": 5.38, + "learning_rate": 2.0922358252472844e-06, + "loss": 0.0918, + "step": 115285 + }, + { + "epoch": 5.38, + "learning_rate": 2.091452040192498e-06, + "loss": 0.0906, + "step": 115290 + }, + { + "epoch": 5.38, + "learning_rate": 2.090668255137711e-06, + "loss": 0.1603, + "step": 115295 + }, + { + "epoch": 5.38, + "learning_rate": 2.0898844700829244e-06, + "loss": 0.1228, + "step": 115300 + }, + { + "epoch": 5.38, + "learning_rate": 2.0891006850281383e-06, + "loss": 0.2567, + "step": 115305 + }, + { + "epoch": 5.38, + "learning_rate": 2.0883168999733514e-06, + "loss": 0.0824, + "step": 115310 + }, + { + "epoch": 5.38, + "learning_rate": 2.087533114918565e-06, + "loss": 0.0163, + "step": 115315 + }, + { + "epoch": 5.38, + "learning_rate": 2.0867493298637783e-06, + "loss": 0.0352, + "step": 115320 + }, + { + "epoch": 5.38, + "learning_rate": 2.085965544808992e-06, + "loss": 0.0235, + "step": 115325 + }, + { + "epoch": 5.38, + "learning_rate": 2.0851817597542053e-06, + "loss": 0.0896, + "step": 115330 + }, + { + "epoch": 5.38, + "learning_rate": 2.0843979746994188e-06, + "loss": 0.1152, + "step": 115335 + }, + { + "epoch": 5.38, + "learning_rate": 2.0836141896446323e-06, + "loss": 0.0546, + "step": 115340 + }, + { + "epoch": 5.38, + "learning_rate": 2.0828304045898453e-06, + "loss": 0.079, + "step": 115345 + }, + { + "epoch": 5.38, + "learning_rate": 2.082046619535059e-06, + "loss": 0.2231, + "step": 115350 + }, + { + "epoch": 5.38, + "learning_rate": 2.0812628344802723e-06, + "loss": 0.3122, + "step": 115355 + }, + { + "epoch": 5.38, + "learning_rate": 2.0804790494254858e-06, + "loss": 0.0986, + "step": 115360 + }, + { + "epoch": 5.38, + "learning_rate": 2.0796952643706993e-06, + "loss": 0.0053, + "step": 115365 + }, + { + "epoch": 5.38, + "learning_rate": 2.0789114793159127e-06, + "loss": 0.0371, + "step": 115370 + }, + { + "epoch": 5.38, + "learning_rate": 2.078127694261126e-06, + "loss": 0.0705, + "step": 115375 + }, + { + "epoch": 5.38, + "learning_rate": 2.0773439092063393e-06, + "loss": 0.0279, + "step": 115380 + }, + { + "epoch": 5.38, + "learning_rate": 2.0765601241515528e-06, + "loss": 0.0803, + "step": 115385 + }, + { + "epoch": 5.38, + "learning_rate": 2.0757763390967662e-06, + "loss": 0.0471, + "step": 115390 + }, + { + "epoch": 5.38, + "learning_rate": 2.0749925540419797e-06, + "loss": 0.0676, + "step": 115395 + }, + { + "epoch": 5.38, + "learning_rate": 2.074208768987193e-06, + "loss": 0.1652, + "step": 115400 + }, + { + "epoch": 5.38, + "learning_rate": 2.0734249839324067e-06, + "loss": 0.1565, + "step": 115405 + }, + { + "epoch": 5.39, + "learning_rate": 2.0726411988776197e-06, + "loss": 0.1075, + "step": 115410 + }, + { + "epoch": 5.39, + "learning_rate": 2.0718574138228332e-06, + "loss": 0.0177, + "step": 115415 + }, + { + "epoch": 5.39, + "learning_rate": 2.0710736287680467e-06, + "loss": 0.0499, + "step": 115420 + }, + { + "epoch": 5.39, + "learning_rate": 2.07028984371326e-06, + "loss": 0.0125, + "step": 115425 + }, + { + "epoch": 5.39, + "learning_rate": 2.0695060586584737e-06, + "loss": 0.0285, + "step": 115430 + }, + { + "epoch": 5.39, + "learning_rate": 2.068722273603687e-06, + "loss": 0.1033, + "step": 115435 + }, + { + "epoch": 5.39, + "learning_rate": 2.0679384885489002e-06, + "loss": 0.0286, + "step": 115440 + }, + { + "epoch": 5.39, + "learning_rate": 2.0671547034941137e-06, + "loss": 0.0793, + "step": 115445 + }, + { + "epoch": 5.39, + "learning_rate": 2.0663709184393276e-06, + "loss": 0.1257, + "step": 115450 + }, + { + "epoch": 5.39, + "learning_rate": 2.065587133384541e-06, + "loss": 0.2255, + "step": 115455 + }, + { + "epoch": 5.39, + "learning_rate": 2.064803348329754e-06, + "loss": 0.0846, + "step": 115460 + }, + { + "epoch": 5.39, + "learning_rate": 2.0640195632749676e-06, + "loss": 0.0251, + "step": 115465 + }, + { + "epoch": 5.39, + "learning_rate": 2.063235778220181e-06, + "loss": 0.0139, + "step": 115470 + }, + { + "epoch": 5.39, + "learning_rate": 2.0624519931653946e-06, + "loss": 0.0478, + "step": 115475 + }, + { + "epoch": 5.39, + "learning_rate": 2.061668208110608e-06, + "loss": 0.0769, + "step": 115480 + }, + { + "epoch": 5.39, + "learning_rate": 2.0608844230558216e-06, + "loss": 0.0742, + "step": 115485 + }, + { + "epoch": 5.39, + "learning_rate": 2.0601006380010346e-06, + "loss": 0.0269, + "step": 115490 + }, + { + "epoch": 5.39, + "learning_rate": 2.059316852946248e-06, + "loss": 0.0613, + "step": 115495 + }, + { + "epoch": 5.39, + "learning_rate": 2.0585330678914616e-06, + "loss": 0.1555, + "step": 115500 + }, + { + "epoch": 5.39, + "learning_rate": 2.057749282836675e-06, + "loss": 0.2386, + "step": 115505 + }, + { + "epoch": 5.39, + "learning_rate": 2.0569654977818885e-06, + "loss": 0.1245, + "step": 115510 + }, + { + "epoch": 5.39, + "learning_rate": 2.056181712727102e-06, + "loss": 0.0183, + "step": 115515 + }, + { + "epoch": 5.39, + "learning_rate": 2.0553979276723155e-06, + "loss": 0.0305, + "step": 115520 + }, + { + "epoch": 5.39, + "learning_rate": 2.0546141426175286e-06, + "loss": 0.0696, + "step": 115525 + }, + { + "epoch": 5.39, + "learning_rate": 2.053830357562742e-06, + "loss": 0.0348, + "step": 115530 + }, + { + "epoch": 5.39, + "learning_rate": 2.0530465725079555e-06, + "loss": 0.047, + "step": 115535 + }, + { + "epoch": 5.39, + "learning_rate": 2.052262787453169e-06, + "loss": 0.0485, + "step": 115540 + }, + { + "epoch": 5.39, + "learning_rate": 2.0514790023983825e-06, + "loss": 0.1107, + "step": 115545 + }, + { + "epoch": 5.39, + "learning_rate": 2.050695217343596e-06, + "loss": 0.1805, + "step": 115550 + }, + { + "epoch": 5.39, + "learning_rate": 2.049911432288809e-06, + "loss": 0.3203, + "step": 115555 + }, + { + "epoch": 5.39, + "learning_rate": 2.0491276472340225e-06, + "loss": 0.0971, + "step": 115560 + }, + { + "epoch": 5.39, + "learning_rate": 2.048343862179236e-06, + "loss": 0.0425, + "step": 115565 + }, + { + "epoch": 5.39, + "learning_rate": 2.0475600771244495e-06, + "loss": 0.0272, + "step": 115570 + }, + { + "epoch": 5.39, + "learning_rate": 2.046776292069663e-06, + "loss": 0.0688, + "step": 115575 + }, + { + "epoch": 5.39, + "learning_rate": 2.0459925070148764e-06, + "loss": 0.0173, + "step": 115580 + }, + { + "epoch": 5.39, + "learning_rate": 2.04520872196009e-06, + "loss": 0.089, + "step": 115585 + }, + { + "epoch": 5.39, + "learning_rate": 2.044424936905303e-06, + "loss": 0.0774, + "step": 115590 + }, + { + "epoch": 5.39, + "learning_rate": 2.043641151850517e-06, + "loss": 0.1296, + "step": 115595 + }, + { + "epoch": 5.39, + "learning_rate": 2.0428573667957304e-06, + "loss": 0.2031, + "step": 115600 + }, + { + "epoch": 5.39, + "learning_rate": 2.0420735817409434e-06, + "loss": 0.3265, + "step": 115605 + }, + { + "epoch": 5.39, + "learning_rate": 2.041289796686157e-06, + "loss": 0.1298, + "step": 115610 + }, + { + "epoch": 5.39, + "learning_rate": 2.0405060116313704e-06, + "loss": 0.0235, + "step": 115615 + }, + { + "epoch": 5.39, + "learning_rate": 2.039722226576584e-06, + "loss": 0.0198, + "step": 115620 + }, + { + "epoch": 5.4, + "learning_rate": 2.0389384415217974e-06, + "loss": 0.0662, + "step": 115625 + }, + { + "epoch": 5.4, + "learning_rate": 2.038154656467011e-06, + "loss": 0.1048, + "step": 115630 + }, + { + "epoch": 5.4, + "learning_rate": 2.037370871412224e-06, + "loss": 0.0346, + "step": 115635 + }, + { + "epoch": 5.4, + "learning_rate": 2.0365870863574374e-06, + "loss": 0.07, + "step": 115640 + }, + { + "epoch": 5.4, + "learning_rate": 2.035803301302651e-06, + "loss": 0.0924, + "step": 115645 + }, + { + "epoch": 5.4, + "learning_rate": 2.0350195162478644e-06, + "loss": 0.1165, + "step": 115650 + }, + { + "epoch": 5.4, + "learning_rate": 2.034235731193078e-06, + "loss": 0.2925, + "step": 115655 + }, + { + "epoch": 5.4, + "learning_rate": 2.0334519461382913e-06, + "loss": 0.0926, + "step": 115660 + }, + { + "epoch": 5.4, + "learning_rate": 2.032668161083505e-06, + "loss": 0.0774, + "step": 115665 + }, + { + "epoch": 5.4, + "learning_rate": 2.031884376028718e-06, + "loss": 0.0518, + "step": 115670 + }, + { + "epoch": 5.4, + "learning_rate": 2.0311005909739313e-06, + "loss": 0.0312, + "step": 115675 + }, + { + "epoch": 5.4, + "learning_rate": 2.030316805919145e-06, + "loss": 0.0124, + "step": 115680 + }, + { + "epoch": 5.4, + "learning_rate": 2.0295330208643583e-06, + "loss": 0.0939, + "step": 115685 + }, + { + "epoch": 5.4, + "learning_rate": 2.0287492358095718e-06, + "loss": 0.0622, + "step": 115690 + }, + { + "epoch": 5.4, + "learning_rate": 2.0279654507547853e-06, + "loss": 0.0936, + "step": 115695 + }, + { + "epoch": 5.4, + "learning_rate": 2.0271816656999983e-06, + "loss": 0.1034, + "step": 115700 + }, + { + "epoch": 5.4, + "learning_rate": 2.026397880645212e-06, + "loss": 0.2334, + "step": 115705 + }, + { + "epoch": 5.4, + "learning_rate": 2.0256140955904253e-06, + "loss": 0.086, + "step": 115710 + }, + { + "epoch": 5.4, + "learning_rate": 2.024830310535639e-06, + "loss": 0.0156, + "step": 115715 + }, + { + "epoch": 5.4, + "learning_rate": 2.0240465254808523e-06, + "loss": 0.0178, + "step": 115720 + }, + { + "epoch": 5.4, + "learning_rate": 2.0232627404260657e-06, + "loss": 0.0807, + "step": 115725 + }, + { + "epoch": 5.4, + "learning_rate": 2.0224789553712792e-06, + "loss": 0.0486, + "step": 115730 + }, + { + "epoch": 5.4, + "learning_rate": 2.0216951703164923e-06, + "loss": 0.0621, + "step": 115735 + }, + { + "epoch": 5.4, + "learning_rate": 2.020911385261706e-06, + "loss": 0.0201, + "step": 115740 + }, + { + "epoch": 5.4, + "learning_rate": 2.0201276002069197e-06, + "loss": 0.2375, + "step": 115745 + }, + { + "epoch": 5.4, + "learning_rate": 2.0193438151521327e-06, + "loss": 0.2459, + "step": 115750 + }, + { + "epoch": 5.4, + "learning_rate": 2.018560030097346e-06, + "loss": 0.3371, + "step": 115755 + }, + { + "epoch": 5.4, + "learning_rate": 2.0177762450425597e-06, + "loss": 0.0908, + "step": 115760 + }, + { + "epoch": 5.4, + "learning_rate": 2.016992459987773e-06, + "loss": 0.0042, + "step": 115765 + }, + { + "epoch": 5.4, + "learning_rate": 2.0162086749329867e-06, + "loss": 0.0552, + "step": 115770 + }, + { + "epoch": 5.4, + "learning_rate": 2.0154248898782e-06, + "loss": 0.0192, + "step": 115775 + }, + { + "epoch": 5.4, + "learning_rate": 2.0146411048234136e-06, + "loss": 0.0266, + "step": 115780 + }, + { + "epoch": 5.4, + "learning_rate": 2.0138573197686267e-06, + "loss": 0.018, + "step": 115785 + }, + { + "epoch": 5.4, + "learning_rate": 2.01307353471384e-06, + "loss": 0.0443, + "step": 115790 + }, + { + "epoch": 5.4, + "learning_rate": 2.0122897496590536e-06, + "loss": 0.1047, + "step": 115795 + }, + { + "epoch": 5.4, + "learning_rate": 2.011505964604267e-06, + "loss": 0.1531, + "step": 115800 + }, + { + "epoch": 5.4, + "learning_rate": 2.0107221795494806e-06, + "loss": 0.2669, + "step": 115805 + }, + { + "epoch": 5.4, + "learning_rate": 2.009938394494694e-06, + "loss": 0.117, + "step": 115810 + }, + { + "epoch": 5.4, + "learning_rate": 2.009154609439907e-06, + "loss": 0.0035, + "step": 115815 + }, + { + "epoch": 5.4, + "learning_rate": 2.0083708243851206e-06, + "loss": 0.012, + "step": 115820 + }, + { + "epoch": 5.4, + "learning_rate": 2.007587039330334e-06, + "loss": 0.0207, + "step": 115825 + }, + { + "epoch": 5.4, + "learning_rate": 2.0068032542755476e-06, + "loss": 0.0598, + "step": 115830 + }, + { + "epoch": 5.41, + "learning_rate": 2.006019469220761e-06, + "loss": 0.0501, + "step": 115835 + }, + { + "epoch": 5.41, + "learning_rate": 2.0052356841659746e-06, + "loss": 0.0826, + "step": 115840 + }, + { + "epoch": 5.41, + "learning_rate": 2.004451899111188e-06, + "loss": 0.1009, + "step": 115845 + }, + { + "epoch": 5.41, + "learning_rate": 2.003668114056401e-06, + "loss": 0.0692, + "step": 115850 + }, + { + "epoch": 5.41, + "learning_rate": 2.0028843290016146e-06, + "loss": 0.2586, + "step": 115855 + }, + { + "epoch": 5.41, + "learning_rate": 2.0021005439468285e-06, + "loss": 0.0874, + "step": 115860 + }, + { + "epoch": 5.41, + "learning_rate": 2.0013167588920415e-06, + "loss": 0.0182, + "step": 115865 + }, + { + "epoch": 5.41, + "learning_rate": 2.000532973837255e-06, + "loss": 0.0058, + "step": 115870 + }, + { + "epoch": 5.41, + "learning_rate": 1.9997491887824685e-06, + "loss": 0.0712, + "step": 115875 + }, + { + "epoch": 5.41, + "learning_rate": 1.9989654037276816e-06, + "loss": 0.0448, + "step": 115880 + }, + { + "epoch": 5.41, + "learning_rate": 1.9981816186728955e-06, + "loss": 0.0763, + "step": 115885 + }, + { + "epoch": 5.41, + "learning_rate": 1.997397833618109e-06, + "loss": 0.0667, + "step": 115890 + }, + { + "epoch": 5.41, + "learning_rate": 1.996614048563322e-06, + "loss": 0.1138, + "step": 115895 + }, + { + "epoch": 5.41, + "learning_rate": 1.9958302635085355e-06, + "loss": 0.2712, + "step": 115900 + }, + { + "epoch": 5.41, + "learning_rate": 1.995046478453749e-06, + "loss": 0.1617, + "step": 115905 + }, + { + "epoch": 5.41, + "learning_rate": 1.9942626933989625e-06, + "loss": 0.1406, + "step": 115910 + }, + { + "epoch": 5.41, + "learning_rate": 1.993478908344176e-06, + "loss": 0.0225, + "step": 115915 + }, + { + "epoch": 5.41, + "learning_rate": 1.9926951232893894e-06, + "loss": 0.0158, + "step": 115920 + }, + { + "epoch": 5.41, + "learning_rate": 1.991911338234603e-06, + "loss": 0.0058, + "step": 115925 + }, + { + "epoch": 5.41, + "learning_rate": 1.991127553179816e-06, + "loss": 0.0865, + "step": 115930 + }, + { + "epoch": 5.41, + "learning_rate": 1.9903437681250295e-06, + "loss": 0.0841, + "step": 115935 + }, + { + "epoch": 5.41, + "learning_rate": 1.989559983070243e-06, + "loss": 0.0706, + "step": 115940 + }, + { + "epoch": 5.41, + "learning_rate": 1.9887761980154564e-06, + "loss": 0.1273, + "step": 115945 + }, + { + "epoch": 5.41, + "learning_rate": 1.98799241296067e-06, + "loss": 0.0869, + "step": 115950 + }, + { + "epoch": 5.41, + "learning_rate": 1.9872086279058834e-06, + "loss": 0.3253, + "step": 115955 + }, + { + "epoch": 5.41, + "learning_rate": 1.9864248428510964e-06, + "loss": 0.1099, + "step": 115960 + }, + { + "epoch": 5.41, + "learning_rate": 1.98564105779631e-06, + "loss": 0.0047, + "step": 115965 + }, + { + "epoch": 5.41, + "learning_rate": 1.9848572727415234e-06, + "loss": 0.0077, + "step": 115970 + }, + { + "epoch": 5.41, + "learning_rate": 1.984073487686737e-06, + "loss": 0.052, + "step": 115975 + }, + { + "epoch": 5.41, + "learning_rate": 1.9832897026319504e-06, + "loss": 0.2624, + "step": 115980 + }, + { + "epoch": 5.41, + "learning_rate": 1.982505917577164e-06, + "loss": 0.0502, + "step": 115985 + }, + { + "epoch": 5.41, + "learning_rate": 1.9817221325223773e-06, + "loss": 0.0784, + "step": 115990 + }, + { + "epoch": 5.41, + "learning_rate": 1.9809383474675904e-06, + "loss": 0.145, + "step": 115995 + }, + { + "epoch": 5.41, + "learning_rate": 1.980154562412804e-06, + "loss": 0.1857, + "step": 116000 + }, + { + "epoch": 5.41, + "learning_rate": 1.9793707773580178e-06, + "loss": 0.3659, + "step": 116005 + }, + { + "epoch": 5.41, + "learning_rate": 1.978586992303231e-06, + "loss": 0.1309, + "step": 116010 + }, + { + "epoch": 5.41, + "learning_rate": 1.9778032072484443e-06, + "loss": 0.0122, + "step": 116015 + }, + { + "epoch": 5.41, + "learning_rate": 1.977019422193658e-06, + "loss": 0.0119, + "step": 116020 + }, + { + "epoch": 5.41, + "learning_rate": 1.976235637138871e-06, + "loss": 0.0356, + "step": 116025 + }, + { + "epoch": 5.41, + "learning_rate": 1.9754518520840848e-06, + "loss": 0.0308, + "step": 116030 + }, + { + "epoch": 5.41, + "learning_rate": 1.9746680670292982e-06, + "loss": 0.0424, + "step": 116035 + }, + { + "epoch": 5.41, + "learning_rate": 1.9738842819745117e-06, + "loss": 0.0752, + "step": 116040 + }, + { + "epoch": 5.41, + "learning_rate": 1.973100496919725e-06, + "loss": 0.0868, + "step": 116045 + }, + { + "epoch": 5.42, + "learning_rate": 1.9723167118649383e-06, + "loss": 0.2166, + "step": 116050 + }, + { + "epoch": 5.42, + "learning_rate": 1.9715329268101518e-06, + "loss": 0.4255, + "step": 116055 + }, + { + "epoch": 5.42, + "learning_rate": 1.9707491417553652e-06, + "loss": 0.1001, + "step": 116060 + }, + { + "epoch": 5.42, + "learning_rate": 1.9699653567005787e-06, + "loss": 0.0063, + "step": 116065 + }, + { + "epoch": 5.42, + "learning_rate": 1.969181571645792e-06, + "loss": 0.0336, + "step": 116070 + }, + { + "epoch": 5.42, + "learning_rate": 1.9683977865910053e-06, + "loss": 0.0032, + "step": 116075 + }, + { + "epoch": 5.42, + "learning_rate": 1.9676140015362187e-06, + "loss": 0.0302, + "step": 116080 + }, + { + "epoch": 5.42, + "learning_rate": 1.9668302164814322e-06, + "loss": 0.052, + "step": 116085 + }, + { + "epoch": 5.42, + "learning_rate": 1.9660464314266457e-06, + "loss": 0.1253, + "step": 116090 + }, + { + "epoch": 5.42, + "learning_rate": 1.965262646371859e-06, + "loss": 0.1241, + "step": 116095 + }, + { + "epoch": 5.42, + "learning_rate": 1.9644788613170727e-06, + "loss": 0.1836, + "step": 116100 + }, + { + "epoch": 5.42, + "learning_rate": 1.963695076262286e-06, + "loss": 0.3454, + "step": 116105 + }, + { + "epoch": 5.42, + "learning_rate": 1.9629112912074992e-06, + "loss": 0.1024, + "step": 116110 + }, + { + "epoch": 5.42, + "learning_rate": 1.9621275061527127e-06, + "loss": 0.0137, + "step": 116115 + }, + { + "epoch": 5.42, + "learning_rate": 1.961343721097926e-06, + "loss": 0.0313, + "step": 116120 + }, + { + "epoch": 5.42, + "learning_rate": 1.9605599360431397e-06, + "loss": 0.0338, + "step": 116125 + }, + { + "epoch": 5.42, + "learning_rate": 1.959776150988353e-06, + "loss": 0.0839, + "step": 116130 + }, + { + "epoch": 5.42, + "learning_rate": 1.9589923659335666e-06, + "loss": 0.0623, + "step": 116135 + }, + { + "epoch": 5.42, + "learning_rate": 1.9582085808787797e-06, + "loss": 0.0618, + "step": 116140 + }, + { + "epoch": 5.42, + "learning_rate": 1.957424795823993e-06, + "loss": 0.0644, + "step": 116145 + }, + { + "epoch": 5.42, + "learning_rate": 1.956641010769207e-06, + "loss": 0.0613, + "step": 116150 + }, + { + "epoch": 5.42, + "learning_rate": 1.95585722571442e-06, + "loss": 0.3095, + "step": 116155 + }, + { + "epoch": 5.42, + "learning_rate": 1.9550734406596336e-06, + "loss": 0.1247, + "step": 116160 + }, + { + "epoch": 5.42, + "learning_rate": 1.954289655604847e-06, + "loss": 0.0199, + "step": 116165 + }, + { + "epoch": 5.42, + "learning_rate": 1.9535058705500606e-06, + "loss": 0.0112, + "step": 116170 + }, + { + "epoch": 5.42, + "learning_rate": 1.952722085495274e-06, + "loss": 0.0207, + "step": 116175 + }, + { + "epoch": 5.42, + "learning_rate": 1.9519383004404875e-06, + "loss": 0.0734, + "step": 116180 + }, + { + "epoch": 5.42, + "learning_rate": 1.951154515385701e-06, + "loss": 0.0485, + "step": 116185 + }, + { + "epoch": 5.42, + "learning_rate": 1.950370730330914e-06, + "loss": 0.0649, + "step": 116190 + }, + { + "epoch": 5.42, + "learning_rate": 1.9495869452761276e-06, + "loss": 0.1152, + "step": 116195 + }, + { + "epoch": 5.42, + "learning_rate": 1.948803160221341e-06, + "loss": 0.1278, + "step": 116200 + }, + { + "epoch": 5.42, + "learning_rate": 1.9480193751665545e-06, + "loss": 0.221, + "step": 116205 + }, + { + "epoch": 5.42, + "learning_rate": 1.947235590111768e-06, + "loss": 0.0373, + "step": 116210 + }, + { + "epoch": 5.42, + "learning_rate": 1.9464518050569815e-06, + "loss": 0.0165, + "step": 116215 + }, + { + "epoch": 5.42, + "learning_rate": 1.9456680200021946e-06, + "loss": 0.0066, + "step": 116220 + }, + { + "epoch": 5.42, + "learning_rate": 1.944884234947408e-06, + "loss": 0.0488, + "step": 116225 + }, + { + "epoch": 5.42, + "learning_rate": 1.9441004498926215e-06, + "loss": 0.0718, + "step": 116230 + }, + { + "epoch": 5.42, + "learning_rate": 1.943316664837835e-06, + "loss": 0.0983, + "step": 116235 + }, + { + "epoch": 5.42, + "learning_rate": 1.9425328797830485e-06, + "loss": 0.0531, + "step": 116240 + }, + { + "epoch": 5.42, + "learning_rate": 1.941749094728262e-06, + "loss": 0.1097, + "step": 116245 + }, + { + "epoch": 5.42, + "learning_rate": 1.9409653096734754e-06, + "loss": 0.1823, + "step": 116250 + }, + { + "epoch": 5.42, + "learning_rate": 1.9401815246186885e-06, + "loss": 0.3177, + "step": 116255 + }, + { + "epoch": 5.42, + "learning_rate": 1.939397739563902e-06, + "loss": 0.1398, + "step": 116260 + }, + { + "epoch": 5.43, + "learning_rate": 1.9386139545091155e-06, + "loss": 0.0244, + "step": 116265 + }, + { + "epoch": 5.43, + "learning_rate": 1.937830169454329e-06, + "loss": 0.0146, + "step": 116270 + }, + { + "epoch": 5.43, + "learning_rate": 1.9370463843995424e-06, + "loss": 0.0343, + "step": 116275 + }, + { + "epoch": 5.43, + "learning_rate": 1.936262599344756e-06, + "loss": 0.0422, + "step": 116280 + }, + { + "epoch": 5.43, + "learning_rate": 1.935478814289969e-06, + "loss": 0.0462, + "step": 116285 + }, + { + "epoch": 5.43, + "learning_rate": 1.9346950292351825e-06, + "loss": 0.0945, + "step": 116290 + }, + { + "epoch": 5.43, + "learning_rate": 1.9339112441803964e-06, + "loss": 0.2791, + "step": 116295 + }, + { + "epoch": 5.43, + "learning_rate": 1.93312745912561e-06, + "loss": 0.1044, + "step": 116300 + }, + { + "epoch": 5.43, + "learning_rate": 1.932343674070823e-06, + "loss": 0.1835, + "step": 116305 + }, + { + "epoch": 5.43, + "learning_rate": 1.9315598890160364e-06, + "loss": 0.0975, + "step": 116310 + }, + { + "epoch": 5.43, + "learning_rate": 1.93077610396125e-06, + "loss": 0.0204, + "step": 116315 + }, + { + "epoch": 5.43, + "learning_rate": 1.9299923189064634e-06, + "loss": 0.0303, + "step": 116320 + }, + { + "epoch": 5.43, + "learning_rate": 1.929208533851677e-06, + "loss": 0.0164, + "step": 116325 + }, + { + "epoch": 5.43, + "learning_rate": 1.9284247487968903e-06, + "loss": 0.0614, + "step": 116330 + }, + { + "epoch": 5.43, + "learning_rate": 1.9276409637421034e-06, + "loss": 0.012, + "step": 116335 + }, + { + "epoch": 5.43, + "learning_rate": 1.926857178687317e-06, + "loss": 0.0567, + "step": 116340 + }, + { + "epoch": 5.43, + "learning_rate": 1.9260733936325303e-06, + "loss": 0.1095, + "step": 116345 + }, + { + "epoch": 5.43, + "learning_rate": 1.925289608577744e-06, + "loss": 0.2159, + "step": 116350 + }, + { + "epoch": 5.43, + "learning_rate": 1.9245058235229573e-06, + "loss": 0.3211, + "step": 116355 + }, + { + "epoch": 5.43, + "learning_rate": 1.9237220384681708e-06, + "loss": 0.1055, + "step": 116360 + }, + { + "epoch": 5.43, + "learning_rate": 1.9229382534133843e-06, + "loss": 0.017, + "step": 116365 + }, + { + "epoch": 5.43, + "learning_rate": 1.9221544683585973e-06, + "loss": 0.0635, + "step": 116370 + }, + { + "epoch": 5.43, + "learning_rate": 1.921370683303811e-06, + "loss": 0.0291, + "step": 116375 + }, + { + "epoch": 5.43, + "learning_rate": 1.9205868982490243e-06, + "loss": 0.0452, + "step": 116380 + }, + { + "epoch": 5.43, + "learning_rate": 1.9198031131942378e-06, + "loss": 0.0633, + "step": 116385 + }, + { + "epoch": 5.43, + "learning_rate": 1.9190193281394513e-06, + "loss": 0.1258, + "step": 116390 + }, + { + "epoch": 5.43, + "learning_rate": 1.9182355430846647e-06, + "loss": 0.1971, + "step": 116395 + }, + { + "epoch": 5.43, + "learning_rate": 1.917451758029878e-06, + "loss": 0.2227, + "step": 116400 + }, + { + "epoch": 5.43, + "learning_rate": 1.9166679729750913e-06, + "loss": 0.2383, + "step": 116405 + }, + { + "epoch": 5.43, + "learning_rate": 1.9158841879203048e-06, + "loss": 0.1306, + "step": 116410 + }, + { + "epoch": 5.43, + "learning_rate": 1.9151004028655182e-06, + "loss": 0.0219, + "step": 116415 + }, + { + "epoch": 5.43, + "learning_rate": 1.9143166178107317e-06, + "loss": 0.0526, + "step": 116420 + }, + { + "epoch": 5.43, + "learning_rate": 1.913532832755945e-06, + "loss": 0.0244, + "step": 116425 + }, + { + "epoch": 5.43, + "learning_rate": 1.9127490477011587e-06, + "loss": 0.0929, + "step": 116430 + }, + { + "epoch": 5.43, + "learning_rate": 1.9119652626463717e-06, + "loss": 0.0604, + "step": 116435 + }, + { + "epoch": 5.43, + "learning_rate": 1.9111814775915857e-06, + "loss": 0.0968, + "step": 116440 + }, + { + "epoch": 5.43, + "learning_rate": 1.910397692536799e-06, + "loss": 0.0793, + "step": 116445 + }, + { + "epoch": 5.43, + "learning_rate": 1.909613907482012e-06, + "loss": 0.1977, + "step": 116450 + }, + { + "epoch": 5.43, + "learning_rate": 1.9088301224272257e-06, + "loss": 0.426, + "step": 116455 + }, + { + "epoch": 5.43, + "learning_rate": 1.908046337372439e-06, + "loss": 0.0949, + "step": 116460 + }, + { + "epoch": 5.43, + "learning_rate": 1.9072625523176524e-06, + "loss": 0.0219, + "step": 116465 + }, + { + "epoch": 5.43, + "learning_rate": 1.906478767262866e-06, + "loss": 0.0201, + "step": 116470 + }, + { + "epoch": 5.43, + "learning_rate": 1.9056949822080794e-06, + "loss": 0.0717, + "step": 116475 + }, + { + "epoch": 5.44, + "learning_rate": 1.9049111971532927e-06, + "loss": 0.049, + "step": 116480 + }, + { + "epoch": 5.44, + "learning_rate": 1.9041274120985061e-06, + "loss": 0.0407, + "step": 116485 + }, + { + "epoch": 5.44, + "learning_rate": 1.9033436270437196e-06, + "loss": 0.0667, + "step": 116490 + }, + { + "epoch": 5.44, + "learning_rate": 1.9025598419889333e-06, + "loss": 0.0928, + "step": 116495 + }, + { + "epoch": 5.44, + "learning_rate": 1.9017760569341464e-06, + "loss": 0.1236, + "step": 116500 + }, + { + "epoch": 5.44, + "learning_rate": 1.90099227187936e-06, + "loss": 0.3275, + "step": 116505 + }, + { + "epoch": 5.44, + "learning_rate": 1.9002084868245736e-06, + "loss": 0.0646, + "step": 116510 + }, + { + "epoch": 5.44, + "learning_rate": 1.8994247017697866e-06, + "loss": 0.0096, + "step": 116515 + }, + { + "epoch": 5.44, + "learning_rate": 1.8986409167150003e-06, + "loss": 0.0447, + "step": 116520 + }, + { + "epoch": 5.44, + "learning_rate": 1.8978571316602138e-06, + "loss": 0.0553, + "step": 116525 + }, + { + "epoch": 5.44, + "learning_rate": 1.897073346605427e-06, + "loss": 0.0537, + "step": 116530 + }, + { + "epoch": 5.44, + "learning_rate": 1.8962895615506405e-06, + "loss": 0.0286, + "step": 116535 + }, + { + "epoch": 5.44, + "learning_rate": 1.895505776495854e-06, + "loss": 0.077, + "step": 116540 + }, + { + "epoch": 5.44, + "learning_rate": 1.8947219914410673e-06, + "loss": 0.1162, + "step": 116545 + }, + { + "epoch": 5.44, + "learning_rate": 1.8939382063862808e-06, + "loss": 0.2083, + "step": 116550 + }, + { + "epoch": 5.44, + "learning_rate": 1.8931544213314943e-06, + "loss": 0.1319, + "step": 116555 + }, + { + "epoch": 5.44, + "learning_rate": 1.8923706362767077e-06, + "loss": 0.0825, + "step": 116560 + }, + { + "epoch": 5.44, + "learning_rate": 1.891586851221921e-06, + "loss": 0.0155, + "step": 116565 + }, + { + "epoch": 5.44, + "learning_rate": 1.8908030661671345e-06, + "loss": 0.037, + "step": 116570 + }, + { + "epoch": 5.44, + "learning_rate": 1.890019281112348e-06, + "loss": 0.022, + "step": 116575 + }, + { + "epoch": 5.44, + "learning_rate": 1.8892354960575613e-06, + "loss": 0.0557, + "step": 116580 + }, + { + "epoch": 5.44, + "learning_rate": 1.8884517110027747e-06, + "loss": 0.0419, + "step": 116585 + }, + { + "epoch": 5.44, + "learning_rate": 1.8876679259479882e-06, + "loss": 0.0562, + "step": 116590 + }, + { + "epoch": 5.44, + "learning_rate": 1.8868841408932015e-06, + "loss": 0.036, + "step": 116595 + }, + { + "epoch": 5.44, + "learning_rate": 1.886100355838415e-06, + "loss": 0.2233, + "step": 116600 + }, + { + "epoch": 5.44, + "learning_rate": 1.8853165707836285e-06, + "loss": 0.3011, + "step": 116605 + }, + { + "epoch": 5.44, + "learning_rate": 1.8845327857288417e-06, + "loss": 0.0853, + "step": 116610 + }, + { + "epoch": 5.44, + "learning_rate": 1.8837490006740552e-06, + "loss": 0.0277, + "step": 116615 + }, + { + "epoch": 5.44, + "learning_rate": 1.8829652156192687e-06, + "loss": 0.0154, + "step": 116620 + }, + { + "epoch": 5.44, + "learning_rate": 1.8821814305644822e-06, + "loss": 0.0835, + "step": 116625 + }, + { + "epoch": 5.44, + "learning_rate": 1.8813976455096954e-06, + "loss": 0.097, + "step": 116630 + }, + { + "epoch": 5.44, + "learning_rate": 1.880613860454909e-06, + "loss": 0.0347, + "step": 116635 + }, + { + "epoch": 5.44, + "learning_rate": 1.8798300754001226e-06, + "loss": 0.0854, + "step": 116640 + }, + { + "epoch": 5.44, + "learning_rate": 1.8790462903453357e-06, + "loss": 0.112, + "step": 116645 + }, + { + "epoch": 5.44, + "learning_rate": 1.8782625052905494e-06, + "loss": 0.1733, + "step": 116650 + }, + { + "epoch": 5.44, + "learning_rate": 1.8774787202357628e-06, + "loss": 0.2132, + "step": 116655 + }, + { + "epoch": 5.44, + "learning_rate": 1.876694935180976e-06, + "loss": 0.1479, + "step": 116660 + }, + { + "epoch": 5.44, + "learning_rate": 1.8759111501261896e-06, + "loss": 0.0072, + "step": 116665 + }, + { + "epoch": 5.44, + "learning_rate": 1.875127365071403e-06, + "loss": 0.0174, + "step": 116670 + }, + { + "epoch": 5.44, + "learning_rate": 1.8743435800166164e-06, + "loss": 0.0421, + "step": 116675 + }, + { + "epoch": 5.44, + "learning_rate": 1.8735597949618298e-06, + "loss": 0.0389, + "step": 116680 + }, + { + "epoch": 5.44, + "learning_rate": 1.8727760099070433e-06, + "loss": 0.0436, + "step": 116685 + }, + { + "epoch": 5.44, + "learning_rate": 1.8719922248522568e-06, + "loss": 0.0991, + "step": 116690 + }, + { + "epoch": 5.45, + "learning_rate": 1.87120843979747e-06, + "loss": 0.0797, + "step": 116695 + }, + { + "epoch": 5.45, + "learning_rate": 1.8704246547426836e-06, + "loss": 0.0885, + "step": 116700 + }, + { + "epoch": 5.45, + "learning_rate": 1.869640869687897e-06, + "loss": 0.2933, + "step": 116705 + }, + { + "epoch": 5.45, + "learning_rate": 1.8688570846331103e-06, + "loss": 0.1631, + "step": 116710 + }, + { + "epoch": 5.45, + "learning_rate": 1.8680732995783238e-06, + "loss": 0.0373, + "step": 116715 + }, + { + "epoch": 5.45, + "learning_rate": 1.8672895145235373e-06, + "loss": 0.0041, + "step": 116720 + }, + { + "epoch": 5.45, + "learning_rate": 1.8665057294687505e-06, + "loss": 0.0111, + "step": 116725 + }, + { + "epoch": 5.45, + "learning_rate": 1.865721944413964e-06, + "loss": 0.0474, + "step": 116730 + }, + { + "epoch": 5.45, + "learning_rate": 1.8649381593591775e-06, + "loss": 0.0956, + "step": 116735 + }, + { + "epoch": 5.45, + "learning_rate": 1.8641543743043908e-06, + "loss": 0.0566, + "step": 116740 + }, + { + "epoch": 5.45, + "learning_rate": 1.8633705892496043e-06, + "loss": 0.1487, + "step": 116745 + }, + { + "epoch": 5.45, + "learning_rate": 1.8625868041948177e-06, + "loss": 0.1068, + "step": 116750 + }, + { + "epoch": 5.45, + "learning_rate": 1.8618030191400312e-06, + "loss": 0.2394, + "step": 116755 + }, + { + "epoch": 5.45, + "learning_rate": 1.8610192340852445e-06, + "loss": 0.0486, + "step": 116760 + }, + { + "epoch": 5.45, + "learning_rate": 1.860235449030458e-06, + "loss": 0.0609, + "step": 116765 + }, + { + "epoch": 5.45, + "learning_rate": 1.8594516639756715e-06, + "loss": 0.0223, + "step": 116770 + }, + { + "epoch": 5.45, + "learning_rate": 1.8586678789208847e-06, + "loss": 0.032, + "step": 116775 + }, + { + "epoch": 5.45, + "learning_rate": 1.8578840938660982e-06, + "loss": 0.0894, + "step": 116780 + }, + { + "epoch": 5.45, + "learning_rate": 1.857100308811312e-06, + "loss": 0.0612, + "step": 116785 + }, + { + "epoch": 5.45, + "learning_rate": 1.856316523756525e-06, + "loss": 0.0247, + "step": 116790 + }, + { + "epoch": 5.45, + "learning_rate": 1.8555327387017387e-06, + "loss": 0.15, + "step": 116795 + }, + { + "epoch": 5.45, + "learning_rate": 1.8547489536469521e-06, + "loss": 0.0956, + "step": 116800 + }, + { + "epoch": 5.45, + "learning_rate": 1.8539651685921652e-06, + "loss": 0.221, + "step": 116805 + }, + { + "epoch": 5.45, + "learning_rate": 1.8531813835373789e-06, + "loss": 0.0946, + "step": 116810 + }, + { + "epoch": 5.45, + "learning_rate": 1.8523975984825924e-06, + "loss": 0.0381, + "step": 116815 + }, + { + "epoch": 5.45, + "learning_rate": 1.8516138134278059e-06, + "loss": 0.0531, + "step": 116820 + }, + { + "epoch": 5.45, + "learning_rate": 1.8508300283730191e-06, + "loss": 0.0692, + "step": 116825 + }, + { + "epoch": 5.45, + "learning_rate": 1.8500462433182326e-06, + "loss": 0.0491, + "step": 116830 + }, + { + "epoch": 5.45, + "learning_rate": 1.849262458263446e-06, + "loss": 0.0889, + "step": 116835 + }, + { + "epoch": 5.45, + "learning_rate": 1.8484786732086594e-06, + "loss": 0.0499, + "step": 116840 + }, + { + "epoch": 5.45, + "learning_rate": 1.8476948881538728e-06, + "loss": 0.1794, + "step": 116845 + }, + { + "epoch": 5.45, + "learning_rate": 1.8469111030990863e-06, + "loss": 0.1024, + "step": 116850 + }, + { + "epoch": 5.45, + "learning_rate": 1.8461273180442996e-06, + "loss": 0.2845, + "step": 116855 + }, + { + "epoch": 5.45, + "learning_rate": 1.845343532989513e-06, + "loss": 0.1044, + "step": 116860 + }, + { + "epoch": 5.45, + "learning_rate": 1.8445597479347266e-06, + "loss": 0.037, + "step": 116865 + }, + { + "epoch": 5.45, + "learning_rate": 1.8437759628799398e-06, + "loss": 0.0458, + "step": 116870 + }, + { + "epoch": 5.45, + "learning_rate": 1.8429921778251533e-06, + "loss": 0.048, + "step": 116875 + }, + { + "epoch": 5.45, + "learning_rate": 1.8422083927703668e-06, + "loss": 0.0399, + "step": 116880 + }, + { + "epoch": 5.45, + "learning_rate": 1.8414246077155803e-06, + "loss": 0.0975, + "step": 116885 + }, + { + "epoch": 5.45, + "learning_rate": 1.8406408226607936e-06, + "loss": 0.0612, + "step": 116890 + }, + { + "epoch": 5.45, + "learning_rate": 1.839857037606007e-06, + "loss": 0.1245, + "step": 116895 + }, + { + "epoch": 5.45, + "learning_rate": 1.8390732525512205e-06, + "loss": 0.1702, + "step": 116900 + }, + { + "epoch": 5.45, + "learning_rate": 1.8382894674964338e-06, + "loss": 0.1923, + "step": 116905 + }, + { + "epoch": 5.46, + "learning_rate": 1.8375056824416473e-06, + "loss": 0.0821, + "step": 116910 + }, + { + "epoch": 5.46, + "learning_rate": 1.8367218973868608e-06, + "loss": 0.0214, + "step": 116915 + }, + { + "epoch": 5.46, + "learning_rate": 1.835938112332074e-06, + "loss": 0.0485, + "step": 116920 + }, + { + "epoch": 5.46, + "learning_rate": 1.8351543272772875e-06, + "loss": 0.0125, + "step": 116925 + }, + { + "epoch": 5.46, + "learning_rate": 1.8343705422225012e-06, + "loss": 0.0642, + "step": 116930 + }, + { + "epoch": 5.46, + "learning_rate": 1.8335867571677143e-06, + "loss": 0.0815, + "step": 116935 + }, + { + "epoch": 5.46, + "learning_rate": 1.832802972112928e-06, + "loss": 0.0769, + "step": 116940 + }, + { + "epoch": 5.46, + "learning_rate": 1.8320191870581414e-06, + "loss": 0.0938, + "step": 116945 + }, + { + "epoch": 5.46, + "learning_rate": 1.831235402003355e-06, + "loss": 0.1729, + "step": 116950 + }, + { + "epoch": 5.46, + "learning_rate": 1.8304516169485682e-06, + "loss": 0.3435, + "step": 116955 + }, + { + "epoch": 5.46, + "learning_rate": 1.8296678318937817e-06, + "loss": 0.1025, + "step": 116960 + }, + { + "epoch": 5.46, + "learning_rate": 1.8288840468389951e-06, + "loss": 0.0372, + "step": 116965 + }, + { + "epoch": 5.46, + "learning_rate": 1.8281002617842084e-06, + "loss": 0.019, + "step": 116970 + }, + { + "epoch": 5.46, + "learning_rate": 1.827316476729422e-06, + "loss": 0.0253, + "step": 116975 + }, + { + "epoch": 5.46, + "learning_rate": 1.8265326916746354e-06, + "loss": 0.0308, + "step": 116980 + }, + { + "epoch": 5.46, + "learning_rate": 1.8257489066198487e-06, + "loss": 0.0638, + "step": 116985 + }, + { + "epoch": 5.46, + "learning_rate": 1.8249651215650621e-06, + "loss": 0.123, + "step": 116990 + }, + { + "epoch": 5.46, + "learning_rate": 1.8241813365102756e-06, + "loss": 0.1014, + "step": 116995 + }, + { + "epoch": 5.46, + "learning_rate": 1.823397551455489e-06, + "loss": 0.084, + "step": 117000 + }, + { + "epoch": 5.46, + "learning_rate": 1.8226137664007024e-06, + "loss": 0.2877, + "step": 117005 + }, + { + "epoch": 5.46, + "learning_rate": 1.8218299813459159e-06, + "loss": 0.0618, + "step": 117010 + }, + { + "epoch": 5.46, + "learning_rate": 1.8210461962911293e-06, + "loss": 0.0979, + "step": 117015 + }, + { + "epoch": 5.46, + "learning_rate": 1.8202624112363426e-06, + "loss": 0.0441, + "step": 117020 + }, + { + "epoch": 5.46, + "learning_rate": 1.819478626181556e-06, + "loss": 0.0205, + "step": 117025 + }, + { + "epoch": 5.46, + "learning_rate": 1.8186948411267696e-06, + "loss": 0.046, + "step": 117030 + }, + { + "epoch": 5.46, + "learning_rate": 1.8179110560719828e-06, + "loss": 0.0575, + "step": 117035 + }, + { + "epoch": 5.46, + "learning_rate": 1.8171272710171963e-06, + "loss": 0.0406, + "step": 117040 + }, + { + "epoch": 5.46, + "learning_rate": 1.8163434859624098e-06, + "loss": 0.0418, + "step": 117045 + }, + { + "epoch": 5.46, + "learning_rate": 1.815559700907623e-06, + "loss": 0.1029, + "step": 117050 + }, + { + "epoch": 5.46, + "learning_rate": 1.8147759158528366e-06, + "loss": 0.3829, + "step": 117055 + }, + { + "epoch": 5.46, + "learning_rate": 1.81399213079805e-06, + "loss": 0.1297, + "step": 117060 + }, + { + "epoch": 5.46, + "learning_rate": 1.8132083457432637e-06, + "loss": 0.0336, + "step": 117065 + }, + { + "epoch": 5.46, + "learning_rate": 1.8124245606884768e-06, + "loss": 0.026, + "step": 117070 + }, + { + "epoch": 5.46, + "learning_rate": 1.8116407756336905e-06, + "loss": 0.0593, + "step": 117075 + }, + { + "epoch": 5.46, + "learning_rate": 1.810856990578904e-06, + "loss": 0.0672, + "step": 117080 + }, + { + "epoch": 5.46, + "learning_rate": 1.8100732055241172e-06, + "loss": 0.112, + "step": 117085 + }, + { + "epoch": 5.46, + "learning_rate": 1.8092894204693307e-06, + "loss": 0.0577, + "step": 117090 + }, + { + "epoch": 5.46, + "learning_rate": 1.8085056354145442e-06, + "loss": 0.1115, + "step": 117095 + }, + { + "epoch": 5.46, + "learning_rate": 1.8077218503597575e-06, + "loss": 0.0959, + "step": 117100 + }, + { + "epoch": 5.46, + "learning_rate": 1.806938065304971e-06, + "loss": 0.3098, + "step": 117105 + }, + { + "epoch": 5.46, + "learning_rate": 1.8061542802501844e-06, + "loss": 0.1418, + "step": 117110 + }, + { + "epoch": 5.46, + "learning_rate": 1.8053704951953977e-06, + "loss": 0.0061, + "step": 117115 + }, + { + "epoch": 5.46, + "learning_rate": 1.8045867101406112e-06, + "loss": 0.04, + "step": 117120 + }, + { + "epoch": 5.47, + "learning_rate": 1.8038029250858247e-06, + "loss": 0.0568, + "step": 117125 + }, + { + "epoch": 5.47, + "learning_rate": 1.8030191400310382e-06, + "loss": 0.0282, + "step": 117130 + }, + { + "epoch": 5.47, + "learning_rate": 1.8022353549762514e-06, + "loss": 0.0671, + "step": 117135 + }, + { + "epoch": 5.47, + "learning_rate": 1.801451569921465e-06, + "loss": 0.088, + "step": 117140 + }, + { + "epoch": 5.47, + "learning_rate": 1.8006677848666784e-06, + "loss": 0.0675, + "step": 117145 + }, + { + "epoch": 5.47, + "learning_rate": 1.7998839998118917e-06, + "loss": 0.193, + "step": 117150 + }, + { + "epoch": 5.47, + "learning_rate": 1.7991002147571051e-06, + "loss": 0.2967, + "step": 117155 + }, + { + "epoch": 5.47, + "learning_rate": 1.7983164297023186e-06, + "loss": 0.1049, + "step": 117160 + }, + { + "epoch": 5.47, + "learning_rate": 1.797532644647532e-06, + "loss": 0.0103, + "step": 117165 + }, + { + "epoch": 5.47, + "learning_rate": 1.7967488595927454e-06, + "loss": 0.0339, + "step": 117170 + }, + { + "epoch": 5.47, + "learning_rate": 1.7959650745379589e-06, + "loss": 0.0525, + "step": 117175 + }, + { + "epoch": 5.47, + "learning_rate": 1.7951812894831721e-06, + "loss": 0.0661, + "step": 117180 + }, + { + "epoch": 5.47, + "learning_rate": 1.7943975044283856e-06, + "loss": 0.0442, + "step": 117185 + }, + { + "epoch": 5.47, + "learning_rate": 1.793613719373599e-06, + "loss": 0.0885, + "step": 117190 + }, + { + "epoch": 5.47, + "learning_rate": 1.7928299343188128e-06, + "loss": 0.0766, + "step": 117195 + }, + { + "epoch": 5.47, + "learning_rate": 1.7920461492640259e-06, + "loss": 0.111, + "step": 117200 + }, + { + "epoch": 5.47, + "learning_rate": 1.7912623642092393e-06, + "loss": 0.3517, + "step": 117205 + }, + { + "epoch": 5.47, + "learning_rate": 1.790478579154453e-06, + "loss": 0.0979, + "step": 117210 + }, + { + "epoch": 5.47, + "learning_rate": 1.789694794099666e-06, + "loss": 0.0139, + "step": 117215 + }, + { + "epoch": 5.47, + "learning_rate": 1.7889110090448798e-06, + "loss": 0.0042, + "step": 117220 + }, + { + "epoch": 5.47, + "learning_rate": 1.7881272239900933e-06, + "loss": 0.0358, + "step": 117225 + }, + { + "epoch": 5.47, + "learning_rate": 1.7873434389353065e-06, + "loss": 0.0358, + "step": 117230 + }, + { + "epoch": 5.47, + "learning_rate": 1.78655965388052e-06, + "loss": 0.0799, + "step": 117235 + }, + { + "epoch": 5.47, + "learning_rate": 1.7857758688257335e-06, + "loss": 0.0432, + "step": 117240 + }, + { + "epoch": 5.47, + "learning_rate": 1.7849920837709468e-06, + "loss": 0.0769, + "step": 117245 + }, + { + "epoch": 5.47, + "learning_rate": 1.7842082987161602e-06, + "loss": 0.0906, + "step": 117250 + }, + { + "epoch": 5.47, + "learning_rate": 1.7834245136613737e-06, + "loss": 0.1895, + "step": 117255 + }, + { + "epoch": 5.47, + "learning_rate": 1.7826407286065872e-06, + "loss": 0.08, + "step": 117260 + }, + { + "epoch": 5.47, + "learning_rate": 1.7818569435518005e-06, + "loss": 0.0085, + "step": 117265 + }, + { + "epoch": 5.47, + "learning_rate": 1.781073158497014e-06, + "loss": 0.0354, + "step": 117270 + }, + { + "epoch": 5.47, + "learning_rate": 1.7802893734422274e-06, + "loss": 0.0548, + "step": 117275 + }, + { + "epoch": 5.47, + "learning_rate": 1.7795055883874407e-06, + "loss": 0.0215, + "step": 117280 + }, + { + "epoch": 5.47, + "learning_rate": 1.7787218033326542e-06, + "loss": 0.0743, + "step": 117285 + }, + { + "epoch": 5.47, + "learning_rate": 1.7779380182778677e-06, + "loss": 0.0816, + "step": 117290 + }, + { + "epoch": 5.47, + "learning_rate": 1.777154233223081e-06, + "loss": 0.1677, + "step": 117295 + }, + { + "epoch": 5.47, + "learning_rate": 1.7763704481682944e-06, + "loss": 0.1175, + "step": 117300 + }, + { + "epoch": 5.47, + "learning_rate": 1.775586663113508e-06, + "loss": 0.2947, + "step": 117305 + }, + { + "epoch": 5.47, + "learning_rate": 1.7748028780587212e-06, + "loss": 0.0923, + "step": 117310 + }, + { + "epoch": 5.47, + "learning_rate": 1.7740190930039347e-06, + "loss": 0.0125, + "step": 117315 + }, + { + "epoch": 5.47, + "learning_rate": 1.7732353079491482e-06, + "loss": 0.0309, + "step": 117320 + }, + { + "epoch": 5.47, + "learning_rate": 1.7724515228943616e-06, + "loss": 0.0627, + "step": 117325 + }, + { + "epoch": 5.47, + "learning_rate": 1.771667737839575e-06, + "loss": 0.0277, + "step": 117330 + }, + { + "epoch": 5.48, + "learning_rate": 1.7708839527847884e-06, + "loss": 0.0635, + "step": 117335 + }, + { + "epoch": 5.48, + "learning_rate": 1.770100167730002e-06, + "loss": 0.0639, + "step": 117340 + }, + { + "epoch": 5.48, + "learning_rate": 1.7693163826752151e-06, + "loss": 0.0872, + "step": 117345 + }, + { + "epoch": 5.48, + "learning_rate": 1.7685325976204286e-06, + "loss": 0.0748, + "step": 117350 + }, + { + "epoch": 5.48, + "learning_rate": 1.7677488125656423e-06, + "loss": 0.2067, + "step": 117355 + }, + { + "epoch": 5.48, + "learning_rate": 1.7669650275108554e-06, + "loss": 0.1693, + "step": 117360 + }, + { + "epoch": 5.48, + "learning_rate": 1.766181242456069e-06, + "loss": 0.0233, + "step": 117365 + }, + { + "epoch": 5.48, + "learning_rate": 1.7653974574012826e-06, + "loss": 0.02, + "step": 117370 + }, + { + "epoch": 5.48, + "learning_rate": 1.7646136723464958e-06, + "loss": 0.0089, + "step": 117375 + }, + { + "epoch": 5.48, + "learning_rate": 1.7638298872917093e-06, + "loss": 0.0462, + "step": 117380 + }, + { + "epoch": 5.48, + "learning_rate": 1.7630461022369228e-06, + "loss": 0.0992, + "step": 117385 + }, + { + "epoch": 5.48, + "learning_rate": 1.7622623171821363e-06, + "loss": 0.0926, + "step": 117390 + }, + { + "epoch": 5.48, + "learning_rate": 1.7614785321273495e-06, + "loss": 0.079, + "step": 117395 + }, + { + "epoch": 5.48, + "learning_rate": 1.760694747072563e-06, + "loss": 0.1457, + "step": 117400 + }, + { + "epoch": 5.48, + "learning_rate": 1.7599109620177765e-06, + "loss": 0.3734, + "step": 117405 + }, + { + "epoch": 5.48, + "learning_rate": 1.7591271769629898e-06, + "loss": 0.069, + "step": 117410 + }, + { + "epoch": 5.48, + "learning_rate": 1.7583433919082033e-06, + "loss": 0.0292, + "step": 117415 + }, + { + "epoch": 5.48, + "learning_rate": 1.7575596068534167e-06, + "loss": 0.0254, + "step": 117420 + }, + { + "epoch": 5.48, + "learning_rate": 1.75677582179863e-06, + "loss": 0.0181, + "step": 117425 + }, + { + "epoch": 5.48, + "learning_rate": 1.7559920367438435e-06, + "loss": 0.0486, + "step": 117430 + }, + { + "epoch": 5.48, + "learning_rate": 1.755208251689057e-06, + "loss": 0.0448, + "step": 117435 + }, + { + "epoch": 5.48, + "learning_rate": 1.7544244666342702e-06, + "loss": 0.0877, + "step": 117440 + }, + { + "epoch": 5.48, + "learning_rate": 1.7536406815794837e-06, + "loss": 0.03, + "step": 117445 + }, + { + "epoch": 5.48, + "learning_rate": 1.7528568965246972e-06, + "loss": 0.1161, + "step": 117450 + }, + { + "epoch": 5.48, + "learning_rate": 1.7520731114699107e-06, + "loss": 0.2105, + "step": 117455 + }, + { + "epoch": 5.48, + "learning_rate": 1.751289326415124e-06, + "loss": 0.1203, + "step": 117460 + }, + { + "epoch": 5.48, + "learning_rate": 1.7505055413603374e-06, + "loss": 0.0405, + "step": 117465 + }, + { + "epoch": 5.48, + "learning_rate": 1.749721756305551e-06, + "loss": 0.044, + "step": 117470 + }, + { + "epoch": 5.48, + "learning_rate": 1.7489379712507642e-06, + "loss": 0.0602, + "step": 117475 + }, + { + "epoch": 5.48, + "learning_rate": 1.7481541861959777e-06, + "loss": 0.077, + "step": 117480 + }, + { + "epoch": 5.48, + "learning_rate": 1.7473704011411914e-06, + "loss": 0.0231, + "step": 117485 + }, + { + "epoch": 5.48, + "learning_rate": 1.7465866160864044e-06, + "loss": 0.0492, + "step": 117490 + }, + { + "epoch": 5.48, + "learning_rate": 1.745802831031618e-06, + "loss": 0.0588, + "step": 117495 + }, + { + "epoch": 5.48, + "learning_rate": 1.7450190459768316e-06, + "loss": 0.084, + "step": 117500 + }, + { + "epoch": 5.48, + "learning_rate": 1.7442352609220447e-06, + "loss": 0.3295, + "step": 117505 + }, + { + "epoch": 5.48, + "learning_rate": 1.7434514758672584e-06, + "loss": 0.1009, + "step": 117510 + }, + { + "epoch": 5.48, + "learning_rate": 1.7426676908124718e-06, + "loss": 0.0079, + "step": 117515 + }, + { + "epoch": 5.48, + "learning_rate": 1.7418839057576853e-06, + "loss": 0.0757, + "step": 117520 + }, + { + "epoch": 5.48, + "learning_rate": 1.7411001207028986e-06, + "loss": 0.0527, + "step": 117525 + }, + { + "epoch": 5.48, + "learning_rate": 1.740316335648112e-06, + "loss": 0.043, + "step": 117530 + }, + { + "epoch": 5.48, + "learning_rate": 1.7395325505933256e-06, + "loss": 0.0321, + "step": 117535 + }, + { + "epoch": 5.48, + "learning_rate": 1.7387487655385388e-06, + "loss": 0.1386, + "step": 117540 + }, + { + "epoch": 5.48, + "learning_rate": 1.7379649804837523e-06, + "loss": 0.0891, + "step": 117545 + }, + { + "epoch": 5.49, + "learning_rate": 1.7371811954289658e-06, + "loss": 0.1549, + "step": 117550 + }, + { + "epoch": 5.49, + "learning_rate": 1.736397410374179e-06, + "loss": 0.317, + "step": 117555 + }, + { + "epoch": 5.49, + "learning_rate": 1.7356136253193925e-06, + "loss": 0.0684, + "step": 117560 + }, + { + "epoch": 5.49, + "learning_rate": 1.734829840264606e-06, + "loss": 0.0312, + "step": 117565 + }, + { + "epoch": 5.49, + "learning_rate": 1.7340460552098193e-06, + "loss": 0.0121, + "step": 117570 + }, + { + "epoch": 5.49, + "learning_rate": 1.7332622701550328e-06, + "loss": 0.04, + "step": 117575 + }, + { + "epoch": 5.49, + "learning_rate": 1.7324784851002463e-06, + "loss": 0.0438, + "step": 117580 + }, + { + "epoch": 5.49, + "learning_rate": 1.7316947000454597e-06, + "loss": 0.0451, + "step": 117585 + }, + { + "epoch": 5.49, + "learning_rate": 1.730910914990673e-06, + "loss": 0.0463, + "step": 117590 + }, + { + "epoch": 5.49, + "learning_rate": 1.7301271299358865e-06, + "loss": 0.0672, + "step": 117595 + }, + { + "epoch": 5.49, + "learning_rate": 1.7293433448811e-06, + "loss": 0.1381, + "step": 117600 + }, + { + "epoch": 5.49, + "learning_rate": 1.7285595598263133e-06, + "loss": 0.3179, + "step": 117605 + }, + { + "epoch": 5.49, + "learning_rate": 1.7277757747715267e-06, + "loss": 0.0999, + "step": 117610 + }, + { + "epoch": 5.49, + "learning_rate": 1.7269919897167402e-06, + "loss": 0.0055, + "step": 117615 + }, + { + "epoch": 5.49, + "learning_rate": 1.7262082046619535e-06, + "loss": 0.0165, + "step": 117620 + }, + { + "epoch": 5.49, + "learning_rate": 1.725424419607167e-06, + "loss": 0.0673, + "step": 117625 + }, + { + "epoch": 5.49, + "learning_rate": 1.7246406345523807e-06, + "loss": 0.0206, + "step": 117630 + }, + { + "epoch": 5.49, + "learning_rate": 1.7238568494975937e-06, + "loss": 0.0298, + "step": 117635 + }, + { + "epoch": 5.49, + "learning_rate": 1.7230730644428072e-06, + "loss": 0.1023, + "step": 117640 + }, + { + "epoch": 5.49, + "learning_rate": 1.722289279388021e-06, + "loss": 0.0798, + "step": 117645 + }, + { + "epoch": 5.49, + "learning_rate": 1.7215054943332344e-06, + "loss": 0.1287, + "step": 117650 + }, + { + "epoch": 5.49, + "learning_rate": 1.7207217092784477e-06, + "loss": 0.248, + "step": 117655 + }, + { + "epoch": 5.49, + "learning_rate": 1.7199379242236611e-06, + "loss": 0.0702, + "step": 117660 + }, + { + "epoch": 5.49, + "learning_rate": 1.7191541391688746e-06, + "loss": 0.0305, + "step": 117665 + }, + { + "epoch": 5.49, + "learning_rate": 1.7183703541140879e-06, + "loss": 0.0116, + "step": 117670 + }, + { + "epoch": 5.49, + "learning_rate": 1.7175865690593014e-06, + "loss": 0.0305, + "step": 117675 + }, + { + "epoch": 5.49, + "learning_rate": 1.7168027840045149e-06, + "loss": 0.0683, + "step": 117680 + }, + { + "epoch": 5.49, + "learning_rate": 1.7160189989497281e-06, + "loss": 0.0758, + "step": 117685 + }, + { + "epoch": 5.49, + "learning_rate": 1.7152352138949416e-06, + "loss": 0.0601, + "step": 117690 + }, + { + "epoch": 5.49, + "learning_rate": 1.714451428840155e-06, + "loss": 0.0574, + "step": 117695 + }, + { + "epoch": 5.49, + "learning_rate": 1.7136676437853684e-06, + "loss": 0.1063, + "step": 117700 + }, + { + "epoch": 5.49, + "learning_rate": 1.7128838587305818e-06, + "loss": 0.3697, + "step": 117705 + }, + { + "epoch": 5.49, + "learning_rate": 1.7121000736757953e-06, + "loss": 0.0764, + "step": 117710 + }, + { + "epoch": 5.49, + "learning_rate": 1.7113162886210088e-06, + "loss": 0.0051, + "step": 117715 + }, + { + "epoch": 5.49, + "learning_rate": 1.710532503566222e-06, + "loss": 0.0278, + "step": 117720 + }, + { + "epoch": 5.49, + "learning_rate": 1.7097487185114356e-06, + "loss": 0.0931, + "step": 117725 + }, + { + "epoch": 5.49, + "learning_rate": 1.708964933456649e-06, + "loss": 0.0552, + "step": 117730 + }, + { + "epoch": 5.49, + "learning_rate": 1.7081811484018623e-06, + "loss": 0.089, + "step": 117735 + }, + { + "epoch": 5.49, + "learning_rate": 1.7073973633470758e-06, + "loss": 0.0496, + "step": 117740 + }, + { + "epoch": 5.49, + "learning_rate": 1.7066135782922893e-06, + "loss": 0.1246, + "step": 117745 + }, + { + "epoch": 5.49, + "learning_rate": 1.7058297932375025e-06, + "loss": 0.1311, + "step": 117750 + }, + { + "epoch": 5.49, + "learning_rate": 1.705046008182716e-06, + "loss": 0.3968, + "step": 117755 + }, + { + "epoch": 5.49, + "learning_rate": 1.7042622231279295e-06, + "loss": 0.1013, + "step": 117760 + }, + { + "epoch": 5.5, + "learning_rate": 1.7034784380731428e-06, + "loss": 0.0043, + "step": 117765 + }, + { + "epoch": 5.5, + "learning_rate": 1.7026946530183563e-06, + "loss": 0.053, + "step": 117770 + }, + { + "epoch": 5.5, + "learning_rate": 1.70191086796357e-06, + "loss": 0.0063, + "step": 117775 + }, + { + "epoch": 5.5, + "learning_rate": 1.7011270829087834e-06, + "loss": 0.0418, + "step": 117780 + }, + { + "epoch": 5.5, + "learning_rate": 1.7003432978539965e-06, + "loss": 0.0773, + "step": 117785 + }, + { + "epoch": 5.5, + "learning_rate": 1.6995595127992102e-06, + "loss": 0.0835, + "step": 117790 + }, + { + "epoch": 5.5, + "learning_rate": 1.6987757277444237e-06, + "loss": 0.0835, + "step": 117795 + }, + { + "epoch": 5.5, + "learning_rate": 1.697991942689637e-06, + "loss": 0.1206, + "step": 117800 + }, + { + "epoch": 5.5, + "learning_rate": 1.6972081576348504e-06, + "loss": 0.2685, + "step": 117805 + }, + { + "epoch": 5.5, + "learning_rate": 1.696424372580064e-06, + "loss": 0.1391, + "step": 117810 + }, + { + "epoch": 5.5, + "learning_rate": 1.6956405875252772e-06, + "loss": 0.0375, + "step": 117815 + }, + { + "epoch": 5.5, + "learning_rate": 1.6948568024704907e-06, + "loss": 0.0352, + "step": 117820 + }, + { + "epoch": 5.5, + "learning_rate": 1.6940730174157041e-06, + "loss": 0.0619, + "step": 117825 + }, + { + "epoch": 5.5, + "learning_rate": 1.6932892323609174e-06, + "loss": 0.0863, + "step": 117830 + }, + { + "epoch": 5.5, + "learning_rate": 1.692505447306131e-06, + "loss": 0.0567, + "step": 117835 + }, + { + "epoch": 5.5, + "learning_rate": 1.6917216622513444e-06, + "loss": 0.1202, + "step": 117840 + }, + { + "epoch": 5.5, + "learning_rate": 1.6909378771965579e-06, + "loss": 0.1154, + "step": 117845 + }, + { + "epoch": 5.5, + "learning_rate": 1.6901540921417711e-06, + "loss": 0.2777, + "step": 117850 + }, + { + "epoch": 5.5, + "learning_rate": 1.6893703070869846e-06, + "loss": 0.314, + "step": 117855 + }, + { + "epoch": 5.5, + "learning_rate": 1.688586522032198e-06, + "loss": 0.0993, + "step": 117860 + }, + { + "epoch": 5.5, + "learning_rate": 1.6878027369774114e-06, + "loss": 0.0182, + "step": 117865 + }, + { + "epoch": 5.5, + "learning_rate": 1.6870189519226248e-06, + "loss": 0.0344, + "step": 117870 + }, + { + "epoch": 5.5, + "learning_rate": 1.6862351668678383e-06, + "loss": 0.076, + "step": 117875 + }, + { + "epoch": 5.5, + "learning_rate": 1.6854513818130516e-06, + "loss": 0.064, + "step": 117880 + }, + { + "epoch": 5.5, + "learning_rate": 1.684667596758265e-06, + "loss": 0.0612, + "step": 117885 + }, + { + "epoch": 5.5, + "learning_rate": 1.6838838117034786e-06, + "loss": 0.0981, + "step": 117890 + }, + { + "epoch": 5.5, + "learning_rate": 1.6831000266486918e-06, + "loss": 0.0824, + "step": 117895 + }, + { + "epoch": 5.5, + "learning_rate": 1.6823162415939053e-06, + "loss": 0.0658, + "step": 117900 + }, + { + "epoch": 5.5, + "learning_rate": 1.6815324565391188e-06, + "loss": 0.3467, + "step": 117905 + }, + { + "epoch": 5.5, + "learning_rate": 1.6807486714843325e-06, + "loss": 0.0717, + "step": 117910 + }, + { + "epoch": 5.5, + "learning_rate": 1.6799648864295456e-06, + "loss": 0.0342, + "step": 117915 + }, + { + "epoch": 5.5, + "learning_rate": 1.6791811013747592e-06, + "loss": 0.0049, + "step": 117920 + }, + { + "epoch": 5.5, + "learning_rate": 1.6783973163199727e-06, + "loss": 0.0268, + "step": 117925 + }, + { + "epoch": 5.5, + "learning_rate": 1.6776135312651858e-06, + "loss": 0.0638, + "step": 117930 + }, + { + "epoch": 5.5, + "learning_rate": 1.6768297462103995e-06, + "loss": 0.091, + "step": 117935 + }, + { + "epoch": 5.5, + "learning_rate": 1.676045961155613e-06, + "loss": 0.0688, + "step": 117940 + }, + { + "epoch": 5.5, + "learning_rate": 1.6752621761008262e-06, + "loss": 0.0867, + "step": 117945 + }, + { + "epoch": 5.5, + "learning_rate": 1.6744783910460397e-06, + "loss": 0.1746, + "step": 117950 + }, + { + "epoch": 5.5, + "learning_rate": 1.6736946059912532e-06, + "loss": 0.2293, + "step": 117955 + }, + { + "epoch": 5.5, + "learning_rate": 1.6729108209364665e-06, + "loss": 0.0802, + "step": 117960 + }, + { + "epoch": 5.5, + "learning_rate": 1.67212703588168e-06, + "loss": 0.0191, + "step": 117965 + }, + { + "epoch": 5.5, + "learning_rate": 1.6713432508268934e-06, + "loss": 0.018, + "step": 117970 + }, + { + "epoch": 5.5, + "learning_rate": 1.670559465772107e-06, + "loss": 0.0239, + "step": 117975 + }, + { + "epoch": 5.51, + "learning_rate": 1.6697756807173202e-06, + "loss": 0.0454, + "step": 117980 + }, + { + "epoch": 5.51, + "learning_rate": 1.6689918956625337e-06, + "loss": 0.0179, + "step": 117985 + }, + { + "epoch": 5.51, + "learning_rate": 1.6682081106077472e-06, + "loss": 0.0683, + "step": 117990 + }, + { + "epoch": 5.51, + "learning_rate": 1.6674243255529604e-06, + "loss": 0.1159, + "step": 117995 + }, + { + "epoch": 5.51, + "learning_rate": 1.666640540498174e-06, + "loss": 0.1475, + "step": 118000 + }, + { + "epoch": 5.51, + "learning_rate": 1.6658567554433874e-06, + "loss": 0.2457, + "step": 118005 + }, + { + "epoch": 5.51, + "learning_rate": 1.6650729703886007e-06, + "loss": 0.0946, + "step": 118010 + }, + { + "epoch": 5.51, + "learning_rate": 1.6642891853338141e-06, + "loss": 0.004, + "step": 118015 + }, + { + "epoch": 5.51, + "learning_rate": 1.6635054002790276e-06, + "loss": 0.0275, + "step": 118020 + }, + { + "epoch": 5.51, + "learning_rate": 1.6627216152242409e-06, + "loss": 0.0138, + "step": 118025 + }, + { + "epoch": 5.51, + "learning_rate": 1.6619378301694544e-06, + "loss": 0.0769, + "step": 118030 + }, + { + "epoch": 5.51, + "learning_rate": 1.6611540451146679e-06, + "loss": 0.0829, + "step": 118035 + }, + { + "epoch": 5.51, + "learning_rate": 1.6603702600598813e-06, + "loss": 0.0981, + "step": 118040 + }, + { + "epoch": 5.51, + "learning_rate": 1.6595864750050946e-06, + "loss": 0.1702, + "step": 118045 + }, + { + "epoch": 5.51, + "learning_rate": 1.658802689950308e-06, + "loss": 0.0893, + "step": 118050 + }, + { + "epoch": 5.51, + "learning_rate": 1.6580189048955218e-06, + "loss": 0.2586, + "step": 118055 + }, + { + "epoch": 5.51, + "learning_rate": 1.6572351198407348e-06, + "loss": 0.1072, + "step": 118060 + }, + { + "epoch": 5.51, + "learning_rate": 1.6564513347859485e-06, + "loss": 0.0348, + "step": 118065 + }, + { + "epoch": 5.51, + "learning_rate": 1.655667549731162e-06, + "loss": 0.0639, + "step": 118070 + }, + { + "epoch": 5.51, + "learning_rate": 1.654883764676375e-06, + "loss": 0.0479, + "step": 118075 + }, + { + "epoch": 5.51, + "learning_rate": 1.6540999796215888e-06, + "loss": 0.0849, + "step": 118080 + }, + { + "epoch": 5.51, + "learning_rate": 1.6533161945668023e-06, + "loss": 0.0619, + "step": 118085 + }, + { + "epoch": 5.51, + "learning_rate": 1.6525324095120155e-06, + "loss": 0.0591, + "step": 118090 + }, + { + "epoch": 5.51, + "learning_rate": 1.651748624457229e-06, + "loss": 0.0603, + "step": 118095 + }, + { + "epoch": 5.51, + "learning_rate": 1.6509648394024425e-06, + "loss": 0.2684, + "step": 118100 + }, + { + "epoch": 5.51, + "learning_rate": 1.650181054347656e-06, + "loss": 0.2614, + "step": 118105 + }, + { + "epoch": 5.51, + "learning_rate": 1.6493972692928692e-06, + "loss": 0.1064, + "step": 118110 + }, + { + "epoch": 5.51, + "learning_rate": 1.6486134842380827e-06, + "loss": 0.0228, + "step": 118115 + }, + { + "epoch": 5.51, + "learning_rate": 1.6478296991832962e-06, + "loss": 0.0179, + "step": 118120 + }, + { + "epoch": 5.51, + "learning_rate": 1.6470459141285095e-06, + "loss": 0.0282, + "step": 118125 + }, + { + "epoch": 5.51, + "learning_rate": 1.646262129073723e-06, + "loss": 0.0687, + "step": 118130 + }, + { + "epoch": 5.51, + "learning_rate": 1.6454783440189364e-06, + "loss": 0.0428, + "step": 118135 + }, + { + "epoch": 5.51, + "learning_rate": 1.6446945589641497e-06, + "loss": 0.1387, + "step": 118140 + }, + { + "epoch": 5.51, + "learning_rate": 1.6439107739093632e-06, + "loss": 0.0604, + "step": 118145 + }, + { + "epoch": 5.51, + "learning_rate": 1.6431269888545767e-06, + "loss": 0.1279, + "step": 118150 + }, + { + "epoch": 5.51, + "learning_rate": 1.64234320379979e-06, + "loss": 0.4142, + "step": 118155 + }, + { + "epoch": 5.51, + "learning_rate": 1.6415594187450034e-06, + "loss": 0.1169, + "step": 118160 + }, + { + "epoch": 5.51, + "learning_rate": 1.640775633690217e-06, + "loss": 0.0054, + "step": 118165 + }, + { + "epoch": 5.51, + "learning_rate": 1.6399918486354304e-06, + "loss": 0.0393, + "step": 118170 + }, + { + "epoch": 5.51, + "learning_rate": 1.6392080635806437e-06, + "loss": 0.0182, + "step": 118175 + }, + { + "epoch": 5.51, + "learning_rate": 1.6384242785258571e-06, + "loss": 0.0511, + "step": 118180 + }, + { + "epoch": 5.51, + "learning_rate": 1.6376404934710706e-06, + "loss": 0.0713, + "step": 118185 + }, + { + "epoch": 5.51, + "learning_rate": 1.636856708416284e-06, + "loss": 0.0537, + "step": 118190 + }, + { + "epoch": 5.52, + "learning_rate": 1.6360729233614974e-06, + "loss": 0.0818, + "step": 118195 + }, + { + "epoch": 5.52, + "learning_rate": 1.635289138306711e-06, + "loss": 0.1695, + "step": 118200 + }, + { + "epoch": 5.52, + "learning_rate": 1.6345053532519241e-06, + "loss": 0.3009, + "step": 118205 + }, + { + "epoch": 5.52, + "learning_rate": 1.6337215681971378e-06, + "loss": 0.0653, + "step": 118210 + }, + { + "epoch": 5.52, + "learning_rate": 1.6329377831423513e-06, + "loss": 0.0449, + "step": 118215 + }, + { + "epoch": 5.52, + "learning_rate": 1.6321539980875644e-06, + "loss": 0.055, + "step": 118220 + }, + { + "epoch": 5.52, + "learning_rate": 1.631370213032778e-06, + "loss": 0.0595, + "step": 118225 + }, + { + "epoch": 5.52, + "learning_rate": 1.6305864279779915e-06, + "loss": 0.0548, + "step": 118230 + }, + { + "epoch": 5.52, + "learning_rate": 1.629802642923205e-06, + "loss": 0.0833, + "step": 118235 + }, + { + "epoch": 5.52, + "learning_rate": 1.6290188578684183e-06, + "loss": 0.0923, + "step": 118240 + }, + { + "epoch": 5.52, + "learning_rate": 1.6282350728136318e-06, + "loss": 0.0801, + "step": 118245 + }, + { + "epoch": 5.52, + "learning_rate": 1.6274512877588453e-06, + "loss": 0.1039, + "step": 118250 + }, + { + "epoch": 5.52, + "learning_rate": 1.6266675027040585e-06, + "loss": 0.2408, + "step": 118255 + }, + { + "epoch": 5.52, + "learning_rate": 1.625883717649272e-06, + "loss": 0.1223, + "step": 118260 + }, + { + "epoch": 5.52, + "learning_rate": 1.6250999325944855e-06, + "loss": 0.0113, + "step": 118265 + }, + { + "epoch": 5.52, + "learning_rate": 1.6243161475396988e-06, + "loss": 0.0336, + "step": 118270 + }, + { + "epoch": 5.52, + "learning_rate": 1.6235323624849123e-06, + "loss": 0.0765, + "step": 118275 + }, + { + "epoch": 5.52, + "learning_rate": 1.6227485774301257e-06, + "loss": 0.027, + "step": 118280 + }, + { + "epoch": 5.52, + "learning_rate": 1.621964792375339e-06, + "loss": 0.0104, + "step": 118285 + }, + { + "epoch": 5.52, + "learning_rate": 1.6211810073205525e-06, + "loss": 0.1367, + "step": 118290 + }, + { + "epoch": 5.52, + "learning_rate": 1.620397222265766e-06, + "loss": 0.1494, + "step": 118295 + }, + { + "epoch": 5.52, + "learning_rate": 1.6196134372109795e-06, + "loss": 0.122, + "step": 118300 + }, + { + "epoch": 5.52, + "learning_rate": 1.6188296521561927e-06, + "loss": 0.1893, + "step": 118305 + }, + { + "epoch": 5.52, + "learning_rate": 1.6180458671014062e-06, + "loss": 0.0698, + "step": 118310 + }, + { + "epoch": 5.52, + "learning_rate": 1.6172620820466197e-06, + "loss": 0.0176, + "step": 118315 + }, + { + "epoch": 5.52, + "learning_rate": 1.616478296991833e-06, + "loss": 0.0252, + "step": 118320 + }, + { + "epoch": 5.52, + "learning_rate": 1.6156945119370464e-06, + "loss": 0.0615, + "step": 118325 + }, + { + "epoch": 5.52, + "learning_rate": 1.61491072688226e-06, + "loss": 0.0267, + "step": 118330 + }, + { + "epoch": 5.52, + "learning_rate": 1.6141269418274732e-06, + "loss": 0.0748, + "step": 118335 + }, + { + "epoch": 5.52, + "learning_rate": 1.6133431567726867e-06, + "loss": 0.083, + "step": 118340 + }, + { + "epoch": 5.52, + "learning_rate": 1.6125593717179004e-06, + "loss": 0.0917, + "step": 118345 + }, + { + "epoch": 5.52, + "learning_rate": 1.6117755866631134e-06, + "loss": 0.1498, + "step": 118350 + }, + { + "epoch": 5.52, + "learning_rate": 1.6109918016083271e-06, + "loss": 0.297, + "step": 118355 + }, + { + "epoch": 5.52, + "learning_rate": 1.6102080165535406e-06, + "loss": 0.1074, + "step": 118360 + }, + { + "epoch": 5.52, + "learning_rate": 1.609424231498754e-06, + "loss": 0.0027, + "step": 118365 + }, + { + "epoch": 5.52, + "learning_rate": 1.6086404464439674e-06, + "loss": 0.0598, + "step": 118370 + }, + { + "epoch": 5.52, + "learning_rate": 1.6078566613891808e-06, + "loss": 0.0369, + "step": 118375 + }, + { + "epoch": 5.52, + "learning_rate": 1.6070728763343943e-06, + "loss": 0.106, + "step": 118380 + }, + { + "epoch": 5.52, + "learning_rate": 1.6062890912796076e-06, + "loss": 0.0611, + "step": 118385 + }, + { + "epoch": 5.52, + "learning_rate": 1.605505306224821e-06, + "loss": 0.0589, + "step": 118390 + }, + { + "epoch": 5.52, + "learning_rate": 1.6047215211700346e-06, + "loss": 0.1073, + "step": 118395 + }, + { + "epoch": 5.52, + "learning_rate": 1.6039377361152478e-06, + "loss": 0.0661, + "step": 118400 + }, + { + "epoch": 5.52, + "learning_rate": 1.6031539510604613e-06, + "loss": 0.279, + "step": 118405 + }, + { + "epoch": 5.53, + "learning_rate": 1.6023701660056748e-06, + "loss": 0.0799, + "step": 118410 + }, + { + "epoch": 5.53, + "learning_rate": 1.601586380950888e-06, + "loss": 0.0207, + "step": 118415 + }, + { + "epoch": 5.53, + "learning_rate": 1.6008025958961015e-06, + "loss": 0.0293, + "step": 118420 + }, + { + "epoch": 5.53, + "learning_rate": 1.600018810841315e-06, + "loss": 0.0249, + "step": 118425 + }, + { + "epoch": 5.53, + "learning_rate": 1.5992350257865285e-06, + "loss": 0.0597, + "step": 118430 + }, + { + "epoch": 5.53, + "learning_rate": 1.5984512407317418e-06, + "loss": 0.0234, + "step": 118435 + }, + { + "epoch": 5.53, + "learning_rate": 1.5976674556769553e-06, + "loss": 0.2071, + "step": 118440 + }, + { + "epoch": 5.53, + "learning_rate": 1.5968836706221687e-06, + "loss": 0.1068, + "step": 118445 + }, + { + "epoch": 5.53, + "learning_rate": 1.596099885567382e-06, + "loss": 0.106, + "step": 118450 + }, + { + "epoch": 5.53, + "learning_rate": 1.5953161005125955e-06, + "loss": 0.2303, + "step": 118455 + }, + { + "epoch": 5.53, + "learning_rate": 1.594532315457809e-06, + "loss": 0.0838, + "step": 118460 + }, + { + "epoch": 5.53, + "learning_rate": 1.5937485304030222e-06, + "loss": 0.036, + "step": 118465 + }, + { + "epoch": 5.53, + "learning_rate": 1.5929647453482357e-06, + "loss": 0.0363, + "step": 118470 + }, + { + "epoch": 5.53, + "learning_rate": 1.5921809602934492e-06, + "loss": 0.0271, + "step": 118475 + }, + { + "epoch": 5.53, + "learning_rate": 1.5913971752386625e-06, + "loss": 0.0693, + "step": 118480 + }, + { + "epoch": 5.53, + "learning_rate": 1.590613390183876e-06, + "loss": 0.033, + "step": 118485 + }, + { + "epoch": 5.53, + "learning_rate": 1.5898296051290897e-06, + "loss": 0.0728, + "step": 118490 + }, + { + "epoch": 5.53, + "learning_rate": 1.5890458200743031e-06, + "loss": 0.0943, + "step": 118495 + }, + { + "epoch": 5.53, + "learning_rate": 1.5882620350195164e-06, + "loss": 0.1027, + "step": 118500 + }, + { + "epoch": 5.53, + "learning_rate": 1.58747824996473e-06, + "loss": 0.2441, + "step": 118505 + }, + { + "epoch": 5.53, + "learning_rate": 1.5866944649099434e-06, + "loss": 0.0659, + "step": 118510 + }, + { + "epoch": 5.53, + "learning_rate": 1.5859106798551566e-06, + "loss": 0.0282, + "step": 118515 + }, + { + "epoch": 5.53, + "learning_rate": 1.5851268948003701e-06, + "loss": 0.035, + "step": 118520 + }, + { + "epoch": 5.53, + "learning_rate": 1.5843431097455836e-06, + "loss": 0.0712, + "step": 118525 + }, + { + "epoch": 5.53, + "learning_rate": 1.5835593246907969e-06, + "loss": 0.0301, + "step": 118530 + }, + { + "epoch": 5.53, + "learning_rate": 1.5827755396360104e-06, + "loss": 0.097, + "step": 118535 + }, + { + "epoch": 5.53, + "learning_rate": 1.5819917545812238e-06, + "loss": 0.0893, + "step": 118540 + }, + { + "epoch": 5.53, + "learning_rate": 1.5812079695264371e-06, + "loss": 0.1268, + "step": 118545 + }, + { + "epoch": 5.53, + "learning_rate": 1.5804241844716506e-06, + "loss": 0.0838, + "step": 118550 + }, + { + "epoch": 5.53, + "learning_rate": 1.579640399416864e-06, + "loss": 0.2881, + "step": 118555 + }, + { + "epoch": 5.53, + "learning_rate": 1.5788566143620776e-06, + "loss": 0.1335, + "step": 118560 + }, + { + "epoch": 5.53, + "learning_rate": 1.5780728293072908e-06, + "loss": 0.0256, + "step": 118565 + }, + { + "epoch": 5.53, + "learning_rate": 1.5772890442525043e-06, + "loss": 0.0154, + "step": 118570 + }, + { + "epoch": 5.53, + "learning_rate": 1.5765052591977178e-06, + "loss": 0.0305, + "step": 118575 + }, + { + "epoch": 5.53, + "learning_rate": 1.575721474142931e-06, + "loss": 0.0292, + "step": 118580 + }, + { + "epoch": 5.53, + "learning_rate": 1.5749376890881446e-06, + "loss": 0.0725, + "step": 118585 + }, + { + "epoch": 5.53, + "learning_rate": 1.574153904033358e-06, + "loss": 0.0743, + "step": 118590 + }, + { + "epoch": 5.53, + "learning_rate": 1.5733701189785713e-06, + "loss": 0.0789, + "step": 118595 + }, + { + "epoch": 5.53, + "learning_rate": 1.5725863339237848e-06, + "loss": 0.1487, + "step": 118600 + }, + { + "epoch": 5.53, + "learning_rate": 1.5718025488689983e-06, + "loss": 0.3297, + "step": 118605 + }, + { + "epoch": 5.53, + "learning_rate": 1.5710187638142115e-06, + "loss": 0.0704, + "step": 118610 + }, + { + "epoch": 5.53, + "learning_rate": 1.570234978759425e-06, + "loss": 0.0043, + "step": 118615 + }, + { + "epoch": 5.53, + "learning_rate": 1.5694511937046385e-06, + "loss": 0.0226, + "step": 118620 + }, + { + "epoch": 5.54, + "learning_rate": 1.5686674086498522e-06, + "loss": 0.0479, + "step": 118625 + }, + { + "epoch": 5.54, + "learning_rate": 1.5678836235950653e-06, + "loss": 0.0604, + "step": 118630 + }, + { + "epoch": 5.54, + "learning_rate": 1.567099838540279e-06, + "loss": 0.0329, + "step": 118635 + }, + { + "epoch": 5.54, + "learning_rate": 1.5663160534854924e-06, + "loss": 0.0751, + "step": 118640 + }, + { + "epoch": 5.54, + "learning_rate": 1.5655322684307057e-06, + "loss": 0.0855, + "step": 118645 + }, + { + "epoch": 5.54, + "learning_rate": 1.5647484833759192e-06, + "loss": 0.1244, + "step": 118650 + }, + { + "epoch": 5.54, + "learning_rate": 1.5639646983211327e-06, + "loss": 0.2482, + "step": 118655 + }, + { + "epoch": 5.54, + "learning_rate": 1.563180913266346e-06, + "loss": 0.1385, + "step": 118660 + }, + { + "epoch": 5.54, + "learning_rate": 1.5623971282115594e-06, + "loss": 0.0043, + "step": 118665 + }, + { + "epoch": 5.54, + "learning_rate": 1.561613343156773e-06, + "loss": 0.0313, + "step": 118670 + }, + { + "epoch": 5.54, + "learning_rate": 1.5608295581019862e-06, + "loss": 0.034, + "step": 118675 + }, + { + "epoch": 5.54, + "learning_rate": 1.5600457730471997e-06, + "loss": 0.0365, + "step": 118680 + }, + { + "epoch": 5.54, + "learning_rate": 1.5592619879924131e-06, + "loss": 0.0207, + "step": 118685 + }, + { + "epoch": 5.54, + "learning_rate": 1.5584782029376266e-06, + "loss": 0.0873, + "step": 118690 + }, + { + "epoch": 5.54, + "learning_rate": 1.5576944178828399e-06, + "loss": 0.1147, + "step": 118695 + }, + { + "epoch": 5.54, + "learning_rate": 1.5569106328280534e-06, + "loss": 0.0848, + "step": 118700 + }, + { + "epoch": 5.54, + "learning_rate": 1.5561268477732669e-06, + "loss": 0.2956, + "step": 118705 + }, + { + "epoch": 5.54, + "learning_rate": 1.5553430627184801e-06, + "loss": 0.1095, + "step": 118710 + }, + { + "epoch": 5.54, + "learning_rate": 1.5545592776636936e-06, + "loss": 0.0238, + "step": 118715 + }, + { + "epoch": 5.54, + "learning_rate": 1.553775492608907e-06, + "loss": 0.0194, + "step": 118720 + }, + { + "epoch": 5.54, + "learning_rate": 1.5529917075541204e-06, + "loss": 0.0332, + "step": 118725 + }, + { + "epoch": 5.54, + "learning_rate": 1.5522079224993338e-06, + "loss": 0.0478, + "step": 118730 + }, + { + "epoch": 5.54, + "learning_rate": 1.5514241374445473e-06, + "loss": 0.0276, + "step": 118735 + }, + { + "epoch": 5.54, + "learning_rate": 1.5506403523897606e-06, + "loss": 0.0804, + "step": 118740 + }, + { + "epoch": 5.54, + "learning_rate": 1.549856567334974e-06, + "loss": 0.1079, + "step": 118745 + }, + { + "epoch": 5.54, + "learning_rate": 1.5490727822801876e-06, + "loss": 0.1106, + "step": 118750 + }, + { + "epoch": 5.54, + "learning_rate": 1.5482889972254013e-06, + "loss": 0.2475, + "step": 118755 + }, + { + "epoch": 5.54, + "learning_rate": 1.5475052121706143e-06, + "loss": 0.1058, + "step": 118760 + }, + { + "epoch": 5.54, + "learning_rate": 1.5467214271158278e-06, + "loss": 0.0144, + "step": 118765 + }, + { + "epoch": 5.54, + "learning_rate": 1.5459376420610415e-06, + "loss": 0.0537, + "step": 118770 + }, + { + "epoch": 5.54, + "learning_rate": 1.5451538570062545e-06, + "loss": 0.0692, + "step": 118775 + }, + { + "epoch": 5.54, + "learning_rate": 1.5443700719514682e-06, + "loss": 0.0387, + "step": 118780 + }, + { + "epoch": 5.54, + "learning_rate": 1.5435862868966817e-06, + "loss": 0.0385, + "step": 118785 + }, + { + "epoch": 5.54, + "learning_rate": 1.542802501841895e-06, + "loss": 0.1174, + "step": 118790 + }, + { + "epoch": 5.54, + "learning_rate": 1.5420187167871085e-06, + "loss": 0.095, + "step": 118795 + }, + { + "epoch": 5.54, + "learning_rate": 1.541234931732322e-06, + "loss": 0.1169, + "step": 118800 + }, + { + "epoch": 5.54, + "learning_rate": 1.5404511466775352e-06, + "loss": 0.3064, + "step": 118805 + }, + { + "epoch": 5.54, + "learning_rate": 1.5396673616227487e-06, + "loss": 0.1231, + "step": 118810 + }, + { + "epoch": 5.54, + "learning_rate": 1.5388835765679622e-06, + "loss": 0.0153, + "step": 118815 + }, + { + "epoch": 5.54, + "learning_rate": 1.5380997915131757e-06, + "loss": 0.0398, + "step": 118820 + }, + { + "epoch": 5.54, + "learning_rate": 1.537316006458389e-06, + "loss": 0.0411, + "step": 118825 + }, + { + "epoch": 5.54, + "learning_rate": 1.5365322214036024e-06, + "loss": 0.0748, + "step": 118830 + }, + { + "epoch": 5.55, + "learning_rate": 1.535748436348816e-06, + "loss": 0.028, + "step": 118835 + }, + { + "epoch": 5.55, + "learning_rate": 1.5349646512940292e-06, + "loss": 0.0999, + "step": 118840 + }, + { + "epoch": 5.55, + "learning_rate": 1.5341808662392427e-06, + "loss": 0.1921, + "step": 118845 + }, + { + "epoch": 5.55, + "learning_rate": 1.5333970811844561e-06, + "loss": 0.2165, + "step": 118850 + }, + { + "epoch": 5.55, + "learning_rate": 1.5326132961296694e-06, + "loss": 0.3023, + "step": 118855 + }, + { + "epoch": 5.55, + "learning_rate": 1.531829511074883e-06, + "loss": 0.095, + "step": 118860 + }, + { + "epoch": 5.55, + "learning_rate": 1.5310457260200964e-06, + "loss": 0.0143, + "step": 118865 + }, + { + "epoch": 5.55, + "learning_rate": 1.5302619409653097e-06, + "loss": 0.0126, + "step": 118870 + }, + { + "epoch": 5.55, + "learning_rate": 1.5294781559105231e-06, + "loss": 0.0291, + "step": 118875 + }, + { + "epoch": 5.55, + "learning_rate": 1.5286943708557366e-06, + "loss": 0.0222, + "step": 118880 + }, + { + "epoch": 5.55, + "learning_rate": 1.52791058580095e-06, + "loss": 0.0692, + "step": 118885 + }, + { + "epoch": 5.55, + "learning_rate": 1.5271268007461634e-06, + "loss": 0.1077, + "step": 118890 + }, + { + "epoch": 5.55, + "learning_rate": 1.5263430156913769e-06, + "loss": 0.1884, + "step": 118895 + }, + { + "epoch": 5.55, + "learning_rate": 1.5255592306365905e-06, + "loss": 0.1587, + "step": 118900 + }, + { + "epoch": 5.55, + "learning_rate": 1.5247754455818036e-06, + "loss": 0.1827, + "step": 118905 + }, + { + "epoch": 5.55, + "learning_rate": 1.523991660527017e-06, + "loss": 0.1055, + "step": 118910 + }, + { + "epoch": 5.55, + "learning_rate": 1.5232078754722308e-06, + "loss": 0.0347, + "step": 118915 + }, + { + "epoch": 5.55, + "learning_rate": 1.5224240904174438e-06, + "loss": 0.0059, + "step": 118920 + }, + { + "epoch": 5.55, + "learning_rate": 1.5216403053626575e-06, + "loss": 0.006, + "step": 118925 + }, + { + "epoch": 5.55, + "learning_rate": 1.520856520307871e-06, + "loss": 0.0428, + "step": 118930 + }, + { + "epoch": 5.55, + "learning_rate": 1.5200727352530843e-06, + "loss": 0.0508, + "step": 118935 + }, + { + "epoch": 5.55, + "learning_rate": 1.5192889501982978e-06, + "loss": 0.0586, + "step": 118940 + }, + { + "epoch": 5.55, + "learning_rate": 1.5185051651435113e-06, + "loss": 0.0857, + "step": 118945 + }, + { + "epoch": 5.55, + "learning_rate": 1.5177213800887247e-06, + "loss": 0.1197, + "step": 118950 + }, + { + "epoch": 5.55, + "learning_rate": 1.516937595033938e-06, + "loss": 0.2449, + "step": 118955 + }, + { + "epoch": 5.55, + "learning_rate": 1.5161538099791515e-06, + "loss": 0.0527, + "step": 118960 + }, + { + "epoch": 5.55, + "learning_rate": 1.515370024924365e-06, + "loss": 0.0111, + "step": 118965 + }, + { + "epoch": 5.55, + "learning_rate": 1.5145862398695782e-06, + "loss": 0.0357, + "step": 118970 + }, + { + "epoch": 5.55, + "learning_rate": 1.5138024548147917e-06, + "loss": 0.0146, + "step": 118975 + }, + { + "epoch": 5.55, + "learning_rate": 1.5130186697600052e-06, + "loss": 0.0539, + "step": 118980 + }, + { + "epoch": 5.55, + "learning_rate": 1.5122348847052185e-06, + "loss": 0.0435, + "step": 118985 + }, + { + "epoch": 5.55, + "learning_rate": 1.511451099650432e-06, + "loss": 0.0692, + "step": 118990 + }, + { + "epoch": 5.55, + "learning_rate": 1.5106673145956454e-06, + "loss": 0.1095, + "step": 118995 + }, + { + "epoch": 5.55, + "learning_rate": 1.5098835295408587e-06, + "loss": 0.0693, + "step": 119000 + }, + { + "epoch": 5.55, + "learning_rate": 1.5090997444860722e-06, + "loss": 0.2331, + "step": 119005 + }, + { + "epoch": 5.55, + "learning_rate": 1.5083159594312857e-06, + "loss": 0.1562, + "step": 119010 + }, + { + "epoch": 5.55, + "learning_rate": 1.5075321743764992e-06, + "loss": 0.0565, + "step": 119015 + }, + { + "epoch": 5.55, + "learning_rate": 1.5067483893217124e-06, + "loss": 0.0834, + "step": 119020 + }, + { + "epoch": 5.55, + "learning_rate": 1.505964604266926e-06, + "loss": 0.027, + "step": 119025 + }, + { + "epoch": 5.55, + "learning_rate": 1.5051808192121394e-06, + "loss": 0.035, + "step": 119030 + }, + { + "epoch": 5.55, + "learning_rate": 1.5043970341573527e-06, + "loss": 0.0575, + "step": 119035 + }, + { + "epoch": 5.55, + "learning_rate": 1.5036132491025661e-06, + "loss": 0.0866, + "step": 119040 + }, + { + "epoch": 5.55, + "learning_rate": 1.5028294640477798e-06, + "loss": 0.0746, + "step": 119045 + }, + { + "epoch": 5.56, + "learning_rate": 1.5022024360039503e-06, + "loss": 0.2151, + "step": 119050 + }, + { + "epoch": 5.56, + "learning_rate": 1.5014186509491638e-06, + "loss": 0.1852, + "step": 119055 + }, + { + "epoch": 5.56, + "learning_rate": 1.5006348658943772e-06, + "loss": 0.1276, + "step": 119060 + }, + { + "epoch": 5.56, + "learning_rate": 1.4998510808395907e-06, + "loss": 0.03, + "step": 119065 + }, + { + "epoch": 5.56, + "learning_rate": 1.499067295784804e-06, + "loss": 0.0218, + "step": 119070 + }, + { + "epoch": 5.56, + "learning_rate": 1.4982835107300175e-06, + "loss": 0.041, + "step": 119075 + }, + { + "epoch": 5.56, + "learning_rate": 1.497499725675231e-06, + "loss": 0.0136, + "step": 119080 + }, + { + "epoch": 5.56, + "learning_rate": 1.4967159406204442e-06, + "loss": 0.0525, + "step": 119085 + }, + { + "epoch": 5.56, + "learning_rate": 1.4959321555656577e-06, + "loss": 0.0304, + "step": 119090 + }, + { + "epoch": 5.56, + "learning_rate": 1.4951483705108712e-06, + "loss": 0.1557, + "step": 119095 + }, + { + "epoch": 5.56, + "learning_rate": 1.4943645854560845e-06, + "loss": 0.1877, + "step": 119100 + }, + { + "epoch": 5.56, + "learning_rate": 1.493580800401298e-06, + "loss": 0.2421, + "step": 119105 + }, + { + "epoch": 5.56, + "learning_rate": 1.4927970153465116e-06, + "loss": 0.0618, + "step": 119110 + }, + { + "epoch": 5.56, + "learning_rate": 1.4920132302917247e-06, + "loss": 0.0166, + "step": 119115 + }, + { + "epoch": 5.56, + "learning_rate": 1.4912294452369384e-06, + "loss": 0.0466, + "step": 119120 + }, + { + "epoch": 5.56, + "learning_rate": 1.4904456601821519e-06, + "loss": 0.0847, + "step": 119125 + }, + { + "epoch": 5.56, + "learning_rate": 1.4896618751273654e-06, + "loss": 0.0279, + "step": 119130 + }, + { + "epoch": 5.56, + "learning_rate": 1.4888780900725786e-06, + "loss": 0.0361, + "step": 119135 + }, + { + "epoch": 5.56, + "learning_rate": 1.4880943050177921e-06, + "loss": 0.0897, + "step": 119140 + }, + { + "epoch": 5.56, + "learning_rate": 1.4873105199630056e-06, + "loss": 0.0633, + "step": 119145 + }, + { + "epoch": 5.56, + "learning_rate": 1.4865267349082189e-06, + "loss": 0.1593, + "step": 119150 + }, + { + "epoch": 5.56, + "learning_rate": 1.4857429498534324e-06, + "loss": 0.214, + "step": 119155 + }, + { + "epoch": 5.56, + "learning_rate": 1.4849591647986458e-06, + "loss": 0.0675, + "step": 119160 + }, + { + "epoch": 5.56, + "learning_rate": 1.484175379743859e-06, + "loss": 0.0281, + "step": 119165 + }, + { + "epoch": 5.56, + "learning_rate": 1.4833915946890726e-06, + "loss": 0.0379, + "step": 119170 + }, + { + "epoch": 5.56, + "learning_rate": 1.482607809634286e-06, + "loss": 0.0872, + "step": 119175 + }, + { + "epoch": 5.56, + "learning_rate": 1.4818240245794993e-06, + "loss": 0.0645, + "step": 119180 + }, + { + "epoch": 5.56, + "learning_rate": 1.4810402395247128e-06, + "loss": 0.0305, + "step": 119185 + }, + { + "epoch": 5.56, + "learning_rate": 1.4802564544699263e-06, + "loss": 0.0786, + "step": 119190 + }, + { + "epoch": 5.56, + "learning_rate": 1.4794726694151398e-06, + "loss": 0.0747, + "step": 119195 + }, + { + "epoch": 5.56, + "learning_rate": 1.478688884360353e-06, + "loss": 0.0986, + "step": 119200 + }, + { + "epoch": 5.56, + "learning_rate": 1.4779050993055665e-06, + "loss": 0.1527, + "step": 119205 + }, + { + "epoch": 5.56, + "learning_rate": 1.47712131425078e-06, + "loss": 0.0889, + "step": 119210 + }, + { + "epoch": 5.56, + "learning_rate": 1.4763375291959933e-06, + "loss": 0.0081, + "step": 119215 + }, + { + "epoch": 5.56, + "learning_rate": 1.4755537441412068e-06, + "loss": 0.0247, + "step": 119220 + }, + { + "epoch": 5.56, + "learning_rate": 1.4747699590864203e-06, + "loss": 0.0194, + "step": 119225 + }, + { + "epoch": 5.56, + "learning_rate": 1.4739861740316335e-06, + "loss": 0.0373, + "step": 119230 + }, + { + "epoch": 5.56, + "learning_rate": 1.473202388976847e-06, + "loss": 0.0995, + "step": 119235 + }, + { + "epoch": 5.56, + "learning_rate": 1.4724186039220605e-06, + "loss": 0.0479, + "step": 119240 + }, + { + "epoch": 5.56, + "learning_rate": 1.4716348188672738e-06, + "loss": 0.0799, + "step": 119245 + }, + { + "epoch": 5.56, + "learning_rate": 1.4708510338124872e-06, + "loss": 0.16, + "step": 119250 + }, + { + "epoch": 5.56, + "learning_rate": 1.470067248757701e-06, + "loss": 0.2498, + "step": 119255 + }, + { + "epoch": 5.56, + "learning_rate": 1.4692834637029144e-06, + "loss": 0.0825, + "step": 119260 + }, + { + "epoch": 5.57, + "learning_rate": 1.4684996786481277e-06, + "loss": 0.0157, + "step": 119265 + }, + { + "epoch": 5.57, + "learning_rate": 1.4677158935933412e-06, + "loss": 0.0203, + "step": 119270 + }, + { + "epoch": 5.57, + "learning_rate": 1.4669321085385547e-06, + "loss": 0.0235, + "step": 119275 + }, + { + "epoch": 5.57, + "learning_rate": 1.466148323483768e-06, + "loss": 0.0416, + "step": 119280 + }, + { + "epoch": 5.57, + "learning_rate": 1.4653645384289814e-06, + "loss": 0.0782, + "step": 119285 + }, + { + "epoch": 5.57, + "learning_rate": 1.4645807533741949e-06, + "loss": 0.1039, + "step": 119290 + }, + { + "epoch": 5.57, + "learning_rate": 1.4637969683194082e-06, + "loss": 0.0757, + "step": 119295 + }, + { + "epoch": 5.57, + "learning_rate": 1.4630131832646216e-06, + "loss": 0.1332, + "step": 119300 + }, + { + "epoch": 5.57, + "learning_rate": 1.4622293982098351e-06, + "loss": 0.3004, + "step": 119305 + }, + { + "epoch": 5.57, + "learning_rate": 1.4614456131550484e-06, + "loss": 0.0947, + "step": 119310 + }, + { + "epoch": 5.57, + "learning_rate": 1.4606618281002619e-06, + "loss": 0.0191, + "step": 119315 + }, + { + "epoch": 5.57, + "learning_rate": 1.4598780430454754e-06, + "loss": 0.0686, + "step": 119320 + }, + { + "epoch": 5.57, + "learning_rate": 1.4590942579906888e-06, + "loss": 0.0075, + "step": 119325 + }, + { + "epoch": 5.57, + "learning_rate": 1.4583104729359021e-06, + "loss": 0.0394, + "step": 119330 + }, + { + "epoch": 5.57, + "learning_rate": 1.4575266878811156e-06, + "loss": 0.0769, + "step": 119335 + }, + { + "epoch": 5.57, + "learning_rate": 1.456742902826329e-06, + "loss": 0.0328, + "step": 119340 + }, + { + "epoch": 5.57, + "learning_rate": 1.4559591177715423e-06, + "loss": 0.0869, + "step": 119345 + }, + { + "epoch": 5.57, + "learning_rate": 1.4551753327167558e-06, + "loss": 0.1473, + "step": 119350 + }, + { + "epoch": 5.57, + "learning_rate": 1.4543915476619693e-06, + "loss": 0.2843, + "step": 119355 + }, + { + "epoch": 5.57, + "learning_rate": 1.4536077626071826e-06, + "loss": 0.1204, + "step": 119360 + }, + { + "epoch": 5.57, + "learning_rate": 1.452823977552396e-06, + "loss": 0.0454, + "step": 119365 + }, + { + "epoch": 5.57, + "learning_rate": 1.4520401924976095e-06, + "loss": 0.0207, + "step": 119370 + }, + { + "epoch": 5.57, + "learning_rate": 1.4512564074428228e-06, + "loss": 0.0333, + "step": 119375 + }, + { + "epoch": 5.57, + "learning_rate": 1.4504726223880363e-06, + "loss": 0.063, + "step": 119380 + }, + { + "epoch": 5.57, + "learning_rate": 1.4496888373332498e-06, + "loss": 0.1307, + "step": 119385 + }, + { + "epoch": 5.57, + "learning_rate": 1.4489050522784635e-06, + "loss": 0.0762, + "step": 119390 + }, + { + "epoch": 5.57, + "learning_rate": 1.4481212672236765e-06, + "loss": 0.1187, + "step": 119395 + }, + { + "epoch": 5.57, + "learning_rate": 1.4473374821688902e-06, + "loss": 0.09, + "step": 119400 + }, + { + "epoch": 5.57, + "learning_rate": 1.4465536971141037e-06, + "loss": 0.2913, + "step": 119405 + }, + { + "epoch": 5.57, + "learning_rate": 1.445769912059317e-06, + "loss": 0.0975, + "step": 119410 + }, + { + "epoch": 5.57, + "learning_rate": 1.4449861270045305e-06, + "loss": 0.0567, + "step": 119415 + }, + { + "epoch": 5.57, + "learning_rate": 1.444202341949744e-06, + "loss": 0.0442, + "step": 119420 + }, + { + "epoch": 5.57, + "learning_rate": 1.4434185568949572e-06, + "loss": 0.0182, + "step": 119425 + }, + { + "epoch": 5.57, + "learning_rate": 1.4426347718401707e-06, + "loss": 0.1, + "step": 119430 + }, + { + "epoch": 5.57, + "learning_rate": 1.4418509867853842e-06, + "loss": 0.025, + "step": 119435 + }, + { + "epoch": 5.57, + "learning_rate": 1.4410672017305975e-06, + "loss": 0.0876, + "step": 119440 + }, + { + "epoch": 5.57, + "learning_rate": 1.440283416675811e-06, + "loss": 0.1207, + "step": 119445 + }, + { + "epoch": 5.57, + "learning_rate": 1.4394996316210244e-06, + "loss": 0.0996, + "step": 119450 + }, + { + "epoch": 5.57, + "learning_rate": 1.438715846566238e-06, + "loss": 0.2162, + "step": 119455 + }, + { + "epoch": 5.57, + "learning_rate": 1.4379320615114512e-06, + "loss": 0.0602, + "step": 119460 + }, + { + "epoch": 5.57, + "learning_rate": 1.4371482764566647e-06, + "loss": 0.0126, + "step": 119465 + }, + { + "epoch": 5.57, + "learning_rate": 1.4363644914018781e-06, + "loss": 0.016, + "step": 119470 + }, + { + "epoch": 5.57, + "learning_rate": 1.4355807063470914e-06, + "loss": 0.0566, + "step": 119475 + }, + { + "epoch": 5.58, + "learning_rate": 1.4347969212923049e-06, + "loss": 0.0261, + "step": 119480 + }, + { + "epoch": 5.58, + "learning_rate": 1.4340131362375184e-06, + "loss": 0.0573, + "step": 119485 + }, + { + "epoch": 5.58, + "learning_rate": 1.4332293511827316e-06, + "loss": 0.0618, + "step": 119490 + }, + { + "epoch": 5.58, + "learning_rate": 1.4324455661279451e-06, + "loss": 0.0797, + "step": 119495 + }, + { + "epoch": 5.58, + "learning_rate": 1.4316617810731586e-06, + "loss": 0.1261, + "step": 119500 + }, + { + "epoch": 5.58, + "learning_rate": 1.4308779960183719e-06, + "loss": 0.2044, + "step": 119505 + }, + { + "epoch": 5.58, + "learning_rate": 1.4300942109635854e-06, + "loss": 0.1315, + "step": 119510 + }, + { + "epoch": 5.58, + "learning_rate": 1.4293104259087988e-06, + "loss": 0.0295, + "step": 119515 + }, + { + "epoch": 5.58, + "learning_rate": 1.4285266408540125e-06, + "loss": 0.0546, + "step": 119520 + }, + { + "epoch": 5.58, + "learning_rate": 1.4277428557992256e-06, + "loss": 0.0315, + "step": 119525 + }, + { + "epoch": 5.58, + "learning_rate": 1.426959070744439e-06, + "loss": 0.0318, + "step": 119530 + }, + { + "epoch": 5.58, + "learning_rate": 1.4261752856896528e-06, + "loss": 0.0285, + "step": 119535 + }, + { + "epoch": 5.58, + "learning_rate": 1.4253915006348658e-06, + "loss": 0.0465, + "step": 119540 + }, + { + "epoch": 5.58, + "learning_rate": 1.4246077155800795e-06, + "loss": 0.0528, + "step": 119545 + }, + { + "epoch": 5.58, + "learning_rate": 1.423823930525293e-06, + "loss": 0.1867, + "step": 119550 + }, + { + "epoch": 5.58, + "learning_rate": 1.4230401454705063e-06, + "loss": 0.3447, + "step": 119555 + }, + { + "epoch": 5.58, + "learning_rate": 1.4222563604157198e-06, + "loss": 0.0937, + "step": 119560 + }, + { + "epoch": 5.58, + "learning_rate": 1.4214725753609332e-06, + "loss": 0.0172, + "step": 119565 + }, + { + "epoch": 5.58, + "learning_rate": 1.4206887903061465e-06, + "loss": 0.028, + "step": 119570 + }, + { + "epoch": 5.58, + "learning_rate": 1.41990500525136e-06, + "loss": 0.0227, + "step": 119575 + }, + { + "epoch": 5.58, + "learning_rate": 1.4191212201965735e-06, + "loss": 0.068, + "step": 119580 + }, + { + "epoch": 5.58, + "learning_rate": 1.418337435141787e-06, + "loss": 0.0724, + "step": 119585 + }, + { + "epoch": 5.58, + "learning_rate": 1.4175536500870002e-06, + "loss": 0.1201, + "step": 119590 + }, + { + "epoch": 5.58, + "learning_rate": 1.4167698650322137e-06, + "loss": 0.1346, + "step": 119595 + }, + { + "epoch": 5.58, + "learning_rate": 1.4159860799774272e-06, + "loss": 0.0971, + "step": 119600 + }, + { + "epoch": 5.58, + "learning_rate": 1.4152022949226405e-06, + "loss": 0.1889, + "step": 119605 + }, + { + "epoch": 5.58, + "learning_rate": 1.414418509867854e-06, + "loss": 0.1111, + "step": 119610 + }, + { + "epoch": 5.58, + "learning_rate": 1.4136347248130674e-06, + "loss": 0.0086, + "step": 119615 + }, + { + "epoch": 5.58, + "learning_rate": 1.4128509397582807e-06, + "loss": 0.017, + "step": 119620 + }, + { + "epoch": 5.58, + "learning_rate": 1.4120671547034942e-06, + "loss": 0.0139, + "step": 119625 + }, + { + "epoch": 5.58, + "learning_rate": 1.4112833696487077e-06, + "loss": 0.0487, + "step": 119630 + }, + { + "epoch": 5.58, + "learning_rate": 1.410499584593921e-06, + "loss": 0.0156, + "step": 119635 + }, + { + "epoch": 5.58, + "learning_rate": 1.4097157995391344e-06, + "loss": 0.0677, + "step": 119640 + }, + { + "epoch": 5.58, + "learning_rate": 1.408932014484348e-06, + "loss": 0.0629, + "step": 119645 + }, + { + "epoch": 5.58, + "learning_rate": 1.4081482294295614e-06, + "loss": 0.1281, + "step": 119650 + }, + { + "epoch": 5.58, + "learning_rate": 1.4073644443747746e-06, + "loss": 0.2522, + "step": 119655 + }, + { + "epoch": 5.58, + "learning_rate": 1.4065806593199881e-06, + "loss": 0.1002, + "step": 119660 + }, + { + "epoch": 5.58, + "learning_rate": 1.4057968742652018e-06, + "loss": 0.0027, + "step": 119665 + }, + { + "epoch": 5.58, + "learning_rate": 1.4050130892104149e-06, + "loss": 0.0327, + "step": 119670 + }, + { + "epoch": 5.58, + "learning_rate": 1.4042293041556284e-06, + "loss": 0.0328, + "step": 119675 + }, + { + "epoch": 5.58, + "learning_rate": 1.403445519100842e-06, + "loss": 0.1028, + "step": 119680 + }, + { + "epoch": 5.58, + "learning_rate": 1.4026617340460551e-06, + "loss": 0.0295, + "step": 119685 + }, + { + "epoch": 5.58, + "learning_rate": 1.4018779489912688e-06, + "loss": 0.0157, + "step": 119690 + }, + { + "epoch": 5.59, + "learning_rate": 1.4010941639364823e-06, + "loss": 0.1268, + "step": 119695 + }, + { + "epoch": 5.59, + "learning_rate": 1.4003103788816956e-06, + "loss": 0.1734, + "step": 119700 + }, + { + "epoch": 5.59, + "learning_rate": 1.399526593826909e-06, + "loss": 0.2152, + "step": 119705 + }, + { + "epoch": 5.59, + "learning_rate": 1.3987428087721225e-06, + "loss": 0.1292, + "step": 119710 + }, + { + "epoch": 5.59, + "learning_rate": 1.397959023717336e-06, + "loss": 0.0306, + "step": 119715 + }, + { + "epoch": 5.59, + "learning_rate": 1.3971752386625493e-06, + "loss": 0.0096, + "step": 119720 + }, + { + "epoch": 5.59, + "learning_rate": 1.3963914536077628e-06, + "loss": 0.0065, + "step": 119725 + }, + { + "epoch": 5.59, + "learning_rate": 1.3956076685529762e-06, + "loss": 0.0651, + "step": 119730 + }, + { + "epoch": 5.59, + "learning_rate": 1.3948238834981895e-06, + "loss": 0.0389, + "step": 119735 + }, + { + "epoch": 5.59, + "learning_rate": 1.394040098443403e-06, + "loss": 0.077, + "step": 119740 + }, + { + "epoch": 5.59, + "learning_rate": 1.3932563133886165e-06, + "loss": 0.0908, + "step": 119745 + }, + { + "epoch": 5.59, + "learning_rate": 1.3924725283338298e-06, + "loss": 0.1581, + "step": 119750 + }, + { + "epoch": 5.59, + "learning_rate": 1.3916887432790432e-06, + "loss": 0.2812, + "step": 119755 + }, + { + "epoch": 5.59, + "learning_rate": 1.3909049582242567e-06, + "loss": 0.0894, + "step": 119760 + }, + { + "epoch": 5.59, + "learning_rate": 1.39012117316947e-06, + "loss": 0.0295, + "step": 119765 + }, + { + "epoch": 5.59, + "learning_rate": 1.3893373881146835e-06, + "loss": 0.0279, + "step": 119770 + }, + { + "epoch": 5.59, + "learning_rate": 1.388553603059897e-06, + "loss": 0.008, + "step": 119775 + }, + { + "epoch": 5.59, + "learning_rate": 1.3877698180051104e-06, + "loss": 0.044, + "step": 119780 + }, + { + "epoch": 5.59, + "learning_rate": 1.3869860329503237e-06, + "loss": 0.0431, + "step": 119785 + }, + { + "epoch": 5.59, + "learning_rate": 1.3862022478955372e-06, + "loss": 0.0526, + "step": 119790 + }, + { + "epoch": 5.59, + "learning_rate": 1.3854184628407507e-06, + "loss": 0.0799, + "step": 119795 + }, + { + "epoch": 5.59, + "learning_rate": 1.384634677785964e-06, + "loss": 0.2444, + "step": 119800 + }, + { + "epoch": 5.59, + "learning_rate": 1.3838508927311774e-06, + "loss": 0.162, + "step": 119805 + }, + { + "epoch": 5.59, + "learning_rate": 1.3830671076763911e-06, + "loss": 0.0913, + "step": 119810 + }, + { + "epoch": 5.59, + "learning_rate": 1.3822833226216042e-06, + "loss": 0.0102, + "step": 119815 + }, + { + "epoch": 5.59, + "learning_rate": 1.3814995375668177e-06, + "loss": 0.0054, + "step": 119820 + }, + { + "epoch": 5.59, + "learning_rate": 1.3807157525120314e-06, + "loss": 0.0155, + "step": 119825 + }, + { + "epoch": 5.59, + "learning_rate": 1.3799319674572444e-06, + "loss": 0.0348, + "step": 119830 + }, + { + "epoch": 5.59, + "learning_rate": 1.379148182402458e-06, + "loss": 0.0631, + "step": 119835 + }, + { + "epoch": 5.59, + "learning_rate": 1.3783643973476716e-06, + "loss": 0.0692, + "step": 119840 + }, + { + "epoch": 5.59, + "learning_rate": 1.377580612292885e-06, + "loss": 0.2007, + "step": 119845 + }, + { + "epoch": 5.59, + "learning_rate": 1.3767968272380983e-06, + "loss": 0.0705, + "step": 119850 + }, + { + "epoch": 5.59, + "learning_rate": 1.3760130421833118e-06, + "loss": 0.3, + "step": 119855 + }, + { + "epoch": 5.59, + "learning_rate": 1.3752292571285253e-06, + "loss": 0.1477, + "step": 119860 + }, + { + "epoch": 5.59, + "learning_rate": 1.3744454720737386e-06, + "loss": 0.0161, + "step": 119865 + }, + { + "epoch": 5.59, + "learning_rate": 1.373661687018952e-06, + "loss": 0.0041, + "step": 119870 + }, + { + "epoch": 5.59, + "learning_rate": 1.3728779019641655e-06, + "loss": 0.0423, + "step": 119875 + }, + { + "epoch": 5.59, + "learning_rate": 1.3720941169093788e-06, + "loss": 0.0241, + "step": 119880 + }, + { + "epoch": 5.59, + "learning_rate": 1.3713103318545923e-06, + "loss": 0.0593, + "step": 119885 + }, + { + "epoch": 5.59, + "learning_rate": 1.3705265467998058e-06, + "loss": 0.088, + "step": 119890 + }, + { + "epoch": 5.59, + "learning_rate": 1.369742761745019e-06, + "loss": 0.106, + "step": 119895 + }, + { + "epoch": 5.59, + "learning_rate": 1.3689589766902325e-06, + "loss": 0.155, + "step": 119900 + }, + { + "epoch": 5.59, + "learning_rate": 1.368175191635446e-06, + "loss": 0.1626, + "step": 119905 + }, + { + "epoch": 5.6, + "learning_rate": 1.3673914065806595e-06, + "loss": 0.1062, + "step": 119910 + }, + { + "epoch": 5.6, + "learning_rate": 1.3666076215258728e-06, + "loss": 0.0165, + "step": 119915 + }, + { + "epoch": 5.6, + "learning_rate": 1.3658238364710862e-06, + "loss": 0.0473, + "step": 119920 + }, + { + "epoch": 5.6, + "learning_rate": 1.3650400514162997e-06, + "loss": 0.0463, + "step": 119925 + }, + { + "epoch": 5.6, + "learning_rate": 1.364256266361513e-06, + "loss": 0.0555, + "step": 119930 + }, + { + "epoch": 5.6, + "learning_rate": 1.3634724813067265e-06, + "loss": 0.0784, + "step": 119935 + }, + { + "epoch": 5.6, + "learning_rate": 1.36268869625194e-06, + "loss": 0.0445, + "step": 119940 + }, + { + "epoch": 5.6, + "learning_rate": 1.3619049111971532e-06, + "loss": 0.1154, + "step": 119945 + }, + { + "epoch": 5.6, + "learning_rate": 1.3611211261423667e-06, + "loss": 0.0605, + "step": 119950 + }, + { + "epoch": 5.6, + "learning_rate": 1.3603373410875804e-06, + "loss": 0.1967, + "step": 119955 + }, + { + "epoch": 5.6, + "learning_rate": 1.3595535560327935e-06, + "loss": 0.0974, + "step": 119960 + }, + { + "epoch": 5.6, + "learning_rate": 1.358769770978007e-06, + "loss": 0.0328, + "step": 119965 + }, + { + "epoch": 5.6, + "learning_rate": 1.3579859859232206e-06, + "loss": 0.032, + "step": 119970 + }, + { + "epoch": 5.6, + "learning_rate": 1.3572022008684341e-06, + "loss": 0.0314, + "step": 119975 + }, + { + "epoch": 5.6, + "learning_rate": 1.3564184158136474e-06, + "loss": 0.0306, + "step": 119980 + }, + { + "epoch": 5.6, + "learning_rate": 1.3556346307588609e-06, + "loss": 0.0422, + "step": 119985 + }, + { + "epoch": 5.6, + "learning_rate": 1.3548508457040744e-06, + "loss": 0.1199, + "step": 119990 + }, + { + "epoch": 5.6, + "learning_rate": 1.3540670606492876e-06, + "loss": 0.084, + "step": 119995 + }, + { + "epoch": 5.6, + "learning_rate": 1.3532832755945011e-06, + "loss": 0.1078, + "step": 120000 + }, + { + "epoch": 5.6, + "learning_rate": 1.3524994905397146e-06, + "loss": 0.3066, + "step": 120005 + }, + { + "epoch": 5.6, + "learning_rate": 1.3517157054849279e-06, + "loss": 0.0609, + "step": 120010 + }, + { + "epoch": 5.6, + "learning_rate": 1.3509319204301413e-06, + "loss": 0.0049, + "step": 120015 + }, + { + "epoch": 5.6, + "learning_rate": 1.3501481353753548e-06, + "loss": 0.0839, + "step": 120020 + }, + { + "epoch": 5.6, + "learning_rate": 1.349364350320568e-06, + "loss": 0.0377, + "step": 120025 + }, + { + "epoch": 5.6, + "learning_rate": 1.3485805652657816e-06, + "loss": 0.0383, + "step": 120030 + }, + { + "epoch": 5.6, + "learning_rate": 1.347796780210995e-06, + "loss": 0.1469, + "step": 120035 + }, + { + "epoch": 5.6, + "learning_rate": 1.3470129951562085e-06, + "loss": 0.089, + "step": 120040 + }, + { + "epoch": 5.6, + "learning_rate": 1.3462292101014218e-06, + "loss": 0.0979, + "step": 120045 + }, + { + "epoch": 5.6, + "learning_rate": 1.3454454250466353e-06, + "loss": 0.1833, + "step": 120050 + }, + { + "epoch": 5.6, + "learning_rate": 1.3446616399918488e-06, + "loss": 0.5054, + "step": 120055 + }, + { + "epoch": 5.6, + "learning_rate": 1.343877854937062e-06, + "loss": 0.0851, + "step": 120060 + }, + { + "epoch": 5.6, + "learning_rate": 1.3430940698822755e-06, + "loss": 0.008, + "step": 120065 + }, + { + "epoch": 5.6, + "learning_rate": 1.342310284827489e-06, + "loss": 0.041, + "step": 120070 + }, + { + "epoch": 5.6, + "learning_rate": 1.3415264997727023e-06, + "loss": 0.0614, + "step": 120075 + }, + { + "epoch": 5.6, + "learning_rate": 1.3407427147179158e-06, + "loss": 0.055, + "step": 120080 + }, + { + "epoch": 5.6, + "learning_rate": 1.3399589296631293e-06, + "loss": 0.0652, + "step": 120085 + }, + { + "epoch": 5.6, + "learning_rate": 1.3391751446083425e-06, + "loss": 0.1625, + "step": 120090 + }, + { + "epoch": 5.6, + "learning_rate": 1.338391359553556e-06, + "loss": 0.0314, + "step": 120095 + }, + { + "epoch": 5.6, + "learning_rate": 1.3376075744987697e-06, + "loss": 0.1463, + "step": 120100 + }, + { + "epoch": 5.6, + "learning_rate": 1.3368237894439832e-06, + "loss": 0.1992, + "step": 120105 + }, + { + "epoch": 5.6, + "learning_rate": 1.3360400043891962e-06, + "loss": 0.122, + "step": 120110 + }, + { + "epoch": 5.6, + "learning_rate": 1.33525621933441e-06, + "loss": 0.0266, + "step": 120115 + }, + { + "epoch": 5.6, + "learning_rate": 1.3344724342796234e-06, + "loss": 0.0131, + "step": 120120 + }, + { + "epoch": 5.61, + "learning_rate": 1.3336886492248367e-06, + "loss": 0.0326, + "step": 120125 + }, + { + "epoch": 5.61, + "learning_rate": 1.3329048641700502e-06, + "loss": 0.0605, + "step": 120130 + }, + { + "epoch": 5.61, + "learning_rate": 1.3321210791152637e-06, + "loss": 0.0591, + "step": 120135 + }, + { + "epoch": 5.61, + "learning_rate": 1.331337294060477e-06, + "loss": 0.0633, + "step": 120140 + }, + { + "epoch": 5.61, + "learning_rate": 1.3305535090056904e-06, + "loss": 0.0907, + "step": 120145 + }, + { + "epoch": 5.61, + "learning_rate": 1.3297697239509039e-06, + "loss": 0.17, + "step": 120150 + }, + { + "epoch": 5.61, + "learning_rate": 1.3289859388961172e-06, + "loss": 0.294, + "step": 120155 + }, + { + "epoch": 5.61, + "learning_rate": 1.3282021538413306e-06, + "loss": 0.1515, + "step": 120160 + }, + { + "epoch": 5.61, + "learning_rate": 1.3274183687865441e-06, + "loss": 0.0203, + "step": 120165 + }, + { + "epoch": 5.61, + "learning_rate": 1.3266345837317576e-06, + "loss": 0.0242, + "step": 120170 + }, + { + "epoch": 5.61, + "learning_rate": 1.3258507986769709e-06, + "loss": 0.0477, + "step": 120175 + }, + { + "epoch": 5.61, + "learning_rate": 1.3250670136221844e-06, + "loss": 0.0213, + "step": 120180 + }, + { + "epoch": 5.61, + "learning_rate": 1.3242832285673978e-06, + "loss": 0.0576, + "step": 120185 + }, + { + "epoch": 5.61, + "learning_rate": 1.3234994435126111e-06, + "loss": 0.0489, + "step": 120190 + }, + { + "epoch": 5.61, + "learning_rate": 1.322872415468782e-06, + "loss": 0.1421, + "step": 120195 + }, + { + "epoch": 5.61, + "learning_rate": 1.3220886304139955e-06, + "loss": 0.1762, + "step": 120200 + }, + { + "epoch": 5.61, + "learning_rate": 1.3213048453592087e-06, + "loss": 0.2435, + "step": 120205 + }, + { + "epoch": 5.61, + "learning_rate": 1.3205210603044222e-06, + "loss": 0.0665, + "step": 120210 + }, + { + "epoch": 5.61, + "learning_rate": 1.3197372752496357e-06, + "loss": 0.0316, + "step": 120215 + }, + { + "epoch": 5.61, + "learning_rate": 1.3189534901948492e-06, + "loss": 0.0284, + "step": 120220 + }, + { + "epoch": 5.61, + "learning_rate": 1.3181697051400624e-06, + "loss": 0.0504, + "step": 120225 + }, + { + "epoch": 5.61, + "learning_rate": 1.317385920085276e-06, + "loss": 0.0809, + "step": 120230 + }, + { + "epoch": 5.61, + "learning_rate": 1.3166021350304894e-06, + "loss": 0.1471, + "step": 120235 + }, + { + "epoch": 5.61, + "learning_rate": 1.3158183499757027e-06, + "loss": 0.1509, + "step": 120240 + }, + { + "epoch": 5.61, + "learning_rate": 1.3150345649209162e-06, + "loss": 0.1442, + "step": 120245 + }, + { + "epoch": 5.61, + "learning_rate": 1.3142507798661296e-06, + "loss": 0.18, + "step": 120250 + }, + { + "epoch": 5.61, + "learning_rate": 1.313466994811343e-06, + "loss": 0.318, + "step": 120255 + }, + { + "epoch": 5.61, + "learning_rate": 1.3126832097565564e-06, + "loss": 0.1029, + "step": 120260 + }, + { + "epoch": 5.61, + "learning_rate": 1.3118994247017699e-06, + "loss": 0.11, + "step": 120265 + }, + { + "epoch": 5.61, + "learning_rate": 1.3111156396469832e-06, + "loss": 0.0185, + "step": 120270 + }, + { + "epoch": 5.61, + "learning_rate": 1.3103318545921966e-06, + "loss": 0.0748, + "step": 120275 + }, + { + "epoch": 5.61, + "learning_rate": 1.3095480695374101e-06, + "loss": 0.0412, + "step": 120280 + }, + { + "epoch": 5.61, + "learning_rate": 1.3087642844826238e-06, + "loss": 0.105, + "step": 120285 + }, + { + "epoch": 5.61, + "learning_rate": 1.3079804994278369e-06, + "loss": 0.096, + "step": 120290 + }, + { + "epoch": 5.61, + "learning_rate": 1.3071967143730504e-06, + "loss": 0.093, + "step": 120295 + }, + { + "epoch": 5.61, + "learning_rate": 1.306412929318264e-06, + "loss": 0.1279, + "step": 120300 + }, + { + "epoch": 5.61, + "learning_rate": 1.3056291442634771e-06, + "loss": 0.3209, + "step": 120305 + }, + { + "epoch": 5.61, + "learning_rate": 1.3048453592086908e-06, + "loss": 0.1124, + "step": 120310 + }, + { + "epoch": 5.61, + "learning_rate": 1.3040615741539043e-06, + "loss": 0.0261, + "step": 120315 + }, + { + "epoch": 5.61, + "learning_rate": 1.3032777890991176e-06, + "loss": 0.0228, + "step": 120320 + }, + { + "epoch": 5.61, + "learning_rate": 1.302494004044331e-06, + "loss": 0.0628, + "step": 120325 + }, + { + "epoch": 5.61, + "learning_rate": 1.3017102189895445e-06, + "loss": 0.0499, + "step": 120330 + }, + { + "epoch": 5.61, + "learning_rate": 1.3009264339347578e-06, + "loss": 0.0527, + "step": 120335 + }, + { + "epoch": 5.62, + "learning_rate": 1.3001426488799713e-06, + "loss": 0.04, + "step": 120340 + }, + { + "epoch": 5.62, + "learning_rate": 1.2993588638251848e-06, + "loss": 0.1023, + "step": 120345 + }, + { + "epoch": 5.62, + "learning_rate": 1.2985750787703982e-06, + "loss": 0.0832, + "step": 120350 + }, + { + "epoch": 5.62, + "learning_rate": 1.2977912937156115e-06, + "loss": 0.3017, + "step": 120355 + }, + { + "epoch": 5.62, + "learning_rate": 1.297007508660825e-06, + "loss": 0.0362, + "step": 120360 + }, + { + "epoch": 5.62, + "learning_rate": 1.2962237236060385e-06, + "loss": 0.0038, + "step": 120365 + }, + { + "epoch": 5.62, + "learning_rate": 1.2954399385512517e-06, + "loss": 0.0184, + "step": 120370 + }, + { + "epoch": 5.62, + "learning_rate": 1.2946561534964652e-06, + "loss": 0.0201, + "step": 120375 + }, + { + "epoch": 5.62, + "learning_rate": 1.2938723684416787e-06, + "loss": 0.0382, + "step": 120380 + }, + { + "epoch": 5.62, + "learning_rate": 1.293088583386892e-06, + "loss": 0.0926, + "step": 120385 + }, + { + "epoch": 5.62, + "learning_rate": 1.2923047983321055e-06, + "loss": 0.0512, + "step": 120390 + }, + { + "epoch": 5.62, + "learning_rate": 1.291521013277319e-06, + "loss": 0.1139, + "step": 120395 + }, + { + "epoch": 5.62, + "learning_rate": 1.2907372282225322e-06, + "loss": 0.2386, + "step": 120400 + }, + { + "epoch": 5.62, + "learning_rate": 1.2899534431677457e-06, + "loss": 0.248, + "step": 120405 + }, + { + "epoch": 5.62, + "learning_rate": 1.2891696581129592e-06, + "loss": 0.1115, + "step": 120410 + }, + { + "epoch": 5.62, + "learning_rate": 1.2883858730581727e-06, + "loss": 0.0373, + "step": 120415 + }, + { + "epoch": 5.62, + "learning_rate": 1.287602088003386e-06, + "loss": 0.0516, + "step": 120420 + }, + { + "epoch": 5.62, + "learning_rate": 1.2868183029485994e-06, + "loss": 0.032, + "step": 120425 + }, + { + "epoch": 5.62, + "learning_rate": 1.286034517893813e-06, + "loss": 0.0898, + "step": 120430 + }, + { + "epoch": 5.62, + "learning_rate": 1.2852507328390262e-06, + "loss": 0.0567, + "step": 120435 + }, + { + "epoch": 5.62, + "learning_rate": 1.2844669477842396e-06, + "loss": 0.1069, + "step": 120440 + }, + { + "epoch": 5.62, + "learning_rate": 1.2836831627294533e-06, + "loss": 0.0802, + "step": 120445 + }, + { + "epoch": 5.62, + "learning_rate": 1.2828993776746664e-06, + "loss": 0.2333, + "step": 120450 + }, + { + "epoch": 5.62, + "learning_rate": 1.28211559261988e-06, + "loss": 0.4216, + "step": 120455 + }, + { + "epoch": 5.62, + "learning_rate": 1.2813318075650936e-06, + "loss": 0.074, + "step": 120460 + }, + { + "epoch": 5.62, + "learning_rate": 1.2805480225103068e-06, + "loss": 0.0049, + "step": 120465 + }, + { + "epoch": 5.62, + "learning_rate": 1.2797642374555203e-06, + "loss": 0.0235, + "step": 120470 + }, + { + "epoch": 5.62, + "learning_rate": 1.2789804524007338e-06, + "loss": 0.0246, + "step": 120475 + }, + { + "epoch": 5.62, + "learning_rate": 1.2781966673459473e-06, + "loss": 0.0251, + "step": 120480 + }, + { + "epoch": 5.62, + "learning_rate": 1.2774128822911606e-06, + "loss": 0.0651, + "step": 120485 + }, + { + "epoch": 5.62, + "learning_rate": 1.276629097236374e-06, + "loss": 0.0726, + "step": 120490 + }, + { + "epoch": 5.62, + "learning_rate": 1.2758453121815875e-06, + "loss": 0.1283, + "step": 120495 + }, + { + "epoch": 5.62, + "learning_rate": 1.2750615271268008e-06, + "loss": 0.1331, + "step": 120500 + }, + { + "epoch": 5.62, + "learning_rate": 1.2742777420720143e-06, + "loss": 0.2607, + "step": 120505 + }, + { + "epoch": 5.62, + "learning_rate": 1.2734939570172278e-06, + "loss": 0.0889, + "step": 120510 + }, + { + "epoch": 5.62, + "learning_rate": 1.272710171962441e-06, + "loss": 0.0214, + "step": 120515 + }, + { + "epoch": 5.62, + "learning_rate": 1.2719263869076545e-06, + "loss": 0.0264, + "step": 120520 + }, + { + "epoch": 5.62, + "learning_rate": 1.271142601852868e-06, + "loss": 0.0466, + "step": 120525 + }, + { + "epoch": 5.62, + "learning_rate": 1.2703588167980813e-06, + "loss": 0.0771, + "step": 120530 + }, + { + "epoch": 5.62, + "learning_rate": 1.2695750317432947e-06, + "loss": 0.0259, + "step": 120535 + }, + { + "epoch": 5.62, + "learning_rate": 1.2687912466885082e-06, + "loss": 0.0283, + "step": 120540 + }, + { + "epoch": 5.62, + "learning_rate": 1.2680074616337217e-06, + "loss": 0.0338, + "step": 120545 + }, + { + "epoch": 5.63, + "learning_rate": 1.267223676578935e-06, + "loss": 0.1226, + "step": 120550 + }, + { + "epoch": 5.63, + "learning_rate": 1.2664398915241485e-06, + "loss": 0.5776, + "step": 120555 + }, + { + "epoch": 5.63, + "learning_rate": 1.265656106469362e-06, + "loss": 0.0962, + "step": 120560 + }, + { + "epoch": 5.63, + "learning_rate": 1.2648723214145752e-06, + "loss": 0.0112, + "step": 120565 + }, + { + "epoch": 5.63, + "learning_rate": 1.2640885363597887e-06, + "loss": 0.0235, + "step": 120570 + }, + { + "epoch": 5.63, + "learning_rate": 1.2633047513050024e-06, + "loss": 0.0086, + "step": 120575 + }, + { + "epoch": 5.63, + "learning_rate": 1.2625209662502155e-06, + "loss": 0.0607, + "step": 120580 + }, + { + "epoch": 5.63, + "learning_rate": 1.261737181195429e-06, + "loss": 0.0572, + "step": 120585 + }, + { + "epoch": 5.63, + "learning_rate": 1.2609533961406426e-06, + "loss": 0.0511, + "step": 120590 + }, + { + "epoch": 5.63, + "learning_rate": 1.2601696110858557e-06, + "loss": 0.1468, + "step": 120595 + }, + { + "epoch": 5.63, + "learning_rate": 1.2593858260310694e-06, + "loss": 0.1361, + "step": 120600 + }, + { + "epoch": 5.63, + "learning_rate": 1.2586020409762829e-06, + "loss": 0.3354, + "step": 120605 + }, + { + "epoch": 5.63, + "learning_rate": 1.2578182559214963e-06, + "loss": 0.1291, + "step": 120610 + }, + { + "epoch": 5.63, + "learning_rate": 1.2570344708667096e-06, + "loss": 0.0345, + "step": 120615 + }, + { + "epoch": 5.63, + "learning_rate": 1.256250685811923e-06, + "loss": 0.0261, + "step": 120620 + }, + { + "epoch": 5.63, + "learning_rate": 1.2554669007571366e-06, + "loss": 0.0326, + "step": 120625 + }, + { + "epoch": 5.63, + "learning_rate": 1.2546831157023499e-06, + "loss": 0.0909, + "step": 120630 + }, + { + "epoch": 5.63, + "learning_rate": 1.2538993306475633e-06, + "loss": 0.0625, + "step": 120635 + }, + { + "epoch": 5.63, + "learning_rate": 1.2531155455927768e-06, + "loss": 0.1717, + "step": 120640 + }, + { + "epoch": 5.63, + "learning_rate": 1.25233176053799e-06, + "loss": 0.1105, + "step": 120645 + }, + { + "epoch": 5.63, + "learning_rate": 1.2515479754832036e-06, + "loss": 0.1085, + "step": 120650 + }, + { + "epoch": 5.63, + "learning_rate": 1.250764190428417e-06, + "loss": 0.2955, + "step": 120655 + }, + { + "epoch": 5.63, + "learning_rate": 1.2499804053736305e-06, + "loss": 0.1055, + "step": 120660 + }, + { + "epoch": 5.63, + "learning_rate": 1.2491966203188438e-06, + "loss": 0.0224, + "step": 120665 + }, + { + "epoch": 5.63, + "learning_rate": 1.2484128352640573e-06, + "loss": 0.0165, + "step": 120670 + }, + { + "epoch": 5.63, + "learning_rate": 1.2476290502092708e-06, + "loss": 0.0576, + "step": 120675 + }, + { + "epoch": 5.63, + "learning_rate": 1.2468452651544843e-06, + "loss": 0.0319, + "step": 120680 + }, + { + "epoch": 5.63, + "learning_rate": 1.2460614800996975e-06, + "loss": 0.126, + "step": 120685 + }, + { + "epoch": 5.63, + "learning_rate": 1.245277695044911e-06, + "loss": 0.0756, + "step": 120690 + }, + { + "epoch": 5.63, + "learning_rate": 1.2444939099901245e-06, + "loss": 0.0949, + "step": 120695 + }, + { + "epoch": 5.63, + "learning_rate": 1.2437101249353378e-06, + "loss": 0.083, + "step": 120700 + }, + { + "epoch": 5.63, + "learning_rate": 1.2429263398805512e-06, + "loss": 0.1792, + "step": 120705 + }, + { + "epoch": 5.63, + "learning_rate": 1.2421425548257647e-06, + "loss": 0.1065, + "step": 120710 + }, + { + "epoch": 5.63, + "learning_rate": 1.241358769770978e-06, + "loss": 0.0062, + "step": 120715 + }, + { + "epoch": 5.63, + "learning_rate": 1.2405749847161917e-06, + "loss": 0.0363, + "step": 120720 + }, + { + "epoch": 5.63, + "learning_rate": 1.239791199661405e-06, + "loss": 0.0404, + "step": 120725 + }, + { + "epoch": 5.63, + "learning_rate": 1.2390074146066182e-06, + "loss": 0.0332, + "step": 120730 + }, + { + "epoch": 5.63, + "learning_rate": 1.238223629551832e-06, + "loss": 0.0895, + "step": 120735 + }, + { + "epoch": 5.63, + "learning_rate": 1.2374398444970452e-06, + "loss": 0.0745, + "step": 120740 + }, + { + "epoch": 5.63, + "learning_rate": 1.2366560594422587e-06, + "loss": 0.0712, + "step": 120745 + }, + { + "epoch": 5.63, + "learning_rate": 1.2358722743874722e-06, + "loss": 0.0736, + "step": 120750 + }, + { + "epoch": 5.63, + "learning_rate": 1.2350884893326854e-06, + "loss": 0.3885, + "step": 120755 + }, + { + "epoch": 5.63, + "learning_rate": 1.234304704277899e-06, + "loss": 0.137, + "step": 120760 + }, + { + "epoch": 5.64, + "learning_rate": 1.2335209192231124e-06, + "loss": 0.0363, + "step": 120765 + }, + { + "epoch": 5.64, + "learning_rate": 1.2327371341683257e-06, + "loss": 0.0188, + "step": 120770 + }, + { + "epoch": 5.64, + "learning_rate": 1.2319533491135391e-06, + "loss": 0.0726, + "step": 120775 + }, + { + "epoch": 5.64, + "learning_rate": 1.2311695640587526e-06, + "loss": 0.135, + "step": 120780 + }, + { + "epoch": 5.64, + "learning_rate": 1.2303857790039661e-06, + "loss": 0.0422, + "step": 120785 + }, + { + "epoch": 5.64, + "learning_rate": 1.2296019939491794e-06, + "loss": 0.0864, + "step": 120790 + }, + { + "epoch": 5.64, + "learning_rate": 1.2288182088943929e-06, + "loss": 0.0973, + "step": 120795 + }, + { + "epoch": 5.64, + "learning_rate": 1.2280344238396063e-06, + "loss": 0.1046, + "step": 120800 + }, + { + "epoch": 5.64, + "learning_rate": 1.2272506387848198e-06, + "loss": 0.3584, + "step": 120805 + }, + { + "epoch": 5.64, + "learning_rate": 1.2264668537300333e-06, + "loss": 0.0787, + "step": 120810 + }, + { + "epoch": 5.64, + "learning_rate": 1.2256830686752466e-06, + "loss": 0.0202, + "step": 120815 + }, + { + "epoch": 5.64, + "learning_rate": 1.22489928362046e-06, + "loss": 0.0283, + "step": 120820 + }, + { + "epoch": 5.64, + "learning_rate": 1.2241154985656735e-06, + "loss": 0.1251, + "step": 120825 + }, + { + "epoch": 5.64, + "learning_rate": 1.2233317135108868e-06, + "loss": 0.0721, + "step": 120830 + }, + { + "epoch": 5.64, + "learning_rate": 1.2225479284561003e-06, + "loss": 0.1061, + "step": 120835 + }, + { + "epoch": 5.64, + "learning_rate": 1.2217641434013138e-06, + "loss": 0.1088, + "step": 120840 + }, + { + "epoch": 5.64, + "learning_rate": 1.220980358346527e-06, + "loss": 0.1043, + "step": 120845 + }, + { + "epoch": 5.64, + "learning_rate": 1.2201965732917405e-06, + "loss": 0.0747, + "step": 120850 + }, + { + "epoch": 5.64, + "learning_rate": 1.219412788236954e-06, + "loss": 0.4414, + "step": 120855 + }, + { + "epoch": 5.64, + "learning_rate": 1.2186290031821673e-06, + "loss": 0.0853, + "step": 120860 + }, + { + "epoch": 5.64, + "learning_rate": 1.217845218127381e-06, + "loss": 0.0219, + "step": 120865 + }, + { + "epoch": 5.64, + "learning_rate": 1.2170614330725942e-06, + "loss": 0.0177, + "step": 120870 + }, + { + "epoch": 5.64, + "learning_rate": 1.2162776480178077e-06, + "loss": 0.0132, + "step": 120875 + }, + { + "epoch": 5.64, + "learning_rate": 1.2154938629630212e-06, + "loss": 0.0814, + "step": 120880 + }, + { + "epoch": 5.64, + "learning_rate": 1.2147100779082345e-06, + "loss": 0.0752, + "step": 120885 + }, + { + "epoch": 5.64, + "learning_rate": 1.213926292853448e-06, + "loss": 0.104, + "step": 120890 + }, + { + "epoch": 5.64, + "learning_rate": 1.2131425077986614e-06, + "loss": 0.1472, + "step": 120895 + }, + { + "epoch": 5.64, + "learning_rate": 1.2123587227438747e-06, + "loss": 0.1699, + "step": 120900 + }, + { + "epoch": 5.64, + "learning_rate": 1.2115749376890882e-06, + "loss": 0.2289, + "step": 120905 + }, + { + "epoch": 5.64, + "learning_rate": 1.2107911526343017e-06, + "loss": 0.0767, + "step": 120910 + }, + { + "epoch": 5.64, + "learning_rate": 1.2100073675795152e-06, + "loss": 0.0158, + "step": 120915 + }, + { + "epoch": 5.64, + "learning_rate": 1.2092235825247284e-06, + "loss": 0.0363, + "step": 120920 + }, + { + "epoch": 5.64, + "learning_rate": 1.208439797469942e-06, + "loss": 0.0445, + "step": 120925 + }, + { + "epoch": 5.64, + "learning_rate": 1.2076560124151554e-06, + "loss": 0.0486, + "step": 120930 + }, + { + "epoch": 5.64, + "learning_rate": 1.2068722273603687e-06, + "loss": 0.0392, + "step": 120935 + }, + { + "epoch": 5.64, + "learning_rate": 1.2060884423055824e-06, + "loss": 0.0663, + "step": 120940 + }, + { + "epoch": 5.64, + "learning_rate": 1.2053046572507956e-06, + "loss": 0.1281, + "step": 120945 + }, + { + "epoch": 5.64, + "learning_rate": 1.2045208721960091e-06, + "loss": 0.1384, + "step": 120950 + }, + { + "epoch": 5.64, + "learning_rate": 1.2037370871412226e-06, + "loss": 0.1752, + "step": 120955 + }, + { + "epoch": 5.64, + "learning_rate": 1.2029533020864359e-06, + "loss": 0.0985, + "step": 120960 + }, + { + "epoch": 5.64, + "learning_rate": 1.2021695170316494e-06, + "loss": 0.0016, + "step": 120965 + }, + { + "epoch": 5.64, + "learning_rate": 1.2013857319768628e-06, + "loss": 0.0177, + "step": 120970 + }, + { + "epoch": 5.64, + "learning_rate": 1.200601946922076e-06, + "loss": 0.0362, + "step": 120975 + }, + { + "epoch": 5.65, + "learning_rate": 1.1998181618672896e-06, + "loss": 0.0332, + "step": 120980 + }, + { + "epoch": 5.65, + "learning_rate": 1.199034376812503e-06, + "loss": 0.0568, + "step": 120985 + }, + { + "epoch": 5.65, + "learning_rate": 1.1982505917577163e-06, + "loss": 0.1809, + "step": 120990 + }, + { + "epoch": 5.65, + "learning_rate": 1.1974668067029298e-06, + "loss": 0.0728, + "step": 120995 + }, + { + "epoch": 5.65, + "learning_rate": 1.1966830216481433e-06, + "loss": 0.1553, + "step": 121000 + }, + { + "epoch": 5.65, + "learning_rate": 1.1958992365933568e-06, + "loss": 0.3785, + "step": 121005 + }, + { + "epoch": 5.65, + "learning_rate": 1.1951154515385703e-06, + "loss": 0.1099, + "step": 121010 + }, + { + "epoch": 5.65, + "learning_rate": 1.1943316664837835e-06, + "loss": 0.0456, + "step": 121015 + }, + { + "epoch": 5.65, + "learning_rate": 1.193547881428997e-06, + "loss": 0.011, + "step": 121020 + }, + { + "epoch": 5.65, + "learning_rate": 1.1927640963742105e-06, + "loss": 0.0408, + "step": 121025 + }, + { + "epoch": 5.65, + "learning_rate": 1.1919803113194238e-06, + "loss": 0.0307, + "step": 121030 + }, + { + "epoch": 5.65, + "learning_rate": 1.1911965262646373e-06, + "loss": 0.0777, + "step": 121035 + }, + { + "epoch": 5.65, + "learning_rate": 1.1904127412098507e-06, + "loss": 0.0622, + "step": 121040 + }, + { + "epoch": 5.65, + "learning_rate": 1.1896289561550642e-06, + "loss": 0.0811, + "step": 121045 + }, + { + "epoch": 5.65, + "learning_rate": 1.1888451711002775e-06, + "loss": 0.1499, + "step": 121050 + }, + { + "epoch": 5.65, + "learning_rate": 1.188061386045491e-06, + "loss": 0.2398, + "step": 121055 + }, + { + "epoch": 5.65, + "learning_rate": 1.1872776009907045e-06, + "loss": 0.1401, + "step": 121060 + }, + { + "epoch": 5.65, + "learning_rate": 1.1864938159359177e-06, + "loss": 0.0526, + "step": 121065 + }, + { + "epoch": 5.65, + "learning_rate": 1.1857100308811314e-06, + "loss": 0.0114, + "step": 121070 + }, + { + "epoch": 5.65, + "learning_rate": 1.1849262458263447e-06, + "loss": 0.0227, + "step": 121075 + }, + { + "epoch": 5.65, + "learning_rate": 1.184142460771558e-06, + "loss": 0.0221, + "step": 121080 + }, + { + "epoch": 5.65, + "learning_rate": 1.1833586757167717e-06, + "loss": 0.0688, + "step": 121085 + }, + { + "epoch": 5.65, + "learning_rate": 1.182574890661985e-06, + "loss": 0.0591, + "step": 121090 + }, + { + "epoch": 5.65, + "learning_rate": 1.1817911056071984e-06, + "loss": 0.1032, + "step": 121095 + }, + { + "epoch": 5.65, + "learning_rate": 1.1810073205524119e-06, + "loss": 0.148, + "step": 121100 + }, + { + "epoch": 5.65, + "learning_rate": 1.1802235354976252e-06, + "loss": 0.2236, + "step": 121105 + }, + { + "epoch": 5.65, + "learning_rate": 1.1794397504428386e-06, + "loss": 0.0954, + "step": 121110 + }, + { + "epoch": 5.65, + "learning_rate": 1.1786559653880521e-06, + "loss": 0.0222, + "step": 121115 + }, + { + "epoch": 5.65, + "learning_rate": 1.1778721803332654e-06, + "loss": 0.0325, + "step": 121120 + }, + { + "epoch": 5.65, + "learning_rate": 1.1770883952784789e-06, + "loss": 0.0492, + "step": 121125 + }, + { + "epoch": 5.65, + "learning_rate": 1.1763046102236924e-06, + "loss": 0.0187, + "step": 121130 + }, + { + "epoch": 5.65, + "learning_rate": 1.1755208251689058e-06, + "loss": 0.0735, + "step": 121135 + }, + { + "epoch": 5.65, + "learning_rate": 1.1747370401141191e-06, + "loss": 0.0803, + "step": 121140 + }, + { + "epoch": 5.65, + "learning_rate": 1.1739532550593326e-06, + "loss": 0.1004, + "step": 121145 + }, + { + "epoch": 5.65, + "learning_rate": 1.173169470004546e-06, + "loss": 0.0906, + "step": 121150 + }, + { + "epoch": 5.65, + "learning_rate": 1.1723856849497596e-06, + "loss": 0.29, + "step": 121155 + }, + { + "epoch": 5.65, + "learning_rate": 1.1716018998949728e-06, + "loss": 0.0895, + "step": 121160 + }, + { + "epoch": 5.65, + "learning_rate": 1.1708181148401863e-06, + "loss": 0.0225, + "step": 121165 + }, + { + "epoch": 5.65, + "learning_rate": 1.1700343297853998e-06, + "loss": 0.0184, + "step": 121170 + }, + { + "epoch": 5.65, + "learning_rate": 1.1692505447306133e-06, + "loss": 0.0154, + "step": 121175 + }, + { + "epoch": 5.65, + "learning_rate": 1.1684667596758265e-06, + "loss": 0.0152, + "step": 121180 + }, + { + "epoch": 5.65, + "learning_rate": 1.16768297462104e-06, + "loss": 0.0993, + "step": 121185 + }, + { + "epoch": 5.65, + "learning_rate": 1.1668991895662535e-06, + "loss": 0.0812, + "step": 121190 + }, + { + "epoch": 5.66, + "learning_rate": 1.1661154045114668e-06, + "loss": 0.0662, + "step": 121195 + }, + { + "epoch": 5.66, + "learning_rate": 1.1653316194566803e-06, + "loss": 0.2011, + "step": 121200 + }, + { + "epoch": 5.66, + "learning_rate": 1.1645478344018937e-06, + "loss": 0.2143, + "step": 121205 + }, + { + "epoch": 5.66, + "learning_rate": 1.163764049347107e-06, + "loss": 0.0823, + "step": 121210 + }, + { + "epoch": 5.66, + "learning_rate": 1.1629802642923207e-06, + "loss": 0.0463, + "step": 121215 + }, + { + "epoch": 5.66, + "learning_rate": 1.162196479237534e-06, + "loss": 0.0216, + "step": 121220 + }, + { + "epoch": 5.66, + "learning_rate": 1.1614126941827473e-06, + "loss": 0.0621, + "step": 121225 + }, + { + "epoch": 5.66, + "learning_rate": 1.160628909127961e-06, + "loss": 0.051, + "step": 121230 + }, + { + "epoch": 5.66, + "learning_rate": 1.1598451240731742e-06, + "loss": 0.0302, + "step": 121235 + }, + { + "epoch": 5.66, + "learning_rate": 1.1590613390183877e-06, + "loss": 0.0577, + "step": 121240 + }, + { + "epoch": 5.66, + "learning_rate": 1.1582775539636012e-06, + "loss": 0.082, + "step": 121245 + }, + { + "epoch": 5.66, + "learning_rate": 1.1574937689088145e-06, + "loss": 0.109, + "step": 121250 + }, + { + "epoch": 5.66, + "learning_rate": 1.156709983854028e-06, + "loss": 0.282, + "step": 121255 + }, + { + "epoch": 5.66, + "learning_rate": 1.1559261987992414e-06, + "loss": 0.1198, + "step": 121260 + }, + { + "epoch": 5.66, + "learning_rate": 1.155142413744455e-06, + "loss": 0.0418, + "step": 121265 + }, + { + "epoch": 5.66, + "learning_rate": 1.1543586286896682e-06, + "loss": 0.0342, + "step": 121270 + }, + { + "epoch": 5.66, + "learning_rate": 1.1535748436348817e-06, + "loss": 0.0579, + "step": 121275 + }, + { + "epoch": 5.66, + "learning_rate": 1.1527910585800951e-06, + "loss": 0.0589, + "step": 121280 + }, + { + "epoch": 5.66, + "learning_rate": 1.1520072735253084e-06, + "loss": 0.0644, + "step": 121285 + }, + { + "epoch": 5.66, + "learning_rate": 1.1512234884705219e-06, + "loss": 0.0406, + "step": 121290 + }, + { + "epoch": 5.66, + "learning_rate": 1.1504397034157354e-06, + "loss": 0.0502, + "step": 121295 + }, + { + "epoch": 5.66, + "learning_rate": 1.1496559183609489e-06, + "loss": 0.1221, + "step": 121300 + }, + { + "epoch": 5.66, + "learning_rate": 1.1488721333061623e-06, + "loss": 0.229, + "step": 121305 + }, + { + "epoch": 5.66, + "learning_rate": 1.1480883482513756e-06, + "loss": 0.0949, + "step": 121310 + }, + { + "epoch": 5.66, + "learning_rate": 1.147304563196589e-06, + "loss": 0.0142, + "step": 121315 + }, + { + "epoch": 5.66, + "learning_rate": 1.1465207781418026e-06, + "loss": 0.0377, + "step": 121320 + }, + { + "epoch": 5.66, + "learning_rate": 1.1457369930870158e-06, + "loss": 0.035, + "step": 121325 + }, + { + "epoch": 5.66, + "learning_rate": 1.1449532080322293e-06, + "loss": 0.0959, + "step": 121330 + }, + { + "epoch": 5.66, + "learning_rate": 1.1441694229774428e-06, + "loss": 0.0656, + "step": 121335 + }, + { + "epoch": 5.66, + "learning_rate": 1.143385637922656e-06, + "loss": 0.086, + "step": 121340 + }, + { + "epoch": 5.66, + "learning_rate": 1.1426018528678696e-06, + "loss": 0.1377, + "step": 121345 + }, + { + "epoch": 5.66, + "learning_rate": 1.141818067813083e-06, + "loss": 0.0912, + "step": 121350 + }, + { + "epoch": 5.66, + "learning_rate": 1.1410342827582963e-06, + "loss": 0.2221, + "step": 121355 + }, + { + "epoch": 5.66, + "learning_rate": 1.14025049770351e-06, + "loss": 0.0791, + "step": 121360 + }, + { + "epoch": 5.66, + "learning_rate": 1.1394667126487233e-06, + "loss": 0.004, + "step": 121365 + }, + { + "epoch": 5.66, + "learning_rate": 1.1386829275939368e-06, + "loss": 0.0119, + "step": 121370 + }, + { + "epoch": 5.66, + "learning_rate": 1.1378991425391502e-06, + "loss": 0.0606, + "step": 121375 + }, + { + "epoch": 5.66, + "learning_rate": 1.1371153574843635e-06, + "loss": 0.0763, + "step": 121380 + }, + { + "epoch": 5.66, + "learning_rate": 1.136331572429577e-06, + "loss": 0.0663, + "step": 121385 + }, + { + "epoch": 5.66, + "learning_rate": 1.1355477873747905e-06, + "loss": 0.0433, + "step": 121390 + }, + { + "epoch": 5.66, + "learning_rate": 1.134764002320004e-06, + "loss": 0.1626, + "step": 121395 + }, + { + "epoch": 5.66, + "learning_rate": 1.1339802172652172e-06, + "loss": 0.1627, + "step": 121400 + }, + { + "epoch": 5.66, + "learning_rate": 1.1331964322104307e-06, + "loss": 0.2007, + "step": 121405 + }, + { + "epoch": 5.67, + "learning_rate": 1.1324126471556442e-06, + "loss": 0.123, + "step": 121410 + }, + { + "epoch": 5.67, + "learning_rate": 1.1316288621008575e-06, + "loss": 0.0476, + "step": 121415 + }, + { + "epoch": 5.67, + "learning_rate": 1.130845077046071e-06, + "loss": 0.0418, + "step": 121420 + }, + { + "epoch": 5.67, + "learning_rate": 1.1300612919912844e-06, + "loss": 0.1087, + "step": 121425 + }, + { + "epoch": 5.67, + "learning_rate": 1.1292775069364977e-06, + "loss": 0.0844, + "step": 121430 + }, + { + "epoch": 5.67, + "learning_rate": 1.1284937218817114e-06, + "loss": 0.0733, + "step": 121435 + }, + { + "epoch": 5.67, + "learning_rate": 1.1277099368269247e-06, + "loss": 0.0434, + "step": 121440 + }, + { + "epoch": 5.67, + "learning_rate": 1.1269261517721381e-06, + "loss": 0.1135, + "step": 121445 + }, + { + "epoch": 5.67, + "learning_rate": 1.1261423667173516e-06, + "loss": 0.1213, + "step": 121450 + }, + { + "epoch": 5.67, + "learning_rate": 1.125358581662565e-06, + "loss": 0.2247, + "step": 121455 + }, + { + "epoch": 5.67, + "learning_rate": 1.1245747966077784e-06, + "loss": 0.102, + "step": 121460 + }, + { + "epoch": 5.67, + "learning_rate": 1.1237910115529919e-06, + "loss": 0.0244, + "step": 121465 + }, + { + "epoch": 5.67, + "learning_rate": 1.1230072264982051e-06, + "loss": 0.0206, + "step": 121470 + }, + { + "epoch": 5.67, + "learning_rate": 1.1222234414434186e-06, + "loss": 0.0432, + "step": 121475 + }, + { + "epoch": 5.67, + "learning_rate": 1.121439656388632e-06, + "loss": 0.0421, + "step": 121480 + }, + { + "epoch": 5.67, + "learning_rate": 1.1206558713338454e-06, + "loss": 0.0593, + "step": 121485 + }, + { + "epoch": 5.67, + "learning_rate": 1.1198720862790588e-06, + "loss": 0.0967, + "step": 121490 + }, + { + "epoch": 5.67, + "learning_rate": 1.1190883012242723e-06, + "loss": 0.0939, + "step": 121495 + }, + { + "epoch": 5.67, + "learning_rate": 1.1183045161694858e-06, + "loss": 0.1578, + "step": 121500 + }, + { + "epoch": 5.67, + "learning_rate": 1.1175207311146993e-06, + "loss": 0.2754, + "step": 121505 + }, + { + "epoch": 5.67, + "learning_rate": 1.1167369460599126e-06, + "loss": 0.1108, + "step": 121510 + }, + { + "epoch": 5.67, + "learning_rate": 1.115953161005126e-06, + "loss": 0.0037, + "step": 121515 + }, + { + "epoch": 5.67, + "learning_rate": 1.1151693759503395e-06, + "loss": 0.0454, + "step": 121520 + }, + { + "epoch": 5.67, + "learning_rate": 1.114385590895553e-06, + "loss": 0.0473, + "step": 121525 + }, + { + "epoch": 5.67, + "learning_rate": 1.1136018058407663e-06, + "loss": 0.0657, + "step": 121530 + }, + { + "epoch": 5.67, + "learning_rate": 1.1128180207859798e-06, + "loss": 0.0888, + "step": 121535 + }, + { + "epoch": 5.67, + "learning_rate": 1.1120342357311932e-06, + "loss": 0.0498, + "step": 121540 + }, + { + "epoch": 5.67, + "learning_rate": 1.1112504506764065e-06, + "loss": 0.0877, + "step": 121545 + }, + { + "epoch": 5.67, + "learning_rate": 1.11046666562162e-06, + "loss": 0.1933, + "step": 121550 + }, + { + "epoch": 5.67, + "learning_rate": 1.1096828805668335e-06, + "loss": 0.1975, + "step": 121555 + }, + { + "epoch": 5.67, + "learning_rate": 1.1088990955120468e-06, + "loss": 0.0875, + "step": 121560 + }, + { + "epoch": 5.67, + "learning_rate": 1.1081153104572604e-06, + "loss": 0.0093, + "step": 121565 + }, + { + "epoch": 5.67, + "learning_rate": 1.1073315254024737e-06, + "loss": 0.0283, + "step": 121570 + }, + { + "epoch": 5.67, + "learning_rate": 1.106547740347687e-06, + "loss": 0.0821, + "step": 121575 + }, + { + "epoch": 5.67, + "learning_rate": 1.1057639552929007e-06, + "loss": 0.036, + "step": 121580 + }, + { + "epoch": 5.67, + "learning_rate": 1.104980170238114e-06, + "loss": 0.0735, + "step": 121585 + }, + { + "epoch": 5.67, + "learning_rate": 1.1041963851833274e-06, + "loss": 0.1129, + "step": 121590 + }, + { + "epoch": 5.67, + "learning_rate": 1.103412600128541e-06, + "loss": 0.067, + "step": 121595 + }, + { + "epoch": 5.67, + "learning_rate": 1.1026288150737542e-06, + "loss": 0.1754, + "step": 121600 + }, + { + "epoch": 5.67, + "learning_rate": 1.1018450300189677e-06, + "loss": 0.2668, + "step": 121605 + }, + { + "epoch": 5.67, + "learning_rate": 1.1010612449641812e-06, + "loss": 0.1055, + "step": 121610 + }, + { + "epoch": 5.67, + "learning_rate": 1.1002774599093944e-06, + "loss": 0.0407, + "step": 121615 + }, + { + "epoch": 5.67, + "learning_rate": 1.099493674854608e-06, + "loss": 0.0113, + "step": 121620 + }, + { + "epoch": 5.68, + "learning_rate": 1.0987098897998214e-06, + "loss": 0.0126, + "step": 121625 + }, + { + "epoch": 5.68, + "learning_rate": 1.0979261047450349e-06, + "loss": 0.0358, + "step": 121630 + }, + { + "epoch": 5.68, + "learning_rate": 1.0971423196902481e-06, + "loss": 0.0766, + "step": 121635 + }, + { + "epoch": 5.68, + "learning_rate": 1.0963585346354616e-06, + "loss": 0.1352, + "step": 121640 + }, + { + "epoch": 5.68, + "learning_rate": 1.095574749580675e-06, + "loss": 0.0428, + "step": 121645 + }, + { + "epoch": 5.68, + "learning_rate": 1.0947909645258886e-06, + "loss": 0.1308, + "step": 121650 + }, + { + "epoch": 5.68, + "learning_rate": 1.094007179471102e-06, + "loss": 0.1624, + "step": 121655 + }, + { + "epoch": 5.68, + "learning_rate": 1.0932233944163153e-06, + "loss": 0.1009, + "step": 121660 + }, + { + "epoch": 5.68, + "learning_rate": 1.0924396093615288e-06, + "loss": 0.057, + "step": 121665 + }, + { + "epoch": 5.68, + "learning_rate": 1.0916558243067423e-06, + "loss": 0.0318, + "step": 121670 + }, + { + "epoch": 5.68, + "learning_rate": 1.0908720392519556e-06, + "loss": 0.0646, + "step": 121675 + }, + { + "epoch": 5.68, + "learning_rate": 1.090088254197169e-06, + "loss": 0.0556, + "step": 121680 + }, + { + "epoch": 5.68, + "learning_rate": 1.0893044691423825e-06, + "loss": 0.0374, + "step": 121685 + }, + { + "epoch": 5.68, + "learning_rate": 1.0885206840875958e-06, + "loss": 0.0659, + "step": 121690 + }, + { + "epoch": 5.68, + "learning_rate": 1.0877368990328093e-06, + "loss": 0.1471, + "step": 121695 + }, + { + "epoch": 5.68, + "learning_rate": 1.0869531139780228e-06, + "loss": 0.0971, + "step": 121700 + }, + { + "epoch": 5.68, + "learning_rate": 1.086169328923236e-06, + "loss": 0.4055, + "step": 121705 + }, + { + "epoch": 5.68, + "learning_rate": 1.0853855438684497e-06, + "loss": 0.0939, + "step": 121710 + }, + { + "epoch": 5.68, + "learning_rate": 1.084601758813663e-06, + "loss": 0.0114, + "step": 121715 + }, + { + "epoch": 5.68, + "learning_rate": 1.0838179737588765e-06, + "loss": 0.0734, + "step": 121720 + }, + { + "epoch": 5.68, + "learning_rate": 1.08303418870409e-06, + "loss": 0.0386, + "step": 121725 + }, + { + "epoch": 5.68, + "learning_rate": 1.0822504036493032e-06, + "loss": 0.0255, + "step": 121730 + }, + { + "epoch": 5.68, + "learning_rate": 1.0814666185945167e-06, + "loss": 0.1145, + "step": 121735 + }, + { + "epoch": 5.68, + "learning_rate": 1.0806828335397302e-06, + "loss": 0.0645, + "step": 121740 + }, + { + "epoch": 5.68, + "learning_rate": 1.0798990484849437e-06, + "loss": 0.166, + "step": 121745 + }, + { + "epoch": 5.68, + "learning_rate": 1.079115263430157e-06, + "loss": 0.2137, + "step": 121750 + }, + { + "epoch": 5.68, + "learning_rate": 1.0783314783753704e-06, + "loss": 0.1509, + "step": 121755 + }, + { + "epoch": 5.68, + "learning_rate": 1.077547693320584e-06, + "loss": 0.1264, + "step": 121760 + }, + { + "epoch": 5.68, + "learning_rate": 1.0767639082657972e-06, + "loss": 0.0254, + "step": 121765 + }, + { + "epoch": 5.68, + "learning_rate": 1.0759801232110107e-06, + "loss": 0.0379, + "step": 121770 + }, + { + "epoch": 5.68, + "learning_rate": 1.0751963381562242e-06, + "loss": 0.0586, + "step": 121775 + }, + { + "epoch": 5.68, + "learning_rate": 1.0744125531014374e-06, + "loss": 0.0708, + "step": 121780 + }, + { + "epoch": 5.68, + "learning_rate": 1.0736287680466511e-06, + "loss": 0.0748, + "step": 121785 + }, + { + "epoch": 5.68, + "learning_rate": 1.0728449829918644e-06, + "loss": 0.079, + "step": 121790 + }, + { + "epoch": 5.68, + "learning_rate": 1.0720611979370779e-06, + "loss": 0.0826, + "step": 121795 + }, + { + "epoch": 5.68, + "learning_rate": 1.0712774128822914e-06, + "loss": 0.1292, + "step": 121800 + }, + { + "epoch": 5.68, + "learning_rate": 1.0704936278275046e-06, + "loss": 0.3699, + "step": 121805 + }, + { + "epoch": 5.68, + "learning_rate": 1.0697098427727181e-06, + "loss": 0.1241, + "step": 121810 + }, + { + "epoch": 5.68, + "learning_rate": 1.0689260577179316e-06, + "loss": 0.0236, + "step": 121815 + }, + { + "epoch": 5.68, + "learning_rate": 1.0681422726631449e-06, + "loss": 0.0142, + "step": 121820 + }, + { + "epoch": 5.68, + "learning_rate": 1.0673584876083583e-06, + "loss": 0.0294, + "step": 121825 + }, + { + "epoch": 5.68, + "learning_rate": 1.0665747025535718e-06, + "loss": 0.0568, + "step": 121830 + }, + { + "epoch": 5.68, + "learning_rate": 1.065790917498785e-06, + "loss": 0.0282, + "step": 121835 + }, + { + "epoch": 5.69, + "learning_rate": 1.0650071324439986e-06, + "loss": 0.0969, + "step": 121840 + }, + { + "epoch": 5.69, + "learning_rate": 1.064223347389212e-06, + "loss": 0.0886, + "step": 121845 + }, + { + "epoch": 5.69, + "learning_rate": 1.0634395623344255e-06, + "loss": 0.1252, + "step": 121850 + }, + { + "epoch": 5.69, + "learning_rate": 1.0626557772796388e-06, + "loss": 0.2445, + "step": 121855 + }, + { + "epoch": 5.69, + "learning_rate": 1.0618719922248523e-06, + "loss": 0.0983, + "step": 121860 + }, + { + "epoch": 5.69, + "learning_rate": 1.0610882071700658e-06, + "loss": 0.0054, + "step": 121865 + }, + { + "epoch": 5.69, + "learning_rate": 1.0603044221152793e-06, + "loss": 0.0689, + "step": 121870 + }, + { + "epoch": 5.69, + "learning_rate": 1.0595206370604927e-06, + "loss": 0.0224, + "step": 121875 + }, + { + "epoch": 5.69, + "learning_rate": 1.058736852005706e-06, + "loss": 0.0219, + "step": 121880 + }, + { + "epoch": 5.69, + "learning_rate": 1.0579530669509195e-06, + "loss": 0.0729, + "step": 121885 + }, + { + "epoch": 5.69, + "learning_rate": 1.057169281896133e-06, + "loss": 0.1406, + "step": 121890 + }, + { + "epoch": 5.69, + "learning_rate": 1.0563854968413463e-06, + "loss": 0.1095, + "step": 121895 + }, + { + "epoch": 5.69, + "learning_rate": 1.0556017117865597e-06, + "loss": 0.1385, + "step": 121900 + }, + { + "epoch": 5.69, + "learning_rate": 1.0548179267317732e-06, + "loss": 0.2523, + "step": 121905 + }, + { + "epoch": 5.69, + "learning_rate": 1.0540341416769865e-06, + "loss": 0.0777, + "step": 121910 + }, + { + "epoch": 5.69, + "learning_rate": 1.0532503566222e-06, + "loss": 0.0374, + "step": 121915 + }, + { + "epoch": 5.69, + "learning_rate": 1.0524665715674135e-06, + "loss": 0.0268, + "step": 121920 + }, + { + "epoch": 5.69, + "learning_rate": 1.0516827865126267e-06, + "loss": 0.1292, + "step": 121925 + }, + { + "epoch": 5.69, + "learning_rate": 1.0508990014578404e-06, + "loss": 0.0319, + "step": 121930 + }, + { + "epoch": 5.69, + "learning_rate": 1.0501152164030537e-06, + "loss": 0.0694, + "step": 121935 + }, + { + "epoch": 5.69, + "learning_rate": 1.0493314313482672e-06, + "loss": 0.0249, + "step": 121940 + }, + { + "epoch": 5.69, + "learning_rate": 1.0485476462934807e-06, + "loss": 0.0735, + "step": 121945 + }, + { + "epoch": 5.69, + "learning_rate": 1.047763861238694e-06, + "loss": 0.2104, + "step": 121950 + }, + { + "epoch": 5.69, + "learning_rate": 1.0469800761839074e-06, + "loss": 0.1893, + "step": 121955 + }, + { + "epoch": 5.69, + "learning_rate": 1.0461962911291209e-06, + "loss": 0.0977, + "step": 121960 + }, + { + "epoch": 5.69, + "learning_rate": 1.0454125060743342e-06, + "loss": 0.0175, + "step": 121965 + }, + { + "epoch": 5.69, + "learning_rate": 1.0446287210195476e-06, + "loss": 0.0194, + "step": 121970 + }, + { + "epoch": 5.69, + "learning_rate": 1.0438449359647611e-06, + "loss": 0.0664, + "step": 121975 + }, + { + "epoch": 5.69, + "learning_rate": 1.0430611509099746e-06, + "loss": 0.0463, + "step": 121980 + }, + { + "epoch": 5.69, + "learning_rate": 1.0422773658551879e-06, + "loss": 0.0504, + "step": 121985 + }, + { + "epoch": 5.69, + "learning_rate": 1.0414935808004014e-06, + "loss": 0.1254, + "step": 121990 + }, + { + "epoch": 5.69, + "learning_rate": 1.0407097957456148e-06, + "loss": 0.064, + "step": 121995 + }, + { + "epoch": 5.69, + "learning_rate": 1.0399260106908281e-06, + "loss": 0.1051, + "step": 122000 + }, + { + "epoch": 5.69, + "learning_rate": 1.0391422256360418e-06, + "loss": 0.3171, + "step": 122005 + }, + { + "epoch": 5.69, + "learning_rate": 1.038358440581255e-06, + "loss": 0.1252, + "step": 122010 + }, + { + "epoch": 5.69, + "learning_rate": 1.0375746555264686e-06, + "loss": 0.0269, + "step": 122015 + }, + { + "epoch": 5.69, + "learning_rate": 1.036790870471682e-06, + "loss": 0.029, + "step": 122020 + }, + { + "epoch": 5.69, + "learning_rate": 1.0360070854168953e-06, + "loss": 0.0232, + "step": 122025 + }, + { + "epoch": 5.69, + "learning_rate": 1.0352233003621088e-06, + "loss": 0.0706, + "step": 122030 + }, + { + "epoch": 5.69, + "learning_rate": 1.0344395153073223e-06, + "loss": 0.0516, + "step": 122035 + }, + { + "epoch": 5.69, + "learning_rate": 1.0336557302525355e-06, + "loss": 0.0311, + "step": 122040 + }, + { + "epoch": 5.69, + "learning_rate": 1.032871945197749e-06, + "loss": 0.0797, + "step": 122045 + }, + { + "epoch": 5.7, + "learning_rate": 1.0320881601429625e-06, + "loss": 0.0376, + "step": 122050 + }, + { + "epoch": 5.7, + "learning_rate": 1.0313043750881758e-06, + "loss": 0.2682, + "step": 122055 + }, + { + "epoch": 5.7, + "learning_rate": 1.0305205900333893e-06, + "loss": 0.1489, + "step": 122060 + }, + { + "epoch": 5.7, + "learning_rate": 1.0297368049786027e-06, + "loss": 0.0341, + "step": 122065 + }, + { + "epoch": 5.7, + "learning_rate": 1.0289530199238162e-06, + "loss": 0.016, + "step": 122070 + }, + { + "epoch": 5.7, + "learning_rate": 1.0281692348690297e-06, + "loss": 0.0241, + "step": 122075 + }, + { + "epoch": 5.7, + "learning_rate": 1.027385449814243e-06, + "loss": 0.096, + "step": 122080 + }, + { + "epoch": 5.7, + "learning_rate": 1.0266016647594565e-06, + "loss": 0.0292, + "step": 122085 + }, + { + "epoch": 5.7, + "learning_rate": 1.02581787970467e-06, + "loss": 0.0795, + "step": 122090 + }, + { + "epoch": 5.7, + "learning_rate": 1.0250340946498832e-06, + "loss": 0.1448, + "step": 122095 + }, + { + "epoch": 5.7, + "learning_rate": 1.0242503095950967e-06, + "loss": 0.1945, + "step": 122100 + }, + { + "epoch": 5.7, + "learning_rate": 1.0234665245403102e-06, + "loss": 0.2381, + "step": 122105 + }, + { + "epoch": 5.7, + "learning_rate": 1.0226827394855237e-06, + "loss": 0.0892, + "step": 122110 + }, + { + "epoch": 5.7, + "learning_rate": 1.021898954430737e-06, + "loss": 0.0083, + "step": 122115 + }, + { + "epoch": 5.7, + "learning_rate": 1.0211151693759504e-06, + "loss": 0.0217, + "step": 122120 + }, + { + "epoch": 5.7, + "learning_rate": 1.0203313843211639e-06, + "loss": 0.0218, + "step": 122125 + }, + { + "epoch": 5.7, + "learning_rate": 1.0195475992663772e-06, + "loss": 0.0426, + "step": 122130 + }, + { + "epoch": 5.7, + "learning_rate": 1.0187638142115909e-06, + "loss": 0.0948, + "step": 122135 + }, + { + "epoch": 5.7, + "learning_rate": 1.0179800291568041e-06, + "loss": 0.0627, + "step": 122140 + }, + { + "epoch": 5.7, + "learning_rate": 1.0171962441020174e-06, + "loss": 0.1859, + "step": 122145 + }, + { + "epoch": 5.7, + "learning_rate": 1.016412459047231e-06, + "loss": 0.1398, + "step": 122150 + }, + { + "epoch": 5.7, + "learning_rate": 1.0156286739924444e-06, + "loss": 0.2217, + "step": 122155 + }, + { + "epoch": 5.7, + "learning_rate": 1.0148448889376578e-06, + "loss": 0.1076, + "step": 122160 + }, + { + "epoch": 5.7, + "learning_rate": 1.0140611038828713e-06, + "loss": 0.0352, + "step": 122165 + }, + { + "epoch": 5.7, + "learning_rate": 1.0132773188280846e-06, + "loss": 0.0304, + "step": 122170 + }, + { + "epoch": 5.7, + "learning_rate": 1.012493533773298e-06, + "loss": 0.1042, + "step": 122175 + }, + { + "epoch": 5.7, + "learning_rate": 1.0117097487185116e-06, + "loss": 0.0539, + "step": 122180 + }, + { + "epoch": 5.7, + "learning_rate": 1.0109259636637248e-06, + "loss": 0.1051, + "step": 122185 + }, + { + "epoch": 5.7, + "learning_rate": 1.0101421786089383e-06, + "loss": 0.0662, + "step": 122190 + }, + { + "epoch": 5.7, + "learning_rate": 1.0093583935541518e-06, + "loss": 0.1859, + "step": 122195 + }, + { + "epoch": 5.7, + "learning_rate": 1.0085746084993653e-06, + "loss": 0.1296, + "step": 122200 + }, + { + "epoch": 5.7, + "learning_rate": 1.0077908234445786e-06, + "loss": 0.2456, + "step": 122205 + }, + { + "epoch": 5.7, + "learning_rate": 1.007007038389792e-06, + "loss": 0.1291, + "step": 122210 + }, + { + "epoch": 5.7, + "learning_rate": 1.0062232533350055e-06, + "loss": 0.0626, + "step": 122215 + }, + { + "epoch": 5.7, + "learning_rate": 1.005439468280219e-06, + "loss": 0.0892, + "step": 122220 + }, + { + "epoch": 5.7, + "learning_rate": 1.0046556832254323e-06, + "loss": 0.0785, + "step": 122225 + }, + { + "epoch": 5.7, + "learning_rate": 1.0038718981706458e-06, + "loss": 0.0375, + "step": 122230 + }, + { + "epoch": 5.7, + "learning_rate": 1.0030881131158592e-06, + "loss": 0.0452, + "step": 122235 + }, + { + "epoch": 5.7, + "learning_rate": 1.0023043280610727e-06, + "loss": 0.0938, + "step": 122240 + }, + { + "epoch": 5.7, + "learning_rate": 1.001520543006286e-06, + "loss": 0.0747, + "step": 122245 + }, + { + "epoch": 5.7, + "learning_rate": 1.0007367579514995e-06, + "loss": 0.1274, + "step": 122250 + }, + { + "epoch": 5.7, + "learning_rate": 9.99952972896713e-07, + "loss": 0.4308, + "step": 122255 + }, + { + "epoch": 5.7, + "learning_rate": 9.991691878419262e-07, + "loss": 0.1138, + "step": 122260 + }, + { + "epoch": 5.71, + "learning_rate": 9.983854027871397e-07, + "loss": 0.0062, + "step": 122265 + }, + { + "epoch": 5.71, + "learning_rate": 9.976016177323532e-07, + "loss": 0.0129, + "step": 122270 + }, + { + "epoch": 5.71, + "learning_rate": 9.968178326775665e-07, + "loss": 0.0311, + "step": 122275 + }, + { + "epoch": 5.71, + "learning_rate": 9.960340476227801e-07, + "loss": 0.0622, + "step": 122280 + }, + { + "epoch": 5.71, + "learning_rate": 9.952502625679934e-07, + "loss": 0.0373, + "step": 122285 + }, + { + "epoch": 5.71, + "learning_rate": 9.944664775132067e-07, + "loss": 0.0864, + "step": 122290 + }, + { + "epoch": 5.71, + "learning_rate": 9.936826924584204e-07, + "loss": 0.1067, + "step": 122295 + }, + { + "epoch": 5.71, + "learning_rate": 9.928989074036337e-07, + "loss": 0.1115, + "step": 122300 + }, + { + "epoch": 5.71, + "learning_rate": 9.921151223488471e-07, + "loss": 0.2187, + "step": 122305 + }, + { + "epoch": 5.71, + "learning_rate": 9.913313372940606e-07, + "loss": 0.1283, + "step": 122310 + }, + { + "epoch": 5.71, + "learning_rate": 9.905475522392739e-07, + "loss": 0.0137, + "step": 122315 + }, + { + "epoch": 5.71, + "learning_rate": 9.897637671844874e-07, + "loss": 0.0303, + "step": 122320 + }, + { + "epoch": 5.71, + "learning_rate": 9.889799821297009e-07, + "loss": 0.0138, + "step": 122325 + }, + { + "epoch": 5.71, + "learning_rate": 9.881961970749143e-07, + "loss": 0.0635, + "step": 122330 + }, + { + "epoch": 5.71, + "learning_rate": 9.874124120201276e-07, + "loss": 0.0625, + "step": 122335 + }, + { + "epoch": 5.71, + "learning_rate": 9.86628626965341e-07, + "loss": 0.0603, + "step": 122340 + }, + { + "epoch": 5.71, + "learning_rate": 9.858448419105546e-07, + "loss": 0.1419, + "step": 122345 + }, + { + "epoch": 5.71, + "learning_rate": 9.850610568557678e-07, + "loss": 0.1018, + "step": 122350 + }, + { + "epoch": 5.71, + "learning_rate": 9.842772718009813e-07, + "loss": 0.3135, + "step": 122355 + }, + { + "epoch": 5.71, + "learning_rate": 9.834934867461948e-07, + "loss": 0.1303, + "step": 122360 + }, + { + "epoch": 5.71, + "learning_rate": 9.827097016914083e-07, + "loss": 0.0272, + "step": 122365 + }, + { + "epoch": 5.71, + "learning_rate": 9.819259166366218e-07, + "loss": 0.0581, + "step": 122370 + }, + { + "epoch": 5.71, + "learning_rate": 9.81142131581835e-07, + "loss": 0.0455, + "step": 122375 + }, + { + "epoch": 5.71, + "learning_rate": 9.803583465270485e-07, + "loss": 0.0264, + "step": 122380 + }, + { + "epoch": 5.71, + "learning_rate": 9.79574561472262e-07, + "loss": 0.0762, + "step": 122385 + }, + { + "epoch": 5.71, + "learning_rate": 9.787907764174753e-07, + "loss": 0.0447, + "step": 122390 + }, + { + "epoch": 5.71, + "learning_rate": 9.780069913626888e-07, + "loss": 0.0891, + "step": 122395 + }, + { + "epoch": 5.71, + "learning_rate": 9.772232063079022e-07, + "loss": 0.191, + "step": 122400 + }, + { + "epoch": 5.71, + "learning_rate": 9.764394212531155e-07, + "loss": 0.2204, + "step": 122405 + }, + { + "epoch": 5.71, + "learning_rate": 9.75655636198329e-07, + "loss": 0.0971, + "step": 122410 + }, + { + "epoch": 5.71, + "learning_rate": 9.748718511435425e-07, + "loss": 0.0223, + "step": 122415 + }, + { + "epoch": 5.71, + "learning_rate": 9.740880660887557e-07, + "loss": 0.0322, + "step": 122420 + }, + { + "epoch": 5.71, + "learning_rate": 9.733042810339694e-07, + "loss": 0.0266, + "step": 122425 + }, + { + "epoch": 5.71, + "learning_rate": 9.725204959791827e-07, + "loss": 0.0188, + "step": 122430 + }, + { + "epoch": 5.71, + "learning_rate": 9.717367109243962e-07, + "loss": 0.0506, + "step": 122435 + }, + { + "epoch": 5.71, + "learning_rate": 9.709529258696097e-07, + "loss": 0.0549, + "step": 122440 + }, + { + "epoch": 5.71, + "learning_rate": 9.70169140814823e-07, + "loss": 0.0526, + "step": 122445 + }, + { + "epoch": 5.71, + "learning_rate": 9.693853557600364e-07, + "loss": 0.1659, + "step": 122450 + }, + { + "epoch": 5.71, + "learning_rate": 9.6860157070525e-07, + "loss": 0.3478, + "step": 122455 + }, + { + "epoch": 5.71, + "learning_rate": 9.678177856504634e-07, + "loss": 0.1121, + "step": 122460 + }, + { + "epoch": 5.71, + "learning_rate": 9.670340005956767e-07, + "loss": 0.0361, + "step": 122465 + }, + { + "epoch": 5.71, + "learning_rate": 9.662502155408901e-07, + "loss": 0.0611, + "step": 122470 + }, + { + "epoch": 5.71, + "learning_rate": 9.654664304861036e-07, + "loss": 0.0193, + "step": 122475 + }, + { + "epoch": 5.72, + "learning_rate": 9.64682645431317e-07, + "loss": 0.0431, + "step": 122480 + }, + { + "epoch": 5.72, + "learning_rate": 9.638988603765304e-07, + "loss": 0.0692, + "step": 122485 + }, + { + "epoch": 5.72, + "learning_rate": 9.631150753217439e-07, + "loss": 0.0312, + "step": 122490 + }, + { + "epoch": 5.72, + "learning_rate": 9.623312902669571e-07, + "loss": 0.0769, + "step": 122495 + }, + { + "epoch": 5.72, + "learning_rate": 9.615475052121708e-07, + "loss": 0.1026, + "step": 122500 + }, + { + "epoch": 5.72, + "learning_rate": 9.60763720157384e-07, + "loss": 0.3082, + "step": 122505 + }, + { + "epoch": 5.72, + "learning_rate": 9.599799351025976e-07, + "loss": 0.1518, + "step": 122510 + }, + { + "epoch": 5.72, + "learning_rate": 9.59196150047811e-07, + "loss": 0.0305, + "step": 122515 + }, + { + "epoch": 5.72, + "learning_rate": 9.584123649930243e-07, + "loss": 0.051, + "step": 122520 + }, + { + "epoch": 5.72, + "learning_rate": 9.576285799382378e-07, + "loss": 0.0373, + "step": 122525 + }, + { + "epoch": 5.72, + "learning_rate": 9.568447948834513e-07, + "loss": 0.0636, + "step": 122530 + }, + { + "epoch": 5.72, + "learning_rate": 9.560610098286646e-07, + "loss": 0.0838, + "step": 122535 + }, + { + "epoch": 5.72, + "learning_rate": 9.55277224773878e-07, + "loss": 0.0359, + "step": 122540 + }, + { + "epoch": 5.72, + "learning_rate": 9.544934397190915e-07, + "loss": 0.1131, + "step": 122545 + }, + { + "epoch": 5.72, + "learning_rate": 9.537096546643048e-07, + "loss": 0.1072, + "step": 122550 + }, + { + "epoch": 5.72, + "learning_rate": 9.529258696095184e-07, + "loss": 0.3184, + "step": 122555 + }, + { + "epoch": 5.72, + "learning_rate": 9.521420845547318e-07, + "loss": 0.0824, + "step": 122560 + }, + { + "epoch": 5.72, + "learning_rate": 9.513582994999452e-07, + "loss": 0.0506, + "step": 122565 + }, + { + "epoch": 5.72, + "learning_rate": 9.505745144451586e-07, + "loss": 0.0226, + "step": 122570 + }, + { + "epoch": 5.72, + "learning_rate": 9.49790729390372e-07, + "loss": 0.0116, + "step": 122575 + }, + { + "epoch": 5.72, + "learning_rate": 9.490069443355855e-07, + "loss": 0.0535, + "step": 122580 + }, + { + "epoch": 5.72, + "learning_rate": 9.482231592807989e-07, + "loss": 0.0864, + "step": 122585 + }, + { + "epoch": 5.72, + "learning_rate": 9.474393742260124e-07, + "loss": 0.0463, + "step": 122590 + }, + { + "epoch": 5.72, + "learning_rate": 9.466555891712257e-07, + "loss": 0.1092, + "step": 122595 + }, + { + "epoch": 5.72, + "learning_rate": 9.458718041164391e-07, + "loss": 0.1153, + "step": 122600 + }, + { + "epoch": 5.72, + "learning_rate": 9.450880190616527e-07, + "loss": 0.2128, + "step": 122605 + }, + { + "epoch": 5.72, + "learning_rate": 9.443042340068661e-07, + "loss": 0.086, + "step": 122610 + }, + { + "epoch": 5.72, + "learning_rate": 9.435204489520794e-07, + "loss": 0.0281, + "step": 122615 + }, + { + "epoch": 5.72, + "learning_rate": 9.427366638972929e-07, + "loss": 0.0532, + "step": 122620 + }, + { + "epoch": 5.72, + "learning_rate": 9.419528788425063e-07, + "loss": 0.0319, + "step": 122625 + }, + { + "epoch": 5.72, + "learning_rate": 9.411690937877198e-07, + "loss": 0.0616, + "step": 122630 + }, + { + "epoch": 5.72, + "learning_rate": 9.403853087329332e-07, + "loss": 0.0482, + "step": 122635 + }, + { + "epoch": 5.72, + "learning_rate": 9.396015236781465e-07, + "loss": 0.0664, + "step": 122640 + }, + { + "epoch": 5.72, + "learning_rate": 9.3881773862336e-07, + "loss": 0.0684, + "step": 122645 + }, + { + "epoch": 5.72, + "learning_rate": 9.380339535685734e-07, + "loss": 0.1922, + "step": 122650 + }, + { + "epoch": 5.72, + "learning_rate": 9.372501685137869e-07, + "loss": 0.2819, + "step": 122655 + }, + { + "epoch": 5.72, + "learning_rate": 9.364663834590002e-07, + "loss": 0.0817, + "step": 122660 + }, + { + "epoch": 5.72, + "learning_rate": 9.356825984042136e-07, + "loss": 0.0482, + "step": 122665 + }, + { + "epoch": 5.72, + "learning_rate": 9.348988133494272e-07, + "loss": 0.095, + "step": 122670 + }, + { + "epoch": 5.72, + "learning_rate": 9.341150282946406e-07, + "loss": 0.0318, + "step": 122675 + }, + { + "epoch": 5.72, + "learning_rate": 9.33331243239854e-07, + "loss": 0.0356, + "step": 122680 + }, + { + "epoch": 5.72, + "learning_rate": 9.325474581850674e-07, + "loss": 0.09, + "step": 122685 + }, + { + "epoch": 5.72, + "learning_rate": 9.317636731302808e-07, + "loss": 0.0315, + "step": 122690 + }, + { + "epoch": 5.73, + "learning_rate": 9.309798880754943e-07, + "loss": 0.099, + "step": 122695 + }, + { + "epoch": 5.73, + "learning_rate": 9.301961030207077e-07, + "loss": 0.0515, + "step": 122700 + }, + { + "epoch": 5.73, + "learning_rate": 9.294123179659211e-07, + "loss": 0.1471, + "step": 122705 + }, + { + "epoch": 5.73, + "learning_rate": 9.286285329111345e-07, + "loss": 0.1335, + "step": 122710 + }, + { + "epoch": 5.73, + "learning_rate": 9.278447478563479e-07, + "loss": 0.0433, + "step": 122715 + }, + { + "epoch": 5.73, + "learning_rate": 9.270609628015614e-07, + "loss": 0.0072, + "step": 122720 + }, + { + "epoch": 5.73, + "learning_rate": 9.262771777467748e-07, + "loss": 0.0335, + "step": 122725 + }, + { + "epoch": 5.73, + "learning_rate": 9.254933926919882e-07, + "loss": 0.1065, + "step": 122730 + }, + { + "epoch": 5.73, + "learning_rate": 9.247096076372017e-07, + "loss": 0.0447, + "step": 122735 + }, + { + "epoch": 5.73, + "learning_rate": 9.23925822582415e-07, + "loss": 0.0981, + "step": 122740 + }, + { + "epoch": 5.73, + "learning_rate": 9.231420375276284e-07, + "loss": 0.1174, + "step": 122745 + }, + { + "epoch": 5.73, + "learning_rate": 9.22358252472842e-07, + "loss": 0.1437, + "step": 122750 + }, + { + "epoch": 5.73, + "learning_rate": 9.215744674180554e-07, + "loss": 0.3263, + "step": 122755 + }, + { + "epoch": 5.73, + "learning_rate": 9.207906823632688e-07, + "loss": 0.0906, + "step": 122760 + }, + { + "epoch": 5.73, + "learning_rate": 9.200068973084822e-07, + "loss": 0.0186, + "step": 122765 + }, + { + "epoch": 5.73, + "learning_rate": 9.192231122536956e-07, + "loss": 0.0564, + "step": 122770 + }, + { + "epoch": 5.73, + "learning_rate": 9.184393271989091e-07, + "loss": 0.0547, + "step": 122775 + }, + { + "epoch": 5.73, + "learning_rate": 9.176555421441224e-07, + "loss": 0.0294, + "step": 122780 + }, + { + "epoch": 5.73, + "learning_rate": 9.168717570893359e-07, + "loss": 0.0273, + "step": 122785 + }, + { + "epoch": 5.73, + "learning_rate": 9.160879720345493e-07, + "loss": 0.0662, + "step": 122790 + }, + { + "epoch": 5.73, + "learning_rate": 9.153041869797627e-07, + "loss": 0.1098, + "step": 122795 + }, + { + "epoch": 5.73, + "learning_rate": 9.145204019249762e-07, + "loss": 0.1221, + "step": 122800 + }, + { + "epoch": 5.73, + "learning_rate": 9.137366168701895e-07, + "loss": 0.2861, + "step": 122805 + }, + { + "epoch": 5.73, + "learning_rate": 9.129528318154029e-07, + "loss": 0.1048, + "step": 122810 + }, + { + "epoch": 5.73, + "learning_rate": 9.121690467606165e-07, + "loss": 0.0081, + "step": 122815 + }, + { + "epoch": 5.73, + "learning_rate": 9.113852617058299e-07, + "loss": 0.0302, + "step": 122820 + }, + { + "epoch": 5.73, + "learning_rate": 9.106014766510434e-07, + "loss": 0.019, + "step": 122825 + }, + { + "epoch": 5.73, + "learning_rate": 9.098176915962567e-07, + "loss": 0.0501, + "step": 122830 + }, + { + "epoch": 5.73, + "learning_rate": 9.090339065414701e-07, + "loss": 0.0166, + "step": 122835 + }, + { + "epoch": 5.73, + "learning_rate": 9.082501214866836e-07, + "loss": 0.0748, + "step": 122840 + }, + { + "epoch": 5.73, + "learning_rate": 9.07466336431897e-07, + "loss": 0.0294, + "step": 122845 + }, + { + "epoch": 5.73, + "learning_rate": 9.066825513771105e-07, + "loss": 0.1187, + "step": 122850 + }, + { + "epoch": 5.73, + "learning_rate": 9.058987663223238e-07, + "loss": 0.2233, + "step": 122855 + }, + { + "epoch": 5.73, + "learning_rate": 9.051149812675372e-07, + "loss": 0.1032, + "step": 122860 + }, + { + "epoch": 5.73, + "learning_rate": 9.043311962127507e-07, + "loss": 0.031, + "step": 122865 + }, + { + "epoch": 5.73, + "learning_rate": 9.035474111579641e-07, + "loss": 0.0393, + "step": 122870 + }, + { + "epoch": 5.73, + "learning_rate": 9.027636261031774e-07, + "loss": 0.0644, + "step": 122875 + }, + { + "epoch": 5.73, + "learning_rate": 9.01979841048391e-07, + "loss": 0.0783, + "step": 122880 + }, + { + "epoch": 5.73, + "learning_rate": 9.011960559936043e-07, + "loss": 0.0759, + "step": 122885 + }, + { + "epoch": 5.73, + "learning_rate": 9.004122709388179e-07, + "loss": 0.0996, + "step": 122890 + }, + { + "epoch": 5.73, + "learning_rate": 8.996284858840313e-07, + "loss": 0.0627, + "step": 122895 + }, + { + "epoch": 5.73, + "learning_rate": 8.988447008292446e-07, + "loss": 0.0756, + "step": 122900 + }, + { + "epoch": 5.73, + "learning_rate": 8.980609157744581e-07, + "loss": 0.1225, + "step": 122905 + }, + { + "epoch": 5.74, + "learning_rate": 8.972771307196715e-07, + "loss": 0.1195, + "step": 122910 + }, + { + "epoch": 5.74, + "learning_rate": 8.96493345664885e-07, + "loss": 0.0108, + "step": 122915 + }, + { + "epoch": 5.74, + "learning_rate": 8.957095606100984e-07, + "loss": 0.0357, + "step": 122920 + }, + { + "epoch": 5.74, + "learning_rate": 8.949257755553117e-07, + "loss": 0.0216, + "step": 122925 + }, + { + "epoch": 5.74, + "learning_rate": 8.941419905005252e-07, + "loss": 0.0223, + "step": 122930 + }, + { + "epoch": 5.74, + "learning_rate": 8.933582054457386e-07, + "loss": 0.0261, + "step": 122935 + }, + { + "epoch": 5.74, + "learning_rate": 8.92574420390952e-07, + "loss": 0.0575, + "step": 122940 + }, + { + "epoch": 5.74, + "learning_rate": 8.917906353361655e-07, + "loss": 0.066, + "step": 122945 + }, + { + "epoch": 5.74, + "learning_rate": 8.910068502813788e-07, + "loss": 0.0518, + "step": 122950 + }, + { + "epoch": 5.74, + "learning_rate": 8.902230652265924e-07, + "loss": 0.2972, + "step": 122955 + }, + { + "epoch": 5.74, + "learning_rate": 8.894392801718058e-07, + "loss": 0.1078, + "step": 122960 + }, + { + "epoch": 5.74, + "learning_rate": 8.886554951170192e-07, + "loss": 0.0131, + "step": 122965 + }, + { + "epoch": 5.74, + "learning_rate": 8.878717100622327e-07, + "loss": 0.0323, + "step": 122970 + }, + { + "epoch": 5.74, + "learning_rate": 8.87087925007446e-07, + "loss": 0.0152, + "step": 122975 + }, + { + "epoch": 5.74, + "learning_rate": 8.863041399526595e-07, + "loss": 0.0689, + "step": 122980 + }, + { + "epoch": 5.74, + "learning_rate": 8.855203548978729e-07, + "loss": 0.0789, + "step": 122985 + }, + { + "epoch": 5.74, + "learning_rate": 8.847365698430863e-07, + "loss": 0.1257, + "step": 122990 + }, + { + "epoch": 5.74, + "learning_rate": 8.839527847882997e-07, + "loss": 0.0598, + "step": 122995 + }, + { + "epoch": 5.74, + "learning_rate": 8.831689997335131e-07, + "loss": 0.192, + "step": 123000 + }, + { + "epoch": 5.74, + "learning_rate": 8.823852146787265e-07, + "loss": 0.1543, + "step": 123005 + }, + { + "epoch": 5.74, + "learning_rate": 8.8160142962394e-07, + "loss": 0.0894, + "step": 123010 + }, + { + "epoch": 5.74, + "learning_rate": 8.808176445691534e-07, + "loss": 0.0661, + "step": 123015 + }, + { + "epoch": 5.74, + "learning_rate": 8.800338595143669e-07, + "loss": 0.0229, + "step": 123020 + }, + { + "epoch": 5.74, + "learning_rate": 8.792500744595803e-07, + "loss": 0.0451, + "step": 123025 + }, + { + "epoch": 5.74, + "learning_rate": 8.784662894047936e-07, + "loss": 0.04, + "step": 123030 + }, + { + "epoch": 5.74, + "learning_rate": 8.776825043500072e-07, + "loss": 0.0539, + "step": 123035 + }, + { + "epoch": 5.74, + "learning_rate": 8.768987192952206e-07, + "loss": 0.0427, + "step": 123040 + }, + { + "epoch": 5.74, + "learning_rate": 8.76114934240434e-07, + "loss": 0.0852, + "step": 123045 + }, + { + "epoch": 5.74, + "learning_rate": 8.753311491856474e-07, + "loss": 0.1067, + "step": 123050 + }, + { + "epoch": 5.74, + "learning_rate": 8.745473641308608e-07, + "loss": 0.0869, + "step": 123055 + }, + { + "epoch": 5.74, + "learning_rate": 8.737635790760743e-07, + "loss": 0.0773, + "step": 123060 + }, + { + "epoch": 5.74, + "learning_rate": 8.729797940212877e-07, + "loss": 0.0168, + "step": 123065 + }, + { + "epoch": 5.74, + "learning_rate": 8.72196008966501e-07, + "loss": 0.0329, + "step": 123070 + }, + { + "epoch": 5.74, + "learning_rate": 8.714122239117145e-07, + "loss": 0.0443, + "step": 123075 + }, + { + "epoch": 5.74, + "learning_rate": 8.706284388569279e-07, + "loss": 0.0428, + "step": 123080 + }, + { + "epoch": 5.74, + "learning_rate": 8.698446538021414e-07, + "loss": 0.0678, + "step": 123085 + }, + { + "epoch": 5.74, + "learning_rate": 8.690608687473547e-07, + "loss": 0.0552, + "step": 123090 + }, + { + "epoch": 5.74, + "learning_rate": 8.682770836925681e-07, + "loss": 0.108, + "step": 123095 + }, + { + "epoch": 5.74, + "learning_rate": 8.674932986377817e-07, + "loss": 0.1061, + "step": 123100 + }, + { + "epoch": 5.74, + "learning_rate": 8.667095135829951e-07, + "loss": 0.2139, + "step": 123105 + }, + { + "epoch": 5.74, + "learning_rate": 8.659257285282086e-07, + "loss": 0.1057, + "step": 123110 + }, + { + "epoch": 5.74, + "learning_rate": 8.651419434734219e-07, + "loss": 0.0211, + "step": 123115 + }, + { + "epoch": 5.74, + "learning_rate": 8.643581584186353e-07, + "loss": 0.0262, + "step": 123120 + }, + { + "epoch": 5.75, + "learning_rate": 8.635743733638488e-07, + "loss": 0.0871, + "step": 123125 + }, + { + "epoch": 5.75, + "learning_rate": 8.627905883090622e-07, + "loss": 0.0444, + "step": 123130 + }, + { + "epoch": 5.75, + "learning_rate": 8.620068032542757e-07, + "loss": 0.072, + "step": 123135 + }, + { + "epoch": 5.75, + "learning_rate": 8.61223018199489e-07, + "loss": 0.0715, + "step": 123140 + }, + { + "epoch": 5.75, + "learning_rate": 8.604392331447024e-07, + "loss": 0.1519, + "step": 123145 + }, + { + "epoch": 5.75, + "learning_rate": 8.596554480899159e-07, + "loss": 0.2025, + "step": 123150 + }, + { + "epoch": 5.75, + "learning_rate": 8.588716630351293e-07, + "loss": 0.338, + "step": 123155 + }, + { + "epoch": 5.75, + "learning_rate": 8.580878779803426e-07, + "loss": 0.0958, + "step": 123160 + }, + { + "epoch": 5.75, + "learning_rate": 8.573040929255562e-07, + "loss": 0.0195, + "step": 123165 + }, + { + "epoch": 5.75, + "learning_rate": 8.565203078707696e-07, + "loss": 0.0393, + "step": 123170 + }, + { + "epoch": 5.75, + "learning_rate": 8.557365228159831e-07, + "loss": 0.0347, + "step": 123175 + }, + { + "epoch": 5.75, + "learning_rate": 8.549527377611965e-07, + "loss": 0.0662, + "step": 123180 + }, + { + "epoch": 5.75, + "learning_rate": 8.541689527064098e-07, + "loss": 0.0703, + "step": 123185 + }, + { + "epoch": 5.75, + "learning_rate": 8.533851676516233e-07, + "loss": 0.0471, + "step": 123190 + }, + { + "epoch": 5.75, + "learning_rate": 8.526013825968367e-07, + "loss": 0.0517, + "step": 123195 + }, + { + "epoch": 5.75, + "learning_rate": 8.518175975420502e-07, + "loss": 0.2205, + "step": 123200 + }, + { + "epoch": 5.75, + "learning_rate": 8.510338124872636e-07, + "loss": 0.1899, + "step": 123205 + }, + { + "epoch": 5.75, + "learning_rate": 8.502500274324769e-07, + "loss": 0.0847, + "step": 123210 + }, + { + "epoch": 5.75, + "learning_rate": 8.494662423776904e-07, + "loss": 0.0073, + "step": 123215 + }, + { + "epoch": 5.75, + "learning_rate": 8.486824573229038e-07, + "loss": 0.0139, + "step": 123220 + }, + { + "epoch": 5.75, + "learning_rate": 8.478986722681172e-07, + "loss": 0.0306, + "step": 123225 + }, + { + "epoch": 5.75, + "learning_rate": 8.471148872133307e-07, + "loss": 0.0285, + "step": 123230 + }, + { + "epoch": 5.75, + "learning_rate": 8.46331102158544e-07, + "loss": 0.0272, + "step": 123235 + }, + { + "epoch": 5.75, + "learning_rate": 8.455473171037576e-07, + "loss": 0.0601, + "step": 123240 + }, + { + "epoch": 5.75, + "learning_rate": 8.44763532048971e-07, + "loss": 0.1205, + "step": 123245 + }, + { + "epoch": 5.75, + "learning_rate": 8.439797469941844e-07, + "loss": 0.1304, + "step": 123250 + }, + { + "epoch": 5.75, + "learning_rate": 8.431959619393979e-07, + "loss": 0.3329, + "step": 123255 + }, + { + "epoch": 5.75, + "learning_rate": 8.424121768846112e-07, + "loss": 0.0734, + "step": 123260 + }, + { + "epoch": 5.75, + "learning_rate": 8.416283918298247e-07, + "loss": 0.0089, + "step": 123265 + }, + { + "epoch": 5.75, + "learning_rate": 8.408446067750381e-07, + "loss": 0.0497, + "step": 123270 + }, + { + "epoch": 5.75, + "learning_rate": 8.400608217202515e-07, + "loss": 0.0184, + "step": 123275 + }, + { + "epoch": 5.75, + "learning_rate": 8.39277036665465e-07, + "loss": 0.0737, + "step": 123280 + }, + { + "epoch": 5.75, + "learning_rate": 8.384932516106783e-07, + "loss": 0.0881, + "step": 123285 + }, + { + "epoch": 5.75, + "learning_rate": 8.377094665558917e-07, + "loss": 0.1049, + "step": 123290 + }, + { + "epoch": 5.75, + "learning_rate": 8.369256815011052e-07, + "loss": 0.0925, + "step": 123295 + }, + { + "epoch": 5.75, + "learning_rate": 8.361418964463186e-07, + "loss": 0.1627, + "step": 123300 + }, + { + "epoch": 5.75, + "learning_rate": 8.353581113915322e-07, + "loss": 0.3616, + "step": 123305 + }, + { + "epoch": 5.75, + "learning_rate": 8.345743263367455e-07, + "loss": 0.1208, + "step": 123310 + }, + { + "epoch": 5.75, + "learning_rate": 8.337905412819589e-07, + "loss": 0.0099, + "step": 123315 + }, + { + "epoch": 5.75, + "learning_rate": 8.330067562271724e-07, + "loss": 0.0175, + "step": 123320 + }, + { + "epoch": 5.75, + "learning_rate": 8.322229711723858e-07, + "loss": 0.0638, + "step": 123325 + }, + { + "epoch": 5.75, + "learning_rate": 8.314391861175992e-07, + "loss": 0.0435, + "step": 123330 + }, + { + "epoch": 5.75, + "learning_rate": 8.306554010628126e-07, + "loss": 0.0526, + "step": 123335 + }, + { + "epoch": 5.76, + "learning_rate": 8.29871616008026e-07, + "loss": 0.059, + "step": 123340 + }, + { + "epoch": 5.76, + "learning_rate": 8.290878309532395e-07, + "loss": 0.1033, + "step": 123345 + }, + { + "epoch": 5.76, + "learning_rate": 8.283040458984529e-07, + "loss": 0.0492, + "step": 123350 + }, + { + "epoch": 5.76, + "learning_rate": 8.275202608436662e-07, + "loss": 0.1298, + "step": 123355 + }, + { + "epoch": 5.76, + "learning_rate": 8.267364757888797e-07, + "loss": 0.0818, + "step": 123360 + }, + { + "epoch": 5.76, + "learning_rate": 8.259526907340931e-07, + "loss": 0.0153, + "step": 123365 + }, + { + "epoch": 5.76, + "learning_rate": 8.251689056793067e-07, + "loss": 0.0285, + "step": 123370 + }, + { + "epoch": 5.76, + "learning_rate": 8.2438512062452e-07, + "loss": 0.0239, + "step": 123375 + }, + { + "epoch": 5.76, + "learning_rate": 8.236013355697333e-07, + "loss": 0.1306, + "step": 123380 + }, + { + "epoch": 5.76, + "learning_rate": 8.228175505149469e-07, + "loss": 0.0335, + "step": 123385 + }, + { + "epoch": 5.76, + "learning_rate": 8.220337654601603e-07, + "loss": 0.0613, + "step": 123390 + }, + { + "epoch": 5.76, + "learning_rate": 8.212499804053738e-07, + "loss": 0.0531, + "step": 123395 + }, + { + "epoch": 5.76, + "learning_rate": 8.204661953505872e-07, + "loss": 0.2323, + "step": 123400 + }, + { + "epoch": 5.76, + "learning_rate": 8.196824102958005e-07, + "loss": 0.3191, + "step": 123405 + }, + { + "epoch": 5.76, + "learning_rate": 8.18898625241014e-07, + "loss": 0.13, + "step": 123410 + }, + { + "epoch": 5.76, + "learning_rate": 8.181148401862274e-07, + "loss": 0.0238, + "step": 123415 + }, + { + "epoch": 5.76, + "learning_rate": 8.173310551314408e-07, + "loss": 0.0412, + "step": 123420 + }, + { + "epoch": 5.76, + "learning_rate": 8.165472700766542e-07, + "loss": 0.0354, + "step": 123425 + }, + { + "epoch": 5.76, + "learning_rate": 8.157634850218676e-07, + "loss": 0.062, + "step": 123430 + }, + { + "epoch": 5.76, + "learning_rate": 8.149796999670811e-07, + "loss": 0.0495, + "step": 123435 + }, + { + "epoch": 5.76, + "learning_rate": 8.141959149122945e-07, + "loss": 0.0884, + "step": 123440 + }, + { + "epoch": 5.76, + "learning_rate": 8.134121298575079e-07, + "loss": 0.067, + "step": 123445 + }, + { + "epoch": 5.76, + "learning_rate": 8.126283448027214e-07, + "loss": 0.0691, + "step": 123450 + }, + { + "epoch": 5.76, + "learning_rate": 8.118445597479348e-07, + "loss": 0.2667, + "step": 123455 + }, + { + "epoch": 5.76, + "learning_rate": 8.110607746931483e-07, + "loss": 0.1347, + "step": 123460 + }, + { + "epoch": 5.76, + "learning_rate": 8.102769896383617e-07, + "loss": 0.02, + "step": 123465 + }, + { + "epoch": 5.76, + "learning_rate": 8.094932045835751e-07, + "loss": 0.0603, + "step": 123470 + }, + { + "epoch": 5.76, + "learning_rate": 8.087094195287885e-07, + "loss": 0.0343, + "step": 123475 + }, + { + "epoch": 5.76, + "learning_rate": 8.079256344740019e-07, + "loss": 0.0347, + "step": 123480 + }, + { + "epoch": 5.76, + "learning_rate": 8.071418494192153e-07, + "loss": 0.0908, + "step": 123485 + }, + { + "epoch": 5.76, + "learning_rate": 8.063580643644288e-07, + "loss": 0.1062, + "step": 123490 + }, + { + "epoch": 5.76, + "learning_rate": 8.055742793096421e-07, + "loss": 0.0733, + "step": 123495 + }, + { + "epoch": 5.76, + "learning_rate": 8.047904942548556e-07, + "loss": 0.1631, + "step": 123500 + }, + { + "epoch": 5.76, + "learning_rate": 8.04006709200069e-07, + "loss": 0.4118, + "step": 123505 + }, + { + "epoch": 5.76, + "learning_rate": 8.032229241452824e-07, + "loss": 0.1021, + "step": 123510 + }, + { + "epoch": 5.76, + "learning_rate": 8.02439139090496e-07, + "loss": 0.0313, + "step": 123515 + }, + { + "epoch": 5.76, + "learning_rate": 8.016553540357092e-07, + "loss": 0.0027, + "step": 123520 + }, + { + "epoch": 5.76, + "learning_rate": 8.008715689809228e-07, + "loss": 0.0291, + "step": 123525 + }, + { + "epoch": 5.76, + "learning_rate": 8.000877839261362e-07, + "loss": 0.0211, + "step": 123530 + }, + { + "epoch": 5.76, + "learning_rate": 7.993039988713496e-07, + "loss": 0.0671, + "step": 123535 + }, + { + "epoch": 5.76, + "learning_rate": 7.985202138165631e-07, + "loss": 0.0999, + "step": 123540 + }, + { + "epoch": 5.76, + "learning_rate": 7.977364287617764e-07, + "loss": 0.078, + "step": 123545 + }, + { + "epoch": 5.77, + "learning_rate": 7.969526437069898e-07, + "loss": 0.1253, + "step": 123550 + }, + { + "epoch": 5.77, + "learning_rate": 7.961688586522033e-07, + "loss": 0.1727, + "step": 123555 + }, + { + "epoch": 5.77, + "learning_rate": 7.953850735974167e-07, + "loss": 0.071, + "step": 123560 + }, + { + "epoch": 5.77, + "learning_rate": 7.946012885426302e-07, + "loss": 0.069, + "step": 123565 + }, + { + "epoch": 5.77, + "learning_rate": 7.938175034878435e-07, + "loss": 0.0152, + "step": 123570 + }, + { + "epoch": 5.77, + "learning_rate": 7.930337184330569e-07, + "loss": 0.0877, + "step": 123575 + }, + { + "epoch": 5.77, + "learning_rate": 7.922499333782704e-07, + "loss": 0.0413, + "step": 123580 + }, + { + "epoch": 5.77, + "learning_rate": 7.914661483234838e-07, + "loss": 0.1282, + "step": 123585 + }, + { + "epoch": 5.77, + "learning_rate": 7.906823632686974e-07, + "loss": 0.0444, + "step": 123590 + }, + { + "epoch": 5.77, + "learning_rate": 7.898985782139107e-07, + "loss": 0.0657, + "step": 123595 + }, + { + "epoch": 5.77, + "learning_rate": 7.891147931591241e-07, + "loss": 0.1234, + "step": 123600 + }, + { + "epoch": 5.77, + "learning_rate": 7.883310081043376e-07, + "loss": 0.2533, + "step": 123605 + }, + { + "epoch": 5.77, + "learning_rate": 7.87547223049551e-07, + "loss": 0.0928, + "step": 123610 + }, + { + "epoch": 5.77, + "learning_rate": 7.867634379947643e-07, + "loss": 0.0137, + "step": 123615 + }, + { + "epoch": 5.77, + "learning_rate": 7.859796529399778e-07, + "loss": 0.0437, + "step": 123620 + }, + { + "epoch": 5.77, + "learning_rate": 7.851958678851912e-07, + "loss": 0.0812, + "step": 123625 + }, + { + "epoch": 5.77, + "learning_rate": 7.844120828304047e-07, + "loss": 0.0439, + "step": 123630 + }, + { + "epoch": 5.77, + "learning_rate": 7.836282977756181e-07, + "loss": 0.0566, + "step": 123635 + }, + { + "epoch": 5.77, + "learning_rate": 7.828445127208314e-07, + "loss": 0.0862, + "step": 123640 + }, + { + "epoch": 5.77, + "learning_rate": 7.820607276660449e-07, + "loss": 0.0584, + "step": 123645 + }, + { + "epoch": 5.77, + "learning_rate": 7.812769426112583e-07, + "loss": 0.1447, + "step": 123650 + }, + { + "epoch": 5.77, + "learning_rate": 7.804931575564719e-07, + "loss": 0.388, + "step": 123655 + }, + { + "epoch": 5.77, + "learning_rate": 7.797093725016853e-07, + "loss": 0.0964, + "step": 123660 + }, + { + "epoch": 5.77, + "learning_rate": 7.789255874468985e-07, + "loss": 0.0375, + "step": 123665 + }, + { + "epoch": 5.77, + "learning_rate": 7.781418023921121e-07, + "loss": 0.0898, + "step": 123670 + }, + { + "epoch": 5.77, + "learning_rate": 7.773580173373255e-07, + "loss": 0.0637, + "step": 123675 + }, + { + "epoch": 5.77, + "learning_rate": 7.765742322825389e-07, + "loss": 0.053, + "step": 123680 + }, + { + "epoch": 5.77, + "learning_rate": 7.757904472277524e-07, + "loss": 0.0759, + "step": 123685 + }, + { + "epoch": 5.77, + "learning_rate": 7.750066621729657e-07, + "loss": 0.1069, + "step": 123690 + }, + { + "epoch": 5.77, + "learning_rate": 7.742228771181792e-07, + "loss": 0.0881, + "step": 123695 + }, + { + "epoch": 5.77, + "learning_rate": 7.734390920633926e-07, + "loss": 0.0783, + "step": 123700 + }, + { + "epoch": 5.77, + "learning_rate": 7.72655307008606e-07, + "loss": 0.364, + "step": 123705 + }, + { + "epoch": 5.77, + "learning_rate": 7.718715219538195e-07, + "loss": 0.0901, + "step": 123710 + }, + { + "epoch": 5.77, + "learning_rate": 7.710877368990328e-07, + "loss": 0.0279, + "step": 123715 + }, + { + "epoch": 5.77, + "learning_rate": 7.703039518442463e-07, + "loss": 0.0411, + "step": 123720 + }, + { + "epoch": 5.77, + "learning_rate": 7.695201667894597e-07, + "loss": 0.0812, + "step": 123725 + }, + { + "epoch": 5.77, + "learning_rate": 7.687363817346731e-07, + "loss": 0.1118, + "step": 123730 + }, + { + "epoch": 5.77, + "learning_rate": 7.679525966798867e-07, + "loss": 0.1866, + "step": 123735 + }, + { + "epoch": 5.77, + "learning_rate": 7.671688116251e-07, + "loss": 0.048, + "step": 123740 + }, + { + "epoch": 5.77, + "learning_rate": 7.663850265703134e-07, + "loss": 0.154, + "step": 123745 + }, + { + "epoch": 5.77, + "learning_rate": 7.656012415155269e-07, + "loss": 0.1242, + "step": 123750 + }, + { + "epoch": 5.77, + "learning_rate": 7.648174564607403e-07, + "loss": 0.3092, + "step": 123755 + }, + { + "epoch": 5.77, + "learning_rate": 7.640336714059537e-07, + "loss": 0.1386, + "step": 123760 + }, + { + "epoch": 5.78, + "learning_rate": 7.632498863511671e-07, + "loss": 0.0339, + "step": 123765 + }, + { + "epoch": 5.78, + "learning_rate": 7.624661012963805e-07, + "loss": 0.0626, + "step": 123770 + }, + { + "epoch": 5.78, + "learning_rate": 7.61682316241594e-07, + "loss": 0.0325, + "step": 123775 + }, + { + "epoch": 5.78, + "learning_rate": 7.608985311868074e-07, + "loss": 0.0592, + "step": 123780 + }, + { + "epoch": 5.78, + "learning_rate": 7.601147461320208e-07, + "loss": 0.0542, + "step": 123785 + }, + { + "epoch": 5.78, + "learning_rate": 7.593309610772342e-07, + "loss": 0.0315, + "step": 123790 + }, + { + "epoch": 5.78, + "learning_rate": 7.585471760224476e-07, + "loss": 0.0905, + "step": 123795 + }, + { + "epoch": 5.78, + "learning_rate": 7.577633909676612e-07, + "loss": 0.1081, + "step": 123800 + }, + { + "epoch": 5.78, + "learning_rate": 7.569796059128746e-07, + "loss": 0.2308, + "step": 123805 + }, + { + "epoch": 5.78, + "learning_rate": 7.561958208580878e-07, + "loss": 0.0953, + "step": 123810 + }, + { + "epoch": 5.78, + "learning_rate": 7.554120358033014e-07, + "loss": 0.0203, + "step": 123815 + }, + { + "epoch": 5.78, + "learning_rate": 7.546282507485148e-07, + "loss": 0.0782, + "step": 123820 + }, + { + "epoch": 5.78, + "learning_rate": 7.538444656937283e-07, + "loss": 0.034, + "step": 123825 + }, + { + "epoch": 5.78, + "learning_rate": 7.530606806389416e-07, + "loss": 0.0539, + "step": 123830 + }, + { + "epoch": 5.78, + "learning_rate": 7.52276895584155e-07, + "loss": 0.103, + "step": 123835 + }, + { + "epoch": 5.78, + "learning_rate": 7.514931105293685e-07, + "loss": 0.0665, + "step": 123840 + }, + { + "epoch": 5.78, + "learning_rate": 7.507093254745819e-07, + "loss": 0.0891, + "step": 123845 + }, + { + "epoch": 5.78, + "learning_rate": 7.499255404197954e-07, + "loss": 0.1351, + "step": 123850 + }, + { + "epoch": 5.78, + "learning_rate": 7.491417553650087e-07, + "loss": 0.2894, + "step": 123855 + }, + { + "epoch": 5.78, + "learning_rate": 7.483579703102221e-07, + "loss": 0.1033, + "step": 123860 + }, + { + "epoch": 5.78, + "learning_rate": 7.475741852554356e-07, + "loss": 0.0372, + "step": 123865 + }, + { + "epoch": 5.78, + "learning_rate": 7.46790400200649e-07, + "loss": 0.0294, + "step": 123870 + }, + { + "epoch": 5.78, + "learning_rate": 7.460066151458624e-07, + "loss": 0.0532, + "step": 123875 + }, + { + "epoch": 5.78, + "learning_rate": 7.452228300910759e-07, + "loss": 0.0906, + "step": 123880 + }, + { + "epoch": 5.78, + "learning_rate": 7.444390450362893e-07, + "loss": 0.0519, + "step": 123885 + }, + { + "epoch": 5.78, + "learning_rate": 7.436552599815028e-07, + "loss": 0.0472, + "step": 123890 + }, + { + "epoch": 5.78, + "learning_rate": 7.428714749267162e-07, + "loss": 0.0717, + "step": 123895 + }, + { + "epoch": 5.78, + "learning_rate": 7.420876898719296e-07, + "loss": 0.1929, + "step": 123900 + }, + { + "epoch": 5.78, + "learning_rate": 7.41303904817143e-07, + "loss": 0.2571, + "step": 123905 + }, + { + "epoch": 5.78, + "learning_rate": 7.405201197623564e-07, + "loss": 0.0856, + "step": 123910 + }, + { + "epoch": 5.78, + "learning_rate": 7.397363347075699e-07, + "loss": 0.0409, + "step": 123915 + }, + { + "epoch": 5.78, + "learning_rate": 7.389525496527833e-07, + "loss": 0.0602, + "step": 123920 + }, + { + "epoch": 5.78, + "learning_rate": 7.381687645979966e-07, + "loss": 0.0323, + "step": 123925 + }, + { + "epoch": 5.78, + "learning_rate": 7.373849795432101e-07, + "loss": 0.0883, + "step": 123930 + }, + { + "epoch": 5.78, + "learning_rate": 7.366011944884235e-07, + "loss": 0.0962, + "step": 123935 + }, + { + "epoch": 5.78, + "learning_rate": 7.358174094336369e-07, + "loss": 0.1288, + "step": 123940 + }, + { + "epoch": 5.78, + "learning_rate": 7.350336243788505e-07, + "loss": 0.18, + "step": 123945 + }, + { + "epoch": 5.78, + "learning_rate": 7.342498393240638e-07, + "loss": 0.0779, + "step": 123950 + }, + { + "epoch": 5.78, + "learning_rate": 7.334660542692773e-07, + "loss": 0.2423, + "step": 123955 + }, + { + "epoch": 5.78, + "learning_rate": 7.326822692144907e-07, + "loss": 0.0798, + "step": 123960 + }, + { + "epoch": 5.78, + "learning_rate": 7.318984841597041e-07, + "loss": 0.0255, + "step": 123965 + }, + { + "epoch": 5.78, + "learning_rate": 7.311146991049176e-07, + "loss": 0.0519, + "step": 123970 + }, + { + "epoch": 5.78, + "learning_rate": 7.303309140501309e-07, + "loss": 0.0256, + "step": 123975 + }, + { + "epoch": 5.79, + "learning_rate": 7.295471289953444e-07, + "loss": 0.0642, + "step": 123980 + }, + { + "epoch": 5.79, + "learning_rate": 7.287633439405578e-07, + "loss": 0.0993, + "step": 123985 + }, + { + "epoch": 5.79, + "learning_rate": 7.279795588857712e-07, + "loss": 0.0837, + "step": 123990 + }, + { + "epoch": 5.79, + "learning_rate": 7.271957738309847e-07, + "loss": 0.1022, + "step": 123995 + }, + { + "epoch": 5.79, + "learning_rate": 7.26411988776198e-07, + "loss": 0.1735, + "step": 124000 + }, + { + "epoch": 5.79, + "learning_rate": 7.256282037214114e-07, + "loss": 0.3865, + "step": 124005 + }, + { + "epoch": 5.79, + "learning_rate": 7.248444186666249e-07, + "loss": 0.0915, + "step": 124010 + }, + { + "epoch": 5.79, + "learning_rate": 7.240606336118383e-07, + "loss": 0.0114, + "step": 124015 + }, + { + "epoch": 5.79, + "learning_rate": 7.232768485570519e-07, + "loss": 0.0443, + "step": 124020 + }, + { + "epoch": 5.79, + "learning_rate": 7.224930635022652e-07, + "loss": 0.0167, + "step": 124025 + }, + { + "epoch": 5.79, + "learning_rate": 7.217092784474786e-07, + "loss": 0.0592, + "step": 124030 + }, + { + "epoch": 5.79, + "learning_rate": 7.209254933926921e-07, + "loss": 0.0328, + "step": 124035 + }, + { + "epoch": 5.79, + "learning_rate": 7.201417083379055e-07, + "loss": 0.0384, + "step": 124040 + }, + { + "epoch": 5.79, + "learning_rate": 7.19357923283119e-07, + "loss": 0.0736, + "step": 124045 + }, + { + "epoch": 5.79, + "learning_rate": 7.185741382283323e-07, + "loss": 0.172, + "step": 124050 + }, + { + "epoch": 5.79, + "learning_rate": 7.177903531735457e-07, + "loss": 0.217, + "step": 124055 + }, + { + "epoch": 5.79, + "learning_rate": 7.170065681187592e-07, + "loss": 0.0865, + "step": 124060 + }, + { + "epoch": 5.79, + "learning_rate": 7.162227830639726e-07, + "loss": 0.0343, + "step": 124065 + }, + { + "epoch": 5.79, + "learning_rate": 7.154389980091859e-07, + "loss": 0.0263, + "step": 124070 + }, + { + "epoch": 5.79, + "learning_rate": 7.146552129543994e-07, + "loss": 0.0296, + "step": 124075 + }, + { + "epoch": 5.79, + "learning_rate": 7.138714278996128e-07, + "loss": 0.0766, + "step": 124080 + }, + { + "epoch": 5.79, + "learning_rate": 7.130876428448264e-07, + "loss": 0.0366, + "step": 124085 + }, + { + "epoch": 5.79, + "learning_rate": 7.123038577900398e-07, + "loss": 0.1031, + "step": 124090 + }, + { + "epoch": 5.79, + "learning_rate": 7.115200727352531e-07, + "loss": 0.1588, + "step": 124095 + }, + { + "epoch": 5.79, + "learning_rate": 7.107362876804666e-07, + "loss": 0.2584, + "step": 124100 + }, + { + "epoch": 5.79, + "learning_rate": 7.0995250262568e-07, + "loss": 0.2363, + "step": 124105 + }, + { + "epoch": 5.79, + "learning_rate": 7.091687175708935e-07, + "loss": 0.0848, + "step": 124110 + }, + { + "epoch": 5.79, + "learning_rate": 7.083849325161069e-07, + "loss": 0.0214, + "step": 124115 + }, + { + "epoch": 5.79, + "learning_rate": 7.076011474613202e-07, + "loss": 0.0604, + "step": 124120 + }, + { + "epoch": 5.79, + "learning_rate": 7.068173624065337e-07, + "loss": 0.0537, + "step": 124125 + }, + { + "epoch": 5.79, + "learning_rate": 7.060335773517471e-07, + "loss": 0.0966, + "step": 124130 + }, + { + "epoch": 5.79, + "learning_rate": 7.052497922969605e-07, + "loss": 0.0425, + "step": 124135 + }, + { + "epoch": 5.79, + "learning_rate": 7.04466007242174e-07, + "loss": 0.0875, + "step": 124140 + }, + { + "epoch": 5.79, + "learning_rate": 7.036822221873873e-07, + "loss": 0.1051, + "step": 124145 + }, + { + "epoch": 5.79, + "learning_rate": 7.028984371326009e-07, + "loss": 0.0774, + "step": 124150 + }, + { + "epoch": 5.79, + "learning_rate": 7.021146520778142e-07, + "loss": 0.2896, + "step": 124155 + }, + { + "epoch": 5.79, + "learning_rate": 7.013308670230276e-07, + "loss": 0.0778, + "step": 124160 + }, + { + "epoch": 5.79, + "learning_rate": 7.005470819682411e-07, + "loss": 0.0508, + "step": 124165 + }, + { + "epoch": 5.79, + "learning_rate": 6.997632969134545e-07, + "loss": 0.0206, + "step": 124170 + }, + { + "epoch": 5.79, + "learning_rate": 6.98979511858668e-07, + "loss": 0.0329, + "step": 124175 + }, + { + "epoch": 5.79, + "learning_rate": 6.981957268038814e-07, + "loss": 0.0736, + "step": 124180 + }, + { + "epoch": 5.79, + "learning_rate": 6.974119417490948e-07, + "loss": 0.0544, + "step": 124185 + }, + { + "epoch": 5.79, + "learning_rate": 6.966281566943082e-07, + "loss": 0.0876, + "step": 124190 + }, + { + "epoch": 5.8, + "learning_rate": 6.958443716395216e-07, + "loss": 0.1214, + "step": 124195 + }, + { + "epoch": 5.8, + "learning_rate": 6.95060586584735e-07, + "loss": 0.0945, + "step": 124200 + }, + { + "epoch": 5.8, + "learning_rate": 6.942768015299485e-07, + "loss": 0.3975, + "step": 124205 + }, + { + "epoch": 5.8, + "learning_rate": 6.934930164751619e-07, + "loss": 0.1205, + "step": 124210 + }, + { + "epoch": 5.8, + "learning_rate": 6.927092314203753e-07, + "loss": 0.0409, + "step": 124215 + }, + { + "epoch": 5.8, + "learning_rate": 6.919254463655887e-07, + "loss": 0.0128, + "step": 124220 + }, + { + "epoch": 5.8, + "learning_rate": 6.911416613108021e-07, + "loss": 0.0086, + "step": 124225 + }, + { + "epoch": 5.8, + "learning_rate": 6.903578762560157e-07, + "loss": 0.041, + "step": 124230 + }, + { + "epoch": 5.8, + "learning_rate": 6.89574091201229e-07, + "loss": 0.0356, + "step": 124235 + }, + { + "epoch": 5.8, + "learning_rate": 6.887903061464425e-07, + "loss": 0.1066, + "step": 124240 + }, + { + "epoch": 5.8, + "learning_rate": 6.880065210916559e-07, + "loss": 0.0992, + "step": 124245 + }, + { + "epoch": 5.8, + "learning_rate": 6.872227360368693e-07, + "loss": 0.169, + "step": 124250 + }, + { + "epoch": 5.8, + "learning_rate": 6.864389509820828e-07, + "loss": 0.2472, + "step": 124255 + }, + { + "epoch": 5.8, + "learning_rate": 6.856551659272961e-07, + "loss": 0.0998, + "step": 124260 + }, + { + "epoch": 5.8, + "learning_rate": 6.848713808725095e-07, + "loss": 0.0258, + "step": 124265 + }, + { + "epoch": 5.8, + "learning_rate": 6.84087595817723e-07, + "loss": 0.0269, + "step": 124270 + }, + { + "epoch": 5.8, + "learning_rate": 6.833038107629364e-07, + "loss": 0.0342, + "step": 124275 + }, + { + "epoch": 5.8, + "learning_rate": 6.825200257081499e-07, + "loss": 0.0543, + "step": 124280 + }, + { + "epoch": 5.8, + "learning_rate": 6.817362406533632e-07, + "loss": 0.0642, + "step": 124285 + }, + { + "epoch": 5.8, + "learning_rate": 6.809524555985766e-07, + "loss": 0.0855, + "step": 124290 + }, + { + "epoch": 5.8, + "learning_rate": 6.801686705437902e-07, + "loss": 0.1233, + "step": 124295 + }, + { + "epoch": 5.8, + "learning_rate": 6.793848854890035e-07, + "loss": 0.0949, + "step": 124300 + }, + { + "epoch": 5.8, + "learning_rate": 6.786011004342171e-07, + "loss": 0.2795, + "step": 124305 + }, + { + "epoch": 5.8, + "learning_rate": 6.778173153794304e-07, + "loss": 0.1434, + "step": 124310 + }, + { + "epoch": 5.8, + "learning_rate": 6.770335303246438e-07, + "loss": 0.0067, + "step": 124315 + }, + { + "epoch": 5.8, + "learning_rate": 6.762497452698573e-07, + "loss": 0.0562, + "step": 124320 + }, + { + "epoch": 5.8, + "learning_rate": 6.754659602150707e-07, + "loss": 0.029, + "step": 124325 + }, + { + "epoch": 5.8, + "learning_rate": 6.74682175160284e-07, + "loss": 0.0515, + "step": 124330 + }, + { + "epoch": 5.8, + "learning_rate": 6.738983901054975e-07, + "loss": 0.0962, + "step": 124335 + }, + { + "epoch": 5.8, + "learning_rate": 6.731146050507109e-07, + "loss": 0.0843, + "step": 124340 + }, + { + "epoch": 5.8, + "learning_rate": 6.723308199959244e-07, + "loss": 0.122, + "step": 124345 + }, + { + "epoch": 5.8, + "learning_rate": 6.715470349411378e-07, + "loss": 0.1206, + "step": 124350 + }, + { + "epoch": 5.8, + "learning_rate": 6.707632498863511e-07, + "loss": 0.3559, + "step": 124355 + }, + { + "epoch": 5.8, + "learning_rate": 6.699794648315646e-07, + "loss": 0.1165, + "step": 124360 + }, + { + "epoch": 5.8, + "learning_rate": 6.69195679776778e-07, + "loss": 0.0147, + "step": 124365 + }, + { + "epoch": 5.8, + "learning_rate": 6.684118947219916e-07, + "loss": 0.0265, + "step": 124370 + }, + { + "epoch": 5.8, + "learning_rate": 6.67628109667205e-07, + "loss": 0.0386, + "step": 124375 + }, + { + "epoch": 5.8, + "learning_rate": 6.668443246124183e-07, + "loss": 0.0408, + "step": 124380 + }, + { + "epoch": 5.8, + "learning_rate": 6.660605395576318e-07, + "loss": 0.0604, + "step": 124385 + }, + { + "epoch": 5.8, + "learning_rate": 6.652767545028452e-07, + "loss": 0.0364, + "step": 124390 + }, + { + "epoch": 5.8, + "learning_rate": 6.644929694480586e-07, + "loss": 0.0698, + "step": 124395 + }, + { + "epoch": 5.8, + "learning_rate": 6.637091843932721e-07, + "loss": 0.0668, + "step": 124400 + }, + { + "epoch": 5.8, + "learning_rate": 6.629253993384854e-07, + "loss": 0.2191, + "step": 124405 + }, + { + "epoch": 5.81, + "learning_rate": 6.621416142836989e-07, + "loss": 0.0953, + "step": 124410 + }, + { + "epoch": 5.81, + "learning_rate": 6.613578292289123e-07, + "loss": 0.0472, + "step": 124415 + }, + { + "epoch": 5.81, + "learning_rate": 6.605740441741257e-07, + "loss": 0.0161, + "step": 124420 + }, + { + "epoch": 5.81, + "learning_rate": 6.597902591193392e-07, + "loss": 0.0503, + "step": 124425 + }, + { + "epoch": 5.81, + "learning_rate": 6.590064740645525e-07, + "loss": 0.0921, + "step": 124430 + }, + { + "epoch": 5.81, + "learning_rate": 6.582226890097661e-07, + "loss": 0.0522, + "step": 124435 + }, + { + "epoch": 5.81, + "learning_rate": 6.574389039549795e-07, + "loss": 0.0843, + "step": 124440 + }, + { + "epoch": 5.81, + "learning_rate": 6.566551189001928e-07, + "loss": 0.0834, + "step": 124445 + }, + { + "epoch": 5.81, + "learning_rate": 6.558713338454064e-07, + "loss": 0.0994, + "step": 124450 + }, + { + "epoch": 5.81, + "learning_rate": 6.550875487906197e-07, + "loss": 0.1865, + "step": 124455 + }, + { + "epoch": 5.81, + "learning_rate": 6.543037637358331e-07, + "loss": 0.0679, + "step": 124460 + }, + { + "epoch": 5.81, + "learning_rate": 6.535199786810466e-07, + "loss": 0.051, + "step": 124465 + }, + { + "epoch": 5.81, + "learning_rate": 6.5273619362626e-07, + "loss": 0.0196, + "step": 124470 + }, + { + "epoch": 5.81, + "learning_rate": 6.519524085714734e-07, + "loss": 0.0648, + "step": 124475 + }, + { + "epoch": 5.81, + "learning_rate": 6.511686235166868e-07, + "loss": 0.0549, + "step": 124480 + }, + { + "epoch": 5.81, + "learning_rate": 6.503848384619002e-07, + "loss": 0.0333, + "step": 124485 + }, + { + "epoch": 5.81, + "learning_rate": 6.496010534071137e-07, + "loss": 0.0495, + "step": 124490 + }, + { + "epoch": 5.81, + "learning_rate": 6.488172683523271e-07, + "loss": 0.0736, + "step": 124495 + }, + { + "epoch": 5.81, + "learning_rate": 6.480334832975406e-07, + "loss": 0.1036, + "step": 124500 + }, + { + "epoch": 5.81, + "learning_rate": 6.472496982427539e-07, + "loss": 0.1825, + "step": 124505 + }, + { + "epoch": 5.81, + "learning_rate": 6.464659131879673e-07, + "loss": 0.093, + "step": 124510 + }, + { + "epoch": 5.81, + "learning_rate": 6.456821281331809e-07, + "loss": 0.0152, + "step": 124515 + }, + { + "epoch": 5.81, + "learning_rate": 6.448983430783943e-07, + "loss": 0.0292, + "step": 124520 + }, + { + "epoch": 5.81, + "learning_rate": 6.441145580236077e-07, + "loss": 0.0474, + "step": 124525 + }, + { + "epoch": 5.81, + "learning_rate": 6.433307729688211e-07, + "loss": 0.0589, + "step": 124530 + }, + { + "epoch": 5.81, + "learning_rate": 6.425469879140345e-07, + "loss": 0.0341, + "step": 124535 + }, + { + "epoch": 5.81, + "learning_rate": 6.41763202859248e-07, + "loss": 0.0813, + "step": 124540 + }, + { + "epoch": 5.81, + "learning_rate": 6.409794178044614e-07, + "loss": 0.0954, + "step": 124545 + }, + { + "epoch": 5.81, + "learning_rate": 6.401956327496747e-07, + "loss": 0.1325, + "step": 124550 + }, + { + "epoch": 5.81, + "learning_rate": 6.394118476948882e-07, + "loss": 0.2702, + "step": 124555 + }, + { + "epoch": 5.81, + "learning_rate": 6.386280626401016e-07, + "loss": 0.1172, + "step": 124560 + }, + { + "epoch": 5.81, + "learning_rate": 6.378442775853151e-07, + "loss": 0.0202, + "step": 124565 + }, + { + "epoch": 5.81, + "learning_rate": 6.370604925305284e-07, + "loss": 0.0111, + "step": 124570 + }, + { + "epoch": 5.81, + "learning_rate": 6.362767074757418e-07, + "loss": 0.0369, + "step": 124575 + }, + { + "epoch": 5.81, + "learning_rate": 6.354929224209554e-07, + "loss": 0.0727, + "step": 124580 + }, + { + "epoch": 5.81, + "learning_rate": 6.347091373661688e-07, + "loss": 0.0661, + "step": 124585 + }, + { + "epoch": 5.81, + "learning_rate": 6.339253523113823e-07, + "loss": 0.0784, + "step": 124590 + }, + { + "epoch": 5.81, + "learning_rate": 6.331415672565956e-07, + "loss": 0.1316, + "step": 124595 + }, + { + "epoch": 5.81, + "learning_rate": 6.32357782201809e-07, + "loss": 0.1371, + "step": 124600 + }, + { + "epoch": 5.81, + "learning_rate": 6.315739971470225e-07, + "loss": 0.2319, + "step": 124605 + }, + { + "epoch": 5.81, + "learning_rate": 6.307902120922359e-07, + "loss": 0.0474, + "step": 124610 + }, + { + "epoch": 5.81, + "learning_rate": 6.300064270374493e-07, + "loss": 0.0125, + "step": 124615 + }, + { + "epoch": 5.81, + "learning_rate": 6.292226419826627e-07, + "loss": 0.0267, + "step": 124620 + }, + { + "epoch": 5.82, + "learning_rate": 6.284388569278761e-07, + "loss": 0.0286, + "step": 124625 + }, + { + "epoch": 5.82, + "learning_rate": 6.276550718730896e-07, + "loss": 0.0598, + "step": 124630 + }, + { + "epoch": 5.82, + "learning_rate": 6.26871286818303e-07, + "loss": 0.0721, + "step": 124635 + }, + { + "epoch": 5.82, + "learning_rate": 6.260875017635163e-07, + "loss": 0.0251, + "step": 124640 + }, + { + "epoch": 5.82, + "learning_rate": 6.253037167087299e-07, + "loss": 0.0819, + "step": 124645 + }, + { + "epoch": 5.82, + "learning_rate": 6.245199316539432e-07, + "loss": 0.154, + "step": 124650 + }, + { + "epoch": 5.82, + "learning_rate": 6.237361465991567e-07, + "loss": 0.2428, + "step": 124655 + }, + { + "epoch": 5.82, + "learning_rate": 6.229523615443702e-07, + "loss": 0.0573, + "step": 124660 + }, + { + "epoch": 5.82, + "learning_rate": 6.221685764895835e-07, + "loss": 0.0418, + "step": 124665 + }, + { + "epoch": 5.82, + "learning_rate": 6.213847914347969e-07, + "loss": 0.0223, + "step": 124670 + }, + { + "epoch": 5.82, + "learning_rate": 6.206010063800104e-07, + "loss": 0.0566, + "step": 124675 + }, + { + "epoch": 5.82, + "learning_rate": 6.198172213252238e-07, + "loss": 0.0485, + "step": 124680 + }, + { + "epoch": 5.82, + "learning_rate": 6.190334362704373e-07, + "loss": 0.0479, + "step": 124685 + }, + { + "epoch": 5.82, + "learning_rate": 6.182496512156506e-07, + "loss": 0.156, + "step": 124690 + }, + { + "epoch": 5.82, + "learning_rate": 6.174658661608641e-07, + "loss": 0.0955, + "step": 124695 + }, + { + "epoch": 5.82, + "learning_rate": 6.166820811060775e-07, + "loss": 0.1148, + "step": 124700 + }, + { + "epoch": 5.82, + "learning_rate": 6.15898296051291e-07, + "loss": 0.1755, + "step": 124705 + }, + { + "epoch": 5.82, + "learning_rate": 6.151145109965044e-07, + "loss": 0.0931, + "step": 124710 + }, + { + "epoch": 5.82, + "learning_rate": 6.143307259417177e-07, + "loss": 0.0317, + "step": 124715 + }, + { + "epoch": 5.82, + "learning_rate": 6.135469408869312e-07, + "loss": 0.0507, + "step": 124720 + }, + { + "epoch": 5.82, + "learning_rate": 6.127631558321447e-07, + "loss": 0.0426, + "step": 124725 + }, + { + "epoch": 5.82, + "learning_rate": 6.119793707773581e-07, + "loss": 0.0811, + "step": 124730 + }, + { + "epoch": 5.82, + "learning_rate": 6.111955857225715e-07, + "loss": 0.0507, + "step": 124735 + }, + { + "epoch": 5.82, + "learning_rate": 6.104118006677849e-07, + "loss": 0.1093, + "step": 124740 + }, + { + "epoch": 5.82, + "learning_rate": 6.096280156129983e-07, + "loss": 0.1305, + "step": 124745 + }, + { + "epoch": 5.82, + "learning_rate": 6.088442305582118e-07, + "loss": 0.0911, + "step": 124750 + }, + { + "epoch": 5.82, + "learning_rate": 6.080604455034252e-07, + "loss": 0.1897, + "step": 124755 + }, + { + "epoch": 5.82, + "learning_rate": 6.072766604486387e-07, + "loss": 0.1614, + "step": 124760 + }, + { + "epoch": 5.82, + "learning_rate": 6.06492875393852e-07, + "loss": 0.0269, + "step": 124765 + }, + { + "epoch": 5.82, + "learning_rate": 6.057090903390655e-07, + "loss": 0.0155, + "step": 124770 + }, + { + "epoch": 5.82, + "learning_rate": 6.049253052842789e-07, + "loss": 0.0393, + "step": 124775 + }, + { + "epoch": 5.82, + "learning_rate": 6.041415202294923e-07, + "loss": 0.0532, + "step": 124780 + }, + { + "epoch": 5.82, + "learning_rate": 6.033577351747057e-07, + "loss": 0.0301, + "step": 124785 + }, + { + "epoch": 5.82, + "learning_rate": 6.025739501199192e-07, + "loss": 0.0772, + "step": 124790 + }, + { + "epoch": 5.82, + "learning_rate": 6.017901650651326e-07, + "loss": 0.0446, + "step": 124795 + }, + { + "epoch": 5.82, + "learning_rate": 6.01006380010346e-07, + "loss": 0.1685, + "step": 124800 + }, + { + "epoch": 5.82, + "learning_rate": 6.002225949555595e-07, + "loss": 0.1671, + "step": 124805 + }, + { + "epoch": 5.82, + "learning_rate": 5.994388099007728e-07, + "loss": 0.1625, + "step": 124810 + }, + { + "epoch": 5.82, + "learning_rate": 5.986550248459863e-07, + "loss": 0.0503, + "step": 124815 + }, + { + "epoch": 5.82, + "learning_rate": 5.978712397911997e-07, + "loss": 0.0563, + "step": 124820 + }, + { + "epoch": 5.82, + "learning_rate": 5.970874547364131e-07, + "loss": 0.0963, + "step": 124825 + }, + { + "epoch": 5.82, + "learning_rate": 5.963036696816266e-07, + "loss": 0.0487, + "step": 124830 + }, + { + "epoch": 5.82, + "learning_rate": 5.9551988462684e-07, + "loss": 0.0124, + "step": 124835 + }, + { + "epoch": 5.83, + "learning_rate": 5.947360995720534e-07, + "loss": 0.0291, + "step": 124840 + }, + { + "epoch": 5.83, + "learning_rate": 5.939523145172668e-07, + "loss": 0.1077, + "step": 124845 + }, + { + "epoch": 5.83, + "learning_rate": 5.931685294624803e-07, + "loss": 0.1676, + "step": 124850 + }, + { + "epoch": 5.83, + "learning_rate": 5.923847444076937e-07, + "loss": 0.3512, + "step": 124855 + }, + { + "epoch": 5.83, + "learning_rate": 5.916009593529071e-07, + "loss": 0.1101, + "step": 124860 + }, + { + "epoch": 5.83, + "learning_rate": 5.908171742981205e-07, + "loss": 0.0183, + "step": 124865 + }, + { + "epoch": 5.83, + "learning_rate": 5.90033389243334e-07, + "loss": 0.0091, + "step": 124870 + }, + { + "epoch": 5.83, + "learning_rate": 5.892496041885474e-07, + "loss": 0.0154, + "step": 124875 + }, + { + "epoch": 5.83, + "learning_rate": 5.884658191337609e-07, + "loss": 0.0644, + "step": 124880 + }, + { + "epoch": 5.83, + "learning_rate": 5.876820340789742e-07, + "loss": 0.056, + "step": 124885 + }, + { + "epoch": 5.83, + "learning_rate": 5.868982490241876e-07, + "loss": 0.0773, + "step": 124890 + }, + { + "epoch": 5.83, + "learning_rate": 5.861144639694011e-07, + "loss": 0.114, + "step": 124895 + }, + { + "epoch": 5.83, + "learning_rate": 5.853306789146146e-07, + "loss": 0.1437, + "step": 124900 + }, + { + "epoch": 5.83, + "learning_rate": 5.845468938598279e-07, + "loss": 0.2537, + "step": 124905 + }, + { + "epoch": 5.83, + "learning_rate": 5.837631088050413e-07, + "loss": 0.1259, + "step": 124910 + }, + { + "epoch": 5.83, + "learning_rate": 5.829793237502548e-07, + "loss": 0.0075, + "step": 124915 + }, + { + "epoch": 5.83, + "learning_rate": 5.821955386954682e-07, + "loss": 0.0163, + "step": 124920 + }, + { + "epoch": 5.83, + "learning_rate": 5.814117536406817e-07, + "loss": 0.0225, + "step": 124925 + }, + { + "epoch": 5.83, + "learning_rate": 5.80627968585895e-07, + "loss": 0.0736, + "step": 124930 + }, + { + "epoch": 5.83, + "learning_rate": 5.798441835311085e-07, + "loss": 0.0398, + "step": 124935 + }, + { + "epoch": 5.83, + "learning_rate": 5.790603984763219e-07, + "loss": 0.0911, + "step": 124940 + }, + { + "epoch": 5.83, + "learning_rate": 5.782766134215354e-07, + "loss": 0.0495, + "step": 124945 + }, + { + "epoch": 5.83, + "learning_rate": 5.774928283667488e-07, + "loss": 0.1378, + "step": 124950 + }, + { + "epoch": 5.83, + "learning_rate": 5.767090433119621e-07, + "loss": 0.2817, + "step": 124955 + }, + { + "epoch": 5.83, + "learning_rate": 5.759252582571756e-07, + "loss": 0.0973, + "step": 124960 + }, + { + "epoch": 5.83, + "learning_rate": 5.75141473202389e-07, + "loss": 0.0411, + "step": 124965 + }, + { + "epoch": 5.83, + "learning_rate": 5.743576881476025e-07, + "loss": 0.0275, + "step": 124970 + }, + { + "epoch": 5.83, + "learning_rate": 5.735739030928158e-07, + "loss": 0.0268, + "step": 124975 + }, + { + "epoch": 5.83, + "learning_rate": 5.727901180380293e-07, + "loss": 0.0438, + "step": 124980 + }, + { + "epoch": 5.83, + "learning_rate": 5.720063329832427e-07, + "loss": 0.0901, + "step": 124985 + }, + { + "epoch": 5.83, + "learning_rate": 5.712225479284562e-07, + "loss": 0.0585, + "step": 124990 + }, + { + "epoch": 5.83, + "learning_rate": 5.704387628736696e-07, + "loss": 0.0485, + "step": 124995 + }, + { + "epoch": 5.83, + "learning_rate": 5.696549778188829e-07, + "loss": 0.1229, + "step": 125000 + }, + { + "epoch": 5.83, + "learning_rate": 5.688711927640964e-07, + "loss": 0.231, + "step": 125005 + }, + { + "epoch": 5.83, + "learning_rate": 5.680874077093099e-07, + "loss": 0.1177, + "step": 125010 + }, + { + "epoch": 5.83, + "learning_rate": 5.673036226545233e-07, + "loss": 0.0348, + "step": 125015 + }, + { + "epoch": 5.83, + "learning_rate": 5.665198375997367e-07, + "loss": 0.0177, + "step": 125020 + }, + { + "epoch": 5.83, + "learning_rate": 5.657360525449501e-07, + "loss": 0.0381, + "step": 125025 + }, + { + "epoch": 5.83, + "learning_rate": 5.649522674901635e-07, + "loss": 0.0895, + "step": 125030 + }, + { + "epoch": 5.83, + "learning_rate": 5.64168482435377e-07, + "loss": 0.0573, + "step": 125035 + }, + { + "epoch": 5.83, + "learning_rate": 5.633846973805904e-07, + "loss": 0.0769, + "step": 125040 + }, + { + "epoch": 5.83, + "learning_rate": 5.626009123258039e-07, + "loss": 0.0875, + "step": 125045 + }, + { + "epoch": 5.84, + "learning_rate": 5.618171272710172e-07, + "loss": 0.1346, + "step": 125050 + }, + { + "epoch": 5.84, + "learning_rate": 5.610333422162307e-07, + "loss": 0.2877, + "step": 125055 + }, + { + "epoch": 5.84, + "learning_rate": 5.602495571614441e-07, + "loss": 0.0832, + "step": 125060 + }, + { + "epoch": 5.84, + "learning_rate": 5.594657721066575e-07, + "loss": 0.0289, + "step": 125065 + }, + { + "epoch": 5.84, + "learning_rate": 5.58681987051871e-07, + "loss": 0.0562, + "step": 125070 + }, + { + "epoch": 5.84, + "learning_rate": 5.578982019970844e-07, + "loss": 0.0122, + "step": 125075 + }, + { + "epoch": 5.84, + "learning_rate": 5.571144169422978e-07, + "loss": 0.0484, + "step": 125080 + }, + { + "epoch": 5.84, + "learning_rate": 5.563306318875112e-07, + "loss": 0.0585, + "step": 125085 + }, + { + "epoch": 5.84, + "learning_rate": 5.555468468327247e-07, + "loss": 0.0953, + "step": 125090 + }, + { + "epoch": 5.84, + "learning_rate": 5.54763061777938e-07, + "loss": 0.1156, + "step": 125095 + }, + { + "epoch": 5.84, + "learning_rate": 5.539792767231515e-07, + "loss": 0.0746, + "step": 125100 + }, + { + "epoch": 5.84, + "learning_rate": 5.531954916683649e-07, + "loss": 0.2925, + "step": 125105 + }, + { + "epoch": 5.84, + "learning_rate": 5.524117066135783e-07, + "loss": 0.1169, + "step": 125110 + }, + { + "epoch": 5.84, + "learning_rate": 5.516279215587918e-07, + "loss": 0.042, + "step": 125115 + }, + { + "epoch": 5.84, + "learning_rate": 5.508441365040052e-07, + "loss": 0.021, + "step": 125120 + }, + { + "epoch": 5.84, + "learning_rate": 5.500603514492186e-07, + "loss": 0.0465, + "step": 125125 + }, + { + "epoch": 5.84, + "learning_rate": 5.49276566394432e-07, + "loss": 0.0523, + "step": 125130 + }, + { + "epoch": 5.84, + "learning_rate": 5.484927813396455e-07, + "loss": 0.0316, + "step": 125135 + }, + { + "epoch": 5.84, + "learning_rate": 5.477089962848589e-07, + "loss": 0.0717, + "step": 125140 + }, + { + "epoch": 5.84, + "learning_rate": 5.469252112300722e-07, + "loss": 0.0685, + "step": 125145 + }, + { + "epoch": 5.84, + "learning_rate": 5.461414261752857e-07, + "loss": 0.1428, + "step": 125150 + }, + { + "epoch": 5.84, + "learning_rate": 5.453576411204992e-07, + "loss": 0.2507, + "step": 125155 + }, + { + "epoch": 5.84, + "learning_rate": 5.445738560657126e-07, + "loss": 0.0666, + "step": 125160 + }, + { + "epoch": 5.84, + "learning_rate": 5.437900710109261e-07, + "loss": 0.0296, + "step": 125165 + }, + { + "epoch": 5.84, + "learning_rate": 5.430062859561394e-07, + "loss": 0.0207, + "step": 125170 + }, + { + "epoch": 5.84, + "learning_rate": 5.422225009013528e-07, + "loss": 0.023, + "step": 125175 + }, + { + "epoch": 5.84, + "learning_rate": 5.414387158465663e-07, + "loss": 0.0138, + "step": 125180 + }, + { + "epoch": 5.84, + "learning_rate": 5.406549307917798e-07, + "loss": 0.0948, + "step": 125185 + }, + { + "epoch": 5.84, + "learning_rate": 5.398711457369932e-07, + "loss": 0.2338, + "step": 125190 + }, + { + "epoch": 5.84, + "learning_rate": 5.390873606822065e-07, + "loss": 0.082, + "step": 125195 + }, + { + "epoch": 5.84, + "learning_rate": 5.3830357562742e-07, + "loss": 0.1887, + "step": 125200 + }, + { + "epoch": 5.84, + "learning_rate": 5.375197905726334e-07, + "loss": 0.3003, + "step": 125205 + }, + { + "epoch": 5.84, + "learning_rate": 5.367360055178468e-07, + "loss": 0.0713, + "step": 125210 + }, + { + "epoch": 5.84, + "learning_rate": 5.359522204630602e-07, + "loss": 0.0396, + "step": 125215 + }, + { + "epoch": 5.84, + "learning_rate": 5.351684354082737e-07, + "loss": 0.0123, + "step": 125220 + }, + { + "epoch": 5.84, + "learning_rate": 5.343846503534871e-07, + "loss": 0.0577, + "step": 125225 + }, + { + "epoch": 5.84, + "learning_rate": 5.336008652987006e-07, + "loss": 0.0919, + "step": 125230 + }, + { + "epoch": 5.84, + "learning_rate": 5.32817080243914e-07, + "loss": 0.0644, + "step": 125235 + }, + { + "epoch": 5.84, + "learning_rate": 5.320332951891273e-07, + "loss": 0.0383, + "step": 125240 + }, + { + "epoch": 5.84, + "learning_rate": 5.312495101343408e-07, + "loss": 0.0422, + "step": 125245 + }, + { + "epoch": 5.84, + "learning_rate": 5.304657250795543e-07, + "loss": 0.1549, + "step": 125250 + }, + { + "epoch": 5.84, + "learning_rate": 5.296819400247676e-07, + "loss": 0.3916, + "step": 125255 + }, + { + "epoch": 5.84, + "learning_rate": 5.288981549699811e-07, + "loss": 0.0758, + "step": 125260 + }, + { + "epoch": 5.85, + "learning_rate": 5.281143699151945e-07, + "loss": 0.0225, + "step": 125265 + }, + { + "epoch": 5.85, + "learning_rate": 5.273305848604079e-07, + "loss": 0.0239, + "step": 125270 + }, + { + "epoch": 5.85, + "learning_rate": 5.265467998056213e-07, + "loss": 0.0507, + "step": 125275 + }, + { + "epoch": 5.85, + "learning_rate": 5.257630147508348e-07, + "loss": 0.0369, + "step": 125280 + }, + { + "epoch": 5.85, + "learning_rate": 5.249792296960481e-07, + "loss": 0.0399, + "step": 125285 + }, + { + "epoch": 5.85, + "learning_rate": 5.241954446412616e-07, + "loss": 0.078, + "step": 125290 + }, + { + "epoch": 5.85, + "learning_rate": 5.234116595864751e-07, + "loss": 0.0975, + "step": 125295 + }, + { + "epoch": 5.85, + "learning_rate": 5.227846315426458e-07, + "loss": 0.1356, + "step": 125300 + }, + { + "epoch": 5.85, + "learning_rate": 5.220008464878591e-07, + "loss": 0.2313, + "step": 125305 + }, + { + "epoch": 5.85, + "learning_rate": 5.212170614330726e-07, + "loss": 0.0892, + "step": 125310 + }, + { + "epoch": 5.85, + "learning_rate": 5.204332763782861e-07, + "loss": 0.0299, + "step": 125315 + }, + { + "epoch": 5.85, + "learning_rate": 5.196494913234995e-07, + "loss": 0.0341, + "step": 125320 + }, + { + "epoch": 5.85, + "learning_rate": 5.188657062687129e-07, + "loss": 0.064, + "step": 125325 + }, + { + "epoch": 5.85, + "learning_rate": 5.180819212139263e-07, + "loss": 0.0212, + "step": 125330 + }, + { + "epoch": 5.85, + "learning_rate": 5.172981361591397e-07, + "loss": 0.1037, + "step": 125335 + }, + { + "epoch": 5.85, + "learning_rate": 5.165143511043532e-07, + "loss": 0.0772, + "step": 125340 + }, + { + "epoch": 5.85, + "learning_rate": 5.157305660495666e-07, + "loss": 0.0434, + "step": 125345 + }, + { + "epoch": 5.85, + "learning_rate": 5.149467809947801e-07, + "loss": 0.1951, + "step": 125350 + }, + { + "epoch": 5.85, + "learning_rate": 5.141629959399934e-07, + "loss": 0.3603, + "step": 125355 + }, + { + "epoch": 5.85, + "learning_rate": 5.133792108852069e-07, + "loss": 0.0547, + "step": 125360 + }, + { + "epoch": 5.85, + "learning_rate": 5.125954258304203e-07, + "loss": 0.0418, + "step": 125365 + }, + { + "epoch": 5.85, + "learning_rate": 5.118116407756337e-07, + "loss": 0.034, + "step": 125370 + }, + { + "epoch": 5.85, + "learning_rate": 5.110278557208472e-07, + "loss": 0.0089, + "step": 125375 + }, + { + "epoch": 5.85, + "learning_rate": 5.102440706660606e-07, + "loss": 0.0283, + "step": 125380 + }, + { + "epoch": 5.85, + "learning_rate": 5.09460285611274e-07, + "loss": 0.0423, + "step": 125385 + }, + { + "epoch": 5.85, + "learning_rate": 5.086765005564874e-07, + "loss": 0.0958, + "step": 125390 + }, + { + "epoch": 5.85, + "learning_rate": 5.078927155017009e-07, + "loss": 0.0886, + "step": 125395 + }, + { + "epoch": 5.85, + "learning_rate": 5.071089304469143e-07, + "loss": 0.1682, + "step": 125400 + }, + { + "epoch": 5.85, + "learning_rate": 5.063251453921277e-07, + "loss": 0.3699, + "step": 125405 + }, + { + "epoch": 5.85, + "learning_rate": 5.055413603373411e-07, + "loss": 0.1276, + "step": 125410 + }, + { + "epoch": 5.85, + "learning_rate": 5.047575752825545e-07, + "loss": 0.0123, + "step": 125415 + }, + { + "epoch": 5.85, + "learning_rate": 5.03973790227768e-07, + "loss": 0.0225, + "step": 125420 + }, + { + "epoch": 5.85, + "learning_rate": 5.031900051729815e-07, + "loss": 0.136, + "step": 125425 + }, + { + "epoch": 5.85, + "learning_rate": 5.024062201181948e-07, + "loss": 0.1377, + "step": 125430 + }, + { + "epoch": 5.85, + "learning_rate": 5.016224350634082e-07, + "loss": 0.0364, + "step": 125435 + }, + { + "epoch": 5.85, + "learning_rate": 5.008386500086217e-07, + "loss": 0.1446, + "step": 125440 + }, + { + "epoch": 5.85, + "learning_rate": 5.000548649538351e-07, + "loss": 0.1591, + "step": 125445 + }, + { + "epoch": 5.85, + "learning_rate": 4.992710798990485e-07, + "loss": 0.0772, + "step": 125450 + }, + { + "epoch": 5.85, + "learning_rate": 4.984872948442619e-07, + "loss": 0.258, + "step": 125455 + }, + { + "epoch": 5.85, + "learning_rate": 4.977035097894754e-07, + "loss": 0.0844, + "step": 125460 + }, + { + "epoch": 5.85, + "learning_rate": 4.969197247346888e-07, + "loss": 0.009, + "step": 125465 + }, + { + "epoch": 5.85, + "learning_rate": 4.961359396799023e-07, + "loss": 0.0326, + "step": 125470 + }, + { + "epoch": 5.85, + "learning_rate": 4.953521546251156e-07, + "loss": 0.0315, + "step": 125475 + }, + { + "epoch": 5.86, + "learning_rate": 4.94568369570329e-07, + "loss": 0.0555, + "step": 125480 + }, + { + "epoch": 5.86, + "learning_rate": 4.937845845155425e-07, + "loss": 0.0492, + "step": 125485 + }, + { + "epoch": 5.86, + "learning_rate": 4.93000799460756e-07, + "loss": 0.0715, + "step": 125490 + }, + { + "epoch": 5.86, + "learning_rate": 4.922170144059694e-07, + "loss": 0.0469, + "step": 125495 + }, + { + "epoch": 5.86, + "learning_rate": 4.914332293511827e-07, + "loss": 0.1208, + "step": 125500 + }, + { + "epoch": 5.86, + "learning_rate": 4.906494442963962e-07, + "loss": 0.331, + "step": 125505 + }, + { + "epoch": 5.86, + "learning_rate": 4.898656592416096e-07, + "loss": 0.113, + "step": 125510 + }, + { + "epoch": 5.86, + "learning_rate": 4.890818741868231e-07, + "loss": 0.0317, + "step": 125515 + }, + { + "epoch": 5.86, + "learning_rate": 4.882980891320365e-07, + "loss": 0.0412, + "step": 125520 + }, + { + "epoch": 5.86, + "learning_rate": 4.875143040772499e-07, + "loss": 0.0242, + "step": 125525 + }, + { + "epoch": 5.86, + "learning_rate": 4.867305190224633e-07, + "loss": 0.0251, + "step": 125530 + }, + { + "epoch": 5.86, + "learning_rate": 4.859467339676768e-07, + "loss": 0.015, + "step": 125535 + }, + { + "epoch": 5.86, + "learning_rate": 4.851629489128902e-07, + "loss": 0.0936, + "step": 125540 + }, + { + "epoch": 5.86, + "learning_rate": 4.843791638581035e-07, + "loss": 0.1024, + "step": 125545 + }, + { + "epoch": 5.86, + "learning_rate": 4.83595378803317e-07, + "loss": 0.1067, + "step": 125550 + }, + { + "epoch": 5.86, + "learning_rate": 4.828115937485304e-07, + "loss": 0.2384, + "step": 125555 + }, + { + "epoch": 5.86, + "learning_rate": 4.820278086937439e-07, + "loss": 0.1145, + "step": 125560 + }, + { + "epoch": 5.86, + "learning_rate": 4.812440236389573e-07, + "loss": 0.0384, + "step": 125565 + }, + { + "epoch": 5.86, + "learning_rate": 4.804602385841707e-07, + "loss": 0.0344, + "step": 125570 + }, + { + "epoch": 5.86, + "learning_rate": 4.796764535293841e-07, + "loss": 0.0348, + "step": 125575 + }, + { + "epoch": 5.86, + "learning_rate": 4.788926684745976e-07, + "loss": 0.0475, + "step": 125580 + }, + { + "epoch": 5.86, + "learning_rate": 4.78108883419811e-07, + "loss": 0.0294, + "step": 125585 + }, + { + "epoch": 5.86, + "learning_rate": 4.773250983650244e-07, + "loss": 0.0723, + "step": 125590 + }, + { + "epoch": 5.86, + "learning_rate": 4.7654131331023784e-07, + "loss": 0.1736, + "step": 125595 + }, + { + "epoch": 5.86, + "learning_rate": 4.7575752825545127e-07, + "loss": 0.1118, + "step": 125600 + }, + { + "epoch": 5.86, + "learning_rate": 4.7497374320066475e-07, + "loss": 0.4409, + "step": 125605 + }, + { + "epoch": 5.86, + "learning_rate": 4.7418995814587807e-07, + "loss": 0.0993, + "step": 125610 + }, + { + "epoch": 5.86, + "learning_rate": 4.7340617309109155e-07, + "loss": 0.014, + "step": 125615 + }, + { + "epoch": 5.86, + "learning_rate": 4.72622388036305e-07, + "loss": 0.0324, + "step": 125620 + }, + { + "epoch": 5.86, + "learning_rate": 4.718386029815184e-07, + "loss": 0.0504, + "step": 125625 + }, + { + "epoch": 5.86, + "learning_rate": 4.710548179267318e-07, + "loss": 0.0698, + "step": 125630 + }, + { + "epoch": 5.86, + "learning_rate": 4.702710328719452e-07, + "loss": 0.0542, + "step": 125635 + }, + { + "epoch": 5.86, + "learning_rate": 4.6948724781715865e-07, + "loss": 0.0223, + "step": 125640 + }, + { + "epoch": 5.86, + "learning_rate": 4.6870346276237213e-07, + "loss": 0.1163, + "step": 125645 + }, + { + "epoch": 5.86, + "learning_rate": 4.6791967770758545e-07, + "loss": 0.0806, + "step": 125650 + }, + { + "epoch": 5.86, + "learning_rate": 4.6713589265279894e-07, + "loss": 0.2093, + "step": 125655 + }, + { + "epoch": 5.86, + "learning_rate": 4.6635210759801236e-07, + "loss": 0.1428, + "step": 125660 + }, + { + "epoch": 5.86, + "learning_rate": 4.655683225432258e-07, + "loss": 0.0207, + "step": 125665 + }, + { + "epoch": 5.86, + "learning_rate": 4.647845374884392e-07, + "loss": 0.028, + "step": 125670 + }, + { + "epoch": 5.86, + "learning_rate": 4.640007524336526e-07, + "loss": 0.0224, + "step": 125675 + }, + { + "epoch": 5.86, + "learning_rate": 4.6321696737886603e-07, + "loss": 0.0223, + "step": 125680 + }, + { + "epoch": 5.86, + "learning_rate": 4.624331823240795e-07, + "loss": 0.0524, + "step": 125685 + }, + { + "epoch": 5.86, + "learning_rate": 4.6164939726929294e-07, + "loss": 0.1071, + "step": 125690 + }, + { + "epoch": 5.87, + "learning_rate": 4.608656122145063e-07, + "loss": 0.028, + "step": 125695 + }, + { + "epoch": 5.87, + "learning_rate": 4.6008182715971975e-07, + "loss": 0.1229, + "step": 125700 + }, + { + "epoch": 5.87, + "learning_rate": 4.592980421049332e-07, + "loss": 0.2866, + "step": 125705 + }, + { + "epoch": 5.87, + "learning_rate": 4.585142570501466e-07, + "loss": 0.1069, + "step": 125710 + }, + { + "epoch": 5.87, + "learning_rate": 4.5773047199536e-07, + "loss": 0.0152, + "step": 125715 + }, + { + "epoch": 5.87, + "learning_rate": 4.5694668694057346e-07, + "loss": 0.0321, + "step": 125720 + }, + { + "epoch": 5.87, + "learning_rate": 4.561629018857869e-07, + "loss": 0.0177, + "step": 125725 + }, + { + "epoch": 5.87, + "learning_rate": 4.553791168310003e-07, + "loss": 0.0248, + "step": 125730 + }, + { + "epoch": 5.87, + "learning_rate": 4.5459533177621375e-07, + "loss": 0.0776, + "step": 125735 + }, + { + "epoch": 5.87, + "learning_rate": 4.5381154672142713e-07, + "loss": 0.0242, + "step": 125740 + }, + { + "epoch": 5.87, + "learning_rate": 4.5302776166664056e-07, + "loss": 0.0818, + "step": 125745 + }, + { + "epoch": 5.87, + "learning_rate": 4.5224397661185404e-07, + "loss": 0.1937, + "step": 125750 + }, + { + "epoch": 5.87, + "learning_rate": 4.5146019155706747e-07, + "loss": 0.2767, + "step": 125755 + }, + { + "epoch": 5.87, + "learning_rate": 4.5067640650228085e-07, + "loss": 0.102, + "step": 125760 + }, + { + "epoch": 5.87, + "learning_rate": 4.498926214474943e-07, + "loss": 0.0351, + "step": 125765 + }, + { + "epoch": 5.87, + "learning_rate": 4.491088363927077e-07, + "loss": 0.023, + "step": 125770 + }, + { + "epoch": 5.87, + "learning_rate": 4.4832505133792113e-07, + "loss": 0.0449, + "step": 125775 + }, + { + "epoch": 5.87, + "learning_rate": 4.475412662831345e-07, + "loss": 0.0316, + "step": 125780 + }, + { + "epoch": 5.87, + "learning_rate": 4.4675748122834794e-07, + "loss": 0.0607, + "step": 125785 + }, + { + "epoch": 5.87, + "learning_rate": 4.459736961735614e-07, + "loss": 0.0754, + "step": 125790 + }, + { + "epoch": 5.87, + "learning_rate": 4.4518991111877485e-07, + "loss": 0.0695, + "step": 125795 + }, + { + "epoch": 5.87, + "learning_rate": 4.444061260639883e-07, + "loss": 0.1225, + "step": 125800 + }, + { + "epoch": 5.87, + "learning_rate": 4.4362234100920166e-07, + "loss": 0.3429, + "step": 125805 + }, + { + "epoch": 5.87, + "learning_rate": 4.428385559544151e-07, + "loss": 0.0938, + "step": 125810 + }, + { + "epoch": 5.87, + "learning_rate": 4.420547708996285e-07, + "loss": 0.026, + "step": 125815 + }, + { + "epoch": 5.87, + "learning_rate": 4.41270985844842e-07, + "loss": 0.0462, + "step": 125820 + }, + { + "epoch": 5.87, + "learning_rate": 4.404872007900553e-07, + "loss": 0.0113, + "step": 125825 + }, + { + "epoch": 5.87, + "learning_rate": 4.397034157352688e-07, + "loss": 0.0309, + "step": 125830 + }, + { + "epoch": 5.87, + "learning_rate": 4.3891963068048223e-07, + "loss": 0.0571, + "step": 125835 + }, + { + "epoch": 5.87, + "learning_rate": 4.3813584562569566e-07, + "loss": 0.0266, + "step": 125840 + }, + { + "epoch": 5.87, + "learning_rate": 4.3735206057090904e-07, + "loss": 0.0851, + "step": 125845 + }, + { + "epoch": 5.87, + "learning_rate": 4.3656827551612247e-07, + "loss": 0.1302, + "step": 125850 + }, + { + "epoch": 5.87, + "learning_rate": 4.357844904613359e-07, + "loss": 0.3259, + "step": 125855 + }, + { + "epoch": 5.87, + "learning_rate": 4.350007054065494e-07, + "loss": 0.1361, + "step": 125860 + }, + { + "epoch": 5.87, + "learning_rate": 4.342169203517628e-07, + "loss": 0.0326, + "step": 125865 + }, + { + "epoch": 5.87, + "learning_rate": 4.334331352969762e-07, + "loss": 0.0385, + "step": 125870 + }, + { + "epoch": 5.87, + "learning_rate": 4.326493502421896e-07, + "loss": 0.0556, + "step": 125875 + }, + { + "epoch": 5.87, + "learning_rate": 4.3186556518740304e-07, + "loss": 0.0535, + "step": 125880 + }, + { + "epoch": 5.87, + "learning_rate": 4.3108178013261647e-07, + "loss": 0.0262, + "step": 125885 + }, + { + "epoch": 5.87, + "learning_rate": 4.3029799507782985e-07, + "loss": 0.1555, + "step": 125890 + }, + { + "epoch": 5.87, + "learning_rate": 4.295142100230433e-07, + "loss": 0.0911, + "step": 125895 + }, + { + "epoch": 5.87, + "learning_rate": 4.2873042496825676e-07, + "loss": 0.1903, + "step": 125900 + }, + { + "epoch": 5.87, + "learning_rate": 4.279466399134702e-07, + "loss": 0.3489, + "step": 125905 + }, + { + "epoch": 5.88, + "learning_rate": 4.2716285485868357e-07, + "loss": 0.1316, + "step": 125910 + }, + { + "epoch": 5.88, + "learning_rate": 4.26379069803897e-07, + "loss": 0.0659, + "step": 125915 + }, + { + "epoch": 5.88, + "learning_rate": 4.255952847491104e-07, + "loss": 0.0148, + "step": 125920 + }, + { + "epoch": 5.88, + "learning_rate": 4.2481149969432385e-07, + "loss": 0.0428, + "step": 125925 + }, + { + "epoch": 5.88, + "learning_rate": 4.2402771463953734e-07, + "loss": 0.0199, + "step": 125930 + }, + { + "epoch": 5.88, + "learning_rate": 4.232439295847507e-07, + "loss": 0.074, + "step": 125935 + }, + { + "epoch": 5.88, + "learning_rate": 4.2246014452996414e-07, + "loss": 0.0551, + "step": 125940 + }, + { + "epoch": 5.88, + "learning_rate": 4.2167635947517757e-07, + "loss": 0.0515, + "step": 125945 + }, + { + "epoch": 5.88, + "learning_rate": 4.20892574420391e-07, + "loss": 0.1589, + "step": 125950 + }, + { + "epoch": 5.88, + "learning_rate": 4.201087893656044e-07, + "loss": 0.2368, + "step": 125955 + }, + { + "epoch": 5.88, + "learning_rate": 4.193250043108178e-07, + "loss": 0.1025, + "step": 125960 + }, + { + "epoch": 5.88, + "learning_rate": 4.185412192560313e-07, + "loss": 0.0012, + "step": 125965 + }, + { + "epoch": 5.88, + "learning_rate": 4.177574342012447e-07, + "loss": 0.0493, + "step": 125970 + }, + { + "epoch": 5.88, + "learning_rate": 4.169736491464581e-07, + "loss": 0.0188, + "step": 125975 + }, + { + "epoch": 5.88, + "learning_rate": 4.161898640916715e-07, + "loss": 0.0268, + "step": 125980 + }, + { + "epoch": 5.88, + "learning_rate": 4.1540607903688495e-07, + "loss": 0.062, + "step": 125985 + }, + { + "epoch": 5.88, + "learning_rate": 4.146222939820984e-07, + "loss": 0.1823, + "step": 125990 + }, + { + "epoch": 5.88, + "learning_rate": 4.1383850892731186e-07, + "loss": 0.1825, + "step": 125995 + }, + { + "epoch": 5.88, + "learning_rate": 4.130547238725252e-07, + "loss": 0.1463, + "step": 126000 + }, + { + "epoch": 5.88, + "learning_rate": 4.1227093881773867e-07, + "loss": 0.254, + "step": 126005 + }, + { + "epoch": 5.88, + "learning_rate": 4.114871537629521e-07, + "loss": 0.0922, + "step": 126010 + }, + { + "epoch": 5.88, + "learning_rate": 4.1070336870816553e-07, + "loss": 0.0083, + "step": 126015 + }, + { + "epoch": 5.88, + "learning_rate": 4.099195836533789e-07, + "loss": 0.0782, + "step": 126020 + }, + { + "epoch": 5.88, + "learning_rate": 4.0913579859859233e-07, + "loss": 0.0108, + "step": 126025 + }, + { + "epoch": 5.88, + "learning_rate": 4.0835201354380576e-07, + "loss": 0.0316, + "step": 126030 + }, + { + "epoch": 5.88, + "learning_rate": 4.0756822848901925e-07, + "loss": 0.0602, + "step": 126035 + }, + { + "epoch": 5.88, + "learning_rate": 4.0678444343423257e-07, + "loss": 0.0778, + "step": 126040 + }, + { + "epoch": 5.88, + "learning_rate": 4.0600065837944605e-07, + "loss": 0.151, + "step": 126045 + }, + { + "epoch": 5.88, + "learning_rate": 4.052168733246595e-07, + "loss": 0.131, + "step": 126050 + }, + { + "epoch": 5.88, + "learning_rate": 4.044330882698729e-07, + "loss": 0.33, + "step": 126055 + }, + { + "epoch": 5.88, + "learning_rate": 4.0364930321508634e-07, + "loss": 0.0576, + "step": 126060 + }, + { + "epoch": 5.88, + "learning_rate": 4.028655181602997e-07, + "loss": 0.0244, + "step": 126065 + }, + { + "epoch": 5.88, + "learning_rate": 4.0208173310551314e-07, + "loss": 0.0684, + "step": 126070 + }, + { + "epoch": 5.88, + "learning_rate": 4.0129794805072663e-07, + "loss": 0.0169, + "step": 126075 + }, + { + "epoch": 5.88, + "learning_rate": 4.0051416299594006e-07, + "loss": 0.054, + "step": 126080 + }, + { + "epoch": 5.88, + "learning_rate": 3.9973037794115343e-07, + "loss": 0.0273, + "step": 126085 + }, + { + "epoch": 5.88, + "learning_rate": 3.9894659288636686e-07, + "loss": 0.0505, + "step": 126090 + }, + { + "epoch": 5.88, + "learning_rate": 3.981628078315803e-07, + "loss": 0.054, + "step": 126095 + }, + { + "epoch": 5.88, + "learning_rate": 3.973790227767937e-07, + "loss": 0.1351, + "step": 126100 + }, + { + "epoch": 5.88, + "learning_rate": 3.965952377220071e-07, + "loss": 0.1904, + "step": 126105 + }, + { + "epoch": 5.88, + "learning_rate": 3.958114526672206e-07, + "loss": 0.1008, + "step": 126110 + }, + { + "epoch": 5.88, + "learning_rate": 3.95027667612434e-07, + "loss": 0.012, + "step": 126115 + }, + { + "epoch": 5.88, + "learning_rate": 3.9424388255764744e-07, + "loss": 0.0129, + "step": 126120 + }, + { + "epoch": 5.89, + "learning_rate": 3.9346009750286087e-07, + "loss": 0.0449, + "step": 126125 + }, + { + "epoch": 5.89, + "learning_rate": 3.9267631244807424e-07, + "loss": 0.0323, + "step": 126130 + }, + { + "epoch": 5.89, + "learning_rate": 3.9189252739328767e-07, + "loss": 0.0079, + "step": 126135 + }, + { + "epoch": 5.89, + "learning_rate": 3.9110874233850116e-07, + "loss": 0.0837, + "step": 126140 + }, + { + "epoch": 5.89, + "learning_rate": 3.903249572837146e-07, + "loss": 0.0403, + "step": 126145 + }, + { + "epoch": 5.89, + "learning_rate": 3.8954117222892796e-07, + "loss": 0.0911, + "step": 126150 + }, + { + "epoch": 5.89, + "learning_rate": 3.887573871741414e-07, + "loss": 0.3092, + "step": 126155 + }, + { + "epoch": 5.89, + "learning_rate": 3.879736021193548e-07, + "loss": 0.1029, + "step": 126160 + }, + { + "epoch": 5.89, + "learning_rate": 3.8718981706456825e-07, + "loss": 0.016, + "step": 126165 + }, + { + "epoch": 5.89, + "learning_rate": 3.864060320097817e-07, + "loss": 0.0919, + "step": 126170 + }, + { + "epoch": 5.89, + "learning_rate": 3.8562224695499505e-07, + "loss": 0.021, + "step": 126175 + }, + { + "epoch": 5.89, + "learning_rate": 3.8483846190020854e-07, + "loss": 0.0564, + "step": 126180 + }, + { + "epoch": 5.89, + "learning_rate": 3.8405467684542197e-07, + "loss": 0.0509, + "step": 126185 + }, + { + "epoch": 5.89, + "learning_rate": 3.832708917906354e-07, + "loss": 0.1998, + "step": 126190 + }, + { + "epoch": 5.89, + "learning_rate": 3.8248710673584877e-07, + "loss": 0.0931, + "step": 126195 + }, + { + "epoch": 5.89, + "learning_rate": 3.817033216810622e-07, + "loss": 0.0734, + "step": 126200 + }, + { + "epoch": 5.89, + "learning_rate": 3.8091953662627563e-07, + "loss": 0.3515, + "step": 126205 + }, + { + "epoch": 5.89, + "learning_rate": 3.801357515714891e-07, + "loss": 0.1169, + "step": 126210 + }, + { + "epoch": 5.89, + "learning_rate": 3.7935196651670244e-07, + "loss": 0.0275, + "step": 126215 + }, + { + "epoch": 5.89, + "learning_rate": 3.785681814619159e-07, + "loss": 0.0332, + "step": 126220 + }, + { + "epoch": 5.89, + "learning_rate": 3.7778439640712935e-07, + "loss": 0.0838, + "step": 126225 + }, + { + "epoch": 5.89, + "learning_rate": 3.770006113523428e-07, + "loss": 0.0341, + "step": 126230 + }, + { + "epoch": 5.89, + "learning_rate": 3.762168262975562e-07, + "loss": 0.0414, + "step": 126235 + }, + { + "epoch": 5.89, + "learning_rate": 3.754330412427696e-07, + "loss": 0.106, + "step": 126240 + }, + { + "epoch": 5.89, + "learning_rate": 3.74649256187983e-07, + "loss": 0.1217, + "step": 126245 + }, + { + "epoch": 5.89, + "learning_rate": 3.738654711331965e-07, + "loss": 0.1379, + "step": 126250 + }, + { + "epoch": 5.89, + "learning_rate": 3.730816860784099e-07, + "loss": 0.2479, + "step": 126255 + }, + { + "epoch": 5.89, + "learning_rate": 3.722979010236233e-07, + "loss": 0.1085, + "step": 126260 + }, + { + "epoch": 5.89, + "learning_rate": 3.7151411596883673e-07, + "loss": 0.0144, + "step": 126265 + }, + { + "epoch": 5.89, + "learning_rate": 3.7073033091405016e-07, + "loss": 0.0376, + "step": 126270 + }, + { + "epoch": 5.89, + "learning_rate": 3.699465458592636e-07, + "loss": 0.0575, + "step": 126275 + }, + { + "epoch": 5.89, + "learning_rate": 3.6916276080447696e-07, + "loss": 0.0594, + "step": 126280 + }, + { + "epoch": 5.89, + "learning_rate": 3.6837897574969045e-07, + "loss": 0.0953, + "step": 126285 + }, + { + "epoch": 5.89, + "learning_rate": 3.675951906949039e-07, + "loss": 0.0642, + "step": 126290 + }, + { + "epoch": 5.89, + "learning_rate": 3.668114056401173e-07, + "loss": 0.1034, + "step": 126295 + }, + { + "epoch": 5.89, + "learning_rate": 3.6602762058533073e-07, + "loss": 0.1206, + "step": 126300 + }, + { + "epoch": 5.89, + "learning_rate": 3.652438355305441e-07, + "loss": 0.2759, + "step": 126305 + }, + { + "epoch": 5.89, + "learning_rate": 3.6446005047575754e-07, + "loss": 0.0634, + "step": 126310 + }, + { + "epoch": 5.89, + "learning_rate": 3.6367626542097097e-07, + "loss": 0.0591, + "step": 126315 + }, + { + "epoch": 5.89, + "learning_rate": 3.6289248036618445e-07, + "loss": 0.0209, + "step": 126320 + }, + { + "epoch": 5.89, + "learning_rate": 3.6210869531139783e-07, + "loss": 0.0286, + "step": 126325 + }, + { + "epoch": 5.89, + "learning_rate": 3.6132491025661126e-07, + "loss": 0.0129, + "step": 126330 + }, + { + "epoch": 5.89, + "learning_rate": 3.605411252018247e-07, + "loss": 0.0632, + "step": 126335 + }, + { + "epoch": 5.9, + "learning_rate": 3.597573401470381e-07, + "loss": 0.056, + "step": 126340 + }, + { + "epoch": 5.9, + "learning_rate": 3.589735550922515e-07, + "loss": 0.1355, + "step": 126345 + }, + { + "epoch": 5.9, + "learning_rate": 3.581897700374649e-07, + "loss": 0.0513, + "step": 126350 + }, + { + "epoch": 5.9, + "learning_rate": 3.574059849826784e-07, + "loss": 0.2379, + "step": 126355 + }, + { + "epoch": 5.9, + "learning_rate": 3.5662219992789183e-07, + "loss": 0.0959, + "step": 126360 + }, + { + "epoch": 5.9, + "learning_rate": 3.5583841487310526e-07, + "loss": 0.0021, + "step": 126365 + }, + { + "epoch": 5.9, + "learning_rate": 3.5505462981831864e-07, + "loss": 0.0213, + "step": 126370 + }, + { + "epoch": 5.9, + "learning_rate": 3.5427084476353207e-07, + "loss": 0.0379, + "step": 126375 + }, + { + "epoch": 5.9, + "learning_rate": 3.534870597087455e-07, + "loss": 0.0689, + "step": 126380 + }, + { + "epoch": 5.9, + "learning_rate": 3.52703274653959e-07, + "loss": 0.0472, + "step": 126385 + }, + { + "epoch": 5.9, + "learning_rate": 3.519194895991723e-07, + "loss": 0.0635, + "step": 126390 + }, + { + "epoch": 5.9, + "learning_rate": 3.511357045443858e-07, + "loss": 0.0904, + "step": 126395 + }, + { + "epoch": 5.9, + "learning_rate": 3.503519194895992e-07, + "loss": 0.124, + "step": 126400 + }, + { + "epoch": 5.9, + "learning_rate": 3.4956813443481264e-07, + "loss": 0.3752, + "step": 126405 + }, + { + "epoch": 5.9, + "learning_rate": 3.48784349380026e-07, + "loss": 0.1019, + "step": 126410 + }, + { + "epoch": 5.9, + "learning_rate": 3.4800056432523945e-07, + "loss": 0.0984, + "step": 126415 + }, + { + "epoch": 5.9, + "learning_rate": 3.472167792704529e-07, + "loss": 0.0207, + "step": 126420 + }, + { + "epoch": 5.9, + "learning_rate": 3.4643299421566636e-07, + "loss": 0.0393, + "step": 126425 + }, + { + "epoch": 5.9, + "learning_rate": 3.456492091608798e-07, + "loss": 0.0792, + "step": 126430 + }, + { + "epoch": 5.9, + "learning_rate": 3.4486542410609317e-07, + "loss": 0.0582, + "step": 126435 + }, + { + "epoch": 5.9, + "learning_rate": 3.440816390513066e-07, + "loss": 0.0697, + "step": 126440 + }, + { + "epoch": 5.9, + "learning_rate": 3.4329785399652e-07, + "loss": 0.0755, + "step": 126445 + }, + { + "epoch": 5.9, + "learning_rate": 3.4251406894173346e-07, + "loss": 0.1035, + "step": 126450 + }, + { + "epoch": 5.9, + "learning_rate": 3.4173028388694683e-07, + "loss": 0.2971, + "step": 126455 + }, + { + "epoch": 5.9, + "learning_rate": 3.4094649883216026e-07, + "loss": 0.0703, + "step": 126460 + }, + { + "epoch": 5.9, + "learning_rate": 3.4016271377737374e-07, + "loss": 0.0029, + "step": 126465 + }, + { + "epoch": 5.9, + "learning_rate": 3.3937892872258717e-07, + "loss": 0.0159, + "step": 126470 + }, + { + "epoch": 5.9, + "learning_rate": 3.3859514366780055e-07, + "loss": 0.0833, + "step": 126475 + }, + { + "epoch": 5.9, + "learning_rate": 3.37811358613014e-07, + "loss": 0.0557, + "step": 126480 + }, + { + "epoch": 5.9, + "learning_rate": 3.370275735582274e-07, + "loss": 0.0549, + "step": 126485 + }, + { + "epoch": 5.9, + "learning_rate": 3.3624378850344084e-07, + "loss": 0.1573, + "step": 126490 + }, + { + "epoch": 5.9, + "learning_rate": 3.354600034486543e-07, + "loss": 0.1363, + "step": 126495 + }, + { + "epoch": 5.9, + "learning_rate": 3.346762183938677e-07, + "loss": 0.1628, + "step": 126500 + }, + { + "epoch": 5.9, + "learning_rate": 3.338924333390811e-07, + "loss": 0.2185, + "step": 126505 + }, + { + "epoch": 5.9, + "learning_rate": 3.3310864828429455e-07, + "loss": 0.1226, + "step": 126510 + }, + { + "epoch": 5.9, + "learning_rate": 3.32324863229508e-07, + "loss": 0.0266, + "step": 126515 + }, + { + "epoch": 5.9, + "learning_rate": 3.3154107817472136e-07, + "loss": 0.0425, + "step": 126520 + }, + { + "epoch": 5.9, + "learning_rate": 3.307572931199348e-07, + "loss": 0.0444, + "step": 126525 + }, + { + "epoch": 5.9, + "learning_rate": 3.2997350806514827e-07, + "loss": 0.0729, + "step": 126530 + }, + { + "epoch": 5.9, + "learning_rate": 3.291897230103617e-07, + "loss": 0.0588, + "step": 126535 + }, + { + "epoch": 5.9, + "learning_rate": 3.284059379555751e-07, + "loss": 0.0726, + "step": 126540 + }, + { + "epoch": 5.9, + "learning_rate": 3.276221529007885e-07, + "loss": 0.0627, + "step": 126545 + }, + { + "epoch": 5.9, + "learning_rate": 3.2683836784600194e-07, + "loss": 0.2044, + "step": 126550 + }, + { + "epoch": 5.91, + "learning_rate": 3.2605458279121536e-07, + "loss": 0.2197, + "step": 126555 + }, + { + "epoch": 5.91, + "learning_rate": 3.252707977364288e-07, + "loss": 0.1188, + "step": 126560 + }, + { + "epoch": 5.91, + "learning_rate": 3.2448701268164217e-07, + "loss": 0.0015, + "step": 126565 + }, + { + "epoch": 5.91, + "learning_rate": 3.2370322762685565e-07, + "loss": 0.0247, + "step": 126570 + }, + { + "epoch": 5.91, + "learning_rate": 3.229194425720691e-07, + "loss": 0.0406, + "step": 126575 + }, + { + "epoch": 5.91, + "learning_rate": 3.221356575172825e-07, + "loss": 0.0362, + "step": 126580 + }, + { + "epoch": 5.91, + "learning_rate": 3.213518724624959e-07, + "loss": 0.0535, + "step": 126585 + }, + { + "epoch": 5.91, + "learning_rate": 3.205680874077093e-07, + "loss": 0.0992, + "step": 126590 + }, + { + "epoch": 5.91, + "learning_rate": 3.1978430235292275e-07, + "loss": 0.1036, + "step": 126595 + }, + { + "epoch": 5.91, + "learning_rate": 3.1900051729813623e-07, + "loss": 0.1547, + "step": 126600 + }, + { + "epoch": 5.91, + "learning_rate": 3.1821673224334955e-07, + "loss": 0.3051, + "step": 126605 + }, + { + "epoch": 5.91, + "learning_rate": 3.1743294718856303e-07, + "loss": 0.0775, + "step": 126610 + }, + { + "epoch": 5.91, + "learning_rate": 3.1664916213377646e-07, + "loss": 0.0139, + "step": 126615 + }, + { + "epoch": 5.91, + "learning_rate": 3.158653770789899e-07, + "loss": 0.0661, + "step": 126620 + }, + { + "epoch": 5.91, + "learning_rate": 3.150815920242033e-07, + "loss": 0.0453, + "step": 126625 + }, + { + "epoch": 5.91, + "learning_rate": 3.142978069694167e-07, + "loss": 0.0851, + "step": 126630 + }, + { + "epoch": 5.91, + "learning_rate": 3.1351402191463013e-07, + "loss": 0.0962, + "step": 126635 + }, + { + "epoch": 5.91, + "learning_rate": 3.127302368598436e-07, + "loss": 0.0652, + "step": 126640 + }, + { + "epoch": 5.91, + "learning_rate": 3.11946451805057e-07, + "loss": 0.1415, + "step": 126645 + }, + { + "epoch": 5.91, + "learning_rate": 3.111626667502704e-07, + "loss": 0.0981, + "step": 126650 + }, + { + "epoch": 5.91, + "learning_rate": 3.1037888169548385e-07, + "loss": 0.204, + "step": 126655 + }, + { + "epoch": 5.91, + "learning_rate": 3.095950966406973e-07, + "loss": 0.0997, + "step": 126660 + }, + { + "epoch": 5.91, + "learning_rate": 3.088113115859107e-07, + "loss": 0.0178, + "step": 126665 + }, + { + "epoch": 5.91, + "learning_rate": 3.0802752653112413e-07, + "loss": 0.0343, + "step": 126670 + }, + { + "epoch": 5.91, + "learning_rate": 3.0724374147633756e-07, + "loss": 0.035, + "step": 126675 + }, + { + "epoch": 5.91, + "learning_rate": 3.06459956421551e-07, + "loss": 0.0573, + "step": 126680 + }, + { + "epoch": 5.91, + "learning_rate": 3.056761713667644e-07, + "loss": 0.0502, + "step": 126685 + }, + { + "epoch": 5.91, + "learning_rate": 3.0489238631197785e-07, + "loss": 0.0847, + "step": 126690 + }, + { + "epoch": 5.91, + "learning_rate": 3.041086012571913e-07, + "loss": 0.0414, + "step": 126695 + }, + { + "epoch": 5.91, + "learning_rate": 3.0332481620240466e-07, + "loss": 0.2004, + "step": 126700 + }, + { + "epoch": 5.91, + "learning_rate": 3.025410311476181e-07, + "loss": 0.3079, + "step": 126705 + }, + { + "epoch": 5.91, + "learning_rate": 3.017572460928315e-07, + "loss": 0.1266, + "step": 126710 + }, + { + "epoch": 5.91, + "learning_rate": 3.0097346103804494e-07, + "loss": 0.0371, + "step": 126715 + }, + { + "epoch": 5.91, + "learning_rate": 3.0018967598325837e-07, + "loss": 0.029, + "step": 126720 + }, + { + "epoch": 5.91, + "learning_rate": 2.994058909284718e-07, + "loss": 0.0088, + "step": 126725 + }, + { + "epoch": 5.91, + "learning_rate": 2.9862210587368523e-07, + "loss": 0.0504, + "step": 126730 + }, + { + "epoch": 5.91, + "learning_rate": 2.9783832081889866e-07, + "loss": 0.0536, + "step": 126735 + }, + { + "epoch": 5.91, + "learning_rate": 2.970545357641121e-07, + "loss": 0.0858, + "step": 126740 + }, + { + "epoch": 5.91, + "learning_rate": 2.962707507093255e-07, + "loss": 0.0256, + "step": 126745 + }, + { + "epoch": 5.91, + "learning_rate": 2.9548696565453895e-07, + "loss": 0.099, + "step": 126750 + }, + { + "epoch": 5.91, + "learning_rate": 2.947031805997523e-07, + "loss": 0.3971, + "step": 126755 + }, + { + "epoch": 5.91, + "learning_rate": 2.939193955449658e-07, + "loss": 0.1031, + "step": 126760 + }, + { + "epoch": 5.92, + "learning_rate": 2.931356104901792e-07, + "loss": 0.0077, + "step": 126765 + }, + { + "epoch": 5.92, + "learning_rate": 2.923518254353926e-07, + "loss": 0.0005, + "step": 126770 + }, + { + "epoch": 5.92, + "learning_rate": 2.9156804038060604e-07, + "loss": 0.0256, + "step": 126775 + }, + { + "epoch": 5.92, + "learning_rate": 2.9078425532581947e-07, + "loss": 0.0436, + "step": 126780 + }, + { + "epoch": 5.92, + "learning_rate": 2.900004702710329e-07, + "loss": 0.0395, + "step": 126785 + }, + { + "epoch": 5.92, + "learning_rate": 2.8921668521624633e-07, + "loss": 0.1954, + "step": 126790 + }, + { + "epoch": 5.92, + "learning_rate": 2.884329001614597e-07, + "loss": 0.1202, + "step": 126795 + }, + { + "epoch": 5.92, + "learning_rate": 2.876491151066732e-07, + "loss": 0.1597, + "step": 126800 + }, + { + "epoch": 5.92, + "learning_rate": 2.868653300518866e-07, + "loss": 0.2994, + "step": 126805 + }, + { + "epoch": 5.92, + "learning_rate": 2.860815449971e-07, + "loss": 0.0933, + "step": 126810 + }, + { + "epoch": 5.92, + "learning_rate": 2.852977599423135e-07, + "loss": 0.0244, + "step": 126815 + }, + { + "epoch": 5.92, + "learning_rate": 2.8451397488752685e-07, + "loss": 0.0075, + "step": 126820 + }, + { + "epoch": 5.92, + "learning_rate": 2.837301898327403e-07, + "loss": 0.0449, + "step": 126825 + }, + { + "epoch": 5.92, + "learning_rate": 2.829464047779537e-07, + "loss": 0.0725, + "step": 126830 + }, + { + "epoch": 5.92, + "learning_rate": 2.8216261972316714e-07, + "loss": 0.0244, + "step": 126835 + }, + { + "epoch": 5.92, + "learning_rate": 2.8137883466838057e-07, + "loss": 0.0963, + "step": 126840 + }, + { + "epoch": 5.92, + "learning_rate": 2.80595049613594e-07, + "loss": 0.0828, + "step": 126845 + }, + { + "epoch": 5.92, + "learning_rate": 2.798112645588074e-07, + "loss": 0.1557, + "step": 126850 + }, + { + "epoch": 5.92, + "learning_rate": 2.7902747950402086e-07, + "loss": 0.2002, + "step": 126855 + }, + { + "epoch": 5.92, + "learning_rate": 2.7824369444923424e-07, + "loss": 0.1117, + "step": 126860 + }, + { + "epoch": 5.92, + "learning_rate": 2.7745990939444766e-07, + "loss": 0.0148, + "step": 126865 + }, + { + "epoch": 5.92, + "learning_rate": 2.7667612433966115e-07, + "loss": 0.0576, + "step": 126870 + }, + { + "epoch": 5.92, + "learning_rate": 2.758923392848745e-07, + "loss": 0.0132, + "step": 126875 + }, + { + "epoch": 5.92, + "learning_rate": 2.7510855423008795e-07, + "loss": 0.069, + "step": 126880 + }, + { + "epoch": 5.92, + "learning_rate": 2.743247691753014e-07, + "loss": 0.0826, + "step": 126885 + }, + { + "epoch": 5.92, + "learning_rate": 2.735409841205148e-07, + "loss": 0.1004, + "step": 126890 + }, + { + "epoch": 5.92, + "learning_rate": 2.7275719906572824e-07, + "loss": 0.0945, + "step": 126895 + }, + { + "epoch": 5.92, + "learning_rate": 2.7197341401094167e-07, + "loss": 0.1569, + "step": 126900 + }, + { + "epoch": 5.92, + "learning_rate": 2.711896289561551e-07, + "loss": 0.3853, + "step": 126905 + }, + { + "epoch": 5.92, + "learning_rate": 2.7040584390136853e-07, + "loss": 0.072, + "step": 126910 + }, + { + "epoch": 5.92, + "learning_rate": 2.696220588465819e-07, + "loss": 0.0294, + "step": 126915 + }, + { + "epoch": 5.92, + "learning_rate": 2.688382737917954e-07, + "loss": 0.0112, + "step": 126920 + }, + { + "epoch": 5.92, + "learning_rate": 2.6805448873700876e-07, + "loss": 0.0136, + "step": 126925 + }, + { + "epoch": 5.92, + "learning_rate": 2.672707036822222e-07, + "loss": 0.0156, + "step": 126930 + }, + { + "epoch": 5.92, + "learning_rate": 2.664869186274357e-07, + "loss": 0.026, + "step": 126935 + }, + { + "epoch": 5.92, + "learning_rate": 2.6570313357264905e-07, + "loss": 0.069, + "step": 126940 + }, + { + "epoch": 5.92, + "learning_rate": 2.649193485178625e-07, + "loss": 0.1126, + "step": 126945 + }, + { + "epoch": 5.92, + "learning_rate": 2.641355634630759e-07, + "loss": 0.0802, + "step": 126950 + }, + { + "epoch": 5.92, + "learning_rate": 2.6335177840828934e-07, + "loss": 0.3289, + "step": 126955 + }, + { + "epoch": 5.92, + "learning_rate": 2.6256799335350277e-07, + "loss": 0.0734, + "step": 126960 + }, + { + "epoch": 5.92, + "learning_rate": 2.617842082987162e-07, + "loss": 0.0141, + "step": 126965 + }, + { + "epoch": 5.92, + "learning_rate": 2.610004232439296e-07, + "loss": 0.055, + "step": 126970 + }, + { + "epoch": 5.92, + "learning_rate": 2.6021663818914306e-07, + "loss": 0.0095, + "step": 126975 + }, + { + "epoch": 5.93, + "learning_rate": 2.5943285313435643e-07, + "loss": 0.0962, + "step": 126980 + }, + { + "epoch": 5.93, + "learning_rate": 2.5864906807956986e-07, + "loss": 0.0649, + "step": 126985 + }, + { + "epoch": 5.93, + "learning_rate": 2.578652830247833e-07, + "loss": 0.0923, + "step": 126990 + }, + { + "epoch": 5.93, + "learning_rate": 2.570814979699967e-07, + "loss": 0.0643, + "step": 126995 + }, + { + "epoch": 5.93, + "learning_rate": 2.5629771291521015e-07, + "loss": 0.0888, + "step": 127000 + }, + { + "epoch": 5.93, + "learning_rate": 2.555139278604236e-07, + "loss": 0.2873, + "step": 127005 + }, + { + "epoch": 5.93, + "learning_rate": 2.54730142805637e-07, + "loss": 0.1016, + "step": 127010 + }, + { + "epoch": 5.93, + "learning_rate": 2.5394635775085044e-07, + "loss": 0.0044, + "step": 127015 + }, + { + "epoch": 5.93, + "learning_rate": 2.5316257269606387e-07, + "loss": 0.0167, + "step": 127020 + }, + { + "epoch": 5.93, + "learning_rate": 2.5237878764127724e-07, + "loss": 0.0387, + "step": 127025 + }, + { + "epoch": 5.93, + "learning_rate": 2.515950025864907e-07, + "loss": 0.0373, + "step": 127030 + }, + { + "epoch": 5.93, + "learning_rate": 2.508112175317041e-07, + "loss": 0.0457, + "step": 127035 + }, + { + "epoch": 5.93, + "learning_rate": 2.5002743247691753e-07, + "loss": 0.0099, + "step": 127040 + }, + { + "epoch": 5.93, + "learning_rate": 2.4924364742213096e-07, + "loss": 0.0908, + "step": 127045 + }, + { + "epoch": 5.93, + "learning_rate": 2.484598623673444e-07, + "loss": 0.0785, + "step": 127050 + }, + { + "epoch": 5.93, + "learning_rate": 2.476760773125578e-07, + "loss": 0.2785, + "step": 127055 + }, + { + "epoch": 5.93, + "learning_rate": 2.4689229225777125e-07, + "loss": 0.1418, + "step": 127060 + }, + { + "epoch": 5.93, + "learning_rate": 2.461085072029847e-07, + "loss": 0.0174, + "step": 127065 + }, + { + "epoch": 5.93, + "learning_rate": 2.453247221481981e-07, + "loss": 0.0525, + "step": 127070 + }, + { + "epoch": 5.93, + "learning_rate": 2.4454093709341154e-07, + "loss": 0.0581, + "step": 127075 + }, + { + "epoch": 5.93, + "learning_rate": 2.4375715203862497e-07, + "loss": 0.0381, + "step": 127080 + }, + { + "epoch": 5.93, + "learning_rate": 2.429733669838384e-07, + "loss": 0.0702, + "step": 127085 + }, + { + "epoch": 5.93, + "learning_rate": 2.4218958192905177e-07, + "loss": 0.0924, + "step": 127090 + }, + { + "epoch": 5.93, + "learning_rate": 2.414057968742652e-07, + "loss": 0.1031, + "step": 127095 + }, + { + "epoch": 5.93, + "learning_rate": 2.4062201181947863e-07, + "loss": 0.1618, + "step": 127100 + }, + { + "epoch": 5.93, + "learning_rate": 2.3983822676469206e-07, + "loss": 0.2684, + "step": 127105 + }, + { + "epoch": 5.93, + "learning_rate": 2.390544417099055e-07, + "loss": 0.1463, + "step": 127110 + }, + { + "epoch": 5.93, + "learning_rate": 2.3827065665511892e-07, + "loss": 0.0012, + "step": 127115 + }, + { + "epoch": 5.93, + "learning_rate": 2.3748687160033237e-07, + "loss": 0.0306, + "step": 127120 + }, + { + "epoch": 5.93, + "learning_rate": 2.3670308654554578e-07, + "loss": 0.0157, + "step": 127125 + }, + { + "epoch": 5.93, + "learning_rate": 2.359193014907592e-07, + "loss": 0.049, + "step": 127130 + }, + { + "epoch": 5.93, + "learning_rate": 2.351355164359726e-07, + "loss": 0.0796, + "step": 127135 + }, + { + "epoch": 5.93, + "learning_rate": 2.3435173138118606e-07, + "loss": 0.049, + "step": 127140 + }, + { + "epoch": 5.93, + "learning_rate": 2.3356794632639947e-07, + "loss": 0.0885, + "step": 127145 + }, + { + "epoch": 5.93, + "learning_rate": 2.327841612716129e-07, + "loss": 0.138, + "step": 127150 + }, + { + "epoch": 5.93, + "learning_rate": 2.320003762168263e-07, + "loss": 0.1563, + "step": 127155 + }, + { + "epoch": 5.93, + "learning_rate": 2.3121659116203976e-07, + "loss": 0.1106, + "step": 127160 + }, + { + "epoch": 5.93, + "learning_rate": 2.3043280610725316e-07, + "loss": 0.0171, + "step": 127165 + }, + { + "epoch": 5.93, + "learning_rate": 2.296490210524666e-07, + "loss": 0.0328, + "step": 127170 + }, + { + "epoch": 5.93, + "learning_rate": 2.2886523599768e-07, + "loss": 0.034, + "step": 127175 + }, + { + "epoch": 5.93, + "learning_rate": 2.2808145094289345e-07, + "loss": 0.0351, + "step": 127180 + }, + { + "epoch": 5.93, + "learning_rate": 2.2729766588810688e-07, + "loss": 0.0382, + "step": 127185 + }, + { + "epoch": 5.93, + "learning_rate": 2.2651388083332028e-07, + "loss": 0.0432, + "step": 127190 + }, + { + "epoch": 5.94, + "learning_rate": 2.2573009577853373e-07, + "loss": 0.115, + "step": 127195 + }, + { + "epoch": 5.94, + "learning_rate": 2.2494631072374714e-07, + "loss": 0.1928, + "step": 127200 + }, + { + "epoch": 5.94, + "learning_rate": 2.2416252566896057e-07, + "loss": 0.3175, + "step": 127205 + }, + { + "epoch": 5.94, + "learning_rate": 2.2337874061417397e-07, + "loss": 0.1512, + "step": 127210 + }, + { + "epoch": 5.94, + "learning_rate": 2.2259495555938743e-07, + "loss": 0.0081, + "step": 127215 + }, + { + "epoch": 5.94, + "learning_rate": 2.2181117050460083e-07, + "loss": 0.0028, + "step": 127220 + }, + { + "epoch": 5.94, + "learning_rate": 2.2102738544981426e-07, + "loss": 0.0457, + "step": 127225 + }, + { + "epoch": 5.94, + "learning_rate": 2.2024360039502766e-07, + "loss": 0.0253, + "step": 127230 + }, + { + "epoch": 5.94, + "learning_rate": 2.1945981534024112e-07, + "loss": 0.0723, + "step": 127235 + }, + { + "epoch": 5.94, + "learning_rate": 2.1867603028545452e-07, + "loss": 0.09, + "step": 127240 + }, + { + "epoch": 5.94, + "learning_rate": 2.1789224523066795e-07, + "loss": 0.0946, + "step": 127245 + }, + { + "epoch": 5.94, + "learning_rate": 2.171084601758814e-07, + "loss": 0.1801, + "step": 127250 + }, + { + "epoch": 5.94, + "learning_rate": 2.163246751210948e-07, + "loss": 0.2827, + "step": 127255 + }, + { + "epoch": 5.94, + "learning_rate": 2.1554089006630824e-07, + "loss": 0.1085, + "step": 127260 + }, + { + "epoch": 5.94, + "learning_rate": 2.1475710501152164e-07, + "loss": 0.0486, + "step": 127265 + }, + { + "epoch": 5.94, + "learning_rate": 2.139733199567351e-07, + "loss": 0.0105, + "step": 127270 + }, + { + "epoch": 5.94, + "learning_rate": 2.131895349019485e-07, + "loss": 0.0613, + "step": 127275 + }, + { + "epoch": 5.94, + "learning_rate": 2.1240574984716193e-07, + "loss": 0.0275, + "step": 127280 + }, + { + "epoch": 5.94, + "learning_rate": 2.1162196479237536e-07, + "loss": 0.0573, + "step": 127285 + }, + { + "epoch": 5.94, + "learning_rate": 2.1083817973758879e-07, + "loss": 0.0699, + "step": 127290 + }, + { + "epoch": 5.94, + "learning_rate": 2.100543946828022e-07, + "loss": 0.0877, + "step": 127295 + }, + { + "epoch": 5.94, + "learning_rate": 2.0927060962801564e-07, + "loss": 0.1608, + "step": 127300 + }, + { + "epoch": 5.94, + "learning_rate": 2.0848682457322905e-07, + "loss": 0.2718, + "step": 127305 + }, + { + "epoch": 5.94, + "learning_rate": 2.0770303951844248e-07, + "loss": 0.0743, + "step": 127310 + }, + { + "epoch": 5.94, + "learning_rate": 2.0691925446365593e-07, + "loss": 0.0334, + "step": 127315 + }, + { + "epoch": 5.94, + "learning_rate": 2.0613546940886933e-07, + "loss": 0.0267, + "step": 127320 + }, + { + "epoch": 5.94, + "learning_rate": 2.0535168435408276e-07, + "loss": 0.0626, + "step": 127325 + }, + { + "epoch": 5.94, + "learning_rate": 2.0456789929929617e-07, + "loss": 0.0842, + "step": 127330 + }, + { + "epoch": 5.94, + "learning_rate": 2.0378411424450962e-07, + "loss": 0.0542, + "step": 127335 + }, + { + "epoch": 5.94, + "learning_rate": 2.0300032918972303e-07, + "loss": 0.0768, + "step": 127340 + }, + { + "epoch": 5.94, + "learning_rate": 2.0221654413493645e-07, + "loss": 0.0804, + "step": 127345 + }, + { + "epoch": 5.94, + "learning_rate": 2.0143275908014986e-07, + "loss": 0.1293, + "step": 127350 + }, + { + "epoch": 5.94, + "learning_rate": 2.0064897402536331e-07, + "loss": 0.1564, + "step": 127355 + }, + { + "epoch": 5.94, + "learning_rate": 1.9986518897057672e-07, + "loss": 0.1123, + "step": 127360 + }, + { + "epoch": 5.94, + "learning_rate": 1.9908140391579015e-07, + "loss": 0.0058, + "step": 127365 + }, + { + "epoch": 5.94, + "learning_rate": 1.9829761886100355e-07, + "loss": 0.0318, + "step": 127370 + }, + { + "epoch": 5.94, + "learning_rate": 1.97513833806217e-07, + "loss": 0.0269, + "step": 127375 + }, + { + "epoch": 5.94, + "learning_rate": 1.9673004875143043e-07, + "loss": 0.0674, + "step": 127380 + }, + { + "epoch": 5.94, + "learning_rate": 1.9594626369664384e-07, + "loss": 0.0628, + "step": 127385 + }, + { + "epoch": 5.94, + "learning_rate": 1.951624786418573e-07, + "loss": 0.0344, + "step": 127390 + }, + { + "epoch": 5.94, + "learning_rate": 1.943786935870707e-07, + "loss": 0.0706, + "step": 127395 + }, + { + "epoch": 5.94, + "learning_rate": 1.9359490853228412e-07, + "loss": 0.1094, + "step": 127400 + }, + { + "epoch": 5.94, + "learning_rate": 1.9281112347749753e-07, + "loss": 0.2021, + "step": 127405 + }, + { + "epoch": 5.95, + "learning_rate": 1.9202733842271098e-07, + "loss": 0.0957, + "step": 127410 + }, + { + "epoch": 5.95, + "learning_rate": 1.9124355336792439e-07, + "loss": 0.0014, + "step": 127415 + }, + { + "epoch": 5.95, + "learning_rate": 1.9045976831313782e-07, + "loss": 0.03, + "step": 127420 + }, + { + "epoch": 5.95, + "learning_rate": 1.8967598325835122e-07, + "loss": 0.0449, + "step": 127425 + }, + { + "epoch": 5.95, + "learning_rate": 1.8889219820356467e-07, + "loss": 0.0341, + "step": 127430 + }, + { + "epoch": 5.95, + "learning_rate": 1.881084131487781e-07, + "loss": 0.0874, + "step": 127435 + }, + { + "epoch": 5.95, + "learning_rate": 1.873246280939915e-07, + "loss": 0.0716, + "step": 127440 + }, + { + "epoch": 5.95, + "learning_rate": 1.8654084303920496e-07, + "loss": 0.1771, + "step": 127445 + }, + { + "epoch": 5.95, + "learning_rate": 1.8575705798441836e-07, + "loss": 0.1036, + "step": 127450 + }, + { + "epoch": 5.95, + "learning_rate": 1.849732729296318e-07, + "loss": 0.489, + "step": 127455 + }, + { + "epoch": 5.95, + "learning_rate": 1.8418948787484522e-07, + "loss": 0.1333, + "step": 127460 + }, + { + "epoch": 5.95, + "learning_rate": 1.8340570282005865e-07, + "loss": 0.0107, + "step": 127465 + }, + { + "epoch": 5.95, + "learning_rate": 1.8262191776527206e-07, + "loss": 0.0248, + "step": 127470 + }, + { + "epoch": 5.95, + "learning_rate": 1.8183813271048548e-07, + "loss": 0.0368, + "step": 127475 + }, + { + "epoch": 5.95, + "learning_rate": 1.8105434765569891e-07, + "loss": 0.0945, + "step": 127480 + }, + { + "epoch": 5.95, + "learning_rate": 1.8027056260091234e-07, + "loss": 0.0362, + "step": 127485 + }, + { + "epoch": 5.95, + "learning_rate": 1.7948677754612575e-07, + "loss": 0.0411, + "step": 127490 + }, + { + "epoch": 5.95, + "learning_rate": 1.787029924913392e-07, + "loss": 0.1967, + "step": 127495 + }, + { + "epoch": 5.95, + "learning_rate": 1.7791920743655263e-07, + "loss": 0.1942, + "step": 127500 + }, + { + "epoch": 5.95, + "learning_rate": 1.7713542238176603e-07, + "loss": 0.3554, + "step": 127505 + }, + { + "epoch": 5.95, + "learning_rate": 1.763516373269795e-07, + "loss": 0.1078, + "step": 127510 + }, + { + "epoch": 5.95, + "learning_rate": 1.755678522721929e-07, + "loss": 0.041, + "step": 127515 + }, + { + "epoch": 5.95, + "learning_rate": 1.7478406721740632e-07, + "loss": 0.0108, + "step": 127520 + }, + { + "epoch": 5.95, + "learning_rate": 1.7400028216261972e-07, + "loss": 0.0287, + "step": 127525 + }, + { + "epoch": 5.95, + "learning_rate": 1.7321649710783318e-07, + "loss": 0.0538, + "step": 127530 + }, + { + "epoch": 5.95, + "learning_rate": 1.7243271205304658e-07, + "loss": 0.1042, + "step": 127535 + }, + { + "epoch": 5.95, + "learning_rate": 1.7164892699826e-07, + "loss": 0.0795, + "step": 127540 + }, + { + "epoch": 5.95, + "learning_rate": 1.7086514194347342e-07, + "loss": 0.188, + "step": 127545 + }, + { + "epoch": 5.95, + "learning_rate": 1.7008135688868687e-07, + "loss": 0.1249, + "step": 127550 + }, + { + "epoch": 5.95, + "learning_rate": 1.6929757183390027e-07, + "loss": 0.3469, + "step": 127555 + }, + { + "epoch": 5.95, + "learning_rate": 1.685137867791137e-07, + "loss": 0.0994, + "step": 127560 + }, + { + "epoch": 5.95, + "learning_rate": 1.6773000172432716e-07, + "loss": 0.0087, + "step": 127565 + }, + { + "epoch": 5.95, + "learning_rate": 1.6694621666954056e-07, + "loss": 0.0193, + "step": 127570 + }, + { + "epoch": 5.95, + "learning_rate": 1.66162431614754e-07, + "loss": 0.051, + "step": 127575 + }, + { + "epoch": 5.95, + "learning_rate": 1.653786465599674e-07, + "loss": 0.0531, + "step": 127580 + }, + { + "epoch": 5.95, + "learning_rate": 1.6459486150518085e-07, + "loss": 0.0351, + "step": 127585 + }, + { + "epoch": 5.95, + "learning_rate": 1.6381107645039425e-07, + "loss": 0.1435, + "step": 127590 + }, + { + "epoch": 5.95, + "learning_rate": 1.6302729139560768e-07, + "loss": 0.1477, + "step": 127595 + }, + { + "epoch": 5.95, + "learning_rate": 1.6224350634082109e-07, + "loss": 0.1103, + "step": 127600 + }, + { + "epoch": 5.95, + "learning_rate": 1.6145972128603454e-07, + "loss": 0.3621, + "step": 127605 + }, + { + "epoch": 5.95, + "learning_rate": 1.6067593623124794e-07, + "loss": 0.0815, + "step": 127610 + }, + { + "epoch": 5.95, + "learning_rate": 1.5989215117646137e-07, + "loss": 0.0426, + "step": 127615 + }, + { + "epoch": 5.95, + "learning_rate": 1.5910836612167478e-07, + "loss": 0.0185, + "step": 127620 + }, + { + "epoch": 5.96, + "learning_rate": 1.5832458106688823e-07, + "loss": 0.0089, + "step": 127625 + }, + { + "epoch": 5.96, + "learning_rate": 1.5754079601210166e-07, + "loss": 0.0506, + "step": 127630 + }, + { + "epoch": 5.96, + "learning_rate": 1.5675701095731506e-07, + "loss": 0.0611, + "step": 127635 + }, + { + "epoch": 5.96, + "learning_rate": 1.559732259025285e-07, + "loss": 0.0697, + "step": 127640 + }, + { + "epoch": 5.96, + "learning_rate": 1.5518944084774192e-07, + "loss": 0.1094, + "step": 127645 + }, + { + "epoch": 5.96, + "learning_rate": 1.5440565579295535e-07, + "loss": 0.0939, + "step": 127650 + }, + { + "epoch": 5.96, + "learning_rate": 1.5362187073816878e-07, + "loss": 0.2679, + "step": 127655 + }, + { + "epoch": 5.96, + "learning_rate": 1.528380856833822e-07, + "loss": 0.1133, + "step": 127660 + }, + { + "epoch": 5.96, + "learning_rate": 1.5205430062859564e-07, + "loss": 0.0132, + "step": 127665 + }, + { + "epoch": 5.96, + "learning_rate": 1.5127051557380904e-07, + "loss": 0.0141, + "step": 127670 + }, + { + "epoch": 5.96, + "learning_rate": 1.5048673051902247e-07, + "loss": 0.0272, + "step": 127675 + }, + { + "epoch": 5.96, + "learning_rate": 1.497029454642359e-07, + "loss": 0.0724, + "step": 127680 + }, + { + "epoch": 5.96, + "learning_rate": 1.4891916040944933e-07, + "loss": 0.1169, + "step": 127685 + }, + { + "epoch": 5.96, + "learning_rate": 1.4813537535466276e-07, + "loss": 0.0431, + "step": 127690 + }, + { + "epoch": 5.96, + "learning_rate": 1.4735159029987616e-07, + "loss": 0.1203, + "step": 127695 + }, + { + "epoch": 5.96, + "learning_rate": 1.465678052450896e-07, + "loss": 0.1294, + "step": 127700 + }, + { + "epoch": 5.96, + "learning_rate": 1.4578402019030302e-07, + "loss": 0.3897, + "step": 127705 + }, + { + "epoch": 5.96, + "learning_rate": 1.4500023513551645e-07, + "loss": 0.1042, + "step": 127710 + }, + { + "epoch": 5.96, + "learning_rate": 1.4421645008072985e-07, + "loss": 0.0448, + "step": 127715 + }, + { + "epoch": 5.96, + "learning_rate": 1.434326650259433e-07, + "loss": 0.0137, + "step": 127720 + }, + { + "epoch": 5.96, + "learning_rate": 1.4264887997115674e-07, + "loss": 0.0375, + "step": 127725 + }, + { + "epoch": 5.96, + "learning_rate": 1.4186509491637014e-07, + "loss": 0.0726, + "step": 127730 + }, + { + "epoch": 5.96, + "learning_rate": 1.4108130986158357e-07, + "loss": 0.014, + "step": 127735 + }, + { + "epoch": 5.96, + "learning_rate": 1.40297524806797e-07, + "loss": 0.0383, + "step": 127740 + }, + { + "epoch": 5.96, + "learning_rate": 1.3951373975201043e-07, + "loss": 0.0878, + "step": 127745 + }, + { + "epoch": 5.96, + "learning_rate": 1.3872995469722383e-07, + "loss": 0.1318, + "step": 127750 + }, + { + "epoch": 5.96, + "learning_rate": 1.3794616964243726e-07, + "loss": 0.2634, + "step": 127755 + }, + { + "epoch": 5.96, + "learning_rate": 1.371623845876507e-07, + "loss": 0.1325, + "step": 127760 + }, + { + "epoch": 5.96, + "learning_rate": 1.3637859953286412e-07, + "loss": 0.0267, + "step": 127765 + }, + { + "epoch": 5.96, + "learning_rate": 1.3559481447807755e-07, + "loss": 0.0386, + "step": 127770 + }, + { + "epoch": 5.96, + "learning_rate": 1.3481102942329095e-07, + "loss": 0.0629, + "step": 127775 + }, + { + "epoch": 5.96, + "learning_rate": 1.3402724436850438e-07, + "loss": 0.0793, + "step": 127780 + }, + { + "epoch": 5.96, + "learning_rate": 1.3324345931371784e-07, + "loss": 0.0658, + "step": 127785 + }, + { + "epoch": 5.96, + "learning_rate": 1.3245967425893124e-07, + "loss": 0.0778, + "step": 127790 + }, + { + "epoch": 5.96, + "learning_rate": 1.3167588920414467e-07, + "loss": 0.0508, + "step": 127795 + }, + { + "epoch": 5.96, + "learning_rate": 1.308921041493581e-07, + "loss": 0.1157, + "step": 127800 + }, + { + "epoch": 5.96, + "learning_rate": 1.3010831909457153e-07, + "loss": 0.2239, + "step": 127805 + }, + { + "epoch": 5.96, + "learning_rate": 1.2932453403978493e-07, + "loss": 0.1321, + "step": 127810 + }, + { + "epoch": 5.96, + "learning_rate": 1.2854074898499836e-07, + "loss": 0.0088, + "step": 127815 + }, + { + "epoch": 5.96, + "learning_rate": 1.277569639302118e-07, + "loss": 0.025, + "step": 127820 + }, + { + "epoch": 5.96, + "learning_rate": 1.2697317887542522e-07, + "loss": 0.0107, + "step": 127825 + }, + { + "epoch": 5.96, + "learning_rate": 1.2618939382063862e-07, + "loss": 0.0233, + "step": 127830 + }, + { + "epoch": 5.96, + "learning_rate": 1.2540560876585205e-07, + "loss": 0.0372, + "step": 127835 + }, + { + "epoch": 5.97, + "learning_rate": 1.2462182371106548e-07, + "loss": 0.089, + "step": 127840 + }, + { + "epoch": 5.97, + "learning_rate": 1.238380386562789e-07, + "loss": 0.1495, + "step": 127845 + }, + { + "epoch": 5.97, + "learning_rate": 1.2305425360149234e-07, + "loss": 0.1951, + "step": 127850 + }, + { + "epoch": 5.97, + "learning_rate": 1.2227046854670577e-07, + "loss": 0.1915, + "step": 127855 + }, + { + "epoch": 5.97, + "learning_rate": 1.214866834919192e-07, + "loss": 0.1162, + "step": 127860 + }, + { + "epoch": 5.97, + "learning_rate": 1.207028984371326e-07, + "loss": 0.0193, + "step": 127865 + }, + { + "epoch": 5.97, + "learning_rate": 1.1991911338234603e-07, + "loss": 0.0563, + "step": 127870 + }, + { + "epoch": 5.97, + "learning_rate": 1.1913532832755946e-07, + "loss": 0.0173, + "step": 127875 + }, + { + "epoch": 5.97, + "learning_rate": 1.1835154327277289e-07, + "loss": 0.0424, + "step": 127880 + }, + { + "epoch": 5.97, + "learning_rate": 1.175677582179863e-07, + "loss": 0.0781, + "step": 127885 + }, + { + "epoch": 5.97, + "learning_rate": 1.1678397316319973e-07, + "loss": 0.1055, + "step": 127890 + }, + { + "epoch": 5.97, + "learning_rate": 1.1600018810841315e-07, + "loss": 0.0482, + "step": 127895 + }, + { + "epoch": 5.97, + "learning_rate": 1.1521640305362658e-07, + "loss": 0.0813, + "step": 127900 + }, + { + "epoch": 5.97, + "learning_rate": 1.1443261799884e-07, + "loss": 0.188, + "step": 127905 + }, + { + "epoch": 5.97, + "learning_rate": 1.1364883294405344e-07, + "loss": 0.0988, + "step": 127910 + }, + { + "epoch": 5.97, + "learning_rate": 1.1286504788926687e-07, + "loss": 0.0088, + "step": 127915 + }, + { + "epoch": 5.97, + "learning_rate": 1.1208126283448028e-07, + "loss": 0.0435, + "step": 127920 + }, + { + "epoch": 5.97, + "learning_rate": 1.1129747777969371e-07, + "loss": 0.0883, + "step": 127925 + }, + { + "epoch": 5.97, + "learning_rate": 1.1051369272490713e-07, + "loss": 0.0851, + "step": 127930 + }, + { + "epoch": 5.97, + "learning_rate": 1.0972990767012056e-07, + "loss": 0.0516, + "step": 127935 + }, + { + "epoch": 5.97, + "learning_rate": 1.0894612261533397e-07, + "loss": 0.0755, + "step": 127940 + }, + { + "epoch": 5.97, + "learning_rate": 1.081623375605474e-07, + "loss": 0.1421, + "step": 127945 + }, + { + "epoch": 5.97, + "learning_rate": 1.0737855250576082e-07, + "loss": 0.1164, + "step": 127950 + }, + { + "epoch": 5.97, + "learning_rate": 1.0659476745097425e-07, + "loss": 0.2777, + "step": 127955 + }, + { + "epoch": 5.97, + "learning_rate": 1.0581098239618768e-07, + "loss": 0.0718, + "step": 127960 + }, + { + "epoch": 5.97, + "learning_rate": 1.050271973414011e-07, + "loss": 0.0444, + "step": 127965 + }, + { + "epoch": 5.97, + "learning_rate": 1.0424341228661452e-07, + "loss": 0.0672, + "step": 127970 + }, + { + "epoch": 5.97, + "learning_rate": 1.0345962723182797e-07, + "loss": 0.0232, + "step": 127975 + }, + { + "epoch": 5.97, + "learning_rate": 1.0267584217704138e-07, + "loss": 0.0403, + "step": 127980 + }, + { + "epoch": 5.97, + "learning_rate": 1.0189205712225481e-07, + "loss": 0.1562, + "step": 127985 + }, + { + "epoch": 5.97, + "learning_rate": 1.0110827206746823e-07, + "loss": 0.0266, + "step": 127990 + }, + { + "epoch": 5.97, + "learning_rate": 1.0032448701268166e-07, + "loss": 0.1062, + "step": 127995 + }, + { + "epoch": 5.97, + "learning_rate": 9.954070195789507e-08, + "loss": 0.1003, + "step": 128000 + }, + { + "epoch": 5.97, + "learning_rate": 9.87569169031085e-08, + "loss": 0.3442, + "step": 128005 + }, + { + "epoch": 5.97, + "learning_rate": 9.797313184832192e-08, + "loss": 0.096, + "step": 128010 + }, + { + "epoch": 5.97, + "learning_rate": 9.718934679353535e-08, + "loss": 0.0186, + "step": 128015 + }, + { + "epoch": 5.97, + "learning_rate": 9.640556173874876e-08, + "loss": 0.0408, + "step": 128020 + }, + { + "epoch": 5.97, + "learning_rate": 9.562177668396219e-08, + "loss": 0.057, + "step": 128025 + }, + { + "epoch": 5.97, + "learning_rate": 9.483799162917561e-08, + "loss": 0.0311, + "step": 128030 + }, + { + "epoch": 5.97, + "learning_rate": 9.405420657438905e-08, + "loss": 0.0177, + "step": 128035 + }, + { + "epoch": 5.97, + "learning_rate": 9.327042151960248e-08, + "loss": 0.0527, + "step": 128040 + }, + { + "epoch": 5.97, + "learning_rate": 9.24866364648159e-08, + "loss": 0.0309, + "step": 128045 + }, + { + "epoch": 5.97, + "learning_rate": 9.170285141002933e-08, + "loss": 0.1293, + "step": 128050 + }, + { + "epoch": 5.98, + "learning_rate": 9.091906635524274e-08, + "loss": 0.388, + "step": 128055 + }, + { + "epoch": 5.98, + "learning_rate": 9.013528130045617e-08, + "loss": 0.1261, + "step": 128060 + }, + { + "epoch": 5.98, + "learning_rate": 8.93514962456696e-08, + "loss": 0.0145, + "step": 128065 + }, + { + "epoch": 5.98, + "learning_rate": 8.856771119088302e-08, + "loss": 0.0439, + "step": 128070 + }, + { + "epoch": 5.98, + "learning_rate": 8.778392613609645e-08, + "loss": 0.059, + "step": 128075 + }, + { + "epoch": 5.98, + "learning_rate": 8.700014108130986e-08, + "loss": 0.0874, + "step": 128080 + }, + { + "epoch": 5.98, + "learning_rate": 8.621635602652329e-08, + "loss": 0.0181, + "step": 128085 + }, + { + "epoch": 5.98, + "learning_rate": 8.543257097173671e-08, + "loss": 0.0512, + "step": 128090 + }, + { + "epoch": 5.98, + "learning_rate": 8.464878591695014e-08, + "loss": 0.075, + "step": 128095 + }, + { + "epoch": 5.98, + "learning_rate": 8.386500086216358e-08, + "loss": 0.1364, + "step": 128100 + }, + { + "epoch": 5.98, + "learning_rate": 8.3081215807377e-08, + "loss": 0.2467, + "step": 128105 + }, + { + "epoch": 5.98, + "learning_rate": 8.229743075259043e-08, + "loss": 0.0606, + "step": 128110 + }, + { + "epoch": 5.98, + "learning_rate": 8.151364569780384e-08, + "loss": 0.0173, + "step": 128115 + }, + { + "epoch": 5.98, + "learning_rate": 8.072986064301727e-08, + "loss": 0.0322, + "step": 128120 + }, + { + "epoch": 5.98, + "learning_rate": 7.994607558823069e-08, + "loss": 0.074, + "step": 128125 + }, + { + "epoch": 5.98, + "learning_rate": 7.916229053344412e-08, + "loss": 0.0697, + "step": 128130 + }, + { + "epoch": 5.98, + "learning_rate": 7.837850547865753e-08, + "loss": 0.0336, + "step": 128135 + }, + { + "epoch": 5.98, + "learning_rate": 7.759472042387096e-08, + "loss": 0.1156, + "step": 128140 + }, + { + "epoch": 5.98, + "learning_rate": 7.681093536908439e-08, + "loss": 0.0779, + "step": 128145 + }, + { + "epoch": 5.98, + "learning_rate": 7.602715031429782e-08, + "loss": 0.1723, + "step": 128150 + }, + { + "epoch": 5.98, + "learning_rate": 7.524336525951124e-08, + "loss": 0.3823, + "step": 128155 + }, + { + "epoch": 5.98, + "learning_rate": 7.445958020472467e-08, + "loss": 0.0704, + "step": 128160 + }, + { + "epoch": 5.98, + "learning_rate": 7.367579514993808e-08, + "loss": 0.0189, + "step": 128165 + }, + { + "epoch": 5.98, + "learning_rate": 7.289201009515151e-08, + "loss": 0.1423, + "step": 128170 + }, + { + "epoch": 5.98, + "learning_rate": 7.210822504036493e-08, + "loss": 0.0191, + "step": 128175 + }, + { + "epoch": 5.98, + "learning_rate": 7.132443998557837e-08, + "loss": 0.0472, + "step": 128180 + }, + { + "epoch": 5.98, + "learning_rate": 7.054065493079179e-08, + "loss": 0.0554, + "step": 128185 + }, + { + "epoch": 5.98, + "learning_rate": 6.975686987600521e-08, + "loss": 0.0773, + "step": 128190 + }, + { + "epoch": 5.98, + "learning_rate": 6.897308482121863e-08, + "loss": 0.0916, + "step": 128195 + }, + { + "epoch": 5.98, + "learning_rate": 6.818929976643206e-08, + "loss": 0.1866, + "step": 128200 + }, + { + "epoch": 5.98, + "learning_rate": 6.740551471164548e-08, + "loss": 0.2296, + "step": 128205 + }, + { + "epoch": 5.98, + "learning_rate": 6.662172965685892e-08, + "loss": 0.0842, + "step": 128210 + }, + { + "epoch": 5.98, + "learning_rate": 6.583794460207233e-08, + "loss": 0.011, + "step": 128215 + }, + { + "epoch": 5.98, + "learning_rate": 6.505415954728576e-08, + "loss": 0.0964, + "step": 128220 + }, + { + "epoch": 5.98, + "learning_rate": 6.427037449249918e-08, + "loss": 0.0646, + "step": 128225 + }, + { + "epoch": 5.98, + "learning_rate": 6.348658943771261e-08, + "loss": 0.0629, + "step": 128230 + }, + { + "epoch": 5.98, + "learning_rate": 6.270280438292603e-08, + "loss": 0.082, + "step": 128235 + }, + { + "epoch": 5.98, + "learning_rate": 6.191901932813945e-08, + "loss": 0.0416, + "step": 128240 + }, + { + "epoch": 5.98, + "learning_rate": 6.113523427335288e-08, + "loss": 0.1095, + "step": 128245 + }, + { + "epoch": 5.98, + "learning_rate": 6.03514492185663e-08, + "loss": 0.1295, + "step": 128250 + }, + { + "epoch": 5.98, + "learning_rate": 5.956766416377973e-08, + "loss": 0.2104, + "step": 128255 + }, + { + "epoch": 5.98, + "learning_rate": 5.878387910899315e-08, + "loss": 0.0944, + "step": 128260 + }, + { + "epoch": 5.99, + "learning_rate": 5.8000094054206575e-08, + "loss": 0.0151, + "step": 128265 + }, + { + "epoch": 5.99, + "learning_rate": 5.721630899942e-08, + "loss": 0.0196, + "step": 128270 + }, + { + "epoch": 5.99, + "learning_rate": 5.6432523944633434e-08, + "loss": 0.0398, + "step": 128275 + }, + { + "epoch": 5.99, + "learning_rate": 5.5648738889846856e-08, + "loss": 0.0225, + "step": 128280 + }, + { + "epoch": 5.99, + "learning_rate": 5.486495383506028e-08, + "loss": 0.0897, + "step": 128285 + }, + { + "epoch": 5.99, + "learning_rate": 5.40811687802737e-08, + "loss": 0.0548, + "step": 128290 + }, + { + "epoch": 5.99, + "learning_rate": 5.3297383725487124e-08, + "loss": 0.0571, + "step": 128295 + }, + { + "epoch": 5.99, + "learning_rate": 5.251359867070055e-08, + "loss": 0.0864, + "step": 128300 + }, + { + "epoch": 5.99, + "learning_rate": 5.172981361591398e-08, + "loss": 0.2825, + "step": 128305 + }, + { + "epoch": 5.99, + "learning_rate": 5.0946028561127406e-08, + "loss": 0.139, + "step": 128310 + }, + { + "epoch": 5.99, + "learning_rate": 5.016224350634083e-08, + "loss": 0.0071, + "step": 128315 + }, + { + "epoch": 5.99, + "learning_rate": 4.937845845155425e-08, + "loss": 0.0238, + "step": 128320 + }, + { + "epoch": 5.99, + "learning_rate": 4.8594673396767674e-08, + "loss": 0.0364, + "step": 128325 + }, + { + "epoch": 5.99, + "learning_rate": 4.7810888341981096e-08, + "loss": 0.044, + "step": 128330 + }, + { + "epoch": 5.99, + "learning_rate": 4.7027103287194526e-08, + "loss": 0.0651, + "step": 128335 + }, + { + "epoch": 5.99, + "learning_rate": 4.624331823240795e-08, + "loss": 0.1214, + "step": 128340 + }, + { + "epoch": 5.99, + "learning_rate": 4.545953317762137e-08, + "loss": 0.0639, + "step": 128345 + }, + { + "epoch": 5.99, + "learning_rate": 4.46757481228348e-08, + "loss": 0.1152, + "step": 128350 + }, + { + "epoch": 5.99, + "learning_rate": 4.389196306804822e-08, + "loss": 0.1782, + "step": 128355 + }, + { + "epoch": 5.99, + "learning_rate": 4.3108178013261646e-08, + "loss": 0.0965, + "step": 128360 + }, + { + "epoch": 5.99, + "learning_rate": 4.232439295847507e-08, + "loss": 0.0082, + "step": 128365 + }, + { + "epoch": 5.99, + "learning_rate": 4.15406079036885e-08, + "loss": 0.0346, + "step": 128370 + }, + { + "epoch": 5.99, + "learning_rate": 4.075682284890192e-08, + "loss": 0.0545, + "step": 128375 + }, + { + "epoch": 5.99, + "learning_rate": 3.997303779411534e-08, + "loss": 0.0331, + "step": 128380 + }, + { + "epoch": 5.99, + "learning_rate": 3.9189252739328766e-08, + "loss": 0.0791, + "step": 128385 + }, + { + "epoch": 5.99, + "learning_rate": 3.8405467684542195e-08, + "loss": 0.14, + "step": 128390 + }, + { + "epoch": 5.99, + "learning_rate": 3.762168262975562e-08, + "loss": 0.0331, + "step": 128395 + }, + { + "epoch": 5.99, + "learning_rate": 3.683789757496904e-08, + "loss": 0.0909, + "step": 128400 + }, + { + "epoch": 5.99, + "learning_rate": 3.6054112520182463e-08, + "loss": 0.333, + "step": 128405 + }, + { + "epoch": 5.99, + "learning_rate": 3.527032746539589e-08, + "loss": 0.0592, + "step": 128410 + }, + { + "epoch": 5.99, + "learning_rate": 3.4486542410609315e-08, + "loss": 0.0043, + "step": 128415 + }, + { + "epoch": 5.99, + "learning_rate": 3.370275735582274e-08, + "loss": 0.0193, + "step": 128420 + }, + { + "epoch": 5.99, + "learning_rate": 3.291897230103617e-08, + "loss": 0.03, + "step": 128425 + }, + { + "epoch": 5.99, + "learning_rate": 3.213518724624959e-08, + "loss": 0.0619, + "step": 128430 + }, + { + "epoch": 5.99, + "learning_rate": 3.135140219146301e-08, + "loss": 0.0298, + "step": 128435 + }, + { + "epoch": 5.99, + "learning_rate": 3.056761713667644e-08, + "loss": 0.1012, + "step": 128440 + }, + { + "epoch": 5.99, + "learning_rate": 2.9783832081889865e-08, + "loss": 0.0795, + "step": 128445 + }, + { + "epoch": 5.99, + "learning_rate": 2.9000047027103287e-08, + "loss": 0.1666, + "step": 128450 + }, + { + "epoch": 5.99, + "learning_rate": 2.8216261972316717e-08, + "loss": 0.2524, + "step": 128455 + }, + { + "epoch": 5.99, + "learning_rate": 2.743247691753014e-08, + "loss": 0.1183, + "step": 128460 + }, + { + "epoch": 5.99, + "learning_rate": 2.6648691862743562e-08, + "loss": 0.0191, + "step": 128465 + }, + { + "epoch": 5.99, + "learning_rate": 2.586490680795699e-08, + "loss": 0.1089, + "step": 128470 + }, + { + "epoch": 5.99, + "learning_rate": 2.5081121753170414e-08, + "loss": 0.0609, + "step": 128475 + }, + { + "epoch": 6.0, + "learning_rate": 2.4297336698383837e-08, + "loss": 0.0399, + "step": 128480 + }, + { + "epoch": 6.0, + "learning_rate": 2.3513551643597263e-08, + "loss": 0.0608, + "step": 128485 + }, + { + "epoch": 6.0, + "learning_rate": 2.2729766588810686e-08, + "loss": 0.0817, + "step": 128490 + }, + { + "epoch": 6.0, + "learning_rate": 2.194598153402411e-08, + "loss": 0.073, + "step": 128495 + }, + { + "epoch": 6.0, + "learning_rate": 2.1162196479237534e-08, + "loss": 0.1842, + "step": 128500 + }, + { + "epoch": 6.0, + "learning_rate": 2.037841142445096e-08, + "loss": 0.2138, + "step": 128505 + }, + { + "epoch": 6.0, + "learning_rate": 1.9594626369664383e-08, + "loss": 0.0997, + "step": 128510 + }, + { + "epoch": 6.0, + "learning_rate": 1.881084131487781e-08, + "loss": 0.0312, + "step": 128515 + }, + { + "epoch": 6.0, + "learning_rate": 1.8027056260091232e-08, + "loss": 0.0118, + "step": 128520 + }, + { + "epoch": 6.0, + "learning_rate": 1.7243271205304658e-08, + "loss": 0.0496, + "step": 128525 + }, + { + "epoch": 6.0, + "learning_rate": 1.6459486150518084e-08, + "loss": 0.0232, + "step": 128530 + }, + { + "epoch": 6.0, + "learning_rate": 1.5675701095731506e-08, + "loss": 0.0877, + "step": 128535 + }, + { + "epoch": 6.0, + "learning_rate": 1.4891916040944932e-08, + "loss": 0.0576, + "step": 128540 + }, + { + "epoch": 6.0, + "learning_rate": 1.4108130986158358e-08, + "loss": 0.1731, + "step": 128545 + }, + { + "epoch": 6.0, + "learning_rate": 1.3324345931371781e-08, + "loss": 0.1483, + "step": 128550 + }, + { + "epoch": 6.0, + "learning_rate": 1.2540560876585207e-08, + "loss": 0.269, + "step": 128555 + }, + { + "epoch": 6.0, + "learning_rate": 1.1756775821798631e-08, + "loss": 0.1055, + "step": 128560 + }, + { + "epoch": 6.0, + "learning_rate": 1.0972990767012056e-08, + "loss": 0.0285, + "step": 128565 + }, + { + "epoch": 6.0, + "learning_rate": 1.018920571222548e-08, + "loss": 0.0093, + "step": 128570 + }, + { + "epoch": 6.0, + "learning_rate": 9.405420657438904e-09, + "loss": 0.0552, + "step": 128575 + }, + { + "epoch": 6.0, + "learning_rate": 8.621635602652329e-09, + "loss": 0.039, + "step": 128580 + }, + { + "epoch": 6.0, + "learning_rate": 7.837850547865753e-09, + "loss": 0.1005, + "step": 128585 + }, + { + "epoch": 6.0, + "eval_cer": 0.010486086983986608, + "eval_loss": 0.05890187993645668, + "eval_runtime": 471.9392, + "eval_samples_per_second": 40.365, + "eval_steps_per_second": 5.047, + "eval_wer": 0.0882679496669134, + "step": 128586 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 1.5434577183658717e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-128586/training_args.bin b/checkpoint-128586/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-128586/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/checkpoint-21431/config.json b/checkpoint-21431/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-21431/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-21431/optimizer.pt b/checkpoint-21431/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..022d953639afd7439782aab32b853eef265f37c8 --- /dev/null +++ b/checkpoint-21431/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc0ceb73b78984d35ca4225b37f77eea675853d73d4f4aa78b1a5298b5c7efb +size 1847481 diff --git a/checkpoint-21431/preprocessor_config.json b/checkpoint-21431/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-21431/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-21431/pytorch_model.bin b/checkpoint-21431/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ee74d4b09df13890a8df2dcceb74fa6766de80e --- /dev/null +++ b/checkpoint-21431/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc640a03ef3657ca4d5cf8f7b990ec216e7d36076c65d7f7c88120186dfe266 +size 377656855 diff --git a/checkpoint-21431/rng_state.pth b/checkpoint-21431/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..be0ecd96afcd88da7c0ef9ccf4a0e7f2a38a7f52 --- /dev/null +++ b/checkpoint-21431/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f4004fe2f8de9a170928561f8a5ded034ae31960d81507028041444245b7f4 +size 14503 diff --git a/checkpoint-21431/scaler.pt b/checkpoint-21431/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..05c9d0f8fce5221fb01d049d3b450125bb20fd8b --- /dev/null +++ b/checkpoint-21431/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ee3f6caba91b9e51a76e357a586b95a811762bc3fd2b4fe01240e8b40e0f39 +size 559 diff --git a/checkpoint-21431/scheduler.pt b/checkpoint-21431/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec2a2689e7a621f68532fae4eb6e0dfe111c1537 --- /dev/null +++ b/checkpoint-21431/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139f075edbd69c339b2bf6d5502790a86ac6b7a54a0022742cd8db8102fd7bb0 +size 623 diff --git a/checkpoint-21431/trainer_state.json b/checkpoint-21431/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..374e1bf7aec4f1576f8d7f6ff6aca0ea8cc8da4f --- /dev/null +++ b/checkpoint-21431/trainer_state.json @@ -0,0 +1,25742 @@ +{ + "best_metric": 0.6674277186393738, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-21431", + "epoch": 1.0, + "global_step": 21431, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 2.573133119811064e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-21431/training_args.bin b/checkpoint-21431/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-21431/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/checkpoint-42862/config.json b/checkpoint-42862/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-42862/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-42862/optimizer.pt b/checkpoint-42862/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..088fb06cd9836397a6e23dbfb686c56fc64b50ab --- /dev/null +++ b/checkpoint-42862/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429b27618e08493c42ebe65dd9b2ae4961fb565a564109bc98bebe09b543af65 +size 1847481 diff --git a/checkpoint-42862/preprocessor_config.json b/checkpoint-42862/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-42862/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-42862/pytorch_model.bin b/checkpoint-42862/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a46fb2d42b370762528f719be230534aff4e0b10 --- /dev/null +++ b/checkpoint-42862/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f34b0861feb99182df1f63d7dd8121c1dd2cb3ab42f48ab47936e3f1a29240a +size 377656855 diff --git a/checkpoint-42862/rng_state.pth b/checkpoint-42862/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf87a78c50fab2aea15050fb7f534847ded681e2 --- /dev/null +++ b/checkpoint-42862/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b529f1c2de06f0a3e6457d777b0605d272874db0c9c8a51cbcf805ea7be20c0f +size 14503 diff --git a/checkpoint-42862/scaler.pt b/checkpoint-42862/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..78c2501f6aad00b6fa8f30a24262f51eae1fd3ba --- /dev/null +++ b/checkpoint-42862/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1256598b61ef0b914c8c3d23fd575eaebdce339858e228fae565375ffd9565db +size 559 diff --git a/checkpoint-42862/scheduler.pt b/checkpoint-42862/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7f79ebffefc861d2d44ceb5a6253651b719d696 --- /dev/null +++ b/checkpoint-42862/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb9378373d457ffae9b4d386559c2db5cf0f11d555436822bcf17137a680a10 +size 623 diff --git a/checkpoint-42862/trainer_state.json b/checkpoint-42862/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6319ad6d5928c1594274b084117e3065eaa143f2 --- /dev/null +++ b/checkpoint-42862/trainer_state.json @@ -0,0 +1,51468 @@ +{ + "best_metric": 0.34811559319496155, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-42862", + "epoch": 2.0, + "global_step": 42862, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + }, + { + "epoch": 1.0, + "learning_rate": 1.6798708322229715e-05, + "loss": 0.2708, + "step": 21435 + }, + { + "epoch": 1.0, + "learning_rate": 1.6797924537174925e-05, + "loss": 0.0632, + "step": 21440 + }, + { + "epoch": 1.0, + "learning_rate": 1.679714075212014e-05, + "loss": 0.0625, + "step": 21445 + }, + { + "epoch": 1.0, + "learning_rate": 1.6796356967065353e-05, + "loss": 0.1169, + "step": 21450 + }, + { + "epoch": 1.0, + "learning_rate": 1.6795573182010567e-05, + "loss": 0.1674, + "step": 21455 + }, + { + "epoch": 1.0, + "learning_rate": 1.679478939695578e-05, + "loss": 0.1875, + "step": 21460 + }, + { + "epoch": 1.0, + "learning_rate": 1.6794005611900995e-05, + "loss": 0.2132, + "step": 21465 + }, + { + "epoch": 1.0, + "learning_rate": 1.6793221826846205e-05, + "loss": 0.2001, + "step": 21470 + }, + { + "epoch": 1.0, + "learning_rate": 1.6792438041791422e-05, + "loss": 0.2851, + "step": 21475 + }, + { + "epoch": 1.0, + "learning_rate": 1.6791654256736633e-05, + "loss": 0.2083, + "step": 21480 + }, + { + "epoch": 1.0, + "learning_rate": 1.6790870471681847e-05, + "loss": 0.4056, + "step": 21485 + }, + { + "epoch": 1.0, + "learning_rate": 1.679008668662706e-05, + "loss": 0.0784, + "step": 21490 + }, + { + "epoch": 1.0, + "learning_rate": 1.6789302901572275e-05, + "loss": 0.1198, + "step": 21495 + }, + { + "epoch": 1.0, + "learning_rate": 1.678851911651749e-05, + "loss": 0.0637, + "step": 21500 + }, + { + "epoch": 1.0, + "learning_rate": 1.67877353314627e-05, + "loss": 0.0893, + "step": 21505 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786951546407913e-05, + "loss": 0.1339, + "step": 21510 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786167761353127e-05, + "loss": 0.2976, + "step": 21515 + }, + { + "epoch": 1.0, + "learning_rate": 1.678538397629834e-05, + "loss": 0.2174, + "step": 21520 + }, + { + "epoch": 1.0, + "learning_rate": 1.6784600191243555e-05, + "loss": 0.2785, + "step": 21525 + }, + { + "epoch": 1.0, + "learning_rate": 1.678381640618877e-05, + "loss": 0.5756, + "step": 21530 + }, + { + "epoch": 1.0, + "learning_rate": 1.6783032621133983e-05, + "loss": 0.3013, + "step": 21535 + }, + { + "epoch": 1.01, + "learning_rate": 1.6782248836079196e-05, + "loss": 0.0721, + "step": 21540 + }, + { + "epoch": 1.01, + "learning_rate": 1.6781465051024407e-05, + "loss": 0.0953, + "step": 21545 + }, + { + "epoch": 1.01, + "learning_rate": 1.6780681265969624e-05, + "loss": 0.0683, + "step": 21550 + }, + { + "epoch": 1.01, + "learning_rate": 1.6779897480914835e-05, + "loss": 0.155, + "step": 21555 + }, + { + "epoch": 1.01, + "learning_rate": 1.677911369586005e-05, + "loss": 0.2342, + "step": 21560 + }, + { + "epoch": 1.01, + "learning_rate": 1.6778329910805263e-05, + "loss": 0.1579, + "step": 21565 + }, + { + "epoch": 1.01, + "learning_rate": 1.6777546125750473e-05, + "loss": 0.2137, + "step": 21570 + }, + { + "epoch": 1.01, + "learning_rate": 1.677676234069569e-05, + "loss": 0.3298, + "step": 21575 + }, + { + "epoch": 1.01, + "learning_rate": 1.67759785556409e-05, + "loss": 0.3294, + "step": 21580 + }, + { + "epoch": 1.01, + "learning_rate": 1.6775194770586115e-05, + "loss": 0.347, + "step": 21585 + }, + { + "epoch": 1.01, + "learning_rate": 1.677441098553133e-05, + "loss": 0.0707, + "step": 21590 + }, + { + "epoch": 1.01, + "learning_rate": 1.6773627200476543e-05, + "loss": 0.0494, + "step": 21595 + }, + { + "epoch": 1.01, + "learning_rate": 1.6772843415421757e-05, + "loss": 0.0614, + "step": 21600 + }, + { + "epoch": 1.01, + "learning_rate": 1.677205963036697e-05, + "loss": 0.1719, + "step": 21605 + }, + { + "epoch": 1.01, + "learning_rate": 1.6771275845312184e-05, + "loss": 0.1739, + "step": 21610 + }, + { + "epoch": 1.01, + "learning_rate": 1.6770492060257398e-05, + "loss": 0.2605, + "step": 21615 + }, + { + "epoch": 1.01, + "learning_rate": 1.676970827520261e-05, + "loss": 0.2799, + "step": 21620 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768924490147823e-05, + "loss": 0.3259, + "step": 21625 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768140705093037e-05, + "loss": 0.3949, + "step": 21630 + }, + { + "epoch": 1.01, + "learning_rate": 1.676735692003825e-05, + "loss": 0.4475, + "step": 21635 + }, + { + "epoch": 1.01, + "learning_rate": 1.6766573134983464e-05, + "loss": 0.067, + "step": 21640 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765789349928675e-05, + "loss": 0.0889, + "step": 21645 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765005564873892e-05, + "loss": 0.0926, + "step": 21650 + }, + { + "epoch": 1.01, + "learning_rate": 1.6764221779819103e-05, + "loss": 0.169, + "step": 21655 + }, + { + "epoch": 1.01, + "learning_rate": 1.6763437994764317e-05, + "loss": 0.1749, + "step": 21660 + }, + { + "epoch": 1.01, + "learning_rate": 1.676265420970953e-05, + "loss": 0.2628, + "step": 21665 + }, + { + "epoch": 1.01, + "learning_rate": 1.6761870424654744e-05, + "loss": 0.2144, + "step": 21670 + }, + { + "epoch": 1.01, + "learning_rate": 1.676108663959996e-05, + "loss": 0.274, + "step": 21675 + }, + { + "epoch": 1.01, + "learning_rate": 1.6760302854545172e-05, + "loss": 0.3579, + "step": 21680 + }, + { + "epoch": 1.01, + "learning_rate": 1.6759519069490383e-05, + "loss": 0.3807, + "step": 21685 + }, + { + "epoch": 1.01, + "learning_rate": 1.67587352844356e-05, + "loss": 0.1214, + "step": 21690 + }, + { + "epoch": 1.01, + "learning_rate": 1.675795149938081e-05, + "loss": 0.1011, + "step": 21695 + }, + { + "epoch": 1.01, + "learning_rate": 1.6757167714326024e-05, + "loss": 0.0696, + "step": 21700 + }, + { + "epoch": 1.01, + "learning_rate": 1.675638392927124e-05, + "loss": 0.1463, + "step": 21705 + }, + { + "epoch": 1.01, + "learning_rate": 1.6755600144216452e-05, + "loss": 0.1416, + "step": 21710 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754816359161666e-05, + "loss": 0.128, + "step": 21715 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754032574106877e-05, + "loss": 0.1893, + "step": 21720 + }, + { + "epoch": 1.01, + "learning_rate": 1.675324878905209e-05, + "loss": 0.3064, + "step": 21725 + }, + { + "epoch": 1.01, + "learning_rate": 1.6752465003997305e-05, + "loss": 0.3098, + "step": 21730 + }, + { + "epoch": 1.01, + "learning_rate": 1.675168121894252e-05, + "loss": 0.3172, + "step": 21735 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750897433887732e-05, + "loss": 0.0526, + "step": 21740 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750113648832946e-05, + "loss": 0.0767, + "step": 21745 + }, + { + "epoch": 1.01, + "learning_rate": 1.674932986377816e-05, + "loss": 0.105, + "step": 21750 + }, + { + "epoch": 1.02, + "learning_rate": 1.6748546078723374e-05, + "loss": 0.128, + "step": 21755 + }, + { + "epoch": 1.02, + "learning_rate": 1.6747762293668585e-05, + "loss": 0.1071, + "step": 21760 + }, + { + "epoch": 1.02, + "learning_rate": 1.67469785086138e-05, + "loss": 0.1624, + "step": 21765 + }, + { + "epoch": 1.02, + "learning_rate": 1.6746194723559012e-05, + "loss": 0.3446, + "step": 21770 + }, + { + "epoch": 1.02, + "learning_rate": 1.6745410938504226e-05, + "loss": 0.3188, + "step": 21775 + }, + { + "epoch": 1.02, + "learning_rate": 1.674462715344944e-05, + "loss": 0.5742, + "step": 21780 + }, + { + "epoch": 1.02, + "learning_rate": 1.674384336839465e-05, + "loss": 0.2419, + "step": 21785 + }, + { + "epoch": 1.02, + "learning_rate": 1.6743059583339868e-05, + "loss": 0.0936, + "step": 21790 + }, + { + "epoch": 1.02, + "learning_rate": 1.674227579828508e-05, + "loss": 0.0601, + "step": 21795 + }, + { + "epoch": 1.02, + "learning_rate": 1.6741492013230292e-05, + "loss": 0.0906, + "step": 21800 + }, + { + "epoch": 1.02, + "learning_rate": 1.6740708228175506e-05, + "loss": 0.104, + "step": 21805 + }, + { + "epoch": 1.02, + "learning_rate": 1.673992444312072e-05, + "loss": 0.1736, + "step": 21810 + }, + { + "epoch": 1.02, + "learning_rate": 1.6739140658065934e-05, + "loss": 0.1642, + "step": 21815 + }, + { + "epoch": 1.02, + "learning_rate": 1.6738356873011148e-05, + "loss": 0.194, + "step": 21820 + }, + { + "epoch": 1.02, + "learning_rate": 1.673757308795636e-05, + "loss": 0.1698, + "step": 21825 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736789302901572e-05, + "loss": 0.5151, + "step": 21830 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736005517846786e-05, + "loss": 0.4388, + "step": 21835 + }, + { + "epoch": 1.02, + "learning_rate": 1.6735221732792e-05, + "loss": 0.0625, + "step": 21840 + }, + { + "epoch": 1.02, + "learning_rate": 1.6734437947737214e-05, + "loss": 0.0551, + "step": 21845 + }, + { + "epoch": 1.02, + "learning_rate": 1.6733654162682428e-05, + "loss": 0.086, + "step": 21850 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732870377627642e-05, + "loss": 0.107, + "step": 21855 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732086592572853e-05, + "loss": 0.1138, + "step": 21860 + }, + { + "epoch": 1.02, + "learning_rate": 1.673130280751807e-05, + "loss": 0.1885, + "step": 21865 + }, + { + "epoch": 1.02, + "learning_rate": 1.673051902246328e-05, + "loss": 0.2082, + "step": 21870 + }, + { + "epoch": 1.02, + "learning_rate": 1.6729735237408494e-05, + "loss": 0.2015, + "step": 21875 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728951452353708e-05, + "loss": 0.4405, + "step": 21880 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728167667298922e-05, + "loss": 0.2858, + "step": 21885 + }, + { + "epoch": 1.02, + "learning_rate": 1.6727383882244136e-05, + "loss": 0.0538, + "step": 21890 + }, + { + "epoch": 1.02, + "learning_rate": 1.6726600097189346e-05, + "loss": 0.0779, + "step": 21895 + }, + { + "epoch": 1.02, + "learning_rate": 1.672581631213456e-05, + "loss": 0.142, + "step": 21900 + }, + { + "epoch": 1.02, + "learning_rate": 1.6725032527079774e-05, + "loss": 0.1888, + "step": 21905 + }, + { + "epoch": 1.02, + "learning_rate": 1.6724248742024988e-05, + "loss": 0.1195, + "step": 21910 + }, + { + "epoch": 1.02, + "learning_rate": 1.6723464956970202e-05, + "loss": 0.2157, + "step": 21915 + }, + { + "epoch": 1.02, + "learning_rate": 1.6722681171915416e-05, + "loss": 0.2258, + "step": 21920 + }, + { + "epoch": 1.02, + "learning_rate": 1.672189738686063e-05, + "loss": 0.3311, + "step": 21925 + }, + { + "epoch": 1.02, + "learning_rate": 1.6721113601805844e-05, + "loss": 0.3652, + "step": 21930 + }, + { + "epoch": 1.02, + "learning_rate": 1.6720329816751054e-05, + "loss": 0.3514, + "step": 21935 + }, + { + "epoch": 1.02, + "learning_rate": 1.6719546031696268e-05, + "loss": 0.0737, + "step": 21940 + }, + { + "epoch": 1.02, + "learning_rate": 1.6718762246641482e-05, + "loss": 0.0702, + "step": 21945 + }, + { + "epoch": 1.02, + "learning_rate": 1.6717978461586696e-05, + "loss": 0.1228, + "step": 21950 + }, + { + "epoch": 1.02, + "learning_rate": 1.671719467653191e-05, + "loss": 0.1717, + "step": 21955 + }, + { + "epoch": 1.02, + "learning_rate": 1.671641089147712e-05, + "loss": 0.1327, + "step": 21960 + }, + { + "epoch": 1.02, + "learning_rate": 1.6715627106422338e-05, + "loss": 0.18, + "step": 21965 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714843321367548e-05, + "loss": 0.1339, + "step": 21970 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714059536312762e-05, + "loss": 0.4464, + "step": 21975 + }, + { + "epoch": 1.03, + "learning_rate": 1.6713275751257976e-05, + "loss": 0.4614, + "step": 21980 + }, + { + "epoch": 1.03, + "learning_rate": 1.671249196620319e-05, + "loss": 0.4731, + "step": 21985 + }, + { + "epoch": 1.03, + "learning_rate": 1.6711708181148404e-05, + "loss": 0.0743, + "step": 21990 + }, + { + "epoch": 1.03, + "learning_rate": 1.6710924396093618e-05, + "loss": 0.0772, + "step": 21995 + }, + { + "epoch": 1.03, + "learning_rate": 1.671014061103883e-05, + "loss": 0.1295, + "step": 22000 + }, + { + "epoch": 1.03, + "learning_rate": 1.6709356825984046e-05, + "loss": 0.1665, + "step": 22005 + }, + { + "epoch": 1.03, + "learning_rate": 1.6708573040929256e-05, + "loss": 0.1362, + "step": 22010 + }, + { + "epoch": 1.03, + "learning_rate": 1.670778925587447e-05, + "loss": 0.1636, + "step": 22015 + }, + { + "epoch": 1.03, + "learning_rate": 1.6707005470819684e-05, + "loss": 0.2899, + "step": 22020 + }, + { + "epoch": 1.03, + "learning_rate": 1.6706221685764898e-05, + "loss": 0.2897, + "step": 22025 + }, + { + "epoch": 1.03, + "learning_rate": 1.6705437900710112e-05, + "loss": 0.3886, + "step": 22030 + }, + { + "epoch": 1.03, + "learning_rate": 1.6704654115655322e-05, + "loss": 0.2882, + "step": 22035 + }, + { + "epoch": 1.03, + "learning_rate": 1.6703870330600536e-05, + "loss": 0.0652, + "step": 22040 + }, + { + "epoch": 1.03, + "learning_rate": 1.670308654554575e-05, + "loss": 0.0749, + "step": 22045 + }, + { + "epoch": 1.03, + "learning_rate": 1.6702302760490964e-05, + "loss": 0.0939, + "step": 22050 + }, + { + "epoch": 1.03, + "learning_rate": 1.6701518975436178e-05, + "loss": 0.1791, + "step": 22055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6700735190381392e-05, + "loss": 0.1139, + "step": 22060 + }, + { + "epoch": 1.03, + "learning_rate": 1.6699951405326606e-05, + "loss": 0.2075, + "step": 22065 + }, + { + "epoch": 1.03, + "learning_rate": 1.669916762027182e-05, + "loss": 0.2175, + "step": 22070 + }, + { + "epoch": 1.03, + "learning_rate": 1.669838383521703e-05, + "loss": 0.2589, + "step": 22075 + }, + { + "epoch": 1.03, + "learning_rate": 1.6697600050162247e-05, + "loss": 0.42, + "step": 22080 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696816265107458e-05, + "loss": 0.3279, + "step": 22085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696032480052672e-05, + "loss": 0.0295, + "step": 22090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6695248694997886e-05, + "loss": 0.0318, + "step": 22095 + }, + { + "epoch": 1.03, + "learning_rate": 1.6694464909943096e-05, + "loss": 0.1887, + "step": 22100 + }, + { + "epoch": 1.03, + "learning_rate": 1.6693681124888314e-05, + "loss": 0.1017, + "step": 22105 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692897339833524e-05, + "loss": 0.145, + "step": 22110 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692113554778738e-05, + "loss": 0.195, + "step": 22115 + }, + { + "epoch": 1.03, + "learning_rate": 1.6691329769723952e-05, + "loss": 0.2584, + "step": 22120 + }, + { + "epoch": 1.03, + "learning_rate": 1.6690545984669166e-05, + "loss": 0.2965, + "step": 22125 + }, + { + "epoch": 1.03, + "learning_rate": 1.668976219961438e-05, + "loss": 0.3912, + "step": 22130 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688978414559594e-05, + "loss": 0.3964, + "step": 22135 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688194629504804e-05, + "loss": 0.0447, + "step": 22140 + }, + { + "epoch": 1.03, + "learning_rate": 1.668741084445002e-05, + "loss": 0.1708, + "step": 22145 + }, + { + "epoch": 1.03, + "learning_rate": 1.6686627059395232e-05, + "loss": 0.1072, + "step": 22150 + }, + { + "epoch": 1.03, + "learning_rate": 1.6685843274340446e-05, + "loss": 0.1223, + "step": 22155 + }, + { + "epoch": 1.03, + "learning_rate": 1.668505948928566e-05, + "loss": 0.1006, + "step": 22160 + }, + { + "epoch": 1.03, + "learning_rate": 1.6684275704230874e-05, + "loss": 0.1096, + "step": 22165 + }, + { + "epoch": 1.03, + "learning_rate": 1.6683491919176088e-05, + "loss": 0.2708, + "step": 22170 + }, + { + "epoch": 1.03, + "learning_rate": 1.6682708134121298e-05, + "loss": 0.3093, + "step": 22175 + }, + { + "epoch": 1.03, + "learning_rate": 1.6681924349066515e-05, + "loss": 0.3701, + "step": 22180 + }, + { + "epoch": 1.04, + "learning_rate": 1.6681140564011726e-05, + "loss": 0.251, + "step": 22185 + }, + { + "epoch": 1.04, + "learning_rate": 1.668035677895694e-05, + "loss": 0.0674, + "step": 22190 + }, + { + "epoch": 1.04, + "learning_rate": 1.6679572993902154e-05, + "loss": 0.1336, + "step": 22195 + }, + { + "epoch": 1.04, + "learning_rate": 1.6678789208847368e-05, + "loss": 0.136, + "step": 22200 + }, + { + "epoch": 1.04, + "learning_rate": 1.667800542379258e-05, + "loss": 0.0998, + "step": 22205 + }, + { + "epoch": 1.04, + "learning_rate": 1.6677221638737795e-05, + "loss": 0.13, + "step": 22210 + }, + { + "epoch": 1.04, + "learning_rate": 1.6676437853683006e-05, + "loss": 0.1497, + "step": 22215 + }, + { + "epoch": 1.04, + "learning_rate": 1.667565406862822e-05, + "loss": 0.1846, + "step": 22220 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674870283573434e-05, + "loss": 0.2499, + "step": 22225 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674086498518648e-05, + "loss": 0.4191, + "step": 22230 + }, + { + "epoch": 1.04, + "learning_rate": 1.667330271346386e-05, + "loss": 0.3157, + "step": 22235 + }, + { + "epoch": 1.04, + "learning_rate": 1.6672518928409075e-05, + "loss": 0.0555, + "step": 22240 + }, + { + "epoch": 1.04, + "learning_rate": 1.667173514335429e-05, + "loss": 0.1057, + "step": 22245 + }, + { + "epoch": 1.04, + "learning_rate": 1.66709513582995e-05, + "loss": 0.0907, + "step": 22250 + }, + { + "epoch": 1.04, + "learning_rate": 1.6670167573244714e-05, + "loss": 0.1564, + "step": 22255 + }, + { + "epoch": 1.04, + "learning_rate": 1.6669383788189928e-05, + "loss": 0.1607, + "step": 22260 + }, + { + "epoch": 1.04, + "learning_rate": 1.666860000313514e-05, + "loss": 0.1959, + "step": 22265 + }, + { + "epoch": 1.04, + "learning_rate": 1.6667816218080356e-05, + "loss": 0.2283, + "step": 22270 + }, + { + "epoch": 1.04, + "learning_rate": 1.666703243302557e-05, + "loss": 0.3232, + "step": 22275 + }, + { + "epoch": 1.04, + "learning_rate": 1.6666248647970783e-05, + "loss": 0.344, + "step": 22280 + }, + { + "epoch": 1.04, + "learning_rate": 1.6665464862915994e-05, + "loss": 0.2879, + "step": 22285 + }, + { + "epoch": 1.04, + "learning_rate": 1.6664681077861208e-05, + "loss": 0.077, + "step": 22290 + }, + { + "epoch": 1.04, + "learning_rate": 1.666389729280642e-05, + "loss": 0.0681, + "step": 22295 + }, + { + "epoch": 1.04, + "learning_rate": 1.6663113507751636e-05, + "loss": 0.0788, + "step": 22300 + }, + { + "epoch": 1.04, + "learning_rate": 1.666232972269685e-05, + "loss": 0.1024, + "step": 22305 + }, + { + "epoch": 1.04, + "learning_rate": 1.6661545937642063e-05, + "loss": 0.1455, + "step": 22310 + }, + { + "epoch": 1.04, + "learning_rate": 1.6660762152587274e-05, + "loss": 0.2212, + "step": 22315 + }, + { + "epoch": 1.04, + "learning_rate": 1.665997836753249e-05, + "loss": 0.1622, + "step": 22320 + }, + { + "epoch": 1.04, + "learning_rate": 1.6659194582477702e-05, + "loss": 0.257, + "step": 22325 + }, + { + "epoch": 1.04, + "learning_rate": 1.6658410797422916e-05, + "loss": 0.386, + "step": 22330 + }, + { + "epoch": 1.04, + "learning_rate": 1.665762701236813e-05, + "loss": 0.3866, + "step": 22335 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656843227313343e-05, + "loss": 0.068, + "step": 22340 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656059442258557e-05, + "loss": 0.1259, + "step": 22345 + }, + { + "epoch": 1.04, + "learning_rate": 1.6655275657203768e-05, + "loss": 0.1199, + "step": 22350 + }, + { + "epoch": 1.04, + "learning_rate": 1.6654491872148982e-05, + "loss": 0.1675, + "step": 22355 + }, + { + "epoch": 1.04, + "learning_rate": 1.6653708087094196e-05, + "loss": 0.1525, + "step": 22360 + }, + { + "epoch": 1.04, + "learning_rate": 1.665292430203941e-05, + "loss": 0.1875, + "step": 22365 + }, + { + "epoch": 1.04, + "learning_rate": 1.6652140516984623e-05, + "loss": 0.2257, + "step": 22370 + }, + { + "epoch": 1.04, + "learning_rate": 1.6651356731929837e-05, + "loss": 0.195, + "step": 22375 + }, + { + "epoch": 1.04, + "learning_rate": 1.665057294687505e-05, + "loss": 0.3703, + "step": 22380 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649789161820265e-05, + "loss": 0.3111, + "step": 22385 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649005376765476e-05, + "loss": 0.0501, + "step": 22390 + }, + { + "epoch": 1.04, + "learning_rate": 1.6648221591710693e-05, + "loss": 0.1009, + "step": 22395 + }, + { + "epoch": 1.05, + "learning_rate": 1.6647437806655904e-05, + "loss": 0.1188, + "step": 22400 + }, + { + "epoch": 1.05, + "learning_rate": 1.6646654021601117e-05, + "loss": 0.1223, + "step": 22405 + }, + { + "epoch": 1.05, + "learning_rate": 1.664587023654633e-05, + "loss": 0.2532, + "step": 22410 + }, + { + "epoch": 1.05, + "learning_rate": 1.6645086451491542e-05, + "loss": 0.2103, + "step": 22415 + }, + { + "epoch": 1.05, + "learning_rate": 1.664430266643676e-05, + "loss": 0.2827, + "step": 22420 + }, + { + "epoch": 1.05, + "learning_rate": 1.664351888138197e-05, + "loss": 0.3525, + "step": 22425 + }, + { + "epoch": 1.05, + "learning_rate": 1.6642735096327184e-05, + "loss": 0.4255, + "step": 22430 + }, + { + "epoch": 1.05, + "learning_rate": 1.6641951311272397e-05, + "loss": 0.3211, + "step": 22435 + }, + { + "epoch": 1.05, + "learning_rate": 1.664116752621761e-05, + "loss": 0.0945, + "step": 22440 + }, + { + "epoch": 1.05, + "learning_rate": 1.6640383741162825e-05, + "loss": 0.0941, + "step": 22445 + }, + { + "epoch": 1.05, + "learning_rate": 1.663959995610804e-05, + "loss": 0.1018, + "step": 22450 + }, + { + "epoch": 1.05, + "learning_rate": 1.663881617105325e-05, + "loss": 0.1377, + "step": 22455 + }, + { + "epoch": 1.05, + "learning_rate": 1.6638032385998467e-05, + "loss": 0.1353, + "step": 22460 + }, + { + "epoch": 1.05, + "learning_rate": 1.6637248600943678e-05, + "loss": 0.1506, + "step": 22465 + }, + { + "epoch": 1.05, + "learning_rate": 1.663646481588889e-05, + "loss": 0.2194, + "step": 22470 + }, + { + "epoch": 1.05, + "learning_rate": 1.6635681030834105e-05, + "loss": 0.2673, + "step": 22475 + }, + { + "epoch": 1.05, + "learning_rate": 1.663489724577932e-05, + "loss": 0.4444, + "step": 22480 + }, + { + "epoch": 1.05, + "learning_rate": 1.6634113460724533e-05, + "loss": 0.4203, + "step": 22485 + }, + { + "epoch": 1.05, + "learning_rate": 1.6633329675669744e-05, + "loss": 0.0998, + "step": 22490 + }, + { + "epoch": 1.05, + "learning_rate": 1.663254589061496e-05, + "loss": 0.0645, + "step": 22495 + }, + { + "epoch": 1.05, + "learning_rate": 1.663176210556017e-05, + "loss": 0.0815, + "step": 22500 + }, + { + "epoch": 1.05, + "learning_rate": 1.6630978320505385e-05, + "loss": 0.1239, + "step": 22505 + }, + { + "epoch": 1.05, + "learning_rate": 1.66301945354506e-05, + "loss": 0.1158, + "step": 22510 + }, + { + "epoch": 1.05, + "learning_rate": 1.6629410750395813e-05, + "loss": 0.1965, + "step": 22515 + }, + { + "epoch": 1.05, + "learning_rate": 1.6628626965341027e-05, + "loss": 0.2295, + "step": 22520 + }, + { + "epoch": 1.05, + "learning_rate": 1.662784318028624e-05, + "loss": 0.3215, + "step": 22525 + }, + { + "epoch": 1.05, + "learning_rate": 1.662705939523145e-05, + "loss": 0.3832, + "step": 22530 + }, + { + "epoch": 1.05, + "learning_rate": 1.662627561017667e-05, + "loss": 0.3457, + "step": 22535 + }, + { + "epoch": 1.05, + "learning_rate": 1.662549182512188e-05, + "loss": 0.1041, + "step": 22540 + }, + { + "epoch": 1.05, + "learning_rate": 1.6624708040067093e-05, + "loss": 0.0946, + "step": 22545 + }, + { + "epoch": 1.05, + "learning_rate": 1.6623924255012307e-05, + "loss": 0.0797, + "step": 22550 + }, + { + "epoch": 1.05, + "learning_rate": 1.662314046995752e-05, + "loss": 0.1227, + "step": 22555 + }, + { + "epoch": 1.05, + "learning_rate": 1.6622356684902735e-05, + "loss": 0.1388, + "step": 22560 + }, + { + "epoch": 1.05, + "learning_rate": 1.6621572899847945e-05, + "loss": 0.1356, + "step": 22565 + }, + { + "epoch": 1.05, + "learning_rate": 1.662078911479316e-05, + "loss": 0.1936, + "step": 22570 + }, + { + "epoch": 1.05, + "learning_rate": 1.6620005329738373e-05, + "loss": 0.3707, + "step": 22575 + }, + { + "epoch": 1.05, + "learning_rate": 1.6619221544683587e-05, + "loss": 0.4417, + "step": 22580 + }, + { + "epoch": 1.05, + "learning_rate": 1.66184377596288e-05, + "loss": 0.4056, + "step": 22585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6617653974574015e-05, + "loss": 0.1009, + "step": 22590 + }, + { + "epoch": 1.05, + "learning_rate": 1.661687018951923e-05, + "loss": 0.1221, + "step": 22595 + }, + { + "epoch": 1.05, + "learning_rate": 1.6616086404464443e-05, + "loss": 0.1144, + "step": 22600 + }, + { + "epoch": 1.05, + "learning_rate": 1.6615302619409653e-05, + "loss": 0.1499, + "step": 22605 + }, + { + "epoch": 1.06, + "learning_rate": 1.6614518834354867e-05, + "loss": 0.1002, + "step": 22610 + }, + { + "epoch": 1.06, + "learning_rate": 1.661373504930008e-05, + "loss": 0.1595, + "step": 22615 + }, + { + "epoch": 1.06, + "learning_rate": 1.6612951264245295e-05, + "loss": 0.2121, + "step": 22620 + }, + { + "epoch": 1.06, + "learning_rate": 1.661216747919051e-05, + "loss": 0.2338, + "step": 22625 + }, + { + "epoch": 1.06, + "learning_rate": 1.661138369413572e-05, + "loss": 0.375, + "step": 22630 + }, + { + "epoch": 1.06, + "learning_rate": 1.6610599909080937e-05, + "loss": 0.2275, + "step": 22635 + }, + { + "epoch": 1.06, + "learning_rate": 1.6609816124026147e-05, + "loss": 0.0468, + "step": 22640 + }, + { + "epoch": 1.06, + "learning_rate": 1.660903233897136e-05, + "loss": 0.0573, + "step": 22645 + }, + { + "epoch": 1.06, + "learning_rate": 1.6608248553916575e-05, + "loss": 0.0989, + "step": 22650 + }, + { + "epoch": 1.06, + "learning_rate": 1.660746476886179e-05, + "loss": 0.1472, + "step": 22655 + }, + { + "epoch": 1.06, + "learning_rate": 1.6606680983807003e-05, + "loss": 0.1244, + "step": 22660 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605897198752217e-05, + "loss": 0.2461, + "step": 22665 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605113413697427e-05, + "loss": 0.1669, + "step": 22670 + }, + { + "epoch": 1.06, + "learning_rate": 1.660432962864264e-05, + "loss": 0.2483, + "step": 22675 + }, + { + "epoch": 1.06, + "learning_rate": 1.6603545843587855e-05, + "loss": 0.3463, + "step": 22680 + }, + { + "epoch": 1.06, + "learning_rate": 1.660276205853307e-05, + "loss": 0.3456, + "step": 22685 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601978273478283e-05, + "loss": 0.0599, + "step": 22690 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601194488423497e-05, + "loss": 0.0639, + "step": 22695 + }, + { + "epoch": 1.06, + "learning_rate": 1.660041070336871e-05, + "loss": 0.1262, + "step": 22700 + }, + { + "epoch": 1.06, + "learning_rate": 1.659962691831392e-05, + "loss": 0.1424, + "step": 22705 + }, + { + "epoch": 1.06, + "learning_rate": 1.659884313325914e-05, + "loss": 0.1273, + "step": 22710 + }, + { + "epoch": 1.06, + "learning_rate": 1.659805934820435e-05, + "loss": 0.2208, + "step": 22715 + }, + { + "epoch": 1.06, + "learning_rate": 1.6597275563149563e-05, + "loss": 0.1814, + "step": 22720 + }, + { + "epoch": 1.06, + "learning_rate": 1.6596491778094777e-05, + "loss": 0.2325, + "step": 22725 + }, + { + "epoch": 1.06, + "learning_rate": 1.659570799303999e-05, + "loss": 0.4139, + "step": 22730 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594924207985205e-05, + "loss": 0.2675, + "step": 22735 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594140422930415e-05, + "loss": 0.0246, + "step": 22740 + }, + { + "epoch": 1.06, + "learning_rate": 1.659335663787563e-05, + "loss": 0.0829, + "step": 22745 + }, + { + "epoch": 1.06, + "learning_rate": 1.6592572852820843e-05, + "loss": 0.1863, + "step": 22750 + }, + { + "epoch": 1.06, + "learning_rate": 1.6591789067766057e-05, + "loss": 0.1843, + "step": 22755 + }, + { + "epoch": 1.06, + "learning_rate": 1.659100528271127e-05, + "loss": 0.1143, + "step": 22760 + }, + { + "epoch": 1.06, + "learning_rate": 1.6590221497656485e-05, + "loss": 0.1305, + "step": 22765 + }, + { + "epoch": 1.06, + "learning_rate": 1.65894377126017e-05, + "loss": 0.229, + "step": 22770 + }, + { + "epoch": 1.06, + "learning_rate": 1.6588653927546913e-05, + "loss": 0.2498, + "step": 22775 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587870142492123e-05, + "loss": 0.3766, + "step": 22780 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587086357437337e-05, + "loss": 0.2961, + "step": 22785 + }, + { + "epoch": 1.06, + "learning_rate": 1.658630257238255e-05, + "loss": 0.069, + "step": 22790 + }, + { + "epoch": 1.06, + "learning_rate": 1.6585518787327765e-05, + "loss": 0.0735, + "step": 22795 + }, + { + "epoch": 1.06, + "learning_rate": 1.658473500227298e-05, + "loss": 0.1352, + "step": 22800 + }, + { + "epoch": 1.06, + "learning_rate": 1.658395121721819e-05, + "loss": 0.1563, + "step": 22805 + }, + { + "epoch": 1.06, + "learning_rate": 1.6583167432163407e-05, + "loss": 0.155, + "step": 22810 + }, + { + "epoch": 1.06, + "learning_rate": 1.6582383647108617e-05, + "loss": 0.2341, + "step": 22815 + }, + { + "epoch": 1.06, + "learning_rate": 1.658159986205383e-05, + "loss": 0.22, + "step": 22820 + }, + { + "epoch": 1.07, + "learning_rate": 1.6580816076999045e-05, + "loss": 0.2533, + "step": 22825 + }, + { + "epoch": 1.07, + "learning_rate": 1.658003229194426e-05, + "loss": 0.5077, + "step": 22830 + }, + { + "epoch": 1.07, + "learning_rate": 1.6579248506889473e-05, + "loss": 0.49, + "step": 22835 + }, + { + "epoch": 1.07, + "learning_rate": 1.6578464721834687e-05, + "loss": 0.166, + "step": 22840 + }, + { + "epoch": 1.07, + "learning_rate": 1.6577680936779897e-05, + "loss": 0.0973, + "step": 22845 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576897151725114e-05, + "loss": 0.0986, + "step": 22850 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576113366670325e-05, + "loss": 0.1956, + "step": 22855 + }, + { + "epoch": 1.07, + "learning_rate": 1.657532958161554e-05, + "loss": 0.2051, + "step": 22860 + }, + { + "epoch": 1.07, + "learning_rate": 1.6574545796560753e-05, + "loss": 0.1668, + "step": 22865 + }, + { + "epoch": 1.07, + "learning_rate": 1.6573762011505967e-05, + "loss": 0.2645, + "step": 22870 + }, + { + "epoch": 1.07, + "learning_rate": 1.657297822645118e-05, + "loss": 0.2412, + "step": 22875 + }, + { + "epoch": 1.07, + "learning_rate": 1.657219444139639e-05, + "loss": 0.36, + "step": 22880 + }, + { + "epoch": 1.07, + "learning_rate": 1.6571410656341605e-05, + "loss": 0.3128, + "step": 22885 + }, + { + "epoch": 1.07, + "learning_rate": 1.657062687128682e-05, + "loss": 0.0542, + "step": 22890 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569843086232033e-05, + "loss": 0.0474, + "step": 22895 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569059301177247e-05, + "loss": 0.1289, + "step": 22900 + }, + { + "epoch": 1.07, + "learning_rate": 1.656827551612246e-05, + "loss": 0.0668, + "step": 22905 + }, + { + "epoch": 1.07, + "learning_rate": 1.6567491731067674e-05, + "loss": 0.2174, + "step": 22910 + }, + { + "epoch": 1.07, + "learning_rate": 1.656670794601289e-05, + "loss": 0.1433, + "step": 22915 + }, + { + "epoch": 1.07, + "learning_rate": 1.65659241609581e-05, + "loss": 0.1519, + "step": 22920 + }, + { + "epoch": 1.07, + "learning_rate": 1.6565140375903316e-05, + "loss": 0.2903, + "step": 22925 + }, + { + "epoch": 1.07, + "learning_rate": 1.6564356590848527e-05, + "loss": 0.4346, + "step": 22930 + }, + { + "epoch": 1.07, + "learning_rate": 1.656357280579374e-05, + "loss": 0.3024, + "step": 22935 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562789020738955e-05, + "loss": 0.031, + "step": 22940 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562005235684165e-05, + "loss": 0.1083, + "step": 22945 + }, + { + "epoch": 1.07, + "learning_rate": 1.6561221450629382e-05, + "loss": 0.0789, + "step": 22950 + }, + { + "epoch": 1.07, + "learning_rate": 1.6560437665574593e-05, + "loss": 0.193, + "step": 22955 + }, + { + "epoch": 1.07, + "learning_rate": 1.6559653880519807e-05, + "loss": 0.2532, + "step": 22960 + }, + { + "epoch": 1.07, + "learning_rate": 1.655887009546502e-05, + "loss": 0.2134, + "step": 22965 + }, + { + "epoch": 1.07, + "learning_rate": 1.6558086310410235e-05, + "loss": 0.2538, + "step": 22970 + }, + { + "epoch": 1.07, + "learning_rate": 1.655730252535545e-05, + "loss": 0.2492, + "step": 22975 + }, + { + "epoch": 1.07, + "learning_rate": 1.6556518740300662e-05, + "loss": 0.4392, + "step": 22980 + }, + { + "epoch": 1.07, + "learning_rate": 1.6555734955245873e-05, + "loss": 0.2968, + "step": 22985 + }, + { + "epoch": 1.07, + "learning_rate": 1.655495117019109e-05, + "loss": 0.0663, + "step": 22990 + }, + { + "epoch": 1.07, + "learning_rate": 1.65541673851363e-05, + "loss": 0.1486, + "step": 22995 + }, + { + "epoch": 1.07, + "learning_rate": 1.6553383600081515e-05, + "loss": 0.0821, + "step": 23000 + }, + { + "epoch": 1.07, + "learning_rate": 1.655259981502673e-05, + "loss": 0.1134, + "step": 23005 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551816029971942e-05, + "loss": 0.2011, + "step": 23010 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551032244917156e-05, + "loss": 0.2416, + "step": 23015 + }, + { + "epoch": 1.07, + "learning_rate": 1.6550248459862367e-05, + "loss": 0.2838, + "step": 23020 + }, + { + "epoch": 1.07, + "learning_rate": 1.6549464674807584e-05, + "loss": 0.285, + "step": 23025 + }, + { + "epoch": 1.07, + "learning_rate": 1.6548680889752795e-05, + "loss": 0.3597, + "step": 23030 + }, + { + "epoch": 1.07, + "learning_rate": 1.654789710469801e-05, + "loss": 0.4682, + "step": 23035 + }, + { + "epoch": 1.08, + "learning_rate": 1.6547113319643222e-05, + "loss": 0.0424, + "step": 23040 + }, + { + "epoch": 1.08, + "learning_rate": 1.6546329534588436e-05, + "loss": 0.0739, + "step": 23045 + }, + { + "epoch": 1.08, + "learning_rate": 1.654554574953365e-05, + "loss": 0.0871, + "step": 23050 + }, + { + "epoch": 1.08, + "learning_rate": 1.6544761964478864e-05, + "loss": 0.0759, + "step": 23055 + }, + { + "epoch": 1.08, + "learning_rate": 1.6543978179424075e-05, + "loss": 0.1438, + "step": 23060 + }, + { + "epoch": 1.08, + "learning_rate": 1.654319439436929e-05, + "loss": 0.1683, + "step": 23065 + }, + { + "epoch": 1.08, + "learning_rate": 1.6542410609314503e-05, + "loss": 0.2168, + "step": 23070 + }, + { + "epoch": 1.08, + "learning_rate": 1.6541626824259716e-05, + "loss": 0.2413, + "step": 23075 + }, + { + "epoch": 1.08, + "learning_rate": 1.654084303920493e-05, + "loss": 0.3585, + "step": 23080 + }, + { + "epoch": 1.08, + "learning_rate": 1.6540059254150144e-05, + "loss": 0.2754, + "step": 23085 + }, + { + "epoch": 1.08, + "learning_rate": 1.6539275469095358e-05, + "loss": 0.0734, + "step": 23090 + }, + { + "epoch": 1.08, + "learning_rate": 1.653849168404057e-05, + "loss": 0.1821, + "step": 23095 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537707898985783e-05, + "loss": 0.136, + "step": 23100 + }, + { + "epoch": 1.08, + "learning_rate": 1.6536924113930996e-05, + "loss": 0.1626, + "step": 23105 + }, + { + "epoch": 1.08, + "learning_rate": 1.653614032887621e-05, + "loss": 0.1497, + "step": 23110 + }, + { + "epoch": 1.08, + "learning_rate": 1.6535356543821424e-05, + "loss": 0.1505, + "step": 23115 + }, + { + "epoch": 1.08, + "learning_rate": 1.6534572758766638e-05, + "loss": 0.2225, + "step": 23120 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533788973711852e-05, + "loss": 0.2445, + "step": 23125 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533005188657063e-05, + "loss": 0.4143, + "step": 23130 + }, + { + "epoch": 1.08, + "learning_rate": 1.6532221403602277e-05, + "loss": 0.322, + "step": 23135 + }, + { + "epoch": 1.08, + "learning_rate": 1.653143761854749e-05, + "loss": 0.0707, + "step": 23140 + }, + { + "epoch": 1.08, + "learning_rate": 1.6530653833492704e-05, + "loss": 0.0468, + "step": 23145 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529870048437918e-05, + "loss": 0.1199, + "step": 23150 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529086263383132e-05, + "loss": 0.1203, + "step": 23155 + }, + { + "epoch": 1.08, + "learning_rate": 1.6528302478328343e-05, + "loss": 0.1562, + "step": 23160 + }, + { + "epoch": 1.08, + "learning_rate": 1.652751869327356e-05, + "loss": 0.1764, + "step": 23165 + }, + { + "epoch": 1.08, + "learning_rate": 1.652673490821877e-05, + "loss": 0.1951, + "step": 23170 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525951123163984e-05, + "loss": 0.3135, + "step": 23175 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525167338109198e-05, + "loss": 0.2562, + "step": 23180 + }, + { + "epoch": 1.08, + "learning_rate": 1.6524383553054412e-05, + "loss": 0.2707, + "step": 23185 + }, + { + "epoch": 1.08, + "learning_rate": 1.6523599767999626e-05, + "loss": 0.0834, + "step": 23190 + }, + { + "epoch": 1.08, + "learning_rate": 1.6522815982944837e-05, + "loss": 0.1467, + "step": 23195 + }, + { + "epoch": 1.08, + "learning_rate": 1.652203219789005e-05, + "loss": 0.093, + "step": 23200 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521248412835264e-05, + "loss": 0.0902, + "step": 23205 + }, + { + "epoch": 1.08, + "learning_rate": 1.652046462778048e-05, + "loss": 0.2016, + "step": 23210 + }, + { + "epoch": 1.08, + "learning_rate": 1.6519680842725692e-05, + "loss": 0.2145, + "step": 23215 + }, + { + "epoch": 1.08, + "learning_rate": 1.6518897057670906e-05, + "loss": 0.2803, + "step": 23220 + }, + { + "epoch": 1.08, + "learning_rate": 1.651811327261612e-05, + "loss": 0.2825, + "step": 23225 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517329487561334e-05, + "loss": 0.3829, + "step": 23230 + }, + { + "epoch": 1.08, + "learning_rate": 1.6516545702506544e-05, + "loss": 0.2447, + "step": 23235 + }, + { + "epoch": 1.08, + "learning_rate": 1.6515761917451762e-05, + "loss": 0.0754, + "step": 23240 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514978132396972e-05, + "loss": 0.0598, + "step": 23245 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514194347342186e-05, + "loss": 0.1389, + "step": 23250 + }, + { + "epoch": 1.09, + "learning_rate": 1.65134105622874e-05, + "loss": 0.156, + "step": 23255 + }, + { + "epoch": 1.09, + "learning_rate": 1.651262677723261e-05, + "loss": 0.1372, + "step": 23260 + }, + { + "epoch": 1.09, + "learning_rate": 1.6511842992177828e-05, + "loss": 0.2297, + "step": 23265 + }, + { + "epoch": 1.09, + "learning_rate": 1.651105920712304e-05, + "loss": 0.2115, + "step": 23270 + }, + { + "epoch": 1.09, + "learning_rate": 1.6510275422068252e-05, + "loss": 0.2469, + "step": 23275 + }, + { + "epoch": 1.09, + "learning_rate": 1.6509491637013466e-05, + "loss": 0.2814, + "step": 23280 + }, + { + "epoch": 1.09, + "learning_rate": 1.650870785195868e-05, + "loss": 0.3825, + "step": 23285 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507924066903894e-05, + "loss": 0.0548, + "step": 23290 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507140281849108e-05, + "loss": 0.0556, + "step": 23295 + }, + { + "epoch": 1.09, + "learning_rate": 1.650635649679432e-05, + "loss": 0.0974, + "step": 23300 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505572711739536e-05, + "loss": 0.1694, + "step": 23305 + }, + { + "epoch": 1.09, + "learning_rate": 1.6504788926684746e-05, + "loss": 0.2936, + "step": 23310 + }, + { + "epoch": 1.09, + "learning_rate": 1.650400514162996e-05, + "loss": 0.1766, + "step": 23315 + }, + { + "epoch": 1.09, + "learning_rate": 1.6503221356575174e-05, + "loss": 0.1843, + "step": 23320 + }, + { + "epoch": 1.09, + "learning_rate": 1.6502437571520388e-05, + "loss": 0.3095, + "step": 23325 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501653786465602e-05, + "loss": 0.3444, + "step": 23330 + }, + { + "epoch": 1.09, + "learning_rate": 1.6500870001410812e-05, + "loss": 0.2731, + "step": 23335 + }, + { + "epoch": 1.09, + "learning_rate": 1.650008621635603e-05, + "loss": 0.0252, + "step": 23340 + }, + { + "epoch": 1.09, + "learning_rate": 1.649930243130124e-05, + "loss": 0.1032, + "step": 23345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6498518646246454e-05, + "loss": 0.1265, + "step": 23350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497734861191668e-05, + "loss": 0.1087, + "step": 23355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496951076136882e-05, + "loss": 0.1427, + "step": 23360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496167291082096e-05, + "loss": 0.2091, + "step": 23365 + }, + { + "epoch": 1.09, + "learning_rate": 1.649538350602731e-05, + "loss": 0.7426, + "step": 23370 + }, + { + "epoch": 1.09, + "learning_rate": 1.649459972097252e-05, + "loss": 0.2259, + "step": 23375 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493815935917738e-05, + "loss": 0.4487, + "step": 23380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493032150862948e-05, + "loss": 0.2265, + "step": 23385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6492248365808162e-05, + "loss": 0.0922, + "step": 23390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6491464580753376e-05, + "loss": 0.1548, + "step": 23395 + }, + { + "epoch": 1.09, + "learning_rate": 1.649068079569859e-05, + "loss": 0.1354, + "step": 23400 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489897010643804e-05, + "loss": 0.1246, + "step": 23405 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489113225589014e-05, + "loss": 0.1558, + "step": 23410 + }, + { + "epoch": 1.09, + "learning_rate": 1.6488329440534228e-05, + "loss": 0.1481, + "step": 23415 + }, + { + "epoch": 1.09, + "learning_rate": 1.6487545655479442e-05, + "loss": 0.2439, + "step": 23420 + }, + { + "epoch": 1.09, + "learning_rate": 1.6486761870424656e-05, + "loss": 0.3442, + "step": 23425 + }, + { + "epoch": 1.09, + "learning_rate": 1.648597808536987e-05, + "loss": 0.3016, + "step": 23430 + }, + { + "epoch": 1.09, + "learning_rate": 1.6485194300315084e-05, + "loss": 0.2837, + "step": 23435 + }, + { + "epoch": 1.09, + "learning_rate": 1.6484410515260298e-05, + "loss": 0.0948, + "step": 23440 + }, + { + "epoch": 1.09, + "learning_rate": 1.648362673020551e-05, + "loss": 0.0681, + "step": 23445 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482842945150722e-05, + "loss": 0.107, + "step": 23450 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482059160095936e-05, + "loss": 0.1297, + "step": 23455 + }, + { + "epoch": 1.09, + "learning_rate": 1.648127537504115e-05, + "loss": 0.1361, + "step": 23460 + }, + { + "epoch": 1.09, + "learning_rate": 1.6480491589986364e-05, + "loss": 0.142, + "step": 23465 + }, + { + "epoch": 1.1, + "learning_rate": 1.6479707804931578e-05, + "loss": 0.2334, + "step": 23470 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478924019876788e-05, + "loss": 0.277, + "step": 23475 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478140234822006e-05, + "loss": 0.341, + "step": 23480 + }, + { + "epoch": 1.1, + "learning_rate": 1.6477356449767216e-05, + "loss": 0.3929, + "step": 23485 + }, + { + "epoch": 1.1, + "learning_rate": 1.647657266471243e-05, + "loss": 0.1686, + "step": 23490 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475788879657644e-05, + "loss": 0.0867, + "step": 23495 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475005094602858e-05, + "loss": 0.0441, + "step": 23500 + }, + { + "epoch": 1.1, + "learning_rate": 1.647422130954807e-05, + "loss": 0.1218, + "step": 23505 + }, + { + "epoch": 1.1, + "learning_rate": 1.6473437524493286e-05, + "loss": 0.1216, + "step": 23510 + }, + { + "epoch": 1.1, + "learning_rate": 1.6472653739438496e-05, + "loss": 0.2273, + "step": 23515 + }, + { + "epoch": 1.1, + "learning_rate": 1.647186995438371e-05, + "loss": 0.3123, + "step": 23520 + }, + { + "epoch": 1.1, + "learning_rate": 1.6471086169328924e-05, + "loss": 0.2573, + "step": 23525 + }, + { + "epoch": 1.1, + "learning_rate": 1.6470302384274138e-05, + "loss": 0.4518, + "step": 23530 + }, + { + "epoch": 1.1, + "learning_rate": 1.6469518599219352e-05, + "loss": 0.2675, + "step": 23535 + }, + { + "epoch": 1.1, + "learning_rate": 1.6468734814164566e-05, + "loss": 0.0457, + "step": 23540 + }, + { + "epoch": 1.1, + "learning_rate": 1.646795102910978e-05, + "loss": 0.0892, + "step": 23545 + }, + { + "epoch": 1.1, + "learning_rate": 1.646716724405499e-05, + "loss": 0.07, + "step": 23550 + }, + { + "epoch": 1.1, + "learning_rate": 1.6466383459000207e-05, + "loss": 0.1443, + "step": 23555 + }, + { + "epoch": 1.1, + "learning_rate": 1.6465599673945418e-05, + "loss": 0.1848, + "step": 23560 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464815888890632e-05, + "loss": 0.1461, + "step": 23565 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464032103835846e-05, + "loss": 0.1837, + "step": 23570 + }, + { + "epoch": 1.1, + "learning_rate": 1.646324831878106e-05, + "loss": 0.178, + "step": 23575 + }, + { + "epoch": 1.1, + "learning_rate": 1.6462464533726273e-05, + "loss": 0.5349, + "step": 23580 + }, + { + "epoch": 1.1, + "learning_rate": 1.6461680748671484e-05, + "loss": 0.3265, + "step": 23585 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460896963616698e-05, + "loss": 0.0742, + "step": 23590 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460113178561912e-05, + "loss": 0.0978, + "step": 23595 + }, + { + "epoch": 1.1, + "learning_rate": 1.6459329393507126e-05, + "loss": 0.1308, + "step": 23600 + }, + { + "epoch": 1.1, + "learning_rate": 1.645854560845234e-05, + "loss": 0.1915, + "step": 23605 + }, + { + "epoch": 1.1, + "learning_rate": 1.6457761823397554e-05, + "loss": 0.094, + "step": 23610 + }, + { + "epoch": 1.1, + "learning_rate": 1.6456978038342764e-05, + "loss": 0.1869, + "step": 23615 + }, + { + "epoch": 1.1, + "learning_rate": 1.645619425328798e-05, + "loss": 0.1502, + "step": 23620 + }, + { + "epoch": 1.1, + "learning_rate": 1.6455410468233192e-05, + "loss": 0.2174, + "step": 23625 + }, + { + "epoch": 1.1, + "learning_rate": 1.6454626683178406e-05, + "loss": 0.4411, + "step": 23630 + }, + { + "epoch": 1.1, + "learning_rate": 1.645384289812362e-05, + "loss": 0.2565, + "step": 23635 + }, + { + "epoch": 1.1, + "learning_rate": 1.6453059113068834e-05, + "loss": 0.0362, + "step": 23640 + }, + { + "epoch": 1.1, + "learning_rate": 1.6452275328014047e-05, + "loss": 0.0526, + "step": 23645 + }, + { + "epoch": 1.1, + "learning_rate": 1.6451491542959258e-05, + "loss": 0.1324, + "step": 23650 + }, + { + "epoch": 1.1, + "learning_rate": 1.6450707757904475e-05, + "loss": 0.0992, + "step": 23655 + }, + { + "epoch": 1.1, + "learning_rate": 1.6449923972849686e-05, + "loss": 0.1667, + "step": 23660 + }, + { + "epoch": 1.1, + "learning_rate": 1.64491401877949e-05, + "loss": 0.2788, + "step": 23665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6448356402740114e-05, + "loss": 0.2025, + "step": 23670 + }, + { + "epoch": 1.1, + "learning_rate": 1.6447572617685328e-05, + "loss": 0.2577, + "step": 23675 + }, + { + "epoch": 1.1, + "learning_rate": 1.644678883263054e-05, + "loss": 0.2843, + "step": 23680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446005047575755e-05, + "loss": 0.2967, + "step": 23685 + }, + { + "epoch": 1.11, + "learning_rate": 1.6445221262520966e-05, + "loss": 0.0835, + "step": 23690 + }, + { + "epoch": 1.11, + "learning_rate": 1.6444437477466183e-05, + "loss": 0.0873, + "step": 23695 + }, + { + "epoch": 1.11, + "learning_rate": 1.6443653692411394e-05, + "loss": 0.1089, + "step": 23700 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442869907356608e-05, + "loss": 0.1707, + "step": 23705 + }, + { + "epoch": 1.11, + "learning_rate": 1.644208612230182e-05, + "loss": 0.191, + "step": 23710 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441302337247035e-05, + "loss": 0.124, + "step": 23715 + }, + { + "epoch": 1.11, + "learning_rate": 1.644051855219225e-05, + "loss": 0.2111, + "step": 23720 + }, + { + "epoch": 1.11, + "learning_rate": 1.643973476713746e-05, + "loss": 0.1918, + "step": 23725 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438950982082674e-05, + "loss": 0.4829, + "step": 23730 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438167197027888e-05, + "loss": 0.3964, + "step": 23735 + }, + { + "epoch": 1.11, + "learning_rate": 1.64373834119731e-05, + "loss": 0.0372, + "step": 23740 + }, + { + "epoch": 1.11, + "learning_rate": 1.6436599626918315e-05, + "loss": 0.0559, + "step": 23745 + }, + { + "epoch": 1.11, + "learning_rate": 1.643581584186353e-05, + "loss": 0.1288, + "step": 23750 + }, + { + "epoch": 1.11, + "learning_rate": 1.6435032056808743e-05, + "loss": 0.1367, + "step": 23755 + }, + { + "epoch": 1.11, + "learning_rate": 1.6434248271753957e-05, + "loss": 0.0864, + "step": 23760 + }, + { + "epoch": 1.11, + "learning_rate": 1.6433464486699168e-05, + "loss": 0.1882, + "step": 23765 + }, + { + "epoch": 1.11, + "learning_rate": 1.6432680701644385e-05, + "loss": 0.2035, + "step": 23770 + }, + { + "epoch": 1.11, + "learning_rate": 1.6431896916589595e-05, + "loss": 0.3376, + "step": 23775 + }, + { + "epoch": 1.11, + "learning_rate": 1.643111313153481e-05, + "loss": 0.5252, + "step": 23780 + }, + { + "epoch": 1.11, + "learning_rate": 1.6430329346480023e-05, + "loss": 0.2832, + "step": 23785 + }, + { + "epoch": 1.11, + "learning_rate": 1.6429545561425234e-05, + "loss": 0.0732, + "step": 23790 + }, + { + "epoch": 1.11, + "learning_rate": 1.642876177637045e-05, + "loss": 0.0599, + "step": 23795 + }, + { + "epoch": 1.11, + "learning_rate": 1.642797799131566e-05, + "loss": 0.1289, + "step": 23800 + }, + { + "epoch": 1.11, + "learning_rate": 1.6427194206260876e-05, + "loss": 0.1547, + "step": 23805 + }, + { + "epoch": 1.11, + "learning_rate": 1.642641042120609e-05, + "loss": 0.0893, + "step": 23810 + }, + { + "epoch": 1.11, + "learning_rate": 1.6425626636151303e-05, + "loss": 0.1896, + "step": 23815 + }, + { + "epoch": 1.11, + "learning_rate": 1.6424842851096517e-05, + "loss": 0.2018, + "step": 23820 + }, + { + "epoch": 1.11, + "learning_rate": 1.642405906604173e-05, + "loss": 0.2314, + "step": 23825 + }, + { + "epoch": 1.11, + "learning_rate": 1.642327528098694e-05, + "loss": 0.6314, + "step": 23830 + }, + { + "epoch": 1.11, + "learning_rate": 1.642249149593216e-05, + "loss": 0.2099, + "step": 23835 + }, + { + "epoch": 1.11, + "learning_rate": 1.642170771087737e-05, + "loss": 0.0935, + "step": 23840 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420923925822583e-05, + "loss": 0.0824, + "step": 23845 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420140140767797e-05, + "loss": 0.1094, + "step": 23850 + }, + { + "epoch": 1.11, + "learning_rate": 1.641935635571301e-05, + "loss": 0.1203, + "step": 23855 + }, + { + "epoch": 1.11, + "learning_rate": 1.6418572570658225e-05, + "loss": 0.0716, + "step": 23860 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417788785603436e-05, + "loss": 0.0886, + "step": 23865 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417005000548653e-05, + "loss": 0.2079, + "step": 23870 + }, + { + "epoch": 1.11, + "learning_rate": 1.6416221215493863e-05, + "loss": 0.2192, + "step": 23875 + }, + { + "epoch": 1.11, + "learning_rate": 1.6415437430439077e-05, + "loss": 0.1609, + "step": 23880 + }, + { + "epoch": 1.11, + "learning_rate": 1.641465364538429e-05, + "loss": 0.427, + "step": 23885 + }, + { + "epoch": 1.11, + "learning_rate": 1.6413869860329505e-05, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.11, + "learning_rate": 1.641308607527472e-05, + "loss": 0.0976, + "step": 23895 + }, + { + "epoch": 1.12, + "learning_rate": 1.6412302290219933e-05, + "loss": 0.1183, + "step": 23900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6411518505165143e-05, + "loss": 0.1117, + "step": 23905 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410734720110357e-05, + "loss": 0.1788, + "step": 23910 + }, + { + "epoch": 1.12, + "learning_rate": 1.640995093505557e-05, + "loss": 0.1834, + "step": 23915 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409167150000785e-05, + "loss": 0.2126, + "step": 23920 + }, + { + "epoch": 1.12, + "learning_rate": 1.6408383364946e-05, + "loss": 0.4336, + "step": 23925 + }, + { + "epoch": 1.12, + "learning_rate": 1.6407599579891213e-05, + "loss": 0.3505, + "step": 23930 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406815794836427e-05, + "loss": 0.3065, + "step": 23935 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406032009781637e-05, + "loss": 0.0399, + "step": 23940 + }, + { + "epoch": 1.12, + "learning_rate": 1.640524822472685e-05, + "loss": 0.0772, + "step": 23945 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404464439672065e-05, + "loss": 0.0448, + "step": 23950 + }, + { + "epoch": 1.12, + "learning_rate": 1.640368065461728e-05, + "loss": 0.0798, + "step": 23955 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402896869562493e-05, + "loss": 0.0925, + "step": 23960 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402113084507707e-05, + "loss": 0.1361, + "step": 23965 + }, + { + "epoch": 1.12, + "learning_rate": 1.640132929945292e-05, + "loss": 0.1452, + "step": 23970 + }, + { + "epoch": 1.12, + "learning_rate": 1.640054551439813e-05, + "loss": 0.2325, + "step": 23975 + }, + { + "epoch": 1.12, + "learning_rate": 1.6399761729343345e-05, + "loss": 0.3527, + "step": 23980 + }, + { + "epoch": 1.12, + "learning_rate": 1.639897794428856e-05, + "loss": 0.2864, + "step": 23985 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398194159233773e-05, + "loss": 0.0622, + "step": 23990 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397410374178987e-05, + "loss": 0.0723, + "step": 23995 + }, + { + "epoch": 1.12, + "learning_rate": 1.63966265891242e-05, + "loss": 0.1408, + "step": 24000 + }, + { + "epoch": 1.12, + "learning_rate": 1.639584280406941e-05, + "loss": 0.1051, + "step": 24005 + }, + { + "epoch": 1.12, + "learning_rate": 1.639505901901463e-05, + "loss": 0.1084, + "step": 24010 + }, + { + "epoch": 1.12, + "learning_rate": 1.639427523395984e-05, + "loss": 0.2103, + "step": 24015 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393491448905053e-05, + "loss": 0.2023, + "step": 24020 + }, + { + "epoch": 1.12, + "learning_rate": 1.6392707663850267e-05, + "loss": 0.2348, + "step": 24025 + }, + { + "epoch": 1.12, + "learning_rate": 1.639192387879548e-05, + "loss": 0.3672, + "step": 24030 + }, + { + "epoch": 1.12, + "learning_rate": 1.6391140093740695e-05, + "loss": 0.3526, + "step": 24035 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390356308685905e-05, + "loss": 0.0778, + "step": 24040 + }, + { + "epoch": 1.12, + "learning_rate": 1.638957252363112e-05, + "loss": 0.1023, + "step": 24045 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388788738576333e-05, + "loss": 0.0873, + "step": 24050 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388004953521547e-05, + "loss": 0.1086, + "step": 24055 + }, + { + "epoch": 1.12, + "learning_rate": 1.638722116846676e-05, + "loss": 0.1551, + "step": 24060 + }, + { + "epoch": 1.12, + "learning_rate": 1.6386437383411975e-05, + "loss": 0.1625, + "step": 24065 + }, + { + "epoch": 1.12, + "learning_rate": 1.638565359835719e-05, + "loss": 0.2734, + "step": 24070 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384869813302403e-05, + "loss": 0.2265, + "step": 24075 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384086028247613e-05, + "loss": 0.2489, + "step": 24080 + }, + { + "epoch": 1.12, + "learning_rate": 1.638330224319283e-05, + "loss": 0.3076, + "step": 24085 + }, + { + "epoch": 1.12, + "learning_rate": 1.638251845813804e-05, + "loss": 0.1064, + "step": 24090 + }, + { + "epoch": 1.12, + "learning_rate": 1.6381734673083255e-05, + "loss": 0.0911, + "step": 24095 + }, + { + "epoch": 1.12, + "learning_rate": 1.638095088802847e-05, + "loss": 0.0852, + "step": 24100 + }, + { + "epoch": 1.12, + "learning_rate": 1.638016710297368e-05, + "loss": 0.1027, + "step": 24105 + }, + { + "epoch": 1.13, + "learning_rate": 1.6379383317918897e-05, + "loss": 0.1949, + "step": 24110 + }, + { + "epoch": 1.13, + "learning_rate": 1.6378599532864107e-05, + "loss": 0.1723, + "step": 24115 + }, + { + "epoch": 1.13, + "learning_rate": 1.637781574780932e-05, + "loss": 0.207, + "step": 24120 + }, + { + "epoch": 1.13, + "learning_rate": 1.6377031962754535e-05, + "loss": 0.2483, + "step": 24125 + }, + { + "epoch": 1.13, + "learning_rate": 1.637624817769975e-05, + "loss": 0.4968, + "step": 24130 + }, + { + "epoch": 1.13, + "learning_rate": 1.6375464392644963e-05, + "loss": 0.3258, + "step": 24135 + }, + { + "epoch": 1.13, + "learning_rate": 1.6374680607590177e-05, + "loss": 0.0432, + "step": 24140 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373896822535387e-05, + "loss": 0.0878, + "step": 24145 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373113037480605e-05, + "loss": 0.163, + "step": 24150 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372329252425815e-05, + "loss": 0.2356, + "step": 24155 + }, + { + "epoch": 1.13, + "learning_rate": 1.637154546737103e-05, + "loss": 0.1573, + "step": 24160 + }, + { + "epoch": 1.13, + "learning_rate": 1.6370761682316243e-05, + "loss": 0.1774, + "step": 24165 + }, + { + "epoch": 1.13, + "learning_rate": 1.6369977897261457e-05, + "loss": 0.1667, + "step": 24170 + }, + { + "epoch": 1.13, + "learning_rate": 1.636919411220667e-05, + "loss": 0.1914, + "step": 24175 + }, + { + "epoch": 1.13, + "learning_rate": 1.636841032715188e-05, + "loss": 0.3366, + "step": 24180 + }, + { + "epoch": 1.13, + "learning_rate": 1.63676265420971e-05, + "loss": 0.389, + "step": 24185 + }, + { + "epoch": 1.13, + "learning_rate": 1.636684275704231e-05, + "loss": 0.0601, + "step": 24190 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366058971987523e-05, + "loss": 0.0784, + "step": 24195 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365275186932737e-05, + "loss": 0.1066, + "step": 24200 + }, + { + "epoch": 1.13, + "learning_rate": 1.636449140187795e-05, + "loss": 0.1525, + "step": 24205 + }, + { + "epoch": 1.13, + "learning_rate": 1.6363707616823165e-05, + "loss": 0.1988, + "step": 24210 + }, + { + "epoch": 1.13, + "learning_rate": 1.636292383176838e-05, + "loss": 0.252, + "step": 24215 + }, + { + "epoch": 1.13, + "learning_rate": 1.636214004671359e-05, + "loss": 0.3322, + "step": 24220 + }, + { + "epoch": 1.13, + "learning_rate": 1.6361356261658806e-05, + "loss": 0.2519, + "step": 24225 + }, + { + "epoch": 1.13, + "learning_rate": 1.6360572476604017e-05, + "loss": 0.2909, + "step": 24230 + }, + { + "epoch": 1.13, + "learning_rate": 1.635978869154923e-05, + "loss": 0.3463, + "step": 24235 + }, + { + "epoch": 1.13, + "learning_rate": 1.6359004906494445e-05, + "loss": 0.0224, + "step": 24240 + }, + { + "epoch": 1.13, + "learning_rate": 1.635822112143966e-05, + "loss": 0.0675, + "step": 24245 + }, + { + "epoch": 1.13, + "learning_rate": 1.6357437336384872e-05, + "loss": 0.1372, + "step": 24250 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356653551330083e-05, + "loss": 0.1832, + "step": 24255 + }, + { + "epoch": 1.13, + "learning_rate": 1.6355869766275297e-05, + "loss": 0.1108, + "step": 24260 + }, + { + "epoch": 1.13, + "learning_rate": 1.635508598122051e-05, + "loss": 0.1944, + "step": 24265 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354302196165725e-05, + "loss": 0.1828, + "step": 24270 + }, + { + "epoch": 1.13, + "learning_rate": 1.635351841111094e-05, + "loss": 0.2393, + "step": 24275 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352734626056153e-05, + "loss": 0.4225, + "step": 24280 + }, + { + "epoch": 1.13, + "learning_rate": 1.6351950841001366e-05, + "loss": 0.3735, + "step": 24285 + }, + { + "epoch": 1.13, + "learning_rate": 1.635116705594658e-05, + "loss": 0.0227, + "step": 24290 + }, + { + "epoch": 1.13, + "learning_rate": 1.635038327089179e-05, + "loss": 0.0606, + "step": 24295 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349599485837005e-05, + "loss": 0.0897, + "step": 24300 + }, + { + "epoch": 1.13, + "learning_rate": 1.634881570078222e-05, + "loss": 0.1893, + "step": 24305 + }, + { + "epoch": 1.13, + "learning_rate": 1.6348031915727433e-05, + "loss": 0.1348, + "step": 24310 + }, + { + "epoch": 1.13, + "learning_rate": 1.6347248130672646e-05, + "loss": 0.1975, + "step": 24315 + }, + { + "epoch": 1.13, + "learning_rate": 1.6346464345617857e-05, + "loss": 0.2427, + "step": 24320 + }, + { + "epoch": 1.14, + "learning_rate": 1.6345680560563074e-05, + "loss": 0.2059, + "step": 24325 + }, + { + "epoch": 1.14, + "learning_rate": 1.6344896775508285e-05, + "loss": 0.4388, + "step": 24330 + }, + { + "epoch": 1.14, + "learning_rate": 1.63441129904535e-05, + "loss": 0.2305, + "step": 24335 + }, + { + "epoch": 1.14, + "learning_rate": 1.6343329205398713e-05, + "loss": 0.0722, + "step": 24340 + }, + { + "epoch": 1.14, + "learning_rate": 1.6342545420343927e-05, + "loss": 0.1261, + "step": 24345 + }, + { + "epoch": 1.14, + "learning_rate": 1.634176163528914e-05, + "loss": 0.0991, + "step": 24350 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340977850234354e-05, + "loss": 0.1313, + "step": 24355 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340194065179565e-05, + "loss": 0.1284, + "step": 24360 + }, + { + "epoch": 1.14, + "learning_rate": 1.633941028012478e-05, + "loss": 0.2371, + "step": 24365 + }, + { + "epoch": 1.14, + "learning_rate": 1.6338626495069993e-05, + "loss": 0.2578, + "step": 24370 + }, + { + "epoch": 1.14, + "learning_rate": 1.6337842710015207e-05, + "loss": 0.2852, + "step": 24375 + }, + { + "epoch": 1.14, + "learning_rate": 1.633705892496042e-05, + "loss": 0.3184, + "step": 24380 + }, + { + "epoch": 1.14, + "learning_rate": 1.6336275139905634e-05, + "loss": 0.2727, + "step": 24385 + }, + { + "epoch": 1.14, + "learning_rate": 1.6335491354850848e-05, + "loss": 0.0995, + "step": 24390 + }, + { + "epoch": 1.14, + "learning_rate": 1.633470756979606e-05, + "loss": 0.075, + "step": 24395 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333923784741276e-05, + "loss": 0.1169, + "step": 24400 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333139999686487e-05, + "loss": 0.1186, + "step": 24405 + }, + { + "epoch": 1.14, + "learning_rate": 1.63323562146317e-05, + "loss": 0.1519, + "step": 24410 + }, + { + "epoch": 1.14, + "learning_rate": 1.6331572429576914e-05, + "loss": 0.1897, + "step": 24415 + }, + { + "epoch": 1.14, + "learning_rate": 1.633078864452213e-05, + "loss": 0.2469, + "step": 24420 + }, + { + "epoch": 1.14, + "learning_rate": 1.6330004859467342e-05, + "loss": 0.2405, + "step": 24425 + }, + { + "epoch": 1.14, + "learning_rate": 1.6329221074412553e-05, + "loss": 0.5484, + "step": 24430 + }, + { + "epoch": 1.14, + "learning_rate": 1.6328437289357767e-05, + "loss": 0.3294, + "step": 24435 + }, + { + "epoch": 1.14, + "learning_rate": 1.632765350430298e-05, + "loss": 0.0427, + "step": 24440 + }, + { + "epoch": 1.14, + "learning_rate": 1.6326869719248194e-05, + "loss": 0.1192, + "step": 24445 + }, + { + "epoch": 1.14, + "learning_rate": 1.632608593419341e-05, + "loss": 0.0817, + "step": 24450 + }, + { + "epoch": 1.14, + "learning_rate": 1.6325302149138622e-05, + "loss": 0.0814, + "step": 24455 + }, + { + "epoch": 1.14, + "learning_rate": 1.6324518364083833e-05, + "loss": 0.1558, + "step": 24460 + }, + { + "epoch": 1.14, + "learning_rate": 1.632373457902905e-05, + "loss": 0.149, + "step": 24465 + }, + { + "epoch": 1.14, + "learning_rate": 1.632295079397426e-05, + "loss": 0.1717, + "step": 24470 + }, + { + "epoch": 1.14, + "learning_rate": 1.6322167008919475e-05, + "loss": 0.2424, + "step": 24475 + }, + { + "epoch": 1.14, + "learning_rate": 1.632138322386469e-05, + "loss": 0.2916, + "step": 24480 + }, + { + "epoch": 1.14, + "learning_rate": 1.6320599438809902e-05, + "loss": 0.3675, + "step": 24485 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319815653755116e-05, + "loss": 0.1092, + "step": 24490 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319031868700327e-05, + "loss": 0.086, + "step": 24495 + }, + { + "epoch": 1.14, + "learning_rate": 1.6318248083645544e-05, + "loss": 0.1483, + "step": 24500 + }, + { + "epoch": 1.14, + "learning_rate": 1.6317464298590755e-05, + "loss": 0.1295, + "step": 24505 + }, + { + "epoch": 1.14, + "learning_rate": 1.631668051353597e-05, + "loss": 0.1388, + "step": 24510 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315896728481182e-05, + "loss": 0.2248, + "step": 24515 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315112943426396e-05, + "loss": 0.229, + "step": 24520 + }, + { + "epoch": 1.14, + "learning_rate": 1.631432915837161e-05, + "loss": 0.2604, + "step": 24525 + }, + { + "epoch": 1.14, + "learning_rate": 1.6313545373316824e-05, + "loss": 0.3488, + "step": 24530 + }, + { + "epoch": 1.14, + "learning_rate": 1.6312761588262035e-05, + "loss": 0.2816, + "step": 24535 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311977803207252e-05, + "loss": 0.0694, + "step": 24540 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311194018152462e-05, + "loss": 0.0728, + "step": 24545 + }, + { + "epoch": 1.15, + "learning_rate": 1.6310410233097676e-05, + "loss": 0.058, + "step": 24550 + }, + { + "epoch": 1.15, + "learning_rate": 1.630962644804289e-05, + "loss": 0.0766, + "step": 24555 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308842662988104e-05, + "loss": 0.1177, + "step": 24560 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308058877933318e-05, + "loss": 0.1314, + "step": 24565 + }, + { + "epoch": 1.15, + "learning_rate": 1.630727509287853e-05, + "loss": 0.1472, + "step": 24570 + }, + { + "epoch": 1.15, + "learning_rate": 1.6306491307823742e-05, + "loss": 0.3199, + "step": 24575 + }, + { + "epoch": 1.15, + "learning_rate": 1.6305707522768956e-05, + "loss": 0.3317, + "step": 24580 + }, + { + "epoch": 1.15, + "learning_rate": 1.630492373771417e-05, + "loss": 0.3662, + "step": 24585 + }, + { + "epoch": 1.15, + "learning_rate": 1.6304139952659384e-05, + "loss": 0.0684, + "step": 24590 + }, + { + "epoch": 1.15, + "learning_rate": 1.6303356167604598e-05, + "loss": 0.0825, + "step": 24595 + }, + { + "epoch": 1.15, + "learning_rate": 1.6302572382549812e-05, + "loss": 0.0647, + "step": 24600 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301788597495026e-05, + "loss": 0.101, + "step": 24605 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301004812440236e-05, + "loss": 0.1126, + "step": 24610 + }, + { + "epoch": 1.15, + "learning_rate": 1.6300221027385454e-05, + "loss": 0.1619, + "step": 24615 + }, + { + "epoch": 1.15, + "learning_rate": 1.6299437242330664e-05, + "loss": 0.248, + "step": 24620 + }, + { + "epoch": 1.15, + "learning_rate": 1.6298653457275878e-05, + "loss": 0.2743, + "step": 24625 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297869672221092e-05, + "loss": 0.4154, + "step": 24630 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297085887166303e-05, + "loss": 0.3563, + "step": 24635 + }, + { + "epoch": 1.15, + "learning_rate": 1.629630210211152e-05, + "loss": 0.0512, + "step": 24640 + }, + { + "epoch": 1.15, + "learning_rate": 1.629551831705673e-05, + "loss": 0.0548, + "step": 24645 + }, + { + "epoch": 1.15, + "learning_rate": 1.6294734532001944e-05, + "loss": 0.0815, + "step": 24650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293950746947158e-05, + "loss": 0.1276, + "step": 24655 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293166961892372e-05, + "loss": 0.141, + "step": 24660 + }, + { + "epoch": 1.15, + "learning_rate": 1.6292383176837586e-05, + "loss": 0.2118, + "step": 24665 + }, + { + "epoch": 1.15, + "learning_rate": 1.62915993917828e-05, + "loss": 0.1749, + "step": 24670 + }, + { + "epoch": 1.15, + "learning_rate": 1.629081560672801e-05, + "loss": 0.2312, + "step": 24675 + }, + { + "epoch": 1.15, + "learning_rate": 1.6290031821673228e-05, + "loss": 0.5181, + "step": 24680 + }, + { + "epoch": 1.15, + "learning_rate": 1.6289248036618438e-05, + "loss": 0.2571, + "step": 24685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6288464251563652e-05, + "loss": 0.0409, + "step": 24690 + }, + { + "epoch": 1.15, + "learning_rate": 1.6287680466508866e-05, + "loss": 0.0747, + "step": 24695 + }, + { + "epoch": 1.15, + "learning_rate": 1.628689668145408e-05, + "loss": 0.1181, + "step": 24700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6286112896399294e-05, + "loss": 0.1395, + "step": 24705 + }, + { + "epoch": 1.15, + "learning_rate": 1.6285329111344504e-05, + "loss": 0.1435, + "step": 24710 + }, + { + "epoch": 1.15, + "learning_rate": 1.628454532628972e-05, + "loss": 0.174, + "step": 24715 + }, + { + "epoch": 1.15, + "learning_rate": 1.6283761541234932e-05, + "loss": 0.1197, + "step": 24720 + }, + { + "epoch": 1.15, + "learning_rate": 1.6282977756180146e-05, + "loss": 0.2038, + "step": 24725 + }, + { + "epoch": 1.15, + "learning_rate": 1.628219397112536e-05, + "loss": 0.3092, + "step": 24730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6281410186070574e-05, + "loss": 0.4175, + "step": 24735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6280626401015788e-05, + "loss": 0.0201, + "step": 24740 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279842615961002e-05, + "loss": 0.1052, + "step": 24745 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279058830906212e-05, + "loss": 0.1185, + "step": 24750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6278275045851426e-05, + "loss": 0.1012, + "step": 24755 + }, + { + "epoch": 1.16, + "learning_rate": 1.627749126079664e-05, + "loss": 0.2098, + "step": 24760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6276707475741854e-05, + "loss": 0.1507, + "step": 24765 + }, + { + "epoch": 1.16, + "learning_rate": 1.6275923690687068e-05, + "loss": 0.1518, + "step": 24770 + }, + { + "epoch": 1.16, + "learning_rate": 1.627513990563228e-05, + "loss": 0.2168, + "step": 24775 + }, + { + "epoch": 1.16, + "learning_rate": 1.6274356120577496e-05, + "loss": 0.3516, + "step": 24780 + }, + { + "epoch": 1.16, + "learning_rate": 1.6273572335522706e-05, + "loss": 0.3077, + "step": 24785 + }, + { + "epoch": 1.16, + "learning_rate": 1.627278855046792e-05, + "loss": 0.0342, + "step": 24790 + }, + { + "epoch": 1.16, + "learning_rate": 1.6272004765413134e-05, + "loss": 0.083, + "step": 24795 + }, + { + "epoch": 1.16, + "learning_rate": 1.6271220980358348e-05, + "loss": 0.0801, + "step": 24800 + }, + { + "epoch": 1.16, + "learning_rate": 1.6270437195303562e-05, + "loss": 0.1694, + "step": 24805 + }, + { + "epoch": 1.16, + "learning_rate": 1.6269653410248776e-05, + "loss": 0.1143, + "step": 24810 + }, + { + "epoch": 1.16, + "learning_rate": 1.626886962519399e-05, + "loss": 0.0924, + "step": 24815 + }, + { + "epoch": 1.16, + "learning_rate": 1.62680858401392e-05, + "loss": 0.237, + "step": 24820 + }, + { + "epoch": 1.16, + "learning_rate": 1.6267302055084414e-05, + "loss": 0.1925, + "step": 24825 + }, + { + "epoch": 1.16, + "learning_rate": 1.6266518270029628e-05, + "loss": 0.3285, + "step": 24830 + }, + { + "epoch": 1.16, + "learning_rate": 1.6265734484974842e-05, + "loss": 0.4066, + "step": 24835 + }, + { + "epoch": 1.16, + "learning_rate": 1.6264950699920056e-05, + "loss": 0.0913, + "step": 24840 + }, + { + "epoch": 1.16, + "learning_rate": 1.626416691486527e-05, + "loss": 0.09, + "step": 24845 + }, + { + "epoch": 1.16, + "learning_rate": 1.626338312981048e-05, + "loss": 0.1174, + "step": 24850 + }, + { + "epoch": 1.16, + "learning_rate": 1.6262599344755697e-05, + "loss": 0.0969, + "step": 24855 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261815559700908e-05, + "loss": 0.1173, + "step": 24860 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261031774646122e-05, + "loss": 0.222, + "step": 24865 + }, + { + "epoch": 1.16, + "learning_rate": 1.6260247989591336e-05, + "loss": 0.2767, + "step": 24870 + }, + { + "epoch": 1.16, + "learning_rate": 1.625946420453655e-05, + "loss": 0.1782, + "step": 24875 + }, + { + "epoch": 1.16, + "learning_rate": 1.6258680419481764e-05, + "loss": 0.2277, + "step": 24880 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257896634426974e-05, + "loss": 0.2343, + "step": 24885 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257112849372188e-05, + "loss": 0.3339, + "step": 24890 + }, + { + "epoch": 1.16, + "learning_rate": 1.6256329064317402e-05, + "loss": 0.0648, + "step": 24895 + }, + { + "epoch": 1.16, + "learning_rate": 1.6255545279262616e-05, + "loss": 0.1105, + "step": 24900 + }, + { + "epoch": 1.16, + "learning_rate": 1.625476149420783e-05, + "loss": 0.1184, + "step": 24905 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253977709153044e-05, + "loss": 0.166, + "step": 24910 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253193924098258e-05, + "loss": 0.098, + "step": 24915 + }, + { + "epoch": 1.16, + "learning_rate": 1.625241013904347e-05, + "loss": 0.2253, + "step": 24920 + }, + { + "epoch": 1.16, + "learning_rate": 1.6251626353988682e-05, + "loss": 0.2517, + "step": 24925 + }, + { + "epoch": 1.16, + "learning_rate": 1.62508425689339e-05, + "loss": 0.287, + "step": 24930 + }, + { + "epoch": 1.16, + "learning_rate": 1.625005878387911e-05, + "loss": 0.3426, + "step": 24935 + }, + { + "epoch": 1.16, + "learning_rate": 1.6249274998824324e-05, + "loss": 0.0747, + "step": 24940 + }, + { + "epoch": 1.16, + "learning_rate": 1.6248491213769538e-05, + "loss": 0.0584, + "step": 24945 + }, + { + "epoch": 1.16, + "learning_rate": 1.6247707428714748e-05, + "loss": 0.1144, + "step": 24950 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246923643659965e-05, + "loss": 0.1062, + "step": 24955 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246139858605176e-05, + "loss": 0.1777, + "step": 24960 + }, + { + "epoch": 1.16, + "learning_rate": 1.624535607355039e-05, + "loss": 0.1773, + "step": 24965 + }, + { + "epoch": 1.17, + "learning_rate": 1.6244572288495604e-05, + "loss": 0.2313, + "step": 24970 + }, + { + "epoch": 1.17, + "learning_rate": 1.6243788503440818e-05, + "loss": 0.2795, + "step": 24975 + }, + { + "epoch": 1.17, + "learning_rate": 1.624300471838603e-05, + "loss": 0.3885, + "step": 24980 + }, + { + "epoch": 1.17, + "learning_rate": 1.6242220933331245e-05, + "loss": 0.3908, + "step": 24985 + }, + { + "epoch": 1.17, + "learning_rate": 1.6241437148276456e-05, + "loss": 0.0605, + "step": 24990 + }, + { + "epoch": 1.17, + "learning_rate": 1.6240653363221673e-05, + "loss": 0.085, + "step": 24995 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239869578166884e-05, + "loss": 0.1085, + "step": 25000 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239085793112098e-05, + "loss": 0.0463, + "step": 25005 + }, + { + "epoch": 1.17, + "learning_rate": 1.623830200805731e-05, + "loss": 0.1531, + "step": 25010 + }, + { + "epoch": 1.17, + "learning_rate": 1.6237518223002526e-05, + "loss": 0.2199, + "step": 25015 + }, + { + "epoch": 1.17, + "learning_rate": 1.623673443794774e-05, + "loss": 0.2393, + "step": 25020 + }, + { + "epoch": 1.17, + "learning_rate": 1.623595065289295e-05, + "loss": 0.3733, + "step": 25025 + }, + { + "epoch": 1.17, + "learning_rate": 1.6235166867838167e-05, + "loss": 0.3225, + "step": 25030 + }, + { + "epoch": 1.17, + "learning_rate": 1.6234383082783378e-05, + "loss": 0.2117, + "step": 25035 + }, + { + "epoch": 1.17, + "learning_rate": 1.623359929772859e-05, + "loss": 0.0419, + "step": 25040 + }, + { + "epoch": 1.17, + "learning_rate": 1.6232815512673806e-05, + "loss": 0.1021, + "step": 25045 + }, + { + "epoch": 1.17, + "learning_rate": 1.623203172761902e-05, + "loss": 0.1235, + "step": 25050 + }, + { + "epoch": 1.17, + "learning_rate": 1.6231247942564233e-05, + "loss": 0.1249, + "step": 25055 + }, + { + "epoch": 1.17, + "learning_rate": 1.6230464157509447e-05, + "loss": 0.192, + "step": 25060 + }, + { + "epoch": 1.17, + "learning_rate": 1.6229680372454658e-05, + "loss": 0.1072, + "step": 25065 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228896587399875e-05, + "loss": 0.1035, + "step": 25070 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228112802345086e-05, + "loss": 0.2934, + "step": 25075 + }, + { + "epoch": 1.17, + "learning_rate": 1.62273290172903e-05, + "loss": 0.4639, + "step": 25080 + }, + { + "epoch": 1.17, + "learning_rate": 1.6226545232235513e-05, + "loss": 0.235, + "step": 25085 + }, + { + "epoch": 1.17, + "learning_rate": 1.6225761447180727e-05, + "loss": 0.0493, + "step": 25090 + }, + { + "epoch": 1.17, + "learning_rate": 1.622497766212594e-05, + "loss": 0.0572, + "step": 25095 + }, + { + "epoch": 1.17, + "learning_rate": 1.6224193877071152e-05, + "loss": 0.0987, + "step": 25100 + }, + { + "epoch": 1.17, + "learning_rate": 1.6223410092016366e-05, + "loss": 0.1056, + "step": 25105 + }, + { + "epoch": 1.17, + "learning_rate": 1.622262630696158e-05, + "loss": 0.1613, + "step": 25110 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221842521906793e-05, + "loss": 0.1629, + "step": 25115 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221058736852007e-05, + "loss": 0.2078, + "step": 25120 + }, + { + "epoch": 1.17, + "learning_rate": 1.622027495179722e-05, + "loss": 0.1947, + "step": 25125 + }, + { + "epoch": 1.17, + "learning_rate": 1.6219491166742435e-05, + "loss": 0.3467, + "step": 25130 + }, + { + "epoch": 1.17, + "learning_rate": 1.621870738168765e-05, + "loss": 0.2284, + "step": 25135 + }, + { + "epoch": 1.17, + "learning_rate": 1.621792359663286e-05, + "loss": 0.0624, + "step": 25140 + }, + { + "epoch": 1.17, + "learning_rate": 1.6217139811578074e-05, + "loss": 0.0692, + "step": 25145 + }, + { + "epoch": 1.17, + "learning_rate": 1.6216356026523287e-05, + "loss": 0.1222, + "step": 25150 + }, + { + "epoch": 1.17, + "learning_rate": 1.62155722414685e-05, + "loss": 0.1692, + "step": 25155 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214788456413715e-05, + "loss": 0.1629, + "step": 25160 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214004671358926e-05, + "loss": 0.1671, + "step": 25165 + }, + { + "epoch": 1.17, + "learning_rate": 1.6213220886304143e-05, + "loss": 0.2507, + "step": 25170 + }, + { + "epoch": 1.17, + "learning_rate": 1.6212437101249354e-05, + "loss": 0.2868, + "step": 25175 + }, + { + "epoch": 1.17, + "learning_rate": 1.6211653316194567e-05, + "loss": 0.3525, + "step": 25180 + }, + { + "epoch": 1.18, + "learning_rate": 1.621086953113978e-05, + "loss": 0.3272, + "step": 25185 + }, + { + "epoch": 1.18, + "learning_rate": 1.6210085746084995e-05, + "loss": 0.0452, + "step": 25190 + }, + { + "epoch": 1.18, + "learning_rate": 1.620930196103021e-05, + "loss": 0.0713, + "step": 25195 + }, + { + "epoch": 1.18, + "learning_rate": 1.6208518175975423e-05, + "loss": 0.0912, + "step": 25200 + }, + { + "epoch": 1.18, + "learning_rate": 1.6207734390920634e-05, + "loss": 0.1543, + "step": 25205 + }, + { + "epoch": 1.18, + "learning_rate": 1.6206950605865848e-05, + "loss": 0.1041, + "step": 25210 + }, + { + "epoch": 1.18, + "learning_rate": 1.620616682081106e-05, + "loss": 0.141, + "step": 25215 + }, + { + "epoch": 1.18, + "learning_rate": 1.6205383035756275e-05, + "loss": 0.1966, + "step": 25220 + }, + { + "epoch": 1.18, + "learning_rate": 1.620459925070149e-05, + "loss": 0.3014, + "step": 25225 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203815465646703e-05, + "loss": 0.4792, + "step": 25230 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203031680591917e-05, + "loss": 0.2327, + "step": 25235 + }, + { + "epoch": 1.18, + "learning_rate": 1.6202247895537128e-05, + "loss": 0.0362, + "step": 25240 + }, + { + "epoch": 1.18, + "learning_rate": 1.6201464110482345e-05, + "loss": 0.1217, + "step": 25245 + }, + { + "epoch": 1.18, + "learning_rate": 1.6200680325427555e-05, + "loss": 0.0556, + "step": 25250 + }, + { + "epoch": 1.18, + "learning_rate": 1.619989654037277e-05, + "loss": 0.1454, + "step": 25255 + }, + { + "epoch": 1.18, + "learning_rate": 1.6199112755317983e-05, + "loss": 0.1041, + "step": 25260 + }, + { + "epoch": 1.18, + "learning_rate": 1.6198328970263197e-05, + "loss": 0.1451, + "step": 25265 + }, + { + "epoch": 1.18, + "learning_rate": 1.619754518520841e-05, + "loss": 0.2237, + "step": 25270 + }, + { + "epoch": 1.18, + "learning_rate": 1.619691815716458e-05, + "loss": 0.2745, + "step": 25275 + }, + { + "epoch": 1.18, + "learning_rate": 1.6196134372109792e-05, + "loss": 0.4215, + "step": 25280 + }, + { + "epoch": 1.18, + "learning_rate": 1.619535058705501e-05, + "loss": 0.481, + "step": 25285 + }, + { + "epoch": 1.18, + "learning_rate": 1.619456680200022e-05, + "loss": 0.0337, + "step": 25290 + }, + { + "epoch": 1.18, + "learning_rate": 1.6193783016945434e-05, + "loss": 0.102, + "step": 25295 + }, + { + "epoch": 1.18, + "learning_rate": 1.6192999231890648e-05, + "loss": 0.0853, + "step": 25300 + }, + { + "epoch": 1.18, + "learning_rate": 1.619221544683586e-05, + "loss": 0.1298, + "step": 25305 + }, + { + "epoch": 1.18, + "learning_rate": 1.6191431661781075e-05, + "loss": 0.151, + "step": 25310 + }, + { + "epoch": 1.18, + "learning_rate": 1.619064787672629e-05, + "loss": 0.1743, + "step": 25315 + }, + { + "epoch": 1.18, + "learning_rate": 1.61898640916715e-05, + "loss": 0.247, + "step": 25320 + }, + { + "epoch": 1.18, + "learning_rate": 1.6189080306616717e-05, + "loss": 0.2403, + "step": 25325 + }, + { + "epoch": 1.18, + "learning_rate": 1.6188296521561928e-05, + "loss": 0.4411, + "step": 25330 + }, + { + "epoch": 1.18, + "learning_rate": 1.618751273650714e-05, + "loss": 0.2575, + "step": 25335 + }, + { + "epoch": 1.18, + "learning_rate": 1.6186728951452355e-05, + "loss": 0.0465, + "step": 25340 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185945166397566e-05, + "loss": 0.0688, + "step": 25345 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185161381342783e-05, + "loss": 0.1129, + "step": 25350 + }, + { + "epoch": 1.18, + "learning_rate": 1.6184377596287994e-05, + "loss": 0.1276, + "step": 25355 + }, + { + "epoch": 1.18, + "learning_rate": 1.6183593811233208e-05, + "loss": 0.0864, + "step": 25360 + }, + { + "epoch": 1.18, + "learning_rate": 1.618281002617842e-05, + "loss": 0.1639, + "step": 25365 + }, + { + "epoch": 1.18, + "learning_rate": 1.6182026241123636e-05, + "loss": 0.1556, + "step": 25370 + }, + { + "epoch": 1.18, + "learning_rate": 1.618124245606885e-05, + "loss": 0.3036, + "step": 25375 + }, + { + "epoch": 1.18, + "learning_rate": 1.6180458671014063e-05, + "loss": 0.3818, + "step": 25380 + }, + { + "epoch": 1.18, + "learning_rate": 1.6179674885959277e-05, + "loss": 0.2293, + "step": 25385 + }, + { + "epoch": 1.18, + "learning_rate": 1.617889110090449e-05, + "loss": 0.0748, + "step": 25390 + }, + { + "epoch": 1.18, + "learning_rate": 1.61781073158497e-05, + "loss": 0.1107, + "step": 25395 + }, + { + "epoch": 1.19, + "learning_rate": 1.6177323530794916e-05, + "loss": 0.166, + "step": 25400 + }, + { + "epoch": 1.19, + "learning_rate": 1.617653974574013e-05, + "loss": 0.0938, + "step": 25405 + }, + { + "epoch": 1.19, + "learning_rate": 1.6175755960685343e-05, + "loss": 0.1765, + "step": 25410 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174972175630557e-05, + "loss": 0.1421, + "step": 25415 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174188390575768e-05, + "loss": 0.169, + "step": 25420 + }, + { + "epoch": 1.19, + "learning_rate": 1.6173404605520985e-05, + "loss": 0.2901, + "step": 25425 + }, + { + "epoch": 1.19, + "learning_rate": 1.6172620820466196e-05, + "loss": 0.4629, + "step": 25430 + }, + { + "epoch": 1.19, + "learning_rate": 1.617183703541141e-05, + "loss": 0.4004, + "step": 25435 + }, + { + "epoch": 1.19, + "learning_rate": 1.6171053250356623e-05, + "loss": 0.0532, + "step": 25440 + }, + { + "epoch": 1.19, + "learning_rate": 1.6170269465301837e-05, + "loss": 0.0672, + "step": 25445 + }, + { + "epoch": 1.19, + "learning_rate": 1.616948568024705e-05, + "loss": 0.1277, + "step": 25450 + }, + { + "epoch": 1.19, + "learning_rate": 1.6168701895192265e-05, + "loss": 0.0931, + "step": 25455 + }, + { + "epoch": 1.19, + "learning_rate": 1.6167918110137476e-05, + "loss": 0.1628, + "step": 25460 + }, + { + "epoch": 1.19, + "learning_rate": 1.616713432508269e-05, + "loss": 0.1642, + "step": 25465 + }, + { + "epoch": 1.19, + "learning_rate": 1.6166350540027903e-05, + "loss": 0.1649, + "step": 25470 + }, + { + "epoch": 1.19, + "learning_rate": 1.6165566754973117e-05, + "loss": 0.2531, + "step": 25475 + }, + { + "epoch": 1.19, + "learning_rate": 1.616478296991833e-05, + "loss": 0.4141, + "step": 25480 + }, + { + "epoch": 1.19, + "learning_rate": 1.6163999184863545e-05, + "loss": 0.3355, + "step": 25485 + }, + { + "epoch": 1.19, + "learning_rate": 1.616321539980876e-05, + "loss": 0.0631, + "step": 25490 + }, + { + "epoch": 1.19, + "learning_rate": 1.616243161475397e-05, + "loss": 0.0915, + "step": 25495 + }, + { + "epoch": 1.19, + "learning_rate": 1.6161647829699187e-05, + "loss": 0.1369, + "step": 25500 + }, + { + "epoch": 1.19, + "learning_rate": 1.6160864044644397e-05, + "loss": 0.1394, + "step": 25505 + }, + { + "epoch": 1.19, + "learning_rate": 1.616008025958961e-05, + "loss": 0.096, + "step": 25510 + }, + { + "epoch": 1.19, + "learning_rate": 1.6159296474534825e-05, + "loss": 0.1094, + "step": 25515 + }, + { + "epoch": 1.19, + "learning_rate": 1.615851268948004e-05, + "loss": 0.1217, + "step": 25520 + }, + { + "epoch": 1.19, + "learning_rate": 1.6157728904425253e-05, + "loss": 0.2451, + "step": 25525 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156945119370464e-05, + "loss": 0.2088, + "step": 25530 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156161334315677e-05, + "loss": 0.2696, + "step": 25535 + }, + { + "epoch": 1.19, + "learning_rate": 1.615537754926089e-05, + "loss": 0.0968, + "step": 25540 + }, + { + "epoch": 1.19, + "learning_rate": 1.6154593764206105e-05, + "loss": 0.0515, + "step": 25545 + }, + { + "epoch": 1.19, + "learning_rate": 1.615380997915132e-05, + "loss": 0.0912, + "step": 25550 + }, + { + "epoch": 1.19, + "learning_rate": 1.6153026194096533e-05, + "loss": 0.1158, + "step": 25555 + }, + { + "epoch": 1.19, + "learning_rate": 1.6152242409041744e-05, + "loss": 0.1445, + "step": 25560 + }, + { + "epoch": 1.19, + "learning_rate": 1.615145862398696e-05, + "loss": 0.1769, + "step": 25565 + }, + { + "epoch": 1.19, + "learning_rate": 1.615067483893217e-05, + "loss": 0.1704, + "step": 25570 + }, + { + "epoch": 1.19, + "learning_rate": 1.6149891053877385e-05, + "loss": 0.2812, + "step": 25575 + }, + { + "epoch": 1.19, + "learning_rate": 1.61491072688226e-05, + "loss": 0.3944, + "step": 25580 + }, + { + "epoch": 1.19, + "learning_rate": 1.6148323483767813e-05, + "loss": 0.2521, + "step": 25585 + }, + { + "epoch": 1.19, + "learning_rate": 1.6147539698713027e-05, + "loss": 0.0945, + "step": 25590 + }, + { + "epoch": 1.19, + "learning_rate": 1.6146755913658238e-05, + "loss": 0.1083, + "step": 25595 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145972128603455e-05, + "loss": 0.0934, + "step": 25600 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145188343548665e-05, + "loss": 0.1016, + "step": 25605 + }, + { + "epoch": 1.19, + "learning_rate": 1.614440455849388e-05, + "loss": 0.1465, + "step": 25610 + }, + { + "epoch": 1.2, + "learning_rate": 1.6143620773439093e-05, + "loss": 0.1531, + "step": 25615 + }, + { + "epoch": 1.2, + "learning_rate": 1.6142836988384307e-05, + "loss": 0.1764, + "step": 25620 + }, + { + "epoch": 1.2, + "learning_rate": 1.614205320332952e-05, + "loss": 0.2381, + "step": 25625 + }, + { + "epoch": 1.2, + "learning_rate": 1.6141269418274735e-05, + "loss": 0.4556, + "step": 25630 + }, + { + "epoch": 1.2, + "learning_rate": 1.6140485633219945e-05, + "loss": 0.3126, + "step": 25635 + }, + { + "epoch": 1.2, + "learning_rate": 1.6139701848165163e-05, + "loss": 0.0332, + "step": 25640 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138918063110373e-05, + "loss": 0.0406, + "step": 25645 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138134278055587e-05, + "loss": 0.1147, + "step": 25650 + }, + { + "epoch": 1.2, + "learning_rate": 1.61373504930008e-05, + "loss": 0.1279, + "step": 25655 + }, + { + "epoch": 1.2, + "learning_rate": 1.613656670794601e-05, + "loss": 0.1488, + "step": 25660 + }, + { + "epoch": 1.2, + "learning_rate": 1.613578292289123e-05, + "loss": 0.0947, + "step": 25665 + }, + { + "epoch": 1.2, + "learning_rate": 1.613499913783644e-05, + "loss": 0.147, + "step": 25670 + }, + { + "epoch": 1.2, + "learning_rate": 1.6134215352781653e-05, + "loss": 0.2196, + "step": 25675 + }, + { + "epoch": 1.2, + "learning_rate": 1.6133431567726867e-05, + "loss": 0.3325, + "step": 25680 + }, + { + "epoch": 1.2, + "learning_rate": 1.613264778267208e-05, + "loss": 0.3548, + "step": 25685 + }, + { + "epoch": 1.2, + "learning_rate": 1.6131863997617295e-05, + "loss": 0.0497, + "step": 25690 + }, + { + "epoch": 1.2, + "learning_rate": 1.613108021256251e-05, + "loss": 0.0752, + "step": 25695 + }, + { + "epoch": 1.2, + "learning_rate": 1.6130296427507723e-05, + "loss": 0.0716, + "step": 25700 + }, + { + "epoch": 1.2, + "learning_rate": 1.6129512642452937e-05, + "loss": 0.1445, + "step": 25705 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128728857398147e-05, + "loss": 0.1371, + "step": 25710 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128101829354318e-05, + "loss": 0.1388, + "step": 25715 + }, + { + "epoch": 1.2, + "learning_rate": 1.6127318044299535e-05, + "loss": 0.1818, + "step": 25720 + }, + { + "epoch": 1.2, + "learning_rate": 1.6126534259244745e-05, + "loss": 0.1747, + "step": 25725 + }, + { + "epoch": 1.2, + "learning_rate": 1.612575047418996e-05, + "loss": 0.2925, + "step": 25730 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124966689135173e-05, + "loss": 0.212, + "step": 25735 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124182904080384e-05, + "loss": 0.07, + "step": 25740 + }, + { + "epoch": 1.2, + "learning_rate": 1.61233991190256e-05, + "loss": 0.0871, + "step": 25745 + }, + { + "epoch": 1.2, + "learning_rate": 1.612261533397081e-05, + "loss": 0.1572, + "step": 25750 + }, + { + "epoch": 1.2, + "learning_rate": 1.6121831548916026e-05, + "loss": 0.1053, + "step": 25755 + }, + { + "epoch": 1.2, + "learning_rate": 1.612104776386124e-05, + "loss": 0.1303, + "step": 25760 + }, + { + "epoch": 1.2, + "learning_rate": 1.6120263978806453e-05, + "loss": 0.2018, + "step": 25765 + }, + { + "epoch": 1.2, + "learning_rate": 1.6119480193751667e-05, + "loss": 0.2664, + "step": 25770 + }, + { + "epoch": 1.2, + "learning_rate": 1.611869640869688e-05, + "loss": 0.2745, + "step": 25775 + }, + { + "epoch": 1.2, + "learning_rate": 1.6117912623642095e-05, + "loss": 0.4014, + "step": 25780 + }, + { + "epoch": 1.2, + "learning_rate": 1.611712883858731e-05, + "loss": 0.2742, + "step": 25785 + }, + { + "epoch": 1.2, + "learning_rate": 1.611634505353252e-05, + "loss": 0.094, + "step": 25790 + }, + { + "epoch": 1.2, + "learning_rate": 1.6115561268477733e-05, + "loss": 0.1958, + "step": 25795 + }, + { + "epoch": 1.2, + "learning_rate": 1.6114777483422947e-05, + "loss": 0.0788, + "step": 25800 + }, + { + "epoch": 1.2, + "learning_rate": 1.611399369836816e-05, + "loss": 0.106, + "step": 25805 + }, + { + "epoch": 1.2, + "learning_rate": 1.6113209913313375e-05, + "loss": 0.1569, + "step": 25810 + }, + { + "epoch": 1.2, + "learning_rate": 1.6112426128258586e-05, + "loss": 0.126, + "step": 25815 + }, + { + "epoch": 1.2, + "learning_rate": 1.6111642343203803e-05, + "loss": 0.1446, + "step": 25820 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110858558149013e-05, + "loss": 0.3583, + "step": 25825 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110074773094227e-05, + "loss": 0.3009, + "step": 25830 + }, + { + "epoch": 1.21, + "learning_rate": 1.610929098803944e-05, + "loss": 0.3158, + "step": 25835 + }, + { + "epoch": 1.21, + "learning_rate": 1.6108507202984655e-05, + "loss": 0.1105, + "step": 25840 + }, + { + "epoch": 1.21, + "learning_rate": 1.610772341792987e-05, + "loss": 0.0567, + "step": 25845 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106939632875083e-05, + "loss": 0.0254, + "step": 25850 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106155847820293e-05, + "loss": 0.1127, + "step": 25855 + }, + { + "epoch": 1.21, + "learning_rate": 1.6105372062765507e-05, + "loss": 0.165, + "step": 25860 + }, + { + "epoch": 1.21, + "learning_rate": 1.610458827771072e-05, + "loss": 0.1691, + "step": 25865 + }, + { + "epoch": 1.21, + "learning_rate": 1.6103804492655935e-05, + "loss": 0.1748, + "step": 25870 + }, + { + "epoch": 1.21, + "learning_rate": 1.610302070760115e-05, + "loss": 0.3087, + "step": 25875 + }, + { + "epoch": 1.21, + "learning_rate": 1.6102236922546363e-05, + "loss": 0.3935, + "step": 25880 + }, + { + "epoch": 1.21, + "learning_rate": 1.6101453137491577e-05, + "loss": 0.3608, + "step": 25885 + }, + { + "epoch": 1.21, + "learning_rate": 1.6100669352436787e-05, + "loss": 0.0722, + "step": 25890 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099885567382005e-05, + "loss": 0.1239, + "step": 25895 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099101782327215e-05, + "loss": 0.0837, + "step": 25900 + }, + { + "epoch": 1.21, + "learning_rate": 1.609831799727243e-05, + "loss": 0.1534, + "step": 25905 + }, + { + "epoch": 1.21, + "learning_rate": 1.6097534212217643e-05, + "loss": 0.1598, + "step": 25910 + }, + { + "epoch": 1.21, + "learning_rate": 1.6096750427162857e-05, + "loss": 0.1712, + "step": 25915 + }, + { + "epoch": 1.21, + "learning_rate": 1.609596664210807e-05, + "loss": 0.1853, + "step": 25920 + }, + { + "epoch": 1.21, + "learning_rate": 1.609518285705328e-05, + "loss": 0.2454, + "step": 25925 + }, + { + "epoch": 1.21, + "learning_rate": 1.6094399071998495e-05, + "loss": 0.303, + "step": 25930 + }, + { + "epoch": 1.21, + "learning_rate": 1.609361528694371e-05, + "loss": 0.3649, + "step": 25935 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092831501888923e-05, + "loss": 0.0425, + "step": 25940 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092047716834137e-05, + "loss": 0.1058, + "step": 25945 + }, + { + "epoch": 1.21, + "learning_rate": 1.609126393177935e-05, + "loss": 0.0823, + "step": 25950 + }, + { + "epoch": 1.21, + "learning_rate": 1.609048014672456e-05, + "loss": 0.0905, + "step": 25955 + }, + { + "epoch": 1.21, + "learning_rate": 1.608969636166978e-05, + "loss": 0.1479, + "step": 25960 + }, + { + "epoch": 1.21, + "learning_rate": 1.608891257661499e-05, + "loss": 0.1194, + "step": 25965 + }, + { + "epoch": 1.21, + "learning_rate": 1.6088128791560203e-05, + "loss": 0.2551, + "step": 25970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6087345006505417e-05, + "loss": 0.2567, + "step": 25975 + }, + { + "epoch": 1.21, + "learning_rate": 1.608656122145063e-05, + "loss": 0.4998, + "step": 25980 + }, + { + "epoch": 1.21, + "learning_rate": 1.6085777436395845e-05, + "loss": 0.3416, + "step": 25985 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084993651341055e-05, + "loss": 0.0866, + "step": 25990 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084209866286273e-05, + "loss": 0.1128, + "step": 25995 + }, + { + "epoch": 1.21, + "learning_rate": 1.6083426081231483e-05, + "loss": 0.1394, + "step": 26000 + }, + { + "epoch": 1.21, + "learning_rate": 1.6082642296176697e-05, + "loss": 0.163, + "step": 26005 + }, + { + "epoch": 1.21, + "learning_rate": 1.608185851112191e-05, + "loss": 0.1127, + "step": 26010 + }, + { + "epoch": 1.21, + "learning_rate": 1.6081074726067125e-05, + "loss": 0.1923, + "step": 26015 + }, + { + "epoch": 1.21, + "learning_rate": 1.608029094101234e-05, + "loss": 0.1738, + "step": 26020 + }, + { + "epoch": 1.21, + "learning_rate": 1.6079507155957553e-05, + "loss": 0.2733, + "step": 26025 + }, + { + "epoch": 1.21, + "learning_rate": 1.6078723370902763e-05, + "loss": 0.3812, + "step": 26030 + }, + { + "epoch": 1.21, + "learning_rate": 1.607793958584798e-05, + "loss": 0.2911, + "step": 26035 + }, + { + "epoch": 1.22, + "learning_rate": 1.607715580079319e-05, + "loss": 0.0426, + "step": 26040 + }, + { + "epoch": 1.22, + "learning_rate": 1.6076372015738405e-05, + "loss": 0.1156, + "step": 26045 + }, + { + "epoch": 1.22, + "learning_rate": 1.607558823068362e-05, + "loss": 0.1092, + "step": 26050 + }, + { + "epoch": 1.22, + "learning_rate": 1.607480444562883e-05, + "loss": 0.1071, + "step": 26055 + }, + { + "epoch": 1.22, + "learning_rate": 1.6074020660574047e-05, + "loss": 0.1825, + "step": 26060 + }, + { + "epoch": 1.22, + "learning_rate": 1.6073236875519257e-05, + "loss": 0.2019, + "step": 26065 + }, + { + "epoch": 1.22, + "learning_rate": 1.607245309046447e-05, + "loss": 0.2312, + "step": 26070 + }, + { + "epoch": 1.22, + "learning_rate": 1.6071669305409685e-05, + "loss": 0.1766, + "step": 26075 + }, + { + "epoch": 1.22, + "learning_rate": 1.60708855203549e-05, + "loss": 0.3091, + "step": 26080 + }, + { + "epoch": 1.22, + "learning_rate": 1.6070101735300113e-05, + "loss": 0.35, + "step": 26085 + }, + { + "epoch": 1.22, + "learning_rate": 1.6069317950245327e-05, + "loss": 0.0862, + "step": 26090 + }, + { + "epoch": 1.22, + "learning_rate": 1.606853416519054e-05, + "loss": 0.0783, + "step": 26095 + }, + { + "epoch": 1.22, + "learning_rate": 1.6067750380135755e-05, + "loss": 0.0974, + "step": 26100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066966595080965e-05, + "loss": 0.0428, + "step": 26105 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066182810026182e-05, + "loss": 0.1272, + "step": 26110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6065399024971393e-05, + "loss": 0.1728, + "step": 26115 + }, + { + "epoch": 1.22, + "learning_rate": 1.6064615239916607e-05, + "loss": 0.2236, + "step": 26120 + }, + { + "epoch": 1.22, + "learning_rate": 1.606383145486182e-05, + "loss": 0.3677, + "step": 26125 + }, + { + "epoch": 1.22, + "learning_rate": 1.606304766980703e-05, + "loss": 0.4215, + "step": 26130 + }, + { + "epoch": 1.22, + "learning_rate": 1.606226388475225e-05, + "loss": 0.3085, + "step": 26135 + }, + { + "epoch": 1.22, + "learning_rate": 1.606148009969746e-05, + "loss": 0.0464, + "step": 26140 + }, + { + "epoch": 1.22, + "learning_rate": 1.6060696314642673e-05, + "loss": 0.0697, + "step": 26145 + }, + { + "epoch": 1.22, + "learning_rate": 1.6059912529587887e-05, + "loss": 0.1091, + "step": 26150 + }, + { + "epoch": 1.22, + "learning_rate": 1.60591287445331e-05, + "loss": 0.1073, + "step": 26155 + }, + { + "epoch": 1.22, + "learning_rate": 1.6058344959478315e-05, + "loss": 0.1585, + "step": 26160 + }, + { + "epoch": 1.22, + "learning_rate": 1.605756117442353e-05, + "loss": 0.1376, + "step": 26165 + }, + { + "epoch": 1.22, + "learning_rate": 1.605677738936874e-05, + "loss": 0.2462, + "step": 26170 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055993604313956e-05, + "loss": 0.1889, + "step": 26175 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055209819259167e-05, + "loss": 0.3838, + "step": 26180 + }, + { + "epoch": 1.22, + "learning_rate": 1.605442603420438e-05, + "loss": 0.3044, + "step": 26185 + }, + { + "epoch": 1.22, + "learning_rate": 1.6053642249149595e-05, + "loss": 0.0562, + "step": 26190 + }, + { + "epoch": 1.22, + "learning_rate": 1.605285846409481e-05, + "loss": 0.0978, + "step": 26195 + }, + { + "epoch": 1.22, + "learning_rate": 1.6052074679040022e-05, + "loss": 0.1215, + "step": 26200 + }, + { + "epoch": 1.22, + "learning_rate": 1.6051290893985233e-05, + "loss": 0.1945, + "step": 26205 + }, + { + "epoch": 1.22, + "learning_rate": 1.605050710893045e-05, + "loss": 0.1485, + "step": 26210 + }, + { + "epoch": 1.22, + "learning_rate": 1.604972332387566e-05, + "loss": 0.0891, + "step": 26215 + }, + { + "epoch": 1.22, + "learning_rate": 1.6048939538820875e-05, + "loss": 0.1399, + "step": 26220 + }, + { + "epoch": 1.22, + "learning_rate": 1.604815575376609e-05, + "loss": 0.205, + "step": 26225 + }, + { + "epoch": 1.22, + "learning_rate": 1.6047371968711303e-05, + "loss": 0.3367, + "step": 26230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6046588183656516e-05, + "loss": 0.2997, + "step": 26235 + }, + { + "epoch": 1.22, + "learning_rate": 1.604580439860173e-05, + "loss": 0.0592, + "step": 26240 + }, + { + "epoch": 1.22, + "learning_rate": 1.604502061354694e-05, + "loss": 0.0551, + "step": 26245 + }, + { + "epoch": 1.22, + "learning_rate": 1.6044236828492155e-05, + "loss": 0.0715, + "step": 26250 + }, + { + "epoch": 1.23, + "learning_rate": 1.604345304343737e-05, + "loss": 0.1212, + "step": 26255 + }, + { + "epoch": 1.23, + "learning_rate": 1.6042669258382583e-05, + "loss": 0.1241, + "step": 26260 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041885473327796e-05, + "loss": 0.1764, + "step": 26265 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041101688273007e-05, + "loss": 0.1995, + "step": 26270 + }, + { + "epoch": 1.23, + "learning_rate": 1.6040317903218224e-05, + "loss": 0.2772, + "step": 26275 + }, + { + "epoch": 1.23, + "learning_rate": 1.6039534118163435e-05, + "loss": 0.3426, + "step": 26280 + }, + { + "epoch": 1.23, + "learning_rate": 1.603875033310865e-05, + "loss": 0.2971, + "step": 26285 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037966548053863e-05, + "loss": 0.065, + "step": 26290 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037182762999077e-05, + "loss": 0.0287, + "step": 26295 + }, + { + "epoch": 1.23, + "learning_rate": 1.603639897794429e-05, + "loss": 0.1137, + "step": 26300 + }, + { + "epoch": 1.23, + "learning_rate": 1.6035615192889504e-05, + "loss": 0.1516, + "step": 26305 + }, + { + "epoch": 1.23, + "learning_rate": 1.6034831407834718e-05, + "loss": 0.0944, + "step": 26310 + }, + { + "epoch": 1.23, + "learning_rate": 1.603404762277993e-05, + "loss": 0.1105, + "step": 26315 + }, + { + "epoch": 1.23, + "learning_rate": 1.6033263837725143e-05, + "loss": 0.1635, + "step": 26320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6032480052670357e-05, + "loss": 0.2127, + "step": 26325 + }, + { + "epoch": 1.23, + "learning_rate": 1.603169626761557e-05, + "loss": 0.4454, + "step": 26330 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030912482560784e-05, + "loss": 0.3167, + "step": 26335 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030128697505998e-05, + "loss": 0.0731, + "step": 26340 + }, + { + "epoch": 1.23, + "learning_rate": 1.602934491245121e-05, + "loss": 0.0499, + "step": 26345 + }, + { + "epoch": 1.23, + "learning_rate": 1.6028561127396426e-05, + "loss": 0.097, + "step": 26350 + }, + { + "epoch": 1.23, + "learning_rate": 1.6027777342341637e-05, + "loss": 0.0544, + "step": 26355 + }, + { + "epoch": 1.23, + "learning_rate": 1.602699355728685e-05, + "loss": 0.1498, + "step": 26360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6026209772232064e-05, + "loss": 0.1206, + "step": 26365 + }, + { + "epoch": 1.23, + "learning_rate": 1.602542598717728e-05, + "loss": 0.2232, + "step": 26370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6024642202122492e-05, + "loss": 0.3615, + "step": 26375 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023858417067703e-05, + "loss": 0.3271, + "step": 26380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023074632012917e-05, + "loss": 0.3493, + "step": 26385 + }, + { + "epoch": 1.23, + "learning_rate": 1.602229084695813e-05, + "loss": 0.0708, + "step": 26390 + }, + { + "epoch": 1.23, + "learning_rate": 1.6021507061903344e-05, + "loss": 0.124, + "step": 26395 + }, + { + "epoch": 1.23, + "learning_rate": 1.602072327684856e-05, + "loss": 0.0928, + "step": 26400 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019939491793772e-05, + "loss": 0.0961, + "step": 26405 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019155706738986e-05, + "loss": 0.1516, + "step": 26410 + }, + { + "epoch": 1.23, + "learning_rate": 1.60183719216842e-05, + "loss": 0.1803, + "step": 26415 + }, + { + "epoch": 1.23, + "learning_rate": 1.601758813662941e-05, + "loss": 0.1668, + "step": 26420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6016804351574628e-05, + "loss": 0.2315, + "step": 26425 + }, + { + "epoch": 1.23, + "learning_rate": 1.601602056651984e-05, + "loss": 0.353, + "step": 26430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6015236781465052e-05, + "loss": 0.319, + "step": 26435 + }, + { + "epoch": 1.23, + "learning_rate": 1.6014452996410266e-05, + "loss": 0.0558, + "step": 26440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6013669211355477e-05, + "loss": 0.1412, + "step": 26445 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012885426300694e-05, + "loss": 0.0911, + "step": 26450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012101641245905e-05, + "loss": 0.0855, + "step": 26455 + }, + { + "epoch": 1.23, + "learning_rate": 1.601131785619112e-05, + "loss": 0.1342, + "step": 26460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010534071136332e-05, + "loss": 0.1502, + "step": 26465 + }, + { + "epoch": 1.24, + "learning_rate": 1.6009750286081546e-05, + "loss": 0.2149, + "step": 26470 + }, + { + "epoch": 1.24, + "learning_rate": 1.600896650102676e-05, + "loss": 0.1451, + "step": 26475 + }, + { + "epoch": 1.24, + "learning_rate": 1.6008182715971974e-05, + "loss": 0.38, + "step": 26480 + }, + { + "epoch": 1.24, + "learning_rate": 1.6007398930917185e-05, + "loss": 0.5451, + "step": 26485 + }, + { + "epoch": 1.24, + "learning_rate": 1.6006615145862402e-05, + "loss": 0.0726, + "step": 26490 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005831360807612e-05, + "loss": 0.0716, + "step": 26495 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005047575752826e-05, + "loss": 0.084, + "step": 26500 + }, + { + "epoch": 1.24, + "learning_rate": 1.600426379069804e-05, + "loss": 0.1345, + "step": 26505 + }, + { + "epoch": 1.24, + "learning_rate": 1.6003480005643254e-05, + "loss": 0.2791, + "step": 26510 + }, + { + "epoch": 1.24, + "learning_rate": 1.6002696220588468e-05, + "loss": 0.1281, + "step": 26515 + }, + { + "epoch": 1.24, + "learning_rate": 1.600191243553368e-05, + "loss": 0.2016, + "step": 26520 + }, + { + "epoch": 1.24, + "learning_rate": 1.6001128650478896e-05, + "loss": 0.3165, + "step": 26525 + }, + { + "epoch": 1.24, + "learning_rate": 1.6000344865424106e-05, + "loss": 0.336, + "step": 26530 + }, + { + "epoch": 1.24, + "learning_rate": 1.599956108036932e-05, + "loss": 0.3234, + "step": 26535 + }, + { + "epoch": 1.24, + "learning_rate": 1.5998777295314534e-05, + "loss": 0.1082, + "step": 26540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997993510259748e-05, + "loss": 0.0455, + "step": 26545 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997209725204962e-05, + "loss": 0.1116, + "step": 26550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5996425940150176e-05, + "loss": 0.0805, + "step": 26555 + }, + { + "epoch": 1.24, + "learning_rate": 1.5995642155095386e-05, + "loss": 0.1438, + "step": 26560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994858370040604e-05, + "loss": 0.1412, + "step": 26565 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994074584985814e-05, + "loss": 0.2316, + "step": 26570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5993290799931028e-05, + "loss": 0.2124, + "step": 26575 + }, + { + "epoch": 1.24, + "learning_rate": 1.5992507014876242e-05, + "loss": 0.3504, + "step": 26580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5991723229821453e-05, + "loss": 0.3047, + "step": 26585 + }, + { + "epoch": 1.24, + "learning_rate": 1.599093944476667e-05, + "loss": 0.0622, + "step": 26590 + }, + { + "epoch": 1.24, + "learning_rate": 1.599015565971188e-05, + "loss": 0.0547, + "step": 26595 + }, + { + "epoch": 1.24, + "learning_rate": 1.5989371874657094e-05, + "loss": 0.0788, + "step": 26600 + }, + { + "epoch": 1.24, + "learning_rate": 1.5988588089602308e-05, + "loss": 0.1388, + "step": 26605 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987804304547522e-05, + "loss": 0.1345, + "step": 26610 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987020519492736e-05, + "loss": 0.1711, + "step": 26615 + }, + { + "epoch": 1.24, + "learning_rate": 1.598623673443795e-05, + "loss": 0.209, + "step": 26620 + }, + { + "epoch": 1.24, + "learning_rate": 1.5985452949383164e-05, + "loss": 0.24, + "step": 26625 + }, + { + "epoch": 1.24, + "learning_rate": 1.5984669164328378e-05, + "loss": 0.2429, + "step": 26630 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983885379273588e-05, + "loss": 0.2346, + "step": 26635 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983101594218802e-05, + "loss": 0.052, + "step": 26640 + }, + { + "epoch": 1.24, + "learning_rate": 1.5982317809164016e-05, + "loss": 0.1405, + "step": 26645 + }, + { + "epoch": 1.24, + "learning_rate": 1.598153402410923e-05, + "loss": 0.0957, + "step": 26650 + }, + { + "epoch": 1.24, + "learning_rate": 1.5980750239054444e-05, + "loss": 0.0875, + "step": 26655 + }, + { + "epoch": 1.24, + "learning_rate": 1.5979966453999654e-05, + "loss": 0.0848, + "step": 26660 + }, + { + "epoch": 1.24, + "learning_rate": 1.597918266894487e-05, + "loss": 0.1606, + "step": 26665 + }, + { + "epoch": 1.24, + "learning_rate": 1.5978398883890082e-05, + "loss": 0.2839, + "step": 26670 + }, + { + "epoch": 1.24, + "learning_rate": 1.5977615098835296e-05, + "loss": 0.2471, + "step": 26675 + }, + { + "epoch": 1.24, + "learning_rate": 1.597683131378051e-05, + "loss": 0.3593, + "step": 26680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5976047528725724e-05, + "loss": 0.3423, + "step": 26685 + }, + { + "epoch": 1.25, + "learning_rate": 1.5975263743670938e-05, + "loss": 0.0586, + "step": 26690 + }, + { + "epoch": 1.25, + "learning_rate": 1.5974479958616152e-05, + "loss": 0.0374, + "step": 26695 + }, + { + "epoch": 1.25, + "learning_rate": 1.5973696173561362e-05, + "loss": 0.1045, + "step": 26700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5972912388506576e-05, + "loss": 0.1342, + "step": 26705 + }, + { + "epoch": 1.25, + "learning_rate": 1.597212860345179e-05, + "loss": 0.2293, + "step": 26710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5971344818397004e-05, + "loss": 0.1674, + "step": 26715 + }, + { + "epoch": 1.25, + "learning_rate": 1.5970561033342218e-05, + "loss": 0.1991, + "step": 26720 + }, + { + "epoch": 1.25, + "learning_rate": 1.5969777248287432e-05, + "loss": 0.2317, + "step": 26725 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968993463232646e-05, + "loss": 0.4123, + "step": 26730 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968209678177856e-05, + "loss": 0.2453, + "step": 26735 + }, + { + "epoch": 1.25, + "learning_rate": 1.5967425893123073e-05, + "loss": 0.083, + "step": 26740 + }, + { + "epoch": 1.25, + "learning_rate": 1.5966642108068284e-05, + "loss": 0.0648, + "step": 26745 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965858323013498e-05, + "loss": 0.1362, + "step": 26750 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965074537958712e-05, + "loss": 0.1406, + "step": 26755 + }, + { + "epoch": 1.25, + "learning_rate": 1.5964290752903926e-05, + "loss": 0.1994, + "step": 26760 + }, + { + "epoch": 1.25, + "learning_rate": 1.596350696784914e-05, + "loss": 0.1565, + "step": 26765 + }, + { + "epoch": 1.25, + "learning_rate": 1.596272318279435e-05, + "loss": 0.2318, + "step": 26770 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961939397739564e-05, + "loss": 0.2744, + "step": 26775 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961155612684778e-05, + "loss": 0.4521, + "step": 26780 + }, + { + "epoch": 1.25, + "learning_rate": 1.5960371827629992e-05, + "loss": 0.2907, + "step": 26785 + }, + { + "epoch": 1.25, + "learning_rate": 1.5959588042575206e-05, + "loss": 0.086, + "step": 26790 + }, + { + "epoch": 1.25, + "learning_rate": 1.595880425752042e-05, + "loss": 0.0616, + "step": 26795 + }, + { + "epoch": 1.25, + "learning_rate": 1.595802047246563e-05, + "loss": 0.0885, + "step": 26800 + }, + { + "epoch": 1.25, + "learning_rate": 1.5957236687410847e-05, + "loss": 0.1038, + "step": 26805 + }, + { + "epoch": 1.25, + "learning_rate": 1.5956452902356058e-05, + "loss": 0.1311, + "step": 26810 + }, + { + "epoch": 1.25, + "learning_rate": 1.5955669117301272e-05, + "loss": 0.2754, + "step": 26815 + }, + { + "epoch": 1.25, + "learning_rate": 1.5954885332246486e-05, + "loss": 0.2622, + "step": 26820 + }, + { + "epoch": 1.25, + "learning_rate": 1.59541015471917e-05, + "loss": 0.3481, + "step": 26825 + }, + { + "epoch": 1.25, + "learning_rate": 1.5953317762136914e-05, + "loss": 0.3888, + "step": 26830 + }, + { + "epoch": 1.25, + "learning_rate": 1.5952533977082124e-05, + "loss": 0.3201, + "step": 26835 + }, + { + "epoch": 1.25, + "learning_rate": 1.595175019202734e-05, + "loss": 0.0431, + "step": 26840 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950966406972552e-05, + "loss": 0.0601, + "step": 26845 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950182621917766e-05, + "loss": 0.0401, + "step": 26850 + }, + { + "epoch": 1.25, + "learning_rate": 1.594939883686298e-05, + "loss": 0.0848, + "step": 26855 + }, + { + "epoch": 1.25, + "learning_rate": 1.5948615051808194e-05, + "loss": 0.1519, + "step": 26860 + }, + { + "epoch": 1.25, + "learning_rate": 1.5947831266753408e-05, + "loss": 0.1502, + "step": 26865 + }, + { + "epoch": 1.25, + "learning_rate": 1.594704748169862e-05, + "loss": 0.1738, + "step": 26870 + }, + { + "epoch": 1.25, + "learning_rate": 1.5946263696643832e-05, + "loss": 0.1806, + "step": 26875 + }, + { + "epoch": 1.25, + "learning_rate": 1.594547991158905e-05, + "loss": 0.359, + "step": 26880 + }, + { + "epoch": 1.25, + "learning_rate": 1.594469612653426e-05, + "loss": 0.3319, + "step": 26885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943912341479474e-05, + "loss": 0.0602, + "step": 26890 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943128556424688e-05, + "loss": 0.0851, + "step": 26895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5942344771369898e-05, + "loss": 0.1058, + "step": 26900 + }, + { + "epoch": 1.26, + "learning_rate": 1.5941560986315115e-05, + "loss": 0.1035, + "step": 26905 + }, + { + "epoch": 1.26, + "learning_rate": 1.5940777201260326e-05, + "loss": 0.1427, + "step": 26910 + }, + { + "epoch": 1.26, + "learning_rate": 1.593999341620554e-05, + "loss": 0.1269, + "step": 26915 + }, + { + "epoch": 1.26, + "learning_rate": 1.5939209631150754e-05, + "loss": 0.1434, + "step": 26920 + }, + { + "epoch": 1.26, + "learning_rate": 1.5938425846095968e-05, + "loss": 0.2233, + "step": 26925 + }, + { + "epoch": 1.26, + "learning_rate": 1.593764206104118e-05, + "loss": 0.4562, + "step": 26930 + }, + { + "epoch": 1.26, + "learning_rate": 1.5936858275986395e-05, + "loss": 0.3848, + "step": 26935 + }, + { + "epoch": 1.26, + "learning_rate": 1.593607449093161e-05, + "loss": 0.0614, + "step": 26940 + }, + { + "epoch": 1.26, + "learning_rate": 1.5935290705876823e-05, + "loss": 0.0845, + "step": 26945 + }, + { + "epoch": 1.26, + "learning_rate": 1.5934506920822034e-05, + "loss": 0.0953, + "step": 26950 + }, + { + "epoch": 1.26, + "learning_rate": 1.593372313576725e-05, + "loss": 0.1694, + "step": 26955 + }, + { + "epoch": 1.26, + "learning_rate": 1.593293935071246e-05, + "loss": 0.1371, + "step": 26960 + }, + { + "epoch": 1.26, + "learning_rate": 1.5932155565657676e-05, + "loss": 0.2797, + "step": 26965 + }, + { + "epoch": 1.26, + "learning_rate": 1.593137178060289e-05, + "loss": 0.2314, + "step": 26970 + }, + { + "epoch": 1.26, + "learning_rate": 1.59305879955481e-05, + "loss": 0.1904, + "step": 26975 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929804210493317e-05, + "loss": 0.3626, + "step": 26980 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929020425438528e-05, + "loss": 0.3654, + "step": 26985 + }, + { + "epoch": 1.26, + "learning_rate": 1.592823664038374e-05, + "loss": 0.0897, + "step": 26990 + }, + { + "epoch": 1.26, + "learning_rate": 1.5927452855328956e-05, + "loss": 0.1229, + "step": 26995 + }, + { + "epoch": 1.26, + "learning_rate": 1.592666907027417e-05, + "loss": 0.1352, + "step": 27000 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925885285219383e-05, + "loss": 0.1724, + "step": 27005 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925101500164597e-05, + "loss": 0.1872, + "step": 27010 + }, + { + "epoch": 1.26, + "learning_rate": 1.5924317715109808e-05, + "loss": 0.211, + "step": 27015 + }, + { + "epoch": 1.26, + "learning_rate": 1.5923533930055025e-05, + "loss": 0.2077, + "step": 27020 + }, + { + "epoch": 1.26, + "learning_rate": 1.5922750145000236e-05, + "loss": 0.2314, + "step": 27025 + }, + { + "epoch": 1.26, + "learning_rate": 1.592196635994545e-05, + "loss": 0.2645, + "step": 27030 + }, + { + "epoch": 1.26, + "learning_rate": 1.5921182574890663e-05, + "loss": 0.3657, + "step": 27035 + }, + { + "epoch": 1.26, + "learning_rate": 1.5920398789835877e-05, + "loss": 0.0485, + "step": 27040 + }, + { + "epoch": 1.26, + "learning_rate": 1.591961500478109e-05, + "loss": 0.0873, + "step": 27045 + }, + { + "epoch": 1.26, + "learning_rate": 1.5918831219726302e-05, + "loss": 0.0935, + "step": 27050 + }, + { + "epoch": 1.26, + "learning_rate": 1.591804743467152e-05, + "loss": 0.0481, + "step": 27055 + }, + { + "epoch": 1.26, + "learning_rate": 1.591726364961673e-05, + "loss": 0.1495, + "step": 27060 + }, + { + "epoch": 1.26, + "learning_rate": 1.5916479864561943e-05, + "loss": 0.2669, + "step": 27065 + }, + { + "epoch": 1.26, + "learning_rate": 1.5915696079507157e-05, + "loss": 0.2449, + "step": 27070 + }, + { + "epoch": 1.26, + "learning_rate": 1.591491229445237e-05, + "loss": 0.2292, + "step": 27075 + }, + { + "epoch": 1.26, + "learning_rate": 1.5914128509397585e-05, + "loss": 0.3765, + "step": 27080 + }, + { + "epoch": 1.26, + "learning_rate": 1.59133447243428e-05, + "loss": 0.4043, + "step": 27085 + }, + { + "epoch": 1.26, + "learning_rate": 1.591256093928801e-05, + "loss": 0.0565, + "step": 27090 + }, + { + "epoch": 1.26, + "learning_rate": 1.5911777154233224e-05, + "loss": 0.1073, + "step": 27095 + }, + { + "epoch": 1.26, + "learning_rate": 1.5910993369178437e-05, + "loss": 0.1031, + "step": 27100 + }, + { + "epoch": 1.26, + "learning_rate": 1.591020958412365e-05, + "loss": 0.1669, + "step": 27105 + }, + { + "epoch": 1.26, + "learning_rate": 1.5909425799068865e-05, + "loss": 0.1159, + "step": 27110 + }, + { + "epoch": 1.27, + "learning_rate": 1.5908642014014076e-05, + "loss": 0.1482, + "step": 27115 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907858228959293e-05, + "loss": 0.2052, + "step": 27120 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907074443904504e-05, + "loss": 0.1452, + "step": 27125 + }, + { + "epoch": 1.27, + "learning_rate": 1.5906290658849717e-05, + "loss": 0.3528, + "step": 27130 + }, + { + "epoch": 1.27, + "learning_rate": 1.590550687379493e-05, + "loss": 0.2911, + "step": 27135 + }, + { + "epoch": 1.27, + "learning_rate": 1.5904723088740145e-05, + "loss": 0.0498, + "step": 27140 + }, + { + "epoch": 1.27, + "learning_rate": 1.590393930368536e-05, + "loss": 0.0742, + "step": 27145 + }, + { + "epoch": 1.27, + "learning_rate": 1.5903155518630573e-05, + "loss": 0.0673, + "step": 27150 + }, + { + "epoch": 1.27, + "learning_rate": 1.5902371733575787e-05, + "loss": 0.1554, + "step": 27155 + }, + { + "epoch": 1.27, + "learning_rate": 1.5901587948520998e-05, + "loss": 0.1423, + "step": 27160 + }, + { + "epoch": 1.27, + "learning_rate": 1.590080416346621e-05, + "loss": 0.2888, + "step": 27165 + }, + { + "epoch": 1.27, + "learning_rate": 1.5900020378411425e-05, + "loss": 0.206, + "step": 27170 + }, + { + "epoch": 1.27, + "learning_rate": 1.589923659335664e-05, + "loss": 0.2769, + "step": 27175 + }, + { + "epoch": 1.27, + "learning_rate": 1.5898452808301853e-05, + "loss": 0.3929, + "step": 27180 + }, + { + "epoch": 1.27, + "learning_rate": 1.5897669023247067e-05, + "loss": 0.2299, + "step": 27185 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896885238192278e-05, + "loss": 0.0616, + "step": 27190 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896101453137495e-05, + "loss": 0.0768, + "step": 27195 + }, + { + "epoch": 1.27, + "learning_rate": 1.5895317668082705e-05, + "loss": 0.066, + "step": 27200 + }, + { + "epoch": 1.27, + "learning_rate": 1.589453388302792e-05, + "loss": 0.1199, + "step": 27205 + }, + { + "epoch": 1.27, + "learning_rate": 1.5893750097973133e-05, + "loss": 0.1225, + "step": 27210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5892966312918347e-05, + "loss": 0.1322, + "step": 27215 + }, + { + "epoch": 1.27, + "learning_rate": 1.589218252786356e-05, + "loss": 0.1938, + "step": 27220 + }, + { + "epoch": 1.27, + "learning_rate": 1.589139874280877e-05, + "loss": 0.1989, + "step": 27225 + }, + { + "epoch": 1.27, + "learning_rate": 1.5890614957753985e-05, + "loss": 0.3783, + "step": 27230 + }, + { + "epoch": 1.27, + "learning_rate": 1.58898311726992e-05, + "loss": 0.2278, + "step": 27235 + }, + { + "epoch": 1.27, + "learning_rate": 1.5889047387644413e-05, + "loss": 0.0426, + "step": 27240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5888263602589627e-05, + "loss": 0.0671, + "step": 27245 + }, + { + "epoch": 1.27, + "learning_rate": 1.588747981753484e-05, + "loss": 0.1248, + "step": 27250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5886696032480055e-05, + "loss": 0.1308, + "step": 27255 + }, + { + "epoch": 1.27, + "learning_rate": 1.588591224742527e-05, + "loss": 0.1626, + "step": 27260 + }, + { + "epoch": 1.27, + "learning_rate": 1.588512846237048e-05, + "loss": 0.0983, + "step": 27265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5884344677315697e-05, + "loss": 0.1998, + "step": 27270 + }, + { + "epoch": 1.27, + "learning_rate": 1.5883560892260907e-05, + "loss": 0.2857, + "step": 27275 + }, + { + "epoch": 1.27, + "learning_rate": 1.588277710720612e-05, + "loss": 0.4716, + "step": 27280 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881993322151335e-05, + "loss": 0.3464, + "step": 27285 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881209537096546e-05, + "loss": 0.0157, + "step": 27290 + }, + { + "epoch": 1.27, + "learning_rate": 1.5880425752041763e-05, + "loss": 0.0292, + "step": 27295 + }, + { + "epoch": 1.27, + "learning_rate": 1.5879641966986973e-05, + "loss": 0.1048, + "step": 27300 + }, + { + "epoch": 1.27, + "learning_rate": 1.5878858181932187e-05, + "loss": 0.1233, + "step": 27305 + }, + { + "epoch": 1.27, + "learning_rate": 1.58780743968774e-05, + "loss": 0.1633, + "step": 27310 + }, + { + "epoch": 1.27, + "learning_rate": 1.5877290611822615e-05, + "loss": 0.213, + "step": 27315 + }, + { + "epoch": 1.27, + "learning_rate": 1.587650682676783e-05, + "loss": 0.1694, + "step": 27320 + }, + { + "epoch": 1.28, + "learning_rate": 1.5875723041713043e-05, + "loss": 0.2286, + "step": 27325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874939256658253e-05, + "loss": 0.4041, + "step": 27330 + }, + { + "epoch": 1.28, + "learning_rate": 1.587415547160347e-05, + "loss": 0.3315, + "step": 27335 + }, + { + "epoch": 1.28, + "learning_rate": 1.587337168654868e-05, + "loss": 0.0725, + "step": 27340 + }, + { + "epoch": 1.28, + "learning_rate": 1.5872587901493895e-05, + "loss": 0.092, + "step": 27345 + }, + { + "epoch": 1.28, + "learning_rate": 1.587180411643911e-05, + "loss": 0.0829, + "step": 27350 + }, + { + "epoch": 1.28, + "learning_rate": 1.5871020331384323e-05, + "loss": 0.1871, + "step": 27355 + }, + { + "epoch": 1.28, + "learning_rate": 1.5870236546329537e-05, + "loss": 0.1217, + "step": 27360 + }, + { + "epoch": 1.28, + "learning_rate": 1.5869452761274747e-05, + "loss": 0.1402, + "step": 27365 + }, + { + "epoch": 1.28, + "learning_rate": 1.5868668976219965e-05, + "loss": 0.2723, + "step": 27370 + }, + { + "epoch": 1.28, + "learning_rate": 1.5867885191165175e-05, + "loss": 0.2456, + "step": 27375 + }, + { + "epoch": 1.28, + "learning_rate": 1.586710140611039e-05, + "loss": 0.3611, + "step": 27380 + }, + { + "epoch": 1.28, + "learning_rate": 1.5866317621055603e-05, + "loss": 0.2447, + "step": 27385 + }, + { + "epoch": 1.28, + "learning_rate": 1.5865533836000817e-05, + "loss": 0.0605, + "step": 27390 + }, + { + "epoch": 1.28, + "learning_rate": 1.586475005094603e-05, + "loss": 0.1235, + "step": 27395 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863966265891245e-05, + "loss": 0.1074, + "step": 27400 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863182480836455e-05, + "loss": 0.1034, + "step": 27405 + }, + { + "epoch": 1.28, + "learning_rate": 1.5862398695781672e-05, + "loss": 0.1398, + "step": 27410 + }, + { + "epoch": 1.28, + "learning_rate": 1.5861614910726883e-05, + "loss": 0.1485, + "step": 27415 + }, + { + "epoch": 1.28, + "learning_rate": 1.5860831125672097e-05, + "loss": 0.184, + "step": 27420 + }, + { + "epoch": 1.28, + "learning_rate": 1.586004734061731e-05, + "loss": 0.301, + "step": 27425 + }, + { + "epoch": 1.28, + "learning_rate": 1.585926355556252e-05, + "loss": 0.3592, + "step": 27430 + }, + { + "epoch": 1.28, + "learning_rate": 1.585847977050774e-05, + "loss": 0.4242, + "step": 27435 + }, + { + "epoch": 1.28, + "learning_rate": 1.585769598545295e-05, + "loss": 0.033, + "step": 27440 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856912200398163e-05, + "loss": 0.077, + "step": 27445 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856128415343377e-05, + "loss": 0.0681, + "step": 27450 + }, + { + "epoch": 1.28, + "learning_rate": 1.585534463028859e-05, + "loss": 0.0868, + "step": 27455 + }, + { + "epoch": 1.28, + "learning_rate": 1.5854560845233805e-05, + "loss": 0.1456, + "step": 27460 + }, + { + "epoch": 1.28, + "learning_rate": 1.585377706017902e-05, + "loss": 0.1694, + "step": 27465 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852993275124233e-05, + "loss": 0.2192, + "step": 27470 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852209490069446e-05, + "loss": 0.2615, + "step": 27475 + }, + { + "epoch": 1.28, + "learning_rate": 1.5851425705014657e-05, + "loss": 0.3738, + "step": 27480 + }, + { + "epoch": 1.28, + "learning_rate": 1.585064191995987e-05, + "loss": 0.4018, + "step": 27485 + }, + { + "epoch": 1.28, + "learning_rate": 1.5849858134905085e-05, + "loss": 0.0784, + "step": 27490 + }, + { + "epoch": 1.28, + "learning_rate": 1.58490743498503e-05, + "loss": 0.0999, + "step": 27495 + }, + { + "epoch": 1.28, + "learning_rate": 1.5848290564795513e-05, + "loss": 0.094, + "step": 27500 + }, + { + "epoch": 1.28, + "learning_rate": 1.5847506779740723e-05, + "loss": 0.1918, + "step": 27505 + }, + { + "epoch": 1.28, + "learning_rate": 1.584672299468594e-05, + "loss": 0.0998, + "step": 27510 + }, + { + "epoch": 1.28, + "learning_rate": 1.584593920963115e-05, + "loss": 0.1329, + "step": 27515 + }, + { + "epoch": 1.28, + "learning_rate": 1.5845155424576365e-05, + "loss": 0.2364, + "step": 27520 + }, + { + "epoch": 1.28, + "learning_rate": 1.584437163952158e-05, + "loss": 0.2244, + "step": 27525 + }, + { + "epoch": 1.28, + "learning_rate": 1.5843587854466793e-05, + "loss": 0.2506, + "step": 27530 + }, + { + "epoch": 1.28, + "learning_rate": 1.5842804069412007e-05, + "loss": 0.2574, + "step": 27535 + }, + { + "epoch": 1.29, + "learning_rate": 1.584202028435722e-05, + "loss": 0.0459, + "step": 27540 + }, + { + "epoch": 1.29, + "learning_rate": 1.584123649930243e-05, + "loss": 0.169, + "step": 27545 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840452714247645e-05, + "loss": 0.0555, + "step": 27550 + }, + { + "epoch": 1.29, + "learning_rate": 1.583966892919286e-05, + "loss": 0.1168, + "step": 27555 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838885144138073e-05, + "loss": 0.1272, + "step": 27560 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838101359083287e-05, + "loss": 0.1271, + "step": 27565 + }, + { + "epoch": 1.29, + "learning_rate": 1.58373175740285e-05, + "loss": 0.2171, + "step": 27570 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836533788973714e-05, + "loss": 0.2344, + "step": 27575 + }, + { + "epoch": 1.29, + "learning_rate": 1.5835750003918925e-05, + "loss": 0.441, + "step": 27580 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834966218864142e-05, + "loss": 0.344, + "step": 27585 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834182433809353e-05, + "loss": 0.0275, + "step": 27590 + }, + { + "epoch": 1.29, + "learning_rate": 1.5833398648754567e-05, + "loss": 0.0879, + "step": 27595 + }, + { + "epoch": 1.29, + "learning_rate": 1.583261486369978e-05, + "loss": 0.0835, + "step": 27600 + }, + { + "epoch": 1.29, + "learning_rate": 1.5831831078644994e-05, + "loss": 0.0898, + "step": 27605 + }, + { + "epoch": 1.29, + "learning_rate": 1.583104729359021e-05, + "loss": 0.1473, + "step": 27610 + }, + { + "epoch": 1.29, + "learning_rate": 1.583026350853542e-05, + "loss": 0.1409, + "step": 27615 + }, + { + "epoch": 1.29, + "learning_rate": 1.5829479723480633e-05, + "loss": 0.1624, + "step": 27620 + }, + { + "epoch": 1.29, + "learning_rate": 1.5828695938425847e-05, + "loss": 0.2715, + "step": 27625 + }, + { + "epoch": 1.29, + "learning_rate": 1.582791215337106e-05, + "loss": 0.24, + "step": 27630 + }, + { + "epoch": 1.29, + "learning_rate": 1.5827128368316275e-05, + "loss": 0.3, + "step": 27635 + }, + { + "epoch": 1.29, + "learning_rate": 1.582634458326149e-05, + "loss": 0.1383, + "step": 27640 + }, + { + "epoch": 1.29, + "learning_rate": 1.58255607982067e-05, + "loss": 0.0718, + "step": 27645 + }, + { + "epoch": 1.29, + "learning_rate": 1.5824777013151916e-05, + "loss": 0.1329, + "step": 27650 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823993228097127e-05, + "loss": 0.0816, + "step": 27655 + }, + { + "epoch": 1.29, + "learning_rate": 1.582320944304234e-05, + "loss": 0.1626, + "step": 27660 + }, + { + "epoch": 1.29, + "learning_rate": 1.5822425657987555e-05, + "loss": 0.1631, + "step": 27665 + }, + { + "epoch": 1.29, + "learning_rate": 1.582164187293277e-05, + "loss": 0.176, + "step": 27670 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820858087877982e-05, + "loss": 0.242, + "step": 27675 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820074302823193e-05, + "loss": 0.3799, + "step": 27680 + }, + { + "epoch": 1.29, + "learning_rate": 1.581929051776841e-05, + "loss": 0.3226, + "step": 27685 + }, + { + "epoch": 1.29, + "learning_rate": 1.581850673271362e-05, + "loss": 0.0577, + "step": 27690 + }, + { + "epoch": 1.29, + "learning_rate": 1.5817722947658835e-05, + "loss": 0.0607, + "step": 27695 + }, + { + "epoch": 1.29, + "learning_rate": 1.581693916260405e-05, + "loss": 0.0799, + "step": 27700 + }, + { + "epoch": 1.29, + "learning_rate": 1.5816155377549262e-05, + "loss": 0.1294, + "step": 27705 + }, + { + "epoch": 1.29, + "learning_rate": 1.5815371592494476e-05, + "loss": 0.1614, + "step": 27710 + }, + { + "epoch": 1.29, + "learning_rate": 1.581458780743969e-05, + "loss": 0.2011, + "step": 27715 + }, + { + "epoch": 1.29, + "learning_rate": 1.58138040223849e-05, + "loss": 0.1317, + "step": 27720 + }, + { + "epoch": 1.29, + "learning_rate": 1.5813020237330118e-05, + "loss": 0.138, + "step": 27725 + }, + { + "epoch": 1.29, + "learning_rate": 1.581239320928629e-05, + "loss": 0.3351, + "step": 27730 + }, + { + "epoch": 1.29, + "learning_rate": 1.58116094242315e-05, + "loss": 0.2869, + "step": 27735 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810825639176713e-05, + "loss": 0.0514, + "step": 27740 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810041854121927e-05, + "loss": 0.0941, + "step": 27745 + }, + { + "epoch": 1.29, + "learning_rate": 1.580925806906714e-05, + "loss": 0.0642, + "step": 27750 + }, + { + "epoch": 1.3, + "learning_rate": 1.5808474284012355e-05, + "loss": 0.1529, + "step": 27755 + }, + { + "epoch": 1.3, + "learning_rate": 1.5807690498957565e-05, + "loss": 0.1682, + "step": 27760 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806906713902782e-05, + "loss": 0.1137, + "step": 27765 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806279685858953e-05, + "loss": 0.2363, + "step": 27770 + }, + { + "epoch": 1.3, + "learning_rate": 1.5805495900804163e-05, + "loss": 0.2647, + "step": 27775 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804712115749377e-05, + "loss": 0.432, + "step": 27780 + }, + { + "epoch": 1.3, + "learning_rate": 1.580392833069459e-05, + "loss": 0.2727, + "step": 27785 + }, + { + "epoch": 1.3, + "learning_rate": 1.5803144545639805e-05, + "loss": 0.0257, + "step": 27790 + }, + { + "epoch": 1.3, + "learning_rate": 1.580236076058502e-05, + "loss": 0.056, + "step": 27795 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801576975530233e-05, + "loss": 0.1739, + "step": 27800 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800793190475447e-05, + "loss": 0.1359, + "step": 27805 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800009405420657e-05, + "loss": 0.1377, + "step": 27810 + }, + { + "epoch": 1.3, + "learning_rate": 1.579922562036587e-05, + "loss": 0.2141, + "step": 27815 + }, + { + "epoch": 1.3, + "learning_rate": 1.5798441835311085e-05, + "loss": 0.2105, + "step": 27820 + }, + { + "epoch": 1.3, + "learning_rate": 1.57976580502563e-05, + "loss": 0.2329, + "step": 27825 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796874265201513e-05, + "loss": 0.2853, + "step": 27830 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796090480146727e-05, + "loss": 0.2378, + "step": 27835 + }, + { + "epoch": 1.3, + "learning_rate": 1.5795306695091937e-05, + "loss": 0.0456, + "step": 27840 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794522910037155e-05, + "loss": 0.0577, + "step": 27845 + }, + { + "epoch": 1.3, + "learning_rate": 1.5793739124982365e-05, + "loss": 0.0971, + "step": 27850 + }, + { + "epoch": 1.3, + "learning_rate": 1.579295533992758e-05, + "loss": 0.1172, + "step": 27855 + }, + { + "epoch": 1.3, + "learning_rate": 1.5792171554872793e-05, + "loss": 0.0872, + "step": 27860 + }, + { + "epoch": 1.3, + "learning_rate": 1.5791387769818007e-05, + "loss": 0.1082, + "step": 27865 + }, + { + "epoch": 1.3, + "learning_rate": 1.579060398476322e-05, + "loss": 0.1545, + "step": 27870 + }, + { + "epoch": 1.3, + "learning_rate": 1.578982019970843e-05, + "loss": 0.2331, + "step": 27875 + }, + { + "epoch": 1.3, + "learning_rate": 1.5789036414653645e-05, + "loss": 0.3144, + "step": 27880 + }, + { + "epoch": 1.3, + "learning_rate": 1.578825262959886e-05, + "loss": 0.2813, + "step": 27885 + }, + { + "epoch": 1.3, + "learning_rate": 1.5787468844544073e-05, + "loss": 0.0546, + "step": 27890 + }, + { + "epoch": 1.3, + "learning_rate": 1.5786685059489287e-05, + "loss": 0.0614, + "step": 27895 + }, + { + "epoch": 1.3, + "learning_rate": 1.57859012744345e-05, + "loss": 0.0767, + "step": 27900 + }, + { + "epoch": 1.3, + "learning_rate": 1.5785117489379715e-05, + "loss": 0.1322, + "step": 27905 + }, + { + "epoch": 1.3, + "learning_rate": 1.578433370432493e-05, + "loss": 0.134, + "step": 27910 + }, + { + "epoch": 1.3, + "learning_rate": 1.578354991927014e-05, + "loss": 0.1844, + "step": 27915 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782766134215357e-05, + "loss": 0.1431, + "step": 27920 + }, + { + "epoch": 1.3, + "learning_rate": 1.5781982349160567e-05, + "loss": 0.2983, + "step": 27925 + }, + { + "epoch": 1.3, + "learning_rate": 1.578119856410578e-05, + "loss": 0.381, + "step": 27930 + }, + { + "epoch": 1.3, + "learning_rate": 1.5780414779050995e-05, + "loss": 0.3145, + "step": 27935 + }, + { + "epoch": 1.3, + "learning_rate": 1.5779630993996205e-05, + "loss": 0.0599, + "step": 27940 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778847208941423e-05, + "loss": 0.1142, + "step": 27945 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778063423886633e-05, + "loss": 0.0946, + "step": 27950 + }, + { + "epoch": 1.3, + "learning_rate": 1.5777279638831847e-05, + "loss": 0.1403, + "step": 27955 + }, + { + "epoch": 1.3, + "learning_rate": 1.577649585377706e-05, + "loss": 0.1777, + "step": 27960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5775712068722275e-05, + "loss": 0.2978, + "step": 27965 + }, + { + "epoch": 1.31, + "learning_rate": 1.577492828366749e-05, + "loss": 0.1597, + "step": 27970 + }, + { + "epoch": 1.31, + "learning_rate": 1.5774144498612703e-05, + "loss": 0.195, + "step": 27975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5773360713557913e-05, + "loss": 0.356, + "step": 27980 + }, + { + "epoch": 1.31, + "learning_rate": 1.577257692850313e-05, + "loss": 0.3127, + "step": 27985 + }, + { + "epoch": 1.31, + "learning_rate": 1.577179314344834e-05, + "loss": 0.0726, + "step": 27990 + }, + { + "epoch": 1.31, + "learning_rate": 1.5771009358393555e-05, + "loss": 0.0738, + "step": 27995 + }, + { + "epoch": 1.31, + "learning_rate": 1.577022557333877e-05, + "loss": 0.0923, + "step": 28000 + }, + { + "epoch": 1.31, + "learning_rate": 1.5769441788283983e-05, + "loss": 0.0928, + "step": 28005 + }, + { + "epoch": 1.31, + "learning_rate": 1.5768658003229197e-05, + "loss": 0.1215, + "step": 28010 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767874218174407e-05, + "loss": 0.161, + "step": 28015 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767090433119624e-05, + "loss": 0.2235, + "step": 28020 + }, + { + "epoch": 1.31, + "learning_rate": 1.5766306648064835e-05, + "loss": 0.2186, + "step": 28025 + }, + { + "epoch": 1.31, + "learning_rate": 1.576552286301005e-05, + "loss": 0.3355, + "step": 28030 + }, + { + "epoch": 1.31, + "learning_rate": 1.5764739077955263e-05, + "loss": 0.3447, + "step": 28035 + }, + { + "epoch": 1.31, + "learning_rate": 1.5763955292900477e-05, + "loss": 0.0388, + "step": 28040 + }, + { + "epoch": 1.31, + "learning_rate": 1.576317150784569e-05, + "loss": 0.074, + "step": 28045 + }, + { + "epoch": 1.31, + "learning_rate": 1.5762387722790905e-05, + "loss": 0.058, + "step": 28050 + }, + { + "epoch": 1.31, + "learning_rate": 1.5761603937736115e-05, + "loss": 0.1904, + "step": 28055 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760820152681332e-05, + "loss": 0.0815, + "step": 28060 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760036367626543e-05, + "loss": 0.1802, + "step": 28065 + }, + { + "epoch": 1.31, + "learning_rate": 1.5759252582571757e-05, + "loss": 0.1224, + "step": 28070 + }, + { + "epoch": 1.31, + "learning_rate": 1.575846879751697e-05, + "loss": 0.2543, + "step": 28075 + }, + { + "epoch": 1.31, + "learning_rate": 1.575768501246218e-05, + "loss": 0.248, + "step": 28080 + }, + { + "epoch": 1.31, + "learning_rate": 1.57569012274074e-05, + "loss": 0.3462, + "step": 28085 + }, + { + "epoch": 1.31, + "learning_rate": 1.575611744235261e-05, + "loss": 0.0377, + "step": 28090 + }, + { + "epoch": 1.31, + "learning_rate": 1.5755333657297823e-05, + "loss": 0.0877, + "step": 28095 + }, + { + "epoch": 1.31, + "learning_rate": 1.5754549872243037e-05, + "loss": 0.1138, + "step": 28100 + }, + { + "epoch": 1.31, + "learning_rate": 1.575376608718825e-05, + "loss": 0.1132, + "step": 28105 + }, + { + "epoch": 1.31, + "learning_rate": 1.5752982302133465e-05, + "loss": 0.1068, + "step": 28110 + }, + { + "epoch": 1.31, + "learning_rate": 1.575219851707868e-05, + "loss": 0.1146, + "step": 28115 + }, + { + "epoch": 1.31, + "learning_rate": 1.5751414732023892e-05, + "loss": 0.1925, + "step": 28120 + }, + { + "epoch": 1.31, + "learning_rate": 1.5750630946969106e-05, + "loss": 0.2804, + "step": 28125 + }, + { + "epoch": 1.31, + "learning_rate": 1.5749847161914317e-05, + "loss": 0.4683, + "step": 28130 + }, + { + "epoch": 1.31, + "learning_rate": 1.574906337685953e-05, + "loss": 0.3226, + "step": 28135 + }, + { + "epoch": 1.31, + "learning_rate": 1.5748279591804745e-05, + "loss": 0.063, + "step": 28140 + }, + { + "epoch": 1.31, + "learning_rate": 1.574749580674996e-05, + "loss": 0.0538, + "step": 28145 + }, + { + "epoch": 1.31, + "learning_rate": 1.5746712021695172e-05, + "loss": 0.0772, + "step": 28150 + }, + { + "epoch": 1.31, + "learning_rate": 1.5745928236640383e-05, + "loss": 0.1166, + "step": 28155 + }, + { + "epoch": 1.31, + "learning_rate": 1.57451444515856e-05, + "loss": 0.1212, + "step": 28160 + }, + { + "epoch": 1.31, + "learning_rate": 1.574436066653081e-05, + "loss": 0.1134, + "step": 28165 + }, + { + "epoch": 1.31, + "learning_rate": 1.5743576881476025e-05, + "loss": 0.2492, + "step": 28170 + }, + { + "epoch": 1.31, + "learning_rate": 1.574279309642124e-05, + "loss": 0.3189, + "step": 28175 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742009311366453e-05, + "loss": 0.2254, + "step": 28180 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741225526311666e-05, + "loss": 0.2818, + "step": 28185 + }, + { + "epoch": 1.32, + "learning_rate": 1.574044174125688e-05, + "loss": 0.0439, + "step": 28190 + }, + { + "epoch": 1.32, + "learning_rate": 1.573965795620209e-05, + "loss": 0.1292, + "step": 28195 + }, + { + "epoch": 1.32, + "learning_rate": 1.5738874171147305e-05, + "loss": 0.1012, + "step": 28200 + }, + { + "epoch": 1.32, + "learning_rate": 1.573809038609252e-05, + "loss": 0.08, + "step": 28205 + }, + { + "epoch": 1.32, + "learning_rate": 1.5737306601037733e-05, + "loss": 0.1199, + "step": 28210 + }, + { + "epoch": 1.32, + "learning_rate": 1.5736522815982946e-05, + "loss": 0.2438, + "step": 28215 + }, + { + "epoch": 1.32, + "learning_rate": 1.573573903092816e-05, + "loss": 0.0924, + "step": 28220 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734955245873374e-05, + "loss": 0.2114, + "step": 28225 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734171460818585e-05, + "loss": 0.323, + "step": 28230 + }, + { + "epoch": 1.32, + "learning_rate": 1.5733387675763802e-05, + "loss": 0.2703, + "step": 28235 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732603890709013e-05, + "loss": 0.0825, + "step": 28240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5731820105654227e-05, + "loss": 0.0734, + "step": 28245 + }, + { + "epoch": 1.32, + "learning_rate": 1.573103632059944e-05, + "loss": 0.0782, + "step": 28250 + }, + { + "epoch": 1.32, + "learning_rate": 1.5730252535544654e-05, + "loss": 0.1712, + "step": 28255 + }, + { + "epoch": 1.32, + "learning_rate": 1.5729468750489868e-05, + "loss": 0.1776, + "step": 28260 + }, + { + "epoch": 1.32, + "learning_rate": 1.572868496543508e-05, + "loss": 0.1425, + "step": 28265 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727901180380293e-05, + "loss": 0.2537, + "step": 28270 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727117395325507e-05, + "loss": 0.2227, + "step": 28275 + }, + { + "epoch": 1.32, + "learning_rate": 1.572633361027072e-05, + "loss": 0.4145, + "step": 28280 + }, + { + "epoch": 1.32, + "learning_rate": 1.5725549825215934e-05, + "loss": 0.4865, + "step": 28285 + }, + { + "epoch": 1.32, + "learning_rate": 1.5724766040161148e-05, + "loss": 0.0633, + "step": 28290 + }, + { + "epoch": 1.32, + "learning_rate": 1.572398225510636e-05, + "loss": 0.0838, + "step": 28295 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723198470051576e-05, + "loss": 0.1071, + "step": 28300 + }, + { + "epoch": 1.32, + "learning_rate": 1.5722414684996787e-05, + "loss": 0.1015, + "step": 28305 + }, + { + "epoch": 1.32, + "learning_rate": 1.5721630899942e-05, + "loss": 0.0966, + "step": 28310 + }, + { + "epoch": 1.32, + "learning_rate": 1.5720847114887214e-05, + "loss": 0.2073, + "step": 28315 + }, + { + "epoch": 1.32, + "learning_rate": 1.572006332983243e-05, + "loss": 0.1782, + "step": 28320 + }, + { + "epoch": 1.32, + "learning_rate": 1.5719279544777642e-05, + "loss": 0.2457, + "step": 28325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5718495759722853e-05, + "loss": 0.3423, + "step": 28330 + }, + { + "epoch": 1.32, + "learning_rate": 1.571771197466807e-05, + "loss": 0.3075, + "step": 28335 + }, + { + "epoch": 1.32, + "learning_rate": 1.571692818961328e-05, + "loss": 0.0612, + "step": 28340 + }, + { + "epoch": 1.32, + "learning_rate": 1.5716144404558494e-05, + "loss": 0.087, + "step": 28345 + }, + { + "epoch": 1.32, + "learning_rate": 1.571536061950371e-05, + "loss": 0.0356, + "step": 28350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5714576834448922e-05, + "loss": 0.1484, + "step": 28355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5713793049394136e-05, + "loss": 0.1245, + "step": 28360 + }, + { + "epoch": 1.32, + "learning_rate": 1.571300926433935e-05, + "loss": 0.117, + "step": 28365 + }, + { + "epoch": 1.32, + "learning_rate": 1.571222547928456e-05, + "loss": 0.187, + "step": 28370 + }, + { + "epoch": 1.32, + "learning_rate": 1.5711441694229778e-05, + "loss": 0.3141, + "step": 28375 + }, + { + "epoch": 1.32, + "learning_rate": 1.571065790917499e-05, + "loss": 0.3338, + "step": 28380 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709874124120202e-05, + "loss": 0.3198, + "step": 28385 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709090339065416e-05, + "loss": 0.0383, + "step": 28390 + }, + { + "epoch": 1.32, + "learning_rate": 1.5708306554010627e-05, + "loss": 0.1249, + "step": 28395 + }, + { + "epoch": 1.33, + "learning_rate": 1.5707522768955844e-05, + "loss": 0.0838, + "step": 28400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5706738983901055e-05, + "loss": 0.1157, + "step": 28405 + }, + { + "epoch": 1.33, + "learning_rate": 1.570595519884627e-05, + "loss": 0.1878, + "step": 28410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5705171413791482e-05, + "loss": 0.1487, + "step": 28415 + }, + { + "epoch": 1.33, + "learning_rate": 1.5704387628736696e-05, + "loss": 0.1422, + "step": 28420 + }, + { + "epoch": 1.33, + "learning_rate": 1.570360384368191e-05, + "loss": 0.1978, + "step": 28425 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702820058627124e-05, + "loss": 0.3651, + "step": 28430 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702036273572338e-05, + "loss": 0.2579, + "step": 28435 + }, + { + "epoch": 1.33, + "learning_rate": 1.5701252488517552e-05, + "loss": 0.0636, + "step": 28440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5700468703462762e-05, + "loss": 0.069, + "step": 28445 + }, + { + "epoch": 1.33, + "learning_rate": 1.569968491840798e-05, + "loss": 0.0931, + "step": 28450 + }, + { + "epoch": 1.33, + "learning_rate": 1.569890113335319e-05, + "loss": 0.1109, + "step": 28455 + }, + { + "epoch": 1.33, + "learning_rate": 1.5698117348298404e-05, + "loss": 0.0914, + "step": 28460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5697333563243618e-05, + "loss": 0.1079, + "step": 28465 + }, + { + "epoch": 1.33, + "learning_rate": 1.569654977818883e-05, + "loss": 0.2067, + "step": 28470 + }, + { + "epoch": 1.33, + "learning_rate": 1.5695765993134046e-05, + "loss": 0.2517, + "step": 28475 + }, + { + "epoch": 1.33, + "learning_rate": 1.5694982208079256e-05, + "loss": 0.3391, + "step": 28480 + }, + { + "epoch": 1.33, + "learning_rate": 1.569419842302447e-05, + "loss": 0.1997, + "step": 28485 + }, + { + "epoch": 1.33, + "learning_rate": 1.5693414637969684e-05, + "loss": 0.0238, + "step": 28490 + }, + { + "epoch": 1.33, + "learning_rate": 1.5692630852914898e-05, + "loss": 0.0778, + "step": 28495 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691847067860112e-05, + "loss": 0.0956, + "step": 28500 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691063282805326e-05, + "loss": 0.1695, + "step": 28505 + }, + { + "epoch": 1.33, + "learning_rate": 1.5690279497750536e-05, + "loss": 0.0927, + "step": 28510 + }, + { + "epoch": 1.33, + "learning_rate": 1.5689495712695754e-05, + "loss": 0.109, + "step": 28515 + }, + { + "epoch": 1.33, + "learning_rate": 1.5688711927640964e-05, + "loss": 0.2123, + "step": 28520 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687928142586178e-05, + "loss": 0.1514, + "step": 28525 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687144357531392e-05, + "loss": 0.4057, + "step": 28530 + }, + { + "epoch": 1.33, + "learning_rate": 1.5686360572476606e-05, + "loss": 0.3171, + "step": 28535 + }, + { + "epoch": 1.33, + "learning_rate": 1.568557678742182e-05, + "loss": 0.0878, + "step": 28540 + }, + { + "epoch": 1.33, + "learning_rate": 1.568479300236703e-05, + "loss": 0.0565, + "step": 28545 + }, + { + "epoch": 1.33, + "learning_rate": 1.5684009217312248e-05, + "loss": 0.1183, + "step": 28550 + }, + { + "epoch": 1.33, + "learning_rate": 1.5683225432257458e-05, + "loss": 0.1481, + "step": 28555 + }, + { + "epoch": 1.33, + "learning_rate": 1.5682441647202672e-05, + "loss": 0.1402, + "step": 28560 + }, + { + "epoch": 1.33, + "learning_rate": 1.5681657862147886e-05, + "loss": 0.1809, + "step": 28565 + }, + { + "epoch": 1.33, + "learning_rate": 1.56808740770931e-05, + "loss": 0.2217, + "step": 28570 + }, + { + "epoch": 1.33, + "learning_rate": 1.5680090292038314e-05, + "loss": 0.3258, + "step": 28575 + }, + { + "epoch": 1.33, + "learning_rate": 1.5679306506983528e-05, + "loss": 0.2286, + "step": 28580 + }, + { + "epoch": 1.33, + "learning_rate": 1.5678522721928738e-05, + "loss": 0.2848, + "step": 28585 + }, + { + "epoch": 1.33, + "learning_rate": 1.5677738936873952e-05, + "loss": 0.0547, + "step": 28590 + }, + { + "epoch": 1.33, + "learning_rate": 1.5676955151819166e-05, + "loss": 0.0618, + "step": 28595 + }, + { + "epoch": 1.33, + "learning_rate": 1.567617136676438e-05, + "loss": 0.0941, + "step": 28600 + }, + { + "epoch": 1.33, + "learning_rate": 1.5675387581709594e-05, + "loss": 0.1233, + "step": 28605 + }, + { + "epoch": 1.33, + "learning_rate": 1.5674603796654804e-05, + "loss": 0.1848, + "step": 28610 + }, + { + "epoch": 1.34, + "learning_rate": 1.567382001160002e-05, + "loss": 0.0776, + "step": 28615 + }, + { + "epoch": 1.34, + "learning_rate": 1.5673036226545232e-05, + "loss": 0.248, + "step": 28620 + }, + { + "epoch": 1.34, + "learning_rate": 1.5672252441490446e-05, + "loss": 0.232, + "step": 28625 + }, + { + "epoch": 1.34, + "learning_rate": 1.567146865643566e-05, + "loss": 0.4364, + "step": 28630 + }, + { + "epoch": 1.34, + "learning_rate": 1.5670684871380874e-05, + "loss": 0.2168, + "step": 28635 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669901086326088e-05, + "loss": 0.049, + "step": 28640 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669117301271302e-05, + "loss": 0.0206, + "step": 28645 + }, + { + "epoch": 1.34, + "learning_rate": 1.5668333516216516e-05, + "loss": 0.1098, + "step": 28650 + }, + { + "epoch": 1.34, + "learning_rate": 1.5667549731161726e-05, + "loss": 0.1082, + "step": 28655 + }, + { + "epoch": 1.34, + "learning_rate": 1.566676594610694e-05, + "loss": 0.1286, + "step": 28660 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665982161052154e-05, + "loss": 0.2045, + "step": 28665 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665198375997368e-05, + "loss": 0.1718, + "step": 28670 + }, + { + "epoch": 1.34, + "learning_rate": 1.5664414590942582e-05, + "loss": 0.3584, + "step": 28675 + }, + { + "epoch": 1.34, + "learning_rate": 1.5663630805887796e-05, + "loss": 0.4569, + "step": 28680 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662847020833006e-05, + "loss": 0.3119, + "step": 28685 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662063235778223e-05, + "loss": 0.0559, + "step": 28690 + }, + { + "epoch": 1.34, + "learning_rate": 1.5661279450723434e-05, + "loss": 0.0382, + "step": 28695 + }, + { + "epoch": 1.34, + "learning_rate": 1.5660495665668648e-05, + "loss": 0.1046, + "step": 28700 + }, + { + "epoch": 1.34, + "learning_rate": 1.5659711880613862e-05, + "loss": 0.1162, + "step": 28705 + }, + { + "epoch": 1.34, + "learning_rate": 1.5658928095559076e-05, + "loss": 0.1435, + "step": 28710 + }, + { + "epoch": 1.34, + "learning_rate": 1.565814431050429e-05, + "loss": 0.203, + "step": 28715 + }, + { + "epoch": 1.34, + "learning_rate": 1.56573605254495e-05, + "loss": 0.2438, + "step": 28720 + }, + { + "epoch": 1.34, + "learning_rate": 1.5656576740394714e-05, + "loss": 0.214, + "step": 28725 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655792955339928e-05, + "loss": 0.369, + "step": 28730 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655009170285142e-05, + "loss": 0.2569, + "step": 28735 + }, + { + "epoch": 1.34, + "learning_rate": 1.5654225385230356e-05, + "loss": 0.0718, + "step": 28740 + }, + { + "epoch": 1.34, + "learning_rate": 1.565344160017557e-05, + "loss": 0.1807, + "step": 28745 + }, + { + "epoch": 1.34, + "learning_rate": 1.5652657815120784e-05, + "loss": 0.0907, + "step": 28750 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651874030065997e-05, + "loss": 0.1447, + "step": 28755 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651090245011208e-05, + "loss": 0.1166, + "step": 28760 + }, + { + "epoch": 1.34, + "learning_rate": 1.5650306459956425e-05, + "loss": 0.1564, + "step": 28765 + }, + { + "epoch": 1.34, + "learning_rate": 1.5649522674901636e-05, + "loss": 0.2352, + "step": 28770 + }, + { + "epoch": 1.34, + "learning_rate": 1.564873888984685e-05, + "loss": 0.2784, + "step": 28775 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647955104792064e-05, + "loss": 0.3881, + "step": 28780 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647171319737274e-05, + "loss": 0.2386, + "step": 28785 + }, + { + "epoch": 1.34, + "learning_rate": 1.564638753468249e-05, + "loss": 0.0621, + "step": 28790 + }, + { + "epoch": 1.34, + "learning_rate": 1.5645603749627702e-05, + "loss": 0.0444, + "step": 28795 + }, + { + "epoch": 1.34, + "learning_rate": 1.5644819964572916e-05, + "loss": 0.0656, + "step": 28800 + }, + { + "epoch": 1.34, + "learning_rate": 1.564403617951813e-05, + "loss": 0.0765, + "step": 28805 + }, + { + "epoch": 1.34, + "learning_rate": 1.5643252394463344e-05, + "loss": 0.2032, + "step": 28810 + }, + { + "epoch": 1.34, + "learning_rate": 1.5642468609408558e-05, + "loss": 0.1976, + "step": 28815 + }, + { + "epoch": 1.34, + "learning_rate": 1.564168482435377e-05, + "loss": 0.1735, + "step": 28820 + }, + { + "epoch": 1.35, + "learning_rate": 1.5640901039298982e-05, + "loss": 0.2373, + "step": 28825 + }, + { + "epoch": 1.35, + "learning_rate": 1.56401172542442e-05, + "loss": 0.3012, + "step": 28830 + }, + { + "epoch": 1.35, + "learning_rate": 1.563933346918941e-05, + "loss": 0.3411, + "step": 28835 + }, + { + "epoch": 1.35, + "learning_rate": 1.5638549684134624e-05, + "loss": 0.0834, + "step": 28840 + }, + { + "epoch": 1.35, + "learning_rate": 1.5637765899079838e-05, + "loss": 0.0505, + "step": 28845 + }, + { + "epoch": 1.35, + "learning_rate": 1.563698211402505e-05, + "loss": 0.076, + "step": 28850 + }, + { + "epoch": 1.35, + "learning_rate": 1.5636198328970265e-05, + "loss": 0.0456, + "step": 28855 + }, + { + "epoch": 1.35, + "learning_rate": 1.5635414543915476e-05, + "loss": 0.0746, + "step": 28860 + }, + { + "epoch": 1.35, + "learning_rate": 1.5634630758860693e-05, + "loss": 0.1093, + "step": 28865 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633846973805904e-05, + "loss": 0.1907, + "step": 28870 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633063188751118e-05, + "loss": 0.1332, + "step": 28875 + }, + { + "epoch": 1.35, + "learning_rate": 1.563227940369633e-05, + "loss": 0.3031, + "step": 28880 + }, + { + "epoch": 1.35, + "learning_rate": 1.5631495618641545e-05, + "loss": 0.3297, + "step": 28885 + }, + { + "epoch": 1.35, + "learning_rate": 1.563071183358676e-05, + "loss": 0.0652, + "step": 28890 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629928048531973e-05, + "loss": 0.0844, + "step": 28895 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629144263477184e-05, + "loss": 0.1097, + "step": 28900 + }, + { + "epoch": 1.35, + "learning_rate": 1.56283604784224e-05, + "loss": 0.115, + "step": 28905 + }, + { + "epoch": 1.35, + "learning_rate": 1.562757669336761e-05, + "loss": 0.1077, + "step": 28910 + }, + { + "epoch": 1.35, + "learning_rate": 1.5626792908312826e-05, + "loss": 0.1605, + "step": 28915 + }, + { + "epoch": 1.35, + "learning_rate": 1.562600912325804e-05, + "loss": 0.2131, + "step": 28920 + }, + { + "epoch": 1.35, + "learning_rate": 1.562522533820325e-05, + "loss": 0.1889, + "step": 28925 + }, + { + "epoch": 1.35, + "learning_rate": 1.5624441553148467e-05, + "loss": 0.3687, + "step": 28930 + }, + { + "epoch": 1.35, + "learning_rate": 1.5623657768093678e-05, + "loss": 0.2025, + "step": 28935 + }, + { + "epoch": 1.35, + "learning_rate": 1.562287398303889e-05, + "loss": 0.0673, + "step": 28940 + }, + { + "epoch": 1.35, + "learning_rate": 1.5622090197984106e-05, + "loss": 0.0705, + "step": 28945 + }, + { + "epoch": 1.35, + "learning_rate": 1.562130641292932e-05, + "loss": 0.1058, + "step": 28950 + }, + { + "epoch": 1.35, + "learning_rate": 1.5620522627874533e-05, + "loss": 0.1207, + "step": 28955 + }, + { + "epoch": 1.35, + "learning_rate": 1.5619738842819747e-05, + "loss": 0.1275, + "step": 28960 + }, + { + "epoch": 1.35, + "learning_rate": 1.561895505776496e-05, + "loss": 0.2169, + "step": 28965 + }, + { + "epoch": 1.35, + "learning_rate": 1.5618171272710175e-05, + "loss": 0.1789, + "step": 28970 + }, + { + "epoch": 1.35, + "learning_rate": 1.5617387487655386e-05, + "loss": 0.2378, + "step": 28975 + }, + { + "epoch": 1.35, + "learning_rate": 1.56166037026006e-05, + "loss": 0.4098, + "step": 28980 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615819917545813e-05, + "loss": 0.2499, + "step": 28985 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615036132491027e-05, + "loss": 0.0793, + "step": 28990 + }, + { + "epoch": 1.35, + "learning_rate": 1.561425234743624e-05, + "loss": 0.067, + "step": 28995 + }, + { + "epoch": 1.35, + "learning_rate": 1.5613468562381452e-05, + "loss": 0.0746, + "step": 29000 + }, + { + "epoch": 1.35, + "learning_rate": 1.561268477732667e-05, + "loss": 0.2106, + "step": 29005 + }, + { + "epoch": 1.35, + "learning_rate": 1.561190099227188e-05, + "loss": 0.1199, + "step": 29010 + }, + { + "epoch": 1.35, + "learning_rate": 1.5611117207217093e-05, + "loss": 0.0969, + "step": 29015 + }, + { + "epoch": 1.35, + "learning_rate": 1.5610333422162307e-05, + "loss": 0.3277, + "step": 29020 + }, + { + "epoch": 1.35, + "learning_rate": 1.560954963710752e-05, + "loss": 0.2931, + "step": 29025 + }, + { + "epoch": 1.35, + "learning_rate": 1.5608765852052735e-05, + "loss": 0.4393, + "step": 29030 + }, + { + "epoch": 1.35, + "learning_rate": 1.560798206699795e-05, + "loss": 0.4498, + "step": 29035 + }, + { + "epoch": 1.36, + "learning_rate": 1.560719828194316e-05, + "loss": 0.0525, + "step": 29040 + }, + { + "epoch": 1.36, + "learning_rate": 1.5606414496888374e-05, + "loss": 0.0493, + "step": 29045 + }, + { + "epoch": 1.36, + "learning_rate": 1.5605630711833587e-05, + "loss": 0.0895, + "step": 29050 + }, + { + "epoch": 1.36, + "learning_rate": 1.56048469267788e-05, + "loss": 0.1352, + "step": 29055 + }, + { + "epoch": 1.36, + "learning_rate": 1.5604063141724015e-05, + "loss": 0.1055, + "step": 29060 + }, + { + "epoch": 1.36, + "learning_rate": 1.560327935666923e-05, + "loss": 0.2887, + "step": 29065 + }, + { + "epoch": 1.36, + "learning_rate": 1.5602495571614443e-05, + "loss": 0.1776, + "step": 29070 + }, + { + "epoch": 1.36, + "learning_rate": 1.5601711786559654e-05, + "loss": 0.1566, + "step": 29075 + }, + { + "epoch": 1.36, + "learning_rate": 1.560092800150487e-05, + "loss": 0.2249, + "step": 29080 + }, + { + "epoch": 1.36, + "learning_rate": 1.560014421645008e-05, + "loss": 0.4125, + "step": 29085 + }, + { + "epoch": 1.36, + "learning_rate": 1.5599360431395295e-05, + "loss": 0.1064, + "step": 29090 + }, + { + "epoch": 1.36, + "learning_rate": 1.559857664634051e-05, + "loss": 0.1015, + "step": 29095 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597792861285723e-05, + "loss": 0.0806, + "step": 29100 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597009076230937e-05, + "loss": 0.1206, + "step": 29105 + }, + { + "epoch": 1.36, + "learning_rate": 1.5596225291176148e-05, + "loss": 0.1109, + "step": 29110 + }, + { + "epoch": 1.36, + "learning_rate": 1.559544150612136e-05, + "loss": 0.1305, + "step": 29115 + }, + { + "epoch": 1.36, + "learning_rate": 1.5594657721066575e-05, + "loss": 0.2071, + "step": 29120 + }, + { + "epoch": 1.36, + "learning_rate": 1.559387393601179e-05, + "loss": 0.1554, + "step": 29125 + }, + { + "epoch": 1.36, + "learning_rate": 1.5593090150957003e-05, + "loss": 0.5038, + "step": 29130 + }, + { + "epoch": 1.36, + "learning_rate": 1.5592306365902217e-05, + "loss": 0.3296, + "step": 29135 + }, + { + "epoch": 1.36, + "learning_rate": 1.5591522580847428e-05, + "loss": 0.1192, + "step": 29140 + }, + { + "epoch": 1.36, + "learning_rate": 1.5590738795792645e-05, + "loss": 0.1061, + "step": 29145 + }, + { + "epoch": 1.36, + "learning_rate": 1.5589955010737855e-05, + "loss": 0.0826, + "step": 29150 + }, + { + "epoch": 1.36, + "learning_rate": 1.558917122568307e-05, + "loss": 0.1131, + "step": 29155 + }, + { + "epoch": 1.36, + "learning_rate": 1.5588387440628283e-05, + "loss": 0.1495, + "step": 29160 + }, + { + "epoch": 1.36, + "learning_rate": 1.5587603655573497e-05, + "loss": 0.2157, + "step": 29165 + }, + { + "epoch": 1.36, + "learning_rate": 1.558681987051871e-05, + "loss": 0.2206, + "step": 29170 + }, + { + "epoch": 1.36, + "learning_rate": 1.558603608546392e-05, + "loss": 0.2772, + "step": 29175 + }, + { + "epoch": 1.36, + "learning_rate": 1.558525230040914e-05, + "loss": 0.5025, + "step": 29180 + }, + { + "epoch": 1.36, + "learning_rate": 1.558446851535435e-05, + "loss": 0.3455, + "step": 29185 + }, + { + "epoch": 1.36, + "learning_rate": 1.5583684730299563e-05, + "loss": 0.0408, + "step": 29190 + }, + { + "epoch": 1.36, + "learning_rate": 1.5582900945244777e-05, + "loss": 0.0804, + "step": 29195 + }, + { + "epoch": 1.36, + "learning_rate": 1.558211716018999e-05, + "loss": 0.1028, + "step": 29200 + }, + { + "epoch": 1.36, + "learning_rate": 1.5581333375135205e-05, + "loss": 0.0895, + "step": 29205 + }, + { + "epoch": 1.36, + "learning_rate": 1.558054959008042e-05, + "loss": 0.151, + "step": 29210 + }, + { + "epoch": 1.36, + "learning_rate": 1.557976580502563e-05, + "loss": 0.1078, + "step": 29215 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578982019970847e-05, + "loss": 0.287, + "step": 29220 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578198234916057e-05, + "loss": 0.2512, + "step": 29225 + }, + { + "epoch": 1.36, + "learning_rate": 1.557741444986127e-05, + "loss": 0.3908, + "step": 29230 + }, + { + "epoch": 1.36, + "learning_rate": 1.5576630664806485e-05, + "loss": 0.3252, + "step": 29235 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575846879751695e-05, + "loss": 0.0572, + "step": 29240 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575063094696913e-05, + "loss": 0.0728, + "step": 29245 + }, + { + "epoch": 1.36, + "learning_rate": 1.5574279309642123e-05, + "loss": 0.1044, + "step": 29250 + }, + { + "epoch": 1.37, + "learning_rate": 1.5573495524587337e-05, + "loss": 0.1306, + "step": 29255 + }, + { + "epoch": 1.37, + "learning_rate": 1.557271173953255e-05, + "loss": 0.1469, + "step": 29260 + }, + { + "epoch": 1.37, + "learning_rate": 1.5571927954477765e-05, + "loss": 0.1307, + "step": 29265 + }, + { + "epoch": 1.37, + "learning_rate": 1.557114416942298e-05, + "loss": 0.2949, + "step": 29270 + }, + { + "epoch": 1.37, + "learning_rate": 1.5570360384368193e-05, + "loss": 0.1483, + "step": 29275 + }, + { + "epoch": 1.37, + "learning_rate": 1.5569576599313407e-05, + "loss": 0.3853, + "step": 29280 + }, + { + "epoch": 1.37, + "learning_rate": 1.556879281425862e-05, + "loss": 0.2308, + "step": 29285 + }, + { + "epoch": 1.37, + "learning_rate": 1.556800902920383e-05, + "loss": 0.0336, + "step": 29290 + }, + { + "epoch": 1.37, + "learning_rate": 1.556722524414905e-05, + "loss": 0.0368, + "step": 29295 + }, + { + "epoch": 1.37, + "learning_rate": 1.556644145909426e-05, + "loss": 0.0633, + "step": 29300 + }, + { + "epoch": 1.37, + "learning_rate": 1.5565657674039473e-05, + "loss": 0.1041, + "step": 29305 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564873888984687e-05, + "loss": 0.2455, + "step": 29310 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564090103929897e-05, + "loss": 0.128, + "step": 29315 + }, + { + "epoch": 1.37, + "learning_rate": 1.5563306318875115e-05, + "loss": 0.1876, + "step": 29320 + }, + { + "epoch": 1.37, + "learning_rate": 1.5562522533820325e-05, + "loss": 0.1958, + "step": 29325 + }, + { + "epoch": 1.37, + "learning_rate": 1.556173874876554e-05, + "loss": 0.3136, + "step": 29330 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560954963710753e-05, + "loss": 0.3116, + "step": 29335 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560171178655967e-05, + "loss": 0.0619, + "step": 29340 + }, + { + "epoch": 1.37, + "learning_rate": 1.555938739360118e-05, + "loss": 0.0497, + "step": 29345 + }, + { + "epoch": 1.37, + "learning_rate": 1.5558603608546395e-05, + "loss": 0.1101, + "step": 29350 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557819823491605e-05, + "loss": 0.1347, + "step": 29355 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557036038436822e-05, + "loss": 0.14, + "step": 29360 + }, + { + "epoch": 1.37, + "learning_rate": 1.5556252253382033e-05, + "loss": 0.2094, + "step": 29365 + }, + { + "epoch": 1.37, + "learning_rate": 1.5555468468327247e-05, + "loss": 0.2115, + "step": 29370 + }, + { + "epoch": 1.37, + "learning_rate": 1.555468468327246e-05, + "loss": 0.23, + "step": 29375 + }, + { + "epoch": 1.37, + "learning_rate": 1.5553900898217675e-05, + "loss": 0.409, + "step": 29380 + }, + { + "epoch": 1.37, + "learning_rate": 1.555311711316289e-05, + "loss": 0.2575, + "step": 29385 + }, + { + "epoch": 1.37, + "learning_rate": 1.55523333281081e-05, + "loss": 0.0701, + "step": 29390 + }, + { + "epoch": 1.37, + "learning_rate": 1.5551549543053316e-05, + "loss": 0.0587, + "step": 29395 + }, + { + "epoch": 1.37, + "learning_rate": 1.5550765757998527e-05, + "loss": 0.1614, + "step": 29400 + }, + { + "epoch": 1.37, + "learning_rate": 1.554998197294374e-05, + "loss": 0.1066, + "step": 29405 + }, + { + "epoch": 1.37, + "learning_rate": 1.5549198187888955e-05, + "loss": 0.1333, + "step": 29410 + }, + { + "epoch": 1.37, + "learning_rate": 1.554841440283417e-05, + "loss": 0.181, + "step": 29415 + }, + { + "epoch": 1.37, + "learning_rate": 1.5547630617779383e-05, + "loss": 0.183, + "step": 29420 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546846832724596e-05, + "loss": 0.2203, + "step": 29425 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546063047669807e-05, + "loss": 0.4626, + "step": 29430 + }, + { + "epoch": 1.37, + "learning_rate": 1.554527926261502e-05, + "loss": 0.2009, + "step": 29435 + }, + { + "epoch": 1.37, + "learning_rate": 1.5544495477560235e-05, + "loss": 0.0562, + "step": 29440 + }, + { + "epoch": 1.37, + "learning_rate": 1.554371169250545e-05, + "loss": 0.1155, + "step": 29445 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542927907450663e-05, + "loss": 0.0612, + "step": 29450 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542144122395873e-05, + "loss": 0.1157, + "step": 29455 + }, + { + "epoch": 1.37, + "learning_rate": 1.554136033734109e-05, + "loss": 0.1621, + "step": 29460 + }, + { + "epoch": 1.37, + "learning_rate": 1.55405765522863e-05, + "loss": 0.0789, + "step": 29465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5539792767231515e-05, + "loss": 0.2351, + "step": 29470 + }, + { + "epoch": 1.38, + "learning_rate": 1.553900898217673e-05, + "loss": 0.2513, + "step": 29475 + }, + { + "epoch": 1.38, + "learning_rate": 1.5538225197121943e-05, + "loss": 0.3753, + "step": 29480 + }, + { + "epoch": 1.38, + "learning_rate": 1.5537441412067157e-05, + "loss": 0.2661, + "step": 29485 + }, + { + "epoch": 1.38, + "learning_rate": 1.553665762701237e-05, + "loss": 0.1098, + "step": 29490 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535873841957584e-05, + "loss": 0.0493, + "step": 29495 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535090056902795e-05, + "loss": 0.1339, + "step": 29500 + }, + { + "epoch": 1.38, + "learning_rate": 1.553430627184801e-05, + "loss": 0.1432, + "step": 29505 + }, + { + "epoch": 1.38, + "learning_rate": 1.5533522486793223e-05, + "loss": 0.107, + "step": 29510 + }, + { + "epoch": 1.38, + "learning_rate": 1.5532738701738437e-05, + "loss": 0.1993, + "step": 29515 + }, + { + "epoch": 1.38, + "learning_rate": 1.553195491668365e-05, + "loss": 0.3002, + "step": 29520 + }, + { + "epoch": 1.38, + "learning_rate": 1.5531171131628864e-05, + "loss": 0.2679, + "step": 29525 + }, + { + "epoch": 1.38, + "learning_rate": 1.5530387346574075e-05, + "loss": 0.3519, + "step": 29530 + }, + { + "epoch": 1.38, + "learning_rate": 1.5529603561519292e-05, + "loss": 0.2758, + "step": 29535 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528819776464503e-05, + "loss": 0.1208, + "step": 29540 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528035991409717e-05, + "loss": 0.0767, + "step": 29545 + }, + { + "epoch": 1.38, + "learning_rate": 1.552725220635493e-05, + "loss": 0.0603, + "step": 29550 + }, + { + "epoch": 1.38, + "learning_rate": 1.5526468421300144e-05, + "loss": 0.0691, + "step": 29555 + }, + { + "epoch": 1.38, + "learning_rate": 1.552568463624536e-05, + "loss": 0.1479, + "step": 29560 + }, + { + "epoch": 1.38, + "learning_rate": 1.552490085119057e-05, + "loss": 0.0891, + "step": 29565 + }, + { + "epoch": 1.38, + "learning_rate": 1.5524117066135783e-05, + "loss": 0.2104, + "step": 29570 + }, + { + "epoch": 1.38, + "learning_rate": 1.5523333281080997e-05, + "loss": 0.2748, + "step": 29575 + }, + { + "epoch": 1.38, + "learning_rate": 1.552254949602621e-05, + "loss": 0.3622, + "step": 29580 + }, + { + "epoch": 1.38, + "learning_rate": 1.5521765710971425e-05, + "loss": 0.252, + "step": 29585 + }, + { + "epoch": 1.38, + "learning_rate": 1.552098192591664e-05, + "loss": 0.079, + "step": 29590 + }, + { + "epoch": 1.38, + "learning_rate": 1.5520198140861852e-05, + "loss": 0.0913, + "step": 29595 + }, + { + "epoch": 1.38, + "learning_rate": 1.5519414355807066e-05, + "loss": 0.1742, + "step": 29600 + }, + { + "epoch": 1.38, + "learning_rate": 1.5518630570752277e-05, + "loss": 0.073, + "step": 29605 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517846785697494e-05, + "loss": 0.091, + "step": 29610 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517063000642705e-05, + "loss": 0.1321, + "step": 29615 + }, + { + "epoch": 1.38, + "learning_rate": 1.551627921558792e-05, + "loss": 0.1131, + "step": 29620 + }, + { + "epoch": 1.38, + "learning_rate": 1.5515495430533132e-05, + "loss": 0.3363, + "step": 29625 + }, + { + "epoch": 1.38, + "learning_rate": 1.5514711645478343e-05, + "loss": 0.3416, + "step": 29630 + }, + { + "epoch": 1.38, + "learning_rate": 1.551392786042356e-05, + "loss": 0.3418, + "step": 29635 + }, + { + "epoch": 1.38, + "learning_rate": 1.551314407536877e-05, + "loss": 0.0653, + "step": 29640 + }, + { + "epoch": 1.38, + "learning_rate": 1.5512360290313985e-05, + "loss": 0.0418, + "step": 29645 + }, + { + "epoch": 1.38, + "learning_rate": 1.55115765052592e-05, + "loss": 0.0941, + "step": 29650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510792720204412e-05, + "loss": 0.1759, + "step": 29655 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510008935149626e-05, + "loss": 0.111, + "step": 29660 + }, + { + "epoch": 1.38, + "learning_rate": 1.550922515009484e-05, + "loss": 0.1351, + "step": 29665 + }, + { + "epoch": 1.38, + "learning_rate": 1.550844136504005e-05, + "loss": 0.1784, + "step": 29670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5507657579985268e-05, + "loss": 0.2896, + "step": 29675 + }, + { + "epoch": 1.38, + "learning_rate": 1.550687379493048e-05, + "loss": 0.3232, + "step": 29680 + }, + { + "epoch": 1.39, + "learning_rate": 1.5506090009875692e-05, + "loss": 0.5113, + "step": 29685 + }, + { + "epoch": 1.39, + "learning_rate": 1.5505306224820906e-05, + "loss": 0.0731, + "step": 29690 + }, + { + "epoch": 1.39, + "learning_rate": 1.550452243976612e-05, + "loss": 0.0491, + "step": 29695 + }, + { + "epoch": 1.39, + "learning_rate": 1.5503738654711334e-05, + "loss": 0.1016, + "step": 29700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502954869656545e-05, + "loss": 0.0946, + "step": 29705 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502171084601762e-05, + "loss": 0.1709, + "step": 29710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5501387299546973e-05, + "loss": 0.2002, + "step": 29715 + }, + { + "epoch": 1.39, + "learning_rate": 1.5500603514492186e-05, + "loss": 0.2524, + "step": 29720 + }, + { + "epoch": 1.39, + "learning_rate": 1.54998197294374e-05, + "loss": 0.2481, + "step": 29725 + }, + { + "epoch": 1.39, + "learning_rate": 1.5499035944382614e-05, + "loss": 0.4631, + "step": 29730 + }, + { + "epoch": 1.39, + "learning_rate": 1.5498252159327828e-05, + "loss": 0.273, + "step": 29735 + }, + { + "epoch": 1.39, + "learning_rate": 1.5497468374273042e-05, + "loss": 0.0508, + "step": 29740 + }, + { + "epoch": 1.39, + "learning_rate": 1.5496684589218253e-05, + "loss": 0.1125, + "step": 29745 + }, + { + "epoch": 1.39, + "learning_rate": 1.549590080416347e-05, + "loss": 0.0327, + "step": 29750 + }, + { + "epoch": 1.39, + "learning_rate": 1.549511701910868e-05, + "loss": 0.1278, + "step": 29755 + }, + { + "epoch": 1.39, + "learning_rate": 1.5494333234053894e-05, + "loss": 0.0829, + "step": 29760 + }, + { + "epoch": 1.39, + "learning_rate": 1.5493549448999108e-05, + "loss": 0.1396, + "step": 29765 + }, + { + "epoch": 1.39, + "learning_rate": 1.549276566394432e-05, + "loss": 0.1894, + "step": 29770 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491981878889536e-05, + "loss": 0.2046, + "step": 29775 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491198093834747e-05, + "loss": 0.2948, + "step": 29780 + }, + { + "epoch": 1.39, + "learning_rate": 1.549041430877996e-05, + "loss": 0.3256, + "step": 29785 + }, + { + "epoch": 1.39, + "learning_rate": 1.5489630523725174e-05, + "loss": 0.09, + "step": 29790 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488846738670388e-05, + "loss": 0.0646, + "step": 29795 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488062953615602e-05, + "loss": 0.1158, + "step": 29800 + }, + { + "epoch": 1.39, + "learning_rate": 1.5487279168560816e-05, + "loss": 0.1228, + "step": 29805 + }, + { + "epoch": 1.39, + "learning_rate": 1.548649538350603e-05, + "loss": 0.1435, + "step": 29810 + }, + { + "epoch": 1.39, + "learning_rate": 1.5485711598451244e-05, + "loss": 0.1631, + "step": 29815 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484927813396454e-05, + "loss": 0.0708, + "step": 29820 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484144028341668e-05, + "loss": 0.4099, + "step": 29825 + }, + { + "epoch": 1.39, + "learning_rate": 1.5483360243286882e-05, + "loss": 0.4474, + "step": 29830 + }, + { + "epoch": 1.39, + "learning_rate": 1.5482576458232096e-05, + "loss": 0.3448, + "step": 29835 + }, + { + "epoch": 1.39, + "learning_rate": 1.548179267317731e-05, + "loss": 0.0606, + "step": 29840 + }, + { + "epoch": 1.39, + "learning_rate": 1.548100888812252e-05, + "loss": 0.1094, + "step": 29845 + }, + { + "epoch": 1.39, + "learning_rate": 1.5480225103067738e-05, + "loss": 0.1081, + "step": 29850 + }, + { + "epoch": 1.39, + "learning_rate": 1.547944131801295e-05, + "loss": 0.1361, + "step": 29855 + }, + { + "epoch": 1.39, + "learning_rate": 1.5478657532958162e-05, + "loss": 0.1611, + "step": 29860 + }, + { + "epoch": 1.39, + "learning_rate": 1.5477873747903376e-05, + "loss": 0.1986, + "step": 29865 + }, + { + "epoch": 1.39, + "learning_rate": 1.547708996284859e-05, + "loss": 0.1848, + "step": 29870 + }, + { + "epoch": 1.39, + "learning_rate": 1.5476306177793804e-05, + "loss": 0.3482, + "step": 29875 + }, + { + "epoch": 1.39, + "learning_rate": 1.5475522392739018e-05, + "loss": 0.4131, + "step": 29880 + }, + { + "epoch": 1.39, + "learning_rate": 1.547473860768423e-05, + "loss": 0.2946, + "step": 29885 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473954822629442e-05, + "loss": 0.0356, + "step": 29890 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473171037574656e-05, + "loss": 0.0547, + "step": 29895 + }, + { + "epoch": 1.4, + "learning_rate": 1.547238725251987e-05, + "loss": 0.1375, + "step": 29900 + }, + { + "epoch": 1.4, + "learning_rate": 1.5471603467465084e-05, + "loss": 0.119, + "step": 29905 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470819682410298e-05, + "loss": 0.107, + "step": 29910 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470035897355512e-05, + "loss": 0.1367, + "step": 29915 + }, + { + "epoch": 1.4, + "learning_rate": 1.5469252112300722e-05, + "loss": 0.147, + "step": 29920 + }, + { + "epoch": 1.4, + "learning_rate": 1.546846832724594e-05, + "loss": 0.2123, + "step": 29925 + }, + { + "epoch": 1.4, + "learning_rate": 1.546768454219115e-05, + "loss": 0.3934, + "step": 29930 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466900757136364e-05, + "loss": 0.2301, + "step": 29935 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466116972081578e-05, + "loss": 0.045, + "step": 29940 + }, + { + "epoch": 1.4, + "learning_rate": 1.5465333187026792e-05, + "loss": 0.092, + "step": 29945 + }, + { + "epoch": 1.4, + "learning_rate": 1.5464549401972006e-05, + "loss": 0.0908, + "step": 29950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5463765616917216e-05, + "loss": 0.1098, + "step": 29955 + }, + { + "epoch": 1.4, + "learning_rate": 1.546298183186243e-05, + "loss": 0.0832, + "step": 29960 + }, + { + "epoch": 1.4, + "learning_rate": 1.5462198046807644e-05, + "loss": 0.1968, + "step": 29965 + }, + { + "epoch": 1.4, + "learning_rate": 1.5461414261752858e-05, + "loss": 0.2016, + "step": 29970 + }, + { + "epoch": 1.4, + "learning_rate": 1.5460630476698072e-05, + "loss": 0.2575, + "step": 29975 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459846691643286e-05, + "loss": 0.4952, + "step": 29980 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459062906588496e-05, + "loss": 0.364, + "step": 29985 + }, + { + "epoch": 1.4, + "learning_rate": 1.5458279121533714e-05, + "loss": 0.0521, + "step": 29990 + }, + { + "epoch": 1.4, + "learning_rate": 1.5457495336478924e-05, + "loss": 0.0457, + "step": 29995 + }, + { + "epoch": 1.4, + "learning_rate": 1.5456711551424138e-05, + "loss": 0.0946, + "step": 30000 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455927766369352e-05, + "loss": 0.1691, + "step": 30005 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455143981314566e-05, + "loss": 0.1704, + "step": 30010 + }, + { + "epoch": 1.4, + "learning_rate": 1.545436019625978e-05, + "loss": 0.1653, + "step": 30015 + }, + { + "epoch": 1.4, + "learning_rate": 1.545357641120499e-05, + "loss": 0.1592, + "step": 30020 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452792626150208e-05, + "loss": 0.2251, + "step": 30025 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452008841095418e-05, + "loss": 0.2223, + "step": 30030 + }, + { + "epoch": 1.4, + "learning_rate": 1.5451225056040632e-05, + "loss": 0.3359, + "step": 30035 + }, + { + "epoch": 1.4, + "learning_rate": 1.5450441270985846e-05, + "loss": 0.0442, + "step": 30040 + }, + { + "epoch": 1.4, + "learning_rate": 1.544965748593106e-05, + "loss": 0.1268, + "step": 30045 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448873700876274e-05, + "loss": 0.1322, + "step": 30050 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448089915821488e-05, + "loss": 0.0535, + "step": 30055 + }, + { + "epoch": 1.4, + "learning_rate": 1.5447306130766698e-05, + "loss": 0.1431, + "step": 30060 + }, + { + "epoch": 1.4, + "learning_rate": 1.5446522345711915e-05, + "loss": 0.1551, + "step": 30065 + }, + { + "epoch": 1.4, + "learning_rate": 1.5445738560657126e-05, + "loss": 0.1445, + "step": 30070 + }, + { + "epoch": 1.4, + "learning_rate": 1.544495477560234e-05, + "loss": 0.2025, + "step": 30075 + }, + { + "epoch": 1.4, + "learning_rate": 1.5444170990547554e-05, + "loss": 0.3375, + "step": 30080 + }, + { + "epoch": 1.4, + "learning_rate": 1.5443387205492764e-05, + "loss": 0.2115, + "step": 30085 + }, + { + "epoch": 1.4, + "learning_rate": 1.544260342043798e-05, + "loss": 0.0324, + "step": 30090 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441819635383192e-05, + "loss": 0.0582, + "step": 30095 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441035850328406e-05, + "loss": 0.0829, + "step": 30100 + }, + { + "epoch": 1.4, + "learning_rate": 1.544025206527362e-05, + "loss": 0.0776, + "step": 30105 + }, + { + "epoch": 1.4, + "learning_rate": 1.5439468280218834e-05, + "loss": 0.1564, + "step": 30110 + }, + { + "epoch": 1.41, + "learning_rate": 1.5438684495164048e-05, + "loss": 0.1717, + "step": 30115 + }, + { + "epoch": 1.41, + "learning_rate": 1.543790071010926e-05, + "loss": 0.217, + "step": 30120 + }, + { + "epoch": 1.41, + "learning_rate": 1.5437116925054476e-05, + "loss": 0.2191, + "step": 30125 + }, + { + "epoch": 1.41, + "learning_rate": 1.543633313999969e-05, + "loss": 0.3155, + "step": 30130 + }, + { + "epoch": 1.41, + "learning_rate": 1.54355493549449e-05, + "loss": 0.3595, + "step": 30135 + }, + { + "epoch": 1.41, + "learning_rate": 1.5434765569890117e-05, + "loss": 0.0571, + "step": 30140 + }, + { + "epoch": 1.41, + "learning_rate": 1.5433981784835328e-05, + "loss": 0.0568, + "step": 30145 + }, + { + "epoch": 1.41, + "learning_rate": 1.543319799978054e-05, + "loss": 0.077, + "step": 30150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5432414214725756e-05, + "loss": 0.1147, + "step": 30155 + }, + { + "epoch": 1.41, + "learning_rate": 1.5431630429670966e-05, + "loss": 0.0826, + "step": 30160 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430846644616183e-05, + "loss": 0.1165, + "step": 30165 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430062859561394e-05, + "loss": 0.1987, + "step": 30170 + }, + { + "epoch": 1.41, + "learning_rate": 1.5429279074506608e-05, + "loss": 0.2379, + "step": 30175 + }, + { + "epoch": 1.41, + "learning_rate": 1.542849528945182e-05, + "loss": 0.5167, + "step": 30180 + }, + { + "epoch": 1.41, + "learning_rate": 1.5427711504397036e-05, + "loss": 0.2545, + "step": 30185 + }, + { + "epoch": 1.41, + "learning_rate": 1.542692771934225e-05, + "loss": 0.0899, + "step": 30190 + }, + { + "epoch": 1.41, + "learning_rate": 1.5426143934287463e-05, + "loss": 0.0671, + "step": 30195 + }, + { + "epoch": 1.41, + "learning_rate": 1.5425360149232674e-05, + "loss": 0.1145, + "step": 30200 + }, + { + "epoch": 1.41, + "learning_rate": 1.542457636417789e-05, + "loss": 0.0848, + "step": 30205 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423792579123102e-05, + "loss": 0.208, + "step": 30210 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423008794068316e-05, + "loss": 0.1277, + "step": 30215 + }, + { + "epoch": 1.41, + "learning_rate": 1.542222500901353e-05, + "loss": 0.182, + "step": 30220 + }, + { + "epoch": 1.41, + "learning_rate": 1.5421441223958743e-05, + "loss": 0.2234, + "step": 30225 + }, + { + "epoch": 1.41, + "learning_rate": 1.5420657438903957e-05, + "loss": 0.3056, + "step": 30230 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419873653849168e-05, + "loss": 0.4144, + "step": 30235 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419089868794385e-05, + "loss": 0.0343, + "step": 30240 + }, + { + "epoch": 1.41, + "learning_rate": 1.5418306083739596e-05, + "loss": 0.0805, + "step": 30245 + }, + { + "epoch": 1.41, + "learning_rate": 1.541752229868481e-05, + "loss": 0.1667, + "step": 30250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5416738513630024e-05, + "loss": 0.1355, + "step": 30255 + }, + { + "epoch": 1.41, + "learning_rate": 1.5415954728575237e-05, + "loss": 0.1269, + "step": 30260 + }, + { + "epoch": 1.41, + "learning_rate": 1.541517094352045e-05, + "loss": 0.246, + "step": 30265 + }, + { + "epoch": 1.41, + "learning_rate": 1.5414387158465665e-05, + "loss": 0.1213, + "step": 30270 + }, + { + "epoch": 1.41, + "learning_rate": 1.5413603373410876e-05, + "loss": 0.169, + "step": 30275 + }, + { + "epoch": 1.41, + "learning_rate": 1.541281958835609e-05, + "loss": 0.3212, + "step": 30280 + }, + { + "epoch": 1.41, + "learning_rate": 1.5412035803301304e-05, + "loss": 0.1963, + "step": 30285 + }, + { + "epoch": 1.41, + "learning_rate": 1.5411252018246517e-05, + "loss": 0.0683, + "step": 30290 + }, + { + "epoch": 1.41, + "learning_rate": 1.541046823319173e-05, + "loss": 0.0797, + "step": 30295 + }, + { + "epoch": 1.41, + "learning_rate": 1.5409684448136942e-05, + "loss": 0.0972, + "step": 30300 + }, + { + "epoch": 1.41, + "learning_rate": 1.540890066308216e-05, + "loss": 0.1827, + "step": 30305 + }, + { + "epoch": 1.41, + "learning_rate": 1.540811687802737e-05, + "loss": 0.0909, + "step": 30310 + }, + { + "epoch": 1.41, + "learning_rate": 1.5407333092972584e-05, + "loss": 0.1743, + "step": 30315 + }, + { + "epoch": 1.41, + "learning_rate": 1.5406549307917798e-05, + "loss": 0.1385, + "step": 30320 + }, + { + "epoch": 1.42, + "learning_rate": 1.540576552286301e-05, + "loss": 0.2003, + "step": 30325 + }, + { + "epoch": 1.42, + "learning_rate": 1.5404981737808225e-05, + "loss": 0.4004, + "step": 30330 + }, + { + "epoch": 1.42, + "learning_rate": 1.540419795275344e-05, + "loss": 0.2143, + "step": 30335 + }, + { + "epoch": 1.42, + "learning_rate": 1.5403414167698653e-05, + "loss": 0.0175, + "step": 30340 + }, + { + "epoch": 1.42, + "learning_rate": 1.5402630382643864e-05, + "loss": 0.1199, + "step": 30345 + }, + { + "epoch": 1.42, + "learning_rate": 1.5401846597589078e-05, + "loss": 0.0549, + "step": 30350 + }, + { + "epoch": 1.42, + "learning_rate": 1.540106281253429e-05, + "loss": 0.1376, + "step": 30355 + }, + { + "epoch": 1.42, + "learning_rate": 1.5400279027479505e-05, + "loss": 0.1743, + "step": 30360 + }, + { + "epoch": 1.42, + "learning_rate": 1.539949524242472e-05, + "loss": 0.2547, + "step": 30365 + }, + { + "epoch": 1.42, + "learning_rate": 1.5398711457369933e-05, + "loss": 0.1769, + "step": 30370 + }, + { + "epoch": 1.42, + "learning_rate": 1.5397927672315144e-05, + "loss": 0.3749, + "step": 30375 + }, + { + "epoch": 1.42, + "learning_rate": 1.539714388726036e-05, + "loss": 0.5307, + "step": 30380 + }, + { + "epoch": 1.42, + "learning_rate": 1.539636010220557e-05, + "loss": 0.3359, + "step": 30385 + }, + { + "epoch": 1.42, + "learning_rate": 1.5395576317150785e-05, + "loss": 0.1245, + "step": 30390 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394792532096e-05, + "loss": 0.0377, + "step": 30395 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394008747041213e-05, + "loss": 0.1, + "step": 30400 + }, + { + "epoch": 1.42, + "learning_rate": 1.5393224961986427e-05, + "loss": 0.146, + "step": 30405 + }, + { + "epoch": 1.42, + "learning_rate": 1.5392441176931638e-05, + "loss": 0.1585, + "step": 30410 + }, + { + "epoch": 1.42, + "learning_rate": 1.539165739187685e-05, + "loss": 0.2002, + "step": 30415 + }, + { + "epoch": 1.42, + "learning_rate": 1.5390873606822065e-05, + "loss": 0.2221, + "step": 30420 + }, + { + "epoch": 1.42, + "learning_rate": 1.539008982176728e-05, + "loss": 0.2473, + "step": 30425 + }, + { + "epoch": 1.42, + "learning_rate": 1.5389306036712493e-05, + "loss": 0.3531, + "step": 30430 + }, + { + "epoch": 1.42, + "learning_rate": 1.5388522251657707e-05, + "loss": 0.3376, + "step": 30435 + }, + { + "epoch": 1.42, + "learning_rate": 1.538773846660292e-05, + "loss": 0.1212, + "step": 30440 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386954681548135e-05, + "loss": 0.0599, + "step": 30445 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386170896493345e-05, + "loss": 0.0923, + "step": 30450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5385387111438563e-05, + "loss": 0.0752, + "step": 30455 + }, + { + "epoch": 1.42, + "learning_rate": 1.5384603326383773e-05, + "loss": 0.074, + "step": 30460 + }, + { + "epoch": 1.42, + "learning_rate": 1.5383819541328987e-05, + "loss": 0.0945, + "step": 30465 + }, + { + "epoch": 1.42, + "learning_rate": 1.53830357562742e-05, + "loss": 0.2006, + "step": 30470 + }, + { + "epoch": 1.42, + "learning_rate": 1.538225197121941e-05, + "loss": 0.2685, + "step": 30475 + }, + { + "epoch": 1.42, + "learning_rate": 1.538146818616463e-05, + "loss": 0.2831, + "step": 30480 + }, + { + "epoch": 1.42, + "learning_rate": 1.538068440110984e-05, + "loss": 0.1803, + "step": 30485 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379900616055053e-05, + "loss": 0.0256, + "step": 30490 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379116831000267e-05, + "loss": 0.0892, + "step": 30495 + }, + { + "epoch": 1.42, + "learning_rate": 1.537833304594548e-05, + "loss": 0.1554, + "step": 30500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5377549260890695e-05, + "loss": 0.1182, + "step": 30505 + }, + { + "epoch": 1.42, + "learning_rate": 1.537676547583591e-05, + "loss": 0.1168, + "step": 30510 + }, + { + "epoch": 1.42, + "learning_rate": 1.537598169078112e-05, + "loss": 0.1539, + "step": 30515 + }, + { + "epoch": 1.42, + "learning_rate": 1.5375197905726337e-05, + "loss": 0.229, + "step": 30520 + }, + { + "epoch": 1.42, + "learning_rate": 1.5374414120671547e-05, + "loss": 0.2612, + "step": 30525 + }, + { + "epoch": 1.42, + "learning_rate": 1.537363033561676e-05, + "loss": 0.4199, + "step": 30530 + }, + { + "epoch": 1.42, + "learning_rate": 1.5372846550561975e-05, + "loss": 0.3006, + "step": 30535 + }, + { + "epoch": 1.43, + "learning_rate": 1.537206276550719e-05, + "loss": 0.0404, + "step": 30540 + }, + { + "epoch": 1.43, + "learning_rate": 1.5371278980452403e-05, + "loss": 0.0872, + "step": 30545 + }, + { + "epoch": 1.43, + "learning_rate": 1.5370495195397613e-05, + "loss": 0.1091, + "step": 30550 + }, + { + "epoch": 1.43, + "learning_rate": 1.536971141034283e-05, + "loss": 0.0654, + "step": 30555 + }, + { + "epoch": 1.43, + "learning_rate": 1.536892762528804e-05, + "loss": 0.1402, + "step": 30560 + }, + { + "epoch": 1.43, + "learning_rate": 1.5368143840233255e-05, + "loss": 0.1676, + "step": 30565 + }, + { + "epoch": 1.43, + "learning_rate": 1.536736005517847e-05, + "loss": 0.1912, + "step": 30570 + }, + { + "epoch": 1.43, + "learning_rate": 1.5366576270123683e-05, + "loss": 0.3351, + "step": 30575 + }, + { + "epoch": 1.43, + "learning_rate": 1.5365792485068897e-05, + "loss": 0.367, + "step": 30580 + }, + { + "epoch": 1.43, + "learning_rate": 1.536500870001411e-05, + "loss": 0.2385, + "step": 30585 + }, + { + "epoch": 1.43, + "learning_rate": 1.536422491495932e-05, + "loss": 0.0754, + "step": 30590 + }, + { + "epoch": 1.43, + "learning_rate": 1.536344112990454e-05, + "loss": 0.0664, + "step": 30595 + }, + { + "epoch": 1.43, + "learning_rate": 1.536265734484975e-05, + "loss": 0.1467, + "step": 30600 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361873559794963e-05, + "loss": 0.0865, + "step": 30605 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361089774740177e-05, + "loss": 0.1208, + "step": 30610 + }, + { + "epoch": 1.43, + "learning_rate": 1.5360305989685387e-05, + "loss": 0.1351, + "step": 30615 + }, + { + "epoch": 1.43, + "learning_rate": 1.5359522204630605e-05, + "loss": 0.1539, + "step": 30620 + }, + { + "epoch": 1.43, + "learning_rate": 1.5358738419575815e-05, + "loss": 0.2201, + "step": 30625 + }, + { + "epoch": 1.43, + "learning_rate": 1.535795463452103e-05, + "loss": 0.3883, + "step": 30630 + }, + { + "epoch": 1.43, + "learning_rate": 1.5357170849466243e-05, + "loss": 0.2628, + "step": 30635 + }, + { + "epoch": 1.43, + "learning_rate": 1.5356387064411457e-05, + "loss": 0.0461, + "step": 30640 + }, + { + "epoch": 1.43, + "learning_rate": 1.535560327935667e-05, + "loss": 0.0915, + "step": 30645 + }, + { + "epoch": 1.43, + "learning_rate": 1.5354819494301885e-05, + "loss": 0.0698, + "step": 30650 + }, + { + "epoch": 1.43, + "learning_rate": 1.53540357092471e-05, + "loss": 0.1448, + "step": 30655 + }, + { + "epoch": 1.43, + "learning_rate": 1.5353251924192313e-05, + "loss": 0.1386, + "step": 30660 + }, + { + "epoch": 1.43, + "learning_rate": 1.5352468139137523e-05, + "loss": 0.169, + "step": 30665 + }, + { + "epoch": 1.43, + "learning_rate": 1.5351684354082737e-05, + "loss": 0.1716, + "step": 30670 + }, + { + "epoch": 1.43, + "learning_rate": 1.535090056902795e-05, + "loss": 0.1982, + "step": 30675 + }, + { + "epoch": 1.43, + "learning_rate": 1.5350116783973165e-05, + "loss": 0.3828, + "step": 30680 + }, + { + "epoch": 1.43, + "learning_rate": 1.534933299891838e-05, + "loss": 0.2975, + "step": 30685 + }, + { + "epoch": 1.43, + "learning_rate": 1.534854921386359e-05, + "loss": 0.0999, + "step": 30690 + }, + { + "epoch": 1.43, + "learning_rate": 1.5347765428808807e-05, + "loss": 0.0746, + "step": 30695 + }, + { + "epoch": 1.43, + "learning_rate": 1.5346981643754017e-05, + "loss": 0.0579, + "step": 30700 + }, + { + "epoch": 1.43, + "learning_rate": 1.534619785869923e-05, + "loss": 0.1278, + "step": 30705 + }, + { + "epoch": 1.43, + "learning_rate": 1.5345414073644445e-05, + "loss": 0.1335, + "step": 30710 + }, + { + "epoch": 1.43, + "learning_rate": 1.534463028858966e-05, + "loss": 0.1177, + "step": 30715 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343846503534873e-05, + "loss": 0.1861, + "step": 30720 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343062718480087e-05, + "loss": 0.2215, + "step": 30725 + }, + { + "epoch": 1.43, + "learning_rate": 1.5342278933425297e-05, + "loss": 0.3913, + "step": 30730 + }, + { + "epoch": 1.43, + "learning_rate": 1.5341495148370514e-05, + "loss": 0.332, + "step": 30735 + }, + { + "epoch": 1.43, + "learning_rate": 1.5340711363315725e-05, + "loss": 0.0479, + "step": 30740 + }, + { + "epoch": 1.43, + "learning_rate": 1.533992757826094e-05, + "loss": 0.0902, + "step": 30745 + }, + { + "epoch": 1.43, + "learning_rate": 1.5339143793206153e-05, + "loss": 0.0383, + "step": 30750 + }, + { + "epoch": 1.44, + "learning_rate": 1.5338360008151367e-05, + "loss": 0.0873, + "step": 30755 + }, + { + "epoch": 1.44, + "learning_rate": 1.533757622309658e-05, + "loss": 0.058, + "step": 30760 + }, + { + "epoch": 1.44, + "learning_rate": 1.533679243804179e-05, + "loss": 0.1621, + "step": 30765 + }, + { + "epoch": 1.44, + "learning_rate": 1.533600865298701e-05, + "loss": 0.1878, + "step": 30770 + }, + { + "epoch": 1.44, + "learning_rate": 1.533522486793222e-05, + "loss": 0.3271, + "step": 30775 + }, + { + "epoch": 1.44, + "learning_rate": 1.5334441082877433e-05, + "loss": 0.2911, + "step": 30780 + }, + { + "epoch": 1.44, + "learning_rate": 1.5333657297822647e-05, + "loss": 0.2349, + "step": 30785 + }, + { + "epoch": 1.44, + "learning_rate": 1.533287351276786e-05, + "loss": 0.1201, + "step": 30790 + }, + { + "epoch": 1.44, + "learning_rate": 1.5332089727713075e-05, + "loss": 0.0789, + "step": 30795 + }, + { + "epoch": 1.44, + "learning_rate": 1.533130594265829e-05, + "loss": 0.0823, + "step": 30800 + }, + { + "epoch": 1.44, + "learning_rate": 1.53305221576035e-05, + "loss": 0.1153, + "step": 30805 + }, + { + "epoch": 1.44, + "learning_rate": 1.5329738372548713e-05, + "loss": 0.1054, + "step": 30810 + }, + { + "epoch": 1.44, + "learning_rate": 1.5328954587493927e-05, + "loss": 0.1376, + "step": 30815 + }, + { + "epoch": 1.44, + "learning_rate": 1.532817080243914e-05, + "loss": 0.1513, + "step": 30820 + }, + { + "epoch": 1.44, + "learning_rate": 1.5327387017384355e-05, + "loss": 0.2324, + "step": 30825 + }, + { + "epoch": 1.44, + "learning_rate": 1.5326603232329565e-05, + "loss": 0.3203, + "step": 30830 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325819447274782e-05, + "loss": 0.4077, + "step": 30835 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325035662219993e-05, + "loss": 0.0783, + "step": 30840 + }, + { + "epoch": 1.44, + "learning_rate": 1.5324251877165207e-05, + "loss": 0.0441, + "step": 30845 + }, + { + "epoch": 1.44, + "learning_rate": 1.532346809211042e-05, + "loss": 0.0616, + "step": 30850 + }, + { + "epoch": 1.44, + "learning_rate": 1.5322684307055635e-05, + "loss": 0.1068, + "step": 30855 + }, + { + "epoch": 1.44, + "learning_rate": 1.532190052200085e-05, + "loss": 0.1137, + "step": 30860 + }, + { + "epoch": 1.44, + "learning_rate": 1.5321116736946062e-05, + "loss": 0.2083, + "step": 30865 + }, + { + "epoch": 1.44, + "learning_rate": 1.5320332951891276e-05, + "loss": 0.1234, + "step": 30870 + }, + { + "epoch": 1.44, + "learning_rate": 1.5319549166836487e-05, + "loss": 0.2183, + "step": 30875 + }, + { + "epoch": 1.44, + "learning_rate": 1.53187653817817e-05, + "loss": 0.3248, + "step": 30880 + }, + { + "epoch": 1.44, + "learning_rate": 1.5317981596726915e-05, + "loss": 0.2879, + "step": 30885 + }, + { + "epoch": 1.44, + "learning_rate": 1.531719781167213e-05, + "loss": 0.0633, + "step": 30890 + }, + { + "epoch": 1.44, + "learning_rate": 1.5316414026617342e-05, + "loss": 0.0492, + "step": 30895 + }, + { + "epoch": 1.44, + "learning_rate": 1.5315630241562556e-05, + "loss": 0.0931, + "step": 30900 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314846456507767e-05, + "loss": 0.0612, + "step": 30905 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314062671452984e-05, + "loss": 0.0813, + "step": 30910 + }, + { + "epoch": 1.44, + "learning_rate": 1.5313278886398195e-05, + "loss": 0.1622, + "step": 30915 + }, + { + "epoch": 1.44, + "learning_rate": 1.531249510134341e-05, + "loss": 0.1378, + "step": 30920 + }, + { + "epoch": 1.44, + "learning_rate": 1.5311711316288623e-05, + "loss": 0.1609, + "step": 30925 + }, + { + "epoch": 1.44, + "learning_rate": 1.5310927531233836e-05, + "loss": 0.5023, + "step": 30930 + }, + { + "epoch": 1.44, + "learning_rate": 1.531014374617905e-05, + "loss": 0.3593, + "step": 30935 + }, + { + "epoch": 1.44, + "learning_rate": 1.530935996112426e-05, + "loss": 0.1545, + "step": 30940 + }, + { + "epoch": 1.44, + "learning_rate": 1.5308576176069475e-05, + "loss": 0.0696, + "step": 30945 + }, + { + "epoch": 1.44, + "learning_rate": 1.530779239101469e-05, + "loss": 0.0694, + "step": 30950 + }, + { + "epoch": 1.44, + "learning_rate": 1.5307008605959903e-05, + "loss": 0.1546, + "step": 30955 + }, + { + "epoch": 1.44, + "learning_rate": 1.5306224820905116e-05, + "loss": 0.1018, + "step": 30960 + }, + { + "epoch": 1.44, + "learning_rate": 1.530544103585033e-05, + "loss": 0.1363, + "step": 30965 + }, + { + "epoch": 1.45, + "learning_rate": 1.5304657250795544e-05, + "loss": 0.3256, + "step": 30970 + }, + { + "epoch": 1.45, + "learning_rate": 1.5303873465740758e-05, + "loss": 0.3606, + "step": 30975 + }, + { + "epoch": 1.45, + "learning_rate": 1.530308968068597e-05, + "loss": 0.331, + "step": 30980 + }, + { + "epoch": 1.45, + "learning_rate": 1.5302305895631186e-05, + "loss": 0.2471, + "step": 30985 + }, + { + "epoch": 1.45, + "learning_rate": 1.5301522110576397e-05, + "loss": 0.042, + "step": 30990 + }, + { + "epoch": 1.45, + "learning_rate": 1.530073832552161e-05, + "loss": 0.0757, + "step": 30995 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299954540466824e-05, + "loss": 0.0695, + "step": 31000 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299170755412035e-05, + "loss": 0.1101, + "step": 31005 + }, + { + "epoch": 1.45, + "learning_rate": 1.5298386970357252e-05, + "loss": 0.1784, + "step": 31010 + }, + { + "epoch": 1.45, + "learning_rate": 1.5297603185302463e-05, + "loss": 0.1233, + "step": 31015 + }, + { + "epoch": 1.45, + "learning_rate": 1.5296819400247677e-05, + "loss": 0.2507, + "step": 31020 + }, + { + "epoch": 1.45, + "learning_rate": 1.529603561519289e-05, + "loss": 0.248, + "step": 31025 + }, + { + "epoch": 1.45, + "learning_rate": 1.5295251830138104e-05, + "loss": 0.4157, + "step": 31030 + }, + { + "epoch": 1.45, + "learning_rate": 1.5294468045083318e-05, + "loss": 0.2699, + "step": 31035 + }, + { + "epoch": 1.45, + "learning_rate": 1.5293684260028532e-05, + "loss": 0.1, + "step": 31040 + }, + { + "epoch": 1.45, + "learning_rate": 1.5292900474973743e-05, + "loss": 0.0886, + "step": 31045 + }, + { + "epoch": 1.45, + "learning_rate": 1.529211668991896e-05, + "loss": 0.0945, + "step": 31050 + }, + { + "epoch": 1.45, + "learning_rate": 1.529133290486417e-05, + "loss": 0.1045, + "step": 31055 + }, + { + "epoch": 1.45, + "learning_rate": 1.5290549119809384e-05, + "loss": 0.1411, + "step": 31060 + }, + { + "epoch": 1.45, + "learning_rate": 1.52897653347546e-05, + "loss": 0.167, + "step": 31065 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288981549699812e-05, + "loss": 0.2069, + "step": 31070 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288197764645026e-05, + "loss": 0.1847, + "step": 31075 + }, + { + "epoch": 1.45, + "learning_rate": 1.5287413979590237e-05, + "loss": 0.2041, + "step": 31080 + }, + { + "epoch": 1.45, + "learning_rate": 1.5286630194535454e-05, + "loss": 0.2017, + "step": 31085 + }, + { + "epoch": 1.45, + "learning_rate": 1.5285846409480664e-05, + "loss": 0.0352, + "step": 31090 + }, + { + "epoch": 1.45, + "learning_rate": 1.528506262442588e-05, + "loss": 0.0676, + "step": 31095 + }, + { + "epoch": 1.45, + "learning_rate": 1.5284278839371092e-05, + "loss": 0.1651, + "step": 31100 + }, + { + "epoch": 1.45, + "learning_rate": 1.5283495054316306e-05, + "loss": 0.095, + "step": 31105 + }, + { + "epoch": 1.45, + "learning_rate": 1.528271126926152e-05, + "loss": 0.1385, + "step": 31110 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281927484206734e-05, + "loss": 0.16, + "step": 31115 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281143699151944e-05, + "loss": 0.2099, + "step": 31120 + }, + { + "epoch": 1.45, + "learning_rate": 1.5280359914097162e-05, + "loss": 0.3167, + "step": 31125 + }, + { + "epoch": 1.45, + "learning_rate": 1.5279576129042372e-05, + "loss": 0.2805, + "step": 31130 + }, + { + "epoch": 1.45, + "learning_rate": 1.5278792343987586e-05, + "loss": 0.2409, + "step": 31135 + }, + { + "epoch": 1.45, + "learning_rate": 1.52780085589328e-05, + "loss": 0.0747, + "step": 31140 + }, + { + "epoch": 1.45, + "learning_rate": 1.527722477387801e-05, + "loss": 0.0464, + "step": 31145 + }, + { + "epoch": 1.45, + "learning_rate": 1.5276440988823228e-05, + "loss": 0.0867, + "step": 31150 + }, + { + "epoch": 1.45, + "learning_rate": 1.527565720376844e-05, + "loss": 0.1238, + "step": 31155 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274873418713652e-05, + "loss": 0.1296, + "step": 31160 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274089633658866e-05, + "loss": 0.1621, + "step": 31165 + }, + { + "epoch": 1.45, + "learning_rate": 1.527330584860408e-05, + "loss": 0.1919, + "step": 31170 + }, + { + "epoch": 1.45, + "learning_rate": 1.5272522063549294e-05, + "loss": 0.3081, + "step": 31175 + }, + { + "epoch": 1.45, + "learning_rate": 1.5271738278494508e-05, + "loss": 0.3428, + "step": 31180 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270954493439722e-05, + "loss": 0.273, + "step": 31185 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270170708384936e-05, + "loss": 0.1131, + "step": 31190 + }, + { + "epoch": 1.46, + "learning_rate": 1.5269386923330146e-05, + "loss": 0.0283, + "step": 31195 + }, + { + "epoch": 1.46, + "learning_rate": 1.526860313827536e-05, + "loss": 0.0582, + "step": 31200 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267819353220574e-05, + "loss": 0.0585, + "step": 31205 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267035568165788e-05, + "loss": 0.0735, + "step": 31210 + }, + { + "epoch": 1.46, + "learning_rate": 1.5266251783111002e-05, + "loss": 0.272, + "step": 31215 + }, + { + "epoch": 1.46, + "learning_rate": 1.5265467998056212e-05, + "loss": 0.1506, + "step": 31220 + }, + { + "epoch": 1.46, + "learning_rate": 1.526468421300143e-05, + "loss": 0.2312, + "step": 31225 + }, + { + "epoch": 1.46, + "learning_rate": 1.526390042794664e-05, + "loss": 0.2993, + "step": 31230 + }, + { + "epoch": 1.46, + "learning_rate": 1.5263116642891854e-05, + "loss": 0.3461, + "step": 31235 + }, + { + "epoch": 1.46, + "learning_rate": 1.5262332857837068e-05, + "loss": 0.0388, + "step": 31240 + }, + { + "epoch": 1.46, + "learning_rate": 1.5261549072782282e-05, + "loss": 0.0356, + "step": 31245 + }, + { + "epoch": 1.46, + "learning_rate": 1.5260765287727496e-05, + "loss": 0.1398, + "step": 31250 + }, + { + "epoch": 1.46, + "learning_rate": 1.525998150267271e-05, + "loss": 0.0815, + "step": 31255 + }, + { + "epoch": 1.46, + "learning_rate": 1.525919771761792e-05, + "loss": 0.1347, + "step": 31260 + }, + { + "epoch": 1.46, + "learning_rate": 1.5258413932563134e-05, + "loss": 0.1489, + "step": 31265 + }, + { + "epoch": 1.46, + "learning_rate": 1.5257630147508348e-05, + "loss": 0.1687, + "step": 31270 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256846362453562e-05, + "loss": 0.1338, + "step": 31275 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256062577398776e-05, + "loss": 0.3636, + "step": 31280 + }, + { + "epoch": 1.46, + "learning_rate": 1.5255278792343988e-05, + "loss": 0.2142, + "step": 31285 + }, + { + "epoch": 1.46, + "learning_rate": 1.5254495007289202e-05, + "loss": 0.0323, + "step": 31290 + }, + { + "epoch": 1.46, + "learning_rate": 1.5253711222234416e-05, + "loss": 0.0875, + "step": 31295 + }, + { + "epoch": 1.46, + "learning_rate": 1.525292743717963e-05, + "loss": 0.0987, + "step": 31300 + }, + { + "epoch": 1.46, + "learning_rate": 1.5252143652124842e-05, + "loss": 0.1468, + "step": 31305 + }, + { + "epoch": 1.46, + "learning_rate": 1.5251359867070058e-05, + "loss": 0.0654, + "step": 31310 + }, + { + "epoch": 1.46, + "learning_rate": 1.525057608201527e-05, + "loss": 0.1286, + "step": 31315 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249792296960484e-05, + "loss": 0.1423, + "step": 31320 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249008511905696e-05, + "loss": 0.2072, + "step": 31325 + }, + { + "epoch": 1.46, + "learning_rate": 1.5248224726850908e-05, + "loss": 0.3649, + "step": 31330 + }, + { + "epoch": 1.46, + "learning_rate": 1.5247440941796124e-05, + "loss": 0.366, + "step": 31335 + }, + { + "epoch": 1.46, + "learning_rate": 1.5246657156741336e-05, + "loss": 0.0274, + "step": 31340 + }, + { + "epoch": 1.46, + "learning_rate": 1.524587337168655e-05, + "loss": 0.0756, + "step": 31345 + }, + { + "epoch": 1.46, + "learning_rate": 1.5245089586631762e-05, + "loss": 0.1292, + "step": 31350 + }, + { + "epoch": 1.46, + "learning_rate": 1.5244305801576978e-05, + "loss": 0.1784, + "step": 31355 + }, + { + "epoch": 1.46, + "learning_rate": 1.524352201652219e-05, + "loss": 0.1282, + "step": 31360 + }, + { + "epoch": 1.46, + "learning_rate": 1.5242738231467404e-05, + "loss": 0.2259, + "step": 31365 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241954446412616e-05, + "loss": 0.2356, + "step": 31370 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241170661357832e-05, + "loss": 0.2162, + "step": 31375 + }, + { + "epoch": 1.46, + "learning_rate": 1.5240386876303044e-05, + "loss": 0.2779, + "step": 31380 + }, + { + "epoch": 1.46, + "learning_rate": 1.5239603091248258e-05, + "loss": 0.3447, + "step": 31385 + }, + { + "epoch": 1.46, + "learning_rate": 1.523881930619347e-05, + "loss": 0.0535, + "step": 31390 + }, + { + "epoch": 1.46, + "learning_rate": 1.5238035521138684e-05, + "loss": 0.1216, + "step": 31395 + }, + { + "epoch": 1.47, + "learning_rate": 1.5237251736083898e-05, + "loss": 0.0795, + "step": 31400 + }, + { + "epoch": 1.47, + "learning_rate": 1.523646795102911e-05, + "loss": 0.1059, + "step": 31405 + }, + { + "epoch": 1.47, + "learning_rate": 1.5235684165974326e-05, + "loss": 0.079, + "step": 31410 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234900380919538e-05, + "loss": 0.1216, + "step": 31415 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234116595864752e-05, + "loss": 0.1635, + "step": 31420 + }, + { + "epoch": 1.47, + "learning_rate": 1.5233332810809964e-05, + "loss": 0.1558, + "step": 31425 + }, + { + "epoch": 1.47, + "learning_rate": 1.523254902575518e-05, + "loss": 0.405, + "step": 31430 + }, + { + "epoch": 1.47, + "learning_rate": 1.5231765240700392e-05, + "loss": 0.4419, + "step": 31435 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230981455645606e-05, + "loss": 0.0325, + "step": 31440 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230197670590818e-05, + "loss": 0.1278, + "step": 31445 + }, + { + "epoch": 1.47, + "learning_rate": 1.5229413885536033e-05, + "loss": 0.1036, + "step": 31450 + }, + { + "epoch": 1.47, + "learning_rate": 1.5228630100481246e-05, + "loss": 0.1107, + "step": 31455 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227846315426458e-05, + "loss": 0.0893, + "step": 31460 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227062530371672e-05, + "loss": 0.1751, + "step": 31465 + }, + { + "epoch": 1.47, + "learning_rate": 1.5226278745316884e-05, + "loss": 0.1661, + "step": 31470 + }, + { + "epoch": 1.47, + "learning_rate": 1.52254949602621e-05, + "loss": 0.1933, + "step": 31475 + }, + { + "epoch": 1.47, + "learning_rate": 1.5224711175207312e-05, + "loss": 0.2263, + "step": 31480 + }, + { + "epoch": 1.47, + "learning_rate": 1.5223927390152526e-05, + "loss": 0.3486, + "step": 31485 + }, + { + "epoch": 1.47, + "learning_rate": 1.522314360509774e-05, + "loss": 0.0391, + "step": 31490 + }, + { + "epoch": 1.47, + "learning_rate": 1.5222359820042954e-05, + "loss": 0.0702, + "step": 31495 + }, + { + "epoch": 1.47, + "learning_rate": 1.5221576034988166e-05, + "loss": 0.0595, + "step": 31500 + }, + { + "epoch": 1.47, + "learning_rate": 1.522079224993338e-05, + "loss": 0.0184, + "step": 31505 + }, + { + "epoch": 1.47, + "learning_rate": 1.5220008464878594e-05, + "loss": 0.1383, + "step": 31510 + }, + { + "epoch": 1.47, + "learning_rate": 1.5219224679823807e-05, + "loss": 0.1519, + "step": 31515 + }, + { + "epoch": 1.47, + "learning_rate": 1.521844089476902e-05, + "loss": 0.1777, + "step": 31520 + }, + { + "epoch": 1.47, + "learning_rate": 1.5217657109714232e-05, + "loss": 0.2411, + "step": 31525 + }, + { + "epoch": 1.47, + "learning_rate": 1.5216873324659448e-05, + "loss": 0.3028, + "step": 31530 + }, + { + "epoch": 1.47, + "learning_rate": 1.521608953960466e-05, + "loss": 0.379, + "step": 31535 + }, + { + "epoch": 1.47, + "learning_rate": 1.5215305754549874e-05, + "loss": 0.0478, + "step": 31540 + }, + { + "epoch": 1.47, + "learning_rate": 1.5214521969495086e-05, + "loss": 0.0623, + "step": 31545 + }, + { + "epoch": 1.47, + "learning_rate": 1.5213738184440301e-05, + "loss": 0.0523, + "step": 31550 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212954399385514e-05, + "loss": 0.1509, + "step": 31555 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212170614330728e-05, + "loss": 0.1149, + "step": 31560 + }, + { + "epoch": 1.47, + "learning_rate": 1.521138682927594e-05, + "loss": 0.1829, + "step": 31565 + }, + { + "epoch": 1.47, + "learning_rate": 1.5210603044221155e-05, + "loss": 0.1609, + "step": 31570 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209819259166368e-05, + "loss": 0.2295, + "step": 31575 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209035474111581e-05, + "loss": 0.486, + "step": 31580 + }, + { + "epoch": 1.47, + "learning_rate": 1.5208251689056794e-05, + "loss": 0.2702, + "step": 31585 + }, + { + "epoch": 1.47, + "learning_rate": 1.5207467904002008e-05, + "loss": 0.0594, + "step": 31590 + }, + { + "epoch": 1.47, + "learning_rate": 1.5206684118947222e-05, + "loss": 0.1273, + "step": 31595 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205900333892434e-05, + "loss": 0.0883, + "step": 31600 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205116548837648e-05, + "loss": 0.0647, + "step": 31605 + }, + { + "epoch": 1.47, + "learning_rate": 1.5204332763782862e-05, + "loss": 0.0872, + "step": 31610 + }, + { + "epoch": 1.48, + "learning_rate": 1.5203548978728075e-05, + "loss": 0.1148, + "step": 31615 + }, + { + "epoch": 1.48, + "learning_rate": 1.5202765193673288e-05, + "loss": 0.1226, + "step": 31620 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201981408618503e-05, + "loss": 0.2385, + "step": 31625 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201197623563715e-05, + "loss": 0.3862, + "step": 31630 + }, + { + "epoch": 1.48, + "learning_rate": 1.520041383850893e-05, + "loss": 0.3206, + "step": 31635 + }, + { + "epoch": 1.48, + "learning_rate": 1.5199630053454142e-05, + "loss": 0.0743, + "step": 31640 + }, + { + "epoch": 1.48, + "learning_rate": 1.5198846268399357e-05, + "loss": 0.0545, + "step": 31645 + }, + { + "epoch": 1.48, + "learning_rate": 1.519806248334457e-05, + "loss": 0.0978, + "step": 31650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5197278698289782e-05, + "loss": 0.0675, + "step": 31655 + }, + { + "epoch": 1.48, + "learning_rate": 1.5196494913234995e-05, + "loss": 0.1666, + "step": 31660 + }, + { + "epoch": 1.48, + "learning_rate": 1.5195711128180208e-05, + "loss": 0.2834, + "step": 31665 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194927343125423e-05, + "loss": 0.1719, + "step": 31670 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194143558070636e-05, + "loss": 0.2468, + "step": 31675 + }, + { + "epoch": 1.48, + "learning_rate": 1.519335977301585e-05, + "loss": 0.3821, + "step": 31680 + }, + { + "epoch": 1.48, + "learning_rate": 1.5192575987961062e-05, + "loss": 0.3466, + "step": 31685 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191792202906277e-05, + "loss": 0.0438, + "step": 31690 + }, + { + "epoch": 1.48, + "learning_rate": 1.519100841785149e-05, + "loss": 0.066, + "step": 31695 + }, + { + "epoch": 1.48, + "learning_rate": 1.5190224632796703e-05, + "loss": 0.083, + "step": 31700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5189440847741916e-05, + "loss": 0.0671, + "step": 31705 + }, + { + "epoch": 1.48, + "learning_rate": 1.5188657062687131e-05, + "loss": 0.1106, + "step": 31710 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187873277632343e-05, + "loss": 0.1968, + "step": 31715 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187089492577556e-05, + "loss": 0.209, + "step": 31720 + }, + { + "epoch": 1.48, + "learning_rate": 1.5186305707522771e-05, + "loss": 0.1886, + "step": 31725 + }, + { + "epoch": 1.48, + "learning_rate": 1.5185521922467983e-05, + "loss": 0.3375, + "step": 31730 + }, + { + "epoch": 1.48, + "learning_rate": 1.5184738137413197e-05, + "loss": 0.2246, + "step": 31735 + }, + { + "epoch": 1.48, + "learning_rate": 1.518395435235841e-05, + "loss": 0.0597, + "step": 31740 + }, + { + "epoch": 1.48, + "learning_rate": 1.5183170567303625e-05, + "loss": 0.0915, + "step": 31745 + }, + { + "epoch": 1.48, + "learning_rate": 1.5182386782248837e-05, + "loss": 0.1168, + "step": 31750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5181602997194051e-05, + "loss": 0.0906, + "step": 31755 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180819212139263e-05, + "loss": 0.0936, + "step": 31760 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180035427084479e-05, + "loss": 0.1309, + "step": 31765 + }, + { + "epoch": 1.48, + "learning_rate": 1.5179251642029691e-05, + "loss": 0.149, + "step": 31770 + }, + { + "epoch": 1.48, + "learning_rate": 1.5178467856974905e-05, + "loss": 0.2683, + "step": 31775 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177684071920117e-05, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 1.48, + "learning_rate": 1.517690028686533e-05, + "loss": 0.2512, + "step": 31785 + }, + { + "epoch": 1.48, + "learning_rate": 1.5176116501810545e-05, + "loss": 0.059, + "step": 31790 + }, + { + "epoch": 1.48, + "learning_rate": 1.5175332716755757e-05, + "loss": 0.0595, + "step": 31795 + }, + { + "epoch": 1.48, + "learning_rate": 1.5174548931700971e-05, + "loss": 0.0786, + "step": 31800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5173765146646185e-05, + "loss": 0.1279, + "step": 31805 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172981361591399e-05, + "loss": 0.2461, + "step": 31810 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172197576536611e-05, + "loss": 0.1358, + "step": 31815 + }, + { + "epoch": 1.48, + "learning_rate": 1.5171413791481825e-05, + "loss": 0.1626, + "step": 31820 + }, + { + "epoch": 1.48, + "learning_rate": 1.5170630006427039e-05, + "loss": 0.1351, + "step": 31825 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169846221372253e-05, + "loss": 0.3434, + "step": 31830 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169062436317465e-05, + "loss": 0.2988, + "step": 31835 + }, + { + "epoch": 1.49, + "learning_rate": 1.5168278651262681e-05, + "loss": 0.0382, + "step": 31840 + }, + { + "epoch": 1.49, + "learning_rate": 1.5167494866207893e-05, + "loss": 0.0455, + "step": 31845 + }, + { + "epoch": 1.49, + "learning_rate": 1.5166711081153105e-05, + "loss": 0.0726, + "step": 31850 + }, + { + "epoch": 1.49, + "learning_rate": 1.516592729609832e-05, + "loss": 0.0954, + "step": 31855 + }, + { + "epoch": 1.49, + "learning_rate": 1.5165143511043531e-05, + "loss": 0.0936, + "step": 31860 + }, + { + "epoch": 1.49, + "learning_rate": 1.5164359725988747e-05, + "loss": 0.1024, + "step": 31865 + }, + { + "epoch": 1.49, + "learning_rate": 1.516357594093396e-05, + "loss": 0.1589, + "step": 31870 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162792155879173e-05, + "loss": 0.2689, + "step": 31875 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162008370824385e-05, + "loss": 0.3105, + "step": 31880 + }, + { + "epoch": 1.49, + "learning_rate": 1.5161224585769601e-05, + "loss": 0.295, + "step": 31885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5160440800714813e-05, + "loss": 0.0471, + "step": 31890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5159657015660027e-05, + "loss": 0.0563, + "step": 31895 + }, + { + "epoch": 1.49, + "learning_rate": 1.515887323060524e-05, + "loss": 0.0521, + "step": 31900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5158089445550455e-05, + "loss": 0.1391, + "step": 31905 + }, + { + "epoch": 1.49, + "learning_rate": 1.5157305660495667e-05, + "loss": 0.0926, + "step": 31910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515652187544088e-05, + "loss": 0.1678, + "step": 31915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5155738090386093e-05, + "loss": 0.1874, + "step": 31920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154954305331307e-05, + "loss": 0.1741, + "step": 31925 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154170520276521e-05, + "loss": 0.362, + "step": 31930 + }, + { + "epoch": 1.49, + "learning_rate": 1.5153386735221733e-05, + "loss": 0.3195, + "step": 31935 + }, + { + "epoch": 1.49, + "learning_rate": 1.5152602950166949e-05, + "loss": 0.0607, + "step": 31940 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151819165112161e-05, + "loss": 0.0734, + "step": 31945 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151035380057375e-05, + "loss": 0.096, + "step": 31950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5150251595002587e-05, + "loss": 0.1064, + "step": 31955 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149467809947803e-05, + "loss": 0.1528, + "step": 31960 + }, + { + "epoch": 1.49, + "learning_rate": 1.5148684024893015e-05, + "loss": 0.2397, + "step": 31965 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147900239838229e-05, + "loss": 0.2079, + "step": 31970 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147116454783441e-05, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.49, + "learning_rate": 1.5146332669728653e-05, + "loss": 0.4375, + "step": 31980 + }, + { + "epoch": 1.49, + "learning_rate": 1.5145548884673869e-05, + "loss": 0.348, + "step": 31985 + }, + { + "epoch": 1.49, + "learning_rate": 1.5144765099619081e-05, + "loss": 0.0462, + "step": 31990 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143981314564295e-05, + "loss": 0.078, + "step": 31995 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143197529509507e-05, + "loss": 0.0692, + "step": 32000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5142413744454723e-05, + "loss": 0.0825, + "step": 32005 + }, + { + "epoch": 1.49, + "learning_rate": 1.5141629959399935e-05, + "loss": 0.138, + "step": 32010 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140846174345149e-05, + "loss": 0.1539, + "step": 32015 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140062389290361e-05, + "loss": 0.2308, + "step": 32020 + }, + { + "epoch": 1.49, + "learning_rate": 1.5139278604235577e-05, + "loss": 0.2213, + "step": 32025 + }, + { + "epoch": 1.49, + "learning_rate": 1.5138494819180789e-05, + "loss": 0.2618, + "step": 32030 + }, + { + "epoch": 1.49, + "learning_rate": 1.5137711034126003e-05, + "loss": 0.3362, + "step": 32035 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136927249071217e-05, + "loss": 0.0699, + "step": 32040 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136143464016429e-05, + "loss": 0.0228, + "step": 32045 + }, + { + "epoch": 1.5, + "learning_rate": 1.5135359678961643e-05, + "loss": 0.046, + "step": 32050 + }, + { + "epoch": 1.5, + "learning_rate": 1.5134575893906855e-05, + "loss": 0.0907, + "step": 32055 + }, + { + "epoch": 1.5, + "learning_rate": 1.513379210885207e-05, + "loss": 0.1112, + "step": 32060 + }, + { + "epoch": 1.5, + "learning_rate": 1.5133008323797283e-05, + "loss": 0.1013, + "step": 32065 + }, + { + "epoch": 1.5, + "learning_rate": 1.5132224538742497e-05, + "loss": 0.1556, + "step": 32070 + }, + { + "epoch": 1.5, + "learning_rate": 1.5131440753687709e-05, + "loss": 0.24, + "step": 32075 + }, + { + "epoch": 1.5, + "learning_rate": 1.5130656968632925e-05, + "loss": 0.5122, + "step": 32080 + }, + { + "epoch": 1.5, + "learning_rate": 1.5129873183578137e-05, + "loss": 0.2973, + "step": 32085 + }, + { + "epoch": 1.5, + "learning_rate": 1.512908939852335e-05, + "loss": 0.0492, + "step": 32090 + }, + { + "epoch": 1.5, + "learning_rate": 1.5128305613468563e-05, + "loss": 0.0999, + "step": 32095 + }, + { + "epoch": 1.5, + "learning_rate": 1.5127521828413779e-05, + "loss": 0.0568, + "step": 32100 + }, + { + "epoch": 1.5, + "learning_rate": 1.512673804335899e-05, + "loss": 0.0834, + "step": 32105 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125954258304203e-05, + "loss": 0.0824, + "step": 32110 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125170473249417e-05, + "loss": 0.2455, + "step": 32115 + }, + { + "epoch": 1.5, + "learning_rate": 1.512438668819463e-05, + "loss": 0.1645, + "step": 32120 + }, + { + "epoch": 1.5, + "learning_rate": 1.5123602903139845e-05, + "loss": 0.2583, + "step": 32125 + }, + { + "epoch": 1.5, + "learning_rate": 1.5122819118085057e-05, + "loss": 0.3227, + "step": 32130 + }, + { + "epoch": 1.5, + "learning_rate": 1.512203533303027e-05, + "loss": 0.3561, + "step": 32135 + }, + { + "epoch": 1.5, + "learning_rate": 1.5121251547975485e-05, + "loss": 0.0648, + "step": 32140 + }, + { + "epoch": 1.5, + "learning_rate": 1.5120467762920699e-05, + "loss": 0.016, + "step": 32145 + }, + { + "epoch": 1.5, + "learning_rate": 1.511968397786591e-05, + "loss": 0.1238, + "step": 32150 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118900192811126e-05, + "loss": 0.0738, + "step": 32155 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118116407756339e-05, + "loss": 0.113, + "step": 32160 + }, + { + "epoch": 1.5, + "learning_rate": 1.5117332622701553e-05, + "loss": 0.0999, + "step": 32165 + }, + { + "epoch": 1.5, + "learning_rate": 1.5116548837646765e-05, + "loss": 0.2245, + "step": 32170 + }, + { + "epoch": 1.5, + "learning_rate": 1.5115765052591977e-05, + "loss": 0.3073, + "step": 32175 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114981267537193e-05, + "loss": 0.2557, + "step": 32180 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114197482482405e-05, + "loss": 0.2607, + "step": 32185 + }, + { + "epoch": 1.5, + "learning_rate": 1.5113413697427619e-05, + "loss": 0.064, + "step": 32190 + }, + { + "epoch": 1.5, + "learning_rate": 1.5112629912372831e-05, + "loss": 0.076, + "step": 32195 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111846127318046e-05, + "loss": 0.1157, + "step": 32200 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111062342263259e-05, + "loss": 0.075, + "step": 32205 + }, + { + "epoch": 1.5, + "learning_rate": 1.5110278557208473e-05, + "loss": 0.1556, + "step": 32210 + }, + { + "epoch": 1.5, + "learning_rate": 1.5109494772153685e-05, + "loss": 0.1983, + "step": 32215 + }, + { + "epoch": 1.5, + "learning_rate": 1.51087109870989e-05, + "loss": 0.1951, + "step": 32220 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107927202044113e-05, + "loss": 0.2283, + "step": 32225 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107143416989327e-05, + "loss": 0.3268, + "step": 32230 + }, + { + "epoch": 1.5, + "learning_rate": 1.5106359631934539e-05, + "loss": 0.2575, + "step": 32235 + }, + { + "epoch": 1.5, + "learning_rate": 1.5105575846879753e-05, + "loss": 0.046, + "step": 32240 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104792061824967e-05, + "loss": 0.0554, + "step": 32245 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104008276770179e-05, + "loss": 0.0833, + "step": 32250 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103224491715394e-05, + "loss": 0.0969, + "step": 32255 + }, + { + "epoch": 1.51, + "learning_rate": 1.5102440706660607e-05, + "loss": 0.1418, + "step": 32260 + }, + { + "epoch": 1.51, + "learning_rate": 1.510165692160582e-05, + "loss": 0.1293, + "step": 32265 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100873136551033e-05, + "loss": 0.0968, + "step": 32270 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100089351496248e-05, + "loss": 0.3261, + "step": 32275 + }, + { + "epoch": 1.51, + "learning_rate": 1.509930556644146e-05, + "loss": 0.283, + "step": 32280 + }, + { + "epoch": 1.51, + "learning_rate": 1.5098521781386674e-05, + "loss": 0.3071, + "step": 32285 + }, + { + "epoch": 1.51, + "learning_rate": 1.5097737996331887e-05, + "loss": 0.072, + "step": 32290 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096954211277102e-05, + "loss": 0.1006, + "step": 32295 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096170426222314e-05, + "loss": 0.1003, + "step": 32300 + }, + { + "epoch": 1.51, + "learning_rate": 1.5095386641167527e-05, + "loss": 0.1151, + "step": 32305 + }, + { + "epoch": 1.51, + "learning_rate": 1.509460285611274e-05, + "loss": 0.1184, + "step": 32310 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093819071057953e-05, + "loss": 0.1667, + "step": 32315 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093035286003168e-05, + "loss": 0.1405, + "step": 32320 + }, + { + "epoch": 1.51, + "learning_rate": 1.509225150094838e-05, + "loss": 0.2462, + "step": 32325 + }, + { + "epoch": 1.51, + "learning_rate": 1.5091467715893594e-05, + "loss": 0.2843, + "step": 32330 + }, + { + "epoch": 1.51, + "learning_rate": 1.5090683930838808e-05, + "loss": 0.2082, + "step": 32335 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089900145784022e-05, + "loss": 0.0373, + "step": 32340 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089116360729235e-05, + "loss": 0.0384, + "step": 32345 + }, + { + "epoch": 1.51, + "learning_rate": 1.5088332575674448e-05, + "loss": 0.0444, + "step": 32350 + }, + { + "epoch": 1.51, + "learning_rate": 1.5087548790619662e-05, + "loss": 0.0799, + "step": 32355 + }, + { + "epoch": 1.51, + "learning_rate": 1.5086765005564876e-05, + "loss": 0.1469, + "step": 32360 + }, + { + "epoch": 1.51, + "learning_rate": 1.5085981220510088e-05, + "loss": 0.1759, + "step": 32365 + }, + { + "epoch": 1.51, + "learning_rate": 1.50851974354553e-05, + "loss": 0.1966, + "step": 32370 + }, + { + "epoch": 1.51, + "learning_rate": 1.5084413650400516e-05, + "loss": 0.2456, + "step": 32375 + }, + { + "epoch": 1.51, + "learning_rate": 1.5083629865345728e-05, + "loss": 0.3569, + "step": 32380 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082846080290942e-05, + "loss": 0.2519, + "step": 32385 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082062295236155e-05, + "loss": 0.0463, + "step": 32390 + }, + { + "epoch": 1.51, + "learning_rate": 1.508127851018137e-05, + "loss": 0.0796, + "step": 32395 + }, + { + "epoch": 1.51, + "learning_rate": 1.5080494725126582e-05, + "loss": 0.0918, + "step": 32400 + }, + { + "epoch": 1.51, + "learning_rate": 1.5079710940071796e-05, + "loss": 0.0762, + "step": 32405 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078927155017009e-05, + "loss": 0.0586, + "step": 32410 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078143369962224e-05, + "loss": 0.1551, + "step": 32415 + }, + { + "epoch": 1.51, + "learning_rate": 1.5077359584907436e-05, + "loss": 0.2989, + "step": 32420 + }, + { + "epoch": 1.51, + "learning_rate": 1.507657579985265e-05, + "loss": 0.2065, + "step": 32425 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075792014797862e-05, + "loss": 0.3864, + "step": 32430 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075008229743076e-05, + "loss": 0.2009, + "step": 32435 + }, + { + "epoch": 1.51, + "learning_rate": 1.507422444468829e-05, + "loss": 0.0476, + "step": 32440 + }, + { + "epoch": 1.51, + "learning_rate": 1.5073440659633502e-05, + "loss": 0.1002, + "step": 32445 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072656874578716e-05, + "loss": 0.1062, + "step": 32450 + }, + { + "epoch": 1.51, + "learning_rate": 1.507187308952393e-05, + "loss": 0.0847, + "step": 32455 + }, + { + "epoch": 1.51, + "learning_rate": 1.5071089304469144e-05, + "loss": 0.0809, + "step": 32460 + }, + { + "epoch": 1.51, + "learning_rate": 1.5070305519414356e-05, + "loss": 0.1638, + "step": 32465 + }, + { + "epoch": 1.52, + "learning_rate": 1.5069521734359572e-05, + "loss": 0.2693, + "step": 32470 + }, + { + "epoch": 1.52, + "learning_rate": 1.5068737949304784e-05, + "loss": 0.2582, + "step": 32475 + }, + { + "epoch": 1.52, + "learning_rate": 1.5067954164249998e-05, + "loss": 0.2734, + "step": 32480 + }, + { + "epoch": 1.52, + "learning_rate": 1.506717037919521e-05, + "loss": 0.2822, + "step": 32485 + }, + { + "epoch": 1.52, + "learning_rate": 1.5066386594140426e-05, + "loss": 0.0517, + "step": 32490 + }, + { + "epoch": 1.52, + "learning_rate": 1.5065602809085638e-05, + "loss": 0.0486, + "step": 32495 + }, + { + "epoch": 1.52, + "learning_rate": 1.506481902403085e-05, + "loss": 0.0478, + "step": 32500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5064035238976064e-05, + "loss": 0.1418, + "step": 32505 + }, + { + "epoch": 1.52, + "learning_rate": 1.5063251453921276e-05, + "loss": 0.1221, + "step": 32510 + }, + { + "epoch": 1.52, + "learning_rate": 1.5062467668866492e-05, + "loss": 0.168, + "step": 32515 + }, + { + "epoch": 1.52, + "learning_rate": 1.5061683883811704e-05, + "loss": 0.196, + "step": 32520 + }, + { + "epoch": 1.52, + "learning_rate": 1.5060900098756918e-05, + "loss": 0.2721, + "step": 32525 + }, + { + "epoch": 1.52, + "learning_rate": 1.506011631370213e-05, + "loss": 0.3576, + "step": 32530 + }, + { + "epoch": 1.52, + "learning_rate": 1.5059332528647346e-05, + "loss": 0.3454, + "step": 32535 + }, + { + "epoch": 1.52, + "learning_rate": 1.5058548743592558e-05, + "loss": 0.0558, + "step": 32540 + }, + { + "epoch": 1.52, + "learning_rate": 1.5057764958537772e-05, + "loss": 0.0572, + "step": 32545 + }, + { + "epoch": 1.52, + "learning_rate": 1.5056981173482984e-05, + "loss": 0.0613, + "step": 32550 + }, + { + "epoch": 1.52, + "learning_rate": 1.50561973884282e-05, + "loss": 0.1226, + "step": 32555 + }, + { + "epoch": 1.52, + "learning_rate": 1.5055413603373412e-05, + "loss": 0.1035, + "step": 32560 + }, + { + "epoch": 1.52, + "learning_rate": 1.5054629818318624e-05, + "loss": 0.0922, + "step": 32565 + }, + { + "epoch": 1.52, + "learning_rate": 1.505384603326384e-05, + "loss": 0.2358, + "step": 32570 + }, + { + "epoch": 1.52, + "learning_rate": 1.5053062248209052e-05, + "loss": 0.1497, + "step": 32575 + }, + { + "epoch": 1.52, + "learning_rate": 1.5052278463154266e-05, + "loss": 0.3272, + "step": 32580 + }, + { + "epoch": 1.52, + "learning_rate": 1.5051494678099478e-05, + "loss": 0.3985, + "step": 32585 + }, + { + "epoch": 1.52, + "learning_rate": 1.5050710893044694e-05, + "loss": 0.044, + "step": 32590 + }, + { + "epoch": 1.52, + "learning_rate": 1.5049927107989906e-05, + "loss": 0.1202, + "step": 32595 + }, + { + "epoch": 1.52, + "learning_rate": 1.504914332293512e-05, + "loss": 0.1429, + "step": 32600 + }, + { + "epoch": 1.52, + "learning_rate": 1.5048359537880332e-05, + "loss": 0.0826, + "step": 32605 + }, + { + "epoch": 1.52, + "learning_rate": 1.5047575752825548e-05, + "loss": 0.1068, + "step": 32610 + }, + { + "epoch": 1.52, + "learning_rate": 1.504679196777076e-05, + "loss": 0.2536, + "step": 32615 + }, + { + "epoch": 1.52, + "learning_rate": 1.5046008182715974e-05, + "loss": 0.2464, + "step": 32620 + }, + { + "epoch": 1.52, + "learning_rate": 1.5045224397661186e-05, + "loss": 0.4023, + "step": 32625 + }, + { + "epoch": 1.52, + "learning_rate": 1.5044440612606398e-05, + "loss": 0.4528, + "step": 32630 + }, + { + "epoch": 1.52, + "learning_rate": 1.5043656827551614e-05, + "loss": 0.2417, + "step": 32635 + }, + { + "epoch": 1.52, + "learning_rate": 1.5042873042496826e-05, + "loss": 0.0292, + "step": 32640 + }, + { + "epoch": 1.52, + "learning_rate": 1.504208925744204e-05, + "loss": 0.0988, + "step": 32645 + }, + { + "epoch": 1.52, + "learning_rate": 1.5041305472387254e-05, + "loss": 0.067, + "step": 32650 + }, + { + "epoch": 1.52, + "learning_rate": 1.5040521687332468e-05, + "loss": 0.1081, + "step": 32655 + }, + { + "epoch": 1.52, + "learning_rate": 1.503973790227768e-05, + "loss": 0.1431, + "step": 32660 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038954117222894e-05, + "loss": 0.1415, + "step": 32665 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038170332168108e-05, + "loss": 0.1489, + "step": 32670 + }, + { + "epoch": 1.52, + "learning_rate": 1.5037386547113322e-05, + "loss": 0.187, + "step": 32675 + }, + { + "epoch": 1.52, + "learning_rate": 1.5036602762058534e-05, + "loss": 0.3941, + "step": 32680 + }, + { + "epoch": 1.53, + "learning_rate": 1.503581897700375e-05, + "loss": 0.267, + "step": 32685 + }, + { + "epoch": 1.53, + "learning_rate": 1.5035035191948962e-05, + "loss": 0.0578, + "step": 32690 + }, + { + "epoch": 1.53, + "learning_rate": 1.5034251406894174e-05, + "loss": 0.0391, + "step": 32695 + }, + { + "epoch": 1.53, + "learning_rate": 1.5033467621839388e-05, + "loss": 0.0686, + "step": 32700 + }, + { + "epoch": 1.53, + "learning_rate": 1.50326838367846e-05, + "loss": 0.1086, + "step": 32705 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031900051729816e-05, + "loss": 0.1141, + "step": 32710 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031116266675028e-05, + "loss": 0.1051, + "step": 32715 + }, + { + "epoch": 1.53, + "learning_rate": 1.5030332481620242e-05, + "loss": 0.2131, + "step": 32720 + }, + { + "epoch": 1.53, + "learning_rate": 1.5029548696565454e-05, + "loss": 0.2887, + "step": 32725 + }, + { + "epoch": 1.53, + "learning_rate": 1.502876491151067e-05, + "loss": 0.4061, + "step": 32730 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027981126455882e-05, + "loss": 0.3098, + "step": 32735 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027197341401096e-05, + "loss": 0.0542, + "step": 32740 + }, + { + "epoch": 1.53, + "learning_rate": 1.5026413556346308e-05, + "loss": 0.0951, + "step": 32745 + }, + { + "epoch": 1.53, + "learning_rate": 1.5025629771291524e-05, + "loss": 0.0702, + "step": 32750 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024845986236736e-05, + "loss": 0.0914, + "step": 32755 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024062201181948e-05, + "loss": 0.1323, + "step": 32760 + }, + { + "epoch": 1.53, + "learning_rate": 1.5023278416127162e-05, + "loss": 0.1518, + "step": 32765 + }, + { + "epoch": 1.53, + "learning_rate": 1.5022494631072376e-05, + "loss": 0.2011, + "step": 32770 + }, + { + "epoch": 1.53, + "learning_rate": 1.502171084601759e-05, + "loss": 0.1865, + "step": 32775 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020927060962802e-05, + "loss": 0.328, + "step": 32780 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020143275908018e-05, + "loss": 0.3728, + "step": 32785 + }, + { + "epoch": 1.53, + "learning_rate": 1.501935949085323e-05, + "loss": 0.0544, + "step": 32790 + }, + { + "epoch": 1.53, + "learning_rate": 1.5018575705798444e-05, + "loss": 0.0543, + "step": 32795 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017791920743656e-05, + "loss": 0.0897, + "step": 32800 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017008135688871e-05, + "loss": 0.1329, + "step": 32805 + }, + { + "epoch": 1.53, + "learning_rate": 1.5016224350634084e-05, + "loss": 0.0871, + "step": 32810 + }, + { + "epoch": 1.53, + "learning_rate": 1.5015440565579298e-05, + "loss": 0.1539, + "step": 32815 + }, + { + "epoch": 1.53, + "learning_rate": 1.501465678052451e-05, + "loss": 0.2018, + "step": 32820 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013872995469722e-05, + "loss": 0.1788, + "step": 32825 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013089210414938e-05, + "loss": 0.4134, + "step": 32830 + }, + { + "epoch": 1.53, + "learning_rate": 1.501230542536015e-05, + "loss": 0.3125, + "step": 32835 + }, + { + "epoch": 1.53, + "learning_rate": 1.5011521640305364e-05, + "loss": 0.0423, + "step": 32840 + }, + { + "epoch": 1.53, + "learning_rate": 1.5010737855250576e-05, + "loss": 0.0694, + "step": 32845 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009954070195792e-05, + "loss": 0.0747, + "step": 32850 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009170285141004e-05, + "loss": 0.1646, + "step": 32855 + }, + { + "epoch": 1.53, + "learning_rate": 1.5008386500086218e-05, + "loss": 0.1413, + "step": 32860 + }, + { + "epoch": 1.53, + "learning_rate": 1.500760271503143e-05, + "loss": 0.1363, + "step": 32865 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006818929976645e-05, + "loss": 0.1389, + "step": 32870 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006035144921858e-05, + "loss": 0.313, + "step": 32875 + }, + { + "epoch": 1.53, + "learning_rate": 1.5005251359867072e-05, + "loss": 0.4249, + "step": 32880 + }, + { + "epoch": 1.53, + "learning_rate": 1.5004467574812286e-05, + "loss": 0.2864, + "step": 32885 + }, + { + "epoch": 1.53, + "learning_rate": 1.5003683789757498e-05, + "loss": 0.0551, + "step": 32890 + }, + { + "epoch": 1.53, + "learning_rate": 1.5002900004702712e-05, + "loss": 0.0842, + "step": 32895 + }, + { + "epoch": 1.54, + "learning_rate": 1.5002116219647924e-05, + "loss": 0.0557, + "step": 32900 + }, + { + "epoch": 1.54, + "learning_rate": 1.500133243459314e-05, + "loss": 0.1035, + "step": 32905 + }, + { + "epoch": 1.54, + "learning_rate": 1.5000548649538352e-05, + "loss": 0.0518, + "step": 32910 + }, + { + "epoch": 1.54, + "learning_rate": 1.4999764864483566e-05, + "loss": 0.1531, + "step": 32915 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998981079428778e-05, + "loss": 0.216, + "step": 32920 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998197294373993e-05, + "loss": 0.1716, + "step": 32925 + }, + { + "epoch": 1.54, + "learning_rate": 1.4997413509319206e-05, + "loss": 0.39, + "step": 32930 + }, + { + "epoch": 1.54, + "learning_rate": 1.499662972426442e-05, + "loss": 0.2803, + "step": 32935 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995845939209632e-05, + "loss": 0.1181, + "step": 32940 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995062154154847e-05, + "loss": 0.0665, + "step": 32945 + }, + { + "epoch": 1.54, + "learning_rate": 1.499427836910006e-05, + "loss": 0.0879, + "step": 32950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4993494584045272e-05, + "loss": 0.111, + "step": 32955 + }, + { + "epoch": 1.54, + "learning_rate": 1.4992710798990486e-05, + "loss": 0.1512, + "step": 32960 + }, + { + "epoch": 1.54, + "learning_rate": 1.49919270139357e-05, + "loss": 0.1847, + "step": 32965 + }, + { + "epoch": 1.54, + "learning_rate": 1.4991143228880913e-05, + "loss": 0.1602, + "step": 32970 + }, + { + "epoch": 1.54, + "learning_rate": 1.4990359443826126e-05, + "loss": 0.3279, + "step": 32975 + }, + { + "epoch": 1.54, + "learning_rate": 1.498957565877134e-05, + "loss": 0.2164, + "step": 32980 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988791873716553e-05, + "loss": 0.2688, + "step": 32985 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988008088661767e-05, + "loss": 0.0506, + "step": 32990 + }, + { + "epoch": 1.54, + "learning_rate": 1.498722430360698e-05, + "loss": 0.0692, + "step": 32995 + }, + { + "epoch": 1.54, + "learning_rate": 1.4986440518552195e-05, + "loss": 0.1243, + "step": 33000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4985656733497407e-05, + "loss": 0.1195, + "step": 33005 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984872948442621e-05, + "loss": 0.1462, + "step": 33010 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984089163387834e-05, + "loss": 0.121, + "step": 33015 + }, + { + "epoch": 1.54, + "learning_rate": 1.4983305378333046e-05, + "loss": 0.1771, + "step": 33020 + }, + { + "epoch": 1.54, + "learning_rate": 1.4982521593278261e-05, + "loss": 0.1746, + "step": 33025 + }, + { + "epoch": 1.54, + "learning_rate": 1.4981737808223474e-05, + "loss": 0.3095, + "step": 33030 + }, + { + "epoch": 1.54, + "learning_rate": 1.4980954023168687e-05, + "loss": 0.2838, + "step": 33035 + }, + { + "epoch": 1.54, + "learning_rate": 1.49801702381139e-05, + "loss": 0.0275, + "step": 33040 + }, + { + "epoch": 1.54, + "learning_rate": 1.4979386453059115e-05, + "loss": 0.0868, + "step": 33045 + }, + { + "epoch": 1.54, + "learning_rate": 1.4978602668004327e-05, + "loss": 0.0809, + "step": 33050 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977818882949541e-05, + "loss": 0.1187, + "step": 33055 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977035097894754e-05, + "loss": 0.1628, + "step": 33060 + }, + { + "epoch": 1.54, + "learning_rate": 1.497625131283997e-05, + "loss": 0.1992, + "step": 33065 + }, + { + "epoch": 1.54, + "learning_rate": 1.4975467527785181e-05, + "loss": 0.2054, + "step": 33070 + }, + { + "epoch": 1.54, + "learning_rate": 1.4974683742730395e-05, + "loss": 0.2641, + "step": 33075 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973899957675608e-05, + "loss": 0.4842, + "step": 33080 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973116172620821e-05, + "loss": 0.4444, + "step": 33085 + }, + { + "epoch": 1.54, + "learning_rate": 1.4972332387566035e-05, + "loss": 0.0517, + "step": 33090 + }, + { + "epoch": 1.54, + "learning_rate": 1.4971548602511248e-05, + "loss": 0.0611, + "step": 33095 + }, + { + "epoch": 1.54, + "learning_rate": 1.4970764817456463e-05, + "loss": 0.0702, + "step": 33100 + }, + { + "epoch": 1.54, + "learning_rate": 1.4969981032401675e-05, + "loss": 0.143, + "step": 33105 + }, + { + "epoch": 1.54, + "learning_rate": 1.496919724734689e-05, + "loss": 0.1304, + "step": 33110 + }, + { + "epoch": 1.55, + "learning_rate": 1.4968413462292101e-05, + "loss": 0.1834, + "step": 33115 + }, + { + "epoch": 1.55, + "learning_rate": 1.4967629677237317e-05, + "loss": 0.1337, + "step": 33120 + }, + { + "epoch": 1.55, + "learning_rate": 1.496684589218253e-05, + "loss": 0.213, + "step": 33125 + }, + { + "epoch": 1.55, + "learning_rate": 1.49662188641387e-05, + "loss": 0.3345, + "step": 33130 + }, + { + "epoch": 1.55, + "learning_rate": 1.4965435079083914e-05, + "loss": 0.307, + "step": 33135 + }, + { + "epoch": 1.55, + "learning_rate": 1.4964651294029126e-05, + "loss": 0.063, + "step": 33140 + }, + { + "epoch": 1.55, + "learning_rate": 1.496386750897434e-05, + "loss": 0.0567, + "step": 33145 + }, + { + "epoch": 1.55, + "learning_rate": 1.4963083723919554e-05, + "loss": 0.0812, + "step": 33150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4962299938864766e-05, + "loss": 0.0735, + "step": 33155 + }, + { + "epoch": 1.55, + "learning_rate": 1.4961516153809981e-05, + "loss": 0.1804, + "step": 33160 + }, + { + "epoch": 1.55, + "learning_rate": 1.4960732368755194e-05, + "loss": 0.125, + "step": 33165 + }, + { + "epoch": 1.55, + "learning_rate": 1.4959948583700408e-05, + "loss": 0.2112, + "step": 33170 + }, + { + "epoch": 1.55, + "learning_rate": 1.495916479864562e-05, + "loss": 0.2541, + "step": 33175 + }, + { + "epoch": 1.55, + "learning_rate": 1.4958381013590835e-05, + "loss": 0.2793, + "step": 33180 + }, + { + "epoch": 1.55, + "learning_rate": 1.4957597228536048e-05, + "loss": 0.242, + "step": 33185 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956813443481262e-05, + "loss": 0.0286, + "step": 33190 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956029658426474e-05, + "loss": 0.057, + "step": 33195 + }, + { + "epoch": 1.55, + "learning_rate": 1.495524587337169e-05, + "loss": 0.087, + "step": 33200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4954462088316902e-05, + "loss": 0.0883, + "step": 33205 + }, + { + "epoch": 1.55, + "learning_rate": 1.4953678303262114e-05, + "loss": 0.1055, + "step": 33210 + }, + { + "epoch": 1.55, + "learning_rate": 1.4952894518207328e-05, + "loss": 0.4157, + "step": 33215 + }, + { + "epoch": 1.55, + "learning_rate": 1.495211073315254e-05, + "loss": 0.1814, + "step": 33220 + }, + { + "epoch": 1.55, + "learning_rate": 1.4951326948097755e-05, + "loss": 0.301, + "step": 33225 + }, + { + "epoch": 1.55, + "learning_rate": 1.4950543163042968e-05, + "loss": 0.3661, + "step": 33230 + }, + { + "epoch": 1.55, + "learning_rate": 1.4949759377988182e-05, + "loss": 0.2502, + "step": 33235 + }, + { + "epoch": 1.55, + "learning_rate": 1.4948975592933394e-05, + "loss": 0.0987, + "step": 33240 + }, + { + "epoch": 1.55, + "learning_rate": 1.494819180787861e-05, + "loss": 0.0505, + "step": 33245 + }, + { + "epoch": 1.55, + "learning_rate": 1.4947408022823822e-05, + "loss": 0.1345, + "step": 33250 + }, + { + "epoch": 1.55, + "learning_rate": 1.4946624237769036e-05, + "loss": 0.1235, + "step": 33255 + }, + { + "epoch": 1.55, + "learning_rate": 1.494584045271425e-05, + "loss": 0.1194, + "step": 33260 + }, + { + "epoch": 1.55, + "learning_rate": 1.4945056667659463e-05, + "loss": 0.1738, + "step": 33265 + }, + { + "epoch": 1.55, + "learning_rate": 1.4944272882604676e-05, + "loss": 0.2238, + "step": 33270 + }, + { + "epoch": 1.55, + "learning_rate": 1.4943489097549888e-05, + "loss": 0.295, + "step": 33275 + }, + { + "epoch": 1.55, + "learning_rate": 1.4942705312495103e-05, + "loss": 0.488, + "step": 33280 + }, + { + "epoch": 1.55, + "learning_rate": 1.4941921527440316e-05, + "loss": 0.392, + "step": 33285 + }, + { + "epoch": 1.55, + "learning_rate": 1.494113774238553e-05, + "loss": 0.0366, + "step": 33290 + }, + { + "epoch": 1.55, + "learning_rate": 1.4940353957330742e-05, + "loss": 0.0348, + "step": 33295 + }, + { + "epoch": 1.55, + "learning_rate": 1.4939570172275957e-05, + "loss": 0.0887, + "step": 33300 + }, + { + "epoch": 1.55, + "learning_rate": 1.493878638722117e-05, + "loss": 0.1132, + "step": 33305 + }, + { + "epoch": 1.55, + "learning_rate": 1.4938002602166383e-05, + "loss": 0.1445, + "step": 33310 + }, + { + "epoch": 1.55, + "learning_rate": 1.4937218817111596e-05, + "loss": 0.0744, + "step": 33315 + }, + { + "epoch": 1.55, + "learning_rate": 1.4936435032056811e-05, + "loss": 0.2349, + "step": 33320 + }, + { + "epoch": 1.55, + "learning_rate": 1.4935651247002023e-05, + "loss": 0.3013, + "step": 33325 + }, + { + "epoch": 1.56, + "learning_rate": 1.4934867461947237e-05, + "loss": 0.3091, + "step": 33330 + }, + { + "epoch": 1.56, + "learning_rate": 1.493408367689245e-05, + "loss": 0.3184, + "step": 33335 + }, + { + "epoch": 1.56, + "learning_rate": 1.4933299891837663e-05, + "loss": 0.0297, + "step": 33340 + }, + { + "epoch": 1.56, + "learning_rate": 1.4932516106782877e-05, + "loss": 0.0657, + "step": 33345 + }, + { + "epoch": 1.56, + "learning_rate": 1.493173232172809e-05, + "loss": 0.0838, + "step": 33350 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930948536673303e-05, + "loss": 0.0415, + "step": 33355 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930164751618517e-05, + "loss": 0.1043, + "step": 33360 + }, + { + "epoch": 1.56, + "learning_rate": 1.4929380966563731e-05, + "loss": 0.1528, + "step": 33365 + }, + { + "epoch": 1.56, + "learning_rate": 1.4928597181508943e-05, + "loss": 0.1359, + "step": 33370 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927813396454159e-05, + "loss": 0.2899, + "step": 33375 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927029611399371e-05, + "loss": 0.3585, + "step": 33380 + }, + { + "epoch": 1.56, + "learning_rate": 1.4926245826344585e-05, + "loss": 0.3395, + "step": 33385 + }, + { + "epoch": 1.56, + "learning_rate": 1.4925462041289797e-05, + "loss": 0.0484, + "step": 33390 + }, + { + "epoch": 1.56, + "learning_rate": 1.4924678256235013e-05, + "loss": 0.0426, + "step": 33395 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923894471180225e-05, + "loss": 0.0988, + "step": 33400 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923110686125437e-05, + "loss": 0.1479, + "step": 33405 + }, + { + "epoch": 1.56, + "learning_rate": 1.4922326901070651e-05, + "loss": 0.1827, + "step": 33410 + }, + { + "epoch": 1.56, + "learning_rate": 1.4921543116015864e-05, + "loss": 0.1848, + "step": 33415 + }, + { + "epoch": 1.56, + "learning_rate": 1.492075933096108e-05, + "loss": 0.1389, + "step": 33420 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919975545906291e-05, + "loss": 0.2672, + "step": 33425 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919191760851505e-05, + "loss": 0.3146, + "step": 33430 + }, + { + "epoch": 1.56, + "learning_rate": 1.4918407975796717e-05, + "loss": 0.206, + "step": 33435 + }, + { + "epoch": 1.56, + "learning_rate": 1.4917624190741933e-05, + "loss": 0.0536, + "step": 33440 + }, + { + "epoch": 1.56, + "learning_rate": 1.4916840405687145e-05, + "loss": 0.0521, + "step": 33445 + }, + { + "epoch": 1.56, + "learning_rate": 1.491605662063236e-05, + "loss": 0.053, + "step": 33450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4915272835577571e-05, + "loss": 0.1253, + "step": 33455 + }, + { + "epoch": 1.56, + "learning_rate": 1.4914489050522787e-05, + "loss": 0.1643, + "step": 33460 + }, + { + "epoch": 1.56, + "learning_rate": 1.4913705265468e-05, + "loss": 0.1678, + "step": 33465 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912921480413211e-05, + "loss": 0.1316, + "step": 33470 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912137695358427e-05, + "loss": 0.3028, + "step": 33475 + }, + { + "epoch": 1.56, + "learning_rate": 1.491135391030364e-05, + "loss": 0.4472, + "step": 33480 + }, + { + "epoch": 1.56, + "learning_rate": 1.4910570125248853e-05, + "loss": 0.2954, + "step": 33485 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909786340194065e-05, + "loss": 0.0151, + "step": 33490 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909002555139281e-05, + "loss": 0.0831, + "step": 33495 + }, + { + "epoch": 1.56, + "learning_rate": 1.4908218770084493e-05, + "loss": 0.0586, + "step": 33500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4907434985029707e-05, + "loss": 0.1487, + "step": 33505 + }, + { + "epoch": 1.56, + "learning_rate": 1.490665119997492e-05, + "loss": 0.0689, + "step": 33510 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905867414920135e-05, + "loss": 0.1569, + "step": 33515 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905083629865347e-05, + "loss": 0.1794, + "step": 33520 + }, + { + "epoch": 1.56, + "learning_rate": 1.4904299844810561e-05, + "loss": 0.2747, + "step": 33525 + }, + { + "epoch": 1.56, + "learning_rate": 1.4903516059755773e-05, + "loss": 0.3703, + "step": 33530 + }, + { + "epoch": 1.56, + "learning_rate": 1.4902732274700985e-05, + "loss": 0.336, + "step": 33535 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901948489646201e-05, + "loss": 0.0809, + "step": 33540 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901164704591413e-05, + "loss": 0.0574, + "step": 33545 + }, + { + "epoch": 1.57, + "learning_rate": 1.4900380919536627e-05, + "loss": 0.0474, + "step": 33550 + }, + { + "epoch": 1.57, + "learning_rate": 1.489959713448184e-05, + "loss": 0.1294, + "step": 33555 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898813349427055e-05, + "loss": 0.1134, + "step": 33560 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898029564372267e-05, + "loss": 0.1304, + "step": 33565 + }, + { + "epoch": 1.57, + "learning_rate": 1.4897245779317481e-05, + "loss": 0.1349, + "step": 33570 + }, + { + "epoch": 1.57, + "learning_rate": 1.4896461994262695e-05, + "loss": 0.1339, + "step": 33575 + }, + { + "epoch": 1.57, + "learning_rate": 1.4895678209207909e-05, + "loss": 0.3101, + "step": 33580 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894894424153121e-05, + "loss": 0.2441, + "step": 33585 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894110639098337e-05, + "loss": 0.0574, + "step": 33590 + }, + { + "epoch": 1.57, + "learning_rate": 1.4893326854043549e-05, + "loss": 0.1494, + "step": 33595 + }, + { + "epoch": 1.57, + "learning_rate": 1.4892543068988761e-05, + "loss": 0.0818, + "step": 33600 + }, + { + "epoch": 1.57, + "learning_rate": 1.4891759283933975e-05, + "loss": 0.0343, + "step": 33605 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890975498879187e-05, + "loss": 0.0497, + "step": 33610 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890191713824403e-05, + "loss": 0.1251, + "step": 33615 + }, + { + "epoch": 1.57, + "learning_rate": 1.4889407928769615e-05, + "loss": 0.1593, + "step": 33620 + }, + { + "epoch": 1.57, + "learning_rate": 1.4888624143714829e-05, + "loss": 0.1829, + "step": 33625 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887840358660041e-05, + "loss": 0.4039, + "step": 33630 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887056573605257e-05, + "loss": 0.2516, + "step": 33635 + }, + { + "epoch": 1.57, + "learning_rate": 1.4886272788550469e-05, + "loss": 0.0328, + "step": 33640 + }, + { + "epoch": 1.57, + "learning_rate": 1.4885489003495683e-05, + "loss": 0.0959, + "step": 33645 + }, + { + "epoch": 1.57, + "learning_rate": 1.4884705218440895e-05, + "loss": 0.0733, + "step": 33650 + }, + { + "epoch": 1.57, + "learning_rate": 1.488392143338611e-05, + "loss": 0.0875, + "step": 33655 + }, + { + "epoch": 1.57, + "learning_rate": 1.4883137648331323e-05, + "loss": 0.0994, + "step": 33660 + }, + { + "epoch": 1.57, + "learning_rate": 1.4882353863276535e-05, + "loss": 0.1776, + "step": 33665 + }, + { + "epoch": 1.57, + "learning_rate": 1.4881570078221749e-05, + "loss": 0.1794, + "step": 33670 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880786293166963e-05, + "loss": 0.2112, + "step": 33675 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880002508112177e-05, + "loss": 0.2123, + "step": 33680 + }, + { + "epoch": 1.57, + "learning_rate": 1.4879218723057389e-05, + "loss": 0.4273, + "step": 33685 + }, + { + "epoch": 1.57, + "learning_rate": 1.4878434938002605e-05, + "loss": 0.0487, + "step": 33690 + }, + { + "epoch": 1.57, + "learning_rate": 1.4877651152947817e-05, + "loss": 0.0653, + "step": 33695 + }, + { + "epoch": 1.57, + "learning_rate": 1.487686736789303e-05, + "loss": 0.0647, + "step": 33700 + }, + { + "epoch": 1.57, + "learning_rate": 1.4876083582838243e-05, + "loss": 0.0774, + "step": 33705 + }, + { + "epoch": 1.57, + "learning_rate": 1.4875299797783459e-05, + "loss": 0.1084, + "step": 33710 + }, + { + "epoch": 1.57, + "learning_rate": 1.487451601272867e-05, + "loss": 0.1575, + "step": 33715 + }, + { + "epoch": 1.57, + "learning_rate": 1.4873732227673885e-05, + "loss": 0.1553, + "step": 33720 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872948442619097e-05, + "loss": 0.2492, + "step": 33725 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872164657564309e-05, + "loss": 0.2886, + "step": 33730 + }, + { + "epoch": 1.57, + "learning_rate": 1.4871380872509525e-05, + "loss": 0.2957, + "step": 33735 + }, + { + "epoch": 1.57, + "learning_rate": 1.4870597087454737e-05, + "loss": 0.0997, + "step": 33740 + }, + { + "epoch": 1.57, + "learning_rate": 1.486981330239995e-05, + "loss": 0.0764, + "step": 33745 + }, + { + "epoch": 1.57, + "learning_rate": 1.4869029517345163e-05, + "loss": 0.0875, + "step": 33750 + }, + { + "epoch": 1.58, + "learning_rate": 1.4868245732290379e-05, + "loss": 0.1655, + "step": 33755 + }, + { + "epoch": 1.58, + "learning_rate": 1.4867461947235591e-05, + "loss": 0.0998, + "step": 33760 + }, + { + "epoch": 1.58, + "learning_rate": 1.4866678162180805e-05, + "loss": 0.1561, + "step": 33765 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865894377126017e-05, + "loss": 0.1504, + "step": 33770 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865110592071233e-05, + "loss": 0.2526, + "step": 33775 + }, + { + "epoch": 1.58, + "learning_rate": 1.4864326807016445e-05, + "loss": 0.3547, + "step": 33780 + }, + { + "epoch": 1.58, + "learning_rate": 1.4863543021961659e-05, + "loss": 0.2778, + "step": 33785 + }, + { + "epoch": 1.58, + "learning_rate": 1.4862759236906873e-05, + "loss": 0.0651, + "step": 33790 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861975451852085e-05, + "loss": 0.0651, + "step": 33795 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861191666797299e-05, + "loss": 0.1012, + "step": 33800 + }, + { + "epoch": 1.58, + "learning_rate": 1.4860407881742511e-05, + "loss": 0.1282, + "step": 33805 + }, + { + "epoch": 1.58, + "learning_rate": 1.4859624096687727e-05, + "loss": 0.163, + "step": 33810 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858840311632939e-05, + "loss": 0.221, + "step": 33815 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858056526578153e-05, + "loss": 0.1405, + "step": 33820 + }, + { + "epoch": 1.58, + "learning_rate": 1.4857272741523365e-05, + "loss": 0.3016, + "step": 33825 + }, + { + "epoch": 1.58, + "learning_rate": 1.485648895646858e-05, + "loss": 0.4393, + "step": 33830 + }, + { + "epoch": 1.58, + "learning_rate": 1.4855705171413793e-05, + "loss": 0.2873, + "step": 33835 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854921386359007e-05, + "loss": 0.0466, + "step": 33840 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854137601304219e-05, + "loss": 0.0364, + "step": 33845 + }, + { + "epoch": 1.58, + "learning_rate": 1.4853353816249434e-05, + "loss": 0.1092, + "step": 33850 + }, + { + "epoch": 1.58, + "learning_rate": 1.4852570031194647e-05, + "loss": 0.1333, + "step": 33855 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851786246139859e-05, + "loss": 0.1323, + "step": 33860 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851002461085073e-05, + "loss": 0.1203, + "step": 33865 + }, + { + "epoch": 1.58, + "learning_rate": 1.4850218676030285e-05, + "loss": 0.1091, + "step": 33870 + }, + { + "epoch": 1.58, + "learning_rate": 1.48494348909755e-05, + "loss": 0.2064, + "step": 33875 + }, + { + "epoch": 1.58, + "learning_rate": 1.4848651105920713e-05, + "loss": 0.3023, + "step": 33880 + }, + { + "epoch": 1.58, + "learning_rate": 1.4847867320865927e-05, + "loss": 0.3641, + "step": 33885 + }, + { + "epoch": 1.58, + "learning_rate": 1.484708353581114e-05, + "loss": 0.0508, + "step": 33890 + }, + { + "epoch": 1.58, + "learning_rate": 1.4846299750756354e-05, + "loss": 0.0378, + "step": 33895 + }, + { + "epoch": 1.58, + "learning_rate": 1.4845515965701567e-05, + "loss": 0.1599, + "step": 33900 + }, + { + "epoch": 1.58, + "learning_rate": 1.4844732180646782e-05, + "loss": 0.0987, + "step": 33905 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843948395591994e-05, + "loss": 0.0775, + "step": 33910 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843164610537208e-05, + "loss": 0.1141, + "step": 33915 + }, + { + "epoch": 1.58, + "learning_rate": 1.484238082548242e-05, + "loss": 0.1908, + "step": 33920 + }, + { + "epoch": 1.58, + "learning_rate": 1.4841597040427633e-05, + "loss": 0.2086, + "step": 33925 + }, + { + "epoch": 1.58, + "learning_rate": 1.4840813255372848e-05, + "loss": 0.3953, + "step": 33930 + }, + { + "epoch": 1.58, + "learning_rate": 1.484002947031806e-05, + "loss": 0.3082, + "step": 33935 + }, + { + "epoch": 1.58, + "learning_rate": 1.4839245685263275e-05, + "loss": 0.0498, + "step": 33940 + }, + { + "epoch": 1.58, + "learning_rate": 1.4838461900208487e-05, + "loss": 0.0308, + "step": 33945 + }, + { + "epoch": 1.58, + "learning_rate": 1.4837678115153702e-05, + "loss": 0.0608, + "step": 33950 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836894330098915e-05, + "loss": 0.1048, + "step": 33955 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836110545044128e-05, + "loss": 0.1423, + "step": 33960 + }, + { + "epoch": 1.58, + "learning_rate": 1.483532675998934e-05, + "loss": 0.175, + "step": 33965 + }, + { + "epoch": 1.59, + "learning_rate": 1.4834542974934556e-05, + "loss": 0.1559, + "step": 33970 + }, + { + "epoch": 1.59, + "learning_rate": 1.4833759189879768e-05, + "loss": 0.2204, + "step": 33975 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832975404824982e-05, + "loss": 0.3428, + "step": 33980 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832191619770195e-05, + "loss": 0.2474, + "step": 33985 + }, + { + "epoch": 1.59, + "learning_rate": 1.4831407834715409e-05, + "loss": 0.0378, + "step": 33990 + }, + { + "epoch": 1.59, + "learning_rate": 1.4830624049660622e-05, + "loss": 0.0739, + "step": 33995 + }, + { + "epoch": 1.59, + "learning_rate": 1.4829840264605835e-05, + "loss": 0.0936, + "step": 34000 + }, + { + "epoch": 1.59, + "learning_rate": 1.482905647955105e-05, + "loss": 0.0705, + "step": 34005 + }, + { + "epoch": 1.59, + "learning_rate": 1.4828272694496262e-05, + "loss": 0.0965, + "step": 34010 + }, + { + "epoch": 1.59, + "learning_rate": 1.4827488909441476e-05, + "loss": 0.0939, + "step": 34015 + }, + { + "epoch": 1.59, + "learning_rate": 1.4826705124386689e-05, + "loss": 0.1264, + "step": 34020 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825921339331904e-05, + "loss": 0.1568, + "step": 34025 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825137554277116e-05, + "loss": 0.2927, + "step": 34030 + }, + { + "epoch": 1.59, + "learning_rate": 1.482435376922233e-05, + "loss": 0.3461, + "step": 34035 + }, + { + "epoch": 1.59, + "learning_rate": 1.4823569984167542e-05, + "loss": 0.0535, + "step": 34040 + }, + { + "epoch": 1.59, + "learning_rate": 1.4822786199112758e-05, + "loss": 0.0372, + "step": 34045 + }, + { + "epoch": 1.59, + "learning_rate": 1.482200241405797e-05, + "loss": 0.0823, + "step": 34050 + }, + { + "epoch": 1.59, + "learning_rate": 1.4821218629003183e-05, + "loss": 0.0993, + "step": 34055 + }, + { + "epoch": 1.59, + "learning_rate": 1.4820434843948396e-05, + "loss": 0.1109, + "step": 34060 + }, + { + "epoch": 1.59, + "learning_rate": 1.4819651058893609e-05, + "loss": 0.1505, + "step": 34065 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818867273838824e-05, + "loss": 0.2167, + "step": 34070 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818083488784036e-05, + "loss": 0.1975, + "step": 34075 + }, + { + "epoch": 1.59, + "learning_rate": 1.481729970372925e-05, + "loss": 0.3396, + "step": 34080 + }, + { + "epoch": 1.59, + "learning_rate": 1.4816515918674463e-05, + "loss": 0.2306, + "step": 34085 + }, + { + "epoch": 1.59, + "learning_rate": 1.4815732133619678e-05, + "loss": 0.0387, + "step": 34090 + }, + { + "epoch": 1.59, + "learning_rate": 1.481494834856489e-05, + "loss": 0.0714, + "step": 34095 + }, + { + "epoch": 1.59, + "learning_rate": 1.4814164563510104e-05, + "loss": 0.1295, + "step": 34100 + }, + { + "epoch": 1.59, + "learning_rate": 1.4813380778455318e-05, + "loss": 0.1515, + "step": 34105 + }, + { + "epoch": 1.59, + "learning_rate": 1.4812596993400532e-05, + "loss": 0.1432, + "step": 34110 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811813208345744e-05, + "loss": 0.1063, + "step": 34115 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811029423290957e-05, + "loss": 0.175, + "step": 34120 + }, + { + "epoch": 1.59, + "learning_rate": 1.4810245638236172e-05, + "loss": 0.2832, + "step": 34125 + }, + { + "epoch": 1.59, + "learning_rate": 1.4809461853181384e-05, + "loss": 0.4135, + "step": 34130 + }, + { + "epoch": 1.59, + "learning_rate": 1.4808678068126598e-05, + "loss": 0.2442, + "step": 34135 + }, + { + "epoch": 1.59, + "learning_rate": 1.480789428307181e-05, + "loss": 0.0886, + "step": 34140 + }, + { + "epoch": 1.59, + "learning_rate": 1.4807110498017026e-05, + "loss": 0.0458, + "step": 34145 + }, + { + "epoch": 1.59, + "learning_rate": 1.4806326712962238e-05, + "loss": 0.1056, + "step": 34150 + }, + { + "epoch": 1.59, + "learning_rate": 1.4805542927907452e-05, + "loss": 0.0775, + "step": 34155 + }, + { + "epoch": 1.59, + "learning_rate": 1.4804759142852664e-05, + "loss": 0.1509, + "step": 34160 + }, + { + "epoch": 1.59, + "learning_rate": 1.480397535779788e-05, + "loss": 0.1604, + "step": 34165 + }, + { + "epoch": 1.59, + "learning_rate": 1.4803191572743092e-05, + "loss": 0.1765, + "step": 34170 + }, + { + "epoch": 1.59, + "learning_rate": 1.4802407787688306e-05, + "loss": 0.1314, + "step": 34175 + }, + { + "epoch": 1.59, + "learning_rate": 1.4801624002633518e-05, + "loss": 0.428, + "step": 34180 + }, + { + "epoch": 1.6, + "learning_rate": 1.480084021757873e-05, + "loss": 0.2183, + "step": 34185 + }, + { + "epoch": 1.6, + "learning_rate": 1.4800056432523946e-05, + "loss": 0.0748, + "step": 34190 + }, + { + "epoch": 1.6, + "learning_rate": 1.4799272647469158e-05, + "loss": 0.0489, + "step": 34195 + }, + { + "epoch": 1.6, + "learning_rate": 1.4798488862414372e-05, + "loss": 0.0639, + "step": 34200 + }, + { + "epoch": 1.6, + "learning_rate": 1.4797705077359586e-05, + "loss": 0.0929, + "step": 34205 + }, + { + "epoch": 1.6, + "learning_rate": 1.47969212923048e-05, + "loss": 0.187, + "step": 34210 + }, + { + "epoch": 1.6, + "learning_rate": 1.4796137507250012e-05, + "loss": 0.1703, + "step": 34215 + }, + { + "epoch": 1.6, + "learning_rate": 1.4795353722195228e-05, + "loss": 0.1433, + "step": 34220 + }, + { + "epoch": 1.6, + "learning_rate": 1.479456993714044e-05, + "loss": 0.2344, + "step": 34225 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793786152085654e-05, + "loss": 0.3602, + "step": 34230 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793002367030866e-05, + "loss": 0.3009, + "step": 34235 + }, + { + "epoch": 1.6, + "learning_rate": 1.4792218581976082e-05, + "loss": 0.107, + "step": 34240 + }, + { + "epoch": 1.6, + "learning_rate": 1.4791434796921294e-05, + "loss": 0.0895, + "step": 34245 + }, + { + "epoch": 1.6, + "learning_rate": 1.4790651011866506e-05, + "loss": 0.0729, + "step": 34250 + }, + { + "epoch": 1.6, + "learning_rate": 1.478986722681172e-05, + "loss": 0.0705, + "step": 34255 + }, + { + "epoch": 1.6, + "learning_rate": 1.4789083441756932e-05, + "loss": 0.1037, + "step": 34260 + }, + { + "epoch": 1.6, + "learning_rate": 1.4788299656702148e-05, + "loss": 0.1372, + "step": 34265 + }, + { + "epoch": 1.6, + "learning_rate": 1.478751587164736e-05, + "loss": 0.2172, + "step": 34270 + }, + { + "epoch": 1.6, + "learning_rate": 1.4786732086592574e-05, + "loss": 0.2641, + "step": 34275 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785948301537786e-05, + "loss": 0.3562, + "step": 34280 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785164516483002e-05, + "loss": 0.2552, + "step": 34285 + }, + { + "epoch": 1.6, + "learning_rate": 1.4784380731428214e-05, + "loss": 0.0604, + "step": 34290 + }, + { + "epoch": 1.6, + "learning_rate": 1.4783596946373428e-05, + "loss": 0.0537, + "step": 34295 + }, + { + "epoch": 1.6, + "learning_rate": 1.478281316131864e-05, + "loss": 0.1428, + "step": 34300 + }, + { + "epoch": 1.6, + "learning_rate": 1.4782029376263856e-05, + "loss": 0.1001, + "step": 34305 + }, + { + "epoch": 1.6, + "learning_rate": 1.4781245591209068e-05, + "loss": 0.0837, + "step": 34310 + }, + { + "epoch": 1.6, + "learning_rate": 1.478046180615428e-05, + "loss": 0.125, + "step": 34315 + }, + { + "epoch": 1.6, + "learning_rate": 1.4779678021099496e-05, + "loss": 0.1296, + "step": 34320 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778894236044708e-05, + "loss": 0.1319, + "step": 34325 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778110450989922e-05, + "loss": 0.2868, + "step": 34330 + }, + { + "epoch": 1.6, + "learning_rate": 1.4777326665935134e-05, + "loss": 0.3055, + "step": 34335 + }, + { + "epoch": 1.6, + "learning_rate": 1.477654288088035e-05, + "loss": 0.0613, + "step": 34340 + }, + { + "epoch": 1.6, + "learning_rate": 1.4775759095825562e-05, + "loss": 0.1232, + "step": 34345 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774975310770776e-05, + "loss": 0.0745, + "step": 34350 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774191525715988e-05, + "loss": 0.128, + "step": 34355 + }, + { + "epoch": 1.6, + "learning_rate": 1.4773407740661204e-05, + "loss": 0.0547, + "step": 34360 + }, + { + "epoch": 1.6, + "learning_rate": 1.4772623955606416e-05, + "loss": 0.1655, + "step": 34365 + }, + { + "epoch": 1.6, + "learning_rate": 1.477184017055163e-05, + "loss": 0.47, + "step": 34370 + }, + { + "epoch": 1.6, + "learning_rate": 1.4771056385496842e-05, + "loss": 0.2424, + "step": 34375 + }, + { + "epoch": 1.6, + "learning_rate": 1.4770272600442054e-05, + "loss": 0.3795, + "step": 34380 + }, + { + "epoch": 1.6, + "learning_rate": 1.476948881538727e-05, + "loss": 0.292, + "step": 34385 + }, + { + "epoch": 1.6, + "learning_rate": 1.4768705030332482e-05, + "loss": 0.0394, + "step": 34390 + }, + { + "epoch": 1.6, + "learning_rate": 1.4767921245277696e-05, + "loss": 0.0294, + "step": 34395 + }, + { + "epoch": 1.61, + "learning_rate": 1.4767137460222908e-05, + "loss": 0.0748, + "step": 34400 + }, + { + "epoch": 1.61, + "learning_rate": 1.4766353675168124e-05, + "loss": 0.1135, + "step": 34405 + }, + { + "epoch": 1.61, + "learning_rate": 1.4765569890113336e-05, + "loss": 0.1248, + "step": 34410 + }, + { + "epoch": 1.61, + "learning_rate": 1.476478610505855e-05, + "loss": 0.1699, + "step": 34415 + }, + { + "epoch": 1.61, + "learning_rate": 1.4764002320003764e-05, + "loss": 0.1724, + "step": 34420 + }, + { + "epoch": 1.61, + "learning_rate": 1.4763218534948978e-05, + "loss": 0.2934, + "step": 34425 + }, + { + "epoch": 1.61, + "learning_rate": 1.476243474989419e-05, + "loss": 0.3888, + "step": 34430 + }, + { + "epoch": 1.61, + "learning_rate": 1.4761650964839405e-05, + "loss": 0.3509, + "step": 34435 + }, + { + "epoch": 1.61, + "learning_rate": 1.4760867179784618e-05, + "loss": 0.0337, + "step": 34440 + }, + { + "epoch": 1.61, + "learning_rate": 1.476008339472983e-05, + "loss": 0.0399, + "step": 34445 + }, + { + "epoch": 1.61, + "learning_rate": 1.4759299609675044e-05, + "loss": 0.071, + "step": 34450 + }, + { + "epoch": 1.61, + "learning_rate": 1.4758515824620256e-05, + "loss": 0.1536, + "step": 34455 + }, + { + "epoch": 1.61, + "learning_rate": 1.4757732039565472e-05, + "loss": 0.0879, + "step": 34460 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756948254510684e-05, + "loss": 0.256, + "step": 34465 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756164469455898e-05, + "loss": 0.1642, + "step": 34470 + }, + { + "epoch": 1.61, + "learning_rate": 1.475538068440111e-05, + "loss": 0.3665, + "step": 34475 + }, + { + "epoch": 1.61, + "learning_rate": 1.4754596899346326e-05, + "loss": 0.5219, + "step": 34480 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753813114291538e-05, + "loss": 0.24, + "step": 34485 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753029329236752e-05, + "loss": 0.0458, + "step": 34490 + }, + { + "epoch": 1.61, + "learning_rate": 1.4752245544181964e-05, + "loss": 0.0644, + "step": 34495 + }, + { + "epoch": 1.61, + "learning_rate": 1.475146175912718e-05, + "loss": 0.0412, + "step": 34500 + }, + { + "epoch": 1.61, + "learning_rate": 1.4750677974072392e-05, + "loss": 0.0782, + "step": 34505 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749894189017604e-05, + "loss": 0.102, + "step": 34510 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749110403962818e-05, + "loss": 0.1028, + "step": 34515 + }, + { + "epoch": 1.61, + "learning_rate": 1.4748326618908032e-05, + "loss": 0.1788, + "step": 34520 + }, + { + "epoch": 1.61, + "learning_rate": 1.4747542833853246e-05, + "loss": 0.1417, + "step": 34525 + }, + { + "epoch": 1.61, + "learning_rate": 1.4746759048798458e-05, + "loss": 0.2783, + "step": 34530 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745975263743673e-05, + "loss": 0.2968, + "step": 34535 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745191478688886e-05, + "loss": 0.0946, + "step": 34540 + }, + { + "epoch": 1.61, + "learning_rate": 1.47444076936341e-05, + "loss": 0.0437, + "step": 34545 + }, + { + "epoch": 1.61, + "learning_rate": 1.4743623908579312e-05, + "loss": 0.0868, + "step": 34550 + }, + { + "epoch": 1.61, + "learning_rate": 1.4742840123524527e-05, + "loss": 0.105, + "step": 34555 + }, + { + "epoch": 1.61, + "learning_rate": 1.474205633846974e-05, + "loss": 0.1074, + "step": 34560 + }, + { + "epoch": 1.61, + "learning_rate": 1.4741272553414953e-05, + "loss": 0.1464, + "step": 34565 + }, + { + "epoch": 1.61, + "learning_rate": 1.4740488768360166e-05, + "loss": 0.1298, + "step": 34570 + }, + { + "epoch": 1.61, + "learning_rate": 1.4739704983305378e-05, + "loss": 0.2425, + "step": 34575 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738921198250593e-05, + "loss": 0.3058, + "step": 34580 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738137413195806e-05, + "loss": 0.2462, + "step": 34585 + }, + { + "epoch": 1.61, + "learning_rate": 1.473735362814102e-05, + "loss": 0.0695, + "step": 34590 + }, + { + "epoch": 1.61, + "learning_rate": 1.4736569843086232e-05, + "loss": 0.0971, + "step": 34595 + }, + { + "epoch": 1.61, + "learning_rate": 1.4735786058031447e-05, + "loss": 0.1426, + "step": 34600 + }, + { + "epoch": 1.61, + "learning_rate": 1.473500227297666e-05, + "loss": 0.0843, + "step": 34605 + }, + { + "epoch": 1.61, + "learning_rate": 1.4734218487921874e-05, + "loss": 0.1106, + "step": 34610 + }, + { + "epoch": 1.62, + "learning_rate": 1.4733434702867086e-05, + "loss": 0.0959, + "step": 34615 + }, + { + "epoch": 1.62, + "learning_rate": 1.4732650917812301e-05, + "loss": 0.1614, + "step": 34620 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731867132757514e-05, + "loss": 0.2626, + "step": 34625 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731083347702727e-05, + "loss": 0.2758, + "step": 34630 + }, + { + "epoch": 1.62, + "learning_rate": 1.4730299562647941e-05, + "loss": 0.3072, + "step": 34635 + }, + { + "epoch": 1.62, + "learning_rate": 1.4729515777593154e-05, + "loss": 0.0358, + "step": 34640 + }, + { + "epoch": 1.62, + "learning_rate": 1.4728731992538367e-05, + "loss": 0.0405, + "step": 34645 + }, + { + "epoch": 1.62, + "learning_rate": 1.472794820748358e-05, + "loss": 0.0772, + "step": 34650 + }, + { + "epoch": 1.62, + "learning_rate": 1.4727164422428795e-05, + "loss": 0.1179, + "step": 34655 + }, + { + "epoch": 1.62, + "learning_rate": 1.4726380637374008e-05, + "loss": 0.0595, + "step": 34660 + }, + { + "epoch": 1.62, + "learning_rate": 1.4725596852319221e-05, + "loss": 0.2746, + "step": 34665 + }, + { + "epoch": 1.62, + "learning_rate": 1.4724813067264434e-05, + "loss": 0.1401, + "step": 34670 + }, + { + "epoch": 1.62, + "learning_rate": 1.472402928220965e-05, + "loss": 0.1435, + "step": 34675 + }, + { + "epoch": 1.62, + "learning_rate": 1.4723245497154861e-05, + "loss": 0.2242, + "step": 34680 + }, + { + "epoch": 1.62, + "learning_rate": 1.4722461712100075e-05, + "loss": 0.3268, + "step": 34685 + }, + { + "epoch": 1.62, + "learning_rate": 1.4721677927045288e-05, + "loss": 0.034, + "step": 34690 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720894141990503e-05, + "loss": 0.1351, + "step": 34695 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720110356935715e-05, + "loss": 0.0795, + "step": 34700 + }, + { + "epoch": 1.62, + "learning_rate": 1.4719326571880928e-05, + "loss": 0.0898, + "step": 34705 + }, + { + "epoch": 1.62, + "learning_rate": 1.4718542786826141e-05, + "loss": 0.096, + "step": 34710 + }, + { + "epoch": 1.62, + "learning_rate": 1.4717759001771354e-05, + "loss": 0.1264, + "step": 34715 + }, + { + "epoch": 1.62, + "learning_rate": 1.471697521671657e-05, + "loss": 0.1767, + "step": 34720 + }, + { + "epoch": 1.62, + "learning_rate": 1.4716191431661782e-05, + "loss": 0.1141, + "step": 34725 + }, + { + "epoch": 1.62, + "learning_rate": 1.4715407646606995e-05, + "loss": 0.3326, + "step": 34730 + }, + { + "epoch": 1.62, + "learning_rate": 1.471462386155221e-05, + "loss": 0.3716, + "step": 34735 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713840076497423e-05, + "loss": 0.0574, + "step": 34740 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713056291442635e-05, + "loss": 0.0426, + "step": 34745 + }, + { + "epoch": 1.62, + "learning_rate": 1.4712272506387851e-05, + "loss": 0.0909, + "step": 34750 + }, + { + "epoch": 1.62, + "learning_rate": 1.4711488721333063e-05, + "loss": 0.1078, + "step": 34755 + }, + { + "epoch": 1.62, + "learning_rate": 1.4710704936278277e-05, + "loss": 0.1061, + "step": 34760 + }, + { + "epoch": 1.62, + "learning_rate": 1.470992115122349e-05, + "loss": 0.1885, + "step": 34765 + }, + { + "epoch": 1.62, + "learning_rate": 1.4709137366168702e-05, + "loss": 0.176, + "step": 34770 + }, + { + "epoch": 1.62, + "learning_rate": 1.4708353581113917e-05, + "loss": 0.1519, + "step": 34775 + }, + { + "epoch": 1.62, + "learning_rate": 1.470756979605913e-05, + "loss": 0.2766, + "step": 34780 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706786011004343e-05, + "loss": 0.2345, + "step": 34785 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706002225949556e-05, + "loss": 0.0734, + "step": 34790 + }, + { + "epoch": 1.62, + "learning_rate": 1.4705218440894771e-05, + "loss": 0.0545, + "step": 34795 + }, + { + "epoch": 1.62, + "learning_rate": 1.4704434655839983e-05, + "loss": 0.0359, + "step": 34800 + }, + { + "epoch": 1.62, + "learning_rate": 1.4703650870785197e-05, + "loss": 0.08, + "step": 34805 + }, + { + "epoch": 1.62, + "learning_rate": 1.470286708573041e-05, + "loss": 0.1442, + "step": 34810 + }, + { + "epoch": 1.62, + "learning_rate": 1.4702083300675625e-05, + "loss": 0.1107, + "step": 34815 + }, + { + "epoch": 1.62, + "learning_rate": 1.4701299515620837e-05, + "loss": 0.1809, + "step": 34820 + }, + { + "epoch": 1.62, + "learning_rate": 1.4700515730566051e-05, + "loss": 0.1639, + "step": 34825 + }, + { + "epoch": 1.63, + "learning_rate": 1.4699731945511263e-05, + "loss": 0.2368, + "step": 34830 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698948160456477e-05, + "loss": 0.2754, + "step": 34835 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698164375401691e-05, + "loss": 0.1172, + "step": 34840 + }, + { + "epoch": 1.63, + "learning_rate": 1.4697380590346903e-05, + "loss": 0.0575, + "step": 34845 + }, + { + "epoch": 1.63, + "learning_rate": 1.4696596805292119e-05, + "loss": 0.1116, + "step": 34850 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695813020237331e-05, + "loss": 0.1418, + "step": 34855 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695029235182545e-05, + "loss": 0.1506, + "step": 34860 + }, + { + "epoch": 1.63, + "learning_rate": 1.4694245450127757e-05, + "loss": 0.1702, + "step": 34865 + }, + { + "epoch": 1.63, + "learning_rate": 1.4693461665072973e-05, + "loss": 0.2012, + "step": 34870 + }, + { + "epoch": 1.63, + "learning_rate": 1.4692677880018185e-05, + "loss": 0.28, + "step": 34875 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691894094963399e-05, + "loss": 0.2179, + "step": 34880 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691110309908611e-05, + "loss": 0.2453, + "step": 34885 + }, + { + "epoch": 1.63, + "learning_rate": 1.4690326524853827e-05, + "loss": 0.0277, + "step": 34890 + }, + { + "epoch": 1.63, + "learning_rate": 1.4689542739799039e-05, + "loss": 0.1151, + "step": 34895 + }, + { + "epoch": 1.63, + "learning_rate": 1.4688758954744251e-05, + "loss": 0.091, + "step": 34900 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687975169689465e-05, + "loss": 0.1309, + "step": 34905 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687191384634677e-05, + "loss": 0.0805, + "step": 34910 + }, + { + "epoch": 1.63, + "learning_rate": 1.4686407599579893e-05, + "loss": 0.1674, + "step": 34915 + }, + { + "epoch": 1.63, + "learning_rate": 1.4685623814525105e-05, + "loss": 0.1859, + "step": 34920 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684840029470319e-05, + "loss": 0.234, + "step": 34925 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684056244415531e-05, + "loss": 0.3353, + "step": 34930 + }, + { + "epoch": 1.63, + "learning_rate": 1.4683272459360747e-05, + "loss": 0.3759, + "step": 34935 + }, + { + "epoch": 1.63, + "learning_rate": 1.4682488674305959e-05, + "loss": 0.0299, + "step": 34940 + }, + { + "epoch": 1.63, + "learning_rate": 1.4681704889251173e-05, + "loss": 0.0724, + "step": 34945 + }, + { + "epoch": 1.63, + "learning_rate": 1.4680921104196387e-05, + "loss": 0.1143, + "step": 34950 + }, + { + "epoch": 1.63, + "learning_rate": 1.46801373191416e-05, + "loss": 0.1031, + "step": 34955 + }, + { + "epoch": 1.63, + "learning_rate": 1.4679353534086813e-05, + "loss": 0.1564, + "step": 34960 + }, + { + "epoch": 1.63, + "learning_rate": 1.4678569749032025e-05, + "loss": 0.1843, + "step": 34965 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677785963977241e-05, + "loss": 0.1421, + "step": 34970 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677002178922453e-05, + "loss": 0.1495, + "step": 34975 + }, + { + "epoch": 1.63, + "learning_rate": 1.4676218393867667e-05, + "loss": 0.3336, + "step": 34980 + }, + { + "epoch": 1.63, + "learning_rate": 1.467543460881288e-05, + "loss": 0.3336, + "step": 34985 + }, + { + "epoch": 1.63, + "learning_rate": 1.4674650823758095e-05, + "loss": 0.0764, + "step": 34990 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673867038703307e-05, + "loss": 0.0414, + "step": 34995 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673083253648521e-05, + "loss": 0.0867, + "step": 35000 + }, + { + "epoch": 1.63, + "learning_rate": 1.4672299468593733e-05, + "loss": 0.0791, + "step": 35005 + }, + { + "epoch": 1.63, + "learning_rate": 1.4671515683538949e-05, + "loss": 0.1432, + "step": 35010 + }, + { + "epoch": 1.63, + "learning_rate": 1.4670731898484161e-05, + "loss": 0.1849, + "step": 35015 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669948113429375e-05, + "loss": 0.1386, + "step": 35020 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669164328374587e-05, + "loss": 0.1576, + "step": 35025 + }, + { + "epoch": 1.63, + "learning_rate": 1.46683805433198e-05, + "loss": 0.3647, + "step": 35030 + }, + { + "epoch": 1.63, + "learning_rate": 1.4667596758265015e-05, + "loss": 0.3103, + "step": 35035 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666812973210227e-05, + "loss": 0.0494, + "step": 35040 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666029188155441e-05, + "loss": 0.0672, + "step": 35045 + }, + { + "epoch": 1.64, + "learning_rate": 1.4665245403100655e-05, + "loss": 0.0995, + "step": 35050 + }, + { + "epoch": 1.64, + "learning_rate": 1.4664461618045869e-05, + "loss": 0.0589, + "step": 35055 + }, + { + "epoch": 1.64, + "learning_rate": 1.4663677832991081e-05, + "loss": 0.1083, + "step": 35060 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662894047936297e-05, + "loss": 0.1836, + "step": 35065 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662110262881509e-05, + "loss": 0.216, + "step": 35070 + }, + { + "epoch": 1.64, + "learning_rate": 1.4661326477826723e-05, + "loss": 0.2673, + "step": 35075 + }, + { + "epoch": 1.64, + "learning_rate": 1.4660542692771935e-05, + "loss": 0.3442, + "step": 35080 + }, + { + "epoch": 1.64, + "learning_rate": 1.465975890771715e-05, + "loss": 0.3217, + "step": 35085 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658975122662363e-05, + "loss": 0.0211, + "step": 35090 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658191337607575e-05, + "loss": 0.0388, + "step": 35095 + }, + { + "epoch": 1.64, + "learning_rate": 1.4657407552552789e-05, + "loss": 0.044, + "step": 35100 + }, + { + "epoch": 1.64, + "learning_rate": 1.4656623767498001e-05, + "loss": 0.1086, + "step": 35105 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655839982443217e-05, + "loss": 0.1599, + "step": 35110 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655056197388429e-05, + "loss": 0.129, + "step": 35115 + }, + { + "epoch": 1.64, + "learning_rate": 1.4654272412333643e-05, + "loss": 0.1483, + "step": 35120 + }, + { + "epoch": 1.64, + "learning_rate": 1.4653488627278855e-05, + "loss": 0.2187, + "step": 35125 + }, + { + "epoch": 1.64, + "learning_rate": 1.465270484222407e-05, + "loss": 0.4357, + "step": 35130 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651921057169283e-05, + "loss": 0.2683, + "step": 35135 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651137272114497e-05, + "loss": 0.0366, + "step": 35140 + }, + { + "epoch": 1.64, + "learning_rate": 1.4650353487059709e-05, + "loss": 0.0448, + "step": 35145 + }, + { + "epoch": 1.64, + "learning_rate": 1.4649569702004925e-05, + "loss": 0.0861, + "step": 35150 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648785916950137e-05, + "loss": 0.0554, + "step": 35155 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648002131895349e-05, + "loss": 0.143, + "step": 35160 + }, + { + "epoch": 1.64, + "learning_rate": 1.4647218346840565e-05, + "loss": 0.1561, + "step": 35165 + }, + { + "epoch": 1.64, + "learning_rate": 1.4646434561785777e-05, + "loss": 0.1867, + "step": 35170 + }, + { + "epoch": 1.64, + "learning_rate": 1.464565077673099e-05, + "loss": 0.1855, + "step": 35175 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644866991676203e-05, + "loss": 0.4092, + "step": 35180 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644083206621418e-05, + "loss": 0.3768, + "step": 35185 + }, + { + "epoch": 1.64, + "learning_rate": 1.464329942156663e-05, + "loss": 0.1151, + "step": 35190 + }, + { + "epoch": 1.64, + "learning_rate": 1.4642515636511845e-05, + "loss": 0.0626, + "step": 35195 + }, + { + "epoch": 1.64, + "learning_rate": 1.4641731851457057e-05, + "loss": 0.0333, + "step": 35200 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640948066402272e-05, + "loss": 0.0781, + "step": 35205 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640164281347485e-05, + "loss": 0.1316, + "step": 35210 + }, + { + "epoch": 1.64, + "learning_rate": 1.4639380496292699e-05, + "loss": 0.0988, + "step": 35215 + }, + { + "epoch": 1.64, + "learning_rate": 1.463859671123791e-05, + "loss": 0.1637, + "step": 35220 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637812926183123e-05, + "loss": 0.2726, + "step": 35225 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637029141128339e-05, + "loss": 0.336, + "step": 35230 + }, + { + "epoch": 1.64, + "learning_rate": 1.463624535607355e-05, + "loss": 0.1921, + "step": 35235 + }, + { + "epoch": 1.64, + "learning_rate": 1.4635461571018765e-05, + "loss": 0.026, + "step": 35240 + }, + { + "epoch": 1.64, + "learning_rate": 1.4634677785963977e-05, + "loss": 0.0511, + "step": 35245 + }, + { + "epoch": 1.64, + "learning_rate": 1.4633894000909192e-05, + "loss": 0.0613, + "step": 35250 + }, + { + "epoch": 1.65, + "learning_rate": 1.4633110215854405e-05, + "loss": 0.116, + "step": 35255 + }, + { + "epoch": 1.65, + "learning_rate": 1.4632326430799619e-05, + "loss": 0.1376, + "step": 35260 + }, + { + "epoch": 1.65, + "learning_rate": 1.4631542645744833e-05, + "loss": 0.1423, + "step": 35265 + }, + { + "epoch": 1.65, + "learning_rate": 1.4630758860690046e-05, + "loss": 0.0715, + "step": 35270 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629975075635259e-05, + "loss": 0.2406, + "step": 35275 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629191290580474e-05, + "loss": 0.2576, + "step": 35280 + }, + { + "epoch": 1.65, + "learning_rate": 1.4628407505525686e-05, + "loss": 0.1546, + "step": 35285 + }, + { + "epoch": 1.65, + "learning_rate": 1.4627623720470899e-05, + "loss": 0.0603, + "step": 35290 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626839935416113e-05, + "loss": 0.0637, + "step": 35295 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626056150361325e-05, + "loss": 0.0723, + "step": 35300 + }, + { + "epoch": 1.65, + "learning_rate": 1.462527236530654e-05, + "loss": 0.0931, + "step": 35305 + }, + { + "epoch": 1.65, + "learning_rate": 1.4624488580251753e-05, + "loss": 0.163, + "step": 35310 + }, + { + "epoch": 1.65, + "learning_rate": 1.4623704795196966e-05, + "loss": 0.2184, + "step": 35315 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622921010142179e-05, + "loss": 0.0844, + "step": 35320 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622137225087394e-05, + "loss": 0.2475, + "step": 35325 + }, + { + "epoch": 1.65, + "learning_rate": 1.4621353440032607e-05, + "loss": 0.3015, + "step": 35330 + }, + { + "epoch": 1.65, + "learning_rate": 1.462056965497782e-05, + "loss": 0.26, + "step": 35335 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619785869923033e-05, + "loss": 0.0676, + "step": 35340 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619002084868248e-05, + "loss": 0.0765, + "step": 35345 + }, + { + "epoch": 1.65, + "learning_rate": 1.461821829981346e-05, + "loss": 0.0856, + "step": 35350 + }, + { + "epoch": 1.65, + "learning_rate": 1.4617434514758673e-05, + "loss": 0.0801, + "step": 35355 + }, + { + "epoch": 1.65, + "learning_rate": 1.4616650729703887e-05, + "loss": 0.1053, + "step": 35360 + }, + { + "epoch": 1.65, + "learning_rate": 1.46158669446491e-05, + "loss": 0.1531, + "step": 35365 + }, + { + "epoch": 1.65, + "learning_rate": 1.4615083159594314e-05, + "loss": 0.2514, + "step": 35370 + }, + { + "epoch": 1.65, + "learning_rate": 1.4614299374539527e-05, + "loss": 0.1658, + "step": 35375 + }, + { + "epoch": 1.65, + "learning_rate": 1.4613515589484742e-05, + "loss": 0.3044, + "step": 35380 + }, + { + "epoch": 1.65, + "learning_rate": 1.4612731804429954e-05, + "loss": 0.2306, + "step": 35385 + }, + { + "epoch": 1.65, + "learning_rate": 1.4611948019375168e-05, + "loss": 0.0903, + "step": 35390 + }, + { + "epoch": 1.65, + "learning_rate": 1.461116423432038e-05, + "loss": 0.0519, + "step": 35395 + }, + { + "epoch": 1.65, + "learning_rate": 1.4610380449265596e-05, + "loss": 0.0563, + "step": 35400 + }, + { + "epoch": 1.65, + "learning_rate": 1.4609596664210808e-05, + "loss": 0.0959, + "step": 35405 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608812879156022e-05, + "loss": 0.0844, + "step": 35410 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608029094101234e-05, + "loss": 0.1355, + "step": 35415 + }, + { + "epoch": 1.65, + "learning_rate": 1.4607245309046447e-05, + "loss": 0.1931, + "step": 35420 + }, + { + "epoch": 1.65, + "learning_rate": 1.4606461523991662e-05, + "loss": 0.257, + "step": 35425 + }, + { + "epoch": 1.65, + "learning_rate": 1.4605677738936874e-05, + "loss": 0.5352, + "step": 35430 + }, + { + "epoch": 1.65, + "learning_rate": 1.4604893953882088e-05, + "loss": 0.3642, + "step": 35435 + }, + { + "epoch": 1.65, + "learning_rate": 1.46041101688273e-05, + "loss": 0.0393, + "step": 35440 + }, + { + "epoch": 1.65, + "learning_rate": 1.4603326383772516e-05, + "loss": 0.106, + "step": 35445 + }, + { + "epoch": 1.65, + "learning_rate": 1.4602542598717728e-05, + "loss": 0.1245, + "step": 35450 + }, + { + "epoch": 1.65, + "learning_rate": 1.4601758813662942e-05, + "loss": 0.1576, + "step": 35455 + }, + { + "epoch": 1.65, + "learning_rate": 1.4600975028608155e-05, + "loss": 0.1734, + "step": 35460 + }, + { + "epoch": 1.65, + "learning_rate": 1.460019124355337e-05, + "loss": 0.0837, + "step": 35465 + }, + { + "epoch": 1.66, + "learning_rate": 1.4599407458498582e-05, + "loss": 0.1933, + "step": 35470 + }, + { + "epoch": 1.66, + "learning_rate": 1.4598623673443796e-05, + "loss": 0.1611, + "step": 35475 + }, + { + "epoch": 1.66, + "learning_rate": 1.459783988838901e-05, + "loss": 0.2965, + "step": 35480 + }, + { + "epoch": 1.66, + "learning_rate": 1.4597056103334222e-05, + "loss": 0.314, + "step": 35485 + }, + { + "epoch": 1.66, + "learning_rate": 1.4596272318279436e-05, + "loss": 0.0355, + "step": 35490 + }, + { + "epoch": 1.66, + "learning_rate": 1.4595488533224648e-05, + "loss": 0.0476, + "step": 35495 + }, + { + "epoch": 1.66, + "learning_rate": 1.4594704748169864e-05, + "loss": 0.0686, + "step": 35500 + }, + { + "epoch": 1.66, + "learning_rate": 1.4593920963115076e-05, + "loss": 0.1746, + "step": 35505 + }, + { + "epoch": 1.66, + "learning_rate": 1.459313717806029e-05, + "loss": 0.1403, + "step": 35510 + }, + { + "epoch": 1.66, + "learning_rate": 1.4592353393005502e-05, + "loss": 0.1285, + "step": 35515 + }, + { + "epoch": 1.66, + "learning_rate": 1.4591569607950718e-05, + "loss": 0.252, + "step": 35520 + }, + { + "epoch": 1.66, + "learning_rate": 1.459078582289593e-05, + "loss": 0.2358, + "step": 35525 + }, + { + "epoch": 1.66, + "learning_rate": 1.4590002037841144e-05, + "loss": 0.2364, + "step": 35530 + }, + { + "epoch": 1.66, + "learning_rate": 1.4589218252786356e-05, + "loss": 0.2942, + "step": 35535 + }, + { + "epoch": 1.66, + "learning_rate": 1.4588434467731572e-05, + "loss": 0.0387, + "step": 35540 + }, + { + "epoch": 1.66, + "learning_rate": 1.4587650682676784e-05, + "loss": 0.1152, + "step": 35545 + }, + { + "epoch": 1.66, + "learning_rate": 1.4586866897621996e-05, + "loss": 0.0824, + "step": 35550 + }, + { + "epoch": 1.66, + "learning_rate": 1.458608311256721e-05, + "loss": 0.1106, + "step": 35555 + }, + { + "epoch": 1.66, + "learning_rate": 1.4585299327512422e-05, + "loss": 0.1017, + "step": 35560 + }, + { + "epoch": 1.66, + "learning_rate": 1.4584515542457638e-05, + "loss": 0.2116, + "step": 35565 + }, + { + "epoch": 1.66, + "learning_rate": 1.458373175740285e-05, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582947972348064e-05, + "loss": 0.2595, + "step": 35575 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582164187293278e-05, + "loss": 0.3131, + "step": 35580 + }, + { + "epoch": 1.66, + "learning_rate": 1.4581380402238492e-05, + "loss": 0.2581, + "step": 35585 + }, + { + "epoch": 1.66, + "learning_rate": 1.4580596617183704e-05, + "loss": 0.0496, + "step": 35590 + }, + { + "epoch": 1.66, + "learning_rate": 1.457981283212892e-05, + "loss": 0.0473, + "step": 35595 + }, + { + "epoch": 1.66, + "learning_rate": 1.4579029047074132e-05, + "loss": 0.1001, + "step": 35600 + }, + { + "epoch": 1.66, + "learning_rate": 1.4578245262019346e-05, + "loss": 0.053, + "step": 35605 + }, + { + "epoch": 1.66, + "learning_rate": 1.4577461476964558e-05, + "loss": 0.1636, + "step": 35610 + }, + { + "epoch": 1.66, + "learning_rate": 1.457667769190977e-05, + "loss": 0.1338, + "step": 35615 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575893906854986e-05, + "loss": 0.2236, + "step": 35620 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575110121800198e-05, + "loss": 0.17, + "step": 35625 + }, + { + "epoch": 1.66, + "learning_rate": 1.4574326336745412e-05, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4573542551690624e-05, + "loss": 0.1904, + "step": 35635 + }, + { + "epoch": 1.66, + "learning_rate": 1.457275876663584e-05, + "loss": 0.0506, + "step": 35640 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571974981581052e-05, + "loss": 0.0974, + "step": 35645 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571191196526266e-05, + "loss": 0.036, + "step": 35650 + }, + { + "epoch": 1.66, + "learning_rate": 1.4570407411471478e-05, + "loss": 0.1061, + "step": 35655 + }, + { + "epoch": 1.66, + "learning_rate": 1.4569623626416694e-05, + "loss": 0.1198, + "step": 35660 + }, + { + "epoch": 1.66, + "learning_rate": 1.4568839841361906e-05, + "loss": 0.244, + "step": 35665 + }, + { + "epoch": 1.66, + "learning_rate": 1.456805605630712e-05, + "loss": 0.1738, + "step": 35670 + }, + { + "epoch": 1.66, + "learning_rate": 1.4567272271252332e-05, + "loss": 0.1297, + "step": 35675 + }, + { + "epoch": 1.66, + "learning_rate": 1.4566488486197546e-05, + "loss": 0.3234, + "step": 35680 + }, + { + "epoch": 1.67, + "learning_rate": 1.456570470114276e-05, + "loss": 0.3038, + "step": 35685 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564920916087972e-05, + "loss": 0.0216, + "step": 35690 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564137131033188e-05, + "loss": 0.0839, + "step": 35695 + }, + { + "epoch": 1.67, + "learning_rate": 1.45633533459784e-05, + "loss": 0.0911, + "step": 35700 + }, + { + "epoch": 1.67, + "learning_rate": 1.4562569560923614e-05, + "loss": 0.1222, + "step": 35705 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561785775868826e-05, + "loss": 0.0914, + "step": 35710 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561001990814042e-05, + "loss": 0.1321, + "step": 35715 + }, + { + "epoch": 1.67, + "learning_rate": 1.4560218205759254e-05, + "loss": 0.1351, + "step": 35720 + }, + { + "epoch": 1.67, + "learning_rate": 1.4559434420704468e-05, + "loss": 0.2113, + "step": 35725 + }, + { + "epoch": 1.67, + "learning_rate": 1.455865063564968e-05, + "loss": 0.3217, + "step": 35730 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557866850594896e-05, + "loss": 0.3432, + "step": 35735 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557083065540108e-05, + "loss": 0.0595, + "step": 35740 + }, + { + "epoch": 1.67, + "learning_rate": 1.455629928048532e-05, + "loss": 0.0583, + "step": 35745 + }, + { + "epoch": 1.67, + "learning_rate": 1.4555515495430534e-05, + "loss": 0.0955, + "step": 35750 + }, + { + "epoch": 1.67, + "learning_rate": 1.4554731710375746e-05, + "loss": 0.0763, + "step": 35755 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553947925320962e-05, + "loss": 0.159, + "step": 35760 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553164140266174e-05, + "loss": 0.1699, + "step": 35765 + }, + { + "epoch": 1.67, + "learning_rate": 1.4552380355211388e-05, + "loss": 0.1226, + "step": 35770 + }, + { + "epoch": 1.67, + "learning_rate": 1.45515965701566e-05, + "loss": 0.233, + "step": 35775 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550812785101816e-05, + "loss": 0.3687, + "step": 35780 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550029000047028e-05, + "loss": 0.2873, + "step": 35785 + }, + { + "epoch": 1.67, + "learning_rate": 1.4549245214992242e-05, + "loss": 0.0251, + "step": 35790 + }, + { + "epoch": 1.67, + "learning_rate": 1.4548461429937456e-05, + "loss": 0.0768, + "step": 35795 + }, + { + "epoch": 1.67, + "learning_rate": 1.454767764488267e-05, + "loss": 0.1486, + "step": 35800 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546893859827882e-05, + "loss": 0.1071, + "step": 35805 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546110074773094e-05, + "loss": 0.115, + "step": 35810 + }, + { + "epoch": 1.67, + "learning_rate": 1.454532628971831e-05, + "loss": 0.1783, + "step": 35815 + }, + { + "epoch": 1.67, + "learning_rate": 1.4544542504663522e-05, + "loss": 0.1416, + "step": 35820 + }, + { + "epoch": 1.67, + "learning_rate": 1.4543758719608736e-05, + "loss": 0.2712, + "step": 35825 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542974934553948e-05, + "loss": 0.3227, + "step": 35830 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542191149499164e-05, + "loss": 0.3159, + "step": 35835 + }, + { + "epoch": 1.67, + "learning_rate": 1.4541407364444376e-05, + "loss": 0.0305, + "step": 35840 + }, + { + "epoch": 1.67, + "learning_rate": 1.454062357938959e-05, + "loss": 0.0921, + "step": 35845 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539839794334802e-05, + "loss": 0.0471, + "step": 35850 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539056009280017e-05, + "loss": 0.1261, + "step": 35855 + }, + { + "epoch": 1.67, + "learning_rate": 1.453827222422523e-05, + "loss": 0.1288, + "step": 35860 + }, + { + "epoch": 1.67, + "learning_rate": 1.4537488439170444e-05, + "loss": 0.1459, + "step": 35865 + }, + { + "epoch": 1.67, + "learning_rate": 1.4536704654115656e-05, + "loss": 0.141, + "step": 35870 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535920869060868e-05, + "loss": 0.2848, + "step": 35875 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535137084006084e-05, + "loss": 0.3458, + "step": 35880 + }, + { + "epoch": 1.67, + "learning_rate": 1.4534353298951296e-05, + "loss": 0.4117, + "step": 35885 + }, + { + "epoch": 1.67, + "learning_rate": 1.453356951389651e-05, + "loss": 0.0384, + "step": 35890 + }, + { + "epoch": 1.67, + "learning_rate": 1.4532785728841724e-05, + "loss": 0.1041, + "step": 35895 + }, + { + "epoch": 1.68, + "learning_rate": 1.4532001943786938e-05, + "loss": 0.1218, + "step": 35900 + }, + { + "epoch": 1.68, + "learning_rate": 1.453121815873215e-05, + "loss": 0.1158, + "step": 35905 + }, + { + "epoch": 1.68, + "learning_rate": 1.4530434373677365e-05, + "loss": 0.0852, + "step": 35910 + }, + { + "epoch": 1.68, + "learning_rate": 1.4529650588622578e-05, + "loss": 0.081, + "step": 35915 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528866803567791e-05, + "loss": 0.198, + "step": 35920 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528083018513004e-05, + "loss": 0.2399, + "step": 35925 + }, + { + "epoch": 1.68, + "learning_rate": 1.452729923345822e-05, + "loss": 0.2144, + "step": 35930 + }, + { + "epoch": 1.68, + "learning_rate": 1.4526515448403432e-05, + "loss": 0.2149, + "step": 35935 + }, + { + "epoch": 1.68, + "learning_rate": 1.4525731663348644e-05, + "loss": 0.0402, + "step": 35940 + }, + { + "epoch": 1.68, + "learning_rate": 1.4524947878293858e-05, + "loss": 0.0718, + "step": 35945 + }, + { + "epoch": 1.68, + "learning_rate": 1.452416409323907e-05, + "loss": 0.0818, + "step": 35950 + }, + { + "epoch": 1.68, + "learning_rate": 1.4523380308184285e-05, + "loss": 0.0695, + "step": 35955 + }, + { + "epoch": 1.68, + "learning_rate": 1.4522596523129498e-05, + "loss": 0.0995, + "step": 35960 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521812738074712e-05, + "loss": 0.1538, + "step": 35965 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521028953019924e-05, + "loss": 0.2184, + "step": 35970 + }, + { + "epoch": 1.68, + "learning_rate": 1.452024516796514e-05, + "loss": 0.1701, + "step": 35975 + }, + { + "epoch": 1.68, + "learning_rate": 1.4519461382910352e-05, + "loss": 0.401, + "step": 35980 + }, + { + "epoch": 1.68, + "learning_rate": 1.4518677597855565e-05, + "loss": 0.3014, + "step": 35985 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517893812800778e-05, + "loss": 0.047, + "step": 35990 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517110027745993e-05, + "loss": 0.0691, + "step": 35995 + }, + { + "epoch": 1.68, + "learning_rate": 1.4516326242691206e-05, + "loss": 0.1155, + "step": 36000 + }, + { + "epoch": 1.68, + "learning_rate": 1.4515542457636418e-05, + "loss": 0.1304, + "step": 36005 + }, + { + "epoch": 1.68, + "learning_rate": 1.4514758672581633e-05, + "loss": 0.1563, + "step": 36010 + }, + { + "epoch": 1.68, + "learning_rate": 1.4513974887526846e-05, + "loss": 0.1016, + "step": 36015 + }, + { + "epoch": 1.68, + "learning_rate": 1.451319110247206e-05, + "loss": 0.186, + "step": 36020 + }, + { + "epoch": 1.68, + "learning_rate": 1.4512407317417272e-05, + "loss": 0.203, + "step": 36025 + }, + { + "epoch": 1.68, + "learning_rate": 1.4511623532362487e-05, + "loss": 0.3696, + "step": 36030 + }, + { + "epoch": 1.68, + "learning_rate": 1.45108397473077e-05, + "loss": 0.2746, + "step": 36035 + }, + { + "epoch": 1.68, + "learning_rate": 1.4510055962252913e-05, + "loss": 0.0431, + "step": 36040 + }, + { + "epoch": 1.68, + "learning_rate": 1.4509272177198126e-05, + "loss": 0.0458, + "step": 36045 + }, + { + "epoch": 1.68, + "learning_rate": 1.4508488392143341e-05, + "loss": 0.068, + "step": 36050 + }, + { + "epoch": 1.68, + "learning_rate": 1.4507704607088553e-05, + "loss": 0.156, + "step": 36055 + }, + { + "epoch": 1.68, + "learning_rate": 1.4506920822033767e-05, + "loss": 0.0859, + "step": 36060 + }, + { + "epoch": 1.68, + "learning_rate": 1.450613703697898e-05, + "loss": 0.1969, + "step": 36065 + }, + { + "epoch": 1.68, + "learning_rate": 1.4505353251924192e-05, + "loss": 0.1335, + "step": 36070 + }, + { + "epoch": 1.68, + "learning_rate": 1.4504569466869407e-05, + "loss": 0.2158, + "step": 36075 + }, + { + "epoch": 1.68, + "learning_rate": 1.450378568181462e-05, + "loss": 0.2714, + "step": 36080 + }, + { + "epoch": 1.68, + "learning_rate": 1.4503001896759833e-05, + "loss": 0.2969, + "step": 36085 + }, + { + "epoch": 1.68, + "learning_rate": 1.4502218111705046e-05, + "loss": 0.0216, + "step": 36090 + }, + { + "epoch": 1.68, + "learning_rate": 1.4501434326650261e-05, + "loss": 0.0438, + "step": 36095 + }, + { + "epoch": 1.68, + "learning_rate": 1.4500650541595473e-05, + "loss": 0.0988, + "step": 36100 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499866756540687e-05, + "loss": 0.0888, + "step": 36105 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499082971485901e-05, + "loss": 0.0665, + "step": 36110 + }, + { + "epoch": 1.69, + "learning_rate": 1.4498299186431115e-05, + "loss": 0.1506, + "step": 36115 + }, + { + "epoch": 1.69, + "learning_rate": 1.4497515401376327e-05, + "loss": 0.1244, + "step": 36120 + }, + { + "epoch": 1.69, + "learning_rate": 1.4496731616321541e-05, + "loss": 0.1407, + "step": 36125 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495947831266755e-05, + "loss": 0.389, + "step": 36130 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495164046211967e-05, + "loss": 0.2841, + "step": 36135 + }, + { + "epoch": 1.69, + "learning_rate": 1.4494380261157181e-05, + "loss": 0.0355, + "step": 36140 + }, + { + "epoch": 1.69, + "learning_rate": 1.4493596476102394e-05, + "loss": 0.1308, + "step": 36145 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492812691047609e-05, + "loss": 0.0888, + "step": 36150 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492028905992821e-05, + "loss": 0.1226, + "step": 36155 + }, + { + "epoch": 1.69, + "learning_rate": 1.4491245120938035e-05, + "loss": 0.1033, + "step": 36160 + }, + { + "epoch": 1.69, + "learning_rate": 1.4490461335883247e-05, + "loss": 0.1986, + "step": 36165 + }, + { + "epoch": 1.69, + "learning_rate": 1.4489677550828463e-05, + "loss": 0.1476, + "step": 36170 + }, + { + "epoch": 1.69, + "learning_rate": 1.4488893765773675e-05, + "loss": 0.3752, + "step": 36175 + }, + { + "epoch": 1.69, + "learning_rate": 1.448810998071889e-05, + "loss": 0.3593, + "step": 36180 + }, + { + "epoch": 1.69, + "learning_rate": 1.4487326195664101e-05, + "loss": 0.3019, + "step": 36185 + }, + { + "epoch": 1.69, + "learning_rate": 1.4486542410609317e-05, + "loss": 0.0502, + "step": 36190 + }, + { + "epoch": 1.69, + "learning_rate": 1.448575862555453e-05, + "loss": 0.0869, + "step": 36195 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484974840499741e-05, + "loss": 0.0769, + "step": 36200 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484191055444955e-05, + "loss": 0.0761, + "step": 36205 + }, + { + "epoch": 1.69, + "learning_rate": 1.448340727039017e-05, + "loss": 0.1277, + "step": 36210 + }, + { + "epoch": 1.69, + "learning_rate": 1.4482623485335383e-05, + "loss": 0.1541, + "step": 36215 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481839700280595e-05, + "loss": 0.1727, + "step": 36220 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481055915225811e-05, + "loss": 0.2071, + "step": 36225 + }, + { + "epoch": 1.69, + "learning_rate": 1.4480272130171023e-05, + "loss": 0.2472, + "step": 36230 + }, + { + "epoch": 1.69, + "learning_rate": 1.4479488345116237e-05, + "loss": 0.1617, + "step": 36235 + }, + { + "epoch": 1.69, + "learning_rate": 1.447870456006145e-05, + "loss": 0.0486, + "step": 36240 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477920775006665e-05, + "loss": 0.029, + "step": 36245 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477136989951877e-05, + "loss": 0.0703, + "step": 36250 + }, + { + "epoch": 1.69, + "learning_rate": 1.4476353204897091e-05, + "loss": 0.0847, + "step": 36255 + }, + { + "epoch": 1.69, + "learning_rate": 1.4475569419842303e-05, + "loss": 0.1484, + "step": 36260 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474785634787515e-05, + "loss": 0.1272, + "step": 36265 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474001849732731e-05, + "loss": 0.1129, + "step": 36270 + }, + { + "epoch": 1.69, + "learning_rate": 1.4473218064677943e-05, + "loss": 0.1698, + "step": 36275 + }, + { + "epoch": 1.69, + "learning_rate": 1.4472434279623157e-05, + "loss": 0.2721, + "step": 36280 + }, + { + "epoch": 1.69, + "learning_rate": 1.447165049456837e-05, + "loss": 0.2507, + "step": 36285 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470866709513585e-05, + "loss": 0.1443, + "step": 36290 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470082924458797e-05, + "loss": 0.0702, + "step": 36295 + }, + { + "epoch": 1.69, + "learning_rate": 1.4469299139404011e-05, + "loss": 0.0486, + "step": 36300 + }, + { + "epoch": 1.69, + "learning_rate": 1.4468515354349223e-05, + "loss": 0.0962, + "step": 36305 + }, + { + "epoch": 1.69, + "learning_rate": 1.4467731569294439e-05, + "loss": 0.1491, + "step": 36310 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466947784239651e-05, + "loss": 0.1473, + "step": 36315 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466163999184865e-05, + "loss": 0.1861, + "step": 36320 + }, + { + "epoch": 1.69, + "learning_rate": 1.4465380214130079e-05, + "loss": 0.2508, + "step": 36325 + }, + { + "epoch": 1.7, + "learning_rate": 1.4464596429075291e-05, + "loss": 0.1919, + "step": 36330 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463812644020505e-05, + "loss": 0.329, + "step": 36335 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463028858965717e-05, + "loss": 0.0241, + "step": 36340 + }, + { + "epoch": 1.7, + "learning_rate": 1.4462245073910933e-05, + "loss": 0.0493, + "step": 36345 + }, + { + "epoch": 1.7, + "learning_rate": 1.4461461288856145e-05, + "loss": 0.0885, + "step": 36350 + }, + { + "epoch": 1.7, + "learning_rate": 1.4460677503801359e-05, + "loss": 0.0676, + "step": 36355 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459893718746571e-05, + "loss": 0.138, + "step": 36360 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459109933691787e-05, + "loss": 0.1602, + "step": 36365 + }, + { + "epoch": 1.7, + "learning_rate": 1.4458326148636999e-05, + "loss": 0.1771, + "step": 36370 + }, + { + "epoch": 1.7, + "learning_rate": 1.4457542363582213e-05, + "loss": 0.207, + "step": 36375 + }, + { + "epoch": 1.7, + "learning_rate": 1.4456758578527425e-05, + "loss": 0.3872, + "step": 36380 + }, + { + "epoch": 1.7, + "learning_rate": 1.445597479347264e-05, + "loss": 0.4241, + "step": 36385 + }, + { + "epoch": 1.7, + "learning_rate": 1.4455191008417853e-05, + "loss": 0.0311, + "step": 36390 + }, + { + "epoch": 1.7, + "learning_rate": 1.4454407223363065e-05, + "loss": 0.0573, + "step": 36395 + }, + { + "epoch": 1.7, + "learning_rate": 1.4453623438308279e-05, + "loss": 0.0677, + "step": 36400 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452839653253491e-05, + "loss": 0.1657, + "step": 36405 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452055868198707e-05, + "loss": 0.0953, + "step": 36410 + }, + { + "epoch": 1.7, + "learning_rate": 1.4451272083143919e-05, + "loss": 0.1371, + "step": 36415 + }, + { + "epoch": 1.7, + "learning_rate": 1.4450488298089133e-05, + "loss": 0.1549, + "step": 36420 + }, + { + "epoch": 1.7, + "learning_rate": 1.4449704513034347e-05, + "loss": 0.1376, + "step": 36425 + }, + { + "epoch": 1.7, + "learning_rate": 1.444892072797956e-05, + "loss": 0.3851, + "step": 36430 + }, + { + "epoch": 1.7, + "learning_rate": 1.4448136942924773e-05, + "loss": 0.2807, + "step": 36435 + }, + { + "epoch": 1.7, + "learning_rate": 1.4447353157869989e-05, + "loss": 0.0163, + "step": 36440 + }, + { + "epoch": 1.7, + "learning_rate": 1.44465693728152e-05, + "loss": 0.0581, + "step": 36445 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445785587760415e-05, + "loss": 0.056, + "step": 36450 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445001802705627e-05, + "loss": 0.1045, + "step": 36455 + }, + { + "epoch": 1.7, + "learning_rate": 1.4444218017650839e-05, + "loss": 0.0543, + "step": 36460 + }, + { + "epoch": 1.7, + "learning_rate": 1.4443434232596055e-05, + "loss": 0.1655, + "step": 36465 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442807204552225e-05, + "loss": 0.1582, + "step": 36470 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442023419497437e-05, + "loss": 0.2369, + "step": 36475 + }, + { + "epoch": 1.7, + "learning_rate": 1.4441239634442651e-05, + "loss": 0.2963, + "step": 36480 + }, + { + "epoch": 1.7, + "learning_rate": 1.4440455849387865e-05, + "loss": 0.2817, + "step": 36485 + }, + { + "epoch": 1.7, + "learning_rate": 1.4439672064333079e-05, + "loss": 0.0662, + "step": 36490 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438888279278291e-05, + "loss": 0.0917, + "step": 36495 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438104494223505e-05, + "loss": 0.0448, + "step": 36500 + }, + { + "epoch": 1.7, + "learning_rate": 1.4437320709168719e-05, + "loss": 0.1574, + "step": 36505 + }, + { + "epoch": 1.7, + "learning_rate": 1.4436536924113933e-05, + "loss": 0.1734, + "step": 36510 + }, + { + "epoch": 1.7, + "learning_rate": 1.4435753139059145e-05, + "loss": 0.1669, + "step": 36515 + }, + { + "epoch": 1.7, + "learning_rate": 1.443496935400436e-05, + "loss": 0.1831, + "step": 36520 + }, + { + "epoch": 1.7, + "learning_rate": 1.4434185568949573e-05, + "loss": 0.1776, + "step": 36525 + }, + { + "epoch": 1.7, + "learning_rate": 1.4433401783894785e-05, + "loss": 0.3592, + "step": 36530 + }, + { + "epoch": 1.7, + "learning_rate": 1.4432617998839999e-05, + "loss": 0.2814, + "step": 36535 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431834213785211e-05, + "loss": 0.0311, + "step": 36540 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431050428730427e-05, + "loss": 0.091, + "step": 36545 + }, + { + "epoch": 1.71, + "learning_rate": 1.443026664367564e-05, + "loss": 0.0954, + "step": 36550 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429482858620853e-05, + "loss": 0.102, + "step": 36555 + }, + { + "epoch": 1.71, + "learning_rate": 1.4428699073566065e-05, + "loss": 0.1249, + "step": 36560 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427915288511281e-05, + "loss": 0.1781, + "step": 36565 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427131503456493e-05, + "loss": 0.122, + "step": 36570 + }, + { + "epoch": 1.71, + "learning_rate": 1.4426347718401707e-05, + "loss": 0.2721, + "step": 36575 + }, + { + "epoch": 1.71, + "learning_rate": 1.442556393334692e-05, + "loss": 0.3878, + "step": 36580 + }, + { + "epoch": 1.71, + "learning_rate": 1.4424780148292135e-05, + "loss": 0.2806, + "step": 36585 + }, + { + "epoch": 1.71, + "learning_rate": 1.4423996363237347e-05, + "loss": 0.0162, + "step": 36590 + }, + { + "epoch": 1.71, + "learning_rate": 1.442321257818256e-05, + "loss": 0.1438, + "step": 36595 + }, + { + "epoch": 1.71, + "learning_rate": 1.4422428793127775e-05, + "loss": 0.0547, + "step": 36600 + }, + { + "epoch": 1.71, + "learning_rate": 1.4421645008072987e-05, + "loss": 0.0748, + "step": 36605 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420861223018201e-05, + "loss": 0.071, + "step": 36610 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420077437963413e-05, + "loss": 0.0877, + "step": 36615 + }, + { + "epoch": 1.71, + "learning_rate": 1.4419293652908629e-05, + "loss": 0.0916, + "step": 36620 + }, + { + "epoch": 1.71, + "learning_rate": 1.4418509867853841e-05, + "loss": 0.29, + "step": 36625 + }, + { + "epoch": 1.71, + "learning_rate": 1.4417726082799055e-05, + "loss": 0.3652, + "step": 36630 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416942297744267e-05, + "loss": 0.3349, + "step": 36635 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416158512689483e-05, + "loss": 0.0536, + "step": 36640 + }, + { + "epoch": 1.71, + "learning_rate": 1.4415374727634695e-05, + "loss": 0.0458, + "step": 36645 + }, + { + "epoch": 1.71, + "learning_rate": 1.4414590942579909e-05, + "loss": 0.085, + "step": 36650 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413807157525121e-05, + "loss": 0.0993, + "step": 36655 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413023372470333e-05, + "loss": 0.1099, + "step": 36660 + }, + { + "epoch": 1.71, + "learning_rate": 1.4412239587415549e-05, + "loss": 0.112, + "step": 36665 + }, + { + "epoch": 1.71, + "learning_rate": 1.4411455802360761e-05, + "loss": 0.2057, + "step": 36670 + }, + { + "epoch": 1.71, + "learning_rate": 1.4410672017305975e-05, + "loss": 0.3171, + "step": 36675 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409888232251187e-05, + "loss": 0.296, + "step": 36680 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409104447196403e-05, + "loss": 0.2658, + "step": 36685 + }, + { + "epoch": 1.71, + "learning_rate": 1.4408320662141615e-05, + "loss": 0.0218, + "step": 36690 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407536877086829e-05, + "loss": 0.0514, + "step": 36695 + }, + { + "epoch": 1.71, + "learning_rate": 1.4406753092032043e-05, + "loss": 0.081, + "step": 36700 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405969306977257e-05, + "loss": 0.0679, + "step": 36705 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405185521922469e-05, + "loss": 0.1526, + "step": 36710 + }, + { + "epoch": 1.71, + "learning_rate": 1.4404401736867683e-05, + "loss": 0.1244, + "step": 36715 + }, + { + "epoch": 1.71, + "learning_rate": 1.4403617951812897e-05, + "loss": 0.1967, + "step": 36720 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402834166758109e-05, + "loss": 0.2807, + "step": 36725 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402050381703323e-05, + "loss": 0.3164, + "step": 36730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4401266596648535e-05, + "loss": 0.2543, + "step": 36735 + }, + { + "epoch": 1.71, + "learning_rate": 1.440048281159375e-05, + "loss": 0.0474, + "step": 36740 + }, + { + "epoch": 1.71, + "learning_rate": 1.4399699026538963e-05, + "loss": 0.0358, + "step": 36745 + }, + { + "epoch": 1.71, + "learning_rate": 1.4398915241484177e-05, + "loss": 0.0577, + "step": 36750 + }, + { + "epoch": 1.72, + "learning_rate": 1.4398131456429389e-05, + "loss": 0.1247, + "step": 36755 + }, + { + "epoch": 1.72, + "learning_rate": 1.4397347671374605e-05, + "loss": 0.0912, + "step": 36760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4396563886319817e-05, + "loss": 0.1512, + "step": 36765 + }, + { + "epoch": 1.72, + "learning_rate": 1.439578010126503e-05, + "loss": 0.187, + "step": 36770 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394996316210243e-05, + "loss": 0.2044, + "step": 36775 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394212531155459e-05, + "loss": 0.2219, + "step": 36780 + }, + { + "epoch": 1.72, + "learning_rate": 1.439342874610067e-05, + "loss": 0.3031, + "step": 36785 + }, + { + "epoch": 1.72, + "learning_rate": 1.4392644961045883e-05, + "loss": 0.0566, + "step": 36790 + }, + { + "epoch": 1.72, + "learning_rate": 1.4391861175991097e-05, + "loss": 0.05, + "step": 36795 + }, + { + "epoch": 1.72, + "learning_rate": 1.439107739093631e-05, + "loss": 0.1119, + "step": 36800 + }, + { + "epoch": 1.72, + "learning_rate": 1.4390293605881525e-05, + "loss": 0.1107, + "step": 36805 + }, + { + "epoch": 1.72, + "learning_rate": 1.4389509820826737e-05, + "loss": 0.198, + "step": 36810 + }, + { + "epoch": 1.72, + "learning_rate": 1.438872603577195e-05, + "loss": 0.1943, + "step": 36815 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387942250717165e-05, + "loss": 0.1967, + "step": 36820 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387158465662379e-05, + "loss": 0.1664, + "step": 36825 + }, + { + "epoch": 1.72, + "learning_rate": 1.438637468060759e-05, + "loss": 0.3422, + "step": 36830 + }, + { + "epoch": 1.72, + "learning_rate": 1.4385590895552806e-05, + "loss": 0.2327, + "step": 36835 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384807110498019e-05, + "loss": 0.0715, + "step": 36840 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384023325443233e-05, + "loss": 0.0976, + "step": 36845 + }, + { + "epoch": 1.72, + "learning_rate": 1.4383239540388445e-05, + "loss": 0.0848, + "step": 36850 + }, + { + "epoch": 1.72, + "learning_rate": 1.4382455755333657e-05, + "loss": 0.0524, + "step": 36855 + }, + { + "epoch": 1.72, + "learning_rate": 1.4381671970278873e-05, + "loss": 0.1459, + "step": 36860 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380888185224085e-05, + "loss": 0.1363, + "step": 36865 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380104400169299e-05, + "loss": 0.1704, + "step": 36870 + }, + { + "epoch": 1.72, + "learning_rate": 1.4379320615114511e-05, + "loss": 0.3359, + "step": 36875 + }, + { + "epoch": 1.72, + "learning_rate": 1.4378536830059726e-05, + "loss": 0.4377, + "step": 36880 + }, + { + "epoch": 1.72, + "learning_rate": 1.4377753045004939e-05, + "loss": 0.1847, + "step": 36885 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376969259950153e-05, + "loss": 0.0775, + "step": 36890 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376185474895365e-05, + "loss": 0.0722, + "step": 36895 + }, + { + "epoch": 1.72, + "learning_rate": 1.437540168984058e-05, + "loss": 0.106, + "step": 36900 + }, + { + "epoch": 1.72, + "learning_rate": 1.4374617904785793e-05, + "loss": 0.1095, + "step": 36905 + }, + { + "epoch": 1.72, + "learning_rate": 1.4373834119731007e-05, + "loss": 0.1045, + "step": 36910 + }, + { + "epoch": 1.72, + "learning_rate": 1.437305033467622e-05, + "loss": 0.1643, + "step": 36915 + }, + { + "epoch": 1.72, + "learning_rate": 1.4372266549621433e-05, + "loss": 0.1888, + "step": 36920 + }, + { + "epoch": 1.72, + "learning_rate": 1.4371482764566647e-05, + "loss": 0.1461, + "step": 36925 + }, + { + "epoch": 1.72, + "learning_rate": 1.4370698979511859e-05, + "loss": 0.36, + "step": 36930 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369915194457074e-05, + "loss": 0.1914, + "step": 36935 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369131409402287e-05, + "loss": 0.0292, + "step": 36940 + }, + { + "epoch": 1.72, + "learning_rate": 1.43683476243475e-05, + "loss": 0.0758, + "step": 36945 + }, + { + "epoch": 1.72, + "learning_rate": 1.4367563839292713e-05, + "loss": 0.074, + "step": 36950 + }, + { + "epoch": 1.72, + "learning_rate": 1.4366780054237928e-05, + "loss": 0.1499, + "step": 36955 + }, + { + "epoch": 1.72, + "learning_rate": 1.436599626918314e-05, + "loss": 0.1167, + "step": 36960 + }, + { + "epoch": 1.72, + "learning_rate": 1.4365212484128354e-05, + "loss": 0.0796, + "step": 36965 + }, + { + "epoch": 1.73, + "learning_rate": 1.4364428699073567e-05, + "loss": 0.0926, + "step": 36970 + }, + { + "epoch": 1.73, + "learning_rate": 1.4363644914018782e-05, + "loss": 0.2108, + "step": 36975 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362861128963994e-05, + "loss": 0.2106, + "step": 36980 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362077343909207e-05, + "loss": 0.2872, + "step": 36985 + }, + { + "epoch": 1.73, + "learning_rate": 1.436129355885442e-05, + "loss": 0.0477, + "step": 36990 + }, + { + "epoch": 1.73, + "learning_rate": 1.4360509773799633e-05, + "loss": 0.0257, + "step": 36995 + }, + { + "epoch": 1.73, + "learning_rate": 1.4359725988744848e-05, + "loss": 0.1252, + "step": 37000 + }, + { + "epoch": 1.73, + "learning_rate": 1.435894220369006e-05, + "loss": 0.1321, + "step": 37005 + }, + { + "epoch": 1.73, + "learning_rate": 1.4358158418635274e-05, + "loss": 0.1789, + "step": 37010 + }, + { + "epoch": 1.73, + "learning_rate": 1.4357374633580488e-05, + "loss": 0.1004, + "step": 37015 + }, + { + "epoch": 1.73, + "learning_rate": 1.4356590848525702e-05, + "loss": 0.1946, + "step": 37020 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355807063470914e-05, + "loss": 0.1881, + "step": 37025 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355023278416128e-05, + "loss": 0.3375, + "step": 37030 + }, + { + "epoch": 1.73, + "learning_rate": 1.4354239493361342e-05, + "loss": 0.2199, + "step": 37035 + }, + { + "epoch": 1.73, + "learning_rate": 1.4353455708306556e-05, + "loss": 0.0575, + "step": 37040 + }, + { + "epoch": 1.73, + "learning_rate": 1.4352671923251768e-05, + "loss": 0.0641, + "step": 37045 + }, + { + "epoch": 1.73, + "learning_rate": 1.435188813819698e-05, + "loss": 0.1354, + "step": 37050 + }, + { + "epoch": 1.73, + "learning_rate": 1.4351104353142196e-05, + "loss": 0.0793, + "step": 37055 + }, + { + "epoch": 1.73, + "learning_rate": 1.4350320568087408e-05, + "loss": 0.0634, + "step": 37060 + }, + { + "epoch": 1.73, + "learning_rate": 1.4349536783032622e-05, + "loss": 0.1773, + "step": 37065 + }, + { + "epoch": 1.73, + "learning_rate": 1.4348752997977835e-05, + "loss": 0.1752, + "step": 37070 + }, + { + "epoch": 1.73, + "learning_rate": 1.434796921292305e-05, + "loss": 0.1668, + "step": 37075 + }, + { + "epoch": 1.73, + "learning_rate": 1.4347185427868262e-05, + "loss": 0.3402, + "step": 37080 + }, + { + "epoch": 1.73, + "learning_rate": 1.4346401642813476e-05, + "loss": 0.2596, + "step": 37085 + }, + { + "epoch": 1.73, + "learning_rate": 1.4345617857758688e-05, + "loss": 0.0693, + "step": 37090 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344834072703904e-05, + "loss": 0.0821, + "step": 37095 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344050287649116e-05, + "loss": 0.0515, + "step": 37100 + }, + { + "epoch": 1.73, + "learning_rate": 1.434326650259433e-05, + "loss": 0.0693, + "step": 37105 + }, + { + "epoch": 1.73, + "learning_rate": 1.4342482717539542e-05, + "loss": 0.0675, + "step": 37110 + }, + { + "epoch": 1.73, + "learning_rate": 1.4341698932484756e-05, + "loss": 0.0614, + "step": 37115 + }, + { + "epoch": 1.73, + "learning_rate": 1.434091514742997e-05, + "loss": 0.1118, + "step": 37120 + }, + { + "epoch": 1.73, + "learning_rate": 1.4340131362375182e-05, + "loss": 0.1841, + "step": 37125 + }, + { + "epoch": 1.73, + "learning_rate": 1.4339347577320396e-05, + "loss": 0.4163, + "step": 37130 + }, + { + "epoch": 1.73, + "learning_rate": 1.433856379226561e-05, + "loss": 0.2525, + "step": 37135 + }, + { + "epoch": 1.73, + "learning_rate": 1.4337780007210824e-05, + "loss": 0.0489, + "step": 37140 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336996222156036e-05, + "loss": 0.0879, + "step": 37145 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336212437101252e-05, + "loss": 0.0851, + "step": 37150 + }, + { + "epoch": 1.73, + "learning_rate": 1.4335428652046464e-05, + "loss": 0.093, + "step": 37155 + }, + { + "epoch": 1.73, + "learning_rate": 1.4334644866991678e-05, + "loss": 0.161, + "step": 37160 + }, + { + "epoch": 1.73, + "learning_rate": 1.433386108193689e-05, + "loss": 0.1694, + "step": 37165 + }, + { + "epoch": 1.73, + "learning_rate": 1.4333077296882106e-05, + "loss": 0.17, + "step": 37170 + }, + { + "epoch": 1.73, + "learning_rate": 1.4332293511827318e-05, + "loss": 0.2063, + "step": 37175 + }, + { + "epoch": 1.73, + "learning_rate": 1.433150972677253e-05, + "loss": 0.2002, + "step": 37180 + }, + { + "epoch": 1.74, + "learning_rate": 1.4330725941717744e-05, + "loss": 0.2128, + "step": 37185 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329942156662956e-05, + "loss": 0.0525, + "step": 37190 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329158371608172e-05, + "loss": 0.1173, + "step": 37195 + }, + { + "epoch": 1.74, + "learning_rate": 1.4328374586553384e-05, + "loss": 0.1373, + "step": 37200 + }, + { + "epoch": 1.74, + "learning_rate": 1.4327590801498598e-05, + "loss": 0.0733, + "step": 37205 + }, + { + "epoch": 1.74, + "learning_rate": 1.432680701644381e-05, + "loss": 0.118, + "step": 37210 + }, + { + "epoch": 1.74, + "learning_rate": 1.4326023231389026e-05, + "loss": 0.1538, + "step": 37215 + }, + { + "epoch": 1.74, + "learning_rate": 1.4325239446334238e-05, + "loss": 0.1563, + "step": 37220 + }, + { + "epoch": 1.74, + "learning_rate": 1.4324455661279452e-05, + "loss": 0.2196, + "step": 37225 + }, + { + "epoch": 1.74, + "learning_rate": 1.4323671876224666e-05, + "loss": 0.3397, + "step": 37230 + }, + { + "epoch": 1.74, + "learning_rate": 1.432288809116988e-05, + "loss": 0.2154, + "step": 37235 + }, + { + "epoch": 1.74, + "learning_rate": 1.4322104306115092e-05, + "loss": 0.0617, + "step": 37240 + }, + { + "epoch": 1.74, + "learning_rate": 1.4321320521060304e-05, + "loss": 0.0488, + "step": 37245 + }, + { + "epoch": 1.74, + "learning_rate": 1.432053673600552e-05, + "loss": 0.1145, + "step": 37250 + }, + { + "epoch": 1.74, + "learning_rate": 1.4319752950950732e-05, + "loss": 0.1049, + "step": 37255 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318969165895946e-05, + "loss": 0.0923, + "step": 37260 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318185380841158e-05, + "loss": 0.1686, + "step": 37265 + }, + { + "epoch": 1.74, + "learning_rate": 1.4317401595786374e-05, + "loss": 0.1347, + "step": 37270 + }, + { + "epoch": 1.74, + "learning_rate": 1.4316617810731586e-05, + "loss": 0.161, + "step": 37275 + }, + { + "epoch": 1.74, + "learning_rate": 1.43158340256768e-05, + "loss": 0.2883, + "step": 37280 + }, + { + "epoch": 1.74, + "learning_rate": 1.4315050240622012e-05, + "loss": 0.2093, + "step": 37285 + }, + { + "epoch": 1.74, + "learning_rate": 1.4314266455567228e-05, + "loss": 0.0615, + "step": 37290 + }, + { + "epoch": 1.74, + "learning_rate": 1.431348267051244e-05, + "loss": 0.058, + "step": 37295 + }, + { + "epoch": 1.74, + "learning_rate": 1.4312698885457654e-05, + "loss": 0.0953, + "step": 37300 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311915100402866e-05, + "loss": 0.0668, + "step": 37305 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311131315348078e-05, + "loss": 0.1407, + "step": 37310 + }, + { + "epoch": 1.74, + "learning_rate": 1.4310347530293294e-05, + "loss": 0.1212, + "step": 37315 + }, + { + "epoch": 1.74, + "learning_rate": 1.4309563745238506e-05, + "loss": 0.1046, + "step": 37320 + }, + { + "epoch": 1.74, + "learning_rate": 1.430877996018372e-05, + "loss": 0.1611, + "step": 37325 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307996175128934e-05, + "loss": 0.3317, + "step": 37330 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307212390074148e-05, + "loss": 0.2498, + "step": 37335 + }, + { + "epoch": 1.74, + "learning_rate": 1.430642860501936e-05, + "loss": 0.0457, + "step": 37340 + }, + { + "epoch": 1.74, + "learning_rate": 1.4305644819964574e-05, + "loss": 0.0306, + "step": 37345 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304861034909788e-05, + "loss": 0.0676, + "step": 37350 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304077249855002e-05, + "loss": 0.0872, + "step": 37355 + }, + { + "epoch": 1.74, + "learning_rate": 1.4303293464800214e-05, + "loss": 0.1829, + "step": 37360 + }, + { + "epoch": 1.74, + "learning_rate": 1.430250967974543e-05, + "loss": 0.1626, + "step": 37365 + }, + { + "epoch": 1.74, + "learning_rate": 1.4301725894690642e-05, + "loss": 0.1964, + "step": 37370 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300942109635854e-05, + "loss": 0.2105, + "step": 37375 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300158324581068e-05, + "loss": 0.3942, + "step": 37380 + }, + { + "epoch": 1.74, + "learning_rate": 1.429937453952628e-05, + "loss": 0.2594, + "step": 37385 + }, + { + "epoch": 1.74, + "learning_rate": 1.4298590754471496e-05, + "loss": 0.0395, + "step": 37390 + }, + { + "epoch": 1.74, + "learning_rate": 1.4297806969416708e-05, + "loss": 0.0307, + "step": 37395 + }, + { + "epoch": 1.75, + "learning_rate": 1.4297023184361922e-05, + "loss": 0.0373, + "step": 37400 + }, + { + "epoch": 1.75, + "learning_rate": 1.4296239399307134e-05, + "loss": 0.06, + "step": 37405 + }, + { + "epoch": 1.75, + "learning_rate": 1.429545561425235e-05, + "loss": 0.1288, + "step": 37410 + }, + { + "epoch": 1.75, + "learning_rate": 1.4294671829197562e-05, + "loss": 0.1277, + "step": 37415 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293888044142776e-05, + "loss": 0.1357, + "step": 37420 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293104259087988e-05, + "loss": 0.1732, + "step": 37425 + }, + { + "epoch": 1.75, + "learning_rate": 1.4292320474033204e-05, + "loss": 0.2856, + "step": 37430 + }, + { + "epoch": 1.75, + "learning_rate": 1.4291536688978416e-05, + "loss": 0.2645, + "step": 37435 + }, + { + "epoch": 1.75, + "learning_rate": 1.4290752903923628e-05, + "loss": 0.0783, + "step": 37440 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289969118868842e-05, + "loss": 0.0332, + "step": 37445 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289185333814056e-05, + "loss": 0.0371, + "step": 37450 + }, + { + "epoch": 1.75, + "learning_rate": 1.428840154875927e-05, + "loss": 0.1575, + "step": 37455 + }, + { + "epoch": 1.75, + "learning_rate": 1.4287617763704482e-05, + "loss": 0.094, + "step": 37460 + }, + { + "epoch": 1.75, + "learning_rate": 1.4286833978649698e-05, + "loss": 0.0949, + "step": 37465 + }, + { + "epoch": 1.75, + "learning_rate": 1.428605019359491e-05, + "loss": 0.1676, + "step": 37470 + }, + { + "epoch": 1.75, + "learning_rate": 1.4285266408540124e-05, + "loss": 0.1947, + "step": 37475 + }, + { + "epoch": 1.75, + "learning_rate": 1.4284482623485336e-05, + "loss": 0.1995, + "step": 37480 + }, + { + "epoch": 1.75, + "learning_rate": 1.4283698838430551e-05, + "loss": 0.2297, + "step": 37485 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282915053375764e-05, + "loss": 0.0635, + "step": 37490 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282131268320978e-05, + "loss": 0.049, + "step": 37495 + }, + { + "epoch": 1.75, + "learning_rate": 1.428134748326619e-05, + "loss": 0.1047, + "step": 37500 + }, + { + "epoch": 1.75, + "learning_rate": 1.4280563698211402e-05, + "loss": 0.125, + "step": 37505 + }, + { + "epoch": 1.75, + "learning_rate": 1.4279779913156618e-05, + "loss": 0.1367, + "step": 37510 + }, + { + "epoch": 1.75, + "learning_rate": 1.427899612810183e-05, + "loss": 0.16, + "step": 37515 + }, + { + "epoch": 1.75, + "learning_rate": 1.4278212343047044e-05, + "loss": 0.2038, + "step": 37520 + }, + { + "epoch": 1.75, + "learning_rate": 1.4277428557992256e-05, + "loss": 0.2235, + "step": 37525 + }, + { + "epoch": 1.75, + "learning_rate": 1.4276644772937472e-05, + "loss": 0.428, + "step": 37530 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275860987882684e-05, + "loss": 0.3868, + "step": 37535 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275077202827898e-05, + "loss": 0.0283, + "step": 37540 + }, + { + "epoch": 1.75, + "learning_rate": 1.4274293417773112e-05, + "loss": 0.0262, + "step": 37545 + }, + { + "epoch": 1.75, + "learning_rate": 1.4273509632718325e-05, + "loss": 0.0667, + "step": 37550 + }, + { + "epoch": 1.75, + "learning_rate": 1.4272725847663538e-05, + "loss": 0.1034, + "step": 37555 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271942062608752e-05, + "loss": 0.1123, + "step": 37560 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271158277553965e-05, + "loss": 0.1533, + "step": 37565 + }, + { + "epoch": 1.75, + "learning_rate": 1.4270374492499178e-05, + "loss": 0.0854, + "step": 37570 + }, + { + "epoch": 1.75, + "learning_rate": 1.4269590707444392e-05, + "loss": 0.2158, + "step": 37575 + }, + { + "epoch": 1.75, + "learning_rate": 1.4268806922389604e-05, + "loss": 0.3784, + "step": 37580 + }, + { + "epoch": 1.75, + "learning_rate": 1.426802313733482e-05, + "loss": 0.2143, + "step": 37585 + }, + { + "epoch": 1.75, + "learning_rate": 1.4267239352280032e-05, + "loss": 0.0696, + "step": 37590 + }, + { + "epoch": 1.75, + "learning_rate": 1.4266455567225246e-05, + "loss": 0.0339, + "step": 37595 + }, + { + "epoch": 1.75, + "learning_rate": 1.4265671782170458e-05, + "loss": 0.0495, + "step": 37600 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264887997115673e-05, + "loss": 0.1684, + "step": 37605 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264104212060886e-05, + "loss": 0.184, + "step": 37610 + }, + { + "epoch": 1.76, + "learning_rate": 1.42633204270061e-05, + "loss": 0.1755, + "step": 37615 + }, + { + "epoch": 1.76, + "learning_rate": 1.4262536641951312e-05, + "loss": 0.1916, + "step": 37620 + }, + { + "epoch": 1.76, + "learning_rate": 1.4261752856896527e-05, + "loss": 0.178, + "step": 37625 + }, + { + "epoch": 1.76, + "learning_rate": 1.426096907184174e-05, + "loss": 0.2962, + "step": 37630 + }, + { + "epoch": 1.76, + "learning_rate": 1.4260185286786952e-05, + "loss": 0.1308, + "step": 37635 + }, + { + "epoch": 1.76, + "learning_rate": 1.4259401501732166e-05, + "loss": 0.0454, + "step": 37640 + }, + { + "epoch": 1.76, + "learning_rate": 1.425861771667738e-05, + "loss": 0.0986, + "step": 37645 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257833931622593e-05, + "loss": 0.0789, + "step": 37650 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257050146567806e-05, + "loss": 0.1297, + "step": 37655 + }, + { + "epoch": 1.76, + "learning_rate": 1.425626636151302e-05, + "loss": 0.0659, + "step": 37660 + }, + { + "epoch": 1.76, + "learning_rate": 1.4255482576458233e-05, + "loss": 0.1486, + "step": 37665 + }, + { + "epoch": 1.76, + "learning_rate": 1.4254698791403447e-05, + "loss": 0.1687, + "step": 37670 + }, + { + "epoch": 1.76, + "learning_rate": 1.425391500634866e-05, + "loss": 0.1958, + "step": 37675 + }, + { + "epoch": 1.76, + "learning_rate": 1.4253131221293875e-05, + "loss": 0.3865, + "step": 37680 + }, + { + "epoch": 1.76, + "learning_rate": 1.4252347436239087e-05, + "loss": 0.3559, + "step": 37685 + }, + { + "epoch": 1.76, + "learning_rate": 1.4251563651184301e-05, + "loss": 0.0624, + "step": 37690 + }, + { + "epoch": 1.76, + "learning_rate": 1.4250779866129513e-05, + "loss": 0.0917, + "step": 37695 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249996081074726e-05, + "loss": 0.0923, + "step": 37700 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249212296019941e-05, + "loss": 0.1211, + "step": 37705 + }, + { + "epoch": 1.76, + "learning_rate": 1.4248428510965153e-05, + "loss": 0.1117, + "step": 37710 + }, + { + "epoch": 1.76, + "learning_rate": 1.4247644725910367e-05, + "loss": 0.2351, + "step": 37715 + }, + { + "epoch": 1.76, + "learning_rate": 1.424686094085558e-05, + "loss": 0.1649, + "step": 37720 + }, + { + "epoch": 1.76, + "learning_rate": 1.4246077155800795e-05, + "loss": 0.1855, + "step": 37725 + }, + { + "epoch": 1.76, + "learning_rate": 1.4245293370746007e-05, + "loss": 0.3319, + "step": 37730 + }, + { + "epoch": 1.76, + "learning_rate": 1.4244509585691221e-05, + "loss": 0.2802, + "step": 37735 + }, + { + "epoch": 1.76, + "learning_rate": 1.4243725800636434e-05, + "loss": 0.0575, + "step": 37740 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242942015581649e-05, + "loss": 0.0562, + "step": 37745 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242158230526861e-05, + "loss": 0.0816, + "step": 37750 + }, + { + "epoch": 1.76, + "learning_rate": 1.4241374445472075e-05, + "loss": 0.0493, + "step": 37755 + }, + { + "epoch": 1.76, + "learning_rate": 1.424059066041729e-05, + "loss": 0.1505, + "step": 37760 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239806875362501e-05, + "loss": 0.1146, + "step": 37765 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239023090307715e-05, + "loss": 0.2125, + "step": 37770 + }, + { + "epoch": 1.76, + "learning_rate": 1.4238239305252927e-05, + "loss": 0.2498, + "step": 37775 + }, + { + "epoch": 1.76, + "learning_rate": 1.4237455520198143e-05, + "loss": 0.3085, + "step": 37780 + }, + { + "epoch": 1.76, + "learning_rate": 1.4236671735143355e-05, + "loss": 0.4248, + "step": 37785 + }, + { + "epoch": 1.76, + "learning_rate": 1.423588795008857e-05, + "loss": 0.0457, + "step": 37790 + }, + { + "epoch": 1.76, + "learning_rate": 1.4235104165033781e-05, + "loss": 0.0464, + "step": 37795 + }, + { + "epoch": 1.76, + "learning_rate": 1.4234320379978997e-05, + "loss": 0.0329, + "step": 37800 + }, + { + "epoch": 1.76, + "learning_rate": 1.423353659492421e-05, + "loss": 0.1443, + "step": 37805 + }, + { + "epoch": 1.76, + "learning_rate": 1.4232752809869423e-05, + "loss": 0.1504, + "step": 37810 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231969024814635e-05, + "loss": 0.1618, + "step": 37815 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231185239759851e-05, + "loss": 0.1992, + "step": 37820 + }, + { + "epoch": 1.76, + "learning_rate": 1.4230401454705063e-05, + "loss": 0.1979, + "step": 37825 + }, + { + "epoch": 1.77, + "learning_rate": 1.4229617669650275e-05, + "loss": 0.3638, + "step": 37830 + }, + { + "epoch": 1.77, + "learning_rate": 1.422883388459549e-05, + "loss": 0.2355, + "step": 37835 + }, + { + "epoch": 1.77, + "learning_rate": 1.4228050099540701e-05, + "loss": 0.0892, + "step": 37840 + }, + { + "epoch": 1.77, + "learning_rate": 1.4227266314485917e-05, + "loss": 0.0729, + "step": 37845 + }, + { + "epoch": 1.77, + "learning_rate": 1.422648252943113e-05, + "loss": 0.1274, + "step": 37850 + }, + { + "epoch": 1.77, + "learning_rate": 1.4225698744376343e-05, + "loss": 0.0896, + "step": 37855 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224914959321557e-05, + "loss": 0.0717, + "step": 37860 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224131174266771e-05, + "loss": 0.1184, + "step": 37865 + }, + { + "epoch": 1.77, + "learning_rate": 1.4223347389211983e-05, + "loss": 0.1207, + "step": 37870 + }, + { + "epoch": 1.77, + "learning_rate": 1.4222563604157197e-05, + "loss": 0.2518, + "step": 37875 + }, + { + "epoch": 1.77, + "learning_rate": 1.4221779819102411e-05, + "loss": 0.3498, + "step": 37880 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220996034047625e-05, + "loss": 0.3115, + "step": 37885 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220212248992837e-05, + "loss": 0.0583, + "step": 37890 + }, + { + "epoch": 1.77, + "learning_rate": 1.421942846393805e-05, + "loss": 0.0557, + "step": 37895 + }, + { + "epoch": 1.77, + "learning_rate": 1.4218644678883265e-05, + "loss": 0.0753, + "step": 37900 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217860893828477e-05, + "loss": 0.0799, + "step": 37905 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217077108773691e-05, + "loss": 0.0979, + "step": 37910 + }, + { + "epoch": 1.77, + "learning_rate": 1.4216293323718903e-05, + "loss": 0.0891, + "step": 37915 + }, + { + "epoch": 1.77, + "learning_rate": 1.4215509538664119e-05, + "loss": 0.1499, + "step": 37920 + }, + { + "epoch": 1.77, + "learning_rate": 1.4214725753609331e-05, + "loss": 0.1411, + "step": 37925 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213941968554545e-05, + "loss": 0.2998, + "step": 37930 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213158183499757e-05, + "loss": 0.2268, + "step": 37935 + }, + { + "epoch": 1.77, + "learning_rate": 1.4212374398444973e-05, + "loss": 0.0357, + "step": 37940 + }, + { + "epoch": 1.77, + "learning_rate": 1.4211590613390185e-05, + "loss": 0.0426, + "step": 37945 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210806828335399e-05, + "loss": 0.0794, + "step": 37950 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210023043280611e-05, + "loss": 0.1651, + "step": 37955 + }, + { + "epoch": 1.77, + "learning_rate": 1.4209239258225825e-05, + "loss": 0.1167, + "step": 37960 + }, + { + "epoch": 1.77, + "learning_rate": 1.4208455473171039e-05, + "loss": 0.1324, + "step": 37965 + }, + { + "epoch": 1.77, + "learning_rate": 1.4207671688116251e-05, + "loss": 0.1587, + "step": 37970 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206887903061465e-05, + "loss": 0.2, + "step": 37975 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206104118006679e-05, + "loss": 0.3415, + "step": 37980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4205320332951893e-05, + "loss": 0.24, + "step": 37985 + }, + { + "epoch": 1.77, + "learning_rate": 1.4204536547897105e-05, + "loss": 0.0173, + "step": 37990 + }, + { + "epoch": 1.77, + "learning_rate": 1.420375276284232e-05, + "loss": 0.0568, + "step": 37995 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202968977787533e-05, + "loss": 0.0666, + "step": 38000 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202185192732747e-05, + "loss": 0.1213, + "step": 38005 + }, + { + "epoch": 1.77, + "learning_rate": 1.4201401407677959e-05, + "loss": 0.1022, + "step": 38010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4200617622623175e-05, + "loss": 0.1468, + "step": 38015 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199833837568387e-05, + "loss": 0.2344, + "step": 38020 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199050052513599e-05, + "loss": 0.2398, + "step": 38025 + }, + { + "epoch": 1.77, + "learning_rate": 1.4198266267458813e-05, + "loss": 0.3248, + "step": 38030 + }, + { + "epoch": 1.77, + "learning_rate": 1.4197482482404025e-05, + "loss": 0.2467, + "step": 38035 + }, + { + "epoch": 1.77, + "learning_rate": 1.419669869734924e-05, + "loss": 0.0681, + "step": 38040 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195914912294453e-05, + "loss": 0.0525, + "step": 38045 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195131127239667e-05, + "loss": 0.0501, + "step": 38050 + }, + { + "epoch": 1.78, + "learning_rate": 1.4194347342184879e-05, + "loss": 0.0514, + "step": 38055 + }, + { + "epoch": 1.78, + "learning_rate": 1.4193563557130095e-05, + "loss": 0.1218, + "step": 38060 + }, + { + "epoch": 1.78, + "learning_rate": 1.4192779772075307e-05, + "loss": 0.1249, + "step": 38065 + }, + { + "epoch": 1.78, + "learning_rate": 1.419199598702052e-05, + "loss": 0.1505, + "step": 38070 + }, + { + "epoch": 1.78, + "learning_rate": 1.4191212201965735e-05, + "loss": 0.2301, + "step": 38075 + }, + { + "epoch": 1.78, + "learning_rate": 1.4190428416910949e-05, + "loss": 0.3129, + "step": 38080 + }, + { + "epoch": 1.78, + "learning_rate": 1.4189644631856161e-05, + "loss": 0.2717, + "step": 38085 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188860846801373e-05, + "loss": 0.0331, + "step": 38090 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188077061746589e-05, + "loss": 0.0624, + "step": 38095 + }, + { + "epoch": 1.78, + "learning_rate": 1.4187293276691801e-05, + "loss": 0.0583, + "step": 38100 + }, + { + "epoch": 1.78, + "learning_rate": 1.4186509491637015e-05, + "loss": 0.0417, + "step": 38105 + }, + { + "epoch": 1.78, + "learning_rate": 1.4185725706582227e-05, + "loss": 0.1927, + "step": 38110 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184941921527443e-05, + "loss": 0.0895, + "step": 38115 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184158136472655e-05, + "loss": 0.1494, + "step": 38120 + }, + { + "epoch": 1.78, + "learning_rate": 1.4183374351417869e-05, + "loss": 0.2075, + "step": 38125 + }, + { + "epoch": 1.78, + "learning_rate": 1.4182590566363081e-05, + "loss": 0.2841, + "step": 38130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181806781308297e-05, + "loss": 0.2523, + "step": 38135 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181022996253509e-05, + "loss": 0.051, + "step": 38140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4180239211198723e-05, + "loss": 0.0424, + "step": 38145 + }, + { + "epoch": 1.78, + "learning_rate": 1.4179455426143935e-05, + "loss": 0.0772, + "step": 38150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4178671641089147e-05, + "loss": 0.0753, + "step": 38155 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177887856034363e-05, + "loss": 0.1035, + "step": 38160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177104070979575e-05, + "loss": 0.226, + "step": 38165 + }, + { + "epoch": 1.78, + "learning_rate": 1.4176320285924789e-05, + "loss": 0.2246, + "step": 38170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4175536500870003e-05, + "loss": 0.1679, + "step": 38175 + }, + { + "epoch": 1.78, + "learning_rate": 1.4174752715815217e-05, + "loss": 0.3673, + "step": 38180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173968930760429e-05, + "loss": 0.2447, + "step": 38185 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173185145705643e-05, + "loss": 0.1178, + "step": 38190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4172401360650857e-05, + "loss": 0.0845, + "step": 38195 + }, + { + "epoch": 1.78, + "learning_rate": 1.417161757559607e-05, + "loss": 0.0427, + "step": 38200 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170833790541283e-05, + "loss": 0.1022, + "step": 38205 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170050005486498e-05, + "loss": 0.0848, + "step": 38210 + }, + { + "epoch": 1.78, + "learning_rate": 1.416926622043171e-05, + "loss": 0.1151, + "step": 38215 + }, + { + "epoch": 1.78, + "learning_rate": 1.4168482435376923e-05, + "loss": 0.1462, + "step": 38220 + }, + { + "epoch": 1.78, + "learning_rate": 1.4167698650322137e-05, + "loss": 0.2352, + "step": 38225 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166914865267349e-05, + "loss": 0.4185, + "step": 38230 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166131080212564e-05, + "loss": 0.2938, + "step": 38235 + }, + { + "epoch": 1.78, + "learning_rate": 1.4165347295157777e-05, + "loss": 0.0908, + "step": 38240 + }, + { + "epoch": 1.78, + "learning_rate": 1.416456351010299e-05, + "loss": 0.0728, + "step": 38245 + }, + { + "epoch": 1.78, + "learning_rate": 1.4163779725048203e-05, + "loss": 0.0581, + "step": 38250 + }, + { + "epoch": 1.79, + "learning_rate": 1.4162995939993418e-05, + "loss": 0.128, + "step": 38255 + }, + { + "epoch": 1.79, + "learning_rate": 1.416221215493863e-05, + "loss": 0.1307, + "step": 38260 + }, + { + "epoch": 1.79, + "learning_rate": 1.4161428369883845e-05, + "loss": 0.1847, + "step": 38265 + }, + { + "epoch": 1.79, + "learning_rate": 1.4160644584829057e-05, + "loss": 0.1701, + "step": 38270 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159860799774272e-05, + "loss": 0.2065, + "step": 38275 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159077014719485e-05, + "loss": 0.4186, + "step": 38280 + }, + { + "epoch": 1.79, + "learning_rate": 1.4158293229664697e-05, + "loss": 0.2782, + "step": 38285 + }, + { + "epoch": 1.79, + "learning_rate": 1.415750944460991e-05, + "loss": 0.0348, + "step": 38290 + }, + { + "epoch": 1.79, + "learning_rate": 1.4156725659555125e-05, + "loss": 0.023, + "step": 38295 + }, + { + "epoch": 1.79, + "learning_rate": 1.4155941874500338e-05, + "loss": 0.0195, + "step": 38300 + }, + { + "epoch": 1.79, + "learning_rate": 1.415515808944555e-05, + "loss": 0.1097, + "step": 38305 + }, + { + "epoch": 1.79, + "learning_rate": 1.4154374304390766e-05, + "loss": 0.1873, + "step": 38310 + }, + { + "epoch": 1.79, + "learning_rate": 1.4153590519335978e-05, + "loss": 0.1415, + "step": 38315 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152806734281192e-05, + "loss": 0.1651, + "step": 38320 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152022949226405e-05, + "loss": 0.2963, + "step": 38325 + }, + { + "epoch": 1.79, + "learning_rate": 1.415123916417162e-05, + "loss": 0.2719, + "step": 38330 + }, + { + "epoch": 1.79, + "learning_rate": 1.4150455379116832e-05, + "loss": 0.2542, + "step": 38335 + }, + { + "epoch": 1.79, + "learning_rate": 1.4149671594062046e-05, + "loss": 0.0304, + "step": 38340 + }, + { + "epoch": 1.79, + "learning_rate": 1.4148887809007259e-05, + "loss": 0.1187, + "step": 38345 + }, + { + "epoch": 1.79, + "learning_rate": 1.414810402395247e-05, + "loss": 0.1246, + "step": 38350 + }, + { + "epoch": 1.79, + "learning_rate": 1.4147320238897686e-05, + "loss": 0.0625, + "step": 38355 + }, + { + "epoch": 1.79, + "learning_rate": 1.4146536453842899e-05, + "loss": 0.182, + "step": 38360 + }, + { + "epoch": 1.79, + "learning_rate": 1.4145752668788112e-05, + "loss": 0.0816, + "step": 38365 + }, + { + "epoch": 1.79, + "learning_rate": 1.4144968883733325e-05, + "loss": 0.1735, + "step": 38370 + }, + { + "epoch": 1.79, + "learning_rate": 1.414418509867854e-05, + "loss": 0.1373, + "step": 38375 + }, + { + "epoch": 1.79, + "learning_rate": 1.4143401313623752e-05, + "loss": 0.4016, + "step": 38380 + }, + { + "epoch": 1.79, + "learning_rate": 1.4142617528568966e-05, + "loss": 0.2637, + "step": 38385 + }, + { + "epoch": 1.79, + "learning_rate": 1.414183374351418e-05, + "loss": 0.0964, + "step": 38390 + }, + { + "epoch": 1.79, + "learning_rate": 1.4141049958459394e-05, + "loss": 0.0635, + "step": 38395 + }, + { + "epoch": 1.79, + "learning_rate": 1.4140266173404606e-05, + "loss": 0.0482, + "step": 38400 + }, + { + "epoch": 1.79, + "learning_rate": 1.413948238834982e-05, + "loss": 0.0768, + "step": 38405 + }, + { + "epoch": 1.79, + "learning_rate": 1.4138698603295034e-05, + "loss": 0.0976, + "step": 38410 + }, + { + "epoch": 1.79, + "learning_rate": 1.4137914818240246e-05, + "loss": 0.2089, + "step": 38415 + }, + { + "epoch": 1.79, + "learning_rate": 1.413713103318546e-05, + "loss": 0.2171, + "step": 38420 + }, + { + "epoch": 1.79, + "learning_rate": 1.4136347248130673e-05, + "loss": 0.2064, + "step": 38425 + }, + { + "epoch": 1.79, + "learning_rate": 1.4135563463075888e-05, + "loss": 0.4382, + "step": 38430 + }, + { + "epoch": 1.79, + "learning_rate": 1.41347796780211e-05, + "loss": 0.2063, + "step": 38435 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133995892966314e-05, + "loss": 0.0299, + "step": 38440 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133212107911526e-05, + "loss": 0.08, + "step": 38445 + }, + { + "epoch": 1.79, + "learning_rate": 1.4132428322856742e-05, + "loss": 0.1102, + "step": 38450 + }, + { + "epoch": 1.79, + "learning_rate": 1.4131644537801954e-05, + "loss": 0.0615, + "step": 38455 + }, + { + "epoch": 1.79, + "learning_rate": 1.4130860752747168e-05, + "loss": 0.0704, + "step": 38460 + }, + { + "epoch": 1.79, + "learning_rate": 1.413007696769238e-05, + "loss": 0.0914, + "step": 38465 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129293182637596e-05, + "loss": 0.1401, + "step": 38470 + }, + { + "epoch": 1.8, + "learning_rate": 1.4128509397582808e-05, + "loss": 0.2028, + "step": 38475 + }, + { + "epoch": 1.8, + "learning_rate": 1.412772561252802e-05, + "loss": 0.2902, + "step": 38480 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126941827473234e-05, + "loss": 0.2409, + "step": 38485 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126158042418448e-05, + "loss": 0.0215, + "step": 38490 + }, + { + "epoch": 1.8, + "learning_rate": 1.4125374257363662e-05, + "loss": 0.0641, + "step": 38495 + }, + { + "epoch": 1.8, + "learning_rate": 1.4124590472308874e-05, + "loss": 0.1115, + "step": 38500 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123806687254088e-05, + "loss": 0.099, + "step": 38505 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123022902199302e-05, + "loss": 0.0935, + "step": 38510 + }, + { + "epoch": 1.8, + "learning_rate": 1.4122239117144516e-05, + "loss": 0.1694, + "step": 38515 + }, + { + "epoch": 1.8, + "learning_rate": 1.4121455332089728e-05, + "loss": 0.234, + "step": 38520 + }, + { + "epoch": 1.8, + "learning_rate": 1.4120671547034944e-05, + "loss": 0.237, + "step": 38525 + }, + { + "epoch": 1.8, + "learning_rate": 1.4119887761980156e-05, + "loss": 0.3852, + "step": 38530 + }, + { + "epoch": 1.8, + "learning_rate": 1.411910397692537e-05, + "loss": 0.2284, + "step": 38535 + }, + { + "epoch": 1.8, + "learning_rate": 1.4118320191870582e-05, + "loss": 0.0491, + "step": 38540 + }, + { + "epoch": 1.8, + "learning_rate": 1.4117536406815794e-05, + "loss": 0.0762, + "step": 38545 + }, + { + "epoch": 1.8, + "learning_rate": 1.411675262176101e-05, + "loss": 0.1074, + "step": 38550 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115968836706222e-05, + "loss": 0.1303, + "step": 38555 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115185051651436e-05, + "loss": 0.1395, + "step": 38560 + }, + { + "epoch": 1.8, + "learning_rate": 1.4114401266596648e-05, + "loss": 0.156, + "step": 38565 + }, + { + "epoch": 1.8, + "learning_rate": 1.4113617481541864e-05, + "loss": 0.1675, + "step": 38570 + }, + { + "epoch": 1.8, + "learning_rate": 1.4112833696487076e-05, + "loss": 0.1753, + "step": 38575 + }, + { + "epoch": 1.8, + "learning_rate": 1.411204991143229e-05, + "loss": 0.3437, + "step": 38580 + }, + { + "epoch": 1.8, + "learning_rate": 1.4111266126377502e-05, + "loss": 0.3223, + "step": 38585 + }, + { + "epoch": 1.8, + "learning_rate": 1.4110482341322718e-05, + "loss": 0.0702, + "step": 38590 + }, + { + "epoch": 1.8, + "learning_rate": 1.410969855626793e-05, + "loss": 0.0645, + "step": 38595 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108914771213144e-05, + "loss": 0.1382, + "step": 38600 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108130986158356e-05, + "loss": 0.1124, + "step": 38605 + }, + { + "epoch": 1.8, + "learning_rate": 1.410734720110357e-05, + "loss": 0.0886, + "step": 38610 + }, + { + "epoch": 1.8, + "learning_rate": 1.4106563416048784e-05, + "loss": 0.1948, + "step": 38615 + }, + { + "epoch": 1.8, + "learning_rate": 1.4105779630993996e-05, + "loss": 0.2204, + "step": 38620 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104995845939212e-05, + "loss": 0.1517, + "step": 38625 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104212060884424e-05, + "loss": 0.3109, + "step": 38630 + }, + { + "epoch": 1.8, + "learning_rate": 1.4103428275829638e-05, + "loss": 0.1802, + "step": 38635 + }, + { + "epoch": 1.8, + "learning_rate": 1.410264449077485e-05, + "loss": 0.028, + "step": 38640 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101860705720066e-05, + "loss": 0.1063, + "step": 38645 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101076920665278e-05, + "loss": 0.0521, + "step": 38650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4100293135610492e-05, + "loss": 0.091, + "step": 38655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099509350555704e-05, + "loss": 0.0967, + "step": 38660 + }, + { + "epoch": 1.8, + "learning_rate": 1.409872556550092e-05, + "loss": 0.117, + "step": 38665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097941780446132e-05, + "loss": 0.1346, + "step": 38670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097157995391344e-05, + "loss": 0.1494, + "step": 38675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4096374210336558e-05, + "loss": 0.2843, + "step": 38680 + }, + { + "epoch": 1.81, + "learning_rate": 1.409559042528177e-05, + "loss": 0.2728, + "step": 38685 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094806640226986e-05, + "loss": 0.0535, + "step": 38690 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094022855172198e-05, + "loss": 0.0669, + "step": 38695 + }, + { + "epoch": 1.81, + "learning_rate": 1.4093239070117412e-05, + "loss": 0.1214, + "step": 38700 + }, + { + "epoch": 1.81, + "learning_rate": 1.4092455285062626e-05, + "loss": 0.0523, + "step": 38705 + }, + { + "epoch": 1.81, + "learning_rate": 1.409167150000784e-05, + "loss": 0.1108, + "step": 38710 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090887714953052e-05, + "loss": 0.1358, + "step": 38715 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090103929898266e-05, + "loss": 0.1182, + "step": 38720 + }, + { + "epoch": 1.81, + "learning_rate": 1.408932014484348e-05, + "loss": 0.1505, + "step": 38725 + }, + { + "epoch": 1.81, + "learning_rate": 1.4088536359788694e-05, + "loss": 0.3261, + "step": 38730 + }, + { + "epoch": 1.81, + "learning_rate": 1.4087752574733906e-05, + "loss": 0.3337, + "step": 38735 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086968789679118e-05, + "loss": 0.0553, + "step": 38740 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086185004624334e-05, + "loss": 0.0685, + "step": 38745 + }, + { + "epoch": 1.81, + "learning_rate": 1.4085401219569546e-05, + "loss": 0.0481, + "step": 38750 + }, + { + "epoch": 1.81, + "learning_rate": 1.408461743451476e-05, + "loss": 0.095, + "step": 38755 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083833649459972e-05, + "loss": 0.1435, + "step": 38760 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083049864405188e-05, + "loss": 0.0965, + "step": 38765 + }, + { + "epoch": 1.81, + "learning_rate": 1.40822660793504e-05, + "loss": 0.1779, + "step": 38770 + }, + { + "epoch": 1.81, + "learning_rate": 1.4081482294295614e-05, + "loss": 0.2204, + "step": 38775 + }, + { + "epoch": 1.81, + "learning_rate": 1.4080698509240826e-05, + "loss": 0.3111, + "step": 38780 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079914724186042e-05, + "loss": 0.2863, + "step": 38785 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079130939131254e-05, + "loss": 0.0378, + "step": 38790 + }, + { + "epoch": 1.81, + "learning_rate": 1.4078347154076468e-05, + "loss": 0.1025, + "step": 38795 + }, + { + "epoch": 1.81, + "learning_rate": 1.407756336902168e-05, + "loss": 0.0496, + "step": 38800 + }, + { + "epoch": 1.81, + "learning_rate": 1.4076779583966894e-05, + "loss": 0.0819, + "step": 38805 + }, + { + "epoch": 1.81, + "learning_rate": 1.4075995798912108e-05, + "loss": 0.071, + "step": 38810 + }, + { + "epoch": 1.81, + "learning_rate": 1.407521201385732e-05, + "loss": 0.1571, + "step": 38815 + }, + { + "epoch": 1.81, + "learning_rate": 1.4074428228802534e-05, + "loss": 0.262, + "step": 38820 + }, + { + "epoch": 1.81, + "learning_rate": 1.4073644443747748e-05, + "loss": 0.2983, + "step": 38825 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072860658692962e-05, + "loss": 0.228, + "step": 38830 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072076873638174e-05, + "loss": 0.1873, + "step": 38835 + }, + { + "epoch": 1.81, + "learning_rate": 1.407129308858339e-05, + "loss": 0.018, + "step": 38840 + }, + { + "epoch": 1.81, + "learning_rate": 1.4070509303528602e-05, + "loss": 0.0565, + "step": 38845 + }, + { + "epoch": 1.81, + "learning_rate": 1.4069725518473816e-05, + "loss": 0.1004, + "step": 38850 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068941733419028e-05, + "loss": 0.0656, + "step": 38855 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068157948364243e-05, + "loss": 0.2027, + "step": 38860 + }, + { + "epoch": 1.81, + "learning_rate": 1.4067374163309456e-05, + "loss": 0.2106, + "step": 38865 + }, + { + "epoch": 1.81, + "learning_rate": 1.4066590378254668e-05, + "loss": 0.1221, + "step": 38870 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065806593199882e-05, + "loss": 0.2289, + "step": 38875 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065022808145094e-05, + "loss": 0.311, + "step": 38880 + }, + { + "epoch": 1.81, + "learning_rate": 1.406423902309031e-05, + "loss": 0.3, + "step": 38885 + }, + { + "epoch": 1.81, + "learning_rate": 1.4063455238035522e-05, + "loss": 0.0561, + "step": 38890 + }, + { + "epoch": 1.81, + "learning_rate": 1.4062671452980736e-05, + "loss": 0.0676, + "step": 38895 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061887667925948e-05, + "loss": 0.1541, + "step": 38900 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061103882871163e-05, + "loss": 0.0841, + "step": 38905 + }, + { + "epoch": 1.82, + "learning_rate": 1.4060320097816376e-05, + "loss": 0.1486, + "step": 38910 + }, + { + "epoch": 1.82, + "learning_rate": 1.405953631276159e-05, + "loss": 0.1742, + "step": 38915 + }, + { + "epoch": 1.82, + "learning_rate": 1.4058752527706803e-05, + "loss": 0.153, + "step": 38920 + }, + { + "epoch": 1.82, + "learning_rate": 1.4057968742652017e-05, + "loss": 0.201, + "step": 38925 + }, + { + "epoch": 1.82, + "learning_rate": 1.405718495759723e-05, + "loss": 0.3103, + "step": 38930 + }, + { + "epoch": 1.82, + "learning_rate": 1.4056401172542442e-05, + "loss": 0.1888, + "step": 38935 + }, + { + "epoch": 1.82, + "learning_rate": 1.4055617387487657e-05, + "loss": 0.035, + "step": 38940 + }, + { + "epoch": 1.82, + "learning_rate": 1.405483360243287e-05, + "loss": 0.02, + "step": 38945 + }, + { + "epoch": 1.82, + "learning_rate": 1.4054049817378084e-05, + "loss": 0.0971, + "step": 38950 + }, + { + "epoch": 1.82, + "learning_rate": 1.4053266032323296e-05, + "loss": 0.1084, + "step": 38955 + }, + { + "epoch": 1.82, + "learning_rate": 1.4052482247268511e-05, + "loss": 0.0948, + "step": 38960 + }, + { + "epoch": 1.82, + "learning_rate": 1.4051698462213724e-05, + "loss": 0.1653, + "step": 38965 + }, + { + "epoch": 1.82, + "learning_rate": 1.4050914677158937e-05, + "loss": 0.1645, + "step": 38970 + }, + { + "epoch": 1.82, + "learning_rate": 1.405013089210415e-05, + "loss": 0.1154, + "step": 38975 + }, + { + "epoch": 1.82, + "learning_rate": 1.4049347107049365e-05, + "loss": 0.4433, + "step": 38980 + }, + { + "epoch": 1.82, + "learning_rate": 1.4048563321994577e-05, + "loss": 0.2112, + "step": 38985 + }, + { + "epoch": 1.82, + "learning_rate": 1.4047779536939791e-05, + "loss": 0.0622, + "step": 38990 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046995751885004e-05, + "loss": 0.0382, + "step": 38995 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046211966830216e-05, + "loss": 0.1102, + "step": 39000 + }, + { + "epoch": 1.82, + "learning_rate": 1.4045428181775431e-05, + "loss": 0.1516, + "step": 39005 + }, + { + "epoch": 1.82, + "learning_rate": 1.4044644396720644e-05, + "loss": 0.1424, + "step": 39010 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043860611665858e-05, + "loss": 0.1871, + "step": 39015 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043076826611071e-05, + "loss": 0.2085, + "step": 39020 + }, + { + "epoch": 1.82, + "learning_rate": 1.4042293041556285e-05, + "loss": 0.2028, + "step": 39025 + }, + { + "epoch": 1.82, + "learning_rate": 1.4041509256501498e-05, + "loss": 0.3885, + "step": 39030 + }, + { + "epoch": 1.82, + "learning_rate": 1.4040725471446711e-05, + "loss": 0.3049, + "step": 39035 + }, + { + "epoch": 1.82, + "learning_rate": 1.4039941686391925e-05, + "loss": 0.0252, + "step": 39040 + }, + { + "epoch": 1.82, + "learning_rate": 1.403915790133714e-05, + "loss": 0.0376, + "step": 39045 + }, + { + "epoch": 1.82, + "learning_rate": 1.4038374116282351e-05, + "loss": 0.029, + "step": 39050 + }, + { + "epoch": 1.82, + "learning_rate": 1.4037590331227567e-05, + "loss": 0.1269, + "step": 39055 + }, + { + "epoch": 1.82, + "learning_rate": 1.403680654617278e-05, + "loss": 0.1043, + "step": 39060 + }, + { + "epoch": 1.82, + "learning_rate": 1.4036022761117992e-05, + "loss": 0.1261, + "step": 39065 + }, + { + "epoch": 1.82, + "learning_rate": 1.4035238976063205e-05, + "loss": 0.2339, + "step": 39070 + }, + { + "epoch": 1.82, + "learning_rate": 1.4034455191008418e-05, + "loss": 0.1286, + "step": 39075 + }, + { + "epoch": 1.82, + "learning_rate": 1.4033671405953633e-05, + "loss": 0.2755, + "step": 39080 + }, + { + "epoch": 1.82, + "learning_rate": 1.4032887620898845e-05, + "loss": 0.3312, + "step": 39085 + }, + { + "epoch": 1.82, + "learning_rate": 1.403210383584406e-05, + "loss": 0.0087, + "step": 39090 + }, + { + "epoch": 1.82, + "learning_rate": 1.4031320050789272e-05, + "loss": 0.0794, + "step": 39095 + }, + { + "epoch": 1.82, + "learning_rate": 1.4030536265734487e-05, + "loss": 0.0557, + "step": 39100 + }, + { + "epoch": 1.82, + "learning_rate": 1.40297524806797e-05, + "loss": 0.136, + "step": 39105 + }, + { + "epoch": 1.82, + "learning_rate": 1.4028968695624913e-05, + "loss": 0.1002, + "step": 39110 + }, + { + "epoch": 1.83, + "learning_rate": 1.4028184910570125e-05, + "loss": 0.1967, + "step": 39115 + }, + { + "epoch": 1.83, + "learning_rate": 1.4027401125515341e-05, + "loss": 0.1752, + "step": 39120 + }, + { + "epoch": 1.83, + "learning_rate": 1.4026617340460553e-05, + "loss": 0.1226, + "step": 39125 + }, + { + "epoch": 1.83, + "learning_rate": 1.4025833555405766e-05, + "loss": 0.2543, + "step": 39130 + }, + { + "epoch": 1.83, + "learning_rate": 1.402504977035098e-05, + "loss": 0.3523, + "step": 39135 + }, + { + "epoch": 1.83, + "learning_rate": 1.4024265985296193e-05, + "loss": 0.086, + "step": 39140 + }, + { + "epoch": 1.83, + "learning_rate": 1.4023482200241407e-05, + "loss": 0.1465, + "step": 39145 + }, + { + "epoch": 1.83, + "learning_rate": 1.402269841518662e-05, + "loss": 0.1037, + "step": 39150 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021914630131835e-05, + "loss": 0.0747, + "step": 39155 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021130845077047e-05, + "loss": 0.1403, + "step": 39160 + }, + { + "epoch": 1.83, + "learning_rate": 1.4020347060022261e-05, + "loss": 0.1251, + "step": 39165 + }, + { + "epoch": 1.83, + "learning_rate": 1.4019563274967473e-05, + "loss": 0.1805, + "step": 39170 + }, + { + "epoch": 1.83, + "learning_rate": 1.4018779489912689e-05, + "loss": 0.1899, + "step": 39175 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017995704857901e-05, + "loss": 0.3973, + "step": 39180 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017368676814072e-05, + "loss": 0.2354, + "step": 39185 + }, + { + "epoch": 1.83, + "learning_rate": 1.4016584891759286e-05, + "loss": 0.0266, + "step": 39190 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015801106704498e-05, + "loss": 0.0359, + "step": 39195 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015017321649712e-05, + "loss": 0.0703, + "step": 39200 + }, + { + "epoch": 1.83, + "learning_rate": 1.4014233536594926e-05, + "loss": 0.1166, + "step": 39205 + }, + { + "epoch": 1.83, + "learning_rate": 1.4013449751540138e-05, + "loss": 0.1409, + "step": 39210 + }, + { + "epoch": 1.83, + "learning_rate": 1.4012665966485353e-05, + "loss": 0.129, + "step": 39215 + }, + { + "epoch": 1.83, + "learning_rate": 1.4011882181430566e-05, + "loss": 0.1237, + "step": 39220 + }, + { + "epoch": 1.83, + "learning_rate": 1.401109839637578e-05, + "loss": 0.1638, + "step": 39225 + }, + { + "epoch": 1.83, + "learning_rate": 1.4010314611320992e-05, + "loss": 0.2936, + "step": 39230 + }, + { + "epoch": 1.83, + "learning_rate": 1.4009530826266207e-05, + "loss": 0.2029, + "step": 39235 + }, + { + "epoch": 1.83, + "learning_rate": 1.400874704121142e-05, + "loss": 0.0323, + "step": 39240 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007963256156633e-05, + "loss": 0.1204, + "step": 39245 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007179471101846e-05, + "loss": 0.0603, + "step": 39250 + }, + { + "epoch": 1.83, + "learning_rate": 1.4006395686047061e-05, + "loss": 0.1161, + "step": 39255 + }, + { + "epoch": 1.83, + "learning_rate": 1.4005611900992273e-05, + "loss": 0.1051, + "step": 39260 + }, + { + "epoch": 1.83, + "learning_rate": 1.4004828115937486e-05, + "loss": 0.1097, + "step": 39265 + }, + { + "epoch": 1.83, + "learning_rate": 1.40040443308827e-05, + "loss": 0.1569, + "step": 39270 + }, + { + "epoch": 1.83, + "learning_rate": 1.4003260545827912e-05, + "loss": 0.2331, + "step": 39275 + }, + { + "epoch": 1.83, + "learning_rate": 1.4002476760773127e-05, + "loss": 0.4034, + "step": 39280 + }, + { + "epoch": 1.83, + "learning_rate": 1.400169297571834e-05, + "loss": 0.2077, + "step": 39285 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000909190663553e-05, + "loss": 0.0452, + "step": 39290 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000125405608766e-05, + "loss": 0.0357, + "step": 39295 + }, + { + "epoch": 1.83, + "learning_rate": 1.3999341620553981e-05, + "loss": 0.0787, + "step": 39300 + }, + { + "epoch": 1.83, + "learning_rate": 1.3998557835499194e-05, + "loss": 0.1583, + "step": 39305 + }, + { + "epoch": 1.83, + "learning_rate": 1.3997774050444407e-05, + "loss": 0.1058, + "step": 39310 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996990265389621e-05, + "loss": 0.0733, + "step": 39315 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996206480334835e-05, + "loss": 0.1355, + "step": 39320 + }, + { + "epoch": 1.83, + "learning_rate": 1.3995422695280047e-05, + "loss": 0.1527, + "step": 39325 + }, + { + "epoch": 1.84, + "learning_rate": 1.399463891022526e-05, + "loss": 0.2326, + "step": 39330 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993855125170475e-05, + "loss": 0.2507, + "step": 39335 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993071340115687e-05, + "loss": 0.0268, + "step": 39340 + }, + { + "epoch": 1.84, + "learning_rate": 1.3992287555060901e-05, + "loss": 0.0515, + "step": 39345 + }, + { + "epoch": 1.84, + "learning_rate": 1.3991503770006114e-05, + "loss": 0.0863, + "step": 39350 + }, + { + "epoch": 1.84, + "learning_rate": 1.399071998495133e-05, + "loss": 0.1121, + "step": 39355 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989936199896541e-05, + "loss": 0.1218, + "step": 39360 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989152414841755e-05, + "loss": 0.0561, + "step": 39365 + }, + { + "epoch": 1.84, + "learning_rate": 1.3988368629786968e-05, + "loss": 0.1979, + "step": 39370 + }, + { + "epoch": 1.84, + "learning_rate": 1.3987584844732183e-05, + "loss": 0.3349, + "step": 39375 + }, + { + "epoch": 1.84, + "learning_rate": 1.3986801059677395e-05, + "loss": 0.2712, + "step": 39380 + }, + { + "epoch": 1.84, + "learning_rate": 1.398601727462261e-05, + "loss": 0.21, + "step": 39385 + }, + { + "epoch": 1.84, + "learning_rate": 1.3985233489567821e-05, + "loss": 0.0712, + "step": 39390 + }, + { + "epoch": 1.84, + "learning_rate": 1.3984449704513035e-05, + "loss": 0.1168, + "step": 39395 + }, + { + "epoch": 1.84, + "learning_rate": 1.398366591945825e-05, + "loss": 0.0892, + "step": 39400 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982882134403461e-05, + "loss": 0.1336, + "step": 39405 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982098349348675e-05, + "loss": 0.172, + "step": 39410 + }, + { + "epoch": 1.84, + "learning_rate": 1.398131456429389e-05, + "loss": 0.1663, + "step": 39415 + }, + { + "epoch": 1.84, + "learning_rate": 1.3980530779239103e-05, + "loss": 0.1677, + "step": 39420 + }, + { + "epoch": 1.84, + "learning_rate": 1.3979746994184315e-05, + "loss": 0.1407, + "step": 39425 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978963209129531e-05, + "loss": 0.4581, + "step": 39430 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978179424074743e-05, + "loss": 0.4789, + "step": 39435 + }, + { + "epoch": 1.84, + "learning_rate": 1.3977395639019957e-05, + "loss": 0.0543, + "step": 39440 + }, + { + "epoch": 1.84, + "learning_rate": 1.397661185396517e-05, + "loss": 0.0469, + "step": 39445 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975828068910385e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975044283855597e-05, + "loss": 0.0445, + "step": 39455 + }, + { + "epoch": 1.84, + "learning_rate": 1.397426049880081e-05, + "loss": 0.1107, + "step": 39460 + }, + { + "epoch": 1.84, + "learning_rate": 1.3973476713746023e-05, + "loss": 0.0865, + "step": 39465 + }, + { + "epoch": 1.84, + "learning_rate": 1.3972692928691235e-05, + "loss": 0.1738, + "step": 39470 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971909143636451e-05, + "loss": 0.3184, + "step": 39475 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971125358581663e-05, + "loss": 0.2718, + "step": 39480 + }, + { + "epoch": 1.84, + "learning_rate": 1.3970341573526877e-05, + "loss": 0.3997, + "step": 39485 + }, + { + "epoch": 1.84, + "learning_rate": 1.396955778847209e-05, + "loss": 0.0962, + "step": 39490 + }, + { + "epoch": 1.84, + "learning_rate": 1.3968774003417305e-05, + "loss": 0.0429, + "step": 39495 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967990218362517e-05, + "loss": 0.119, + "step": 39500 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967206433307731e-05, + "loss": 0.065, + "step": 39505 + }, + { + "epoch": 1.84, + "learning_rate": 1.3966422648252943e-05, + "loss": 0.2045, + "step": 39510 + }, + { + "epoch": 1.84, + "learning_rate": 1.3965638863198159e-05, + "loss": 0.1923, + "step": 39515 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964855078143371e-05, + "loss": 0.1101, + "step": 39520 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964071293088583e-05, + "loss": 0.201, + "step": 39525 + }, + { + "epoch": 1.84, + "learning_rate": 1.3963287508033799e-05, + "loss": 0.2879, + "step": 39530 + }, + { + "epoch": 1.84, + "learning_rate": 1.3962503722979011e-05, + "loss": 0.1518, + "step": 39535 + }, + { + "epoch": 1.84, + "learning_rate": 1.3961719937924225e-05, + "loss": 0.0882, + "step": 39540 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960936152869437e-05, + "loss": 0.0576, + "step": 39545 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960152367814653e-05, + "loss": 0.0711, + "step": 39550 + }, + { + "epoch": 1.85, + "learning_rate": 1.3959368582759865e-05, + "loss": 0.1663, + "step": 39555 + }, + { + "epoch": 1.85, + "learning_rate": 1.3958584797705079e-05, + "loss": 0.1107, + "step": 39560 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957801012650291e-05, + "loss": 0.1376, + "step": 39565 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957017227595507e-05, + "loss": 0.2258, + "step": 39570 + }, + { + "epoch": 1.85, + "learning_rate": 1.3956233442540719e-05, + "loss": 0.1872, + "step": 39575 + }, + { + "epoch": 1.85, + "learning_rate": 1.3955449657485933e-05, + "loss": 0.3751, + "step": 39580 + }, + { + "epoch": 1.85, + "learning_rate": 1.3954665872431145e-05, + "loss": 0.3203, + "step": 39585 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953882087376357e-05, + "loss": 0.0157, + "step": 39590 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953098302321573e-05, + "loss": 0.0469, + "step": 39595 + }, + { + "epoch": 1.85, + "learning_rate": 1.3952314517266785e-05, + "loss": 0.0631, + "step": 39600 + }, + { + "epoch": 1.85, + "learning_rate": 1.3951530732211999e-05, + "loss": 0.0862, + "step": 39605 + }, + { + "epoch": 1.85, + "learning_rate": 1.3950746947157211e-05, + "loss": 0.1194, + "step": 39610 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949963162102427e-05, + "loss": 0.1458, + "step": 39615 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949179377047639e-05, + "loss": 0.2322, + "step": 39620 + }, + { + "epoch": 1.85, + "learning_rate": 1.3948395591992853e-05, + "loss": 0.363, + "step": 39625 + }, + { + "epoch": 1.85, + "learning_rate": 1.3947611806938067e-05, + "loss": 0.2981, + "step": 39630 + }, + { + "epoch": 1.85, + "learning_rate": 1.394682802188328e-05, + "loss": 0.2648, + "step": 39635 + }, + { + "epoch": 1.85, + "learning_rate": 1.3946044236828493e-05, + "loss": 0.0346, + "step": 39640 + }, + { + "epoch": 1.85, + "learning_rate": 1.3945260451773709e-05, + "loss": 0.0576, + "step": 39645 + }, + { + "epoch": 1.85, + "learning_rate": 1.394447666671892e-05, + "loss": 0.1158, + "step": 39650 + }, + { + "epoch": 1.85, + "learning_rate": 1.3943692881664133e-05, + "loss": 0.0324, + "step": 39655 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942909096609347e-05, + "loss": 0.1207, + "step": 39660 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942125311554559e-05, + "loss": 0.1835, + "step": 39665 + }, + { + "epoch": 1.85, + "learning_rate": 1.3941341526499775e-05, + "loss": 0.1337, + "step": 39670 + }, + { + "epoch": 1.85, + "learning_rate": 1.3940557741444987e-05, + "loss": 0.1672, + "step": 39675 + }, + { + "epoch": 1.85, + "learning_rate": 1.3939773956390201e-05, + "loss": 0.2736, + "step": 39680 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938990171335413e-05, + "loss": 0.2847, + "step": 39685 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938206386280629e-05, + "loss": 0.044, + "step": 39690 + }, + { + "epoch": 1.85, + "learning_rate": 1.3937422601225841e-05, + "loss": 0.0497, + "step": 39695 + }, + { + "epoch": 1.85, + "learning_rate": 1.3936638816171055e-05, + "loss": 0.1422, + "step": 39700 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935855031116267e-05, + "loss": 0.0669, + "step": 39705 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935071246061483e-05, + "loss": 0.0804, + "step": 39710 + }, + { + "epoch": 1.85, + "learning_rate": 1.3934287461006695e-05, + "loss": 0.0735, + "step": 39715 + }, + { + "epoch": 1.85, + "learning_rate": 1.3933503675951907e-05, + "loss": 0.1105, + "step": 39720 + }, + { + "epoch": 1.85, + "learning_rate": 1.3932719890897121e-05, + "loss": 0.1518, + "step": 39725 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931936105842335e-05, + "loss": 0.3293, + "step": 39730 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931152320787549e-05, + "loss": 0.2068, + "step": 39735 + }, + { + "epoch": 1.85, + "learning_rate": 1.3930368535732761e-05, + "loss": 0.0597, + "step": 39740 + }, + { + "epoch": 1.85, + "learning_rate": 1.3929584750677977e-05, + "loss": 0.0848, + "step": 39745 + }, + { + "epoch": 1.85, + "learning_rate": 1.3928800965623189e-05, + "loss": 0.0794, + "step": 39750 + }, + { + "epoch": 1.86, + "learning_rate": 1.3928017180568403e-05, + "loss": 0.0735, + "step": 39755 + }, + { + "epoch": 1.86, + "learning_rate": 1.3927233395513615e-05, + "loss": 0.1662, + "step": 39760 + }, + { + "epoch": 1.86, + "learning_rate": 1.392644961045883e-05, + "loss": 0.1268, + "step": 39765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3925665825404043e-05, + "loss": 0.1259, + "step": 39770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924882040349257e-05, + "loss": 0.2228, + "step": 39775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924098255294469e-05, + "loss": 0.2452, + "step": 39780 + }, + { + "epoch": 1.86, + "learning_rate": 1.3923314470239681e-05, + "loss": 0.3537, + "step": 39785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3922530685184897e-05, + "loss": 0.0579, + "step": 39790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3921746900130109e-05, + "loss": 0.0819, + "step": 39795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920963115075323e-05, + "loss": 0.117, + "step": 39800 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920179330020535e-05, + "loss": 0.0466, + "step": 39805 + }, + { + "epoch": 1.86, + "learning_rate": 1.391939554496575e-05, + "loss": 0.1153, + "step": 39810 + }, + { + "epoch": 1.86, + "learning_rate": 1.3918611759910963e-05, + "loss": 0.1227, + "step": 39815 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917827974856177e-05, + "loss": 0.1416, + "step": 39820 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917044189801389e-05, + "loss": 0.1812, + "step": 39825 + }, + { + "epoch": 1.86, + "learning_rate": 1.3916260404746604e-05, + "loss": 0.3345, + "step": 39830 + }, + { + "epoch": 1.86, + "learning_rate": 1.3915476619691817e-05, + "loss": 0.359, + "step": 39835 + }, + { + "epoch": 1.86, + "learning_rate": 1.391469283463703e-05, + "loss": 0.019, + "step": 39840 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913909049582245e-05, + "loss": 0.037, + "step": 39845 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913125264527457e-05, + "loss": 0.0897, + "step": 39850 + }, + { + "epoch": 1.86, + "learning_rate": 1.391234147947267e-05, + "loss": 0.0943, + "step": 39855 + }, + { + "epoch": 1.86, + "learning_rate": 1.3911557694417883e-05, + "loss": 0.1399, + "step": 39860 + }, + { + "epoch": 1.86, + "learning_rate": 1.3910773909363098e-05, + "loss": 0.1403, + "step": 39865 + }, + { + "epoch": 1.86, + "learning_rate": 1.390999012430831e-05, + "loss": 0.2049, + "step": 39870 + }, + { + "epoch": 1.86, + "learning_rate": 1.3909206339253525e-05, + "loss": 0.2028, + "step": 39875 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908422554198737e-05, + "loss": 0.281, + "step": 39880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3907638769143952e-05, + "loss": 0.2856, + "step": 39885 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906854984089165e-05, + "loss": 0.0383, + "step": 39890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906071199034378e-05, + "loss": 0.0449, + "step": 39895 + }, + { + "epoch": 1.86, + "learning_rate": 1.390528741397959e-05, + "loss": 0.0715, + "step": 39900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3904503628924806e-05, + "loss": 0.0983, + "step": 39905 + }, + { + "epoch": 1.86, + "learning_rate": 1.3903719843870019e-05, + "loss": 0.1431, + "step": 39910 + }, + { + "epoch": 1.86, + "learning_rate": 1.390293605881523e-05, + "loss": 0.1682, + "step": 39915 + }, + { + "epoch": 1.86, + "learning_rate": 1.3902152273760445e-05, + "loss": 0.1312, + "step": 39920 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901368488705659e-05, + "loss": 0.1199, + "step": 39925 + }, + { + "epoch": 1.86, + "learning_rate": 1.3900584703650872e-05, + "loss": 0.1829, + "step": 39930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899800918596085e-05, + "loss": 0.2109, + "step": 39935 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899017133541299e-05, + "loss": 0.0469, + "step": 39940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3898233348486512e-05, + "loss": 0.056, + "step": 39945 + }, + { + "epoch": 1.86, + "learning_rate": 1.3897449563431726e-05, + "loss": 0.0796, + "step": 39950 + }, + { + "epoch": 1.86, + "learning_rate": 1.3896665778376939e-05, + "loss": 0.0767, + "step": 39955 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895881993322154e-05, + "loss": 0.0807, + "step": 39960 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895098208267366e-05, + "loss": 0.1694, + "step": 39965 + }, + { + "epoch": 1.87, + "learning_rate": 1.389431442321258e-05, + "loss": 0.1686, + "step": 39970 + }, + { + "epoch": 1.87, + "learning_rate": 1.3893530638157793e-05, + "loss": 0.1735, + "step": 39975 + }, + { + "epoch": 1.87, + "learning_rate": 1.3892746853103005e-05, + "loss": 0.3052, + "step": 39980 + }, + { + "epoch": 1.87, + "learning_rate": 1.389196306804822e-05, + "loss": 0.315, + "step": 39985 + }, + { + "epoch": 1.87, + "learning_rate": 1.3891179282993433e-05, + "loss": 0.0689, + "step": 39990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3890395497938646e-05, + "loss": 0.0481, + "step": 39995 + }, + { + "epoch": 1.87, + "learning_rate": 1.3889611712883859e-05, + "loss": 0.0813, + "step": 40000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888827927829074e-05, + "loss": 0.0883, + "step": 40005 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888044142774286e-05, + "loss": 0.0955, + "step": 40010 + }, + { + "epoch": 1.87, + "learning_rate": 1.38872603577195e-05, + "loss": 0.1675, + "step": 40015 + }, + { + "epoch": 1.87, + "learning_rate": 1.3886476572664713e-05, + "loss": 0.1361, + "step": 40020 + }, + { + "epoch": 1.87, + "learning_rate": 1.3885692787609928e-05, + "loss": 0.1712, + "step": 40025 + }, + { + "epoch": 1.87, + "learning_rate": 1.388490900255514e-05, + "loss": 0.3516, + "step": 40030 + }, + { + "epoch": 1.87, + "learning_rate": 1.3884125217500354e-05, + "loss": 0.233, + "step": 40035 + }, + { + "epoch": 1.87, + "learning_rate": 1.3883341432445567e-05, + "loss": 0.0227, + "step": 40040 + }, + { + "epoch": 1.87, + "learning_rate": 1.388255764739078e-05, + "loss": 0.0551, + "step": 40045 + }, + { + "epoch": 1.87, + "learning_rate": 1.3881773862335994e-05, + "loss": 0.0955, + "step": 40050 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880990077281207e-05, + "loss": 0.1336, + "step": 40055 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880206292226422e-05, + "loss": 0.0766, + "step": 40060 + }, + { + "epoch": 1.87, + "learning_rate": 1.3879422507171634e-05, + "loss": 0.0847, + "step": 40065 + }, + { + "epoch": 1.87, + "learning_rate": 1.3878638722116848e-05, + "loss": 0.143, + "step": 40070 + }, + { + "epoch": 1.87, + "learning_rate": 1.387785493706206e-05, + "loss": 0.1564, + "step": 40075 + }, + { + "epoch": 1.87, + "learning_rate": 1.3877071152007276e-05, + "loss": 0.3278, + "step": 40080 + }, + { + "epoch": 1.87, + "learning_rate": 1.3876287366952488e-05, + "loss": 0.2336, + "step": 40085 + }, + { + "epoch": 1.87, + "learning_rate": 1.3875503581897702e-05, + "loss": 0.0916, + "step": 40090 + }, + { + "epoch": 1.87, + "learning_rate": 1.3874719796842914e-05, + "loss": 0.11, + "step": 40095 + }, + { + "epoch": 1.87, + "learning_rate": 1.387393601178813e-05, + "loss": 0.0472, + "step": 40100 + }, + { + "epoch": 1.87, + "learning_rate": 1.3873152226733342e-05, + "loss": 0.0719, + "step": 40105 + }, + { + "epoch": 1.87, + "learning_rate": 1.3872368441678554e-05, + "loss": 0.1545, + "step": 40110 + }, + { + "epoch": 1.87, + "learning_rate": 1.3871584656623768e-05, + "loss": 0.1102, + "step": 40115 + }, + { + "epoch": 1.87, + "learning_rate": 1.387080087156898e-05, + "loss": 0.2047, + "step": 40120 + }, + { + "epoch": 1.87, + "learning_rate": 1.3870017086514196e-05, + "loss": 0.241, + "step": 40125 + }, + { + "epoch": 1.87, + "learning_rate": 1.3869233301459408e-05, + "loss": 0.3618, + "step": 40130 + }, + { + "epoch": 1.87, + "learning_rate": 1.3868449516404622e-05, + "loss": 0.3678, + "step": 40135 + }, + { + "epoch": 1.87, + "learning_rate": 1.3867665731349834e-05, + "loss": 0.0394, + "step": 40140 + }, + { + "epoch": 1.87, + "learning_rate": 1.386688194629505e-05, + "loss": 0.0337, + "step": 40145 + }, + { + "epoch": 1.87, + "learning_rate": 1.3866098161240262e-05, + "loss": 0.0898, + "step": 40150 + }, + { + "epoch": 1.87, + "learning_rate": 1.3865314376185476e-05, + "loss": 0.074, + "step": 40155 + }, + { + "epoch": 1.87, + "learning_rate": 1.386453059113069e-05, + "loss": 0.1356, + "step": 40160 + }, + { + "epoch": 1.87, + "learning_rate": 1.3863746806075904e-05, + "loss": 0.1607, + "step": 40165 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862963021021116e-05, + "loss": 0.1426, + "step": 40170 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862179235966328e-05, + "loss": 0.2663, + "step": 40175 + }, + { + "epoch": 1.87, + "learning_rate": 1.3861395450911544e-05, + "loss": 0.3673, + "step": 40180 + }, + { + "epoch": 1.88, + "learning_rate": 1.3860611665856756e-05, + "loss": 0.3103, + "step": 40185 + }, + { + "epoch": 1.88, + "learning_rate": 1.385982788080197e-05, + "loss": 0.0458, + "step": 40190 + }, + { + "epoch": 1.88, + "learning_rate": 1.3859044095747182e-05, + "loss": 0.0491, + "step": 40195 + }, + { + "epoch": 1.88, + "learning_rate": 1.3858260310692398e-05, + "loss": 0.0837, + "step": 40200 + }, + { + "epoch": 1.88, + "learning_rate": 1.385747652563761e-05, + "loss": 0.1077, + "step": 40205 + }, + { + "epoch": 1.88, + "learning_rate": 1.3856692740582824e-05, + "loss": 0.1128, + "step": 40210 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855908955528036e-05, + "loss": 0.2105, + "step": 40215 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855125170473252e-05, + "loss": 0.169, + "step": 40220 + }, + { + "epoch": 1.88, + "learning_rate": 1.3854341385418464e-05, + "loss": 0.1373, + "step": 40225 + }, + { + "epoch": 1.88, + "learning_rate": 1.3853557600363678e-05, + "loss": 0.2803, + "step": 40230 + }, + { + "epoch": 1.88, + "learning_rate": 1.385277381530889e-05, + "loss": 0.2458, + "step": 40235 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851990030254104e-05, + "loss": 0.067, + "step": 40240 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851206245199318e-05, + "loss": 0.0656, + "step": 40245 + }, + { + "epoch": 1.88, + "learning_rate": 1.385042246014453e-05, + "loss": 0.0975, + "step": 40250 + }, + { + "epoch": 1.88, + "learning_rate": 1.3849638675089744e-05, + "loss": 0.0711, + "step": 40255 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848854890034958e-05, + "loss": 0.1479, + "step": 40260 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848071104980172e-05, + "loss": 0.0855, + "step": 40265 + }, + { + "epoch": 1.88, + "learning_rate": 1.3847287319925384e-05, + "loss": 0.2257, + "step": 40270 + }, + { + "epoch": 1.88, + "learning_rate": 1.38465035348706e-05, + "loss": 0.2097, + "step": 40275 + }, + { + "epoch": 1.88, + "learning_rate": 1.3845719749815812e-05, + "loss": 0.4084, + "step": 40280 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844935964761026e-05, + "loss": 0.2848, + "step": 40285 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844152179706238e-05, + "loss": 0.104, + "step": 40290 + }, + { + "epoch": 1.88, + "learning_rate": 1.3843368394651454e-05, + "loss": 0.0429, + "step": 40295 + }, + { + "epoch": 1.88, + "learning_rate": 1.3842584609596666e-05, + "loss": 0.086, + "step": 40300 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841800824541878e-05, + "loss": 0.0811, + "step": 40305 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841017039487092e-05, + "loss": 0.058, + "step": 40310 + }, + { + "epoch": 1.88, + "learning_rate": 1.3840233254432304e-05, + "loss": 0.1156, + "step": 40315 + }, + { + "epoch": 1.88, + "learning_rate": 1.383944946937752e-05, + "loss": 0.2159, + "step": 40320 + }, + { + "epoch": 1.88, + "learning_rate": 1.3838665684322732e-05, + "loss": 0.2478, + "step": 40325 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837881899267946e-05, + "loss": 0.2005, + "step": 40330 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837098114213158e-05, + "loss": 0.3357, + "step": 40335 + }, + { + "epoch": 1.88, + "learning_rate": 1.3836314329158374e-05, + "loss": 0.0394, + "step": 40340 + }, + { + "epoch": 1.88, + "learning_rate": 1.3835530544103586e-05, + "loss": 0.0648, + "step": 40345 + }, + { + "epoch": 1.88, + "learning_rate": 1.38347467590488e-05, + "loss": 0.0823, + "step": 40350 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833962973994012e-05, + "loss": 0.0483, + "step": 40355 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833179188939228e-05, + "loss": 0.193, + "step": 40360 + }, + { + "epoch": 1.88, + "learning_rate": 1.383239540388444e-05, + "loss": 0.1007, + "step": 40365 + }, + { + "epoch": 1.88, + "learning_rate": 1.3831611618829652e-05, + "loss": 0.1092, + "step": 40370 + }, + { + "epoch": 1.88, + "learning_rate": 1.3830827833774868e-05, + "loss": 0.2738, + "step": 40375 + }, + { + "epoch": 1.88, + "learning_rate": 1.383004404872008e-05, + "loss": 0.2871, + "step": 40380 + }, + { + "epoch": 1.88, + "learning_rate": 1.3829260263665294e-05, + "loss": 0.2501, + "step": 40385 + }, + { + "epoch": 1.88, + "learning_rate": 1.3828476478610506e-05, + "loss": 0.0473, + "step": 40390 + }, + { + "epoch": 1.88, + "learning_rate": 1.3827692693555722e-05, + "loss": 0.0597, + "step": 40395 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826908908500934e-05, + "loss": 0.0646, + "step": 40400 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826125123446148e-05, + "loss": 0.1314, + "step": 40405 + }, + { + "epoch": 1.89, + "learning_rate": 1.382534133839136e-05, + "loss": 0.1628, + "step": 40410 + }, + { + "epoch": 1.89, + "learning_rate": 1.3824557553336576e-05, + "loss": 0.1858, + "step": 40415 + }, + { + "epoch": 1.89, + "learning_rate": 1.3823773768281788e-05, + "loss": 0.2056, + "step": 40420 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822989983227002e-05, + "loss": 0.1649, + "step": 40425 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822206198172214e-05, + "loss": 0.1464, + "step": 40430 + }, + { + "epoch": 1.89, + "learning_rate": 1.3821422413117426e-05, + "loss": 0.3024, + "step": 40435 + }, + { + "epoch": 1.89, + "learning_rate": 1.3820638628062642e-05, + "loss": 0.0702, + "step": 40440 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819854843007854e-05, + "loss": 0.0468, + "step": 40445 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819071057953068e-05, + "loss": 0.0472, + "step": 40450 + }, + { + "epoch": 1.89, + "learning_rate": 1.381828727289828e-05, + "loss": 0.0957, + "step": 40455 + }, + { + "epoch": 1.89, + "learning_rate": 1.3817503487843496e-05, + "loss": 0.1398, + "step": 40460 + }, + { + "epoch": 1.89, + "learning_rate": 1.3816719702788708e-05, + "loss": 0.109, + "step": 40465 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815935917733922e-05, + "loss": 0.2193, + "step": 40470 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815152132679136e-05, + "loss": 0.2995, + "step": 40475 + }, + { + "epoch": 1.89, + "learning_rate": 1.381436834762435e-05, + "loss": 0.336, + "step": 40480 + }, + { + "epoch": 1.89, + "learning_rate": 1.3813584562569562e-05, + "loss": 0.2809, + "step": 40485 + }, + { + "epoch": 1.89, + "learning_rate": 1.3812800777514777e-05, + "loss": 0.0376, + "step": 40490 + }, + { + "epoch": 1.89, + "learning_rate": 1.381201699245999e-05, + "loss": 0.0265, + "step": 40495 + }, + { + "epoch": 1.89, + "learning_rate": 1.3811233207405202e-05, + "loss": 0.0517, + "step": 40500 + }, + { + "epoch": 1.89, + "learning_rate": 1.3810449422350416e-05, + "loss": 0.0746, + "step": 40505 + }, + { + "epoch": 1.89, + "learning_rate": 1.3809665637295628e-05, + "loss": 0.0954, + "step": 40510 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808881852240844e-05, + "loss": 0.1229, + "step": 40515 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808098067186056e-05, + "loss": 0.1508, + "step": 40520 + }, + { + "epoch": 1.89, + "learning_rate": 1.380731428213127e-05, + "loss": 0.2266, + "step": 40525 + }, + { + "epoch": 1.89, + "learning_rate": 1.3806530497076482e-05, + "loss": 0.3108, + "step": 40530 + }, + { + "epoch": 1.89, + "learning_rate": 1.3805746712021697e-05, + "loss": 0.2541, + "step": 40535 + }, + { + "epoch": 1.89, + "learning_rate": 1.380496292696691e-05, + "loss": 0.0478, + "step": 40540 + }, + { + "epoch": 1.89, + "learning_rate": 1.3804179141912124e-05, + "loss": 0.0496, + "step": 40545 + }, + { + "epoch": 1.89, + "learning_rate": 1.3803395356857336e-05, + "loss": 0.0953, + "step": 40550 + }, + { + "epoch": 1.89, + "learning_rate": 1.3802611571802551e-05, + "loss": 0.1024, + "step": 40555 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801827786747764e-05, + "loss": 0.0996, + "step": 40560 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801044001692976e-05, + "loss": 0.1564, + "step": 40565 + }, + { + "epoch": 1.89, + "learning_rate": 1.380026021663819e-05, + "loss": 0.2166, + "step": 40570 + }, + { + "epoch": 1.89, + "learning_rate": 1.3799476431583404e-05, + "loss": 0.226, + "step": 40575 + }, + { + "epoch": 1.89, + "learning_rate": 1.3798692646528618e-05, + "loss": 0.219, + "step": 40580 + }, + { + "epoch": 1.89, + "learning_rate": 1.379790886147383e-05, + "loss": 0.1555, + "step": 40585 + }, + { + "epoch": 1.89, + "learning_rate": 1.3797125076419045e-05, + "loss": 0.0567, + "step": 40590 + }, + { + "epoch": 1.89, + "learning_rate": 1.3796341291364258e-05, + "loss": 0.0816, + "step": 40595 + }, + { + "epoch": 1.89, + "learning_rate": 1.3795557506309471e-05, + "loss": 0.0672, + "step": 40600 + }, + { + "epoch": 1.89, + "learning_rate": 1.3794773721254684e-05, + "loss": 0.0293, + "step": 40605 + }, + { + "epoch": 1.89, + "learning_rate": 1.37939899361999e-05, + "loss": 0.1691, + "step": 40610 + }, + { + "epoch": 1.9, + "learning_rate": 1.3793206151145111e-05, + "loss": 0.1475, + "step": 40615 + }, + { + "epoch": 1.9, + "learning_rate": 1.3792422366090325e-05, + "loss": 0.1439, + "step": 40620 + }, + { + "epoch": 1.9, + "learning_rate": 1.3791638581035538e-05, + "loss": 0.2063, + "step": 40625 + }, + { + "epoch": 1.9, + "learning_rate": 1.379085479598075e-05, + "loss": 0.2501, + "step": 40630 + }, + { + "epoch": 1.9, + "learning_rate": 1.3790071010925965e-05, + "loss": 0.24, + "step": 40635 + }, + { + "epoch": 1.9, + "learning_rate": 1.3789287225871178e-05, + "loss": 0.0474, + "step": 40640 + }, + { + "epoch": 1.9, + "learning_rate": 1.3788503440816392e-05, + "loss": 0.0137, + "step": 40645 + }, + { + "epoch": 1.9, + "learning_rate": 1.3787719655761604e-05, + "loss": 0.0644, + "step": 40650 + }, + { + "epoch": 1.9, + "learning_rate": 1.378693587070682e-05, + "loss": 0.0847, + "step": 40655 + }, + { + "epoch": 1.9, + "learning_rate": 1.3786152085652032e-05, + "loss": 0.136, + "step": 40660 + }, + { + "epoch": 1.9, + "learning_rate": 1.3785368300597245e-05, + "loss": 0.1225, + "step": 40665 + }, + { + "epoch": 1.9, + "learning_rate": 1.3784584515542458e-05, + "loss": 0.1019, + "step": 40670 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783800730487673e-05, + "loss": 0.2475, + "step": 40675 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783016945432885e-05, + "loss": 0.2234, + "step": 40680 + }, + { + "epoch": 1.9, + "learning_rate": 1.37822331603781e-05, + "loss": 0.2204, + "step": 40685 + }, + { + "epoch": 1.9, + "learning_rate": 1.3781449375323313e-05, + "loss": 0.0354, + "step": 40690 + }, + { + "epoch": 1.9, + "learning_rate": 1.3780665590268525e-05, + "loss": 0.0904, + "step": 40695 + }, + { + "epoch": 1.9, + "learning_rate": 1.377988180521374e-05, + "loss": 0.0707, + "step": 40700 + }, + { + "epoch": 1.9, + "learning_rate": 1.3779098020158952e-05, + "loss": 0.0884, + "step": 40705 + }, + { + "epoch": 1.9, + "learning_rate": 1.3778314235104167e-05, + "loss": 0.1006, + "step": 40710 + }, + { + "epoch": 1.9, + "learning_rate": 1.377753045004938e-05, + "loss": 0.0914, + "step": 40715 + }, + { + "epoch": 1.9, + "learning_rate": 1.3776746664994593e-05, + "loss": 0.2008, + "step": 40720 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775962879939806e-05, + "loss": 0.1749, + "step": 40725 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775179094885021e-05, + "loss": 0.3868, + "step": 40730 + }, + { + "epoch": 1.9, + "learning_rate": 1.3774395309830233e-05, + "loss": 0.3199, + "step": 40735 + }, + { + "epoch": 1.9, + "learning_rate": 1.3773611524775447e-05, + "loss": 0.065, + "step": 40740 + }, + { + "epoch": 1.9, + "learning_rate": 1.377282773972066e-05, + "loss": 0.071, + "step": 40745 + }, + { + "epoch": 1.9, + "learning_rate": 1.3772043954665875e-05, + "loss": 0.0642, + "step": 40750 + }, + { + "epoch": 1.9, + "learning_rate": 1.3771260169611087e-05, + "loss": 0.1533, + "step": 40755 + }, + { + "epoch": 1.9, + "learning_rate": 1.37704763845563e-05, + "loss": 0.0883, + "step": 40760 + }, + { + "epoch": 1.9, + "learning_rate": 1.3769692599501513e-05, + "loss": 0.2085, + "step": 40765 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768908814446726e-05, + "loss": 0.1509, + "step": 40770 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768125029391941e-05, + "loss": 0.1984, + "step": 40775 + }, + { + "epoch": 1.9, + "learning_rate": 1.3767341244337153e-05, + "loss": 0.4016, + "step": 40780 + }, + { + "epoch": 1.9, + "learning_rate": 1.3766557459282367e-05, + "loss": 0.2388, + "step": 40785 + }, + { + "epoch": 1.9, + "learning_rate": 1.3765773674227581e-05, + "loss": 0.0885, + "step": 40790 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764989889172795e-05, + "loss": 0.0829, + "step": 40795 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764206104118007e-05, + "loss": 0.0609, + "step": 40800 + }, + { + "epoch": 1.9, + "learning_rate": 1.3763422319063223e-05, + "loss": 0.0371, + "step": 40805 + }, + { + "epoch": 1.9, + "learning_rate": 1.3762638534008435e-05, + "loss": 0.0782, + "step": 40810 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761854748953649e-05, + "loss": 0.1198, + "step": 40815 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761070963898861e-05, + "loss": 0.1064, + "step": 40820 + }, + { + "epoch": 1.9, + "learning_rate": 1.3760287178844073e-05, + "loss": 0.2126, + "step": 40825 + }, + { + "epoch": 1.91, + "learning_rate": 1.3759503393789289e-05, + "loss": 0.2644, + "step": 40830 + }, + { + "epoch": 1.91, + "learning_rate": 1.3758719608734501e-05, + "loss": 0.2017, + "step": 40835 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757935823679715e-05, + "loss": 0.0589, + "step": 40840 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757152038624927e-05, + "loss": 0.0665, + "step": 40845 + }, + { + "epoch": 1.91, + "learning_rate": 1.3756368253570143e-05, + "loss": 0.0637, + "step": 40850 + }, + { + "epoch": 1.91, + "learning_rate": 1.3755584468515355e-05, + "loss": 0.1055, + "step": 40855 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754800683460569e-05, + "loss": 0.0967, + "step": 40860 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754016898405781e-05, + "loss": 0.1057, + "step": 40865 + }, + { + "epoch": 1.91, + "learning_rate": 1.3753233113350997e-05, + "loss": 0.1226, + "step": 40870 + }, + { + "epoch": 1.91, + "learning_rate": 1.3752449328296209e-05, + "loss": 0.1455, + "step": 40875 + }, + { + "epoch": 1.91, + "learning_rate": 1.3751665543241423e-05, + "loss": 0.4124, + "step": 40880 + }, + { + "epoch": 1.91, + "learning_rate": 1.3750881758186635e-05, + "loss": 0.2321, + "step": 40885 + }, + { + "epoch": 1.91, + "learning_rate": 1.375009797313185e-05, + "loss": 0.0824, + "step": 40890 + }, + { + "epoch": 1.91, + "learning_rate": 1.3749314188077063e-05, + "loss": 0.0691, + "step": 40895 + }, + { + "epoch": 1.91, + "learning_rate": 1.3748530403022275e-05, + "loss": 0.0824, + "step": 40900 + }, + { + "epoch": 1.91, + "learning_rate": 1.3747746617967491e-05, + "loss": 0.1582, + "step": 40905 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746962832912703e-05, + "loss": 0.104, + "step": 40910 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746179047857917e-05, + "loss": 0.1756, + "step": 40915 + }, + { + "epoch": 1.91, + "learning_rate": 1.374539526280313e-05, + "loss": 0.1663, + "step": 40920 + }, + { + "epoch": 1.91, + "learning_rate": 1.3744611477748345e-05, + "loss": 0.2132, + "step": 40925 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743827692693557e-05, + "loss": 0.2881, + "step": 40930 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743043907638771e-05, + "loss": 0.1533, + "step": 40935 + }, + { + "epoch": 1.91, + "learning_rate": 1.3742260122583983e-05, + "loss": 0.034, + "step": 40940 + }, + { + "epoch": 1.91, + "learning_rate": 1.3741476337529199e-05, + "loss": 0.0692, + "step": 40945 + }, + { + "epoch": 1.91, + "learning_rate": 1.3740692552474411e-05, + "loss": 0.0526, + "step": 40950 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739908767419623e-05, + "loss": 0.1364, + "step": 40955 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739124982364837e-05, + "loss": 0.0757, + "step": 40960 + }, + { + "epoch": 1.91, + "learning_rate": 1.373834119731005e-05, + "loss": 0.1443, + "step": 40965 + }, + { + "epoch": 1.91, + "learning_rate": 1.3737557412255265e-05, + "loss": 0.2183, + "step": 40970 + }, + { + "epoch": 1.91, + "learning_rate": 1.3736773627200477e-05, + "loss": 0.2046, + "step": 40975 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735989842145691e-05, + "loss": 0.3461, + "step": 40980 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735206057090903e-05, + "loss": 0.268, + "step": 40985 + }, + { + "epoch": 1.91, + "learning_rate": 1.3734422272036119e-05, + "loss": 0.0231, + "step": 40990 + }, + { + "epoch": 1.91, + "learning_rate": 1.3733638486981331e-05, + "loss": 0.1039, + "step": 40995 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732854701926545e-05, + "loss": 0.0878, + "step": 41000 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732070916871759e-05, + "loss": 0.0818, + "step": 41005 + }, + { + "epoch": 1.91, + "learning_rate": 1.3731287131816973e-05, + "loss": 0.108, + "step": 41010 + }, + { + "epoch": 1.91, + "learning_rate": 1.3730503346762185e-05, + "loss": 0.1776, + "step": 41015 + }, + { + "epoch": 1.91, + "learning_rate": 1.3729719561707397e-05, + "loss": 0.0533, + "step": 41020 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728935776652613e-05, + "loss": 0.1883, + "step": 41025 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728151991597825e-05, + "loss": 0.3926, + "step": 41030 + }, + { + "epoch": 1.91, + "learning_rate": 1.3727368206543039e-05, + "loss": 0.2241, + "step": 41035 + }, + { + "epoch": 1.91, + "learning_rate": 1.3726584421488251e-05, + "loss": 0.0445, + "step": 41040 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725800636433467e-05, + "loss": 0.087, + "step": 41045 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725016851378679e-05, + "loss": 0.0322, + "step": 41050 + }, + { + "epoch": 1.92, + "learning_rate": 1.3724233066323893e-05, + "loss": 0.0478, + "step": 41055 + }, + { + "epoch": 1.92, + "learning_rate": 1.3723449281269105e-05, + "loss": 0.1294, + "step": 41060 + }, + { + "epoch": 1.92, + "learning_rate": 1.372266549621432e-05, + "loss": 0.1646, + "step": 41065 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721881711159533e-05, + "loss": 0.1664, + "step": 41070 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721097926104747e-05, + "loss": 0.2329, + "step": 41075 + }, + { + "epoch": 1.92, + "learning_rate": 1.3720314141049959e-05, + "loss": 0.2623, + "step": 41080 + }, + { + "epoch": 1.92, + "learning_rate": 1.3719530355995173e-05, + "loss": 0.2138, + "step": 41085 + }, + { + "epoch": 1.92, + "learning_rate": 1.3718746570940387e-05, + "loss": 0.0504, + "step": 41090 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717962785885599e-05, + "loss": 0.0839, + "step": 41095 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717179000830813e-05, + "loss": 0.0589, + "step": 41100 + }, + { + "epoch": 1.92, + "learning_rate": 1.3716395215776027e-05, + "loss": 0.1023, + "step": 41105 + }, + { + "epoch": 1.92, + "learning_rate": 1.371561143072124e-05, + "loss": 0.1489, + "step": 41110 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714827645666453e-05, + "loss": 0.1497, + "step": 41115 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714043860611669e-05, + "loss": 0.1494, + "step": 41120 + }, + { + "epoch": 1.92, + "learning_rate": 1.371326007555688e-05, + "loss": 0.2351, + "step": 41125 + }, + { + "epoch": 1.92, + "learning_rate": 1.3712476290502095e-05, + "loss": 0.3537, + "step": 41130 + }, + { + "epoch": 1.92, + "learning_rate": 1.3711692505447307e-05, + "loss": 0.3639, + "step": 41135 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710908720392522e-05, + "loss": 0.0352, + "step": 41140 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710124935337735e-05, + "loss": 0.0505, + "step": 41145 + }, + { + "epoch": 1.92, + "learning_rate": 1.3709341150282947e-05, + "loss": 0.0559, + "step": 41150 + }, + { + "epoch": 1.92, + "learning_rate": 1.370855736522816e-05, + "loss": 0.0432, + "step": 41155 + }, + { + "epoch": 1.92, + "learning_rate": 1.3707773580173373e-05, + "loss": 0.2254, + "step": 41160 + }, + { + "epoch": 1.92, + "learning_rate": 1.3706989795118589e-05, + "loss": 0.1569, + "step": 41165 + }, + { + "epoch": 1.92, + "learning_rate": 1.37062060100638e-05, + "loss": 0.0941, + "step": 41170 + }, + { + "epoch": 1.92, + "learning_rate": 1.3705422225009015e-05, + "loss": 0.2558, + "step": 41175 + }, + { + "epoch": 1.92, + "learning_rate": 1.3704638439954227e-05, + "loss": 0.2511, + "step": 41180 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703854654899443e-05, + "loss": 0.3308, + "step": 41185 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703070869844655e-05, + "loss": 0.0499, + "step": 41190 + }, + { + "epoch": 1.92, + "learning_rate": 1.3702287084789869e-05, + "loss": 0.0471, + "step": 41195 + }, + { + "epoch": 1.92, + "learning_rate": 1.370150329973508e-05, + "loss": 0.063, + "step": 41200 + }, + { + "epoch": 1.92, + "learning_rate": 1.3700719514680296e-05, + "loss": 0.0849, + "step": 41205 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699935729625509e-05, + "loss": 0.1009, + "step": 41210 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699151944570721e-05, + "loss": 0.1324, + "step": 41215 + }, + { + "epoch": 1.92, + "learning_rate": 1.3698368159515936e-05, + "loss": 0.145, + "step": 41220 + }, + { + "epoch": 1.92, + "learning_rate": 1.3697584374461149e-05, + "loss": 0.206, + "step": 41225 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696800589406363e-05, + "loss": 0.3881, + "step": 41230 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696016804351575e-05, + "loss": 0.3881, + "step": 41235 + }, + { + "epoch": 1.92, + "learning_rate": 1.369523301929679e-05, + "loss": 0.1123, + "step": 41240 + }, + { + "epoch": 1.92, + "learning_rate": 1.3694449234242003e-05, + "loss": 0.0195, + "step": 41245 + }, + { + "epoch": 1.92, + "learning_rate": 1.3693665449187217e-05, + "loss": 0.1247, + "step": 41250 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692881664132429e-05, + "loss": 0.0557, + "step": 41255 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692097879077644e-05, + "loss": 0.0885, + "step": 41260 + }, + { + "epoch": 1.93, + "learning_rate": 1.3691314094022857e-05, + "loss": 0.1122, + "step": 41265 + }, + { + "epoch": 1.93, + "learning_rate": 1.369053030896807e-05, + "loss": 0.2121, + "step": 41270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3689746523913283e-05, + "loss": 0.1881, + "step": 41275 + }, + { + "epoch": 1.93, + "learning_rate": 1.3688962738858495e-05, + "loss": 0.2472, + "step": 41280 + }, + { + "epoch": 1.93, + "learning_rate": 1.368817895380371e-05, + "loss": 0.1782, + "step": 41285 + }, + { + "epoch": 1.93, + "learning_rate": 1.3687395168748923e-05, + "loss": 0.0595, + "step": 41290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3686611383694137e-05, + "loss": 0.0678, + "step": 41295 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685827598639349e-05, + "loss": 0.1161, + "step": 41300 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685043813584564e-05, + "loss": 0.0635, + "step": 41305 + }, + { + "epoch": 1.93, + "learning_rate": 1.3684260028529777e-05, + "loss": 0.1394, + "step": 41310 + }, + { + "epoch": 1.93, + "learning_rate": 1.368347624347499e-05, + "loss": 0.1814, + "step": 41315 + }, + { + "epoch": 1.93, + "learning_rate": 1.3682692458420204e-05, + "loss": 0.3026, + "step": 41320 + }, + { + "epoch": 1.93, + "learning_rate": 1.3681908673365418e-05, + "loss": 0.2154, + "step": 41325 + }, + { + "epoch": 1.93, + "learning_rate": 1.368112488831063e-05, + "loss": 0.2504, + "step": 41330 + }, + { + "epoch": 1.93, + "learning_rate": 1.3680341103255846e-05, + "loss": 0.3073, + "step": 41335 + }, + { + "epoch": 1.93, + "learning_rate": 1.3679557318201058e-05, + "loss": 0.0638, + "step": 41340 + }, + { + "epoch": 1.93, + "learning_rate": 1.367877353314627e-05, + "loss": 0.0279, + "step": 41345 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677989748091484e-05, + "loss": 0.1001, + "step": 41350 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677205963036697e-05, + "loss": 0.0529, + "step": 41355 + }, + { + "epoch": 1.93, + "learning_rate": 1.3676422177981912e-05, + "loss": 0.1054, + "step": 41360 + }, + { + "epoch": 1.93, + "learning_rate": 1.3675638392927124e-05, + "loss": 0.111, + "step": 41365 + }, + { + "epoch": 1.93, + "learning_rate": 1.3674854607872338e-05, + "loss": 0.0676, + "step": 41370 + }, + { + "epoch": 1.93, + "learning_rate": 1.367407082281755e-05, + "loss": 0.243, + "step": 41375 + }, + { + "epoch": 1.93, + "learning_rate": 1.3673287037762766e-05, + "loss": 0.4077, + "step": 41380 + }, + { + "epoch": 1.93, + "learning_rate": 1.3672503252707978e-05, + "loss": 0.3552, + "step": 41385 + }, + { + "epoch": 1.93, + "learning_rate": 1.3671719467653192e-05, + "loss": 0.0411, + "step": 41390 + }, + { + "epoch": 1.93, + "learning_rate": 1.3670935682598405e-05, + "loss": 0.039, + "step": 41395 + }, + { + "epoch": 1.93, + "learning_rate": 1.367015189754362e-05, + "loss": 0.0688, + "step": 41400 + }, + { + "epoch": 1.93, + "learning_rate": 1.3669368112488832e-05, + "loss": 0.1222, + "step": 41405 + }, + { + "epoch": 1.93, + "learning_rate": 1.3668584327434045e-05, + "loss": 0.073, + "step": 41410 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667800542379258e-05, + "loss": 0.0939, + "step": 41415 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667016757324472e-05, + "loss": 0.1917, + "step": 41420 + }, + { + "epoch": 1.93, + "learning_rate": 1.3666232972269686e-05, + "loss": 0.1409, + "step": 41425 + }, + { + "epoch": 1.93, + "learning_rate": 1.3665449187214898e-05, + "loss": 0.3746, + "step": 41430 + }, + { + "epoch": 1.93, + "learning_rate": 1.3664665402160114e-05, + "loss": 0.3048, + "step": 41435 + }, + { + "epoch": 1.93, + "learning_rate": 1.3663881617105326e-05, + "loss": 0.0518, + "step": 41440 + }, + { + "epoch": 1.93, + "learning_rate": 1.366309783205054e-05, + "loss": 0.0805, + "step": 41445 + }, + { + "epoch": 1.93, + "learning_rate": 1.3662314046995752e-05, + "loss": 0.0876, + "step": 41450 + }, + { + "epoch": 1.93, + "learning_rate": 1.3661530261940968e-05, + "loss": 0.0906, + "step": 41455 + }, + { + "epoch": 1.93, + "learning_rate": 1.366074647688618e-05, + "loss": 0.0911, + "step": 41460 + }, + { + "epoch": 1.93, + "learning_rate": 1.3659962691831394e-05, + "loss": 0.1121, + "step": 41465 + }, + { + "epoch": 1.94, + "learning_rate": 1.3659178906776606e-05, + "loss": 0.1549, + "step": 41470 + }, + { + "epoch": 1.94, + "learning_rate": 1.3658395121721819e-05, + "loss": 0.2158, + "step": 41475 + }, + { + "epoch": 1.94, + "learning_rate": 1.3657611336667034e-05, + "loss": 0.3177, + "step": 41480 + }, + { + "epoch": 1.94, + "learning_rate": 1.3656827551612246e-05, + "loss": 0.2616, + "step": 41485 + }, + { + "epoch": 1.94, + "learning_rate": 1.365604376655746e-05, + "loss": 0.0809, + "step": 41490 + }, + { + "epoch": 1.94, + "learning_rate": 1.3655259981502672e-05, + "loss": 0.0507, + "step": 41495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3654476196447888e-05, + "loss": 0.0408, + "step": 41500 + }, + { + "epoch": 1.94, + "learning_rate": 1.36536924113931e-05, + "loss": 0.079, + "step": 41505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652908626338314e-05, + "loss": 0.1017, + "step": 41510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652124841283526e-05, + "loss": 0.0745, + "step": 41515 + }, + { + "epoch": 1.94, + "learning_rate": 1.3651341056228742e-05, + "loss": 0.1234, + "step": 41520 + }, + { + "epoch": 1.94, + "learning_rate": 1.3650557271173954e-05, + "loss": 0.23, + "step": 41525 + }, + { + "epoch": 1.94, + "learning_rate": 1.3649773486119168e-05, + "loss": 0.2216, + "step": 41530 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648989701064382e-05, + "loss": 0.2246, + "step": 41535 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648205916009594e-05, + "loss": 0.0395, + "step": 41540 + }, + { + "epoch": 1.94, + "learning_rate": 1.3647422130954808e-05, + "loss": 0.0559, + "step": 41545 + }, + { + "epoch": 1.94, + "learning_rate": 1.364663834590002e-05, + "loss": 0.0517, + "step": 41550 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645854560845236e-05, + "loss": 0.1465, + "step": 41555 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645070775790448e-05, + "loss": 0.2362, + "step": 41560 + }, + { + "epoch": 1.94, + "learning_rate": 1.3644286990735662e-05, + "loss": 0.1821, + "step": 41565 + }, + { + "epoch": 1.94, + "learning_rate": 1.3643503205680874e-05, + "loss": 0.1984, + "step": 41570 + }, + { + "epoch": 1.94, + "learning_rate": 1.364271942062609e-05, + "loss": 0.2761, + "step": 41575 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641935635571302e-05, + "loss": 0.4789, + "step": 41580 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641151850516516e-05, + "loss": 0.2259, + "step": 41585 + }, + { + "epoch": 1.94, + "learning_rate": 1.3640368065461728e-05, + "loss": 0.0482, + "step": 41590 + }, + { + "epoch": 1.94, + "learning_rate": 1.3639584280406944e-05, + "loss": 0.0146, + "step": 41595 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638800495352156e-05, + "loss": 0.1254, + "step": 41600 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638016710297368e-05, + "loss": 0.0778, + "step": 41605 + }, + { + "epoch": 1.94, + "learning_rate": 1.3637232925242582e-05, + "loss": 0.0651, + "step": 41610 + }, + { + "epoch": 1.94, + "learning_rate": 1.3636449140187794e-05, + "loss": 0.1359, + "step": 41615 + }, + { + "epoch": 1.94, + "learning_rate": 1.363566535513301e-05, + "loss": 0.2115, + "step": 41620 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634881570078222e-05, + "loss": 0.1439, + "step": 41625 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634097785023436e-05, + "loss": 0.3997, + "step": 41630 + }, + { + "epoch": 1.94, + "learning_rate": 1.363331399996865e-05, + "loss": 0.2421, + "step": 41635 + }, + { + "epoch": 1.94, + "learning_rate": 1.3632530214913864e-05, + "loss": 0.0331, + "step": 41640 + }, + { + "epoch": 1.94, + "learning_rate": 1.3631746429859076e-05, + "loss": 0.1243, + "step": 41645 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630962644804292e-05, + "loss": 0.147, + "step": 41650 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630178859749504e-05, + "loss": 0.0483, + "step": 41655 + }, + { + "epoch": 1.94, + "learning_rate": 1.3629395074694718e-05, + "loss": 0.0896, + "step": 41660 + }, + { + "epoch": 1.94, + "learning_rate": 1.362861128963993e-05, + "loss": 0.1532, + "step": 41665 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627827504585142e-05, + "loss": 0.1593, + "step": 41670 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627043719530358e-05, + "loss": 0.1917, + "step": 41675 + }, + { + "epoch": 1.94, + "learning_rate": 1.362625993447557e-05, + "loss": 0.318, + "step": 41680 + }, + { + "epoch": 1.95, + "learning_rate": 1.3625476149420784e-05, + "loss": 0.3726, + "step": 41685 + }, + { + "epoch": 1.95, + "learning_rate": 1.3624692364365996e-05, + "loss": 0.0432, + "step": 41690 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623908579311212e-05, + "loss": 0.0398, + "step": 41695 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623124794256424e-05, + "loss": 0.0579, + "step": 41700 + }, + { + "epoch": 1.95, + "learning_rate": 1.3622341009201638e-05, + "loss": 0.0813, + "step": 41705 + }, + { + "epoch": 1.95, + "learning_rate": 1.362155722414685e-05, + "loss": 0.0798, + "step": 41710 + }, + { + "epoch": 1.95, + "learning_rate": 1.3620773439092066e-05, + "loss": 0.0768, + "step": 41715 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619989654037278e-05, + "loss": 0.1886, + "step": 41720 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619205868982492e-05, + "loss": 0.1185, + "step": 41725 + }, + { + "epoch": 1.95, + "learning_rate": 1.3618422083927704e-05, + "loss": 0.2939, + "step": 41730 + }, + { + "epoch": 1.95, + "learning_rate": 1.3617638298872918e-05, + "loss": 0.295, + "step": 41735 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616854513818132e-05, + "loss": 0.028, + "step": 41740 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616070728763344e-05, + "loss": 0.0461, + "step": 41745 + }, + { + "epoch": 1.95, + "learning_rate": 1.361528694370856e-05, + "loss": 0.0892, + "step": 41750 + }, + { + "epoch": 1.95, + "learning_rate": 1.3614503158653772e-05, + "loss": 0.0907, + "step": 41755 + }, + { + "epoch": 1.95, + "learning_rate": 1.3613719373598986e-05, + "loss": 0.0892, + "step": 41760 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612935588544198e-05, + "loss": 0.1427, + "step": 41765 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612151803489414e-05, + "loss": 0.108, + "step": 41770 + }, + { + "epoch": 1.95, + "learning_rate": 1.3611368018434626e-05, + "loss": 0.206, + "step": 41775 + }, + { + "epoch": 1.95, + "learning_rate": 1.361058423337984e-05, + "loss": 0.2341, + "step": 41780 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609800448325052e-05, + "loss": 0.2855, + "step": 41785 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609016663270268e-05, + "loss": 0.0509, + "step": 41790 + }, + { + "epoch": 1.95, + "learning_rate": 1.360823287821548e-05, + "loss": 0.0388, + "step": 41795 + }, + { + "epoch": 1.95, + "learning_rate": 1.3607449093160692e-05, + "loss": 0.0668, + "step": 41800 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606665308105906e-05, + "loss": 0.067, + "step": 41805 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605881523051118e-05, + "loss": 0.1153, + "step": 41810 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605097737996334e-05, + "loss": 0.183, + "step": 41815 + }, + { + "epoch": 1.95, + "learning_rate": 1.3604313952941546e-05, + "loss": 0.2191, + "step": 41820 + }, + { + "epoch": 1.95, + "learning_rate": 1.360353016788676e-05, + "loss": 0.1736, + "step": 41825 + }, + { + "epoch": 1.95, + "learning_rate": 1.3602746382831972e-05, + "loss": 0.3504, + "step": 41830 + }, + { + "epoch": 1.95, + "learning_rate": 1.3601962597777188e-05, + "loss": 0.2775, + "step": 41835 + }, + { + "epoch": 1.95, + "learning_rate": 1.36011788127224e-05, + "loss": 0.0406, + "step": 41840 + }, + { + "epoch": 1.95, + "learning_rate": 1.3600395027667614e-05, + "loss": 0.0676, + "step": 41845 + }, + { + "epoch": 1.95, + "learning_rate": 1.3599611242612828e-05, + "loss": 0.0566, + "step": 41850 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598827457558042e-05, + "loss": 0.0765, + "step": 41855 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598043672503254e-05, + "loss": 0.1108, + "step": 41860 + }, + { + "epoch": 1.95, + "learning_rate": 1.3597259887448466e-05, + "loss": 0.1186, + "step": 41865 + }, + { + "epoch": 1.95, + "learning_rate": 1.3596476102393682e-05, + "loss": 0.1556, + "step": 41870 + }, + { + "epoch": 1.95, + "learning_rate": 1.3595692317338894e-05, + "loss": 0.16, + "step": 41875 + }, + { + "epoch": 1.95, + "learning_rate": 1.3594908532284108e-05, + "loss": 0.3715, + "step": 41880 + }, + { + "epoch": 1.95, + "learning_rate": 1.359412474722932e-05, + "loss": 0.272, + "step": 41885 + }, + { + "epoch": 1.95, + "learning_rate": 1.3593340962174535e-05, + "loss": 0.0219, + "step": 41890 + }, + { + "epoch": 1.95, + "learning_rate": 1.3592557177119748e-05, + "loss": 0.0154, + "step": 41895 + }, + { + "epoch": 1.96, + "learning_rate": 1.3591773392064962e-05, + "loss": 0.1363, + "step": 41900 + }, + { + "epoch": 1.96, + "learning_rate": 1.3590989607010174e-05, + "loss": 0.074, + "step": 41905 + }, + { + "epoch": 1.96, + "learning_rate": 1.359020582195539e-05, + "loss": 0.1535, + "step": 41910 + }, + { + "epoch": 1.96, + "learning_rate": 1.3589422036900602e-05, + "loss": 0.0951, + "step": 41915 + }, + { + "epoch": 1.96, + "learning_rate": 1.3588638251845816e-05, + "loss": 0.1372, + "step": 41920 + }, + { + "epoch": 1.96, + "learning_rate": 1.3587854466791028e-05, + "loss": 0.2103, + "step": 41925 + }, + { + "epoch": 1.96, + "learning_rate": 1.358707068173624e-05, + "loss": 0.1851, + "step": 41930 + }, + { + "epoch": 1.96, + "learning_rate": 1.3586286896681456e-05, + "loss": 0.3416, + "step": 41935 + }, + { + "epoch": 1.96, + "learning_rate": 1.3585503111626668e-05, + "loss": 0.0271, + "step": 41940 + }, + { + "epoch": 1.96, + "learning_rate": 1.3584719326571882e-05, + "loss": 0.0605, + "step": 41945 + }, + { + "epoch": 1.96, + "learning_rate": 1.3583935541517096e-05, + "loss": 0.0437, + "step": 41950 + }, + { + "epoch": 1.96, + "learning_rate": 1.358315175646231e-05, + "loss": 0.0778, + "step": 41955 + }, + { + "epoch": 1.96, + "learning_rate": 1.3582367971407522e-05, + "loss": 0.0917, + "step": 41960 + }, + { + "epoch": 1.96, + "learning_rate": 1.3581584186352737e-05, + "loss": 0.1232, + "step": 41965 + }, + { + "epoch": 1.96, + "learning_rate": 1.358080040129795e-05, + "loss": 0.236, + "step": 41970 + }, + { + "epoch": 1.96, + "learning_rate": 1.3580016616243163e-05, + "loss": 0.2185, + "step": 41975 + }, + { + "epoch": 1.96, + "learning_rate": 1.3579232831188376e-05, + "loss": 0.393, + "step": 41980 + }, + { + "epoch": 1.96, + "learning_rate": 1.3578449046133591e-05, + "loss": 0.3043, + "step": 41985 + }, + { + "epoch": 1.96, + "learning_rate": 1.3577665261078803e-05, + "loss": 0.0506, + "step": 41990 + }, + { + "epoch": 1.96, + "learning_rate": 1.3576881476024016e-05, + "loss": 0.0796, + "step": 41995 + }, + { + "epoch": 1.96, + "learning_rate": 1.357609769096923e-05, + "loss": 0.0657, + "step": 42000 + }, + { + "epoch": 1.96, + "learning_rate": 1.3575313905914442e-05, + "loss": 0.0712, + "step": 42005 + }, + { + "epoch": 1.96, + "learning_rate": 1.3574530120859657e-05, + "loss": 0.1189, + "step": 42010 + }, + { + "epoch": 1.96, + "learning_rate": 1.357374633580487e-05, + "loss": 0.157, + "step": 42015 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572962550750083e-05, + "loss": 0.1085, + "step": 42020 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572178765695296e-05, + "loss": 0.2305, + "step": 42025 + }, + { + "epoch": 1.96, + "learning_rate": 1.3571394980640511e-05, + "loss": 0.3907, + "step": 42030 + }, + { + "epoch": 1.96, + "learning_rate": 1.3570611195585723e-05, + "loss": 0.2425, + "step": 42035 + }, + { + "epoch": 1.96, + "learning_rate": 1.3569827410530937e-05, + "loss": 0.039, + "step": 42040 + }, + { + "epoch": 1.96, + "learning_rate": 1.356904362547615e-05, + "loss": 0.0644, + "step": 42045 + }, + { + "epoch": 1.96, + "learning_rate": 1.3568259840421365e-05, + "loss": 0.0367, + "step": 42050 + }, + { + "epoch": 1.96, + "learning_rate": 1.3567476055366577e-05, + "loss": 0.0325, + "step": 42055 + }, + { + "epoch": 1.96, + "learning_rate": 1.356669227031179e-05, + "loss": 0.1135, + "step": 42060 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565908485257005e-05, + "loss": 0.1177, + "step": 42065 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565124700202217e-05, + "loss": 0.1744, + "step": 42070 + }, + { + "epoch": 1.96, + "learning_rate": 1.3564340915147431e-05, + "loss": 0.267, + "step": 42075 + }, + { + "epoch": 1.96, + "learning_rate": 1.3563557130092644e-05, + "loss": 0.2601, + "step": 42080 + }, + { + "epoch": 1.96, + "learning_rate": 1.3562773345037859e-05, + "loss": 0.3815, + "step": 42085 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561989559983071e-05, + "loss": 0.0435, + "step": 42090 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561205774928285e-05, + "loss": 0.0452, + "step": 42095 + }, + { + "epoch": 1.96, + "learning_rate": 1.3560421989873497e-05, + "loss": 0.1247, + "step": 42100 + }, + { + "epoch": 1.96, + "learning_rate": 1.3559638204818713e-05, + "loss": 0.0745, + "step": 42105 + }, + { + "epoch": 1.96, + "learning_rate": 1.3558854419763925e-05, + "loss": 0.1184, + "step": 42110 + }, + { + "epoch": 1.97, + "learning_rate": 1.355807063470914e-05, + "loss": 0.1522, + "step": 42115 + }, + { + "epoch": 1.97, + "learning_rate": 1.3557286849654351e-05, + "loss": 0.1299, + "step": 42120 + }, + { + "epoch": 1.97, + "learning_rate": 1.3556503064599564e-05, + "loss": 0.2057, + "step": 42125 + }, + { + "epoch": 1.97, + "learning_rate": 1.355571927954478e-05, + "loss": 0.2441, + "step": 42130 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554935494489991e-05, + "loss": 0.2413, + "step": 42135 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554151709435205e-05, + "loss": 0.063, + "step": 42140 + }, + { + "epoch": 1.97, + "learning_rate": 1.3553367924380418e-05, + "loss": 0.0286, + "step": 42145 + }, + { + "epoch": 1.97, + "learning_rate": 1.3552584139325633e-05, + "loss": 0.0437, + "step": 42150 + }, + { + "epoch": 1.97, + "learning_rate": 1.3551800354270845e-05, + "loss": 0.0742, + "step": 42155 + }, + { + "epoch": 1.97, + "learning_rate": 1.355101656921606e-05, + "loss": 0.1081, + "step": 42160 + }, + { + "epoch": 1.97, + "learning_rate": 1.3550232784161273e-05, + "loss": 0.1142, + "step": 42165 + }, + { + "epoch": 1.97, + "learning_rate": 1.3549448999106487e-05, + "loss": 0.1755, + "step": 42170 + }, + { + "epoch": 1.97, + "learning_rate": 1.35486652140517e-05, + "loss": 0.2252, + "step": 42175 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547881428996915e-05, + "loss": 0.2728, + "step": 42180 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547097643942127e-05, + "loss": 0.3548, + "step": 42185 + }, + { + "epoch": 1.97, + "learning_rate": 1.354631385888734e-05, + "loss": 0.1084, + "step": 42190 + }, + { + "epoch": 1.97, + "learning_rate": 1.3545530073832553e-05, + "loss": 0.0751, + "step": 42195 + }, + { + "epoch": 1.97, + "learning_rate": 1.3544746288777765e-05, + "loss": 0.1289, + "step": 42200 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543962503722981e-05, + "loss": 0.1643, + "step": 42205 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543178718668193e-05, + "loss": 0.1434, + "step": 42210 + }, + { + "epoch": 1.97, + "learning_rate": 1.3542394933613407e-05, + "loss": 0.1457, + "step": 42215 + }, + { + "epoch": 1.97, + "learning_rate": 1.354161114855862e-05, + "loss": 0.1197, + "step": 42220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540827363503835e-05, + "loss": 0.3008, + "step": 42225 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540043578449047e-05, + "loss": 0.2925, + "step": 42230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3539259793394261e-05, + "loss": 0.3225, + "step": 42235 + }, + { + "epoch": 1.97, + "learning_rate": 1.3538476008339473e-05, + "loss": 0.0174, + "step": 42240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3537692223284689e-05, + "loss": 0.0662, + "step": 42245 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536908438229901e-05, + "loss": 0.0883, + "step": 42250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536124653175113e-05, + "loss": 0.1134, + "step": 42255 + }, + { + "epoch": 1.97, + "learning_rate": 1.3535340868120327e-05, + "loss": 0.0576, + "step": 42260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3534557083065541e-05, + "loss": 0.1248, + "step": 42265 + }, + { + "epoch": 1.97, + "learning_rate": 1.3533773298010755e-05, + "loss": 0.1739, + "step": 42270 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532989512955967e-05, + "loss": 0.1615, + "step": 42275 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532205727901183e-05, + "loss": 0.2108, + "step": 42280 + }, + { + "epoch": 1.97, + "learning_rate": 1.3531421942846395e-05, + "loss": 0.1906, + "step": 42285 + }, + { + "epoch": 1.97, + "learning_rate": 1.3530638157791609e-05, + "loss": 0.0301, + "step": 42290 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529854372736821e-05, + "loss": 0.0248, + "step": 42295 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529070587682037e-05, + "loss": 0.0653, + "step": 42300 + }, + { + "epoch": 1.97, + "learning_rate": 1.3528286802627249e-05, + "loss": 0.1097, + "step": 42305 + }, + { + "epoch": 1.97, + "learning_rate": 1.3527503017572463e-05, + "loss": 0.1149, + "step": 42310 + }, + { + "epoch": 1.97, + "learning_rate": 1.3526719232517675e-05, + "loss": 0.1353, + "step": 42315 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525935447462887e-05, + "loss": 0.1043, + "step": 42320 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525151662408103e-05, + "loss": 0.3242, + "step": 42325 + }, + { + "epoch": 1.98, + "learning_rate": 1.3524367877353315e-05, + "loss": 0.2849, + "step": 42330 + }, + { + "epoch": 1.98, + "learning_rate": 1.3523584092298529e-05, + "loss": 0.3416, + "step": 42335 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522800307243741e-05, + "loss": 0.0547, + "step": 42340 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522016522188957e-05, + "loss": 0.0568, + "step": 42345 + }, + { + "epoch": 1.98, + "learning_rate": 1.3521232737134169e-05, + "loss": 0.0978, + "step": 42350 + }, + { + "epoch": 1.98, + "learning_rate": 1.3520448952079383e-05, + "loss": 0.1457, + "step": 42355 + }, + { + "epoch": 1.98, + "learning_rate": 1.3519665167024595e-05, + "loss": 0.0819, + "step": 42360 + }, + { + "epoch": 1.98, + "learning_rate": 1.351888138196981e-05, + "loss": 0.1787, + "step": 42365 + }, + { + "epoch": 1.98, + "learning_rate": 1.3518097596915023e-05, + "loss": 0.1941, + "step": 42370 + }, + { + "epoch": 1.98, + "learning_rate": 1.3517313811860237e-05, + "loss": 0.2267, + "step": 42375 + }, + { + "epoch": 1.98, + "learning_rate": 1.351653002680545e-05, + "loss": 0.377, + "step": 42380 + }, + { + "epoch": 1.98, + "learning_rate": 1.3515746241750663e-05, + "loss": 0.2612, + "step": 42385 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514962456695877e-05, + "loss": 0.0406, + "step": 42390 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514178671641089e-05, + "loss": 0.0252, + "step": 42395 + }, + { + "epoch": 1.98, + "learning_rate": 1.3513394886586305e-05, + "loss": 0.0102, + "step": 42400 + }, + { + "epoch": 1.98, + "learning_rate": 1.3512611101531517e-05, + "loss": 0.1246, + "step": 42405 + }, + { + "epoch": 1.98, + "learning_rate": 1.351182731647673e-05, + "loss": 0.1436, + "step": 42410 + }, + { + "epoch": 1.98, + "learning_rate": 1.3511043531421943e-05, + "loss": 0.1284, + "step": 42415 + }, + { + "epoch": 1.98, + "learning_rate": 1.3510259746367159e-05, + "loss": 0.1715, + "step": 42420 + }, + { + "epoch": 1.98, + "learning_rate": 1.3509475961312371e-05, + "loss": 0.2565, + "step": 42425 + }, + { + "epoch": 1.98, + "learning_rate": 1.3508692176257585e-05, + "loss": 0.2379, + "step": 42430 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507908391202797e-05, + "loss": 0.1811, + "step": 42435 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507124606148013e-05, + "loss": 0.0796, + "step": 42440 + }, + { + "epoch": 1.98, + "learning_rate": 1.3506340821093225e-05, + "loss": 0.0504, + "step": 42445 + }, + { + "epoch": 1.98, + "learning_rate": 1.3505557036038437e-05, + "loss": 0.1026, + "step": 42450 + }, + { + "epoch": 1.98, + "learning_rate": 1.3504773250983651e-05, + "loss": 0.0687, + "step": 42455 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503989465928863e-05, + "loss": 0.0637, + "step": 42460 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503205680874079e-05, + "loss": 0.0864, + "step": 42465 + }, + { + "epoch": 1.98, + "learning_rate": 1.3502421895819291e-05, + "loss": 0.1525, + "step": 42470 + }, + { + "epoch": 1.98, + "learning_rate": 1.3501638110764505e-05, + "loss": 0.1946, + "step": 42475 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500854325709719e-05, + "loss": 0.4185, + "step": 42480 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500070540654933e-05, + "loss": 0.2679, + "step": 42485 + }, + { + "epoch": 1.98, + "learning_rate": 1.3499286755600145e-05, + "loss": 0.0422, + "step": 42490 + }, + { + "epoch": 1.98, + "learning_rate": 1.349850297054536e-05, + "loss": 0.1082, + "step": 42495 + }, + { + "epoch": 1.98, + "learning_rate": 1.3497719185490573e-05, + "loss": 0.0823, + "step": 42500 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496935400435787e-05, + "loss": 0.0491, + "step": 42505 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496151615380999e-05, + "loss": 0.1773, + "step": 42510 + }, + { + "epoch": 1.98, + "learning_rate": 1.3495367830326211e-05, + "loss": 0.1344, + "step": 42515 + }, + { + "epoch": 1.98, + "learning_rate": 1.3494584045271427e-05, + "loss": 0.0834, + "step": 42520 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493800260216639e-05, + "loss": 0.2854, + "step": 42525 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493016475161853e-05, + "loss": 0.3165, + "step": 42530 + }, + { + "epoch": 1.98, + "learning_rate": 1.3492232690107065e-05, + "loss": 0.2865, + "step": 42535 + }, + { + "epoch": 1.98, + "learning_rate": 1.349144890505228e-05, + "loss": 0.0551, + "step": 42540 + }, + { + "epoch": 1.99, + "learning_rate": 1.3490665119997493e-05, + "loss": 0.0634, + "step": 42545 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489881334942707e-05, + "loss": 0.0897, + "step": 42550 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489097549887919e-05, + "loss": 0.1488, + "step": 42555 + }, + { + "epoch": 1.99, + "learning_rate": 1.3488313764833134e-05, + "loss": 0.1682, + "step": 42560 + }, + { + "epoch": 1.99, + "learning_rate": 1.3487529979778347e-05, + "loss": 0.1566, + "step": 42565 + }, + { + "epoch": 1.99, + "learning_rate": 1.348674619472356e-05, + "loss": 0.1352, + "step": 42570 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485962409668773e-05, + "loss": 0.2381, + "step": 42575 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485178624613987e-05, + "loss": 0.3136, + "step": 42580 + }, + { + "epoch": 1.99, + "learning_rate": 1.34843948395592e-05, + "loss": 0.2361, + "step": 42585 + }, + { + "epoch": 1.99, + "learning_rate": 1.3483611054504413e-05, + "loss": 0.0656, + "step": 42590 + }, + { + "epoch": 1.99, + "learning_rate": 1.3482827269449628e-05, + "loss": 0.0353, + "step": 42595 + }, + { + "epoch": 1.99, + "learning_rate": 1.348204348439484e-05, + "loss": 0.0433, + "step": 42600 + }, + { + "epoch": 1.99, + "learning_rate": 1.3481259699340055e-05, + "loss": 0.0876, + "step": 42605 + }, + { + "epoch": 1.99, + "learning_rate": 1.3480475914285267e-05, + "loss": 0.1082, + "step": 42610 + }, + { + "epoch": 1.99, + "learning_rate": 1.3479692129230482e-05, + "loss": 0.1904, + "step": 42615 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478908344175695e-05, + "loss": 0.2017, + "step": 42620 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478124559120908e-05, + "loss": 0.1698, + "step": 42625 + }, + { + "epoch": 1.99, + "learning_rate": 1.347734077406612e-05, + "loss": 0.4722, + "step": 42630 + }, + { + "epoch": 1.99, + "learning_rate": 1.3476556989011336e-05, + "loss": 0.2895, + "step": 42635 + }, + { + "epoch": 1.99, + "learning_rate": 1.3475773203956548e-05, + "loss": 0.0633, + "step": 42640 + }, + { + "epoch": 1.99, + "learning_rate": 1.347498941890176e-05, + "loss": 0.0354, + "step": 42645 + }, + { + "epoch": 1.99, + "learning_rate": 1.3474205633846975e-05, + "loss": 0.0921, + "step": 42650 + }, + { + "epoch": 1.99, + "learning_rate": 1.3473421848792187e-05, + "loss": 0.0774, + "step": 42655 + }, + { + "epoch": 1.99, + "learning_rate": 1.3472638063737402e-05, + "loss": 0.1163, + "step": 42660 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471854278682615e-05, + "loss": 0.1109, + "step": 42665 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471070493627829e-05, + "loss": 0.1748, + "step": 42670 + }, + { + "epoch": 1.99, + "learning_rate": 1.347028670857304e-05, + "loss": 0.1627, + "step": 42675 + }, + { + "epoch": 1.99, + "learning_rate": 1.3469502923518256e-05, + "loss": 0.3621, + "step": 42680 + }, + { + "epoch": 1.99, + "learning_rate": 1.3468719138463469e-05, + "loss": 0.193, + "step": 42685 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467935353408682e-05, + "loss": 0.0512, + "step": 42690 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467151568353896e-05, + "loss": 0.0666, + "step": 42695 + }, + { + "epoch": 1.99, + "learning_rate": 1.346636778329911e-05, + "loss": 0.1495, + "step": 42700 + }, + { + "epoch": 1.99, + "learning_rate": 1.3465583998244322e-05, + "loss": 0.13, + "step": 42705 + }, + { + "epoch": 1.99, + "learning_rate": 1.3464800213189535e-05, + "loss": 0.0941, + "step": 42710 + }, + { + "epoch": 1.99, + "learning_rate": 1.346401642813475e-05, + "loss": 0.1118, + "step": 42715 + }, + { + "epoch": 1.99, + "learning_rate": 1.3463232643079963e-05, + "loss": 0.1672, + "step": 42720 + }, + { + "epoch": 1.99, + "learning_rate": 1.3462448858025176e-05, + "loss": 0.2903, + "step": 42725 + }, + { + "epoch": 1.99, + "learning_rate": 1.3461665072970389e-05, + "loss": 0.5016, + "step": 42730 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460881287915604e-05, + "loss": 0.3474, + "step": 42735 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460097502860816e-05, + "loss": 0.0593, + "step": 42740 + }, + { + "epoch": 1.99, + "learning_rate": 1.345931371780603e-05, + "loss": 0.0545, + "step": 42745 + }, + { + "epoch": 1.99, + "learning_rate": 1.3458529932751243e-05, + "loss": 0.0924, + "step": 42750 + }, + { + "epoch": 2.0, + "learning_rate": 1.3457746147696458e-05, + "loss": 0.0801, + "step": 42755 + }, + { + "epoch": 2.0, + "learning_rate": 1.345696236264167e-05, + "loss": 0.0866, + "step": 42760 + }, + { + "epoch": 2.0, + "learning_rate": 1.3456178577586884e-05, + "loss": 0.1346, + "step": 42765 + }, + { + "epoch": 2.0, + "learning_rate": 1.3455394792532096e-05, + "loss": 0.1853, + "step": 42770 + }, + { + "epoch": 2.0, + "learning_rate": 1.3454611007477309e-05, + "loss": 0.1966, + "step": 42775 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453827222422524e-05, + "loss": 0.238, + "step": 42780 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453043437367737e-05, + "loss": 0.2965, + "step": 42785 + }, + { + "epoch": 2.0, + "learning_rate": 1.345225965231295e-05, + "loss": 0.1616, + "step": 42790 + }, + { + "epoch": 2.0, + "learning_rate": 1.3451475867258164e-05, + "loss": 0.0177, + "step": 42795 + }, + { + "epoch": 2.0, + "learning_rate": 1.3450692082203378e-05, + "loss": 0.0763, + "step": 42800 + }, + { + "epoch": 2.0, + "learning_rate": 1.344990829714859e-05, + "loss": 0.108, + "step": 42805 + }, + { + "epoch": 2.0, + "learning_rate": 1.3449124512093806e-05, + "loss": 0.0828, + "step": 42810 + }, + { + "epoch": 2.0, + "learning_rate": 1.3448340727039018e-05, + "loss": 0.0873, + "step": 42815 + }, + { + "epoch": 2.0, + "learning_rate": 1.3447556941984232e-05, + "loss": 0.1054, + "step": 42820 + }, + { + "epoch": 2.0, + "learning_rate": 1.3446773156929444e-05, + "loss": 0.2872, + "step": 42825 + }, + { + "epoch": 2.0, + "learning_rate": 1.344598937187466e-05, + "loss": 0.2569, + "step": 42830 + }, + { + "epoch": 2.0, + "learning_rate": 1.3445205586819872e-05, + "loss": 0.2157, + "step": 42835 + }, + { + "epoch": 2.0, + "learning_rate": 1.3444421801765084e-05, + "loss": 0.074, + "step": 42840 + }, + { + "epoch": 2.0, + "learning_rate": 1.3443638016710298e-05, + "loss": 0.1238, + "step": 42845 + }, + { + "epoch": 2.0, + "learning_rate": 1.344285423165551e-05, + "loss": 0.1821, + "step": 42850 + }, + { + "epoch": 2.0, + "learning_rate": 1.3442070446600726e-05, + "loss": 0.0898, + "step": 42855 + }, + { + "epoch": 2.0, + "learning_rate": 1.3441286661545938e-05, + "loss": 0.269, + "step": 42860 + }, + { + "epoch": 2.0, + "eval_cer": 0.015201216458283512, + "eval_loss": 0.34811559319496155, + "eval_runtime": 472.4357, + "eval_samples_per_second": 40.323, + "eval_steps_per_second": 5.042, + "eval_wer": 0.12934863064396743, + "step": 42862 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 5.144893173595299e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-42862/training_args.bin b/checkpoint-42862/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-42862/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/checkpoint-64293/config.json b/checkpoint-64293/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-64293/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-64293/optimizer.pt b/checkpoint-64293/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c9b416b9007d8eec37d02a06472b67262728c90 --- /dev/null +++ b/checkpoint-64293/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f449fd91f5b3b5c068dcd9756404426ee3dbd001cee1939591b0a31072a6f2 +size 1847481 diff --git a/checkpoint-64293/preprocessor_config.json b/checkpoint-64293/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-64293/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-64293/pytorch_model.bin b/checkpoint-64293/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd1c66ac35d6cbed02c4733ac54a2af2daa159b9 --- /dev/null +++ b/checkpoint-64293/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc8dd19eb7009f20d4447482fcddd4c2b4d655e4137b8cc06205b09e62140ae +size 377656855 diff --git a/checkpoint-64293/rng_state.pth b/checkpoint-64293/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4beda7ae970713f1f9902848891fcd62e47064b9 --- /dev/null +++ b/checkpoint-64293/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e85f46e4383b8da0d91aea3717549c81ffdf7df2199637091a8b68ba17dce0d +size 14567 diff --git a/checkpoint-64293/scaler.pt b/checkpoint-64293/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..70c934eb47aca7d31751bae28df1408565c10500 --- /dev/null +++ b/checkpoint-64293/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44902f468724d244832ae5f6d1760490e6a64fd9c3c730d0e2d8d928c92b34c +size 559 diff --git a/checkpoint-64293/scheduler.pt b/checkpoint-64293/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee385c4b005a869e15ccc300414e18cc9539abc9 --- /dev/null +++ b/checkpoint-64293/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4d748ddd7c10bf15e3e21e6cbc27ddad74ba702753ba17867dcf56cc009b31 +size 623 diff --git a/checkpoint-64293/trainer_state.json b/checkpoint-64293/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b9872a8ea50989a110f8d9c24fad72f94a941585 --- /dev/null +++ b/checkpoint-64293/trainer_state.json @@ -0,0 +1,77194 @@ +{ + "best_metric": 0.03575053811073303, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-64293", + "epoch": 3.0, + "global_step": 64293, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + }, + { + "epoch": 1.0, + "learning_rate": 1.6798708322229715e-05, + "loss": 0.2708, + "step": 21435 + }, + { + "epoch": 1.0, + "learning_rate": 1.6797924537174925e-05, + "loss": 0.0632, + "step": 21440 + }, + { + "epoch": 1.0, + "learning_rate": 1.679714075212014e-05, + "loss": 0.0625, + "step": 21445 + }, + { + "epoch": 1.0, + "learning_rate": 1.6796356967065353e-05, + "loss": 0.1169, + "step": 21450 + }, + { + "epoch": 1.0, + "learning_rate": 1.6795573182010567e-05, + "loss": 0.1674, + "step": 21455 + }, + { + "epoch": 1.0, + "learning_rate": 1.679478939695578e-05, + "loss": 0.1875, + "step": 21460 + }, + { + "epoch": 1.0, + "learning_rate": 1.6794005611900995e-05, + "loss": 0.2132, + "step": 21465 + }, + { + "epoch": 1.0, + "learning_rate": 1.6793221826846205e-05, + "loss": 0.2001, + "step": 21470 + }, + { + "epoch": 1.0, + "learning_rate": 1.6792438041791422e-05, + "loss": 0.2851, + "step": 21475 + }, + { + "epoch": 1.0, + "learning_rate": 1.6791654256736633e-05, + "loss": 0.2083, + "step": 21480 + }, + { + "epoch": 1.0, + "learning_rate": 1.6790870471681847e-05, + "loss": 0.4056, + "step": 21485 + }, + { + "epoch": 1.0, + "learning_rate": 1.679008668662706e-05, + "loss": 0.0784, + "step": 21490 + }, + { + "epoch": 1.0, + "learning_rate": 1.6789302901572275e-05, + "loss": 0.1198, + "step": 21495 + }, + { + "epoch": 1.0, + "learning_rate": 1.678851911651749e-05, + "loss": 0.0637, + "step": 21500 + }, + { + "epoch": 1.0, + "learning_rate": 1.67877353314627e-05, + "loss": 0.0893, + "step": 21505 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786951546407913e-05, + "loss": 0.1339, + "step": 21510 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786167761353127e-05, + "loss": 0.2976, + "step": 21515 + }, + { + "epoch": 1.0, + "learning_rate": 1.678538397629834e-05, + "loss": 0.2174, + "step": 21520 + }, + { + "epoch": 1.0, + "learning_rate": 1.6784600191243555e-05, + "loss": 0.2785, + "step": 21525 + }, + { + "epoch": 1.0, + "learning_rate": 1.678381640618877e-05, + "loss": 0.5756, + "step": 21530 + }, + { + "epoch": 1.0, + "learning_rate": 1.6783032621133983e-05, + "loss": 0.3013, + "step": 21535 + }, + { + "epoch": 1.01, + "learning_rate": 1.6782248836079196e-05, + "loss": 0.0721, + "step": 21540 + }, + { + "epoch": 1.01, + "learning_rate": 1.6781465051024407e-05, + "loss": 0.0953, + "step": 21545 + }, + { + "epoch": 1.01, + "learning_rate": 1.6780681265969624e-05, + "loss": 0.0683, + "step": 21550 + }, + { + "epoch": 1.01, + "learning_rate": 1.6779897480914835e-05, + "loss": 0.155, + "step": 21555 + }, + { + "epoch": 1.01, + "learning_rate": 1.677911369586005e-05, + "loss": 0.2342, + "step": 21560 + }, + { + "epoch": 1.01, + "learning_rate": 1.6778329910805263e-05, + "loss": 0.1579, + "step": 21565 + }, + { + "epoch": 1.01, + "learning_rate": 1.6777546125750473e-05, + "loss": 0.2137, + "step": 21570 + }, + { + "epoch": 1.01, + "learning_rate": 1.677676234069569e-05, + "loss": 0.3298, + "step": 21575 + }, + { + "epoch": 1.01, + "learning_rate": 1.67759785556409e-05, + "loss": 0.3294, + "step": 21580 + }, + { + "epoch": 1.01, + "learning_rate": 1.6775194770586115e-05, + "loss": 0.347, + "step": 21585 + }, + { + "epoch": 1.01, + "learning_rate": 1.677441098553133e-05, + "loss": 0.0707, + "step": 21590 + }, + { + "epoch": 1.01, + "learning_rate": 1.6773627200476543e-05, + "loss": 0.0494, + "step": 21595 + }, + { + "epoch": 1.01, + "learning_rate": 1.6772843415421757e-05, + "loss": 0.0614, + "step": 21600 + }, + { + "epoch": 1.01, + "learning_rate": 1.677205963036697e-05, + "loss": 0.1719, + "step": 21605 + }, + { + "epoch": 1.01, + "learning_rate": 1.6771275845312184e-05, + "loss": 0.1739, + "step": 21610 + }, + { + "epoch": 1.01, + "learning_rate": 1.6770492060257398e-05, + "loss": 0.2605, + "step": 21615 + }, + { + "epoch": 1.01, + "learning_rate": 1.676970827520261e-05, + "loss": 0.2799, + "step": 21620 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768924490147823e-05, + "loss": 0.3259, + "step": 21625 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768140705093037e-05, + "loss": 0.3949, + "step": 21630 + }, + { + "epoch": 1.01, + "learning_rate": 1.676735692003825e-05, + "loss": 0.4475, + "step": 21635 + }, + { + "epoch": 1.01, + "learning_rate": 1.6766573134983464e-05, + "loss": 0.067, + "step": 21640 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765789349928675e-05, + "loss": 0.0889, + "step": 21645 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765005564873892e-05, + "loss": 0.0926, + "step": 21650 + }, + { + "epoch": 1.01, + "learning_rate": 1.6764221779819103e-05, + "loss": 0.169, + "step": 21655 + }, + { + "epoch": 1.01, + "learning_rate": 1.6763437994764317e-05, + "loss": 0.1749, + "step": 21660 + }, + { + "epoch": 1.01, + "learning_rate": 1.676265420970953e-05, + "loss": 0.2628, + "step": 21665 + }, + { + "epoch": 1.01, + "learning_rate": 1.6761870424654744e-05, + "loss": 0.2144, + "step": 21670 + }, + { + "epoch": 1.01, + "learning_rate": 1.676108663959996e-05, + "loss": 0.274, + "step": 21675 + }, + { + "epoch": 1.01, + "learning_rate": 1.6760302854545172e-05, + "loss": 0.3579, + "step": 21680 + }, + { + "epoch": 1.01, + "learning_rate": 1.6759519069490383e-05, + "loss": 0.3807, + "step": 21685 + }, + { + "epoch": 1.01, + "learning_rate": 1.67587352844356e-05, + "loss": 0.1214, + "step": 21690 + }, + { + "epoch": 1.01, + "learning_rate": 1.675795149938081e-05, + "loss": 0.1011, + "step": 21695 + }, + { + "epoch": 1.01, + "learning_rate": 1.6757167714326024e-05, + "loss": 0.0696, + "step": 21700 + }, + { + "epoch": 1.01, + "learning_rate": 1.675638392927124e-05, + "loss": 0.1463, + "step": 21705 + }, + { + "epoch": 1.01, + "learning_rate": 1.6755600144216452e-05, + "loss": 0.1416, + "step": 21710 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754816359161666e-05, + "loss": 0.128, + "step": 21715 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754032574106877e-05, + "loss": 0.1893, + "step": 21720 + }, + { + "epoch": 1.01, + "learning_rate": 1.675324878905209e-05, + "loss": 0.3064, + "step": 21725 + }, + { + "epoch": 1.01, + "learning_rate": 1.6752465003997305e-05, + "loss": 0.3098, + "step": 21730 + }, + { + "epoch": 1.01, + "learning_rate": 1.675168121894252e-05, + "loss": 0.3172, + "step": 21735 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750897433887732e-05, + "loss": 0.0526, + "step": 21740 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750113648832946e-05, + "loss": 0.0767, + "step": 21745 + }, + { + "epoch": 1.01, + "learning_rate": 1.674932986377816e-05, + "loss": 0.105, + "step": 21750 + }, + { + "epoch": 1.02, + "learning_rate": 1.6748546078723374e-05, + "loss": 0.128, + "step": 21755 + }, + { + "epoch": 1.02, + "learning_rate": 1.6747762293668585e-05, + "loss": 0.1071, + "step": 21760 + }, + { + "epoch": 1.02, + "learning_rate": 1.67469785086138e-05, + "loss": 0.1624, + "step": 21765 + }, + { + "epoch": 1.02, + "learning_rate": 1.6746194723559012e-05, + "loss": 0.3446, + "step": 21770 + }, + { + "epoch": 1.02, + "learning_rate": 1.6745410938504226e-05, + "loss": 0.3188, + "step": 21775 + }, + { + "epoch": 1.02, + "learning_rate": 1.674462715344944e-05, + "loss": 0.5742, + "step": 21780 + }, + { + "epoch": 1.02, + "learning_rate": 1.674384336839465e-05, + "loss": 0.2419, + "step": 21785 + }, + { + "epoch": 1.02, + "learning_rate": 1.6743059583339868e-05, + "loss": 0.0936, + "step": 21790 + }, + { + "epoch": 1.02, + "learning_rate": 1.674227579828508e-05, + "loss": 0.0601, + "step": 21795 + }, + { + "epoch": 1.02, + "learning_rate": 1.6741492013230292e-05, + "loss": 0.0906, + "step": 21800 + }, + { + "epoch": 1.02, + "learning_rate": 1.6740708228175506e-05, + "loss": 0.104, + "step": 21805 + }, + { + "epoch": 1.02, + "learning_rate": 1.673992444312072e-05, + "loss": 0.1736, + "step": 21810 + }, + { + "epoch": 1.02, + "learning_rate": 1.6739140658065934e-05, + "loss": 0.1642, + "step": 21815 + }, + { + "epoch": 1.02, + "learning_rate": 1.6738356873011148e-05, + "loss": 0.194, + "step": 21820 + }, + { + "epoch": 1.02, + "learning_rate": 1.673757308795636e-05, + "loss": 0.1698, + "step": 21825 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736789302901572e-05, + "loss": 0.5151, + "step": 21830 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736005517846786e-05, + "loss": 0.4388, + "step": 21835 + }, + { + "epoch": 1.02, + "learning_rate": 1.6735221732792e-05, + "loss": 0.0625, + "step": 21840 + }, + { + "epoch": 1.02, + "learning_rate": 1.6734437947737214e-05, + "loss": 0.0551, + "step": 21845 + }, + { + "epoch": 1.02, + "learning_rate": 1.6733654162682428e-05, + "loss": 0.086, + "step": 21850 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732870377627642e-05, + "loss": 0.107, + "step": 21855 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732086592572853e-05, + "loss": 0.1138, + "step": 21860 + }, + { + "epoch": 1.02, + "learning_rate": 1.673130280751807e-05, + "loss": 0.1885, + "step": 21865 + }, + { + "epoch": 1.02, + "learning_rate": 1.673051902246328e-05, + "loss": 0.2082, + "step": 21870 + }, + { + "epoch": 1.02, + "learning_rate": 1.6729735237408494e-05, + "loss": 0.2015, + "step": 21875 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728951452353708e-05, + "loss": 0.4405, + "step": 21880 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728167667298922e-05, + "loss": 0.2858, + "step": 21885 + }, + { + "epoch": 1.02, + "learning_rate": 1.6727383882244136e-05, + "loss": 0.0538, + "step": 21890 + }, + { + "epoch": 1.02, + "learning_rate": 1.6726600097189346e-05, + "loss": 0.0779, + "step": 21895 + }, + { + "epoch": 1.02, + "learning_rate": 1.672581631213456e-05, + "loss": 0.142, + "step": 21900 + }, + { + "epoch": 1.02, + "learning_rate": 1.6725032527079774e-05, + "loss": 0.1888, + "step": 21905 + }, + { + "epoch": 1.02, + "learning_rate": 1.6724248742024988e-05, + "loss": 0.1195, + "step": 21910 + }, + { + "epoch": 1.02, + "learning_rate": 1.6723464956970202e-05, + "loss": 0.2157, + "step": 21915 + }, + { + "epoch": 1.02, + "learning_rate": 1.6722681171915416e-05, + "loss": 0.2258, + "step": 21920 + }, + { + "epoch": 1.02, + "learning_rate": 1.672189738686063e-05, + "loss": 0.3311, + "step": 21925 + }, + { + "epoch": 1.02, + "learning_rate": 1.6721113601805844e-05, + "loss": 0.3652, + "step": 21930 + }, + { + "epoch": 1.02, + "learning_rate": 1.6720329816751054e-05, + "loss": 0.3514, + "step": 21935 + }, + { + "epoch": 1.02, + "learning_rate": 1.6719546031696268e-05, + "loss": 0.0737, + "step": 21940 + }, + { + "epoch": 1.02, + "learning_rate": 1.6718762246641482e-05, + "loss": 0.0702, + "step": 21945 + }, + { + "epoch": 1.02, + "learning_rate": 1.6717978461586696e-05, + "loss": 0.1228, + "step": 21950 + }, + { + "epoch": 1.02, + "learning_rate": 1.671719467653191e-05, + "loss": 0.1717, + "step": 21955 + }, + { + "epoch": 1.02, + "learning_rate": 1.671641089147712e-05, + "loss": 0.1327, + "step": 21960 + }, + { + "epoch": 1.02, + "learning_rate": 1.6715627106422338e-05, + "loss": 0.18, + "step": 21965 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714843321367548e-05, + "loss": 0.1339, + "step": 21970 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714059536312762e-05, + "loss": 0.4464, + "step": 21975 + }, + { + "epoch": 1.03, + "learning_rate": 1.6713275751257976e-05, + "loss": 0.4614, + "step": 21980 + }, + { + "epoch": 1.03, + "learning_rate": 1.671249196620319e-05, + "loss": 0.4731, + "step": 21985 + }, + { + "epoch": 1.03, + "learning_rate": 1.6711708181148404e-05, + "loss": 0.0743, + "step": 21990 + }, + { + "epoch": 1.03, + "learning_rate": 1.6710924396093618e-05, + "loss": 0.0772, + "step": 21995 + }, + { + "epoch": 1.03, + "learning_rate": 1.671014061103883e-05, + "loss": 0.1295, + "step": 22000 + }, + { + "epoch": 1.03, + "learning_rate": 1.6709356825984046e-05, + "loss": 0.1665, + "step": 22005 + }, + { + "epoch": 1.03, + "learning_rate": 1.6708573040929256e-05, + "loss": 0.1362, + "step": 22010 + }, + { + "epoch": 1.03, + "learning_rate": 1.670778925587447e-05, + "loss": 0.1636, + "step": 22015 + }, + { + "epoch": 1.03, + "learning_rate": 1.6707005470819684e-05, + "loss": 0.2899, + "step": 22020 + }, + { + "epoch": 1.03, + "learning_rate": 1.6706221685764898e-05, + "loss": 0.2897, + "step": 22025 + }, + { + "epoch": 1.03, + "learning_rate": 1.6705437900710112e-05, + "loss": 0.3886, + "step": 22030 + }, + { + "epoch": 1.03, + "learning_rate": 1.6704654115655322e-05, + "loss": 0.2882, + "step": 22035 + }, + { + "epoch": 1.03, + "learning_rate": 1.6703870330600536e-05, + "loss": 0.0652, + "step": 22040 + }, + { + "epoch": 1.03, + "learning_rate": 1.670308654554575e-05, + "loss": 0.0749, + "step": 22045 + }, + { + "epoch": 1.03, + "learning_rate": 1.6702302760490964e-05, + "loss": 0.0939, + "step": 22050 + }, + { + "epoch": 1.03, + "learning_rate": 1.6701518975436178e-05, + "loss": 0.1791, + "step": 22055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6700735190381392e-05, + "loss": 0.1139, + "step": 22060 + }, + { + "epoch": 1.03, + "learning_rate": 1.6699951405326606e-05, + "loss": 0.2075, + "step": 22065 + }, + { + "epoch": 1.03, + "learning_rate": 1.669916762027182e-05, + "loss": 0.2175, + "step": 22070 + }, + { + "epoch": 1.03, + "learning_rate": 1.669838383521703e-05, + "loss": 0.2589, + "step": 22075 + }, + { + "epoch": 1.03, + "learning_rate": 1.6697600050162247e-05, + "loss": 0.42, + "step": 22080 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696816265107458e-05, + "loss": 0.3279, + "step": 22085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696032480052672e-05, + "loss": 0.0295, + "step": 22090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6695248694997886e-05, + "loss": 0.0318, + "step": 22095 + }, + { + "epoch": 1.03, + "learning_rate": 1.6694464909943096e-05, + "loss": 0.1887, + "step": 22100 + }, + { + "epoch": 1.03, + "learning_rate": 1.6693681124888314e-05, + "loss": 0.1017, + "step": 22105 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692897339833524e-05, + "loss": 0.145, + "step": 22110 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692113554778738e-05, + "loss": 0.195, + "step": 22115 + }, + { + "epoch": 1.03, + "learning_rate": 1.6691329769723952e-05, + "loss": 0.2584, + "step": 22120 + }, + { + "epoch": 1.03, + "learning_rate": 1.6690545984669166e-05, + "loss": 0.2965, + "step": 22125 + }, + { + "epoch": 1.03, + "learning_rate": 1.668976219961438e-05, + "loss": 0.3912, + "step": 22130 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688978414559594e-05, + "loss": 0.3964, + "step": 22135 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688194629504804e-05, + "loss": 0.0447, + "step": 22140 + }, + { + "epoch": 1.03, + "learning_rate": 1.668741084445002e-05, + "loss": 0.1708, + "step": 22145 + }, + { + "epoch": 1.03, + "learning_rate": 1.6686627059395232e-05, + "loss": 0.1072, + "step": 22150 + }, + { + "epoch": 1.03, + "learning_rate": 1.6685843274340446e-05, + "loss": 0.1223, + "step": 22155 + }, + { + "epoch": 1.03, + "learning_rate": 1.668505948928566e-05, + "loss": 0.1006, + "step": 22160 + }, + { + "epoch": 1.03, + "learning_rate": 1.6684275704230874e-05, + "loss": 0.1096, + "step": 22165 + }, + { + "epoch": 1.03, + "learning_rate": 1.6683491919176088e-05, + "loss": 0.2708, + "step": 22170 + }, + { + "epoch": 1.03, + "learning_rate": 1.6682708134121298e-05, + "loss": 0.3093, + "step": 22175 + }, + { + "epoch": 1.03, + "learning_rate": 1.6681924349066515e-05, + "loss": 0.3701, + "step": 22180 + }, + { + "epoch": 1.04, + "learning_rate": 1.6681140564011726e-05, + "loss": 0.251, + "step": 22185 + }, + { + "epoch": 1.04, + "learning_rate": 1.668035677895694e-05, + "loss": 0.0674, + "step": 22190 + }, + { + "epoch": 1.04, + "learning_rate": 1.6679572993902154e-05, + "loss": 0.1336, + "step": 22195 + }, + { + "epoch": 1.04, + "learning_rate": 1.6678789208847368e-05, + "loss": 0.136, + "step": 22200 + }, + { + "epoch": 1.04, + "learning_rate": 1.667800542379258e-05, + "loss": 0.0998, + "step": 22205 + }, + { + "epoch": 1.04, + "learning_rate": 1.6677221638737795e-05, + "loss": 0.13, + "step": 22210 + }, + { + "epoch": 1.04, + "learning_rate": 1.6676437853683006e-05, + "loss": 0.1497, + "step": 22215 + }, + { + "epoch": 1.04, + "learning_rate": 1.667565406862822e-05, + "loss": 0.1846, + "step": 22220 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674870283573434e-05, + "loss": 0.2499, + "step": 22225 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674086498518648e-05, + "loss": 0.4191, + "step": 22230 + }, + { + "epoch": 1.04, + "learning_rate": 1.667330271346386e-05, + "loss": 0.3157, + "step": 22235 + }, + { + "epoch": 1.04, + "learning_rate": 1.6672518928409075e-05, + "loss": 0.0555, + "step": 22240 + }, + { + "epoch": 1.04, + "learning_rate": 1.667173514335429e-05, + "loss": 0.1057, + "step": 22245 + }, + { + "epoch": 1.04, + "learning_rate": 1.66709513582995e-05, + "loss": 0.0907, + "step": 22250 + }, + { + "epoch": 1.04, + "learning_rate": 1.6670167573244714e-05, + "loss": 0.1564, + "step": 22255 + }, + { + "epoch": 1.04, + "learning_rate": 1.6669383788189928e-05, + "loss": 0.1607, + "step": 22260 + }, + { + "epoch": 1.04, + "learning_rate": 1.666860000313514e-05, + "loss": 0.1959, + "step": 22265 + }, + { + "epoch": 1.04, + "learning_rate": 1.6667816218080356e-05, + "loss": 0.2283, + "step": 22270 + }, + { + "epoch": 1.04, + "learning_rate": 1.666703243302557e-05, + "loss": 0.3232, + "step": 22275 + }, + { + "epoch": 1.04, + "learning_rate": 1.6666248647970783e-05, + "loss": 0.344, + "step": 22280 + }, + { + "epoch": 1.04, + "learning_rate": 1.6665464862915994e-05, + "loss": 0.2879, + "step": 22285 + }, + { + "epoch": 1.04, + "learning_rate": 1.6664681077861208e-05, + "loss": 0.077, + "step": 22290 + }, + { + "epoch": 1.04, + "learning_rate": 1.666389729280642e-05, + "loss": 0.0681, + "step": 22295 + }, + { + "epoch": 1.04, + "learning_rate": 1.6663113507751636e-05, + "loss": 0.0788, + "step": 22300 + }, + { + "epoch": 1.04, + "learning_rate": 1.666232972269685e-05, + "loss": 0.1024, + "step": 22305 + }, + { + "epoch": 1.04, + "learning_rate": 1.6661545937642063e-05, + "loss": 0.1455, + "step": 22310 + }, + { + "epoch": 1.04, + "learning_rate": 1.6660762152587274e-05, + "loss": 0.2212, + "step": 22315 + }, + { + "epoch": 1.04, + "learning_rate": 1.665997836753249e-05, + "loss": 0.1622, + "step": 22320 + }, + { + "epoch": 1.04, + "learning_rate": 1.6659194582477702e-05, + "loss": 0.257, + "step": 22325 + }, + { + "epoch": 1.04, + "learning_rate": 1.6658410797422916e-05, + "loss": 0.386, + "step": 22330 + }, + { + "epoch": 1.04, + "learning_rate": 1.665762701236813e-05, + "loss": 0.3866, + "step": 22335 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656843227313343e-05, + "loss": 0.068, + "step": 22340 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656059442258557e-05, + "loss": 0.1259, + "step": 22345 + }, + { + "epoch": 1.04, + "learning_rate": 1.6655275657203768e-05, + "loss": 0.1199, + "step": 22350 + }, + { + "epoch": 1.04, + "learning_rate": 1.6654491872148982e-05, + "loss": 0.1675, + "step": 22355 + }, + { + "epoch": 1.04, + "learning_rate": 1.6653708087094196e-05, + "loss": 0.1525, + "step": 22360 + }, + { + "epoch": 1.04, + "learning_rate": 1.665292430203941e-05, + "loss": 0.1875, + "step": 22365 + }, + { + "epoch": 1.04, + "learning_rate": 1.6652140516984623e-05, + "loss": 0.2257, + "step": 22370 + }, + { + "epoch": 1.04, + "learning_rate": 1.6651356731929837e-05, + "loss": 0.195, + "step": 22375 + }, + { + "epoch": 1.04, + "learning_rate": 1.665057294687505e-05, + "loss": 0.3703, + "step": 22380 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649789161820265e-05, + "loss": 0.3111, + "step": 22385 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649005376765476e-05, + "loss": 0.0501, + "step": 22390 + }, + { + "epoch": 1.04, + "learning_rate": 1.6648221591710693e-05, + "loss": 0.1009, + "step": 22395 + }, + { + "epoch": 1.05, + "learning_rate": 1.6647437806655904e-05, + "loss": 0.1188, + "step": 22400 + }, + { + "epoch": 1.05, + "learning_rate": 1.6646654021601117e-05, + "loss": 0.1223, + "step": 22405 + }, + { + "epoch": 1.05, + "learning_rate": 1.664587023654633e-05, + "loss": 0.2532, + "step": 22410 + }, + { + "epoch": 1.05, + "learning_rate": 1.6645086451491542e-05, + "loss": 0.2103, + "step": 22415 + }, + { + "epoch": 1.05, + "learning_rate": 1.664430266643676e-05, + "loss": 0.2827, + "step": 22420 + }, + { + "epoch": 1.05, + "learning_rate": 1.664351888138197e-05, + "loss": 0.3525, + "step": 22425 + }, + { + "epoch": 1.05, + "learning_rate": 1.6642735096327184e-05, + "loss": 0.4255, + "step": 22430 + }, + { + "epoch": 1.05, + "learning_rate": 1.6641951311272397e-05, + "loss": 0.3211, + "step": 22435 + }, + { + "epoch": 1.05, + "learning_rate": 1.664116752621761e-05, + "loss": 0.0945, + "step": 22440 + }, + { + "epoch": 1.05, + "learning_rate": 1.6640383741162825e-05, + "loss": 0.0941, + "step": 22445 + }, + { + "epoch": 1.05, + "learning_rate": 1.663959995610804e-05, + "loss": 0.1018, + "step": 22450 + }, + { + "epoch": 1.05, + "learning_rate": 1.663881617105325e-05, + "loss": 0.1377, + "step": 22455 + }, + { + "epoch": 1.05, + "learning_rate": 1.6638032385998467e-05, + "loss": 0.1353, + "step": 22460 + }, + { + "epoch": 1.05, + "learning_rate": 1.6637248600943678e-05, + "loss": 0.1506, + "step": 22465 + }, + { + "epoch": 1.05, + "learning_rate": 1.663646481588889e-05, + "loss": 0.2194, + "step": 22470 + }, + { + "epoch": 1.05, + "learning_rate": 1.6635681030834105e-05, + "loss": 0.2673, + "step": 22475 + }, + { + "epoch": 1.05, + "learning_rate": 1.663489724577932e-05, + "loss": 0.4444, + "step": 22480 + }, + { + "epoch": 1.05, + "learning_rate": 1.6634113460724533e-05, + "loss": 0.4203, + "step": 22485 + }, + { + "epoch": 1.05, + "learning_rate": 1.6633329675669744e-05, + "loss": 0.0998, + "step": 22490 + }, + { + "epoch": 1.05, + "learning_rate": 1.663254589061496e-05, + "loss": 0.0645, + "step": 22495 + }, + { + "epoch": 1.05, + "learning_rate": 1.663176210556017e-05, + "loss": 0.0815, + "step": 22500 + }, + { + "epoch": 1.05, + "learning_rate": 1.6630978320505385e-05, + "loss": 0.1239, + "step": 22505 + }, + { + "epoch": 1.05, + "learning_rate": 1.66301945354506e-05, + "loss": 0.1158, + "step": 22510 + }, + { + "epoch": 1.05, + "learning_rate": 1.6629410750395813e-05, + "loss": 0.1965, + "step": 22515 + }, + { + "epoch": 1.05, + "learning_rate": 1.6628626965341027e-05, + "loss": 0.2295, + "step": 22520 + }, + { + "epoch": 1.05, + "learning_rate": 1.662784318028624e-05, + "loss": 0.3215, + "step": 22525 + }, + { + "epoch": 1.05, + "learning_rate": 1.662705939523145e-05, + "loss": 0.3832, + "step": 22530 + }, + { + "epoch": 1.05, + "learning_rate": 1.662627561017667e-05, + "loss": 0.3457, + "step": 22535 + }, + { + "epoch": 1.05, + "learning_rate": 1.662549182512188e-05, + "loss": 0.1041, + "step": 22540 + }, + { + "epoch": 1.05, + "learning_rate": 1.6624708040067093e-05, + "loss": 0.0946, + "step": 22545 + }, + { + "epoch": 1.05, + "learning_rate": 1.6623924255012307e-05, + "loss": 0.0797, + "step": 22550 + }, + { + "epoch": 1.05, + "learning_rate": 1.662314046995752e-05, + "loss": 0.1227, + "step": 22555 + }, + { + "epoch": 1.05, + "learning_rate": 1.6622356684902735e-05, + "loss": 0.1388, + "step": 22560 + }, + { + "epoch": 1.05, + "learning_rate": 1.6621572899847945e-05, + "loss": 0.1356, + "step": 22565 + }, + { + "epoch": 1.05, + "learning_rate": 1.662078911479316e-05, + "loss": 0.1936, + "step": 22570 + }, + { + "epoch": 1.05, + "learning_rate": 1.6620005329738373e-05, + "loss": 0.3707, + "step": 22575 + }, + { + "epoch": 1.05, + "learning_rate": 1.6619221544683587e-05, + "loss": 0.4417, + "step": 22580 + }, + { + "epoch": 1.05, + "learning_rate": 1.66184377596288e-05, + "loss": 0.4056, + "step": 22585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6617653974574015e-05, + "loss": 0.1009, + "step": 22590 + }, + { + "epoch": 1.05, + "learning_rate": 1.661687018951923e-05, + "loss": 0.1221, + "step": 22595 + }, + { + "epoch": 1.05, + "learning_rate": 1.6616086404464443e-05, + "loss": 0.1144, + "step": 22600 + }, + { + "epoch": 1.05, + "learning_rate": 1.6615302619409653e-05, + "loss": 0.1499, + "step": 22605 + }, + { + "epoch": 1.06, + "learning_rate": 1.6614518834354867e-05, + "loss": 0.1002, + "step": 22610 + }, + { + "epoch": 1.06, + "learning_rate": 1.661373504930008e-05, + "loss": 0.1595, + "step": 22615 + }, + { + "epoch": 1.06, + "learning_rate": 1.6612951264245295e-05, + "loss": 0.2121, + "step": 22620 + }, + { + "epoch": 1.06, + "learning_rate": 1.661216747919051e-05, + "loss": 0.2338, + "step": 22625 + }, + { + "epoch": 1.06, + "learning_rate": 1.661138369413572e-05, + "loss": 0.375, + "step": 22630 + }, + { + "epoch": 1.06, + "learning_rate": 1.6610599909080937e-05, + "loss": 0.2275, + "step": 22635 + }, + { + "epoch": 1.06, + "learning_rate": 1.6609816124026147e-05, + "loss": 0.0468, + "step": 22640 + }, + { + "epoch": 1.06, + "learning_rate": 1.660903233897136e-05, + "loss": 0.0573, + "step": 22645 + }, + { + "epoch": 1.06, + "learning_rate": 1.6608248553916575e-05, + "loss": 0.0989, + "step": 22650 + }, + { + "epoch": 1.06, + "learning_rate": 1.660746476886179e-05, + "loss": 0.1472, + "step": 22655 + }, + { + "epoch": 1.06, + "learning_rate": 1.6606680983807003e-05, + "loss": 0.1244, + "step": 22660 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605897198752217e-05, + "loss": 0.2461, + "step": 22665 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605113413697427e-05, + "loss": 0.1669, + "step": 22670 + }, + { + "epoch": 1.06, + "learning_rate": 1.660432962864264e-05, + "loss": 0.2483, + "step": 22675 + }, + { + "epoch": 1.06, + "learning_rate": 1.6603545843587855e-05, + "loss": 0.3463, + "step": 22680 + }, + { + "epoch": 1.06, + "learning_rate": 1.660276205853307e-05, + "loss": 0.3456, + "step": 22685 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601978273478283e-05, + "loss": 0.0599, + "step": 22690 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601194488423497e-05, + "loss": 0.0639, + "step": 22695 + }, + { + "epoch": 1.06, + "learning_rate": 1.660041070336871e-05, + "loss": 0.1262, + "step": 22700 + }, + { + "epoch": 1.06, + "learning_rate": 1.659962691831392e-05, + "loss": 0.1424, + "step": 22705 + }, + { + "epoch": 1.06, + "learning_rate": 1.659884313325914e-05, + "loss": 0.1273, + "step": 22710 + }, + { + "epoch": 1.06, + "learning_rate": 1.659805934820435e-05, + "loss": 0.2208, + "step": 22715 + }, + { + "epoch": 1.06, + "learning_rate": 1.6597275563149563e-05, + "loss": 0.1814, + "step": 22720 + }, + { + "epoch": 1.06, + "learning_rate": 1.6596491778094777e-05, + "loss": 0.2325, + "step": 22725 + }, + { + "epoch": 1.06, + "learning_rate": 1.659570799303999e-05, + "loss": 0.4139, + "step": 22730 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594924207985205e-05, + "loss": 0.2675, + "step": 22735 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594140422930415e-05, + "loss": 0.0246, + "step": 22740 + }, + { + "epoch": 1.06, + "learning_rate": 1.659335663787563e-05, + "loss": 0.0829, + "step": 22745 + }, + { + "epoch": 1.06, + "learning_rate": 1.6592572852820843e-05, + "loss": 0.1863, + "step": 22750 + }, + { + "epoch": 1.06, + "learning_rate": 1.6591789067766057e-05, + "loss": 0.1843, + "step": 22755 + }, + { + "epoch": 1.06, + "learning_rate": 1.659100528271127e-05, + "loss": 0.1143, + "step": 22760 + }, + { + "epoch": 1.06, + "learning_rate": 1.6590221497656485e-05, + "loss": 0.1305, + "step": 22765 + }, + { + "epoch": 1.06, + "learning_rate": 1.65894377126017e-05, + "loss": 0.229, + "step": 22770 + }, + { + "epoch": 1.06, + "learning_rate": 1.6588653927546913e-05, + "loss": 0.2498, + "step": 22775 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587870142492123e-05, + "loss": 0.3766, + "step": 22780 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587086357437337e-05, + "loss": 0.2961, + "step": 22785 + }, + { + "epoch": 1.06, + "learning_rate": 1.658630257238255e-05, + "loss": 0.069, + "step": 22790 + }, + { + "epoch": 1.06, + "learning_rate": 1.6585518787327765e-05, + "loss": 0.0735, + "step": 22795 + }, + { + "epoch": 1.06, + "learning_rate": 1.658473500227298e-05, + "loss": 0.1352, + "step": 22800 + }, + { + "epoch": 1.06, + "learning_rate": 1.658395121721819e-05, + "loss": 0.1563, + "step": 22805 + }, + { + "epoch": 1.06, + "learning_rate": 1.6583167432163407e-05, + "loss": 0.155, + "step": 22810 + }, + { + "epoch": 1.06, + "learning_rate": 1.6582383647108617e-05, + "loss": 0.2341, + "step": 22815 + }, + { + "epoch": 1.06, + "learning_rate": 1.658159986205383e-05, + "loss": 0.22, + "step": 22820 + }, + { + "epoch": 1.07, + "learning_rate": 1.6580816076999045e-05, + "loss": 0.2533, + "step": 22825 + }, + { + "epoch": 1.07, + "learning_rate": 1.658003229194426e-05, + "loss": 0.5077, + "step": 22830 + }, + { + "epoch": 1.07, + "learning_rate": 1.6579248506889473e-05, + "loss": 0.49, + "step": 22835 + }, + { + "epoch": 1.07, + "learning_rate": 1.6578464721834687e-05, + "loss": 0.166, + "step": 22840 + }, + { + "epoch": 1.07, + "learning_rate": 1.6577680936779897e-05, + "loss": 0.0973, + "step": 22845 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576897151725114e-05, + "loss": 0.0986, + "step": 22850 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576113366670325e-05, + "loss": 0.1956, + "step": 22855 + }, + { + "epoch": 1.07, + "learning_rate": 1.657532958161554e-05, + "loss": 0.2051, + "step": 22860 + }, + { + "epoch": 1.07, + "learning_rate": 1.6574545796560753e-05, + "loss": 0.1668, + "step": 22865 + }, + { + "epoch": 1.07, + "learning_rate": 1.6573762011505967e-05, + "loss": 0.2645, + "step": 22870 + }, + { + "epoch": 1.07, + "learning_rate": 1.657297822645118e-05, + "loss": 0.2412, + "step": 22875 + }, + { + "epoch": 1.07, + "learning_rate": 1.657219444139639e-05, + "loss": 0.36, + "step": 22880 + }, + { + "epoch": 1.07, + "learning_rate": 1.6571410656341605e-05, + "loss": 0.3128, + "step": 22885 + }, + { + "epoch": 1.07, + "learning_rate": 1.657062687128682e-05, + "loss": 0.0542, + "step": 22890 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569843086232033e-05, + "loss": 0.0474, + "step": 22895 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569059301177247e-05, + "loss": 0.1289, + "step": 22900 + }, + { + "epoch": 1.07, + "learning_rate": 1.656827551612246e-05, + "loss": 0.0668, + "step": 22905 + }, + { + "epoch": 1.07, + "learning_rate": 1.6567491731067674e-05, + "loss": 0.2174, + "step": 22910 + }, + { + "epoch": 1.07, + "learning_rate": 1.656670794601289e-05, + "loss": 0.1433, + "step": 22915 + }, + { + "epoch": 1.07, + "learning_rate": 1.65659241609581e-05, + "loss": 0.1519, + "step": 22920 + }, + { + "epoch": 1.07, + "learning_rate": 1.6565140375903316e-05, + "loss": 0.2903, + "step": 22925 + }, + { + "epoch": 1.07, + "learning_rate": 1.6564356590848527e-05, + "loss": 0.4346, + "step": 22930 + }, + { + "epoch": 1.07, + "learning_rate": 1.656357280579374e-05, + "loss": 0.3024, + "step": 22935 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562789020738955e-05, + "loss": 0.031, + "step": 22940 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562005235684165e-05, + "loss": 0.1083, + "step": 22945 + }, + { + "epoch": 1.07, + "learning_rate": 1.6561221450629382e-05, + "loss": 0.0789, + "step": 22950 + }, + { + "epoch": 1.07, + "learning_rate": 1.6560437665574593e-05, + "loss": 0.193, + "step": 22955 + }, + { + "epoch": 1.07, + "learning_rate": 1.6559653880519807e-05, + "loss": 0.2532, + "step": 22960 + }, + { + "epoch": 1.07, + "learning_rate": 1.655887009546502e-05, + "loss": 0.2134, + "step": 22965 + }, + { + "epoch": 1.07, + "learning_rate": 1.6558086310410235e-05, + "loss": 0.2538, + "step": 22970 + }, + { + "epoch": 1.07, + "learning_rate": 1.655730252535545e-05, + "loss": 0.2492, + "step": 22975 + }, + { + "epoch": 1.07, + "learning_rate": 1.6556518740300662e-05, + "loss": 0.4392, + "step": 22980 + }, + { + "epoch": 1.07, + "learning_rate": 1.6555734955245873e-05, + "loss": 0.2968, + "step": 22985 + }, + { + "epoch": 1.07, + "learning_rate": 1.655495117019109e-05, + "loss": 0.0663, + "step": 22990 + }, + { + "epoch": 1.07, + "learning_rate": 1.65541673851363e-05, + "loss": 0.1486, + "step": 22995 + }, + { + "epoch": 1.07, + "learning_rate": 1.6553383600081515e-05, + "loss": 0.0821, + "step": 23000 + }, + { + "epoch": 1.07, + "learning_rate": 1.655259981502673e-05, + "loss": 0.1134, + "step": 23005 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551816029971942e-05, + "loss": 0.2011, + "step": 23010 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551032244917156e-05, + "loss": 0.2416, + "step": 23015 + }, + { + "epoch": 1.07, + "learning_rate": 1.6550248459862367e-05, + "loss": 0.2838, + "step": 23020 + }, + { + "epoch": 1.07, + "learning_rate": 1.6549464674807584e-05, + "loss": 0.285, + "step": 23025 + }, + { + "epoch": 1.07, + "learning_rate": 1.6548680889752795e-05, + "loss": 0.3597, + "step": 23030 + }, + { + "epoch": 1.07, + "learning_rate": 1.654789710469801e-05, + "loss": 0.4682, + "step": 23035 + }, + { + "epoch": 1.08, + "learning_rate": 1.6547113319643222e-05, + "loss": 0.0424, + "step": 23040 + }, + { + "epoch": 1.08, + "learning_rate": 1.6546329534588436e-05, + "loss": 0.0739, + "step": 23045 + }, + { + "epoch": 1.08, + "learning_rate": 1.654554574953365e-05, + "loss": 0.0871, + "step": 23050 + }, + { + "epoch": 1.08, + "learning_rate": 1.6544761964478864e-05, + "loss": 0.0759, + "step": 23055 + }, + { + "epoch": 1.08, + "learning_rate": 1.6543978179424075e-05, + "loss": 0.1438, + "step": 23060 + }, + { + "epoch": 1.08, + "learning_rate": 1.654319439436929e-05, + "loss": 0.1683, + "step": 23065 + }, + { + "epoch": 1.08, + "learning_rate": 1.6542410609314503e-05, + "loss": 0.2168, + "step": 23070 + }, + { + "epoch": 1.08, + "learning_rate": 1.6541626824259716e-05, + "loss": 0.2413, + "step": 23075 + }, + { + "epoch": 1.08, + "learning_rate": 1.654084303920493e-05, + "loss": 0.3585, + "step": 23080 + }, + { + "epoch": 1.08, + "learning_rate": 1.6540059254150144e-05, + "loss": 0.2754, + "step": 23085 + }, + { + "epoch": 1.08, + "learning_rate": 1.6539275469095358e-05, + "loss": 0.0734, + "step": 23090 + }, + { + "epoch": 1.08, + "learning_rate": 1.653849168404057e-05, + "loss": 0.1821, + "step": 23095 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537707898985783e-05, + "loss": 0.136, + "step": 23100 + }, + { + "epoch": 1.08, + "learning_rate": 1.6536924113930996e-05, + "loss": 0.1626, + "step": 23105 + }, + { + "epoch": 1.08, + "learning_rate": 1.653614032887621e-05, + "loss": 0.1497, + "step": 23110 + }, + { + "epoch": 1.08, + "learning_rate": 1.6535356543821424e-05, + "loss": 0.1505, + "step": 23115 + }, + { + "epoch": 1.08, + "learning_rate": 1.6534572758766638e-05, + "loss": 0.2225, + "step": 23120 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533788973711852e-05, + "loss": 0.2445, + "step": 23125 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533005188657063e-05, + "loss": 0.4143, + "step": 23130 + }, + { + "epoch": 1.08, + "learning_rate": 1.6532221403602277e-05, + "loss": 0.322, + "step": 23135 + }, + { + "epoch": 1.08, + "learning_rate": 1.653143761854749e-05, + "loss": 0.0707, + "step": 23140 + }, + { + "epoch": 1.08, + "learning_rate": 1.6530653833492704e-05, + "loss": 0.0468, + "step": 23145 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529870048437918e-05, + "loss": 0.1199, + "step": 23150 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529086263383132e-05, + "loss": 0.1203, + "step": 23155 + }, + { + "epoch": 1.08, + "learning_rate": 1.6528302478328343e-05, + "loss": 0.1562, + "step": 23160 + }, + { + "epoch": 1.08, + "learning_rate": 1.652751869327356e-05, + "loss": 0.1764, + "step": 23165 + }, + { + "epoch": 1.08, + "learning_rate": 1.652673490821877e-05, + "loss": 0.1951, + "step": 23170 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525951123163984e-05, + "loss": 0.3135, + "step": 23175 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525167338109198e-05, + "loss": 0.2562, + "step": 23180 + }, + { + "epoch": 1.08, + "learning_rate": 1.6524383553054412e-05, + "loss": 0.2707, + "step": 23185 + }, + { + "epoch": 1.08, + "learning_rate": 1.6523599767999626e-05, + "loss": 0.0834, + "step": 23190 + }, + { + "epoch": 1.08, + "learning_rate": 1.6522815982944837e-05, + "loss": 0.1467, + "step": 23195 + }, + { + "epoch": 1.08, + "learning_rate": 1.652203219789005e-05, + "loss": 0.093, + "step": 23200 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521248412835264e-05, + "loss": 0.0902, + "step": 23205 + }, + { + "epoch": 1.08, + "learning_rate": 1.652046462778048e-05, + "loss": 0.2016, + "step": 23210 + }, + { + "epoch": 1.08, + "learning_rate": 1.6519680842725692e-05, + "loss": 0.2145, + "step": 23215 + }, + { + "epoch": 1.08, + "learning_rate": 1.6518897057670906e-05, + "loss": 0.2803, + "step": 23220 + }, + { + "epoch": 1.08, + "learning_rate": 1.651811327261612e-05, + "loss": 0.2825, + "step": 23225 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517329487561334e-05, + "loss": 0.3829, + "step": 23230 + }, + { + "epoch": 1.08, + "learning_rate": 1.6516545702506544e-05, + "loss": 0.2447, + "step": 23235 + }, + { + "epoch": 1.08, + "learning_rate": 1.6515761917451762e-05, + "loss": 0.0754, + "step": 23240 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514978132396972e-05, + "loss": 0.0598, + "step": 23245 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514194347342186e-05, + "loss": 0.1389, + "step": 23250 + }, + { + "epoch": 1.09, + "learning_rate": 1.65134105622874e-05, + "loss": 0.156, + "step": 23255 + }, + { + "epoch": 1.09, + "learning_rate": 1.651262677723261e-05, + "loss": 0.1372, + "step": 23260 + }, + { + "epoch": 1.09, + "learning_rate": 1.6511842992177828e-05, + "loss": 0.2297, + "step": 23265 + }, + { + "epoch": 1.09, + "learning_rate": 1.651105920712304e-05, + "loss": 0.2115, + "step": 23270 + }, + { + "epoch": 1.09, + "learning_rate": 1.6510275422068252e-05, + "loss": 0.2469, + "step": 23275 + }, + { + "epoch": 1.09, + "learning_rate": 1.6509491637013466e-05, + "loss": 0.2814, + "step": 23280 + }, + { + "epoch": 1.09, + "learning_rate": 1.650870785195868e-05, + "loss": 0.3825, + "step": 23285 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507924066903894e-05, + "loss": 0.0548, + "step": 23290 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507140281849108e-05, + "loss": 0.0556, + "step": 23295 + }, + { + "epoch": 1.09, + "learning_rate": 1.650635649679432e-05, + "loss": 0.0974, + "step": 23300 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505572711739536e-05, + "loss": 0.1694, + "step": 23305 + }, + { + "epoch": 1.09, + "learning_rate": 1.6504788926684746e-05, + "loss": 0.2936, + "step": 23310 + }, + { + "epoch": 1.09, + "learning_rate": 1.650400514162996e-05, + "loss": 0.1766, + "step": 23315 + }, + { + "epoch": 1.09, + "learning_rate": 1.6503221356575174e-05, + "loss": 0.1843, + "step": 23320 + }, + { + "epoch": 1.09, + "learning_rate": 1.6502437571520388e-05, + "loss": 0.3095, + "step": 23325 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501653786465602e-05, + "loss": 0.3444, + "step": 23330 + }, + { + "epoch": 1.09, + "learning_rate": 1.6500870001410812e-05, + "loss": 0.2731, + "step": 23335 + }, + { + "epoch": 1.09, + "learning_rate": 1.650008621635603e-05, + "loss": 0.0252, + "step": 23340 + }, + { + "epoch": 1.09, + "learning_rate": 1.649930243130124e-05, + "loss": 0.1032, + "step": 23345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6498518646246454e-05, + "loss": 0.1265, + "step": 23350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497734861191668e-05, + "loss": 0.1087, + "step": 23355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496951076136882e-05, + "loss": 0.1427, + "step": 23360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496167291082096e-05, + "loss": 0.2091, + "step": 23365 + }, + { + "epoch": 1.09, + "learning_rate": 1.649538350602731e-05, + "loss": 0.7426, + "step": 23370 + }, + { + "epoch": 1.09, + "learning_rate": 1.649459972097252e-05, + "loss": 0.2259, + "step": 23375 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493815935917738e-05, + "loss": 0.4487, + "step": 23380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493032150862948e-05, + "loss": 0.2265, + "step": 23385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6492248365808162e-05, + "loss": 0.0922, + "step": 23390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6491464580753376e-05, + "loss": 0.1548, + "step": 23395 + }, + { + "epoch": 1.09, + "learning_rate": 1.649068079569859e-05, + "loss": 0.1354, + "step": 23400 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489897010643804e-05, + "loss": 0.1246, + "step": 23405 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489113225589014e-05, + "loss": 0.1558, + "step": 23410 + }, + { + "epoch": 1.09, + "learning_rate": 1.6488329440534228e-05, + "loss": 0.1481, + "step": 23415 + }, + { + "epoch": 1.09, + "learning_rate": 1.6487545655479442e-05, + "loss": 0.2439, + "step": 23420 + }, + { + "epoch": 1.09, + "learning_rate": 1.6486761870424656e-05, + "loss": 0.3442, + "step": 23425 + }, + { + "epoch": 1.09, + "learning_rate": 1.648597808536987e-05, + "loss": 0.3016, + "step": 23430 + }, + { + "epoch": 1.09, + "learning_rate": 1.6485194300315084e-05, + "loss": 0.2837, + "step": 23435 + }, + { + "epoch": 1.09, + "learning_rate": 1.6484410515260298e-05, + "loss": 0.0948, + "step": 23440 + }, + { + "epoch": 1.09, + "learning_rate": 1.648362673020551e-05, + "loss": 0.0681, + "step": 23445 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482842945150722e-05, + "loss": 0.107, + "step": 23450 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482059160095936e-05, + "loss": 0.1297, + "step": 23455 + }, + { + "epoch": 1.09, + "learning_rate": 1.648127537504115e-05, + "loss": 0.1361, + "step": 23460 + }, + { + "epoch": 1.09, + "learning_rate": 1.6480491589986364e-05, + "loss": 0.142, + "step": 23465 + }, + { + "epoch": 1.1, + "learning_rate": 1.6479707804931578e-05, + "loss": 0.2334, + "step": 23470 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478924019876788e-05, + "loss": 0.277, + "step": 23475 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478140234822006e-05, + "loss": 0.341, + "step": 23480 + }, + { + "epoch": 1.1, + "learning_rate": 1.6477356449767216e-05, + "loss": 0.3929, + "step": 23485 + }, + { + "epoch": 1.1, + "learning_rate": 1.647657266471243e-05, + "loss": 0.1686, + "step": 23490 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475788879657644e-05, + "loss": 0.0867, + "step": 23495 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475005094602858e-05, + "loss": 0.0441, + "step": 23500 + }, + { + "epoch": 1.1, + "learning_rate": 1.647422130954807e-05, + "loss": 0.1218, + "step": 23505 + }, + { + "epoch": 1.1, + "learning_rate": 1.6473437524493286e-05, + "loss": 0.1216, + "step": 23510 + }, + { + "epoch": 1.1, + "learning_rate": 1.6472653739438496e-05, + "loss": 0.2273, + "step": 23515 + }, + { + "epoch": 1.1, + "learning_rate": 1.647186995438371e-05, + "loss": 0.3123, + "step": 23520 + }, + { + "epoch": 1.1, + "learning_rate": 1.6471086169328924e-05, + "loss": 0.2573, + "step": 23525 + }, + { + "epoch": 1.1, + "learning_rate": 1.6470302384274138e-05, + "loss": 0.4518, + "step": 23530 + }, + { + "epoch": 1.1, + "learning_rate": 1.6469518599219352e-05, + "loss": 0.2675, + "step": 23535 + }, + { + "epoch": 1.1, + "learning_rate": 1.6468734814164566e-05, + "loss": 0.0457, + "step": 23540 + }, + { + "epoch": 1.1, + "learning_rate": 1.646795102910978e-05, + "loss": 0.0892, + "step": 23545 + }, + { + "epoch": 1.1, + "learning_rate": 1.646716724405499e-05, + "loss": 0.07, + "step": 23550 + }, + { + "epoch": 1.1, + "learning_rate": 1.6466383459000207e-05, + "loss": 0.1443, + "step": 23555 + }, + { + "epoch": 1.1, + "learning_rate": 1.6465599673945418e-05, + "loss": 0.1848, + "step": 23560 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464815888890632e-05, + "loss": 0.1461, + "step": 23565 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464032103835846e-05, + "loss": 0.1837, + "step": 23570 + }, + { + "epoch": 1.1, + "learning_rate": 1.646324831878106e-05, + "loss": 0.178, + "step": 23575 + }, + { + "epoch": 1.1, + "learning_rate": 1.6462464533726273e-05, + "loss": 0.5349, + "step": 23580 + }, + { + "epoch": 1.1, + "learning_rate": 1.6461680748671484e-05, + "loss": 0.3265, + "step": 23585 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460896963616698e-05, + "loss": 0.0742, + "step": 23590 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460113178561912e-05, + "loss": 0.0978, + "step": 23595 + }, + { + "epoch": 1.1, + "learning_rate": 1.6459329393507126e-05, + "loss": 0.1308, + "step": 23600 + }, + { + "epoch": 1.1, + "learning_rate": 1.645854560845234e-05, + "loss": 0.1915, + "step": 23605 + }, + { + "epoch": 1.1, + "learning_rate": 1.6457761823397554e-05, + "loss": 0.094, + "step": 23610 + }, + { + "epoch": 1.1, + "learning_rate": 1.6456978038342764e-05, + "loss": 0.1869, + "step": 23615 + }, + { + "epoch": 1.1, + "learning_rate": 1.645619425328798e-05, + "loss": 0.1502, + "step": 23620 + }, + { + "epoch": 1.1, + "learning_rate": 1.6455410468233192e-05, + "loss": 0.2174, + "step": 23625 + }, + { + "epoch": 1.1, + "learning_rate": 1.6454626683178406e-05, + "loss": 0.4411, + "step": 23630 + }, + { + "epoch": 1.1, + "learning_rate": 1.645384289812362e-05, + "loss": 0.2565, + "step": 23635 + }, + { + "epoch": 1.1, + "learning_rate": 1.6453059113068834e-05, + "loss": 0.0362, + "step": 23640 + }, + { + "epoch": 1.1, + "learning_rate": 1.6452275328014047e-05, + "loss": 0.0526, + "step": 23645 + }, + { + "epoch": 1.1, + "learning_rate": 1.6451491542959258e-05, + "loss": 0.1324, + "step": 23650 + }, + { + "epoch": 1.1, + "learning_rate": 1.6450707757904475e-05, + "loss": 0.0992, + "step": 23655 + }, + { + "epoch": 1.1, + "learning_rate": 1.6449923972849686e-05, + "loss": 0.1667, + "step": 23660 + }, + { + "epoch": 1.1, + "learning_rate": 1.64491401877949e-05, + "loss": 0.2788, + "step": 23665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6448356402740114e-05, + "loss": 0.2025, + "step": 23670 + }, + { + "epoch": 1.1, + "learning_rate": 1.6447572617685328e-05, + "loss": 0.2577, + "step": 23675 + }, + { + "epoch": 1.1, + "learning_rate": 1.644678883263054e-05, + "loss": 0.2843, + "step": 23680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446005047575755e-05, + "loss": 0.2967, + "step": 23685 + }, + { + "epoch": 1.11, + "learning_rate": 1.6445221262520966e-05, + "loss": 0.0835, + "step": 23690 + }, + { + "epoch": 1.11, + "learning_rate": 1.6444437477466183e-05, + "loss": 0.0873, + "step": 23695 + }, + { + "epoch": 1.11, + "learning_rate": 1.6443653692411394e-05, + "loss": 0.1089, + "step": 23700 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442869907356608e-05, + "loss": 0.1707, + "step": 23705 + }, + { + "epoch": 1.11, + "learning_rate": 1.644208612230182e-05, + "loss": 0.191, + "step": 23710 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441302337247035e-05, + "loss": 0.124, + "step": 23715 + }, + { + "epoch": 1.11, + "learning_rate": 1.644051855219225e-05, + "loss": 0.2111, + "step": 23720 + }, + { + "epoch": 1.11, + "learning_rate": 1.643973476713746e-05, + "loss": 0.1918, + "step": 23725 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438950982082674e-05, + "loss": 0.4829, + "step": 23730 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438167197027888e-05, + "loss": 0.3964, + "step": 23735 + }, + { + "epoch": 1.11, + "learning_rate": 1.64373834119731e-05, + "loss": 0.0372, + "step": 23740 + }, + { + "epoch": 1.11, + "learning_rate": 1.6436599626918315e-05, + "loss": 0.0559, + "step": 23745 + }, + { + "epoch": 1.11, + "learning_rate": 1.643581584186353e-05, + "loss": 0.1288, + "step": 23750 + }, + { + "epoch": 1.11, + "learning_rate": 1.6435032056808743e-05, + "loss": 0.1367, + "step": 23755 + }, + { + "epoch": 1.11, + "learning_rate": 1.6434248271753957e-05, + "loss": 0.0864, + "step": 23760 + }, + { + "epoch": 1.11, + "learning_rate": 1.6433464486699168e-05, + "loss": 0.1882, + "step": 23765 + }, + { + "epoch": 1.11, + "learning_rate": 1.6432680701644385e-05, + "loss": 0.2035, + "step": 23770 + }, + { + "epoch": 1.11, + "learning_rate": 1.6431896916589595e-05, + "loss": 0.3376, + "step": 23775 + }, + { + "epoch": 1.11, + "learning_rate": 1.643111313153481e-05, + "loss": 0.5252, + "step": 23780 + }, + { + "epoch": 1.11, + "learning_rate": 1.6430329346480023e-05, + "loss": 0.2832, + "step": 23785 + }, + { + "epoch": 1.11, + "learning_rate": 1.6429545561425234e-05, + "loss": 0.0732, + "step": 23790 + }, + { + "epoch": 1.11, + "learning_rate": 1.642876177637045e-05, + "loss": 0.0599, + "step": 23795 + }, + { + "epoch": 1.11, + "learning_rate": 1.642797799131566e-05, + "loss": 0.1289, + "step": 23800 + }, + { + "epoch": 1.11, + "learning_rate": 1.6427194206260876e-05, + "loss": 0.1547, + "step": 23805 + }, + { + "epoch": 1.11, + "learning_rate": 1.642641042120609e-05, + "loss": 0.0893, + "step": 23810 + }, + { + "epoch": 1.11, + "learning_rate": 1.6425626636151303e-05, + "loss": 0.1896, + "step": 23815 + }, + { + "epoch": 1.11, + "learning_rate": 1.6424842851096517e-05, + "loss": 0.2018, + "step": 23820 + }, + { + "epoch": 1.11, + "learning_rate": 1.642405906604173e-05, + "loss": 0.2314, + "step": 23825 + }, + { + "epoch": 1.11, + "learning_rate": 1.642327528098694e-05, + "loss": 0.6314, + "step": 23830 + }, + { + "epoch": 1.11, + "learning_rate": 1.642249149593216e-05, + "loss": 0.2099, + "step": 23835 + }, + { + "epoch": 1.11, + "learning_rate": 1.642170771087737e-05, + "loss": 0.0935, + "step": 23840 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420923925822583e-05, + "loss": 0.0824, + "step": 23845 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420140140767797e-05, + "loss": 0.1094, + "step": 23850 + }, + { + "epoch": 1.11, + "learning_rate": 1.641935635571301e-05, + "loss": 0.1203, + "step": 23855 + }, + { + "epoch": 1.11, + "learning_rate": 1.6418572570658225e-05, + "loss": 0.0716, + "step": 23860 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417788785603436e-05, + "loss": 0.0886, + "step": 23865 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417005000548653e-05, + "loss": 0.2079, + "step": 23870 + }, + { + "epoch": 1.11, + "learning_rate": 1.6416221215493863e-05, + "loss": 0.2192, + "step": 23875 + }, + { + "epoch": 1.11, + "learning_rate": 1.6415437430439077e-05, + "loss": 0.1609, + "step": 23880 + }, + { + "epoch": 1.11, + "learning_rate": 1.641465364538429e-05, + "loss": 0.427, + "step": 23885 + }, + { + "epoch": 1.11, + "learning_rate": 1.6413869860329505e-05, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.11, + "learning_rate": 1.641308607527472e-05, + "loss": 0.0976, + "step": 23895 + }, + { + "epoch": 1.12, + "learning_rate": 1.6412302290219933e-05, + "loss": 0.1183, + "step": 23900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6411518505165143e-05, + "loss": 0.1117, + "step": 23905 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410734720110357e-05, + "loss": 0.1788, + "step": 23910 + }, + { + "epoch": 1.12, + "learning_rate": 1.640995093505557e-05, + "loss": 0.1834, + "step": 23915 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409167150000785e-05, + "loss": 0.2126, + "step": 23920 + }, + { + "epoch": 1.12, + "learning_rate": 1.6408383364946e-05, + "loss": 0.4336, + "step": 23925 + }, + { + "epoch": 1.12, + "learning_rate": 1.6407599579891213e-05, + "loss": 0.3505, + "step": 23930 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406815794836427e-05, + "loss": 0.3065, + "step": 23935 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406032009781637e-05, + "loss": 0.0399, + "step": 23940 + }, + { + "epoch": 1.12, + "learning_rate": 1.640524822472685e-05, + "loss": 0.0772, + "step": 23945 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404464439672065e-05, + "loss": 0.0448, + "step": 23950 + }, + { + "epoch": 1.12, + "learning_rate": 1.640368065461728e-05, + "loss": 0.0798, + "step": 23955 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402896869562493e-05, + "loss": 0.0925, + "step": 23960 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402113084507707e-05, + "loss": 0.1361, + "step": 23965 + }, + { + "epoch": 1.12, + "learning_rate": 1.640132929945292e-05, + "loss": 0.1452, + "step": 23970 + }, + { + "epoch": 1.12, + "learning_rate": 1.640054551439813e-05, + "loss": 0.2325, + "step": 23975 + }, + { + "epoch": 1.12, + "learning_rate": 1.6399761729343345e-05, + "loss": 0.3527, + "step": 23980 + }, + { + "epoch": 1.12, + "learning_rate": 1.639897794428856e-05, + "loss": 0.2864, + "step": 23985 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398194159233773e-05, + "loss": 0.0622, + "step": 23990 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397410374178987e-05, + "loss": 0.0723, + "step": 23995 + }, + { + "epoch": 1.12, + "learning_rate": 1.63966265891242e-05, + "loss": 0.1408, + "step": 24000 + }, + { + "epoch": 1.12, + "learning_rate": 1.639584280406941e-05, + "loss": 0.1051, + "step": 24005 + }, + { + "epoch": 1.12, + "learning_rate": 1.639505901901463e-05, + "loss": 0.1084, + "step": 24010 + }, + { + "epoch": 1.12, + "learning_rate": 1.639427523395984e-05, + "loss": 0.2103, + "step": 24015 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393491448905053e-05, + "loss": 0.2023, + "step": 24020 + }, + { + "epoch": 1.12, + "learning_rate": 1.6392707663850267e-05, + "loss": 0.2348, + "step": 24025 + }, + { + "epoch": 1.12, + "learning_rate": 1.639192387879548e-05, + "loss": 0.3672, + "step": 24030 + }, + { + "epoch": 1.12, + "learning_rate": 1.6391140093740695e-05, + "loss": 0.3526, + "step": 24035 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390356308685905e-05, + "loss": 0.0778, + "step": 24040 + }, + { + "epoch": 1.12, + "learning_rate": 1.638957252363112e-05, + "loss": 0.1023, + "step": 24045 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388788738576333e-05, + "loss": 0.0873, + "step": 24050 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388004953521547e-05, + "loss": 0.1086, + "step": 24055 + }, + { + "epoch": 1.12, + "learning_rate": 1.638722116846676e-05, + "loss": 0.1551, + "step": 24060 + }, + { + "epoch": 1.12, + "learning_rate": 1.6386437383411975e-05, + "loss": 0.1625, + "step": 24065 + }, + { + "epoch": 1.12, + "learning_rate": 1.638565359835719e-05, + "loss": 0.2734, + "step": 24070 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384869813302403e-05, + "loss": 0.2265, + "step": 24075 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384086028247613e-05, + "loss": 0.2489, + "step": 24080 + }, + { + "epoch": 1.12, + "learning_rate": 1.638330224319283e-05, + "loss": 0.3076, + "step": 24085 + }, + { + "epoch": 1.12, + "learning_rate": 1.638251845813804e-05, + "loss": 0.1064, + "step": 24090 + }, + { + "epoch": 1.12, + "learning_rate": 1.6381734673083255e-05, + "loss": 0.0911, + "step": 24095 + }, + { + "epoch": 1.12, + "learning_rate": 1.638095088802847e-05, + "loss": 0.0852, + "step": 24100 + }, + { + "epoch": 1.12, + "learning_rate": 1.638016710297368e-05, + "loss": 0.1027, + "step": 24105 + }, + { + "epoch": 1.13, + "learning_rate": 1.6379383317918897e-05, + "loss": 0.1949, + "step": 24110 + }, + { + "epoch": 1.13, + "learning_rate": 1.6378599532864107e-05, + "loss": 0.1723, + "step": 24115 + }, + { + "epoch": 1.13, + "learning_rate": 1.637781574780932e-05, + "loss": 0.207, + "step": 24120 + }, + { + "epoch": 1.13, + "learning_rate": 1.6377031962754535e-05, + "loss": 0.2483, + "step": 24125 + }, + { + "epoch": 1.13, + "learning_rate": 1.637624817769975e-05, + "loss": 0.4968, + "step": 24130 + }, + { + "epoch": 1.13, + "learning_rate": 1.6375464392644963e-05, + "loss": 0.3258, + "step": 24135 + }, + { + "epoch": 1.13, + "learning_rate": 1.6374680607590177e-05, + "loss": 0.0432, + "step": 24140 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373896822535387e-05, + "loss": 0.0878, + "step": 24145 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373113037480605e-05, + "loss": 0.163, + "step": 24150 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372329252425815e-05, + "loss": 0.2356, + "step": 24155 + }, + { + "epoch": 1.13, + "learning_rate": 1.637154546737103e-05, + "loss": 0.1573, + "step": 24160 + }, + { + "epoch": 1.13, + "learning_rate": 1.6370761682316243e-05, + "loss": 0.1774, + "step": 24165 + }, + { + "epoch": 1.13, + "learning_rate": 1.6369977897261457e-05, + "loss": 0.1667, + "step": 24170 + }, + { + "epoch": 1.13, + "learning_rate": 1.636919411220667e-05, + "loss": 0.1914, + "step": 24175 + }, + { + "epoch": 1.13, + "learning_rate": 1.636841032715188e-05, + "loss": 0.3366, + "step": 24180 + }, + { + "epoch": 1.13, + "learning_rate": 1.63676265420971e-05, + "loss": 0.389, + "step": 24185 + }, + { + "epoch": 1.13, + "learning_rate": 1.636684275704231e-05, + "loss": 0.0601, + "step": 24190 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366058971987523e-05, + "loss": 0.0784, + "step": 24195 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365275186932737e-05, + "loss": 0.1066, + "step": 24200 + }, + { + "epoch": 1.13, + "learning_rate": 1.636449140187795e-05, + "loss": 0.1525, + "step": 24205 + }, + { + "epoch": 1.13, + "learning_rate": 1.6363707616823165e-05, + "loss": 0.1988, + "step": 24210 + }, + { + "epoch": 1.13, + "learning_rate": 1.636292383176838e-05, + "loss": 0.252, + "step": 24215 + }, + { + "epoch": 1.13, + "learning_rate": 1.636214004671359e-05, + "loss": 0.3322, + "step": 24220 + }, + { + "epoch": 1.13, + "learning_rate": 1.6361356261658806e-05, + "loss": 0.2519, + "step": 24225 + }, + { + "epoch": 1.13, + "learning_rate": 1.6360572476604017e-05, + "loss": 0.2909, + "step": 24230 + }, + { + "epoch": 1.13, + "learning_rate": 1.635978869154923e-05, + "loss": 0.3463, + "step": 24235 + }, + { + "epoch": 1.13, + "learning_rate": 1.6359004906494445e-05, + "loss": 0.0224, + "step": 24240 + }, + { + "epoch": 1.13, + "learning_rate": 1.635822112143966e-05, + "loss": 0.0675, + "step": 24245 + }, + { + "epoch": 1.13, + "learning_rate": 1.6357437336384872e-05, + "loss": 0.1372, + "step": 24250 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356653551330083e-05, + "loss": 0.1832, + "step": 24255 + }, + { + "epoch": 1.13, + "learning_rate": 1.6355869766275297e-05, + "loss": 0.1108, + "step": 24260 + }, + { + "epoch": 1.13, + "learning_rate": 1.635508598122051e-05, + "loss": 0.1944, + "step": 24265 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354302196165725e-05, + "loss": 0.1828, + "step": 24270 + }, + { + "epoch": 1.13, + "learning_rate": 1.635351841111094e-05, + "loss": 0.2393, + "step": 24275 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352734626056153e-05, + "loss": 0.4225, + "step": 24280 + }, + { + "epoch": 1.13, + "learning_rate": 1.6351950841001366e-05, + "loss": 0.3735, + "step": 24285 + }, + { + "epoch": 1.13, + "learning_rate": 1.635116705594658e-05, + "loss": 0.0227, + "step": 24290 + }, + { + "epoch": 1.13, + "learning_rate": 1.635038327089179e-05, + "loss": 0.0606, + "step": 24295 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349599485837005e-05, + "loss": 0.0897, + "step": 24300 + }, + { + "epoch": 1.13, + "learning_rate": 1.634881570078222e-05, + "loss": 0.1893, + "step": 24305 + }, + { + "epoch": 1.13, + "learning_rate": 1.6348031915727433e-05, + "loss": 0.1348, + "step": 24310 + }, + { + "epoch": 1.13, + "learning_rate": 1.6347248130672646e-05, + "loss": 0.1975, + "step": 24315 + }, + { + "epoch": 1.13, + "learning_rate": 1.6346464345617857e-05, + "loss": 0.2427, + "step": 24320 + }, + { + "epoch": 1.14, + "learning_rate": 1.6345680560563074e-05, + "loss": 0.2059, + "step": 24325 + }, + { + "epoch": 1.14, + "learning_rate": 1.6344896775508285e-05, + "loss": 0.4388, + "step": 24330 + }, + { + "epoch": 1.14, + "learning_rate": 1.63441129904535e-05, + "loss": 0.2305, + "step": 24335 + }, + { + "epoch": 1.14, + "learning_rate": 1.6343329205398713e-05, + "loss": 0.0722, + "step": 24340 + }, + { + "epoch": 1.14, + "learning_rate": 1.6342545420343927e-05, + "loss": 0.1261, + "step": 24345 + }, + { + "epoch": 1.14, + "learning_rate": 1.634176163528914e-05, + "loss": 0.0991, + "step": 24350 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340977850234354e-05, + "loss": 0.1313, + "step": 24355 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340194065179565e-05, + "loss": 0.1284, + "step": 24360 + }, + { + "epoch": 1.14, + "learning_rate": 1.633941028012478e-05, + "loss": 0.2371, + "step": 24365 + }, + { + "epoch": 1.14, + "learning_rate": 1.6338626495069993e-05, + "loss": 0.2578, + "step": 24370 + }, + { + "epoch": 1.14, + "learning_rate": 1.6337842710015207e-05, + "loss": 0.2852, + "step": 24375 + }, + { + "epoch": 1.14, + "learning_rate": 1.633705892496042e-05, + "loss": 0.3184, + "step": 24380 + }, + { + "epoch": 1.14, + "learning_rate": 1.6336275139905634e-05, + "loss": 0.2727, + "step": 24385 + }, + { + "epoch": 1.14, + "learning_rate": 1.6335491354850848e-05, + "loss": 0.0995, + "step": 24390 + }, + { + "epoch": 1.14, + "learning_rate": 1.633470756979606e-05, + "loss": 0.075, + "step": 24395 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333923784741276e-05, + "loss": 0.1169, + "step": 24400 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333139999686487e-05, + "loss": 0.1186, + "step": 24405 + }, + { + "epoch": 1.14, + "learning_rate": 1.63323562146317e-05, + "loss": 0.1519, + "step": 24410 + }, + { + "epoch": 1.14, + "learning_rate": 1.6331572429576914e-05, + "loss": 0.1897, + "step": 24415 + }, + { + "epoch": 1.14, + "learning_rate": 1.633078864452213e-05, + "loss": 0.2469, + "step": 24420 + }, + { + "epoch": 1.14, + "learning_rate": 1.6330004859467342e-05, + "loss": 0.2405, + "step": 24425 + }, + { + "epoch": 1.14, + "learning_rate": 1.6329221074412553e-05, + "loss": 0.5484, + "step": 24430 + }, + { + "epoch": 1.14, + "learning_rate": 1.6328437289357767e-05, + "loss": 0.3294, + "step": 24435 + }, + { + "epoch": 1.14, + "learning_rate": 1.632765350430298e-05, + "loss": 0.0427, + "step": 24440 + }, + { + "epoch": 1.14, + "learning_rate": 1.6326869719248194e-05, + "loss": 0.1192, + "step": 24445 + }, + { + "epoch": 1.14, + "learning_rate": 1.632608593419341e-05, + "loss": 0.0817, + "step": 24450 + }, + { + "epoch": 1.14, + "learning_rate": 1.6325302149138622e-05, + "loss": 0.0814, + "step": 24455 + }, + { + "epoch": 1.14, + "learning_rate": 1.6324518364083833e-05, + "loss": 0.1558, + "step": 24460 + }, + { + "epoch": 1.14, + "learning_rate": 1.632373457902905e-05, + "loss": 0.149, + "step": 24465 + }, + { + "epoch": 1.14, + "learning_rate": 1.632295079397426e-05, + "loss": 0.1717, + "step": 24470 + }, + { + "epoch": 1.14, + "learning_rate": 1.6322167008919475e-05, + "loss": 0.2424, + "step": 24475 + }, + { + "epoch": 1.14, + "learning_rate": 1.632138322386469e-05, + "loss": 0.2916, + "step": 24480 + }, + { + "epoch": 1.14, + "learning_rate": 1.6320599438809902e-05, + "loss": 0.3675, + "step": 24485 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319815653755116e-05, + "loss": 0.1092, + "step": 24490 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319031868700327e-05, + "loss": 0.086, + "step": 24495 + }, + { + "epoch": 1.14, + "learning_rate": 1.6318248083645544e-05, + "loss": 0.1483, + "step": 24500 + }, + { + "epoch": 1.14, + "learning_rate": 1.6317464298590755e-05, + "loss": 0.1295, + "step": 24505 + }, + { + "epoch": 1.14, + "learning_rate": 1.631668051353597e-05, + "loss": 0.1388, + "step": 24510 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315896728481182e-05, + "loss": 0.2248, + "step": 24515 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315112943426396e-05, + "loss": 0.229, + "step": 24520 + }, + { + "epoch": 1.14, + "learning_rate": 1.631432915837161e-05, + "loss": 0.2604, + "step": 24525 + }, + { + "epoch": 1.14, + "learning_rate": 1.6313545373316824e-05, + "loss": 0.3488, + "step": 24530 + }, + { + "epoch": 1.14, + "learning_rate": 1.6312761588262035e-05, + "loss": 0.2816, + "step": 24535 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311977803207252e-05, + "loss": 0.0694, + "step": 24540 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311194018152462e-05, + "loss": 0.0728, + "step": 24545 + }, + { + "epoch": 1.15, + "learning_rate": 1.6310410233097676e-05, + "loss": 0.058, + "step": 24550 + }, + { + "epoch": 1.15, + "learning_rate": 1.630962644804289e-05, + "loss": 0.0766, + "step": 24555 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308842662988104e-05, + "loss": 0.1177, + "step": 24560 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308058877933318e-05, + "loss": 0.1314, + "step": 24565 + }, + { + "epoch": 1.15, + "learning_rate": 1.630727509287853e-05, + "loss": 0.1472, + "step": 24570 + }, + { + "epoch": 1.15, + "learning_rate": 1.6306491307823742e-05, + "loss": 0.3199, + "step": 24575 + }, + { + "epoch": 1.15, + "learning_rate": 1.6305707522768956e-05, + "loss": 0.3317, + "step": 24580 + }, + { + "epoch": 1.15, + "learning_rate": 1.630492373771417e-05, + "loss": 0.3662, + "step": 24585 + }, + { + "epoch": 1.15, + "learning_rate": 1.6304139952659384e-05, + "loss": 0.0684, + "step": 24590 + }, + { + "epoch": 1.15, + "learning_rate": 1.6303356167604598e-05, + "loss": 0.0825, + "step": 24595 + }, + { + "epoch": 1.15, + "learning_rate": 1.6302572382549812e-05, + "loss": 0.0647, + "step": 24600 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301788597495026e-05, + "loss": 0.101, + "step": 24605 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301004812440236e-05, + "loss": 0.1126, + "step": 24610 + }, + { + "epoch": 1.15, + "learning_rate": 1.6300221027385454e-05, + "loss": 0.1619, + "step": 24615 + }, + { + "epoch": 1.15, + "learning_rate": 1.6299437242330664e-05, + "loss": 0.248, + "step": 24620 + }, + { + "epoch": 1.15, + "learning_rate": 1.6298653457275878e-05, + "loss": 0.2743, + "step": 24625 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297869672221092e-05, + "loss": 0.4154, + "step": 24630 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297085887166303e-05, + "loss": 0.3563, + "step": 24635 + }, + { + "epoch": 1.15, + "learning_rate": 1.629630210211152e-05, + "loss": 0.0512, + "step": 24640 + }, + { + "epoch": 1.15, + "learning_rate": 1.629551831705673e-05, + "loss": 0.0548, + "step": 24645 + }, + { + "epoch": 1.15, + "learning_rate": 1.6294734532001944e-05, + "loss": 0.0815, + "step": 24650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293950746947158e-05, + "loss": 0.1276, + "step": 24655 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293166961892372e-05, + "loss": 0.141, + "step": 24660 + }, + { + "epoch": 1.15, + "learning_rate": 1.6292383176837586e-05, + "loss": 0.2118, + "step": 24665 + }, + { + "epoch": 1.15, + "learning_rate": 1.62915993917828e-05, + "loss": 0.1749, + "step": 24670 + }, + { + "epoch": 1.15, + "learning_rate": 1.629081560672801e-05, + "loss": 0.2312, + "step": 24675 + }, + { + "epoch": 1.15, + "learning_rate": 1.6290031821673228e-05, + "loss": 0.5181, + "step": 24680 + }, + { + "epoch": 1.15, + "learning_rate": 1.6289248036618438e-05, + "loss": 0.2571, + "step": 24685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6288464251563652e-05, + "loss": 0.0409, + "step": 24690 + }, + { + "epoch": 1.15, + "learning_rate": 1.6287680466508866e-05, + "loss": 0.0747, + "step": 24695 + }, + { + "epoch": 1.15, + "learning_rate": 1.628689668145408e-05, + "loss": 0.1181, + "step": 24700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6286112896399294e-05, + "loss": 0.1395, + "step": 24705 + }, + { + "epoch": 1.15, + "learning_rate": 1.6285329111344504e-05, + "loss": 0.1435, + "step": 24710 + }, + { + "epoch": 1.15, + "learning_rate": 1.628454532628972e-05, + "loss": 0.174, + "step": 24715 + }, + { + "epoch": 1.15, + "learning_rate": 1.6283761541234932e-05, + "loss": 0.1197, + "step": 24720 + }, + { + "epoch": 1.15, + "learning_rate": 1.6282977756180146e-05, + "loss": 0.2038, + "step": 24725 + }, + { + "epoch": 1.15, + "learning_rate": 1.628219397112536e-05, + "loss": 0.3092, + "step": 24730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6281410186070574e-05, + "loss": 0.4175, + "step": 24735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6280626401015788e-05, + "loss": 0.0201, + "step": 24740 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279842615961002e-05, + "loss": 0.1052, + "step": 24745 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279058830906212e-05, + "loss": 0.1185, + "step": 24750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6278275045851426e-05, + "loss": 0.1012, + "step": 24755 + }, + { + "epoch": 1.16, + "learning_rate": 1.627749126079664e-05, + "loss": 0.2098, + "step": 24760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6276707475741854e-05, + "loss": 0.1507, + "step": 24765 + }, + { + "epoch": 1.16, + "learning_rate": 1.6275923690687068e-05, + "loss": 0.1518, + "step": 24770 + }, + { + "epoch": 1.16, + "learning_rate": 1.627513990563228e-05, + "loss": 0.2168, + "step": 24775 + }, + { + "epoch": 1.16, + "learning_rate": 1.6274356120577496e-05, + "loss": 0.3516, + "step": 24780 + }, + { + "epoch": 1.16, + "learning_rate": 1.6273572335522706e-05, + "loss": 0.3077, + "step": 24785 + }, + { + "epoch": 1.16, + "learning_rate": 1.627278855046792e-05, + "loss": 0.0342, + "step": 24790 + }, + { + "epoch": 1.16, + "learning_rate": 1.6272004765413134e-05, + "loss": 0.083, + "step": 24795 + }, + { + "epoch": 1.16, + "learning_rate": 1.6271220980358348e-05, + "loss": 0.0801, + "step": 24800 + }, + { + "epoch": 1.16, + "learning_rate": 1.6270437195303562e-05, + "loss": 0.1694, + "step": 24805 + }, + { + "epoch": 1.16, + "learning_rate": 1.6269653410248776e-05, + "loss": 0.1143, + "step": 24810 + }, + { + "epoch": 1.16, + "learning_rate": 1.626886962519399e-05, + "loss": 0.0924, + "step": 24815 + }, + { + "epoch": 1.16, + "learning_rate": 1.62680858401392e-05, + "loss": 0.237, + "step": 24820 + }, + { + "epoch": 1.16, + "learning_rate": 1.6267302055084414e-05, + "loss": 0.1925, + "step": 24825 + }, + { + "epoch": 1.16, + "learning_rate": 1.6266518270029628e-05, + "loss": 0.3285, + "step": 24830 + }, + { + "epoch": 1.16, + "learning_rate": 1.6265734484974842e-05, + "loss": 0.4066, + "step": 24835 + }, + { + "epoch": 1.16, + "learning_rate": 1.6264950699920056e-05, + "loss": 0.0913, + "step": 24840 + }, + { + "epoch": 1.16, + "learning_rate": 1.626416691486527e-05, + "loss": 0.09, + "step": 24845 + }, + { + "epoch": 1.16, + "learning_rate": 1.626338312981048e-05, + "loss": 0.1174, + "step": 24850 + }, + { + "epoch": 1.16, + "learning_rate": 1.6262599344755697e-05, + "loss": 0.0969, + "step": 24855 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261815559700908e-05, + "loss": 0.1173, + "step": 24860 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261031774646122e-05, + "loss": 0.222, + "step": 24865 + }, + { + "epoch": 1.16, + "learning_rate": 1.6260247989591336e-05, + "loss": 0.2767, + "step": 24870 + }, + { + "epoch": 1.16, + "learning_rate": 1.625946420453655e-05, + "loss": 0.1782, + "step": 24875 + }, + { + "epoch": 1.16, + "learning_rate": 1.6258680419481764e-05, + "loss": 0.2277, + "step": 24880 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257896634426974e-05, + "loss": 0.2343, + "step": 24885 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257112849372188e-05, + "loss": 0.3339, + "step": 24890 + }, + { + "epoch": 1.16, + "learning_rate": 1.6256329064317402e-05, + "loss": 0.0648, + "step": 24895 + }, + { + "epoch": 1.16, + "learning_rate": 1.6255545279262616e-05, + "loss": 0.1105, + "step": 24900 + }, + { + "epoch": 1.16, + "learning_rate": 1.625476149420783e-05, + "loss": 0.1184, + "step": 24905 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253977709153044e-05, + "loss": 0.166, + "step": 24910 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253193924098258e-05, + "loss": 0.098, + "step": 24915 + }, + { + "epoch": 1.16, + "learning_rate": 1.625241013904347e-05, + "loss": 0.2253, + "step": 24920 + }, + { + "epoch": 1.16, + "learning_rate": 1.6251626353988682e-05, + "loss": 0.2517, + "step": 24925 + }, + { + "epoch": 1.16, + "learning_rate": 1.62508425689339e-05, + "loss": 0.287, + "step": 24930 + }, + { + "epoch": 1.16, + "learning_rate": 1.625005878387911e-05, + "loss": 0.3426, + "step": 24935 + }, + { + "epoch": 1.16, + "learning_rate": 1.6249274998824324e-05, + "loss": 0.0747, + "step": 24940 + }, + { + "epoch": 1.16, + "learning_rate": 1.6248491213769538e-05, + "loss": 0.0584, + "step": 24945 + }, + { + "epoch": 1.16, + "learning_rate": 1.6247707428714748e-05, + "loss": 0.1144, + "step": 24950 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246923643659965e-05, + "loss": 0.1062, + "step": 24955 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246139858605176e-05, + "loss": 0.1777, + "step": 24960 + }, + { + "epoch": 1.16, + "learning_rate": 1.624535607355039e-05, + "loss": 0.1773, + "step": 24965 + }, + { + "epoch": 1.17, + "learning_rate": 1.6244572288495604e-05, + "loss": 0.2313, + "step": 24970 + }, + { + "epoch": 1.17, + "learning_rate": 1.6243788503440818e-05, + "loss": 0.2795, + "step": 24975 + }, + { + "epoch": 1.17, + "learning_rate": 1.624300471838603e-05, + "loss": 0.3885, + "step": 24980 + }, + { + "epoch": 1.17, + "learning_rate": 1.6242220933331245e-05, + "loss": 0.3908, + "step": 24985 + }, + { + "epoch": 1.17, + "learning_rate": 1.6241437148276456e-05, + "loss": 0.0605, + "step": 24990 + }, + { + "epoch": 1.17, + "learning_rate": 1.6240653363221673e-05, + "loss": 0.085, + "step": 24995 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239869578166884e-05, + "loss": 0.1085, + "step": 25000 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239085793112098e-05, + "loss": 0.0463, + "step": 25005 + }, + { + "epoch": 1.17, + "learning_rate": 1.623830200805731e-05, + "loss": 0.1531, + "step": 25010 + }, + { + "epoch": 1.17, + "learning_rate": 1.6237518223002526e-05, + "loss": 0.2199, + "step": 25015 + }, + { + "epoch": 1.17, + "learning_rate": 1.623673443794774e-05, + "loss": 0.2393, + "step": 25020 + }, + { + "epoch": 1.17, + "learning_rate": 1.623595065289295e-05, + "loss": 0.3733, + "step": 25025 + }, + { + "epoch": 1.17, + "learning_rate": 1.6235166867838167e-05, + "loss": 0.3225, + "step": 25030 + }, + { + "epoch": 1.17, + "learning_rate": 1.6234383082783378e-05, + "loss": 0.2117, + "step": 25035 + }, + { + "epoch": 1.17, + "learning_rate": 1.623359929772859e-05, + "loss": 0.0419, + "step": 25040 + }, + { + "epoch": 1.17, + "learning_rate": 1.6232815512673806e-05, + "loss": 0.1021, + "step": 25045 + }, + { + "epoch": 1.17, + "learning_rate": 1.623203172761902e-05, + "loss": 0.1235, + "step": 25050 + }, + { + "epoch": 1.17, + "learning_rate": 1.6231247942564233e-05, + "loss": 0.1249, + "step": 25055 + }, + { + "epoch": 1.17, + "learning_rate": 1.6230464157509447e-05, + "loss": 0.192, + "step": 25060 + }, + { + "epoch": 1.17, + "learning_rate": 1.6229680372454658e-05, + "loss": 0.1072, + "step": 25065 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228896587399875e-05, + "loss": 0.1035, + "step": 25070 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228112802345086e-05, + "loss": 0.2934, + "step": 25075 + }, + { + "epoch": 1.17, + "learning_rate": 1.62273290172903e-05, + "loss": 0.4639, + "step": 25080 + }, + { + "epoch": 1.17, + "learning_rate": 1.6226545232235513e-05, + "loss": 0.235, + "step": 25085 + }, + { + "epoch": 1.17, + "learning_rate": 1.6225761447180727e-05, + "loss": 0.0493, + "step": 25090 + }, + { + "epoch": 1.17, + "learning_rate": 1.622497766212594e-05, + "loss": 0.0572, + "step": 25095 + }, + { + "epoch": 1.17, + "learning_rate": 1.6224193877071152e-05, + "loss": 0.0987, + "step": 25100 + }, + { + "epoch": 1.17, + "learning_rate": 1.6223410092016366e-05, + "loss": 0.1056, + "step": 25105 + }, + { + "epoch": 1.17, + "learning_rate": 1.622262630696158e-05, + "loss": 0.1613, + "step": 25110 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221842521906793e-05, + "loss": 0.1629, + "step": 25115 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221058736852007e-05, + "loss": 0.2078, + "step": 25120 + }, + { + "epoch": 1.17, + "learning_rate": 1.622027495179722e-05, + "loss": 0.1947, + "step": 25125 + }, + { + "epoch": 1.17, + "learning_rate": 1.6219491166742435e-05, + "loss": 0.3467, + "step": 25130 + }, + { + "epoch": 1.17, + "learning_rate": 1.621870738168765e-05, + "loss": 0.2284, + "step": 25135 + }, + { + "epoch": 1.17, + "learning_rate": 1.621792359663286e-05, + "loss": 0.0624, + "step": 25140 + }, + { + "epoch": 1.17, + "learning_rate": 1.6217139811578074e-05, + "loss": 0.0692, + "step": 25145 + }, + { + "epoch": 1.17, + "learning_rate": 1.6216356026523287e-05, + "loss": 0.1222, + "step": 25150 + }, + { + "epoch": 1.17, + "learning_rate": 1.62155722414685e-05, + "loss": 0.1692, + "step": 25155 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214788456413715e-05, + "loss": 0.1629, + "step": 25160 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214004671358926e-05, + "loss": 0.1671, + "step": 25165 + }, + { + "epoch": 1.17, + "learning_rate": 1.6213220886304143e-05, + "loss": 0.2507, + "step": 25170 + }, + { + "epoch": 1.17, + "learning_rate": 1.6212437101249354e-05, + "loss": 0.2868, + "step": 25175 + }, + { + "epoch": 1.17, + "learning_rate": 1.6211653316194567e-05, + "loss": 0.3525, + "step": 25180 + }, + { + "epoch": 1.18, + "learning_rate": 1.621086953113978e-05, + "loss": 0.3272, + "step": 25185 + }, + { + "epoch": 1.18, + "learning_rate": 1.6210085746084995e-05, + "loss": 0.0452, + "step": 25190 + }, + { + "epoch": 1.18, + "learning_rate": 1.620930196103021e-05, + "loss": 0.0713, + "step": 25195 + }, + { + "epoch": 1.18, + "learning_rate": 1.6208518175975423e-05, + "loss": 0.0912, + "step": 25200 + }, + { + "epoch": 1.18, + "learning_rate": 1.6207734390920634e-05, + "loss": 0.1543, + "step": 25205 + }, + { + "epoch": 1.18, + "learning_rate": 1.6206950605865848e-05, + "loss": 0.1041, + "step": 25210 + }, + { + "epoch": 1.18, + "learning_rate": 1.620616682081106e-05, + "loss": 0.141, + "step": 25215 + }, + { + "epoch": 1.18, + "learning_rate": 1.6205383035756275e-05, + "loss": 0.1966, + "step": 25220 + }, + { + "epoch": 1.18, + "learning_rate": 1.620459925070149e-05, + "loss": 0.3014, + "step": 25225 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203815465646703e-05, + "loss": 0.4792, + "step": 25230 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203031680591917e-05, + "loss": 0.2327, + "step": 25235 + }, + { + "epoch": 1.18, + "learning_rate": 1.6202247895537128e-05, + "loss": 0.0362, + "step": 25240 + }, + { + "epoch": 1.18, + "learning_rate": 1.6201464110482345e-05, + "loss": 0.1217, + "step": 25245 + }, + { + "epoch": 1.18, + "learning_rate": 1.6200680325427555e-05, + "loss": 0.0556, + "step": 25250 + }, + { + "epoch": 1.18, + "learning_rate": 1.619989654037277e-05, + "loss": 0.1454, + "step": 25255 + }, + { + "epoch": 1.18, + "learning_rate": 1.6199112755317983e-05, + "loss": 0.1041, + "step": 25260 + }, + { + "epoch": 1.18, + "learning_rate": 1.6198328970263197e-05, + "loss": 0.1451, + "step": 25265 + }, + { + "epoch": 1.18, + "learning_rate": 1.619754518520841e-05, + "loss": 0.2237, + "step": 25270 + }, + { + "epoch": 1.18, + "learning_rate": 1.619691815716458e-05, + "loss": 0.2745, + "step": 25275 + }, + { + "epoch": 1.18, + "learning_rate": 1.6196134372109792e-05, + "loss": 0.4215, + "step": 25280 + }, + { + "epoch": 1.18, + "learning_rate": 1.619535058705501e-05, + "loss": 0.481, + "step": 25285 + }, + { + "epoch": 1.18, + "learning_rate": 1.619456680200022e-05, + "loss": 0.0337, + "step": 25290 + }, + { + "epoch": 1.18, + "learning_rate": 1.6193783016945434e-05, + "loss": 0.102, + "step": 25295 + }, + { + "epoch": 1.18, + "learning_rate": 1.6192999231890648e-05, + "loss": 0.0853, + "step": 25300 + }, + { + "epoch": 1.18, + "learning_rate": 1.619221544683586e-05, + "loss": 0.1298, + "step": 25305 + }, + { + "epoch": 1.18, + "learning_rate": 1.6191431661781075e-05, + "loss": 0.151, + "step": 25310 + }, + { + "epoch": 1.18, + "learning_rate": 1.619064787672629e-05, + "loss": 0.1743, + "step": 25315 + }, + { + "epoch": 1.18, + "learning_rate": 1.61898640916715e-05, + "loss": 0.247, + "step": 25320 + }, + { + "epoch": 1.18, + "learning_rate": 1.6189080306616717e-05, + "loss": 0.2403, + "step": 25325 + }, + { + "epoch": 1.18, + "learning_rate": 1.6188296521561928e-05, + "loss": 0.4411, + "step": 25330 + }, + { + "epoch": 1.18, + "learning_rate": 1.618751273650714e-05, + "loss": 0.2575, + "step": 25335 + }, + { + "epoch": 1.18, + "learning_rate": 1.6186728951452355e-05, + "loss": 0.0465, + "step": 25340 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185945166397566e-05, + "loss": 0.0688, + "step": 25345 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185161381342783e-05, + "loss": 0.1129, + "step": 25350 + }, + { + "epoch": 1.18, + "learning_rate": 1.6184377596287994e-05, + "loss": 0.1276, + "step": 25355 + }, + { + "epoch": 1.18, + "learning_rate": 1.6183593811233208e-05, + "loss": 0.0864, + "step": 25360 + }, + { + "epoch": 1.18, + "learning_rate": 1.618281002617842e-05, + "loss": 0.1639, + "step": 25365 + }, + { + "epoch": 1.18, + "learning_rate": 1.6182026241123636e-05, + "loss": 0.1556, + "step": 25370 + }, + { + "epoch": 1.18, + "learning_rate": 1.618124245606885e-05, + "loss": 0.3036, + "step": 25375 + }, + { + "epoch": 1.18, + "learning_rate": 1.6180458671014063e-05, + "loss": 0.3818, + "step": 25380 + }, + { + "epoch": 1.18, + "learning_rate": 1.6179674885959277e-05, + "loss": 0.2293, + "step": 25385 + }, + { + "epoch": 1.18, + "learning_rate": 1.617889110090449e-05, + "loss": 0.0748, + "step": 25390 + }, + { + "epoch": 1.18, + "learning_rate": 1.61781073158497e-05, + "loss": 0.1107, + "step": 25395 + }, + { + "epoch": 1.19, + "learning_rate": 1.6177323530794916e-05, + "loss": 0.166, + "step": 25400 + }, + { + "epoch": 1.19, + "learning_rate": 1.617653974574013e-05, + "loss": 0.0938, + "step": 25405 + }, + { + "epoch": 1.19, + "learning_rate": 1.6175755960685343e-05, + "loss": 0.1765, + "step": 25410 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174972175630557e-05, + "loss": 0.1421, + "step": 25415 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174188390575768e-05, + "loss": 0.169, + "step": 25420 + }, + { + "epoch": 1.19, + "learning_rate": 1.6173404605520985e-05, + "loss": 0.2901, + "step": 25425 + }, + { + "epoch": 1.19, + "learning_rate": 1.6172620820466196e-05, + "loss": 0.4629, + "step": 25430 + }, + { + "epoch": 1.19, + "learning_rate": 1.617183703541141e-05, + "loss": 0.4004, + "step": 25435 + }, + { + "epoch": 1.19, + "learning_rate": 1.6171053250356623e-05, + "loss": 0.0532, + "step": 25440 + }, + { + "epoch": 1.19, + "learning_rate": 1.6170269465301837e-05, + "loss": 0.0672, + "step": 25445 + }, + { + "epoch": 1.19, + "learning_rate": 1.616948568024705e-05, + "loss": 0.1277, + "step": 25450 + }, + { + "epoch": 1.19, + "learning_rate": 1.6168701895192265e-05, + "loss": 0.0931, + "step": 25455 + }, + { + "epoch": 1.19, + "learning_rate": 1.6167918110137476e-05, + "loss": 0.1628, + "step": 25460 + }, + { + "epoch": 1.19, + "learning_rate": 1.616713432508269e-05, + "loss": 0.1642, + "step": 25465 + }, + { + "epoch": 1.19, + "learning_rate": 1.6166350540027903e-05, + "loss": 0.1649, + "step": 25470 + }, + { + "epoch": 1.19, + "learning_rate": 1.6165566754973117e-05, + "loss": 0.2531, + "step": 25475 + }, + { + "epoch": 1.19, + "learning_rate": 1.616478296991833e-05, + "loss": 0.4141, + "step": 25480 + }, + { + "epoch": 1.19, + "learning_rate": 1.6163999184863545e-05, + "loss": 0.3355, + "step": 25485 + }, + { + "epoch": 1.19, + "learning_rate": 1.616321539980876e-05, + "loss": 0.0631, + "step": 25490 + }, + { + "epoch": 1.19, + "learning_rate": 1.616243161475397e-05, + "loss": 0.0915, + "step": 25495 + }, + { + "epoch": 1.19, + "learning_rate": 1.6161647829699187e-05, + "loss": 0.1369, + "step": 25500 + }, + { + "epoch": 1.19, + "learning_rate": 1.6160864044644397e-05, + "loss": 0.1394, + "step": 25505 + }, + { + "epoch": 1.19, + "learning_rate": 1.616008025958961e-05, + "loss": 0.096, + "step": 25510 + }, + { + "epoch": 1.19, + "learning_rate": 1.6159296474534825e-05, + "loss": 0.1094, + "step": 25515 + }, + { + "epoch": 1.19, + "learning_rate": 1.615851268948004e-05, + "loss": 0.1217, + "step": 25520 + }, + { + "epoch": 1.19, + "learning_rate": 1.6157728904425253e-05, + "loss": 0.2451, + "step": 25525 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156945119370464e-05, + "loss": 0.2088, + "step": 25530 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156161334315677e-05, + "loss": 0.2696, + "step": 25535 + }, + { + "epoch": 1.19, + "learning_rate": 1.615537754926089e-05, + "loss": 0.0968, + "step": 25540 + }, + { + "epoch": 1.19, + "learning_rate": 1.6154593764206105e-05, + "loss": 0.0515, + "step": 25545 + }, + { + "epoch": 1.19, + "learning_rate": 1.615380997915132e-05, + "loss": 0.0912, + "step": 25550 + }, + { + "epoch": 1.19, + "learning_rate": 1.6153026194096533e-05, + "loss": 0.1158, + "step": 25555 + }, + { + "epoch": 1.19, + "learning_rate": 1.6152242409041744e-05, + "loss": 0.1445, + "step": 25560 + }, + { + "epoch": 1.19, + "learning_rate": 1.615145862398696e-05, + "loss": 0.1769, + "step": 25565 + }, + { + "epoch": 1.19, + "learning_rate": 1.615067483893217e-05, + "loss": 0.1704, + "step": 25570 + }, + { + "epoch": 1.19, + "learning_rate": 1.6149891053877385e-05, + "loss": 0.2812, + "step": 25575 + }, + { + "epoch": 1.19, + "learning_rate": 1.61491072688226e-05, + "loss": 0.3944, + "step": 25580 + }, + { + "epoch": 1.19, + "learning_rate": 1.6148323483767813e-05, + "loss": 0.2521, + "step": 25585 + }, + { + "epoch": 1.19, + "learning_rate": 1.6147539698713027e-05, + "loss": 0.0945, + "step": 25590 + }, + { + "epoch": 1.19, + "learning_rate": 1.6146755913658238e-05, + "loss": 0.1083, + "step": 25595 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145972128603455e-05, + "loss": 0.0934, + "step": 25600 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145188343548665e-05, + "loss": 0.1016, + "step": 25605 + }, + { + "epoch": 1.19, + "learning_rate": 1.614440455849388e-05, + "loss": 0.1465, + "step": 25610 + }, + { + "epoch": 1.2, + "learning_rate": 1.6143620773439093e-05, + "loss": 0.1531, + "step": 25615 + }, + { + "epoch": 1.2, + "learning_rate": 1.6142836988384307e-05, + "loss": 0.1764, + "step": 25620 + }, + { + "epoch": 1.2, + "learning_rate": 1.614205320332952e-05, + "loss": 0.2381, + "step": 25625 + }, + { + "epoch": 1.2, + "learning_rate": 1.6141269418274735e-05, + "loss": 0.4556, + "step": 25630 + }, + { + "epoch": 1.2, + "learning_rate": 1.6140485633219945e-05, + "loss": 0.3126, + "step": 25635 + }, + { + "epoch": 1.2, + "learning_rate": 1.6139701848165163e-05, + "loss": 0.0332, + "step": 25640 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138918063110373e-05, + "loss": 0.0406, + "step": 25645 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138134278055587e-05, + "loss": 0.1147, + "step": 25650 + }, + { + "epoch": 1.2, + "learning_rate": 1.61373504930008e-05, + "loss": 0.1279, + "step": 25655 + }, + { + "epoch": 1.2, + "learning_rate": 1.613656670794601e-05, + "loss": 0.1488, + "step": 25660 + }, + { + "epoch": 1.2, + "learning_rate": 1.613578292289123e-05, + "loss": 0.0947, + "step": 25665 + }, + { + "epoch": 1.2, + "learning_rate": 1.613499913783644e-05, + "loss": 0.147, + "step": 25670 + }, + { + "epoch": 1.2, + "learning_rate": 1.6134215352781653e-05, + "loss": 0.2196, + "step": 25675 + }, + { + "epoch": 1.2, + "learning_rate": 1.6133431567726867e-05, + "loss": 0.3325, + "step": 25680 + }, + { + "epoch": 1.2, + "learning_rate": 1.613264778267208e-05, + "loss": 0.3548, + "step": 25685 + }, + { + "epoch": 1.2, + "learning_rate": 1.6131863997617295e-05, + "loss": 0.0497, + "step": 25690 + }, + { + "epoch": 1.2, + "learning_rate": 1.613108021256251e-05, + "loss": 0.0752, + "step": 25695 + }, + { + "epoch": 1.2, + "learning_rate": 1.6130296427507723e-05, + "loss": 0.0716, + "step": 25700 + }, + { + "epoch": 1.2, + "learning_rate": 1.6129512642452937e-05, + "loss": 0.1445, + "step": 25705 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128728857398147e-05, + "loss": 0.1371, + "step": 25710 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128101829354318e-05, + "loss": 0.1388, + "step": 25715 + }, + { + "epoch": 1.2, + "learning_rate": 1.6127318044299535e-05, + "loss": 0.1818, + "step": 25720 + }, + { + "epoch": 1.2, + "learning_rate": 1.6126534259244745e-05, + "loss": 0.1747, + "step": 25725 + }, + { + "epoch": 1.2, + "learning_rate": 1.612575047418996e-05, + "loss": 0.2925, + "step": 25730 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124966689135173e-05, + "loss": 0.212, + "step": 25735 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124182904080384e-05, + "loss": 0.07, + "step": 25740 + }, + { + "epoch": 1.2, + "learning_rate": 1.61233991190256e-05, + "loss": 0.0871, + "step": 25745 + }, + { + "epoch": 1.2, + "learning_rate": 1.612261533397081e-05, + "loss": 0.1572, + "step": 25750 + }, + { + "epoch": 1.2, + "learning_rate": 1.6121831548916026e-05, + "loss": 0.1053, + "step": 25755 + }, + { + "epoch": 1.2, + "learning_rate": 1.612104776386124e-05, + "loss": 0.1303, + "step": 25760 + }, + { + "epoch": 1.2, + "learning_rate": 1.6120263978806453e-05, + "loss": 0.2018, + "step": 25765 + }, + { + "epoch": 1.2, + "learning_rate": 1.6119480193751667e-05, + "loss": 0.2664, + "step": 25770 + }, + { + "epoch": 1.2, + "learning_rate": 1.611869640869688e-05, + "loss": 0.2745, + "step": 25775 + }, + { + "epoch": 1.2, + "learning_rate": 1.6117912623642095e-05, + "loss": 0.4014, + "step": 25780 + }, + { + "epoch": 1.2, + "learning_rate": 1.611712883858731e-05, + "loss": 0.2742, + "step": 25785 + }, + { + "epoch": 1.2, + "learning_rate": 1.611634505353252e-05, + "loss": 0.094, + "step": 25790 + }, + { + "epoch": 1.2, + "learning_rate": 1.6115561268477733e-05, + "loss": 0.1958, + "step": 25795 + }, + { + "epoch": 1.2, + "learning_rate": 1.6114777483422947e-05, + "loss": 0.0788, + "step": 25800 + }, + { + "epoch": 1.2, + "learning_rate": 1.611399369836816e-05, + "loss": 0.106, + "step": 25805 + }, + { + "epoch": 1.2, + "learning_rate": 1.6113209913313375e-05, + "loss": 0.1569, + "step": 25810 + }, + { + "epoch": 1.2, + "learning_rate": 1.6112426128258586e-05, + "loss": 0.126, + "step": 25815 + }, + { + "epoch": 1.2, + "learning_rate": 1.6111642343203803e-05, + "loss": 0.1446, + "step": 25820 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110858558149013e-05, + "loss": 0.3583, + "step": 25825 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110074773094227e-05, + "loss": 0.3009, + "step": 25830 + }, + { + "epoch": 1.21, + "learning_rate": 1.610929098803944e-05, + "loss": 0.3158, + "step": 25835 + }, + { + "epoch": 1.21, + "learning_rate": 1.6108507202984655e-05, + "loss": 0.1105, + "step": 25840 + }, + { + "epoch": 1.21, + "learning_rate": 1.610772341792987e-05, + "loss": 0.0567, + "step": 25845 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106939632875083e-05, + "loss": 0.0254, + "step": 25850 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106155847820293e-05, + "loss": 0.1127, + "step": 25855 + }, + { + "epoch": 1.21, + "learning_rate": 1.6105372062765507e-05, + "loss": 0.165, + "step": 25860 + }, + { + "epoch": 1.21, + "learning_rate": 1.610458827771072e-05, + "loss": 0.1691, + "step": 25865 + }, + { + "epoch": 1.21, + "learning_rate": 1.6103804492655935e-05, + "loss": 0.1748, + "step": 25870 + }, + { + "epoch": 1.21, + "learning_rate": 1.610302070760115e-05, + "loss": 0.3087, + "step": 25875 + }, + { + "epoch": 1.21, + "learning_rate": 1.6102236922546363e-05, + "loss": 0.3935, + "step": 25880 + }, + { + "epoch": 1.21, + "learning_rate": 1.6101453137491577e-05, + "loss": 0.3608, + "step": 25885 + }, + { + "epoch": 1.21, + "learning_rate": 1.6100669352436787e-05, + "loss": 0.0722, + "step": 25890 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099885567382005e-05, + "loss": 0.1239, + "step": 25895 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099101782327215e-05, + "loss": 0.0837, + "step": 25900 + }, + { + "epoch": 1.21, + "learning_rate": 1.609831799727243e-05, + "loss": 0.1534, + "step": 25905 + }, + { + "epoch": 1.21, + "learning_rate": 1.6097534212217643e-05, + "loss": 0.1598, + "step": 25910 + }, + { + "epoch": 1.21, + "learning_rate": 1.6096750427162857e-05, + "loss": 0.1712, + "step": 25915 + }, + { + "epoch": 1.21, + "learning_rate": 1.609596664210807e-05, + "loss": 0.1853, + "step": 25920 + }, + { + "epoch": 1.21, + "learning_rate": 1.609518285705328e-05, + "loss": 0.2454, + "step": 25925 + }, + { + "epoch": 1.21, + "learning_rate": 1.6094399071998495e-05, + "loss": 0.303, + "step": 25930 + }, + { + "epoch": 1.21, + "learning_rate": 1.609361528694371e-05, + "loss": 0.3649, + "step": 25935 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092831501888923e-05, + "loss": 0.0425, + "step": 25940 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092047716834137e-05, + "loss": 0.1058, + "step": 25945 + }, + { + "epoch": 1.21, + "learning_rate": 1.609126393177935e-05, + "loss": 0.0823, + "step": 25950 + }, + { + "epoch": 1.21, + "learning_rate": 1.609048014672456e-05, + "loss": 0.0905, + "step": 25955 + }, + { + "epoch": 1.21, + "learning_rate": 1.608969636166978e-05, + "loss": 0.1479, + "step": 25960 + }, + { + "epoch": 1.21, + "learning_rate": 1.608891257661499e-05, + "loss": 0.1194, + "step": 25965 + }, + { + "epoch": 1.21, + "learning_rate": 1.6088128791560203e-05, + "loss": 0.2551, + "step": 25970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6087345006505417e-05, + "loss": 0.2567, + "step": 25975 + }, + { + "epoch": 1.21, + "learning_rate": 1.608656122145063e-05, + "loss": 0.4998, + "step": 25980 + }, + { + "epoch": 1.21, + "learning_rate": 1.6085777436395845e-05, + "loss": 0.3416, + "step": 25985 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084993651341055e-05, + "loss": 0.0866, + "step": 25990 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084209866286273e-05, + "loss": 0.1128, + "step": 25995 + }, + { + "epoch": 1.21, + "learning_rate": 1.6083426081231483e-05, + "loss": 0.1394, + "step": 26000 + }, + { + "epoch": 1.21, + "learning_rate": 1.6082642296176697e-05, + "loss": 0.163, + "step": 26005 + }, + { + "epoch": 1.21, + "learning_rate": 1.608185851112191e-05, + "loss": 0.1127, + "step": 26010 + }, + { + "epoch": 1.21, + "learning_rate": 1.6081074726067125e-05, + "loss": 0.1923, + "step": 26015 + }, + { + "epoch": 1.21, + "learning_rate": 1.608029094101234e-05, + "loss": 0.1738, + "step": 26020 + }, + { + "epoch": 1.21, + "learning_rate": 1.6079507155957553e-05, + "loss": 0.2733, + "step": 26025 + }, + { + "epoch": 1.21, + "learning_rate": 1.6078723370902763e-05, + "loss": 0.3812, + "step": 26030 + }, + { + "epoch": 1.21, + "learning_rate": 1.607793958584798e-05, + "loss": 0.2911, + "step": 26035 + }, + { + "epoch": 1.22, + "learning_rate": 1.607715580079319e-05, + "loss": 0.0426, + "step": 26040 + }, + { + "epoch": 1.22, + "learning_rate": 1.6076372015738405e-05, + "loss": 0.1156, + "step": 26045 + }, + { + "epoch": 1.22, + "learning_rate": 1.607558823068362e-05, + "loss": 0.1092, + "step": 26050 + }, + { + "epoch": 1.22, + "learning_rate": 1.607480444562883e-05, + "loss": 0.1071, + "step": 26055 + }, + { + "epoch": 1.22, + "learning_rate": 1.6074020660574047e-05, + "loss": 0.1825, + "step": 26060 + }, + { + "epoch": 1.22, + "learning_rate": 1.6073236875519257e-05, + "loss": 0.2019, + "step": 26065 + }, + { + "epoch": 1.22, + "learning_rate": 1.607245309046447e-05, + "loss": 0.2312, + "step": 26070 + }, + { + "epoch": 1.22, + "learning_rate": 1.6071669305409685e-05, + "loss": 0.1766, + "step": 26075 + }, + { + "epoch": 1.22, + "learning_rate": 1.60708855203549e-05, + "loss": 0.3091, + "step": 26080 + }, + { + "epoch": 1.22, + "learning_rate": 1.6070101735300113e-05, + "loss": 0.35, + "step": 26085 + }, + { + "epoch": 1.22, + "learning_rate": 1.6069317950245327e-05, + "loss": 0.0862, + "step": 26090 + }, + { + "epoch": 1.22, + "learning_rate": 1.606853416519054e-05, + "loss": 0.0783, + "step": 26095 + }, + { + "epoch": 1.22, + "learning_rate": 1.6067750380135755e-05, + "loss": 0.0974, + "step": 26100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066966595080965e-05, + "loss": 0.0428, + "step": 26105 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066182810026182e-05, + "loss": 0.1272, + "step": 26110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6065399024971393e-05, + "loss": 0.1728, + "step": 26115 + }, + { + "epoch": 1.22, + "learning_rate": 1.6064615239916607e-05, + "loss": 0.2236, + "step": 26120 + }, + { + "epoch": 1.22, + "learning_rate": 1.606383145486182e-05, + "loss": 0.3677, + "step": 26125 + }, + { + "epoch": 1.22, + "learning_rate": 1.606304766980703e-05, + "loss": 0.4215, + "step": 26130 + }, + { + "epoch": 1.22, + "learning_rate": 1.606226388475225e-05, + "loss": 0.3085, + "step": 26135 + }, + { + "epoch": 1.22, + "learning_rate": 1.606148009969746e-05, + "loss": 0.0464, + "step": 26140 + }, + { + "epoch": 1.22, + "learning_rate": 1.6060696314642673e-05, + "loss": 0.0697, + "step": 26145 + }, + { + "epoch": 1.22, + "learning_rate": 1.6059912529587887e-05, + "loss": 0.1091, + "step": 26150 + }, + { + "epoch": 1.22, + "learning_rate": 1.60591287445331e-05, + "loss": 0.1073, + "step": 26155 + }, + { + "epoch": 1.22, + "learning_rate": 1.6058344959478315e-05, + "loss": 0.1585, + "step": 26160 + }, + { + "epoch": 1.22, + "learning_rate": 1.605756117442353e-05, + "loss": 0.1376, + "step": 26165 + }, + { + "epoch": 1.22, + "learning_rate": 1.605677738936874e-05, + "loss": 0.2462, + "step": 26170 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055993604313956e-05, + "loss": 0.1889, + "step": 26175 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055209819259167e-05, + "loss": 0.3838, + "step": 26180 + }, + { + "epoch": 1.22, + "learning_rate": 1.605442603420438e-05, + "loss": 0.3044, + "step": 26185 + }, + { + "epoch": 1.22, + "learning_rate": 1.6053642249149595e-05, + "loss": 0.0562, + "step": 26190 + }, + { + "epoch": 1.22, + "learning_rate": 1.605285846409481e-05, + "loss": 0.0978, + "step": 26195 + }, + { + "epoch": 1.22, + "learning_rate": 1.6052074679040022e-05, + "loss": 0.1215, + "step": 26200 + }, + { + "epoch": 1.22, + "learning_rate": 1.6051290893985233e-05, + "loss": 0.1945, + "step": 26205 + }, + { + "epoch": 1.22, + "learning_rate": 1.605050710893045e-05, + "loss": 0.1485, + "step": 26210 + }, + { + "epoch": 1.22, + "learning_rate": 1.604972332387566e-05, + "loss": 0.0891, + "step": 26215 + }, + { + "epoch": 1.22, + "learning_rate": 1.6048939538820875e-05, + "loss": 0.1399, + "step": 26220 + }, + { + "epoch": 1.22, + "learning_rate": 1.604815575376609e-05, + "loss": 0.205, + "step": 26225 + }, + { + "epoch": 1.22, + "learning_rate": 1.6047371968711303e-05, + "loss": 0.3367, + "step": 26230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6046588183656516e-05, + "loss": 0.2997, + "step": 26235 + }, + { + "epoch": 1.22, + "learning_rate": 1.604580439860173e-05, + "loss": 0.0592, + "step": 26240 + }, + { + "epoch": 1.22, + "learning_rate": 1.604502061354694e-05, + "loss": 0.0551, + "step": 26245 + }, + { + "epoch": 1.22, + "learning_rate": 1.6044236828492155e-05, + "loss": 0.0715, + "step": 26250 + }, + { + "epoch": 1.23, + "learning_rate": 1.604345304343737e-05, + "loss": 0.1212, + "step": 26255 + }, + { + "epoch": 1.23, + "learning_rate": 1.6042669258382583e-05, + "loss": 0.1241, + "step": 26260 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041885473327796e-05, + "loss": 0.1764, + "step": 26265 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041101688273007e-05, + "loss": 0.1995, + "step": 26270 + }, + { + "epoch": 1.23, + "learning_rate": 1.6040317903218224e-05, + "loss": 0.2772, + "step": 26275 + }, + { + "epoch": 1.23, + "learning_rate": 1.6039534118163435e-05, + "loss": 0.3426, + "step": 26280 + }, + { + "epoch": 1.23, + "learning_rate": 1.603875033310865e-05, + "loss": 0.2971, + "step": 26285 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037966548053863e-05, + "loss": 0.065, + "step": 26290 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037182762999077e-05, + "loss": 0.0287, + "step": 26295 + }, + { + "epoch": 1.23, + "learning_rate": 1.603639897794429e-05, + "loss": 0.1137, + "step": 26300 + }, + { + "epoch": 1.23, + "learning_rate": 1.6035615192889504e-05, + "loss": 0.1516, + "step": 26305 + }, + { + "epoch": 1.23, + "learning_rate": 1.6034831407834718e-05, + "loss": 0.0944, + "step": 26310 + }, + { + "epoch": 1.23, + "learning_rate": 1.603404762277993e-05, + "loss": 0.1105, + "step": 26315 + }, + { + "epoch": 1.23, + "learning_rate": 1.6033263837725143e-05, + "loss": 0.1635, + "step": 26320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6032480052670357e-05, + "loss": 0.2127, + "step": 26325 + }, + { + "epoch": 1.23, + "learning_rate": 1.603169626761557e-05, + "loss": 0.4454, + "step": 26330 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030912482560784e-05, + "loss": 0.3167, + "step": 26335 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030128697505998e-05, + "loss": 0.0731, + "step": 26340 + }, + { + "epoch": 1.23, + "learning_rate": 1.602934491245121e-05, + "loss": 0.0499, + "step": 26345 + }, + { + "epoch": 1.23, + "learning_rate": 1.6028561127396426e-05, + "loss": 0.097, + "step": 26350 + }, + { + "epoch": 1.23, + "learning_rate": 1.6027777342341637e-05, + "loss": 0.0544, + "step": 26355 + }, + { + "epoch": 1.23, + "learning_rate": 1.602699355728685e-05, + "loss": 0.1498, + "step": 26360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6026209772232064e-05, + "loss": 0.1206, + "step": 26365 + }, + { + "epoch": 1.23, + "learning_rate": 1.602542598717728e-05, + "loss": 0.2232, + "step": 26370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6024642202122492e-05, + "loss": 0.3615, + "step": 26375 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023858417067703e-05, + "loss": 0.3271, + "step": 26380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023074632012917e-05, + "loss": 0.3493, + "step": 26385 + }, + { + "epoch": 1.23, + "learning_rate": 1.602229084695813e-05, + "loss": 0.0708, + "step": 26390 + }, + { + "epoch": 1.23, + "learning_rate": 1.6021507061903344e-05, + "loss": 0.124, + "step": 26395 + }, + { + "epoch": 1.23, + "learning_rate": 1.602072327684856e-05, + "loss": 0.0928, + "step": 26400 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019939491793772e-05, + "loss": 0.0961, + "step": 26405 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019155706738986e-05, + "loss": 0.1516, + "step": 26410 + }, + { + "epoch": 1.23, + "learning_rate": 1.60183719216842e-05, + "loss": 0.1803, + "step": 26415 + }, + { + "epoch": 1.23, + "learning_rate": 1.601758813662941e-05, + "loss": 0.1668, + "step": 26420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6016804351574628e-05, + "loss": 0.2315, + "step": 26425 + }, + { + "epoch": 1.23, + "learning_rate": 1.601602056651984e-05, + "loss": 0.353, + "step": 26430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6015236781465052e-05, + "loss": 0.319, + "step": 26435 + }, + { + "epoch": 1.23, + "learning_rate": 1.6014452996410266e-05, + "loss": 0.0558, + "step": 26440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6013669211355477e-05, + "loss": 0.1412, + "step": 26445 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012885426300694e-05, + "loss": 0.0911, + "step": 26450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012101641245905e-05, + "loss": 0.0855, + "step": 26455 + }, + { + "epoch": 1.23, + "learning_rate": 1.601131785619112e-05, + "loss": 0.1342, + "step": 26460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010534071136332e-05, + "loss": 0.1502, + "step": 26465 + }, + { + "epoch": 1.24, + "learning_rate": 1.6009750286081546e-05, + "loss": 0.2149, + "step": 26470 + }, + { + "epoch": 1.24, + "learning_rate": 1.600896650102676e-05, + "loss": 0.1451, + "step": 26475 + }, + { + "epoch": 1.24, + "learning_rate": 1.6008182715971974e-05, + "loss": 0.38, + "step": 26480 + }, + { + "epoch": 1.24, + "learning_rate": 1.6007398930917185e-05, + "loss": 0.5451, + "step": 26485 + }, + { + "epoch": 1.24, + "learning_rate": 1.6006615145862402e-05, + "loss": 0.0726, + "step": 26490 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005831360807612e-05, + "loss": 0.0716, + "step": 26495 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005047575752826e-05, + "loss": 0.084, + "step": 26500 + }, + { + "epoch": 1.24, + "learning_rate": 1.600426379069804e-05, + "loss": 0.1345, + "step": 26505 + }, + { + "epoch": 1.24, + "learning_rate": 1.6003480005643254e-05, + "loss": 0.2791, + "step": 26510 + }, + { + "epoch": 1.24, + "learning_rate": 1.6002696220588468e-05, + "loss": 0.1281, + "step": 26515 + }, + { + "epoch": 1.24, + "learning_rate": 1.600191243553368e-05, + "loss": 0.2016, + "step": 26520 + }, + { + "epoch": 1.24, + "learning_rate": 1.6001128650478896e-05, + "loss": 0.3165, + "step": 26525 + }, + { + "epoch": 1.24, + "learning_rate": 1.6000344865424106e-05, + "loss": 0.336, + "step": 26530 + }, + { + "epoch": 1.24, + "learning_rate": 1.599956108036932e-05, + "loss": 0.3234, + "step": 26535 + }, + { + "epoch": 1.24, + "learning_rate": 1.5998777295314534e-05, + "loss": 0.1082, + "step": 26540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997993510259748e-05, + "loss": 0.0455, + "step": 26545 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997209725204962e-05, + "loss": 0.1116, + "step": 26550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5996425940150176e-05, + "loss": 0.0805, + "step": 26555 + }, + { + "epoch": 1.24, + "learning_rate": 1.5995642155095386e-05, + "loss": 0.1438, + "step": 26560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994858370040604e-05, + "loss": 0.1412, + "step": 26565 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994074584985814e-05, + "loss": 0.2316, + "step": 26570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5993290799931028e-05, + "loss": 0.2124, + "step": 26575 + }, + { + "epoch": 1.24, + "learning_rate": 1.5992507014876242e-05, + "loss": 0.3504, + "step": 26580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5991723229821453e-05, + "loss": 0.3047, + "step": 26585 + }, + { + "epoch": 1.24, + "learning_rate": 1.599093944476667e-05, + "loss": 0.0622, + "step": 26590 + }, + { + "epoch": 1.24, + "learning_rate": 1.599015565971188e-05, + "loss": 0.0547, + "step": 26595 + }, + { + "epoch": 1.24, + "learning_rate": 1.5989371874657094e-05, + "loss": 0.0788, + "step": 26600 + }, + { + "epoch": 1.24, + "learning_rate": 1.5988588089602308e-05, + "loss": 0.1388, + "step": 26605 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987804304547522e-05, + "loss": 0.1345, + "step": 26610 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987020519492736e-05, + "loss": 0.1711, + "step": 26615 + }, + { + "epoch": 1.24, + "learning_rate": 1.598623673443795e-05, + "loss": 0.209, + "step": 26620 + }, + { + "epoch": 1.24, + "learning_rate": 1.5985452949383164e-05, + "loss": 0.24, + "step": 26625 + }, + { + "epoch": 1.24, + "learning_rate": 1.5984669164328378e-05, + "loss": 0.2429, + "step": 26630 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983885379273588e-05, + "loss": 0.2346, + "step": 26635 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983101594218802e-05, + "loss": 0.052, + "step": 26640 + }, + { + "epoch": 1.24, + "learning_rate": 1.5982317809164016e-05, + "loss": 0.1405, + "step": 26645 + }, + { + "epoch": 1.24, + "learning_rate": 1.598153402410923e-05, + "loss": 0.0957, + "step": 26650 + }, + { + "epoch": 1.24, + "learning_rate": 1.5980750239054444e-05, + "loss": 0.0875, + "step": 26655 + }, + { + "epoch": 1.24, + "learning_rate": 1.5979966453999654e-05, + "loss": 0.0848, + "step": 26660 + }, + { + "epoch": 1.24, + "learning_rate": 1.597918266894487e-05, + "loss": 0.1606, + "step": 26665 + }, + { + "epoch": 1.24, + "learning_rate": 1.5978398883890082e-05, + "loss": 0.2839, + "step": 26670 + }, + { + "epoch": 1.24, + "learning_rate": 1.5977615098835296e-05, + "loss": 0.2471, + "step": 26675 + }, + { + "epoch": 1.24, + "learning_rate": 1.597683131378051e-05, + "loss": 0.3593, + "step": 26680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5976047528725724e-05, + "loss": 0.3423, + "step": 26685 + }, + { + "epoch": 1.25, + "learning_rate": 1.5975263743670938e-05, + "loss": 0.0586, + "step": 26690 + }, + { + "epoch": 1.25, + "learning_rate": 1.5974479958616152e-05, + "loss": 0.0374, + "step": 26695 + }, + { + "epoch": 1.25, + "learning_rate": 1.5973696173561362e-05, + "loss": 0.1045, + "step": 26700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5972912388506576e-05, + "loss": 0.1342, + "step": 26705 + }, + { + "epoch": 1.25, + "learning_rate": 1.597212860345179e-05, + "loss": 0.2293, + "step": 26710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5971344818397004e-05, + "loss": 0.1674, + "step": 26715 + }, + { + "epoch": 1.25, + "learning_rate": 1.5970561033342218e-05, + "loss": 0.1991, + "step": 26720 + }, + { + "epoch": 1.25, + "learning_rate": 1.5969777248287432e-05, + "loss": 0.2317, + "step": 26725 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968993463232646e-05, + "loss": 0.4123, + "step": 26730 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968209678177856e-05, + "loss": 0.2453, + "step": 26735 + }, + { + "epoch": 1.25, + "learning_rate": 1.5967425893123073e-05, + "loss": 0.083, + "step": 26740 + }, + { + "epoch": 1.25, + "learning_rate": 1.5966642108068284e-05, + "loss": 0.0648, + "step": 26745 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965858323013498e-05, + "loss": 0.1362, + "step": 26750 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965074537958712e-05, + "loss": 0.1406, + "step": 26755 + }, + { + "epoch": 1.25, + "learning_rate": 1.5964290752903926e-05, + "loss": 0.1994, + "step": 26760 + }, + { + "epoch": 1.25, + "learning_rate": 1.596350696784914e-05, + "loss": 0.1565, + "step": 26765 + }, + { + "epoch": 1.25, + "learning_rate": 1.596272318279435e-05, + "loss": 0.2318, + "step": 26770 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961939397739564e-05, + "loss": 0.2744, + "step": 26775 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961155612684778e-05, + "loss": 0.4521, + "step": 26780 + }, + { + "epoch": 1.25, + "learning_rate": 1.5960371827629992e-05, + "loss": 0.2907, + "step": 26785 + }, + { + "epoch": 1.25, + "learning_rate": 1.5959588042575206e-05, + "loss": 0.086, + "step": 26790 + }, + { + "epoch": 1.25, + "learning_rate": 1.595880425752042e-05, + "loss": 0.0616, + "step": 26795 + }, + { + "epoch": 1.25, + "learning_rate": 1.595802047246563e-05, + "loss": 0.0885, + "step": 26800 + }, + { + "epoch": 1.25, + "learning_rate": 1.5957236687410847e-05, + "loss": 0.1038, + "step": 26805 + }, + { + "epoch": 1.25, + "learning_rate": 1.5956452902356058e-05, + "loss": 0.1311, + "step": 26810 + }, + { + "epoch": 1.25, + "learning_rate": 1.5955669117301272e-05, + "loss": 0.2754, + "step": 26815 + }, + { + "epoch": 1.25, + "learning_rate": 1.5954885332246486e-05, + "loss": 0.2622, + "step": 26820 + }, + { + "epoch": 1.25, + "learning_rate": 1.59541015471917e-05, + "loss": 0.3481, + "step": 26825 + }, + { + "epoch": 1.25, + "learning_rate": 1.5953317762136914e-05, + "loss": 0.3888, + "step": 26830 + }, + { + "epoch": 1.25, + "learning_rate": 1.5952533977082124e-05, + "loss": 0.3201, + "step": 26835 + }, + { + "epoch": 1.25, + "learning_rate": 1.595175019202734e-05, + "loss": 0.0431, + "step": 26840 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950966406972552e-05, + "loss": 0.0601, + "step": 26845 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950182621917766e-05, + "loss": 0.0401, + "step": 26850 + }, + { + "epoch": 1.25, + "learning_rate": 1.594939883686298e-05, + "loss": 0.0848, + "step": 26855 + }, + { + "epoch": 1.25, + "learning_rate": 1.5948615051808194e-05, + "loss": 0.1519, + "step": 26860 + }, + { + "epoch": 1.25, + "learning_rate": 1.5947831266753408e-05, + "loss": 0.1502, + "step": 26865 + }, + { + "epoch": 1.25, + "learning_rate": 1.594704748169862e-05, + "loss": 0.1738, + "step": 26870 + }, + { + "epoch": 1.25, + "learning_rate": 1.5946263696643832e-05, + "loss": 0.1806, + "step": 26875 + }, + { + "epoch": 1.25, + "learning_rate": 1.594547991158905e-05, + "loss": 0.359, + "step": 26880 + }, + { + "epoch": 1.25, + "learning_rate": 1.594469612653426e-05, + "loss": 0.3319, + "step": 26885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943912341479474e-05, + "loss": 0.0602, + "step": 26890 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943128556424688e-05, + "loss": 0.0851, + "step": 26895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5942344771369898e-05, + "loss": 0.1058, + "step": 26900 + }, + { + "epoch": 1.26, + "learning_rate": 1.5941560986315115e-05, + "loss": 0.1035, + "step": 26905 + }, + { + "epoch": 1.26, + "learning_rate": 1.5940777201260326e-05, + "loss": 0.1427, + "step": 26910 + }, + { + "epoch": 1.26, + "learning_rate": 1.593999341620554e-05, + "loss": 0.1269, + "step": 26915 + }, + { + "epoch": 1.26, + "learning_rate": 1.5939209631150754e-05, + "loss": 0.1434, + "step": 26920 + }, + { + "epoch": 1.26, + "learning_rate": 1.5938425846095968e-05, + "loss": 0.2233, + "step": 26925 + }, + { + "epoch": 1.26, + "learning_rate": 1.593764206104118e-05, + "loss": 0.4562, + "step": 26930 + }, + { + "epoch": 1.26, + "learning_rate": 1.5936858275986395e-05, + "loss": 0.3848, + "step": 26935 + }, + { + "epoch": 1.26, + "learning_rate": 1.593607449093161e-05, + "loss": 0.0614, + "step": 26940 + }, + { + "epoch": 1.26, + "learning_rate": 1.5935290705876823e-05, + "loss": 0.0845, + "step": 26945 + }, + { + "epoch": 1.26, + "learning_rate": 1.5934506920822034e-05, + "loss": 0.0953, + "step": 26950 + }, + { + "epoch": 1.26, + "learning_rate": 1.593372313576725e-05, + "loss": 0.1694, + "step": 26955 + }, + { + "epoch": 1.26, + "learning_rate": 1.593293935071246e-05, + "loss": 0.1371, + "step": 26960 + }, + { + "epoch": 1.26, + "learning_rate": 1.5932155565657676e-05, + "loss": 0.2797, + "step": 26965 + }, + { + "epoch": 1.26, + "learning_rate": 1.593137178060289e-05, + "loss": 0.2314, + "step": 26970 + }, + { + "epoch": 1.26, + "learning_rate": 1.59305879955481e-05, + "loss": 0.1904, + "step": 26975 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929804210493317e-05, + "loss": 0.3626, + "step": 26980 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929020425438528e-05, + "loss": 0.3654, + "step": 26985 + }, + { + "epoch": 1.26, + "learning_rate": 1.592823664038374e-05, + "loss": 0.0897, + "step": 26990 + }, + { + "epoch": 1.26, + "learning_rate": 1.5927452855328956e-05, + "loss": 0.1229, + "step": 26995 + }, + { + "epoch": 1.26, + "learning_rate": 1.592666907027417e-05, + "loss": 0.1352, + "step": 27000 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925885285219383e-05, + "loss": 0.1724, + "step": 27005 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925101500164597e-05, + "loss": 0.1872, + "step": 27010 + }, + { + "epoch": 1.26, + "learning_rate": 1.5924317715109808e-05, + "loss": 0.211, + "step": 27015 + }, + { + "epoch": 1.26, + "learning_rate": 1.5923533930055025e-05, + "loss": 0.2077, + "step": 27020 + }, + { + "epoch": 1.26, + "learning_rate": 1.5922750145000236e-05, + "loss": 0.2314, + "step": 27025 + }, + { + "epoch": 1.26, + "learning_rate": 1.592196635994545e-05, + "loss": 0.2645, + "step": 27030 + }, + { + "epoch": 1.26, + "learning_rate": 1.5921182574890663e-05, + "loss": 0.3657, + "step": 27035 + }, + { + "epoch": 1.26, + "learning_rate": 1.5920398789835877e-05, + "loss": 0.0485, + "step": 27040 + }, + { + "epoch": 1.26, + "learning_rate": 1.591961500478109e-05, + "loss": 0.0873, + "step": 27045 + }, + { + "epoch": 1.26, + "learning_rate": 1.5918831219726302e-05, + "loss": 0.0935, + "step": 27050 + }, + { + "epoch": 1.26, + "learning_rate": 1.591804743467152e-05, + "loss": 0.0481, + "step": 27055 + }, + { + "epoch": 1.26, + "learning_rate": 1.591726364961673e-05, + "loss": 0.1495, + "step": 27060 + }, + { + "epoch": 1.26, + "learning_rate": 1.5916479864561943e-05, + "loss": 0.2669, + "step": 27065 + }, + { + "epoch": 1.26, + "learning_rate": 1.5915696079507157e-05, + "loss": 0.2449, + "step": 27070 + }, + { + "epoch": 1.26, + "learning_rate": 1.591491229445237e-05, + "loss": 0.2292, + "step": 27075 + }, + { + "epoch": 1.26, + "learning_rate": 1.5914128509397585e-05, + "loss": 0.3765, + "step": 27080 + }, + { + "epoch": 1.26, + "learning_rate": 1.59133447243428e-05, + "loss": 0.4043, + "step": 27085 + }, + { + "epoch": 1.26, + "learning_rate": 1.591256093928801e-05, + "loss": 0.0565, + "step": 27090 + }, + { + "epoch": 1.26, + "learning_rate": 1.5911777154233224e-05, + "loss": 0.1073, + "step": 27095 + }, + { + "epoch": 1.26, + "learning_rate": 1.5910993369178437e-05, + "loss": 0.1031, + "step": 27100 + }, + { + "epoch": 1.26, + "learning_rate": 1.591020958412365e-05, + "loss": 0.1669, + "step": 27105 + }, + { + "epoch": 1.26, + "learning_rate": 1.5909425799068865e-05, + "loss": 0.1159, + "step": 27110 + }, + { + "epoch": 1.27, + "learning_rate": 1.5908642014014076e-05, + "loss": 0.1482, + "step": 27115 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907858228959293e-05, + "loss": 0.2052, + "step": 27120 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907074443904504e-05, + "loss": 0.1452, + "step": 27125 + }, + { + "epoch": 1.27, + "learning_rate": 1.5906290658849717e-05, + "loss": 0.3528, + "step": 27130 + }, + { + "epoch": 1.27, + "learning_rate": 1.590550687379493e-05, + "loss": 0.2911, + "step": 27135 + }, + { + "epoch": 1.27, + "learning_rate": 1.5904723088740145e-05, + "loss": 0.0498, + "step": 27140 + }, + { + "epoch": 1.27, + "learning_rate": 1.590393930368536e-05, + "loss": 0.0742, + "step": 27145 + }, + { + "epoch": 1.27, + "learning_rate": 1.5903155518630573e-05, + "loss": 0.0673, + "step": 27150 + }, + { + "epoch": 1.27, + "learning_rate": 1.5902371733575787e-05, + "loss": 0.1554, + "step": 27155 + }, + { + "epoch": 1.27, + "learning_rate": 1.5901587948520998e-05, + "loss": 0.1423, + "step": 27160 + }, + { + "epoch": 1.27, + "learning_rate": 1.590080416346621e-05, + "loss": 0.2888, + "step": 27165 + }, + { + "epoch": 1.27, + "learning_rate": 1.5900020378411425e-05, + "loss": 0.206, + "step": 27170 + }, + { + "epoch": 1.27, + "learning_rate": 1.589923659335664e-05, + "loss": 0.2769, + "step": 27175 + }, + { + "epoch": 1.27, + "learning_rate": 1.5898452808301853e-05, + "loss": 0.3929, + "step": 27180 + }, + { + "epoch": 1.27, + "learning_rate": 1.5897669023247067e-05, + "loss": 0.2299, + "step": 27185 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896885238192278e-05, + "loss": 0.0616, + "step": 27190 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896101453137495e-05, + "loss": 0.0768, + "step": 27195 + }, + { + "epoch": 1.27, + "learning_rate": 1.5895317668082705e-05, + "loss": 0.066, + "step": 27200 + }, + { + "epoch": 1.27, + "learning_rate": 1.589453388302792e-05, + "loss": 0.1199, + "step": 27205 + }, + { + "epoch": 1.27, + "learning_rate": 1.5893750097973133e-05, + "loss": 0.1225, + "step": 27210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5892966312918347e-05, + "loss": 0.1322, + "step": 27215 + }, + { + "epoch": 1.27, + "learning_rate": 1.589218252786356e-05, + "loss": 0.1938, + "step": 27220 + }, + { + "epoch": 1.27, + "learning_rate": 1.589139874280877e-05, + "loss": 0.1989, + "step": 27225 + }, + { + "epoch": 1.27, + "learning_rate": 1.5890614957753985e-05, + "loss": 0.3783, + "step": 27230 + }, + { + "epoch": 1.27, + "learning_rate": 1.58898311726992e-05, + "loss": 0.2278, + "step": 27235 + }, + { + "epoch": 1.27, + "learning_rate": 1.5889047387644413e-05, + "loss": 0.0426, + "step": 27240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5888263602589627e-05, + "loss": 0.0671, + "step": 27245 + }, + { + "epoch": 1.27, + "learning_rate": 1.588747981753484e-05, + "loss": 0.1248, + "step": 27250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5886696032480055e-05, + "loss": 0.1308, + "step": 27255 + }, + { + "epoch": 1.27, + "learning_rate": 1.588591224742527e-05, + "loss": 0.1626, + "step": 27260 + }, + { + "epoch": 1.27, + "learning_rate": 1.588512846237048e-05, + "loss": 0.0983, + "step": 27265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5884344677315697e-05, + "loss": 0.1998, + "step": 27270 + }, + { + "epoch": 1.27, + "learning_rate": 1.5883560892260907e-05, + "loss": 0.2857, + "step": 27275 + }, + { + "epoch": 1.27, + "learning_rate": 1.588277710720612e-05, + "loss": 0.4716, + "step": 27280 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881993322151335e-05, + "loss": 0.3464, + "step": 27285 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881209537096546e-05, + "loss": 0.0157, + "step": 27290 + }, + { + "epoch": 1.27, + "learning_rate": 1.5880425752041763e-05, + "loss": 0.0292, + "step": 27295 + }, + { + "epoch": 1.27, + "learning_rate": 1.5879641966986973e-05, + "loss": 0.1048, + "step": 27300 + }, + { + "epoch": 1.27, + "learning_rate": 1.5878858181932187e-05, + "loss": 0.1233, + "step": 27305 + }, + { + "epoch": 1.27, + "learning_rate": 1.58780743968774e-05, + "loss": 0.1633, + "step": 27310 + }, + { + "epoch": 1.27, + "learning_rate": 1.5877290611822615e-05, + "loss": 0.213, + "step": 27315 + }, + { + "epoch": 1.27, + "learning_rate": 1.587650682676783e-05, + "loss": 0.1694, + "step": 27320 + }, + { + "epoch": 1.28, + "learning_rate": 1.5875723041713043e-05, + "loss": 0.2286, + "step": 27325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874939256658253e-05, + "loss": 0.4041, + "step": 27330 + }, + { + "epoch": 1.28, + "learning_rate": 1.587415547160347e-05, + "loss": 0.3315, + "step": 27335 + }, + { + "epoch": 1.28, + "learning_rate": 1.587337168654868e-05, + "loss": 0.0725, + "step": 27340 + }, + { + "epoch": 1.28, + "learning_rate": 1.5872587901493895e-05, + "loss": 0.092, + "step": 27345 + }, + { + "epoch": 1.28, + "learning_rate": 1.587180411643911e-05, + "loss": 0.0829, + "step": 27350 + }, + { + "epoch": 1.28, + "learning_rate": 1.5871020331384323e-05, + "loss": 0.1871, + "step": 27355 + }, + { + "epoch": 1.28, + "learning_rate": 1.5870236546329537e-05, + "loss": 0.1217, + "step": 27360 + }, + { + "epoch": 1.28, + "learning_rate": 1.5869452761274747e-05, + "loss": 0.1402, + "step": 27365 + }, + { + "epoch": 1.28, + "learning_rate": 1.5868668976219965e-05, + "loss": 0.2723, + "step": 27370 + }, + { + "epoch": 1.28, + "learning_rate": 1.5867885191165175e-05, + "loss": 0.2456, + "step": 27375 + }, + { + "epoch": 1.28, + "learning_rate": 1.586710140611039e-05, + "loss": 0.3611, + "step": 27380 + }, + { + "epoch": 1.28, + "learning_rate": 1.5866317621055603e-05, + "loss": 0.2447, + "step": 27385 + }, + { + "epoch": 1.28, + "learning_rate": 1.5865533836000817e-05, + "loss": 0.0605, + "step": 27390 + }, + { + "epoch": 1.28, + "learning_rate": 1.586475005094603e-05, + "loss": 0.1235, + "step": 27395 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863966265891245e-05, + "loss": 0.1074, + "step": 27400 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863182480836455e-05, + "loss": 0.1034, + "step": 27405 + }, + { + "epoch": 1.28, + "learning_rate": 1.5862398695781672e-05, + "loss": 0.1398, + "step": 27410 + }, + { + "epoch": 1.28, + "learning_rate": 1.5861614910726883e-05, + "loss": 0.1485, + "step": 27415 + }, + { + "epoch": 1.28, + "learning_rate": 1.5860831125672097e-05, + "loss": 0.184, + "step": 27420 + }, + { + "epoch": 1.28, + "learning_rate": 1.586004734061731e-05, + "loss": 0.301, + "step": 27425 + }, + { + "epoch": 1.28, + "learning_rate": 1.585926355556252e-05, + "loss": 0.3592, + "step": 27430 + }, + { + "epoch": 1.28, + "learning_rate": 1.585847977050774e-05, + "loss": 0.4242, + "step": 27435 + }, + { + "epoch": 1.28, + "learning_rate": 1.585769598545295e-05, + "loss": 0.033, + "step": 27440 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856912200398163e-05, + "loss": 0.077, + "step": 27445 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856128415343377e-05, + "loss": 0.0681, + "step": 27450 + }, + { + "epoch": 1.28, + "learning_rate": 1.585534463028859e-05, + "loss": 0.0868, + "step": 27455 + }, + { + "epoch": 1.28, + "learning_rate": 1.5854560845233805e-05, + "loss": 0.1456, + "step": 27460 + }, + { + "epoch": 1.28, + "learning_rate": 1.585377706017902e-05, + "loss": 0.1694, + "step": 27465 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852993275124233e-05, + "loss": 0.2192, + "step": 27470 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852209490069446e-05, + "loss": 0.2615, + "step": 27475 + }, + { + "epoch": 1.28, + "learning_rate": 1.5851425705014657e-05, + "loss": 0.3738, + "step": 27480 + }, + { + "epoch": 1.28, + "learning_rate": 1.585064191995987e-05, + "loss": 0.4018, + "step": 27485 + }, + { + "epoch": 1.28, + "learning_rate": 1.5849858134905085e-05, + "loss": 0.0784, + "step": 27490 + }, + { + "epoch": 1.28, + "learning_rate": 1.58490743498503e-05, + "loss": 0.0999, + "step": 27495 + }, + { + "epoch": 1.28, + "learning_rate": 1.5848290564795513e-05, + "loss": 0.094, + "step": 27500 + }, + { + "epoch": 1.28, + "learning_rate": 1.5847506779740723e-05, + "loss": 0.1918, + "step": 27505 + }, + { + "epoch": 1.28, + "learning_rate": 1.584672299468594e-05, + "loss": 0.0998, + "step": 27510 + }, + { + "epoch": 1.28, + "learning_rate": 1.584593920963115e-05, + "loss": 0.1329, + "step": 27515 + }, + { + "epoch": 1.28, + "learning_rate": 1.5845155424576365e-05, + "loss": 0.2364, + "step": 27520 + }, + { + "epoch": 1.28, + "learning_rate": 1.584437163952158e-05, + "loss": 0.2244, + "step": 27525 + }, + { + "epoch": 1.28, + "learning_rate": 1.5843587854466793e-05, + "loss": 0.2506, + "step": 27530 + }, + { + "epoch": 1.28, + "learning_rate": 1.5842804069412007e-05, + "loss": 0.2574, + "step": 27535 + }, + { + "epoch": 1.29, + "learning_rate": 1.584202028435722e-05, + "loss": 0.0459, + "step": 27540 + }, + { + "epoch": 1.29, + "learning_rate": 1.584123649930243e-05, + "loss": 0.169, + "step": 27545 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840452714247645e-05, + "loss": 0.0555, + "step": 27550 + }, + { + "epoch": 1.29, + "learning_rate": 1.583966892919286e-05, + "loss": 0.1168, + "step": 27555 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838885144138073e-05, + "loss": 0.1272, + "step": 27560 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838101359083287e-05, + "loss": 0.1271, + "step": 27565 + }, + { + "epoch": 1.29, + "learning_rate": 1.58373175740285e-05, + "loss": 0.2171, + "step": 27570 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836533788973714e-05, + "loss": 0.2344, + "step": 27575 + }, + { + "epoch": 1.29, + "learning_rate": 1.5835750003918925e-05, + "loss": 0.441, + "step": 27580 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834966218864142e-05, + "loss": 0.344, + "step": 27585 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834182433809353e-05, + "loss": 0.0275, + "step": 27590 + }, + { + "epoch": 1.29, + "learning_rate": 1.5833398648754567e-05, + "loss": 0.0879, + "step": 27595 + }, + { + "epoch": 1.29, + "learning_rate": 1.583261486369978e-05, + "loss": 0.0835, + "step": 27600 + }, + { + "epoch": 1.29, + "learning_rate": 1.5831831078644994e-05, + "loss": 0.0898, + "step": 27605 + }, + { + "epoch": 1.29, + "learning_rate": 1.583104729359021e-05, + "loss": 0.1473, + "step": 27610 + }, + { + "epoch": 1.29, + "learning_rate": 1.583026350853542e-05, + "loss": 0.1409, + "step": 27615 + }, + { + "epoch": 1.29, + "learning_rate": 1.5829479723480633e-05, + "loss": 0.1624, + "step": 27620 + }, + { + "epoch": 1.29, + "learning_rate": 1.5828695938425847e-05, + "loss": 0.2715, + "step": 27625 + }, + { + "epoch": 1.29, + "learning_rate": 1.582791215337106e-05, + "loss": 0.24, + "step": 27630 + }, + { + "epoch": 1.29, + "learning_rate": 1.5827128368316275e-05, + "loss": 0.3, + "step": 27635 + }, + { + "epoch": 1.29, + "learning_rate": 1.582634458326149e-05, + "loss": 0.1383, + "step": 27640 + }, + { + "epoch": 1.29, + "learning_rate": 1.58255607982067e-05, + "loss": 0.0718, + "step": 27645 + }, + { + "epoch": 1.29, + "learning_rate": 1.5824777013151916e-05, + "loss": 0.1329, + "step": 27650 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823993228097127e-05, + "loss": 0.0816, + "step": 27655 + }, + { + "epoch": 1.29, + "learning_rate": 1.582320944304234e-05, + "loss": 0.1626, + "step": 27660 + }, + { + "epoch": 1.29, + "learning_rate": 1.5822425657987555e-05, + "loss": 0.1631, + "step": 27665 + }, + { + "epoch": 1.29, + "learning_rate": 1.582164187293277e-05, + "loss": 0.176, + "step": 27670 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820858087877982e-05, + "loss": 0.242, + "step": 27675 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820074302823193e-05, + "loss": 0.3799, + "step": 27680 + }, + { + "epoch": 1.29, + "learning_rate": 1.581929051776841e-05, + "loss": 0.3226, + "step": 27685 + }, + { + "epoch": 1.29, + "learning_rate": 1.581850673271362e-05, + "loss": 0.0577, + "step": 27690 + }, + { + "epoch": 1.29, + "learning_rate": 1.5817722947658835e-05, + "loss": 0.0607, + "step": 27695 + }, + { + "epoch": 1.29, + "learning_rate": 1.581693916260405e-05, + "loss": 0.0799, + "step": 27700 + }, + { + "epoch": 1.29, + "learning_rate": 1.5816155377549262e-05, + "loss": 0.1294, + "step": 27705 + }, + { + "epoch": 1.29, + "learning_rate": 1.5815371592494476e-05, + "loss": 0.1614, + "step": 27710 + }, + { + "epoch": 1.29, + "learning_rate": 1.581458780743969e-05, + "loss": 0.2011, + "step": 27715 + }, + { + "epoch": 1.29, + "learning_rate": 1.58138040223849e-05, + "loss": 0.1317, + "step": 27720 + }, + { + "epoch": 1.29, + "learning_rate": 1.5813020237330118e-05, + "loss": 0.138, + "step": 27725 + }, + { + "epoch": 1.29, + "learning_rate": 1.581239320928629e-05, + "loss": 0.3351, + "step": 27730 + }, + { + "epoch": 1.29, + "learning_rate": 1.58116094242315e-05, + "loss": 0.2869, + "step": 27735 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810825639176713e-05, + "loss": 0.0514, + "step": 27740 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810041854121927e-05, + "loss": 0.0941, + "step": 27745 + }, + { + "epoch": 1.29, + "learning_rate": 1.580925806906714e-05, + "loss": 0.0642, + "step": 27750 + }, + { + "epoch": 1.3, + "learning_rate": 1.5808474284012355e-05, + "loss": 0.1529, + "step": 27755 + }, + { + "epoch": 1.3, + "learning_rate": 1.5807690498957565e-05, + "loss": 0.1682, + "step": 27760 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806906713902782e-05, + "loss": 0.1137, + "step": 27765 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806279685858953e-05, + "loss": 0.2363, + "step": 27770 + }, + { + "epoch": 1.3, + "learning_rate": 1.5805495900804163e-05, + "loss": 0.2647, + "step": 27775 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804712115749377e-05, + "loss": 0.432, + "step": 27780 + }, + { + "epoch": 1.3, + "learning_rate": 1.580392833069459e-05, + "loss": 0.2727, + "step": 27785 + }, + { + "epoch": 1.3, + "learning_rate": 1.5803144545639805e-05, + "loss": 0.0257, + "step": 27790 + }, + { + "epoch": 1.3, + "learning_rate": 1.580236076058502e-05, + "loss": 0.056, + "step": 27795 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801576975530233e-05, + "loss": 0.1739, + "step": 27800 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800793190475447e-05, + "loss": 0.1359, + "step": 27805 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800009405420657e-05, + "loss": 0.1377, + "step": 27810 + }, + { + "epoch": 1.3, + "learning_rate": 1.579922562036587e-05, + "loss": 0.2141, + "step": 27815 + }, + { + "epoch": 1.3, + "learning_rate": 1.5798441835311085e-05, + "loss": 0.2105, + "step": 27820 + }, + { + "epoch": 1.3, + "learning_rate": 1.57976580502563e-05, + "loss": 0.2329, + "step": 27825 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796874265201513e-05, + "loss": 0.2853, + "step": 27830 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796090480146727e-05, + "loss": 0.2378, + "step": 27835 + }, + { + "epoch": 1.3, + "learning_rate": 1.5795306695091937e-05, + "loss": 0.0456, + "step": 27840 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794522910037155e-05, + "loss": 0.0577, + "step": 27845 + }, + { + "epoch": 1.3, + "learning_rate": 1.5793739124982365e-05, + "loss": 0.0971, + "step": 27850 + }, + { + "epoch": 1.3, + "learning_rate": 1.579295533992758e-05, + "loss": 0.1172, + "step": 27855 + }, + { + "epoch": 1.3, + "learning_rate": 1.5792171554872793e-05, + "loss": 0.0872, + "step": 27860 + }, + { + "epoch": 1.3, + "learning_rate": 1.5791387769818007e-05, + "loss": 0.1082, + "step": 27865 + }, + { + "epoch": 1.3, + "learning_rate": 1.579060398476322e-05, + "loss": 0.1545, + "step": 27870 + }, + { + "epoch": 1.3, + "learning_rate": 1.578982019970843e-05, + "loss": 0.2331, + "step": 27875 + }, + { + "epoch": 1.3, + "learning_rate": 1.5789036414653645e-05, + "loss": 0.3144, + "step": 27880 + }, + { + "epoch": 1.3, + "learning_rate": 1.578825262959886e-05, + "loss": 0.2813, + "step": 27885 + }, + { + "epoch": 1.3, + "learning_rate": 1.5787468844544073e-05, + "loss": 0.0546, + "step": 27890 + }, + { + "epoch": 1.3, + "learning_rate": 1.5786685059489287e-05, + "loss": 0.0614, + "step": 27895 + }, + { + "epoch": 1.3, + "learning_rate": 1.57859012744345e-05, + "loss": 0.0767, + "step": 27900 + }, + { + "epoch": 1.3, + "learning_rate": 1.5785117489379715e-05, + "loss": 0.1322, + "step": 27905 + }, + { + "epoch": 1.3, + "learning_rate": 1.578433370432493e-05, + "loss": 0.134, + "step": 27910 + }, + { + "epoch": 1.3, + "learning_rate": 1.578354991927014e-05, + "loss": 0.1844, + "step": 27915 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782766134215357e-05, + "loss": 0.1431, + "step": 27920 + }, + { + "epoch": 1.3, + "learning_rate": 1.5781982349160567e-05, + "loss": 0.2983, + "step": 27925 + }, + { + "epoch": 1.3, + "learning_rate": 1.578119856410578e-05, + "loss": 0.381, + "step": 27930 + }, + { + "epoch": 1.3, + "learning_rate": 1.5780414779050995e-05, + "loss": 0.3145, + "step": 27935 + }, + { + "epoch": 1.3, + "learning_rate": 1.5779630993996205e-05, + "loss": 0.0599, + "step": 27940 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778847208941423e-05, + "loss": 0.1142, + "step": 27945 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778063423886633e-05, + "loss": 0.0946, + "step": 27950 + }, + { + "epoch": 1.3, + "learning_rate": 1.5777279638831847e-05, + "loss": 0.1403, + "step": 27955 + }, + { + "epoch": 1.3, + "learning_rate": 1.577649585377706e-05, + "loss": 0.1777, + "step": 27960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5775712068722275e-05, + "loss": 0.2978, + "step": 27965 + }, + { + "epoch": 1.31, + "learning_rate": 1.577492828366749e-05, + "loss": 0.1597, + "step": 27970 + }, + { + "epoch": 1.31, + "learning_rate": 1.5774144498612703e-05, + "loss": 0.195, + "step": 27975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5773360713557913e-05, + "loss": 0.356, + "step": 27980 + }, + { + "epoch": 1.31, + "learning_rate": 1.577257692850313e-05, + "loss": 0.3127, + "step": 27985 + }, + { + "epoch": 1.31, + "learning_rate": 1.577179314344834e-05, + "loss": 0.0726, + "step": 27990 + }, + { + "epoch": 1.31, + "learning_rate": 1.5771009358393555e-05, + "loss": 0.0738, + "step": 27995 + }, + { + "epoch": 1.31, + "learning_rate": 1.577022557333877e-05, + "loss": 0.0923, + "step": 28000 + }, + { + "epoch": 1.31, + "learning_rate": 1.5769441788283983e-05, + "loss": 0.0928, + "step": 28005 + }, + { + "epoch": 1.31, + "learning_rate": 1.5768658003229197e-05, + "loss": 0.1215, + "step": 28010 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767874218174407e-05, + "loss": 0.161, + "step": 28015 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767090433119624e-05, + "loss": 0.2235, + "step": 28020 + }, + { + "epoch": 1.31, + "learning_rate": 1.5766306648064835e-05, + "loss": 0.2186, + "step": 28025 + }, + { + "epoch": 1.31, + "learning_rate": 1.576552286301005e-05, + "loss": 0.3355, + "step": 28030 + }, + { + "epoch": 1.31, + "learning_rate": 1.5764739077955263e-05, + "loss": 0.3447, + "step": 28035 + }, + { + "epoch": 1.31, + "learning_rate": 1.5763955292900477e-05, + "loss": 0.0388, + "step": 28040 + }, + { + "epoch": 1.31, + "learning_rate": 1.576317150784569e-05, + "loss": 0.074, + "step": 28045 + }, + { + "epoch": 1.31, + "learning_rate": 1.5762387722790905e-05, + "loss": 0.058, + "step": 28050 + }, + { + "epoch": 1.31, + "learning_rate": 1.5761603937736115e-05, + "loss": 0.1904, + "step": 28055 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760820152681332e-05, + "loss": 0.0815, + "step": 28060 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760036367626543e-05, + "loss": 0.1802, + "step": 28065 + }, + { + "epoch": 1.31, + "learning_rate": 1.5759252582571757e-05, + "loss": 0.1224, + "step": 28070 + }, + { + "epoch": 1.31, + "learning_rate": 1.575846879751697e-05, + "loss": 0.2543, + "step": 28075 + }, + { + "epoch": 1.31, + "learning_rate": 1.575768501246218e-05, + "loss": 0.248, + "step": 28080 + }, + { + "epoch": 1.31, + "learning_rate": 1.57569012274074e-05, + "loss": 0.3462, + "step": 28085 + }, + { + "epoch": 1.31, + "learning_rate": 1.575611744235261e-05, + "loss": 0.0377, + "step": 28090 + }, + { + "epoch": 1.31, + "learning_rate": 1.5755333657297823e-05, + "loss": 0.0877, + "step": 28095 + }, + { + "epoch": 1.31, + "learning_rate": 1.5754549872243037e-05, + "loss": 0.1138, + "step": 28100 + }, + { + "epoch": 1.31, + "learning_rate": 1.575376608718825e-05, + "loss": 0.1132, + "step": 28105 + }, + { + "epoch": 1.31, + "learning_rate": 1.5752982302133465e-05, + "loss": 0.1068, + "step": 28110 + }, + { + "epoch": 1.31, + "learning_rate": 1.575219851707868e-05, + "loss": 0.1146, + "step": 28115 + }, + { + "epoch": 1.31, + "learning_rate": 1.5751414732023892e-05, + "loss": 0.1925, + "step": 28120 + }, + { + "epoch": 1.31, + "learning_rate": 1.5750630946969106e-05, + "loss": 0.2804, + "step": 28125 + }, + { + "epoch": 1.31, + "learning_rate": 1.5749847161914317e-05, + "loss": 0.4683, + "step": 28130 + }, + { + "epoch": 1.31, + "learning_rate": 1.574906337685953e-05, + "loss": 0.3226, + "step": 28135 + }, + { + "epoch": 1.31, + "learning_rate": 1.5748279591804745e-05, + "loss": 0.063, + "step": 28140 + }, + { + "epoch": 1.31, + "learning_rate": 1.574749580674996e-05, + "loss": 0.0538, + "step": 28145 + }, + { + "epoch": 1.31, + "learning_rate": 1.5746712021695172e-05, + "loss": 0.0772, + "step": 28150 + }, + { + "epoch": 1.31, + "learning_rate": 1.5745928236640383e-05, + "loss": 0.1166, + "step": 28155 + }, + { + "epoch": 1.31, + "learning_rate": 1.57451444515856e-05, + "loss": 0.1212, + "step": 28160 + }, + { + "epoch": 1.31, + "learning_rate": 1.574436066653081e-05, + "loss": 0.1134, + "step": 28165 + }, + { + "epoch": 1.31, + "learning_rate": 1.5743576881476025e-05, + "loss": 0.2492, + "step": 28170 + }, + { + "epoch": 1.31, + "learning_rate": 1.574279309642124e-05, + "loss": 0.3189, + "step": 28175 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742009311366453e-05, + "loss": 0.2254, + "step": 28180 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741225526311666e-05, + "loss": 0.2818, + "step": 28185 + }, + { + "epoch": 1.32, + "learning_rate": 1.574044174125688e-05, + "loss": 0.0439, + "step": 28190 + }, + { + "epoch": 1.32, + "learning_rate": 1.573965795620209e-05, + "loss": 0.1292, + "step": 28195 + }, + { + "epoch": 1.32, + "learning_rate": 1.5738874171147305e-05, + "loss": 0.1012, + "step": 28200 + }, + { + "epoch": 1.32, + "learning_rate": 1.573809038609252e-05, + "loss": 0.08, + "step": 28205 + }, + { + "epoch": 1.32, + "learning_rate": 1.5737306601037733e-05, + "loss": 0.1199, + "step": 28210 + }, + { + "epoch": 1.32, + "learning_rate": 1.5736522815982946e-05, + "loss": 0.2438, + "step": 28215 + }, + { + "epoch": 1.32, + "learning_rate": 1.573573903092816e-05, + "loss": 0.0924, + "step": 28220 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734955245873374e-05, + "loss": 0.2114, + "step": 28225 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734171460818585e-05, + "loss": 0.323, + "step": 28230 + }, + { + "epoch": 1.32, + "learning_rate": 1.5733387675763802e-05, + "loss": 0.2703, + "step": 28235 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732603890709013e-05, + "loss": 0.0825, + "step": 28240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5731820105654227e-05, + "loss": 0.0734, + "step": 28245 + }, + { + "epoch": 1.32, + "learning_rate": 1.573103632059944e-05, + "loss": 0.0782, + "step": 28250 + }, + { + "epoch": 1.32, + "learning_rate": 1.5730252535544654e-05, + "loss": 0.1712, + "step": 28255 + }, + { + "epoch": 1.32, + "learning_rate": 1.5729468750489868e-05, + "loss": 0.1776, + "step": 28260 + }, + { + "epoch": 1.32, + "learning_rate": 1.572868496543508e-05, + "loss": 0.1425, + "step": 28265 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727901180380293e-05, + "loss": 0.2537, + "step": 28270 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727117395325507e-05, + "loss": 0.2227, + "step": 28275 + }, + { + "epoch": 1.32, + "learning_rate": 1.572633361027072e-05, + "loss": 0.4145, + "step": 28280 + }, + { + "epoch": 1.32, + "learning_rate": 1.5725549825215934e-05, + "loss": 0.4865, + "step": 28285 + }, + { + "epoch": 1.32, + "learning_rate": 1.5724766040161148e-05, + "loss": 0.0633, + "step": 28290 + }, + { + "epoch": 1.32, + "learning_rate": 1.572398225510636e-05, + "loss": 0.0838, + "step": 28295 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723198470051576e-05, + "loss": 0.1071, + "step": 28300 + }, + { + "epoch": 1.32, + "learning_rate": 1.5722414684996787e-05, + "loss": 0.1015, + "step": 28305 + }, + { + "epoch": 1.32, + "learning_rate": 1.5721630899942e-05, + "loss": 0.0966, + "step": 28310 + }, + { + "epoch": 1.32, + "learning_rate": 1.5720847114887214e-05, + "loss": 0.2073, + "step": 28315 + }, + { + "epoch": 1.32, + "learning_rate": 1.572006332983243e-05, + "loss": 0.1782, + "step": 28320 + }, + { + "epoch": 1.32, + "learning_rate": 1.5719279544777642e-05, + "loss": 0.2457, + "step": 28325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5718495759722853e-05, + "loss": 0.3423, + "step": 28330 + }, + { + "epoch": 1.32, + "learning_rate": 1.571771197466807e-05, + "loss": 0.3075, + "step": 28335 + }, + { + "epoch": 1.32, + "learning_rate": 1.571692818961328e-05, + "loss": 0.0612, + "step": 28340 + }, + { + "epoch": 1.32, + "learning_rate": 1.5716144404558494e-05, + "loss": 0.087, + "step": 28345 + }, + { + "epoch": 1.32, + "learning_rate": 1.571536061950371e-05, + "loss": 0.0356, + "step": 28350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5714576834448922e-05, + "loss": 0.1484, + "step": 28355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5713793049394136e-05, + "loss": 0.1245, + "step": 28360 + }, + { + "epoch": 1.32, + "learning_rate": 1.571300926433935e-05, + "loss": 0.117, + "step": 28365 + }, + { + "epoch": 1.32, + "learning_rate": 1.571222547928456e-05, + "loss": 0.187, + "step": 28370 + }, + { + "epoch": 1.32, + "learning_rate": 1.5711441694229778e-05, + "loss": 0.3141, + "step": 28375 + }, + { + "epoch": 1.32, + "learning_rate": 1.571065790917499e-05, + "loss": 0.3338, + "step": 28380 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709874124120202e-05, + "loss": 0.3198, + "step": 28385 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709090339065416e-05, + "loss": 0.0383, + "step": 28390 + }, + { + "epoch": 1.32, + "learning_rate": 1.5708306554010627e-05, + "loss": 0.1249, + "step": 28395 + }, + { + "epoch": 1.33, + "learning_rate": 1.5707522768955844e-05, + "loss": 0.0838, + "step": 28400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5706738983901055e-05, + "loss": 0.1157, + "step": 28405 + }, + { + "epoch": 1.33, + "learning_rate": 1.570595519884627e-05, + "loss": 0.1878, + "step": 28410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5705171413791482e-05, + "loss": 0.1487, + "step": 28415 + }, + { + "epoch": 1.33, + "learning_rate": 1.5704387628736696e-05, + "loss": 0.1422, + "step": 28420 + }, + { + "epoch": 1.33, + "learning_rate": 1.570360384368191e-05, + "loss": 0.1978, + "step": 28425 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702820058627124e-05, + "loss": 0.3651, + "step": 28430 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702036273572338e-05, + "loss": 0.2579, + "step": 28435 + }, + { + "epoch": 1.33, + "learning_rate": 1.5701252488517552e-05, + "loss": 0.0636, + "step": 28440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5700468703462762e-05, + "loss": 0.069, + "step": 28445 + }, + { + "epoch": 1.33, + "learning_rate": 1.569968491840798e-05, + "loss": 0.0931, + "step": 28450 + }, + { + "epoch": 1.33, + "learning_rate": 1.569890113335319e-05, + "loss": 0.1109, + "step": 28455 + }, + { + "epoch": 1.33, + "learning_rate": 1.5698117348298404e-05, + "loss": 0.0914, + "step": 28460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5697333563243618e-05, + "loss": 0.1079, + "step": 28465 + }, + { + "epoch": 1.33, + "learning_rate": 1.569654977818883e-05, + "loss": 0.2067, + "step": 28470 + }, + { + "epoch": 1.33, + "learning_rate": 1.5695765993134046e-05, + "loss": 0.2517, + "step": 28475 + }, + { + "epoch": 1.33, + "learning_rate": 1.5694982208079256e-05, + "loss": 0.3391, + "step": 28480 + }, + { + "epoch": 1.33, + "learning_rate": 1.569419842302447e-05, + "loss": 0.1997, + "step": 28485 + }, + { + "epoch": 1.33, + "learning_rate": 1.5693414637969684e-05, + "loss": 0.0238, + "step": 28490 + }, + { + "epoch": 1.33, + "learning_rate": 1.5692630852914898e-05, + "loss": 0.0778, + "step": 28495 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691847067860112e-05, + "loss": 0.0956, + "step": 28500 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691063282805326e-05, + "loss": 0.1695, + "step": 28505 + }, + { + "epoch": 1.33, + "learning_rate": 1.5690279497750536e-05, + "loss": 0.0927, + "step": 28510 + }, + { + "epoch": 1.33, + "learning_rate": 1.5689495712695754e-05, + "loss": 0.109, + "step": 28515 + }, + { + "epoch": 1.33, + "learning_rate": 1.5688711927640964e-05, + "loss": 0.2123, + "step": 28520 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687928142586178e-05, + "loss": 0.1514, + "step": 28525 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687144357531392e-05, + "loss": 0.4057, + "step": 28530 + }, + { + "epoch": 1.33, + "learning_rate": 1.5686360572476606e-05, + "loss": 0.3171, + "step": 28535 + }, + { + "epoch": 1.33, + "learning_rate": 1.568557678742182e-05, + "loss": 0.0878, + "step": 28540 + }, + { + "epoch": 1.33, + "learning_rate": 1.568479300236703e-05, + "loss": 0.0565, + "step": 28545 + }, + { + "epoch": 1.33, + "learning_rate": 1.5684009217312248e-05, + "loss": 0.1183, + "step": 28550 + }, + { + "epoch": 1.33, + "learning_rate": 1.5683225432257458e-05, + "loss": 0.1481, + "step": 28555 + }, + { + "epoch": 1.33, + "learning_rate": 1.5682441647202672e-05, + "loss": 0.1402, + "step": 28560 + }, + { + "epoch": 1.33, + "learning_rate": 1.5681657862147886e-05, + "loss": 0.1809, + "step": 28565 + }, + { + "epoch": 1.33, + "learning_rate": 1.56808740770931e-05, + "loss": 0.2217, + "step": 28570 + }, + { + "epoch": 1.33, + "learning_rate": 1.5680090292038314e-05, + "loss": 0.3258, + "step": 28575 + }, + { + "epoch": 1.33, + "learning_rate": 1.5679306506983528e-05, + "loss": 0.2286, + "step": 28580 + }, + { + "epoch": 1.33, + "learning_rate": 1.5678522721928738e-05, + "loss": 0.2848, + "step": 28585 + }, + { + "epoch": 1.33, + "learning_rate": 1.5677738936873952e-05, + "loss": 0.0547, + "step": 28590 + }, + { + "epoch": 1.33, + "learning_rate": 1.5676955151819166e-05, + "loss": 0.0618, + "step": 28595 + }, + { + "epoch": 1.33, + "learning_rate": 1.567617136676438e-05, + "loss": 0.0941, + "step": 28600 + }, + { + "epoch": 1.33, + "learning_rate": 1.5675387581709594e-05, + "loss": 0.1233, + "step": 28605 + }, + { + "epoch": 1.33, + "learning_rate": 1.5674603796654804e-05, + "loss": 0.1848, + "step": 28610 + }, + { + "epoch": 1.34, + "learning_rate": 1.567382001160002e-05, + "loss": 0.0776, + "step": 28615 + }, + { + "epoch": 1.34, + "learning_rate": 1.5673036226545232e-05, + "loss": 0.248, + "step": 28620 + }, + { + "epoch": 1.34, + "learning_rate": 1.5672252441490446e-05, + "loss": 0.232, + "step": 28625 + }, + { + "epoch": 1.34, + "learning_rate": 1.567146865643566e-05, + "loss": 0.4364, + "step": 28630 + }, + { + "epoch": 1.34, + "learning_rate": 1.5670684871380874e-05, + "loss": 0.2168, + "step": 28635 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669901086326088e-05, + "loss": 0.049, + "step": 28640 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669117301271302e-05, + "loss": 0.0206, + "step": 28645 + }, + { + "epoch": 1.34, + "learning_rate": 1.5668333516216516e-05, + "loss": 0.1098, + "step": 28650 + }, + { + "epoch": 1.34, + "learning_rate": 1.5667549731161726e-05, + "loss": 0.1082, + "step": 28655 + }, + { + "epoch": 1.34, + "learning_rate": 1.566676594610694e-05, + "loss": 0.1286, + "step": 28660 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665982161052154e-05, + "loss": 0.2045, + "step": 28665 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665198375997368e-05, + "loss": 0.1718, + "step": 28670 + }, + { + "epoch": 1.34, + "learning_rate": 1.5664414590942582e-05, + "loss": 0.3584, + "step": 28675 + }, + { + "epoch": 1.34, + "learning_rate": 1.5663630805887796e-05, + "loss": 0.4569, + "step": 28680 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662847020833006e-05, + "loss": 0.3119, + "step": 28685 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662063235778223e-05, + "loss": 0.0559, + "step": 28690 + }, + { + "epoch": 1.34, + "learning_rate": 1.5661279450723434e-05, + "loss": 0.0382, + "step": 28695 + }, + { + "epoch": 1.34, + "learning_rate": 1.5660495665668648e-05, + "loss": 0.1046, + "step": 28700 + }, + { + "epoch": 1.34, + "learning_rate": 1.5659711880613862e-05, + "loss": 0.1162, + "step": 28705 + }, + { + "epoch": 1.34, + "learning_rate": 1.5658928095559076e-05, + "loss": 0.1435, + "step": 28710 + }, + { + "epoch": 1.34, + "learning_rate": 1.565814431050429e-05, + "loss": 0.203, + "step": 28715 + }, + { + "epoch": 1.34, + "learning_rate": 1.56573605254495e-05, + "loss": 0.2438, + "step": 28720 + }, + { + "epoch": 1.34, + "learning_rate": 1.5656576740394714e-05, + "loss": 0.214, + "step": 28725 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655792955339928e-05, + "loss": 0.369, + "step": 28730 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655009170285142e-05, + "loss": 0.2569, + "step": 28735 + }, + { + "epoch": 1.34, + "learning_rate": 1.5654225385230356e-05, + "loss": 0.0718, + "step": 28740 + }, + { + "epoch": 1.34, + "learning_rate": 1.565344160017557e-05, + "loss": 0.1807, + "step": 28745 + }, + { + "epoch": 1.34, + "learning_rate": 1.5652657815120784e-05, + "loss": 0.0907, + "step": 28750 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651874030065997e-05, + "loss": 0.1447, + "step": 28755 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651090245011208e-05, + "loss": 0.1166, + "step": 28760 + }, + { + "epoch": 1.34, + "learning_rate": 1.5650306459956425e-05, + "loss": 0.1564, + "step": 28765 + }, + { + "epoch": 1.34, + "learning_rate": 1.5649522674901636e-05, + "loss": 0.2352, + "step": 28770 + }, + { + "epoch": 1.34, + "learning_rate": 1.564873888984685e-05, + "loss": 0.2784, + "step": 28775 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647955104792064e-05, + "loss": 0.3881, + "step": 28780 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647171319737274e-05, + "loss": 0.2386, + "step": 28785 + }, + { + "epoch": 1.34, + "learning_rate": 1.564638753468249e-05, + "loss": 0.0621, + "step": 28790 + }, + { + "epoch": 1.34, + "learning_rate": 1.5645603749627702e-05, + "loss": 0.0444, + "step": 28795 + }, + { + "epoch": 1.34, + "learning_rate": 1.5644819964572916e-05, + "loss": 0.0656, + "step": 28800 + }, + { + "epoch": 1.34, + "learning_rate": 1.564403617951813e-05, + "loss": 0.0765, + "step": 28805 + }, + { + "epoch": 1.34, + "learning_rate": 1.5643252394463344e-05, + "loss": 0.2032, + "step": 28810 + }, + { + "epoch": 1.34, + "learning_rate": 1.5642468609408558e-05, + "loss": 0.1976, + "step": 28815 + }, + { + "epoch": 1.34, + "learning_rate": 1.564168482435377e-05, + "loss": 0.1735, + "step": 28820 + }, + { + "epoch": 1.35, + "learning_rate": 1.5640901039298982e-05, + "loss": 0.2373, + "step": 28825 + }, + { + "epoch": 1.35, + "learning_rate": 1.56401172542442e-05, + "loss": 0.3012, + "step": 28830 + }, + { + "epoch": 1.35, + "learning_rate": 1.563933346918941e-05, + "loss": 0.3411, + "step": 28835 + }, + { + "epoch": 1.35, + "learning_rate": 1.5638549684134624e-05, + "loss": 0.0834, + "step": 28840 + }, + { + "epoch": 1.35, + "learning_rate": 1.5637765899079838e-05, + "loss": 0.0505, + "step": 28845 + }, + { + "epoch": 1.35, + "learning_rate": 1.563698211402505e-05, + "loss": 0.076, + "step": 28850 + }, + { + "epoch": 1.35, + "learning_rate": 1.5636198328970265e-05, + "loss": 0.0456, + "step": 28855 + }, + { + "epoch": 1.35, + "learning_rate": 1.5635414543915476e-05, + "loss": 0.0746, + "step": 28860 + }, + { + "epoch": 1.35, + "learning_rate": 1.5634630758860693e-05, + "loss": 0.1093, + "step": 28865 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633846973805904e-05, + "loss": 0.1907, + "step": 28870 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633063188751118e-05, + "loss": 0.1332, + "step": 28875 + }, + { + "epoch": 1.35, + "learning_rate": 1.563227940369633e-05, + "loss": 0.3031, + "step": 28880 + }, + { + "epoch": 1.35, + "learning_rate": 1.5631495618641545e-05, + "loss": 0.3297, + "step": 28885 + }, + { + "epoch": 1.35, + "learning_rate": 1.563071183358676e-05, + "loss": 0.0652, + "step": 28890 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629928048531973e-05, + "loss": 0.0844, + "step": 28895 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629144263477184e-05, + "loss": 0.1097, + "step": 28900 + }, + { + "epoch": 1.35, + "learning_rate": 1.56283604784224e-05, + "loss": 0.115, + "step": 28905 + }, + { + "epoch": 1.35, + "learning_rate": 1.562757669336761e-05, + "loss": 0.1077, + "step": 28910 + }, + { + "epoch": 1.35, + "learning_rate": 1.5626792908312826e-05, + "loss": 0.1605, + "step": 28915 + }, + { + "epoch": 1.35, + "learning_rate": 1.562600912325804e-05, + "loss": 0.2131, + "step": 28920 + }, + { + "epoch": 1.35, + "learning_rate": 1.562522533820325e-05, + "loss": 0.1889, + "step": 28925 + }, + { + "epoch": 1.35, + "learning_rate": 1.5624441553148467e-05, + "loss": 0.3687, + "step": 28930 + }, + { + "epoch": 1.35, + "learning_rate": 1.5623657768093678e-05, + "loss": 0.2025, + "step": 28935 + }, + { + "epoch": 1.35, + "learning_rate": 1.562287398303889e-05, + "loss": 0.0673, + "step": 28940 + }, + { + "epoch": 1.35, + "learning_rate": 1.5622090197984106e-05, + "loss": 0.0705, + "step": 28945 + }, + { + "epoch": 1.35, + "learning_rate": 1.562130641292932e-05, + "loss": 0.1058, + "step": 28950 + }, + { + "epoch": 1.35, + "learning_rate": 1.5620522627874533e-05, + "loss": 0.1207, + "step": 28955 + }, + { + "epoch": 1.35, + "learning_rate": 1.5619738842819747e-05, + "loss": 0.1275, + "step": 28960 + }, + { + "epoch": 1.35, + "learning_rate": 1.561895505776496e-05, + "loss": 0.2169, + "step": 28965 + }, + { + "epoch": 1.35, + "learning_rate": 1.5618171272710175e-05, + "loss": 0.1789, + "step": 28970 + }, + { + "epoch": 1.35, + "learning_rate": 1.5617387487655386e-05, + "loss": 0.2378, + "step": 28975 + }, + { + "epoch": 1.35, + "learning_rate": 1.56166037026006e-05, + "loss": 0.4098, + "step": 28980 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615819917545813e-05, + "loss": 0.2499, + "step": 28985 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615036132491027e-05, + "loss": 0.0793, + "step": 28990 + }, + { + "epoch": 1.35, + "learning_rate": 1.561425234743624e-05, + "loss": 0.067, + "step": 28995 + }, + { + "epoch": 1.35, + "learning_rate": 1.5613468562381452e-05, + "loss": 0.0746, + "step": 29000 + }, + { + "epoch": 1.35, + "learning_rate": 1.561268477732667e-05, + "loss": 0.2106, + "step": 29005 + }, + { + "epoch": 1.35, + "learning_rate": 1.561190099227188e-05, + "loss": 0.1199, + "step": 29010 + }, + { + "epoch": 1.35, + "learning_rate": 1.5611117207217093e-05, + "loss": 0.0969, + "step": 29015 + }, + { + "epoch": 1.35, + "learning_rate": 1.5610333422162307e-05, + "loss": 0.3277, + "step": 29020 + }, + { + "epoch": 1.35, + "learning_rate": 1.560954963710752e-05, + "loss": 0.2931, + "step": 29025 + }, + { + "epoch": 1.35, + "learning_rate": 1.5608765852052735e-05, + "loss": 0.4393, + "step": 29030 + }, + { + "epoch": 1.35, + "learning_rate": 1.560798206699795e-05, + "loss": 0.4498, + "step": 29035 + }, + { + "epoch": 1.36, + "learning_rate": 1.560719828194316e-05, + "loss": 0.0525, + "step": 29040 + }, + { + "epoch": 1.36, + "learning_rate": 1.5606414496888374e-05, + "loss": 0.0493, + "step": 29045 + }, + { + "epoch": 1.36, + "learning_rate": 1.5605630711833587e-05, + "loss": 0.0895, + "step": 29050 + }, + { + "epoch": 1.36, + "learning_rate": 1.56048469267788e-05, + "loss": 0.1352, + "step": 29055 + }, + { + "epoch": 1.36, + "learning_rate": 1.5604063141724015e-05, + "loss": 0.1055, + "step": 29060 + }, + { + "epoch": 1.36, + "learning_rate": 1.560327935666923e-05, + "loss": 0.2887, + "step": 29065 + }, + { + "epoch": 1.36, + "learning_rate": 1.5602495571614443e-05, + "loss": 0.1776, + "step": 29070 + }, + { + "epoch": 1.36, + "learning_rate": 1.5601711786559654e-05, + "loss": 0.1566, + "step": 29075 + }, + { + "epoch": 1.36, + "learning_rate": 1.560092800150487e-05, + "loss": 0.2249, + "step": 29080 + }, + { + "epoch": 1.36, + "learning_rate": 1.560014421645008e-05, + "loss": 0.4125, + "step": 29085 + }, + { + "epoch": 1.36, + "learning_rate": 1.5599360431395295e-05, + "loss": 0.1064, + "step": 29090 + }, + { + "epoch": 1.36, + "learning_rate": 1.559857664634051e-05, + "loss": 0.1015, + "step": 29095 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597792861285723e-05, + "loss": 0.0806, + "step": 29100 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597009076230937e-05, + "loss": 0.1206, + "step": 29105 + }, + { + "epoch": 1.36, + "learning_rate": 1.5596225291176148e-05, + "loss": 0.1109, + "step": 29110 + }, + { + "epoch": 1.36, + "learning_rate": 1.559544150612136e-05, + "loss": 0.1305, + "step": 29115 + }, + { + "epoch": 1.36, + "learning_rate": 1.5594657721066575e-05, + "loss": 0.2071, + "step": 29120 + }, + { + "epoch": 1.36, + "learning_rate": 1.559387393601179e-05, + "loss": 0.1554, + "step": 29125 + }, + { + "epoch": 1.36, + "learning_rate": 1.5593090150957003e-05, + "loss": 0.5038, + "step": 29130 + }, + { + "epoch": 1.36, + "learning_rate": 1.5592306365902217e-05, + "loss": 0.3296, + "step": 29135 + }, + { + "epoch": 1.36, + "learning_rate": 1.5591522580847428e-05, + "loss": 0.1192, + "step": 29140 + }, + { + "epoch": 1.36, + "learning_rate": 1.5590738795792645e-05, + "loss": 0.1061, + "step": 29145 + }, + { + "epoch": 1.36, + "learning_rate": 1.5589955010737855e-05, + "loss": 0.0826, + "step": 29150 + }, + { + "epoch": 1.36, + "learning_rate": 1.558917122568307e-05, + "loss": 0.1131, + "step": 29155 + }, + { + "epoch": 1.36, + "learning_rate": 1.5588387440628283e-05, + "loss": 0.1495, + "step": 29160 + }, + { + "epoch": 1.36, + "learning_rate": 1.5587603655573497e-05, + "loss": 0.2157, + "step": 29165 + }, + { + "epoch": 1.36, + "learning_rate": 1.558681987051871e-05, + "loss": 0.2206, + "step": 29170 + }, + { + "epoch": 1.36, + "learning_rate": 1.558603608546392e-05, + "loss": 0.2772, + "step": 29175 + }, + { + "epoch": 1.36, + "learning_rate": 1.558525230040914e-05, + "loss": 0.5025, + "step": 29180 + }, + { + "epoch": 1.36, + "learning_rate": 1.558446851535435e-05, + "loss": 0.3455, + "step": 29185 + }, + { + "epoch": 1.36, + "learning_rate": 1.5583684730299563e-05, + "loss": 0.0408, + "step": 29190 + }, + { + "epoch": 1.36, + "learning_rate": 1.5582900945244777e-05, + "loss": 0.0804, + "step": 29195 + }, + { + "epoch": 1.36, + "learning_rate": 1.558211716018999e-05, + "loss": 0.1028, + "step": 29200 + }, + { + "epoch": 1.36, + "learning_rate": 1.5581333375135205e-05, + "loss": 0.0895, + "step": 29205 + }, + { + "epoch": 1.36, + "learning_rate": 1.558054959008042e-05, + "loss": 0.151, + "step": 29210 + }, + { + "epoch": 1.36, + "learning_rate": 1.557976580502563e-05, + "loss": 0.1078, + "step": 29215 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578982019970847e-05, + "loss": 0.287, + "step": 29220 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578198234916057e-05, + "loss": 0.2512, + "step": 29225 + }, + { + "epoch": 1.36, + "learning_rate": 1.557741444986127e-05, + "loss": 0.3908, + "step": 29230 + }, + { + "epoch": 1.36, + "learning_rate": 1.5576630664806485e-05, + "loss": 0.3252, + "step": 29235 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575846879751695e-05, + "loss": 0.0572, + "step": 29240 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575063094696913e-05, + "loss": 0.0728, + "step": 29245 + }, + { + "epoch": 1.36, + "learning_rate": 1.5574279309642123e-05, + "loss": 0.1044, + "step": 29250 + }, + { + "epoch": 1.37, + "learning_rate": 1.5573495524587337e-05, + "loss": 0.1306, + "step": 29255 + }, + { + "epoch": 1.37, + "learning_rate": 1.557271173953255e-05, + "loss": 0.1469, + "step": 29260 + }, + { + "epoch": 1.37, + "learning_rate": 1.5571927954477765e-05, + "loss": 0.1307, + "step": 29265 + }, + { + "epoch": 1.37, + "learning_rate": 1.557114416942298e-05, + "loss": 0.2949, + "step": 29270 + }, + { + "epoch": 1.37, + "learning_rate": 1.5570360384368193e-05, + "loss": 0.1483, + "step": 29275 + }, + { + "epoch": 1.37, + "learning_rate": 1.5569576599313407e-05, + "loss": 0.3853, + "step": 29280 + }, + { + "epoch": 1.37, + "learning_rate": 1.556879281425862e-05, + "loss": 0.2308, + "step": 29285 + }, + { + "epoch": 1.37, + "learning_rate": 1.556800902920383e-05, + "loss": 0.0336, + "step": 29290 + }, + { + "epoch": 1.37, + "learning_rate": 1.556722524414905e-05, + "loss": 0.0368, + "step": 29295 + }, + { + "epoch": 1.37, + "learning_rate": 1.556644145909426e-05, + "loss": 0.0633, + "step": 29300 + }, + { + "epoch": 1.37, + "learning_rate": 1.5565657674039473e-05, + "loss": 0.1041, + "step": 29305 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564873888984687e-05, + "loss": 0.2455, + "step": 29310 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564090103929897e-05, + "loss": 0.128, + "step": 29315 + }, + { + "epoch": 1.37, + "learning_rate": 1.5563306318875115e-05, + "loss": 0.1876, + "step": 29320 + }, + { + "epoch": 1.37, + "learning_rate": 1.5562522533820325e-05, + "loss": 0.1958, + "step": 29325 + }, + { + "epoch": 1.37, + "learning_rate": 1.556173874876554e-05, + "loss": 0.3136, + "step": 29330 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560954963710753e-05, + "loss": 0.3116, + "step": 29335 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560171178655967e-05, + "loss": 0.0619, + "step": 29340 + }, + { + "epoch": 1.37, + "learning_rate": 1.555938739360118e-05, + "loss": 0.0497, + "step": 29345 + }, + { + "epoch": 1.37, + "learning_rate": 1.5558603608546395e-05, + "loss": 0.1101, + "step": 29350 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557819823491605e-05, + "loss": 0.1347, + "step": 29355 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557036038436822e-05, + "loss": 0.14, + "step": 29360 + }, + { + "epoch": 1.37, + "learning_rate": 1.5556252253382033e-05, + "loss": 0.2094, + "step": 29365 + }, + { + "epoch": 1.37, + "learning_rate": 1.5555468468327247e-05, + "loss": 0.2115, + "step": 29370 + }, + { + "epoch": 1.37, + "learning_rate": 1.555468468327246e-05, + "loss": 0.23, + "step": 29375 + }, + { + "epoch": 1.37, + "learning_rate": 1.5553900898217675e-05, + "loss": 0.409, + "step": 29380 + }, + { + "epoch": 1.37, + "learning_rate": 1.555311711316289e-05, + "loss": 0.2575, + "step": 29385 + }, + { + "epoch": 1.37, + "learning_rate": 1.55523333281081e-05, + "loss": 0.0701, + "step": 29390 + }, + { + "epoch": 1.37, + "learning_rate": 1.5551549543053316e-05, + "loss": 0.0587, + "step": 29395 + }, + { + "epoch": 1.37, + "learning_rate": 1.5550765757998527e-05, + "loss": 0.1614, + "step": 29400 + }, + { + "epoch": 1.37, + "learning_rate": 1.554998197294374e-05, + "loss": 0.1066, + "step": 29405 + }, + { + "epoch": 1.37, + "learning_rate": 1.5549198187888955e-05, + "loss": 0.1333, + "step": 29410 + }, + { + "epoch": 1.37, + "learning_rate": 1.554841440283417e-05, + "loss": 0.181, + "step": 29415 + }, + { + "epoch": 1.37, + "learning_rate": 1.5547630617779383e-05, + "loss": 0.183, + "step": 29420 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546846832724596e-05, + "loss": 0.2203, + "step": 29425 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546063047669807e-05, + "loss": 0.4626, + "step": 29430 + }, + { + "epoch": 1.37, + "learning_rate": 1.554527926261502e-05, + "loss": 0.2009, + "step": 29435 + }, + { + "epoch": 1.37, + "learning_rate": 1.5544495477560235e-05, + "loss": 0.0562, + "step": 29440 + }, + { + "epoch": 1.37, + "learning_rate": 1.554371169250545e-05, + "loss": 0.1155, + "step": 29445 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542927907450663e-05, + "loss": 0.0612, + "step": 29450 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542144122395873e-05, + "loss": 0.1157, + "step": 29455 + }, + { + "epoch": 1.37, + "learning_rate": 1.554136033734109e-05, + "loss": 0.1621, + "step": 29460 + }, + { + "epoch": 1.37, + "learning_rate": 1.55405765522863e-05, + "loss": 0.0789, + "step": 29465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5539792767231515e-05, + "loss": 0.2351, + "step": 29470 + }, + { + "epoch": 1.38, + "learning_rate": 1.553900898217673e-05, + "loss": 0.2513, + "step": 29475 + }, + { + "epoch": 1.38, + "learning_rate": 1.5538225197121943e-05, + "loss": 0.3753, + "step": 29480 + }, + { + "epoch": 1.38, + "learning_rate": 1.5537441412067157e-05, + "loss": 0.2661, + "step": 29485 + }, + { + "epoch": 1.38, + "learning_rate": 1.553665762701237e-05, + "loss": 0.1098, + "step": 29490 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535873841957584e-05, + "loss": 0.0493, + "step": 29495 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535090056902795e-05, + "loss": 0.1339, + "step": 29500 + }, + { + "epoch": 1.38, + "learning_rate": 1.553430627184801e-05, + "loss": 0.1432, + "step": 29505 + }, + { + "epoch": 1.38, + "learning_rate": 1.5533522486793223e-05, + "loss": 0.107, + "step": 29510 + }, + { + "epoch": 1.38, + "learning_rate": 1.5532738701738437e-05, + "loss": 0.1993, + "step": 29515 + }, + { + "epoch": 1.38, + "learning_rate": 1.553195491668365e-05, + "loss": 0.3002, + "step": 29520 + }, + { + "epoch": 1.38, + "learning_rate": 1.5531171131628864e-05, + "loss": 0.2679, + "step": 29525 + }, + { + "epoch": 1.38, + "learning_rate": 1.5530387346574075e-05, + "loss": 0.3519, + "step": 29530 + }, + { + "epoch": 1.38, + "learning_rate": 1.5529603561519292e-05, + "loss": 0.2758, + "step": 29535 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528819776464503e-05, + "loss": 0.1208, + "step": 29540 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528035991409717e-05, + "loss": 0.0767, + "step": 29545 + }, + { + "epoch": 1.38, + "learning_rate": 1.552725220635493e-05, + "loss": 0.0603, + "step": 29550 + }, + { + "epoch": 1.38, + "learning_rate": 1.5526468421300144e-05, + "loss": 0.0691, + "step": 29555 + }, + { + "epoch": 1.38, + "learning_rate": 1.552568463624536e-05, + "loss": 0.1479, + "step": 29560 + }, + { + "epoch": 1.38, + "learning_rate": 1.552490085119057e-05, + "loss": 0.0891, + "step": 29565 + }, + { + "epoch": 1.38, + "learning_rate": 1.5524117066135783e-05, + "loss": 0.2104, + "step": 29570 + }, + { + "epoch": 1.38, + "learning_rate": 1.5523333281080997e-05, + "loss": 0.2748, + "step": 29575 + }, + { + "epoch": 1.38, + "learning_rate": 1.552254949602621e-05, + "loss": 0.3622, + "step": 29580 + }, + { + "epoch": 1.38, + "learning_rate": 1.5521765710971425e-05, + "loss": 0.252, + "step": 29585 + }, + { + "epoch": 1.38, + "learning_rate": 1.552098192591664e-05, + "loss": 0.079, + "step": 29590 + }, + { + "epoch": 1.38, + "learning_rate": 1.5520198140861852e-05, + "loss": 0.0913, + "step": 29595 + }, + { + "epoch": 1.38, + "learning_rate": 1.5519414355807066e-05, + "loss": 0.1742, + "step": 29600 + }, + { + "epoch": 1.38, + "learning_rate": 1.5518630570752277e-05, + "loss": 0.073, + "step": 29605 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517846785697494e-05, + "loss": 0.091, + "step": 29610 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517063000642705e-05, + "loss": 0.1321, + "step": 29615 + }, + { + "epoch": 1.38, + "learning_rate": 1.551627921558792e-05, + "loss": 0.1131, + "step": 29620 + }, + { + "epoch": 1.38, + "learning_rate": 1.5515495430533132e-05, + "loss": 0.3363, + "step": 29625 + }, + { + "epoch": 1.38, + "learning_rate": 1.5514711645478343e-05, + "loss": 0.3416, + "step": 29630 + }, + { + "epoch": 1.38, + "learning_rate": 1.551392786042356e-05, + "loss": 0.3418, + "step": 29635 + }, + { + "epoch": 1.38, + "learning_rate": 1.551314407536877e-05, + "loss": 0.0653, + "step": 29640 + }, + { + "epoch": 1.38, + "learning_rate": 1.5512360290313985e-05, + "loss": 0.0418, + "step": 29645 + }, + { + "epoch": 1.38, + "learning_rate": 1.55115765052592e-05, + "loss": 0.0941, + "step": 29650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510792720204412e-05, + "loss": 0.1759, + "step": 29655 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510008935149626e-05, + "loss": 0.111, + "step": 29660 + }, + { + "epoch": 1.38, + "learning_rate": 1.550922515009484e-05, + "loss": 0.1351, + "step": 29665 + }, + { + "epoch": 1.38, + "learning_rate": 1.550844136504005e-05, + "loss": 0.1784, + "step": 29670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5507657579985268e-05, + "loss": 0.2896, + "step": 29675 + }, + { + "epoch": 1.38, + "learning_rate": 1.550687379493048e-05, + "loss": 0.3232, + "step": 29680 + }, + { + "epoch": 1.39, + "learning_rate": 1.5506090009875692e-05, + "loss": 0.5113, + "step": 29685 + }, + { + "epoch": 1.39, + "learning_rate": 1.5505306224820906e-05, + "loss": 0.0731, + "step": 29690 + }, + { + "epoch": 1.39, + "learning_rate": 1.550452243976612e-05, + "loss": 0.0491, + "step": 29695 + }, + { + "epoch": 1.39, + "learning_rate": 1.5503738654711334e-05, + "loss": 0.1016, + "step": 29700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502954869656545e-05, + "loss": 0.0946, + "step": 29705 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502171084601762e-05, + "loss": 0.1709, + "step": 29710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5501387299546973e-05, + "loss": 0.2002, + "step": 29715 + }, + { + "epoch": 1.39, + "learning_rate": 1.5500603514492186e-05, + "loss": 0.2524, + "step": 29720 + }, + { + "epoch": 1.39, + "learning_rate": 1.54998197294374e-05, + "loss": 0.2481, + "step": 29725 + }, + { + "epoch": 1.39, + "learning_rate": 1.5499035944382614e-05, + "loss": 0.4631, + "step": 29730 + }, + { + "epoch": 1.39, + "learning_rate": 1.5498252159327828e-05, + "loss": 0.273, + "step": 29735 + }, + { + "epoch": 1.39, + "learning_rate": 1.5497468374273042e-05, + "loss": 0.0508, + "step": 29740 + }, + { + "epoch": 1.39, + "learning_rate": 1.5496684589218253e-05, + "loss": 0.1125, + "step": 29745 + }, + { + "epoch": 1.39, + "learning_rate": 1.549590080416347e-05, + "loss": 0.0327, + "step": 29750 + }, + { + "epoch": 1.39, + "learning_rate": 1.549511701910868e-05, + "loss": 0.1278, + "step": 29755 + }, + { + "epoch": 1.39, + "learning_rate": 1.5494333234053894e-05, + "loss": 0.0829, + "step": 29760 + }, + { + "epoch": 1.39, + "learning_rate": 1.5493549448999108e-05, + "loss": 0.1396, + "step": 29765 + }, + { + "epoch": 1.39, + "learning_rate": 1.549276566394432e-05, + "loss": 0.1894, + "step": 29770 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491981878889536e-05, + "loss": 0.2046, + "step": 29775 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491198093834747e-05, + "loss": 0.2948, + "step": 29780 + }, + { + "epoch": 1.39, + "learning_rate": 1.549041430877996e-05, + "loss": 0.3256, + "step": 29785 + }, + { + "epoch": 1.39, + "learning_rate": 1.5489630523725174e-05, + "loss": 0.09, + "step": 29790 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488846738670388e-05, + "loss": 0.0646, + "step": 29795 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488062953615602e-05, + "loss": 0.1158, + "step": 29800 + }, + { + "epoch": 1.39, + "learning_rate": 1.5487279168560816e-05, + "loss": 0.1228, + "step": 29805 + }, + { + "epoch": 1.39, + "learning_rate": 1.548649538350603e-05, + "loss": 0.1435, + "step": 29810 + }, + { + "epoch": 1.39, + "learning_rate": 1.5485711598451244e-05, + "loss": 0.1631, + "step": 29815 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484927813396454e-05, + "loss": 0.0708, + "step": 29820 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484144028341668e-05, + "loss": 0.4099, + "step": 29825 + }, + { + "epoch": 1.39, + "learning_rate": 1.5483360243286882e-05, + "loss": 0.4474, + "step": 29830 + }, + { + "epoch": 1.39, + "learning_rate": 1.5482576458232096e-05, + "loss": 0.3448, + "step": 29835 + }, + { + "epoch": 1.39, + "learning_rate": 1.548179267317731e-05, + "loss": 0.0606, + "step": 29840 + }, + { + "epoch": 1.39, + "learning_rate": 1.548100888812252e-05, + "loss": 0.1094, + "step": 29845 + }, + { + "epoch": 1.39, + "learning_rate": 1.5480225103067738e-05, + "loss": 0.1081, + "step": 29850 + }, + { + "epoch": 1.39, + "learning_rate": 1.547944131801295e-05, + "loss": 0.1361, + "step": 29855 + }, + { + "epoch": 1.39, + "learning_rate": 1.5478657532958162e-05, + "loss": 0.1611, + "step": 29860 + }, + { + "epoch": 1.39, + "learning_rate": 1.5477873747903376e-05, + "loss": 0.1986, + "step": 29865 + }, + { + "epoch": 1.39, + "learning_rate": 1.547708996284859e-05, + "loss": 0.1848, + "step": 29870 + }, + { + "epoch": 1.39, + "learning_rate": 1.5476306177793804e-05, + "loss": 0.3482, + "step": 29875 + }, + { + "epoch": 1.39, + "learning_rate": 1.5475522392739018e-05, + "loss": 0.4131, + "step": 29880 + }, + { + "epoch": 1.39, + "learning_rate": 1.547473860768423e-05, + "loss": 0.2946, + "step": 29885 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473954822629442e-05, + "loss": 0.0356, + "step": 29890 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473171037574656e-05, + "loss": 0.0547, + "step": 29895 + }, + { + "epoch": 1.4, + "learning_rate": 1.547238725251987e-05, + "loss": 0.1375, + "step": 29900 + }, + { + "epoch": 1.4, + "learning_rate": 1.5471603467465084e-05, + "loss": 0.119, + "step": 29905 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470819682410298e-05, + "loss": 0.107, + "step": 29910 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470035897355512e-05, + "loss": 0.1367, + "step": 29915 + }, + { + "epoch": 1.4, + "learning_rate": 1.5469252112300722e-05, + "loss": 0.147, + "step": 29920 + }, + { + "epoch": 1.4, + "learning_rate": 1.546846832724594e-05, + "loss": 0.2123, + "step": 29925 + }, + { + "epoch": 1.4, + "learning_rate": 1.546768454219115e-05, + "loss": 0.3934, + "step": 29930 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466900757136364e-05, + "loss": 0.2301, + "step": 29935 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466116972081578e-05, + "loss": 0.045, + "step": 29940 + }, + { + "epoch": 1.4, + "learning_rate": 1.5465333187026792e-05, + "loss": 0.092, + "step": 29945 + }, + { + "epoch": 1.4, + "learning_rate": 1.5464549401972006e-05, + "loss": 0.0908, + "step": 29950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5463765616917216e-05, + "loss": 0.1098, + "step": 29955 + }, + { + "epoch": 1.4, + "learning_rate": 1.546298183186243e-05, + "loss": 0.0832, + "step": 29960 + }, + { + "epoch": 1.4, + "learning_rate": 1.5462198046807644e-05, + "loss": 0.1968, + "step": 29965 + }, + { + "epoch": 1.4, + "learning_rate": 1.5461414261752858e-05, + "loss": 0.2016, + "step": 29970 + }, + { + "epoch": 1.4, + "learning_rate": 1.5460630476698072e-05, + "loss": 0.2575, + "step": 29975 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459846691643286e-05, + "loss": 0.4952, + "step": 29980 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459062906588496e-05, + "loss": 0.364, + "step": 29985 + }, + { + "epoch": 1.4, + "learning_rate": 1.5458279121533714e-05, + "loss": 0.0521, + "step": 29990 + }, + { + "epoch": 1.4, + "learning_rate": 1.5457495336478924e-05, + "loss": 0.0457, + "step": 29995 + }, + { + "epoch": 1.4, + "learning_rate": 1.5456711551424138e-05, + "loss": 0.0946, + "step": 30000 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455927766369352e-05, + "loss": 0.1691, + "step": 30005 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455143981314566e-05, + "loss": 0.1704, + "step": 30010 + }, + { + "epoch": 1.4, + "learning_rate": 1.545436019625978e-05, + "loss": 0.1653, + "step": 30015 + }, + { + "epoch": 1.4, + "learning_rate": 1.545357641120499e-05, + "loss": 0.1592, + "step": 30020 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452792626150208e-05, + "loss": 0.2251, + "step": 30025 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452008841095418e-05, + "loss": 0.2223, + "step": 30030 + }, + { + "epoch": 1.4, + "learning_rate": 1.5451225056040632e-05, + "loss": 0.3359, + "step": 30035 + }, + { + "epoch": 1.4, + "learning_rate": 1.5450441270985846e-05, + "loss": 0.0442, + "step": 30040 + }, + { + "epoch": 1.4, + "learning_rate": 1.544965748593106e-05, + "loss": 0.1268, + "step": 30045 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448873700876274e-05, + "loss": 0.1322, + "step": 30050 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448089915821488e-05, + "loss": 0.0535, + "step": 30055 + }, + { + "epoch": 1.4, + "learning_rate": 1.5447306130766698e-05, + "loss": 0.1431, + "step": 30060 + }, + { + "epoch": 1.4, + "learning_rate": 1.5446522345711915e-05, + "loss": 0.1551, + "step": 30065 + }, + { + "epoch": 1.4, + "learning_rate": 1.5445738560657126e-05, + "loss": 0.1445, + "step": 30070 + }, + { + "epoch": 1.4, + "learning_rate": 1.544495477560234e-05, + "loss": 0.2025, + "step": 30075 + }, + { + "epoch": 1.4, + "learning_rate": 1.5444170990547554e-05, + "loss": 0.3375, + "step": 30080 + }, + { + "epoch": 1.4, + "learning_rate": 1.5443387205492764e-05, + "loss": 0.2115, + "step": 30085 + }, + { + "epoch": 1.4, + "learning_rate": 1.544260342043798e-05, + "loss": 0.0324, + "step": 30090 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441819635383192e-05, + "loss": 0.0582, + "step": 30095 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441035850328406e-05, + "loss": 0.0829, + "step": 30100 + }, + { + "epoch": 1.4, + "learning_rate": 1.544025206527362e-05, + "loss": 0.0776, + "step": 30105 + }, + { + "epoch": 1.4, + "learning_rate": 1.5439468280218834e-05, + "loss": 0.1564, + "step": 30110 + }, + { + "epoch": 1.41, + "learning_rate": 1.5438684495164048e-05, + "loss": 0.1717, + "step": 30115 + }, + { + "epoch": 1.41, + "learning_rate": 1.543790071010926e-05, + "loss": 0.217, + "step": 30120 + }, + { + "epoch": 1.41, + "learning_rate": 1.5437116925054476e-05, + "loss": 0.2191, + "step": 30125 + }, + { + "epoch": 1.41, + "learning_rate": 1.543633313999969e-05, + "loss": 0.3155, + "step": 30130 + }, + { + "epoch": 1.41, + "learning_rate": 1.54355493549449e-05, + "loss": 0.3595, + "step": 30135 + }, + { + "epoch": 1.41, + "learning_rate": 1.5434765569890117e-05, + "loss": 0.0571, + "step": 30140 + }, + { + "epoch": 1.41, + "learning_rate": 1.5433981784835328e-05, + "loss": 0.0568, + "step": 30145 + }, + { + "epoch": 1.41, + "learning_rate": 1.543319799978054e-05, + "loss": 0.077, + "step": 30150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5432414214725756e-05, + "loss": 0.1147, + "step": 30155 + }, + { + "epoch": 1.41, + "learning_rate": 1.5431630429670966e-05, + "loss": 0.0826, + "step": 30160 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430846644616183e-05, + "loss": 0.1165, + "step": 30165 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430062859561394e-05, + "loss": 0.1987, + "step": 30170 + }, + { + "epoch": 1.41, + "learning_rate": 1.5429279074506608e-05, + "loss": 0.2379, + "step": 30175 + }, + { + "epoch": 1.41, + "learning_rate": 1.542849528945182e-05, + "loss": 0.5167, + "step": 30180 + }, + { + "epoch": 1.41, + "learning_rate": 1.5427711504397036e-05, + "loss": 0.2545, + "step": 30185 + }, + { + "epoch": 1.41, + "learning_rate": 1.542692771934225e-05, + "loss": 0.0899, + "step": 30190 + }, + { + "epoch": 1.41, + "learning_rate": 1.5426143934287463e-05, + "loss": 0.0671, + "step": 30195 + }, + { + "epoch": 1.41, + "learning_rate": 1.5425360149232674e-05, + "loss": 0.1145, + "step": 30200 + }, + { + "epoch": 1.41, + "learning_rate": 1.542457636417789e-05, + "loss": 0.0848, + "step": 30205 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423792579123102e-05, + "loss": 0.208, + "step": 30210 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423008794068316e-05, + "loss": 0.1277, + "step": 30215 + }, + { + "epoch": 1.41, + "learning_rate": 1.542222500901353e-05, + "loss": 0.182, + "step": 30220 + }, + { + "epoch": 1.41, + "learning_rate": 1.5421441223958743e-05, + "loss": 0.2234, + "step": 30225 + }, + { + "epoch": 1.41, + "learning_rate": 1.5420657438903957e-05, + "loss": 0.3056, + "step": 30230 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419873653849168e-05, + "loss": 0.4144, + "step": 30235 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419089868794385e-05, + "loss": 0.0343, + "step": 30240 + }, + { + "epoch": 1.41, + "learning_rate": 1.5418306083739596e-05, + "loss": 0.0805, + "step": 30245 + }, + { + "epoch": 1.41, + "learning_rate": 1.541752229868481e-05, + "loss": 0.1667, + "step": 30250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5416738513630024e-05, + "loss": 0.1355, + "step": 30255 + }, + { + "epoch": 1.41, + "learning_rate": 1.5415954728575237e-05, + "loss": 0.1269, + "step": 30260 + }, + { + "epoch": 1.41, + "learning_rate": 1.541517094352045e-05, + "loss": 0.246, + "step": 30265 + }, + { + "epoch": 1.41, + "learning_rate": 1.5414387158465665e-05, + "loss": 0.1213, + "step": 30270 + }, + { + "epoch": 1.41, + "learning_rate": 1.5413603373410876e-05, + "loss": 0.169, + "step": 30275 + }, + { + "epoch": 1.41, + "learning_rate": 1.541281958835609e-05, + "loss": 0.3212, + "step": 30280 + }, + { + "epoch": 1.41, + "learning_rate": 1.5412035803301304e-05, + "loss": 0.1963, + "step": 30285 + }, + { + "epoch": 1.41, + "learning_rate": 1.5411252018246517e-05, + "loss": 0.0683, + "step": 30290 + }, + { + "epoch": 1.41, + "learning_rate": 1.541046823319173e-05, + "loss": 0.0797, + "step": 30295 + }, + { + "epoch": 1.41, + "learning_rate": 1.5409684448136942e-05, + "loss": 0.0972, + "step": 30300 + }, + { + "epoch": 1.41, + "learning_rate": 1.540890066308216e-05, + "loss": 0.1827, + "step": 30305 + }, + { + "epoch": 1.41, + "learning_rate": 1.540811687802737e-05, + "loss": 0.0909, + "step": 30310 + }, + { + "epoch": 1.41, + "learning_rate": 1.5407333092972584e-05, + "loss": 0.1743, + "step": 30315 + }, + { + "epoch": 1.41, + "learning_rate": 1.5406549307917798e-05, + "loss": 0.1385, + "step": 30320 + }, + { + "epoch": 1.42, + "learning_rate": 1.540576552286301e-05, + "loss": 0.2003, + "step": 30325 + }, + { + "epoch": 1.42, + "learning_rate": 1.5404981737808225e-05, + "loss": 0.4004, + "step": 30330 + }, + { + "epoch": 1.42, + "learning_rate": 1.540419795275344e-05, + "loss": 0.2143, + "step": 30335 + }, + { + "epoch": 1.42, + "learning_rate": 1.5403414167698653e-05, + "loss": 0.0175, + "step": 30340 + }, + { + "epoch": 1.42, + "learning_rate": 1.5402630382643864e-05, + "loss": 0.1199, + "step": 30345 + }, + { + "epoch": 1.42, + "learning_rate": 1.5401846597589078e-05, + "loss": 0.0549, + "step": 30350 + }, + { + "epoch": 1.42, + "learning_rate": 1.540106281253429e-05, + "loss": 0.1376, + "step": 30355 + }, + { + "epoch": 1.42, + "learning_rate": 1.5400279027479505e-05, + "loss": 0.1743, + "step": 30360 + }, + { + "epoch": 1.42, + "learning_rate": 1.539949524242472e-05, + "loss": 0.2547, + "step": 30365 + }, + { + "epoch": 1.42, + "learning_rate": 1.5398711457369933e-05, + "loss": 0.1769, + "step": 30370 + }, + { + "epoch": 1.42, + "learning_rate": 1.5397927672315144e-05, + "loss": 0.3749, + "step": 30375 + }, + { + "epoch": 1.42, + "learning_rate": 1.539714388726036e-05, + "loss": 0.5307, + "step": 30380 + }, + { + "epoch": 1.42, + "learning_rate": 1.539636010220557e-05, + "loss": 0.3359, + "step": 30385 + }, + { + "epoch": 1.42, + "learning_rate": 1.5395576317150785e-05, + "loss": 0.1245, + "step": 30390 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394792532096e-05, + "loss": 0.0377, + "step": 30395 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394008747041213e-05, + "loss": 0.1, + "step": 30400 + }, + { + "epoch": 1.42, + "learning_rate": 1.5393224961986427e-05, + "loss": 0.146, + "step": 30405 + }, + { + "epoch": 1.42, + "learning_rate": 1.5392441176931638e-05, + "loss": 0.1585, + "step": 30410 + }, + { + "epoch": 1.42, + "learning_rate": 1.539165739187685e-05, + "loss": 0.2002, + "step": 30415 + }, + { + "epoch": 1.42, + "learning_rate": 1.5390873606822065e-05, + "loss": 0.2221, + "step": 30420 + }, + { + "epoch": 1.42, + "learning_rate": 1.539008982176728e-05, + "loss": 0.2473, + "step": 30425 + }, + { + "epoch": 1.42, + "learning_rate": 1.5389306036712493e-05, + "loss": 0.3531, + "step": 30430 + }, + { + "epoch": 1.42, + "learning_rate": 1.5388522251657707e-05, + "loss": 0.3376, + "step": 30435 + }, + { + "epoch": 1.42, + "learning_rate": 1.538773846660292e-05, + "loss": 0.1212, + "step": 30440 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386954681548135e-05, + "loss": 0.0599, + "step": 30445 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386170896493345e-05, + "loss": 0.0923, + "step": 30450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5385387111438563e-05, + "loss": 0.0752, + "step": 30455 + }, + { + "epoch": 1.42, + "learning_rate": 1.5384603326383773e-05, + "loss": 0.074, + "step": 30460 + }, + { + "epoch": 1.42, + "learning_rate": 1.5383819541328987e-05, + "loss": 0.0945, + "step": 30465 + }, + { + "epoch": 1.42, + "learning_rate": 1.53830357562742e-05, + "loss": 0.2006, + "step": 30470 + }, + { + "epoch": 1.42, + "learning_rate": 1.538225197121941e-05, + "loss": 0.2685, + "step": 30475 + }, + { + "epoch": 1.42, + "learning_rate": 1.538146818616463e-05, + "loss": 0.2831, + "step": 30480 + }, + { + "epoch": 1.42, + "learning_rate": 1.538068440110984e-05, + "loss": 0.1803, + "step": 30485 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379900616055053e-05, + "loss": 0.0256, + "step": 30490 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379116831000267e-05, + "loss": 0.0892, + "step": 30495 + }, + { + "epoch": 1.42, + "learning_rate": 1.537833304594548e-05, + "loss": 0.1554, + "step": 30500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5377549260890695e-05, + "loss": 0.1182, + "step": 30505 + }, + { + "epoch": 1.42, + "learning_rate": 1.537676547583591e-05, + "loss": 0.1168, + "step": 30510 + }, + { + "epoch": 1.42, + "learning_rate": 1.537598169078112e-05, + "loss": 0.1539, + "step": 30515 + }, + { + "epoch": 1.42, + "learning_rate": 1.5375197905726337e-05, + "loss": 0.229, + "step": 30520 + }, + { + "epoch": 1.42, + "learning_rate": 1.5374414120671547e-05, + "loss": 0.2612, + "step": 30525 + }, + { + "epoch": 1.42, + "learning_rate": 1.537363033561676e-05, + "loss": 0.4199, + "step": 30530 + }, + { + "epoch": 1.42, + "learning_rate": 1.5372846550561975e-05, + "loss": 0.3006, + "step": 30535 + }, + { + "epoch": 1.43, + "learning_rate": 1.537206276550719e-05, + "loss": 0.0404, + "step": 30540 + }, + { + "epoch": 1.43, + "learning_rate": 1.5371278980452403e-05, + "loss": 0.0872, + "step": 30545 + }, + { + "epoch": 1.43, + "learning_rate": 1.5370495195397613e-05, + "loss": 0.1091, + "step": 30550 + }, + { + "epoch": 1.43, + "learning_rate": 1.536971141034283e-05, + "loss": 0.0654, + "step": 30555 + }, + { + "epoch": 1.43, + "learning_rate": 1.536892762528804e-05, + "loss": 0.1402, + "step": 30560 + }, + { + "epoch": 1.43, + "learning_rate": 1.5368143840233255e-05, + "loss": 0.1676, + "step": 30565 + }, + { + "epoch": 1.43, + "learning_rate": 1.536736005517847e-05, + "loss": 0.1912, + "step": 30570 + }, + { + "epoch": 1.43, + "learning_rate": 1.5366576270123683e-05, + "loss": 0.3351, + "step": 30575 + }, + { + "epoch": 1.43, + "learning_rate": 1.5365792485068897e-05, + "loss": 0.367, + "step": 30580 + }, + { + "epoch": 1.43, + "learning_rate": 1.536500870001411e-05, + "loss": 0.2385, + "step": 30585 + }, + { + "epoch": 1.43, + "learning_rate": 1.536422491495932e-05, + "loss": 0.0754, + "step": 30590 + }, + { + "epoch": 1.43, + "learning_rate": 1.536344112990454e-05, + "loss": 0.0664, + "step": 30595 + }, + { + "epoch": 1.43, + "learning_rate": 1.536265734484975e-05, + "loss": 0.1467, + "step": 30600 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361873559794963e-05, + "loss": 0.0865, + "step": 30605 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361089774740177e-05, + "loss": 0.1208, + "step": 30610 + }, + { + "epoch": 1.43, + "learning_rate": 1.5360305989685387e-05, + "loss": 0.1351, + "step": 30615 + }, + { + "epoch": 1.43, + "learning_rate": 1.5359522204630605e-05, + "loss": 0.1539, + "step": 30620 + }, + { + "epoch": 1.43, + "learning_rate": 1.5358738419575815e-05, + "loss": 0.2201, + "step": 30625 + }, + { + "epoch": 1.43, + "learning_rate": 1.535795463452103e-05, + "loss": 0.3883, + "step": 30630 + }, + { + "epoch": 1.43, + "learning_rate": 1.5357170849466243e-05, + "loss": 0.2628, + "step": 30635 + }, + { + "epoch": 1.43, + "learning_rate": 1.5356387064411457e-05, + "loss": 0.0461, + "step": 30640 + }, + { + "epoch": 1.43, + "learning_rate": 1.535560327935667e-05, + "loss": 0.0915, + "step": 30645 + }, + { + "epoch": 1.43, + "learning_rate": 1.5354819494301885e-05, + "loss": 0.0698, + "step": 30650 + }, + { + "epoch": 1.43, + "learning_rate": 1.53540357092471e-05, + "loss": 0.1448, + "step": 30655 + }, + { + "epoch": 1.43, + "learning_rate": 1.5353251924192313e-05, + "loss": 0.1386, + "step": 30660 + }, + { + "epoch": 1.43, + "learning_rate": 1.5352468139137523e-05, + "loss": 0.169, + "step": 30665 + }, + { + "epoch": 1.43, + "learning_rate": 1.5351684354082737e-05, + "loss": 0.1716, + "step": 30670 + }, + { + "epoch": 1.43, + "learning_rate": 1.535090056902795e-05, + "loss": 0.1982, + "step": 30675 + }, + { + "epoch": 1.43, + "learning_rate": 1.5350116783973165e-05, + "loss": 0.3828, + "step": 30680 + }, + { + "epoch": 1.43, + "learning_rate": 1.534933299891838e-05, + "loss": 0.2975, + "step": 30685 + }, + { + "epoch": 1.43, + "learning_rate": 1.534854921386359e-05, + "loss": 0.0999, + "step": 30690 + }, + { + "epoch": 1.43, + "learning_rate": 1.5347765428808807e-05, + "loss": 0.0746, + "step": 30695 + }, + { + "epoch": 1.43, + "learning_rate": 1.5346981643754017e-05, + "loss": 0.0579, + "step": 30700 + }, + { + "epoch": 1.43, + "learning_rate": 1.534619785869923e-05, + "loss": 0.1278, + "step": 30705 + }, + { + "epoch": 1.43, + "learning_rate": 1.5345414073644445e-05, + "loss": 0.1335, + "step": 30710 + }, + { + "epoch": 1.43, + "learning_rate": 1.534463028858966e-05, + "loss": 0.1177, + "step": 30715 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343846503534873e-05, + "loss": 0.1861, + "step": 30720 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343062718480087e-05, + "loss": 0.2215, + "step": 30725 + }, + { + "epoch": 1.43, + "learning_rate": 1.5342278933425297e-05, + "loss": 0.3913, + "step": 30730 + }, + { + "epoch": 1.43, + "learning_rate": 1.5341495148370514e-05, + "loss": 0.332, + "step": 30735 + }, + { + "epoch": 1.43, + "learning_rate": 1.5340711363315725e-05, + "loss": 0.0479, + "step": 30740 + }, + { + "epoch": 1.43, + "learning_rate": 1.533992757826094e-05, + "loss": 0.0902, + "step": 30745 + }, + { + "epoch": 1.43, + "learning_rate": 1.5339143793206153e-05, + "loss": 0.0383, + "step": 30750 + }, + { + "epoch": 1.44, + "learning_rate": 1.5338360008151367e-05, + "loss": 0.0873, + "step": 30755 + }, + { + "epoch": 1.44, + "learning_rate": 1.533757622309658e-05, + "loss": 0.058, + "step": 30760 + }, + { + "epoch": 1.44, + "learning_rate": 1.533679243804179e-05, + "loss": 0.1621, + "step": 30765 + }, + { + "epoch": 1.44, + "learning_rate": 1.533600865298701e-05, + "loss": 0.1878, + "step": 30770 + }, + { + "epoch": 1.44, + "learning_rate": 1.533522486793222e-05, + "loss": 0.3271, + "step": 30775 + }, + { + "epoch": 1.44, + "learning_rate": 1.5334441082877433e-05, + "loss": 0.2911, + "step": 30780 + }, + { + "epoch": 1.44, + "learning_rate": 1.5333657297822647e-05, + "loss": 0.2349, + "step": 30785 + }, + { + "epoch": 1.44, + "learning_rate": 1.533287351276786e-05, + "loss": 0.1201, + "step": 30790 + }, + { + "epoch": 1.44, + "learning_rate": 1.5332089727713075e-05, + "loss": 0.0789, + "step": 30795 + }, + { + "epoch": 1.44, + "learning_rate": 1.533130594265829e-05, + "loss": 0.0823, + "step": 30800 + }, + { + "epoch": 1.44, + "learning_rate": 1.53305221576035e-05, + "loss": 0.1153, + "step": 30805 + }, + { + "epoch": 1.44, + "learning_rate": 1.5329738372548713e-05, + "loss": 0.1054, + "step": 30810 + }, + { + "epoch": 1.44, + "learning_rate": 1.5328954587493927e-05, + "loss": 0.1376, + "step": 30815 + }, + { + "epoch": 1.44, + "learning_rate": 1.532817080243914e-05, + "loss": 0.1513, + "step": 30820 + }, + { + "epoch": 1.44, + "learning_rate": 1.5327387017384355e-05, + "loss": 0.2324, + "step": 30825 + }, + { + "epoch": 1.44, + "learning_rate": 1.5326603232329565e-05, + "loss": 0.3203, + "step": 30830 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325819447274782e-05, + "loss": 0.4077, + "step": 30835 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325035662219993e-05, + "loss": 0.0783, + "step": 30840 + }, + { + "epoch": 1.44, + "learning_rate": 1.5324251877165207e-05, + "loss": 0.0441, + "step": 30845 + }, + { + "epoch": 1.44, + "learning_rate": 1.532346809211042e-05, + "loss": 0.0616, + "step": 30850 + }, + { + "epoch": 1.44, + "learning_rate": 1.5322684307055635e-05, + "loss": 0.1068, + "step": 30855 + }, + { + "epoch": 1.44, + "learning_rate": 1.532190052200085e-05, + "loss": 0.1137, + "step": 30860 + }, + { + "epoch": 1.44, + "learning_rate": 1.5321116736946062e-05, + "loss": 0.2083, + "step": 30865 + }, + { + "epoch": 1.44, + "learning_rate": 1.5320332951891276e-05, + "loss": 0.1234, + "step": 30870 + }, + { + "epoch": 1.44, + "learning_rate": 1.5319549166836487e-05, + "loss": 0.2183, + "step": 30875 + }, + { + "epoch": 1.44, + "learning_rate": 1.53187653817817e-05, + "loss": 0.3248, + "step": 30880 + }, + { + "epoch": 1.44, + "learning_rate": 1.5317981596726915e-05, + "loss": 0.2879, + "step": 30885 + }, + { + "epoch": 1.44, + "learning_rate": 1.531719781167213e-05, + "loss": 0.0633, + "step": 30890 + }, + { + "epoch": 1.44, + "learning_rate": 1.5316414026617342e-05, + "loss": 0.0492, + "step": 30895 + }, + { + "epoch": 1.44, + "learning_rate": 1.5315630241562556e-05, + "loss": 0.0931, + "step": 30900 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314846456507767e-05, + "loss": 0.0612, + "step": 30905 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314062671452984e-05, + "loss": 0.0813, + "step": 30910 + }, + { + "epoch": 1.44, + "learning_rate": 1.5313278886398195e-05, + "loss": 0.1622, + "step": 30915 + }, + { + "epoch": 1.44, + "learning_rate": 1.531249510134341e-05, + "loss": 0.1378, + "step": 30920 + }, + { + "epoch": 1.44, + "learning_rate": 1.5311711316288623e-05, + "loss": 0.1609, + "step": 30925 + }, + { + "epoch": 1.44, + "learning_rate": 1.5310927531233836e-05, + "loss": 0.5023, + "step": 30930 + }, + { + "epoch": 1.44, + "learning_rate": 1.531014374617905e-05, + "loss": 0.3593, + "step": 30935 + }, + { + "epoch": 1.44, + "learning_rate": 1.530935996112426e-05, + "loss": 0.1545, + "step": 30940 + }, + { + "epoch": 1.44, + "learning_rate": 1.5308576176069475e-05, + "loss": 0.0696, + "step": 30945 + }, + { + "epoch": 1.44, + "learning_rate": 1.530779239101469e-05, + "loss": 0.0694, + "step": 30950 + }, + { + "epoch": 1.44, + "learning_rate": 1.5307008605959903e-05, + "loss": 0.1546, + "step": 30955 + }, + { + "epoch": 1.44, + "learning_rate": 1.5306224820905116e-05, + "loss": 0.1018, + "step": 30960 + }, + { + "epoch": 1.44, + "learning_rate": 1.530544103585033e-05, + "loss": 0.1363, + "step": 30965 + }, + { + "epoch": 1.45, + "learning_rate": 1.5304657250795544e-05, + "loss": 0.3256, + "step": 30970 + }, + { + "epoch": 1.45, + "learning_rate": 1.5303873465740758e-05, + "loss": 0.3606, + "step": 30975 + }, + { + "epoch": 1.45, + "learning_rate": 1.530308968068597e-05, + "loss": 0.331, + "step": 30980 + }, + { + "epoch": 1.45, + "learning_rate": 1.5302305895631186e-05, + "loss": 0.2471, + "step": 30985 + }, + { + "epoch": 1.45, + "learning_rate": 1.5301522110576397e-05, + "loss": 0.042, + "step": 30990 + }, + { + "epoch": 1.45, + "learning_rate": 1.530073832552161e-05, + "loss": 0.0757, + "step": 30995 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299954540466824e-05, + "loss": 0.0695, + "step": 31000 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299170755412035e-05, + "loss": 0.1101, + "step": 31005 + }, + { + "epoch": 1.45, + "learning_rate": 1.5298386970357252e-05, + "loss": 0.1784, + "step": 31010 + }, + { + "epoch": 1.45, + "learning_rate": 1.5297603185302463e-05, + "loss": 0.1233, + "step": 31015 + }, + { + "epoch": 1.45, + "learning_rate": 1.5296819400247677e-05, + "loss": 0.2507, + "step": 31020 + }, + { + "epoch": 1.45, + "learning_rate": 1.529603561519289e-05, + "loss": 0.248, + "step": 31025 + }, + { + "epoch": 1.45, + "learning_rate": 1.5295251830138104e-05, + "loss": 0.4157, + "step": 31030 + }, + { + "epoch": 1.45, + "learning_rate": 1.5294468045083318e-05, + "loss": 0.2699, + "step": 31035 + }, + { + "epoch": 1.45, + "learning_rate": 1.5293684260028532e-05, + "loss": 0.1, + "step": 31040 + }, + { + "epoch": 1.45, + "learning_rate": 1.5292900474973743e-05, + "loss": 0.0886, + "step": 31045 + }, + { + "epoch": 1.45, + "learning_rate": 1.529211668991896e-05, + "loss": 0.0945, + "step": 31050 + }, + { + "epoch": 1.45, + "learning_rate": 1.529133290486417e-05, + "loss": 0.1045, + "step": 31055 + }, + { + "epoch": 1.45, + "learning_rate": 1.5290549119809384e-05, + "loss": 0.1411, + "step": 31060 + }, + { + "epoch": 1.45, + "learning_rate": 1.52897653347546e-05, + "loss": 0.167, + "step": 31065 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288981549699812e-05, + "loss": 0.2069, + "step": 31070 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288197764645026e-05, + "loss": 0.1847, + "step": 31075 + }, + { + "epoch": 1.45, + "learning_rate": 1.5287413979590237e-05, + "loss": 0.2041, + "step": 31080 + }, + { + "epoch": 1.45, + "learning_rate": 1.5286630194535454e-05, + "loss": 0.2017, + "step": 31085 + }, + { + "epoch": 1.45, + "learning_rate": 1.5285846409480664e-05, + "loss": 0.0352, + "step": 31090 + }, + { + "epoch": 1.45, + "learning_rate": 1.528506262442588e-05, + "loss": 0.0676, + "step": 31095 + }, + { + "epoch": 1.45, + "learning_rate": 1.5284278839371092e-05, + "loss": 0.1651, + "step": 31100 + }, + { + "epoch": 1.45, + "learning_rate": 1.5283495054316306e-05, + "loss": 0.095, + "step": 31105 + }, + { + "epoch": 1.45, + "learning_rate": 1.528271126926152e-05, + "loss": 0.1385, + "step": 31110 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281927484206734e-05, + "loss": 0.16, + "step": 31115 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281143699151944e-05, + "loss": 0.2099, + "step": 31120 + }, + { + "epoch": 1.45, + "learning_rate": 1.5280359914097162e-05, + "loss": 0.3167, + "step": 31125 + }, + { + "epoch": 1.45, + "learning_rate": 1.5279576129042372e-05, + "loss": 0.2805, + "step": 31130 + }, + { + "epoch": 1.45, + "learning_rate": 1.5278792343987586e-05, + "loss": 0.2409, + "step": 31135 + }, + { + "epoch": 1.45, + "learning_rate": 1.52780085589328e-05, + "loss": 0.0747, + "step": 31140 + }, + { + "epoch": 1.45, + "learning_rate": 1.527722477387801e-05, + "loss": 0.0464, + "step": 31145 + }, + { + "epoch": 1.45, + "learning_rate": 1.5276440988823228e-05, + "loss": 0.0867, + "step": 31150 + }, + { + "epoch": 1.45, + "learning_rate": 1.527565720376844e-05, + "loss": 0.1238, + "step": 31155 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274873418713652e-05, + "loss": 0.1296, + "step": 31160 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274089633658866e-05, + "loss": 0.1621, + "step": 31165 + }, + { + "epoch": 1.45, + "learning_rate": 1.527330584860408e-05, + "loss": 0.1919, + "step": 31170 + }, + { + "epoch": 1.45, + "learning_rate": 1.5272522063549294e-05, + "loss": 0.3081, + "step": 31175 + }, + { + "epoch": 1.45, + "learning_rate": 1.5271738278494508e-05, + "loss": 0.3428, + "step": 31180 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270954493439722e-05, + "loss": 0.273, + "step": 31185 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270170708384936e-05, + "loss": 0.1131, + "step": 31190 + }, + { + "epoch": 1.46, + "learning_rate": 1.5269386923330146e-05, + "loss": 0.0283, + "step": 31195 + }, + { + "epoch": 1.46, + "learning_rate": 1.526860313827536e-05, + "loss": 0.0582, + "step": 31200 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267819353220574e-05, + "loss": 0.0585, + "step": 31205 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267035568165788e-05, + "loss": 0.0735, + "step": 31210 + }, + { + "epoch": 1.46, + "learning_rate": 1.5266251783111002e-05, + "loss": 0.272, + "step": 31215 + }, + { + "epoch": 1.46, + "learning_rate": 1.5265467998056212e-05, + "loss": 0.1506, + "step": 31220 + }, + { + "epoch": 1.46, + "learning_rate": 1.526468421300143e-05, + "loss": 0.2312, + "step": 31225 + }, + { + "epoch": 1.46, + "learning_rate": 1.526390042794664e-05, + "loss": 0.2993, + "step": 31230 + }, + { + "epoch": 1.46, + "learning_rate": 1.5263116642891854e-05, + "loss": 0.3461, + "step": 31235 + }, + { + "epoch": 1.46, + "learning_rate": 1.5262332857837068e-05, + "loss": 0.0388, + "step": 31240 + }, + { + "epoch": 1.46, + "learning_rate": 1.5261549072782282e-05, + "loss": 0.0356, + "step": 31245 + }, + { + "epoch": 1.46, + "learning_rate": 1.5260765287727496e-05, + "loss": 0.1398, + "step": 31250 + }, + { + "epoch": 1.46, + "learning_rate": 1.525998150267271e-05, + "loss": 0.0815, + "step": 31255 + }, + { + "epoch": 1.46, + "learning_rate": 1.525919771761792e-05, + "loss": 0.1347, + "step": 31260 + }, + { + "epoch": 1.46, + "learning_rate": 1.5258413932563134e-05, + "loss": 0.1489, + "step": 31265 + }, + { + "epoch": 1.46, + "learning_rate": 1.5257630147508348e-05, + "loss": 0.1687, + "step": 31270 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256846362453562e-05, + "loss": 0.1338, + "step": 31275 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256062577398776e-05, + "loss": 0.3636, + "step": 31280 + }, + { + "epoch": 1.46, + "learning_rate": 1.5255278792343988e-05, + "loss": 0.2142, + "step": 31285 + }, + { + "epoch": 1.46, + "learning_rate": 1.5254495007289202e-05, + "loss": 0.0323, + "step": 31290 + }, + { + "epoch": 1.46, + "learning_rate": 1.5253711222234416e-05, + "loss": 0.0875, + "step": 31295 + }, + { + "epoch": 1.46, + "learning_rate": 1.525292743717963e-05, + "loss": 0.0987, + "step": 31300 + }, + { + "epoch": 1.46, + "learning_rate": 1.5252143652124842e-05, + "loss": 0.1468, + "step": 31305 + }, + { + "epoch": 1.46, + "learning_rate": 1.5251359867070058e-05, + "loss": 0.0654, + "step": 31310 + }, + { + "epoch": 1.46, + "learning_rate": 1.525057608201527e-05, + "loss": 0.1286, + "step": 31315 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249792296960484e-05, + "loss": 0.1423, + "step": 31320 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249008511905696e-05, + "loss": 0.2072, + "step": 31325 + }, + { + "epoch": 1.46, + "learning_rate": 1.5248224726850908e-05, + "loss": 0.3649, + "step": 31330 + }, + { + "epoch": 1.46, + "learning_rate": 1.5247440941796124e-05, + "loss": 0.366, + "step": 31335 + }, + { + "epoch": 1.46, + "learning_rate": 1.5246657156741336e-05, + "loss": 0.0274, + "step": 31340 + }, + { + "epoch": 1.46, + "learning_rate": 1.524587337168655e-05, + "loss": 0.0756, + "step": 31345 + }, + { + "epoch": 1.46, + "learning_rate": 1.5245089586631762e-05, + "loss": 0.1292, + "step": 31350 + }, + { + "epoch": 1.46, + "learning_rate": 1.5244305801576978e-05, + "loss": 0.1784, + "step": 31355 + }, + { + "epoch": 1.46, + "learning_rate": 1.524352201652219e-05, + "loss": 0.1282, + "step": 31360 + }, + { + "epoch": 1.46, + "learning_rate": 1.5242738231467404e-05, + "loss": 0.2259, + "step": 31365 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241954446412616e-05, + "loss": 0.2356, + "step": 31370 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241170661357832e-05, + "loss": 0.2162, + "step": 31375 + }, + { + "epoch": 1.46, + "learning_rate": 1.5240386876303044e-05, + "loss": 0.2779, + "step": 31380 + }, + { + "epoch": 1.46, + "learning_rate": 1.5239603091248258e-05, + "loss": 0.3447, + "step": 31385 + }, + { + "epoch": 1.46, + "learning_rate": 1.523881930619347e-05, + "loss": 0.0535, + "step": 31390 + }, + { + "epoch": 1.46, + "learning_rate": 1.5238035521138684e-05, + "loss": 0.1216, + "step": 31395 + }, + { + "epoch": 1.47, + "learning_rate": 1.5237251736083898e-05, + "loss": 0.0795, + "step": 31400 + }, + { + "epoch": 1.47, + "learning_rate": 1.523646795102911e-05, + "loss": 0.1059, + "step": 31405 + }, + { + "epoch": 1.47, + "learning_rate": 1.5235684165974326e-05, + "loss": 0.079, + "step": 31410 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234900380919538e-05, + "loss": 0.1216, + "step": 31415 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234116595864752e-05, + "loss": 0.1635, + "step": 31420 + }, + { + "epoch": 1.47, + "learning_rate": 1.5233332810809964e-05, + "loss": 0.1558, + "step": 31425 + }, + { + "epoch": 1.47, + "learning_rate": 1.523254902575518e-05, + "loss": 0.405, + "step": 31430 + }, + { + "epoch": 1.47, + "learning_rate": 1.5231765240700392e-05, + "loss": 0.4419, + "step": 31435 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230981455645606e-05, + "loss": 0.0325, + "step": 31440 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230197670590818e-05, + "loss": 0.1278, + "step": 31445 + }, + { + "epoch": 1.47, + "learning_rate": 1.5229413885536033e-05, + "loss": 0.1036, + "step": 31450 + }, + { + "epoch": 1.47, + "learning_rate": 1.5228630100481246e-05, + "loss": 0.1107, + "step": 31455 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227846315426458e-05, + "loss": 0.0893, + "step": 31460 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227062530371672e-05, + "loss": 0.1751, + "step": 31465 + }, + { + "epoch": 1.47, + "learning_rate": 1.5226278745316884e-05, + "loss": 0.1661, + "step": 31470 + }, + { + "epoch": 1.47, + "learning_rate": 1.52254949602621e-05, + "loss": 0.1933, + "step": 31475 + }, + { + "epoch": 1.47, + "learning_rate": 1.5224711175207312e-05, + "loss": 0.2263, + "step": 31480 + }, + { + "epoch": 1.47, + "learning_rate": 1.5223927390152526e-05, + "loss": 0.3486, + "step": 31485 + }, + { + "epoch": 1.47, + "learning_rate": 1.522314360509774e-05, + "loss": 0.0391, + "step": 31490 + }, + { + "epoch": 1.47, + "learning_rate": 1.5222359820042954e-05, + "loss": 0.0702, + "step": 31495 + }, + { + "epoch": 1.47, + "learning_rate": 1.5221576034988166e-05, + "loss": 0.0595, + "step": 31500 + }, + { + "epoch": 1.47, + "learning_rate": 1.522079224993338e-05, + "loss": 0.0184, + "step": 31505 + }, + { + "epoch": 1.47, + "learning_rate": 1.5220008464878594e-05, + "loss": 0.1383, + "step": 31510 + }, + { + "epoch": 1.47, + "learning_rate": 1.5219224679823807e-05, + "loss": 0.1519, + "step": 31515 + }, + { + "epoch": 1.47, + "learning_rate": 1.521844089476902e-05, + "loss": 0.1777, + "step": 31520 + }, + { + "epoch": 1.47, + "learning_rate": 1.5217657109714232e-05, + "loss": 0.2411, + "step": 31525 + }, + { + "epoch": 1.47, + "learning_rate": 1.5216873324659448e-05, + "loss": 0.3028, + "step": 31530 + }, + { + "epoch": 1.47, + "learning_rate": 1.521608953960466e-05, + "loss": 0.379, + "step": 31535 + }, + { + "epoch": 1.47, + "learning_rate": 1.5215305754549874e-05, + "loss": 0.0478, + "step": 31540 + }, + { + "epoch": 1.47, + "learning_rate": 1.5214521969495086e-05, + "loss": 0.0623, + "step": 31545 + }, + { + "epoch": 1.47, + "learning_rate": 1.5213738184440301e-05, + "loss": 0.0523, + "step": 31550 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212954399385514e-05, + "loss": 0.1509, + "step": 31555 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212170614330728e-05, + "loss": 0.1149, + "step": 31560 + }, + { + "epoch": 1.47, + "learning_rate": 1.521138682927594e-05, + "loss": 0.1829, + "step": 31565 + }, + { + "epoch": 1.47, + "learning_rate": 1.5210603044221155e-05, + "loss": 0.1609, + "step": 31570 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209819259166368e-05, + "loss": 0.2295, + "step": 31575 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209035474111581e-05, + "loss": 0.486, + "step": 31580 + }, + { + "epoch": 1.47, + "learning_rate": 1.5208251689056794e-05, + "loss": 0.2702, + "step": 31585 + }, + { + "epoch": 1.47, + "learning_rate": 1.5207467904002008e-05, + "loss": 0.0594, + "step": 31590 + }, + { + "epoch": 1.47, + "learning_rate": 1.5206684118947222e-05, + "loss": 0.1273, + "step": 31595 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205900333892434e-05, + "loss": 0.0883, + "step": 31600 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205116548837648e-05, + "loss": 0.0647, + "step": 31605 + }, + { + "epoch": 1.47, + "learning_rate": 1.5204332763782862e-05, + "loss": 0.0872, + "step": 31610 + }, + { + "epoch": 1.48, + "learning_rate": 1.5203548978728075e-05, + "loss": 0.1148, + "step": 31615 + }, + { + "epoch": 1.48, + "learning_rate": 1.5202765193673288e-05, + "loss": 0.1226, + "step": 31620 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201981408618503e-05, + "loss": 0.2385, + "step": 31625 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201197623563715e-05, + "loss": 0.3862, + "step": 31630 + }, + { + "epoch": 1.48, + "learning_rate": 1.520041383850893e-05, + "loss": 0.3206, + "step": 31635 + }, + { + "epoch": 1.48, + "learning_rate": 1.5199630053454142e-05, + "loss": 0.0743, + "step": 31640 + }, + { + "epoch": 1.48, + "learning_rate": 1.5198846268399357e-05, + "loss": 0.0545, + "step": 31645 + }, + { + "epoch": 1.48, + "learning_rate": 1.519806248334457e-05, + "loss": 0.0978, + "step": 31650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5197278698289782e-05, + "loss": 0.0675, + "step": 31655 + }, + { + "epoch": 1.48, + "learning_rate": 1.5196494913234995e-05, + "loss": 0.1666, + "step": 31660 + }, + { + "epoch": 1.48, + "learning_rate": 1.5195711128180208e-05, + "loss": 0.2834, + "step": 31665 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194927343125423e-05, + "loss": 0.1719, + "step": 31670 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194143558070636e-05, + "loss": 0.2468, + "step": 31675 + }, + { + "epoch": 1.48, + "learning_rate": 1.519335977301585e-05, + "loss": 0.3821, + "step": 31680 + }, + { + "epoch": 1.48, + "learning_rate": 1.5192575987961062e-05, + "loss": 0.3466, + "step": 31685 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191792202906277e-05, + "loss": 0.0438, + "step": 31690 + }, + { + "epoch": 1.48, + "learning_rate": 1.519100841785149e-05, + "loss": 0.066, + "step": 31695 + }, + { + "epoch": 1.48, + "learning_rate": 1.5190224632796703e-05, + "loss": 0.083, + "step": 31700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5189440847741916e-05, + "loss": 0.0671, + "step": 31705 + }, + { + "epoch": 1.48, + "learning_rate": 1.5188657062687131e-05, + "loss": 0.1106, + "step": 31710 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187873277632343e-05, + "loss": 0.1968, + "step": 31715 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187089492577556e-05, + "loss": 0.209, + "step": 31720 + }, + { + "epoch": 1.48, + "learning_rate": 1.5186305707522771e-05, + "loss": 0.1886, + "step": 31725 + }, + { + "epoch": 1.48, + "learning_rate": 1.5185521922467983e-05, + "loss": 0.3375, + "step": 31730 + }, + { + "epoch": 1.48, + "learning_rate": 1.5184738137413197e-05, + "loss": 0.2246, + "step": 31735 + }, + { + "epoch": 1.48, + "learning_rate": 1.518395435235841e-05, + "loss": 0.0597, + "step": 31740 + }, + { + "epoch": 1.48, + "learning_rate": 1.5183170567303625e-05, + "loss": 0.0915, + "step": 31745 + }, + { + "epoch": 1.48, + "learning_rate": 1.5182386782248837e-05, + "loss": 0.1168, + "step": 31750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5181602997194051e-05, + "loss": 0.0906, + "step": 31755 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180819212139263e-05, + "loss": 0.0936, + "step": 31760 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180035427084479e-05, + "loss": 0.1309, + "step": 31765 + }, + { + "epoch": 1.48, + "learning_rate": 1.5179251642029691e-05, + "loss": 0.149, + "step": 31770 + }, + { + "epoch": 1.48, + "learning_rate": 1.5178467856974905e-05, + "loss": 0.2683, + "step": 31775 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177684071920117e-05, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 1.48, + "learning_rate": 1.517690028686533e-05, + "loss": 0.2512, + "step": 31785 + }, + { + "epoch": 1.48, + "learning_rate": 1.5176116501810545e-05, + "loss": 0.059, + "step": 31790 + }, + { + "epoch": 1.48, + "learning_rate": 1.5175332716755757e-05, + "loss": 0.0595, + "step": 31795 + }, + { + "epoch": 1.48, + "learning_rate": 1.5174548931700971e-05, + "loss": 0.0786, + "step": 31800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5173765146646185e-05, + "loss": 0.1279, + "step": 31805 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172981361591399e-05, + "loss": 0.2461, + "step": 31810 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172197576536611e-05, + "loss": 0.1358, + "step": 31815 + }, + { + "epoch": 1.48, + "learning_rate": 1.5171413791481825e-05, + "loss": 0.1626, + "step": 31820 + }, + { + "epoch": 1.48, + "learning_rate": 1.5170630006427039e-05, + "loss": 0.1351, + "step": 31825 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169846221372253e-05, + "loss": 0.3434, + "step": 31830 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169062436317465e-05, + "loss": 0.2988, + "step": 31835 + }, + { + "epoch": 1.49, + "learning_rate": 1.5168278651262681e-05, + "loss": 0.0382, + "step": 31840 + }, + { + "epoch": 1.49, + "learning_rate": 1.5167494866207893e-05, + "loss": 0.0455, + "step": 31845 + }, + { + "epoch": 1.49, + "learning_rate": 1.5166711081153105e-05, + "loss": 0.0726, + "step": 31850 + }, + { + "epoch": 1.49, + "learning_rate": 1.516592729609832e-05, + "loss": 0.0954, + "step": 31855 + }, + { + "epoch": 1.49, + "learning_rate": 1.5165143511043531e-05, + "loss": 0.0936, + "step": 31860 + }, + { + "epoch": 1.49, + "learning_rate": 1.5164359725988747e-05, + "loss": 0.1024, + "step": 31865 + }, + { + "epoch": 1.49, + "learning_rate": 1.516357594093396e-05, + "loss": 0.1589, + "step": 31870 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162792155879173e-05, + "loss": 0.2689, + "step": 31875 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162008370824385e-05, + "loss": 0.3105, + "step": 31880 + }, + { + "epoch": 1.49, + "learning_rate": 1.5161224585769601e-05, + "loss": 0.295, + "step": 31885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5160440800714813e-05, + "loss": 0.0471, + "step": 31890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5159657015660027e-05, + "loss": 0.0563, + "step": 31895 + }, + { + "epoch": 1.49, + "learning_rate": 1.515887323060524e-05, + "loss": 0.0521, + "step": 31900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5158089445550455e-05, + "loss": 0.1391, + "step": 31905 + }, + { + "epoch": 1.49, + "learning_rate": 1.5157305660495667e-05, + "loss": 0.0926, + "step": 31910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515652187544088e-05, + "loss": 0.1678, + "step": 31915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5155738090386093e-05, + "loss": 0.1874, + "step": 31920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154954305331307e-05, + "loss": 0.1741, + "step": 31925 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154170520276521e-05, + "loss": 0.362, + "step": 31930 + }, + { + "epoch": 1.49, + "learning_rate": 1.5153386735221733e-05, + "loss": 0.3195, + "step": 31935 + }, + { + "epoch": 1.49, + "learning_rate": 1.5152602950166949e-05, + "loss": 0.0607, + "step": 31940 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151819165112161e-05, + "loss": 0.0734, + "step": 31945 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151035380057375e-05, + "loss": 0.096, + "step": 31950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5150251595002587e-05, + "loss": 0.1064, + "step": 31955 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149467809947803e-05, + "loss": 0.1528, + "step": 31960 + }, + { + "epoch": 1.49, + "learning_rate": 1.5148684024893015e-05, + "loss": 0.2397, + "step": 31965 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147900239838229e-05, + "loss": 0.2079, + "step": 31970 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147116454783441e-05, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.49, + "learning_rate": 1.5146332669728653e-05, + "loss": 0.4375, + "step": 31980 + }, + { + "epoch": 1.49, + "learning_rate": 1.5145548884673869e-05, + "loss": 0.348, + "step": 31985 + }, + { + "epoch": 1.49, + "learning_rate": 1.5144765099619081e-05, + "loss": 0.0462, + "step": 31990 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143981314564295e-05, + "loss": 0.078, + "step": 31995 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143197529509507e-05, + "loss": 0.0692, + "step": 32000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5142413744454723e-05, + "loss": 0.0825, + "step": 32005 + }, + { + "epoch": 1.49, + "learning_rate": 1.5141629959399935e-05, + "loss": 0.138, + "step": 32010 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140846174345149e-05, + "loss": 0.1539, + "step": 32015 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140062389290361e-05, + "loss": 0.2308, + "step": 32020 + }, + { + "epoch": 1.49, + "learning_rate": 1.5139278604235577e-05, + "loss": 0.2213, + "step": 32025 + }, + { + "epoch": 1.49, + "learning_rate": 1.5138494819180789e-05, + "loss": 0.2618, + "step": 32030 + }, + { + "epoch": 1.49, + "learning_rate": 1.5137711034126003e-05, + "loss": 0.3362, + "step": 32035 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136927249071217e-05, + "loss": 0.0699, + "step": 32040 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136143464016429e-05, + "loss": 0.0228, + "step": 32045 + }, + { + "epoch": 1.5, + "learning_rate": 1.5135359678961643e-05, + "loss": 0.046, + "step": 32050 + }, + { + "epoch": 1.5, + "learning_rate": 1.5134575893906855e-05, + "loss": 0.0907, + "step": 32055 + }, + { + "epoch": 1.5, + "learning_rate": 1.513379210885207e-05, + "loss": 0.1112, + "step": 32060 + }, + { + "epoch": 1.5, + "learning_rate": 1.5133008323797283e-05, + "loss": 0.1013, + "step": 32065 + }, + { + "epoch": 1.5, + "learning_rate": 1.5132224538742497e-05, + "loss": 0.1556, + "step": 32070 + }, + { + "epoch": 1.5, + "learning_rate": 1.5131440753687709e-05, + "loss": 0.24, + "step": 32075 + }, + { + "epoch": 1.5, + "learning_rate": 1.5130656968632925e-05, + "loss": 0.5122, + "step": 32080 + }, + { + "epoch": 1.5, + "learning_rate": 1.5129873183578137e-05, + "loss": 0.2973, + "step": 32085 + }, + { + "epoch": 1.5, + "learning_rate": 1.512908939852335e-05, + "loss": 0.0492, + "step": 32090 + }, + { + "epoch": 1.5, + "learning_rate": 1.5128305613468563e-05, + "loss": 0.0999, + "step": 32095 + }, + { + "epoch": 1.5, + "learning_rate": 1.5127521828413779e-05, + "loss": 0.0568, + "step": 32100 + }, + { + "epoch": 1.5, + "learning_rate": 1.512673804335899e-05, + "loss": 0.0834, + "step": 32105 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125954258304203e-05, + "loss": 0.0824, + "step": 32110 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125170473249417e-05, + "loss": 0.2455, + "step": 32115 + }, + { + "epoch": 1.5, + "learning_rate": 1.512438668819463e-05, + "loss": 0.1645, + "step": 32120 + }, + { + "epoch": 1.5, + "learning_rate": 1.5123602903139845e-05, + "loss": 0.2583, + "step": 32125 + }, + { + "epoch": 1.5, + "learning_rate": 1.5122819118085057e-05, + "loss": 0.3227, + "step": 32130 + }, + { + "epoch": 1.5, + "learning_rate": 1.512203533303027e-05, + "loss": 0.3561, + "step": 32135 + }, + { + "epoch": 1.5, + "learning_rate": 1.5121251547975485e-05, + "loss": 0.0648, + "step": 32140 + }, + { + "epoch": 1.5, + "learning_rate": 1.5120467762920699e-05, + "loss": 0.016, + "step": 32145 + }, + { + "epoch": 1.5, + "learning_rate": 1.511968397786591e-05, + "loss": 0.1238, + "step": 32150 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118900192811126e-05, + "loss": 0.0738, + "step": 32155 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118116407756339e-05, + "loss": 0.113, + "step": 32160 + }, + { + "epoch": 1.5, + "learning_rate": 1.5117332622701553e-05, + "loss": 0.0999, + "step": 32165 + }, + { + "epoch": 1.5, + "learning_rate": 1.5116548837646765e-05, + "loss": 0.2245, + "step": 32170 + }, + { + "epoch": 1.5, + "learning_rate": 1.5115765052591977e-05, + "loss": 0.3073, + "step": 32175 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114981267537193e-05, + "loss": 0.2557, + "step": 32180 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114197482482405e-05, + "loss": 0.2607, + "step": 32185 + }, + { + "epoch": 1.5, + "learning_rate": 1.5113413697427619e-05, + "loss": 0.064, + "step": 32190 + }, + { + "epoch": 1.5, + "learning_rate": 1.5112629912372831e-05, + "loss": 0.076, + "step": 32195 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111846127318046e-05, + "loss": 0.1157, + "step": 32200 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111062342263259e-05, + "loss": 0.075, + "step": 32205 + }, + { + "epoch": 1.5, + "learning_rate": 1.5110278557208473e-05, + "loss": 0.1556, + "step": 32210 + }, + { + "epoch": 1.5, + "learning_rate": 1.5109494772153685e-05, + "loss": 0.1983, + "step": 32215 + }, + { + "epoch": 1.5, + "learning_rate": 1.51087109870989e-05, + "loss": 0.1951, + "step": 32220 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107927202044113e-05, + "loss": 0.2283, + "step": 32225 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107143416989327e-05, + "loss": 0.3268, + "step": 32230 + }, + { + "epoch": 1.5, + "learning_rate": 1.5106359631934539e-05, + "loss": 0.2575, + "step": 32235 + }, + { + "epoch": 1.5, + "learning_rate": 1.5105575846879753e-05, + "loss": 0.046, + "step": 32240 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104792061824967e-05, + "loss": 0.0554, + "step": 32245 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104008276770179e-05, + "loss": 0.0833, + "step": 32250 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103224491715394e-05, + "loss": 0.0969, + "step": 32255 + }, + { + "epoch": 1.51, + "learning_rate": 1.5102440706660607e-05, + "loss": 0.1418, + "step": 32260 + }, + { + "epoch": 1.51, + "learning_rate": 1.510165692160582e-05, + "loss": 0.1293, + "step": 32265 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100873136551033e-05, + "loss": 0.0968, + "step": 32270 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100089351496248e-05, + "loss": 0.3261, + "step": 32275 + }, + { + "epoch": 1.51, + "learning_rate": 1.509930556644146e-05, + "loss": 0.283, + "step": 32280 + }, + { + "epoch": 1.51, + "learning_rate": 1.5098521781386674e-05, + "loss": 0.3071, + "step": 32285 + }, + { + "epoch": 1.51, + "learning_rate": 1.5097737996331887e-05, + "loss": 0.072, + "step": 32290 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096954211277102e-05, + "loss": 0.1006, + "step": 32295 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096170426222314e-05, + "loss": 0.1003, + "step": 32300 + }, + { + "epoch": 1.51, + "learning_rate": 1.5095386641167527e-05, + "loss": 0.1151, + "step": 32305 + }, + { + "epoch": 1.51, + "learning_rate": 1.509460285611274e-05, + "loss": 0.1184, + "step": 32310 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093819071057953e-05, + "loss": 0.1667, + "step": 32315 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093035286003168e-05, + "loss": 0.1405, + "step": 32320 + }, + { + "epoch": 1.51, + "learning_rate": 1.509225150094838e-05, + "loss": 0.2462, + "step": 32325 + }, + { + "epoch": 1.51, + "learning_rate": 1.5091467715893594e-05, + "loss": 0.2843, + "step": 32330 + }, + { + "epoch": 1.51, + "learning_rate": 1.5090683930838808e-05, + "loss": 0.2082, + "step": 32335 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089900145784022e-05, + "loss": 0.0373, + "step": 32340 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089116360729235e-05, + "loss": 0.0384, + "step": 32345 + }, + { + "epoch": 1.51, + "learning_rate": 1.5088332575674448e-05, + "loss": 0.0444, + "step": 32350 + }, + { + "epoch": 1.51, + "learning_rate": 1.5087548790619662e-05, + "loss": 0.0799, + "step": 32355 + }, + { + "epoch": 1.51, + "learning_rate": 1.5086765005564876e-05, + "loss": 0.1469, + "step": 32360 + }, + { + "epoch": 1.51, + "learning_rate": 1.5085981220510088e-05, + "loss": 0.1759, + "step": 32365 + }, + { + "epoch": 1.51, + "learning_rate": 1.50851974354553e-05, + "loss": 0.1966, + "step": 32370 + }, + { + "epoch": 1.51, + "learning_rate": 1.5084413650400516e-05, + "loss": 0.2456, + "step": 32375 + }, + { + "epoch": 1.51, + "learning_rate": 1.5083629865345728e-05, + "loss": 0.3569, + "step": 32380 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082846080290942e-05, + "loss": 0.2519, + "step": 32385 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082062295236155e-05, + "loss": 0.0463, + "step": 32390 + }, + { + "epoch": 1.51, + "learning_rate": 1.508127851018137e-05, + "loss": 0.0796, + "step": 32395 + }, + { + "epoch": 1.51, + "learning_rate": 1.5080494725126582e-05, + "loss": 0.0918, + "step": 32400 + }, + { + "epoch": 1.51, + "learning_rate": 1.5079710940071796e-05, + "loss": 0.0762, + "step": 32405 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078927155017009e-05, + "loss": 0.0586, + "step": 32410 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078143369962224e-05, + "loss": 0.1551, + "step": 32415 + }, + { + "epoch": 1.51, + "learning_rate": 1.5077359584907436e-05, + "loss": 0.2989, + "step": 32420 + }, + { + "epoch": 1.51, + "learning_rate": 1.507657579985265e-05, + "loss": 0.2065, + "step": 32425 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075792014797862e-05, + "loss": 0.3864, + "step": 32430 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075008229743076e-05, + "loss": 0.2009, + "step": 32435 + }, + { + "epoch": 1.51, + "learning_rate": 1.507422444468829e-05, + "loss": 0.0476, + "step": 32440 + }, + { + "epoch": 1.51, + "learning_rate": 1.5073440659633502e-05, + "loss": 0.1002, + "step": 32445 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072656874578716e-05, + "loss": 0.1062, + "step": 32450 + }, + { + "epoch": 1.51, + "learning_rate": 1.507187308952393e-05, + "loss": 0.0847, + "step": 32455 + }, + { + "epoch": 1.51, + "learning_rate": 1.5071089304469144e-05, + "loss": 0.0809, + "step": 32460 + }, + { + "epoch": 1.51, + "learning_rate": 1.5070305519414356e-05, + "loss": 0.1638, + "step": 32465 + }, + { + "epoch": 1.52, + "learning_rate": 1.5069521734359572e-05, + "loss": 0.2693, + "step": 32470 + }, + { + "epoch": 1.52, + "learning_rate": 1.5068737949304784e-05, + "loss": 0.2582, + "step": 32475 + }, + { + "epoch": 1.52, + "learning_rate": 1.5067954164249998e-05, + "loss": 0.2734, + "step": 32480 + }, + { + "epoch": 1.52, + "learning_rate": 1.506717037919521e-05, + "loss": 0.2822, + "step": 32485 + }, + { + "epoch": 1.52, + "learning_rate": 1.5066386594140426e-05, + "loss": 0.0517, + "step": 32490 + }, + { + "epoch": 1.52, + "learning_rate": 1.5065602809085638e-05, + "loss": 0.0486, + "step": 32495 + }, + { + "epoch": 1.52, + "learning_rate": 1.506481902403085e-05, + "loss": 0.0478, + "step": 32500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5064035238976064e-05, + "loss": 0.1418, + "step": 32505 + }, + { + "epoch": 1.52, + "learning_rate": 1.5063251453921276e-05, + "loss": 0.1221, + "step": 32510 + }, + { + "epoch": 1.52, + "learning_rate": 1.5062467668866492e-05, + "loss": 0.168, + "step": 32515 + }, + { + "epoch": 1.52, + "learning_rate": 1.5061683883811704e-05, + "loss": 0.196, + "step": 32520 + }, + { + "epoch": 1.52, + "learning_rate": 1.5060900098756918e-05, + "loss": 0.2721, + "step": 32525 + }, + { + "epoch": 1.52, + "learning_rate": 1.506011631370213e-05, + "loss": 0.3576, + "step": 32530 + }, + { + "epoch": 1.52, + "learning_rate": 1.5059332528647346e-05, + "loss": 0.3454, + "step": 32535 + }, + { + "epoch": 1.52, + "learning_rate": 1.5058548743592558e-05, + "loss": 0.0558, + "step": 32540 + }, + { + "epoch": 1.52, + "learning_rate": 1.5057764958537772e-05, + "loss": 0.0572, + "step": 32545 + }, + { + "epoch": 1.52, + "learning_rate": 1.5056981173482984e-05, + "loss": 0.0613, + "step": 32550 + }, + { + "epoch": 1.52, + "learning_rate": 1.50561973884282e-05, + "loss": 0.1226, + "step": 32555 + }, + { + "epoch": 1.52, + "learning_rate": 1.5055413603373412e-05, + "loss": 0.1035, + "step": 32560 + }, + { + "epoch": 1.52, + "learning_rate": 1.5054629818318624e-05, + "loss": 0.0922, + "step": 32565 + }, + { + "epoch": 1.52, + "learning_rate": 1.505384603326384e-05, + "loss": 0.2358, + "step": 32570 + }, + { + "epoch": 1.52, + "learning_rate": 1.5053062248209052e-05, + "loss": 0.1497, + "step": 32575 + }, + { + "epoch": 1.52, + "learning_rate": 1.5052278463154266e-05, + "loss": 0.3272, + "step": 32580 + }, + { + "epoch": 1.52, + "learning_rate": 1.5051494678099478e-05, + "loss": 0.3985, + "step": 32585 + }, + { + "epoch": 1.52, + "learning_rate": 1.5050710893044694e-05, + "loss": 0.044, + "step": 32590 + }, + { + "epoch": 1.52, + "learning_rate": 1.5049927107989906e-05, + "loss": 0.1202, + "step": 32595 + }, + { + "epoch": 1.52, + "learning_rate": 1.504914332293512e-05, + "loss": 0.1429, + "step": 32600 + }, + { + "epoch": 1.52, + "learning_rate": 1.5048359537880332e-05, + "loss": 0.0826, + "step": 32605 + }, + { + "epoch": 1.52, + "learning_rate": 1.5047575752825548e-05, + "loss": 0.1068, + "step": 32610 + }, + { + "epoch": 1.52, + "learning_rate": 1.504679196777076e-05, + "loss": 0.2536, + "step": 32615 + }, + { + "epoch": 1.52, + "learning_rate": 1.5046008182715974e-05, + "loss": 0.2464, + "step": 32620 + }, + { + "epoch": 1.52, + "learning_rate": 1.5045224397661186e-05, + "loss": 0.4023, + "step": 32625 + }, + { + "epoch": 1.52, + "learning_rate": 1.5044440612606398e-05, + "loss": 0.4528, + "step": 32630 + }, + { + "epoch": 1.52, + "learning_rate": 1.5043656827551614e-05, + "loss": 0.2417, + "step": 32635 + }, + { + "epoch": 1.52, + "learning_rate": 1.5042873042496826e-05, + "loss": 0.0292, + "step": 32640 + }, + { + "epoch": 1.52, + "learning_rate": 1.504208925744204e-05, + "loss": 0.0988, + "step": 32645 + }, + { + "epoch": 1.52, + "learning_rate": 1.5041305472387254e-05, + "loss": 0.067, + "step": 32650 + }, + { + "epoch": 1.52, + "learning_rate": 1.5040521687332468e-05, + "loss": 0.1081, + "step": 32655 + }, + { + "epoch": 1.52, + "learning_rate": 1.503973790227768e-05, + "loss": 0.1431, + "step": 32660 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038954117222894e-05, + "loss": 0.1415, + "step": 32665 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038170332168108e-05, + "loss": 0.1489, + "step": 32670 + }, + { + "epoch": 1.52, + "learning_rate": 1.5037386547113322e-05, + "loss": 0.187, + "step": 32675 + }, + { + "epoch": 1.52, + "learning_rate": 1.5036602762058534e-05, + "loss": 0.3941, + "step": 32680 + }, + { + "epoch": 1.53, + "learning_rate": 1.503581897700375e-05, + "loss": 0.267, + "step": 32685 + }, + { + "epoch": 1.53, + "learning_rate": 1.5035035191948962e-05, + "loss": 0.0578, + "step": 32690 + }, + { + "epoch": 1.53, + "learning_rate": 1.5034251406894174e-05, + "loss": 0.0391, + "step": 32695 + }, + { + "epoch": 1.53, + "learning_rate": 1.5033467621839388e-05, + "loss": 0.0686, + "step": 32700 + }, + { + "epoch": 1.53, + "learning_rate": 1.50326838367846e-05, + "loss": 0.1086, + "step": 32705 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031900051729816e-05, + "loss": 0.1141, + "step": 32710 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031116266675028e-05, + "loss": 0.1051, + "step": 32715 + }, + { + "epoch": 1.53, + "learning_rate": 1.5030332481620242e-05, + "loss": 0.2131, + "step": 32720 + }, + { + "epoch": 1.53, + "learning_rate": 1.5029548696565454e-05, + "loss": 0.2887, + "step": 32725 + }, + { + "epoch": 1.53, + "learning_rate": 1.502876491151067e-05, + "loss": 0.4061, + "step": 32730 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027981126455882e-05, + "loss": 0.3098, + "step": 32735 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027197341401096e-05, + "loss": 0.0542, + "step": 32740 + }, + { + "epoch": 1.53, + "learning_rate": 1.5026413556346308e-05, + "loss": 0.0951, + "step": 32745 + }, + { + "epoch": 1.53, + "learning_rate": 1.5025629771291524e-05, + "loss": 0.0702, + "step": 32750 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024845986236736e-05, + "loss": 0.0914, + "step": 32755 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024062201181948e-05, + "loss": 0.1323, + "step": 32760 + }, + { + "epoch": 1.53, + "learning_rate": 1.5023278416127162e-05, + "loss": 0.1518, + "step": 32765 + }, + { + "epoch": 1.53, + "learning_rate": 1.5022494631072376e-05, + "loss": 0.2011, + "step": 32770 + }, + { + "epoch": 1.53, + "learning_rate": 1.502171084601759e-05, + "loss": 0.1865, + "step": 32775 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020927060962802e-05, + "loss": 0.328, + "step": 32780 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020143275908018e-05, + "loss": 0.3728, + "step": 32785 + }, + { + "epoch": 1.53, + "learning_rate": 1.501935949085323e-05, + "loss": 0.0544, + "step": 32790 + }, + { + "epoch": 1.53, + "learning_rate": 1.5018575705798444e-05, + "loss": 0.0543, + "step": 32795 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017791920743656e-05, + "loss": 0.0897, + "step": 32800 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017008135688871e-05, + "loss": 0.1329, + "step": 32805 + }, + { + "epoch": 1.53, + "learning_rate": 1.5016224350634084e-05, + "loss": 0.0871, + "step": 32810 + }, + { + "epoch": 1.53, + "learning_rate": 1.5015440565579298e-05, + "loss": 0.1539, + "step": 32815 + }, + { + "epoch": 1.53, + "learning_rate": 1.501465678052451e-05, + "loss": 0.2018, + "step": 32820 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013872995469722e-05, + "loss": 0.1788, + "step": 32825 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013089210414938e-05, + "loss": 0.4134, + "step": 32830 + }, + { + "epoch": 1.53, + "learning_rate": 1.501230542536015e-05, + "loss": 0.3125, + "step": 32835 + }, + { + "epoch": 1.53, + "learning_rate": 1.5011521640305364e-05, + "loss": 0.0423, + "step": 32840 + }, + { + "epoch": 1.53, + "learning_rate": 1.5010737855250576e-05, + "loss": 0.0694, + "step": 32845 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009954070195792e-05, + "loss": 0.0747, + "step": 32850 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009170285141004e-05, + "loss": 0.1646, + "step": 32855 + }, + { + "epoch": 1.53, + "learning_rate": 1.5008386500086218e-05, + "loss": 0.1413, + "step": 32860 + }, + { + "epoch": 1.53, + "learning_rate": 1.500760271503143e-05, + "loss": 0.1363, + "step": 32865 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006818929976645e-05, + "loss": 0.1389, + "step": 32870 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006035144921858e-05, + "loss": 0.313, + "step": 32875 + }, + { + "epoch": 1.53, + "learning_rate": 1.5005251359867072e-05, + "loss": 0.4249, + "step": 32880 + }, + { + "epoch": 1.53, + "learning_rate": 1.5004467574812286e-05, + "loss": 0.2864, + "step": 32885 + }, + { + "epoch": 1.53, + "learning_rate": 1.5003683789757498e-05, + "loss": 0.0551, + "step": 32890 + }, + { + "epoch": 1.53, + "learning_rate": 1.5002900004702712e-05, + "loss": 0.0842, + "step": 32895 + }, + { + "epoch": 1.54, + "learning_rate": 1.5002116219647924e-05, + "loss": 0.0557, + "step": 32900 + }, + { + "epoch": 1.54, + "learning_rate": 1.500133243459314e-05, + "loss": 0.1035, + "step": 32905 + }, + { + "epoch": 1.54, + "learning_rate": 1.5000548649538352e-05, + "loss": 0.0518, + "step": 32910 + }, + { + "epoch": 1.54, + "learning_rate": 1.4999764864483566e-05, + "loss": 0.1531, + "step": 32915 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998981079428778e-05, + "loss": 0.216, + "step": 32920 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998197294373993e-05, + "loss": 0.1716, + "step": 32925 + }, + { + "epoch": 1.54, + "learning_rate": 1.4997413509319206e-05, + "loss": 0.39, + "step": 32930 + }, + { + "epoch": 1.54, + "learning_rate": 1.499662972426442e-05, + "loss": 0.2803, + "step": 32935 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995845939209632e-05, + "loss": 0.1181, + "step": 32940 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995062154154847e-05, + "loss": 0.0665, + "step": 32945 + }, + { + "epoch": 1.54, + "learning_rate": 1.499427836910006e-05, + "loss": 0.0879, + "step": 32950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4993494584045272e-05, + "loss": 0.111, + "step": 32955 + }, + { + "epoch": 1.54, + "learning_rate": 1.4992710798990486e-05, + "loss": 0.1512, + "step": 32960 + }, + { + "epoch": 1.54, + "learning_rate": 1.49919270139357e-05, + "loss": 0.1847, + "step": 32965 + }, + { + "epoch": 1.54, + "learning_rate": 1.4991143228880913e-05, + "loss": 0.1602, + "step": 32970 + }, + { + "epoch": 1.54, + "learning_rate": 1.4990359443826126e-05, + "loss": 0.3279, + "step": 32975 + }, + { + "epoch": 1.54, + "learning_rate": 1.498957565877134e-05, + "loss": 0.2164, + "step": 32980 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988791873716553e-05, + "loss": 0.2688, + "step": 32985 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988008088661767e-05, + "loss": 0.0506, + "step": 32990 + }, + { + "epoch": 1.54, + "learning_rate": 1.498722430360698e-05, + "loss": 0.0692, + "step": 32995 + }, + { + "epoch": 1.54, + "learning_rate": 1.4986440518552195e-05, + "loss": 0.1243, + "step": 33000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4985656733497407e-05, + "loss": 0.1195, + "step": 33005 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984872948442621e-05, + "loss": 0.1462, + "step": 33010 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984089163387834e-05, + "loss": 0.121, + "step": 33015 + }, + { + "epoch": 1.54, + "learning_rate": 1.4983305378333046e-05, + "loss": 0.1771, + "step": 33020 + }, + { + "epoch": 1.54, + "learning_rate": 1.4982521593278261e-05, + "loss": 0.1746, + "step": 33025 + }, + { + "epoch": 1.54, + "learning_rate": 1.4981737808223474e-05, + "loss": 0.3095, + "step": 33030 + }, + { + "epoch": 1.54, + "learning_rate": 1.4980954023168687e-05, + "loss": 0.2838, + "step": 33035 + }, + { + "epoch": 1.54, + "learning_rate": 1.49801702381139e-05, + "loss": 0.0275, + "step": 33040 + }, + { + "epoch": 1.54, + "learning_rate": 1.4979386453059115e-05, + "loss": 0.0868, + "step": 33045 + }, + { + "epoch": 1.54, + "learning_rate": 1.4978602668004327e-05, + "loss": 0.0809, + "step": 33050 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977818882949541e-05, + "loss": 0.1187, + "step": 33055 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977035097894754e-05, + "loss": 0.1628, + "step": 33060 + }, + { + "epoch": 1.54, + "learning_rate": 1.497625131283997e-05, + "loss": 0.1992, + "step": 33065 + }, + { + "epoch": 1.54, + "learning_rate": 1.4975467527785181e-05, + "loss": 0.2054, + "step": 33070 + }, + { + "epoch": 1.54, + "learning_rate": 1.4974683742730395e-05, + "loss": 0.2641, + "step": 33075 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973899957675608e-05, + "loss": 0.4842, + "step": 33080 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973116172620821e-05, + "loss": 0.4444, + "step": 33085 + }, + { + "epoch": 1.54, + "learning_rate": 1.4972332387566035e-05, + "loss": 0.0517, + "step": 33090 + }, + { + "epoch": 1.54, + "learning_rate": 1.4971548602511248e-05, + "loss": 0.0611, + "step": 33095 + }, + { + "epoch": 1.54, + "learning_rate": 1.4970764817456463e-05, + "loss": 0.0702, + "step": 33100 + }, + { + "epoch": 1.54, + "learning_rate": 1.4969981032401675e-05, + "loss": 0.143, + "step": 33105 + }, + { + "epoch": 1.54, + "learning_rate": 1.496919724734689e-05, + "loss": 0.1304, + "step": 33110 + }, + { + "epoch": 1.55, + "learning_rate": 1.4968413462292101e-05, + "loss": 0.1834, + "step": 33115 + }, + { + "epoch": 1.55, + "learning_rate": 1.4967629677237317e-05, + "loss": 0.1337, + "step": 33120 + }, + { + "epoch": 1.55, + "learning_rate": 1.496684589218253e-05, + "loss": 0.213, + "step": 33125 + }, + { + "epoch": 1.55, + "learning_rate": 1.49662188641387e-05, + "loss": 0.3345, + "step": 33130 + }, + { + "epoch": 1.55, + "learning_rate": 1.4965435079083914e-05, + "loss": 0.307, + "step": 33135 + }, + { + "epoch": 1.55, + "learning_rate": 1.4964651294029126e-05, + "loss": 0.063, + "step": 33140 + }, + { + "epoch": 1.55, + "learning_rate": 1.496386750897434e-05, + "loss": 0.0567, + "step": 33145 + }, + { + "epoch": 1.55, + "learning_rate": 1.4963083723919554e-05, + "loss": 0.0812, + "step": 33150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4962299938864766e-05, + "loss": 0.0735, + "step": 33155 + }, + { + "epoch": 1.55, + "learning_rate": 1.4961516153809981e-05, + "loss": 0.1804, + "step": 33160 + }, + { + "epoch": 1.55, + "learning_rate": 1.4960732368755194e-05, + "loss": 0.125, + "step": 33165 + }, + { + "epoch": 1.55, + "learning_rate": 1.4959948583700408e-05, + "loss": 0.2112, + "step": 33170 + }, + { + "epoch": 1.55, + "learning_rate": 1.495916479864562e-05, + "loss": 0.2541, + "step": 33175 + }, + { + "epoch": 1.55, + "learning_rate": 1.4958381013590835e-05, + "loss": 0.2793, + "step": 33180 + }, + { + "epoch": 1.55, + "learning_rate": 1.4957597228536048e-05, + "loss": 0.242, + "step": 33185 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956813443481262e-05, + "loss": 0.0286, + "step": 33190 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956029658426474e-05, + "loss": 0.057, + "step": 33195 + }, + { + "epoch": 1.55, + "learning_rate": 1.495524587337169e-05, + "loss": 0.087, + "step": 33200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4954462088316902e-05, + "loss": 0.0883, + "step": 33205 + }, + { + "epoch": 1.55, + "learning_rate": 1.4953678303262114e-05, + "loss": 0.1055, + "step": 33210 + }, + { + "epoch": 1.55, + "learning_rate": 1.4952894518207328e-05, + "loss": 0.4157, + "step": 33215 + }, + { + "epoch": 1.55, + "learning_rate": 1.495211073315254e-05, + "loss": 0.1814, + "step": 33220 + }, + { + "epoch": 1.55, + "learning_rate": 1.4951326948097755e-05, + "loss": 0.301, + "step": 33225 + }, + { + "epoch": 1.55, + "learning_rate": 1.4950543163042968e-05, + "loss": 0.3661, + "step": 33230 + }, + { + "epoch": 1.55, + "learning_rate": 1.4949759377988182e-05, + "loss": 0.2502, + "step": 33235 + }, + { + "epoch": 1.55, + "learning_rate": 1.4948975592933394e-05, + "loss": 0.0987, + "step": 33240 + }, + { + "epoch": 1.55, + "learning_rate": 1.494819180787861e-05, + "loss": 0.0505, + "step": 33245 + }, + { + "epoch": 1.55, + "learning_rate": 1.4947408022823822e-05, + "loss": 0.1345, + "step": 33250 + }, + { + "epoch": 1.55, + "learning_rate": 1.4946624237769036e-05, + "loss": 0.1235, + "step": 33255 + }, + { + "epoch": 1.55, + "learning_rate": 1.494584045271425e-05, + "loss": 0.1194, + "step": 33260 + }, + { + "epoch": 1.55, + "learning_rate": 1.4945056667659463e-05, + "loss": 0.1738, + "step": 33265 + }, + { + "epoch": 1.55, + "learning_rate": 1.4944272882604676e-05, + "loss": 0.2238, + "step": 33270 + }, + { + "epoch": 1.55, + "learning_rate": 1.4943489097549888e-05, + "loss": 0.295, + "step": 33275 + }, + { + "epoch": 1.55, + "learning_rate": 1.4942705312495103e-05, + "loss": 0.488, + "step": 33280 + }, + { + "epoch": 1.55, + "learning_rate": 1.4941921527440316e-05, + "loss": 0.392, + "step": 33285 + }, + { + "epoch": 1.55, + "learning_rate": 1.494113774238553e-05, + "loss": 0.0366, + "step": 33290 + }, + { + "epoch": 1.55, + "learning_rate": 1.4940353957330742e-05, + "loss": 0.0348, + "step": 33295 + }, + { + "epoch": 1.55, + "learning_rate": 1.4939570172275957e-05, + "loss": 0.0887, + "step": 33300 + }, + { + "epoch": 1.55, + "learning_rate": 1.493878638722117e-05, + "loss": 0.1132, + "step": 33305 + }, + { + "epoch": 1.55, + "learning_rate": 1.4938002602166383e-05, + "loss": 0.1445, + "step": 33310 + }, + { + "epoch": 1.55, + "learning_rate": 1.4937218817111596e-05, + "loss": 0.0744, + "step": 33315 + }, + { + "epoch": 1.55, + "learning_rate": 1.4936435032056811e-05, + "loss": 0.2349, + "step": 33320 + }, + { + "epoch": 1.55, + "learning_rate": 1.4935651247002023e-05, + "loss": 0.3013, + "step": 33325 + }, + { + "epoch": 1.56, + "learning_rate": 1.4934867461947237e-05, + "loss": 0.3091, + "step": 33330 + }, + { + "epoch": 1.56, + "learning_rate": 1.493408367689245e-05, + "loss": 0.3184, + "step": 33335 + }, + { + "epoch": 1.56, + "learning_rate": 1.4933299891837663e-05, + "loss": 0.0297, + "step": 33340 + }, + { + "epoch": 1.56, + "learning_rate": 1.4932516106782877e-05, + "loss": 0.0657, + "step": 33345 + }, + { + "epoch": 1.56, + "learning_rate": 1.493173232172809e-05, + "loss": 0.0838, + "step": 33350 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930948536673303e-05, + "loss": 0.0415, + "step": 33355 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930164751618517e-05, + "loss": 0.1043, + "step": 33360 + }, + { + "epoch": 1.56, + "learning_rate": 1.4929380966563731e-05, + "loss": 0.1528, + "step": 33365 + }, + { + "epoch": 1.56, + "learning_rate": 1.4928597181508943e-05, + "loss": 0.1359, + "step": 33370 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927813396454159e-05, + "loss": 0.2899, + "step": 33375 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927029611399371e-05, + "loss": 0.3585, + "step": 33380 + }, + { + "epoch": 1.56, + "learning_rate": 1.4926245826344585e-05, + "loss": 0.3395, + "step": 33385 + }, + { + "epoch": 1.56, + "learning_rate": 1.4925462041289797e-05, + "loss": 0.0484, + "step": 33390 + }, + { + "epoch": 1.56, + "learning_rate": 1.4924678256235013e-05, + "loss": 0.0426, + "step": 33395 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923894471180225e-05, + "loss": 0.0988, + "step": 33400 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923110686125437e-05, + "loss": 0.1479, + "step": 33405 + }, + { + "epoch": 1.56, + "learning_rate": 1.4922326901070651e-05, + "loss": 0.1827, + "step": 33410 + }, + { + "epoch": 1.56, + "learning_rate": 1.4921543116015864e-05, + "loss": 0.1848, + "step": 33415 + }, + { + "epoch": 1.56, + "learning_rate": 1.492075933096108e-05, + "loss": 0.1389, + "step": 33420 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919975545906291e-05, + "loss": 0.2672, + "step": 33425 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919191760851505e-05, + "loss": 0.3146, + "step": 33430 + }, + { + "epoch": 1.56, + "learning_rate": 1.4918407975796717e-05, + "loss": 0.206, + "step": 33435 + }, + { + "epoch": 1.56, + "learning_rate": 1.4917624190741933e-05, + "loss": 0.0536, + "step": 33440 + }, + { + "epoch": 1.56, + "learning_rate": 1.4916840405687145e-05, + "loss": 0.0521, + "step": 33445 + }, + { + "epoch": 1.56, + "learning_rate": 1.491605662063236e-05, + "loss": 0.053, + "step": 33450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4915272835577571e-05, + "loss": 0.1253, + "step": 33455 + }, + { + "epoch": 1.56, + "learning_rate": 1.4914489050522787e-05, + "loss": 0.1643, + "step": 33460 + }, + { + "epoch": 1.56, + "learning_rate": 1.4913705265468e-05, + "loss": 0.1678, + "step": 33465 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912921480413211e-05, + "loss": 0.1316, + "step": 33470 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912137695358427e-05, + "loss": 0.3028, + "step": 33475 + }, + { + "epoch": 1.56, + "learning_rate": 1.491135391030364e-05, + "loss": 0.4472, + "step": 33480 + }, + { + "epoch": 1.56, + "learning_rate": 1.4910570125248853e-05, + "loss": 0.2954, + "step": 33485 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909786340194065e-05, + "loss": 0.0151, + "step": 33490 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909002555139281e-05, + "loss": 0.0831, + "step": 33495 + }, + { + "epoch": 1.56, + "learning_rate": 1.4908218770084493e-05, + "loss": 0.0586, + "step": 33500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4907434985029707e-05, + "loss": 0.1487, + "step": 33505 + }, + { + "epoch": 1.56, + "learning_rate": 1.490665119997492e-05, + "loss": 0.0689, + "step": 33510 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905867414920135e-05, + "loss": 0.1569, + "step": 33515 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905083629865347e-05, + "loss": 0.1794, + "step": 33520 + }, + { + "epoch": 1.56, + "learning_rate": 1.4904299844810561e-05, + "loss": 0.2747, + "step": 33525 + }, + { + "epoch": 1.56, + "learning_rate": 1.4903516059755773e-05, + "loss": 0.3703, + "step": 33530 + }, + { + "epoch": 1.56, + "learning_rate": 1.4902732274700985e-05, + "loss": 0.336, + "step": 33535 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901948489646201e-05, + "loss": 0.0809, + "step": 33540 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901164704591413e-05, + "loss": 0.0574, + "step": 33545 + }, + { + "epoch": 1.57, + "learning_rate": 1.4900380919536627e-05, + "loss": 0.0474, + "step": 33550 + }, + { + "epoch": 1.57, + "learning_rate": 1.489959713448184e-05, + "loss": 0.1294, + "step": 33555 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898813349427055e-05, + "loss": 0.1134, + "step": 33560 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898029564372267e-05, + "loss": 0.1304, + "step": 33565 + }, + { + "epoch": 1.57, + "learning_rate": 1.4897245779317481e-05, + "loss": 0.1349, + "step": 33570 + }, + { + "epoch": 1.57, + "learning_rate": 1.4896461994262695e-05, + "loss": 0.1339, + "step": 33575 + }, + { + "epoch": 1.57, + "learning_rate": 1.4895678209207909e-05, + "loss": 0.3101, + "step": 33580 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894894424153121e-05, + "loss": 0.2441, + "step": 33585 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894110639098337e-05, + "loss": 0.0574, + "step": 33590 + }, + { + "epoch": 1.57, + "learning_rate": 1.4893326854043549e-05, + "loss": 0.1494, + "step": 33595 + }, + { + "epoch": 1.57, + "learning_rate": 1.4892543068988761e-05, + "loss": 0.0818, + "step": 33600 + }, + { + "epoch": 1.57, + "learning_rate": 1.4891759283933975e-05, + "loss": 0.0343, + "step": 33605 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890975498879187e-05, + "loss": 0.0497, + "step": 33610 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890191713824403e-05, + "loss": 0.1251, + "step": 33615 + }, + { + "epoch": 1.57, + "learning_rate": 1.4889407928769615e-05, + "loss": 0.1593, + "step": 33620 + }, + { + "epoch": 1.57, + "learning_rate": 1.4888624143714829e-05, + "loss": 0.1829, + "step": 33625 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887840358660041e-05, + "loss": 0.4039, + "step": 33630 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887056573605257e-05, + "loss": 0.2516, + "step": 33635 + }, + { + "epoch": 1.57, + "learning_rate": 1.4886272788550469e-05, + "loss": 0.0328, + "step": 33640 + }, + { + "epoch": 1.57, + "learning_rate": 1.4885489003495683e-05, + "loss": 0.0959, + "step": 33645 + }, + { + "epoch": 1.57, + "learning_rate": 1.4884705218440895e-05, + "loss": 0.0733, + "step": 33650 + }, + { + "epoch": 1.57, + "learning_rate": 1.488392143338611e-05, + "loss": 0.0875, + "step": 33655 + }, + { + "epoch": 1.57, + "learning_rate": 1.4883137648331323e-05, + "loss": 0.0994, + "step": 33660 + }, + { + "epoch": 1.57, + "learning_rate": 1.4882353863276535e-05, + "loss": 0.1776, + "step": 33665 + }, + { + "epoch": 1.57, + "learning_rate": 1.4881570078221749e-05, + "loss": 0.1794, + "step": 33670 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880786293166963e-05, + "loss": 0.2112, + "step": 33675 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880002508112177e-05, + "loss": 0.2123, + "step": 33680 + }, + { + "epoch": 1.57, + "learning_rate": 1.4879218723057389e-05, + "loss": 0.4273, + "step": 33685 + }, + { + "epoch": 1.57, + "learning_rate": 1.4878434938002605e-05, + "loss": 0.0487, + "step": 33690 + }, + { + "epoch": 1.57, + "learning_rate": 1.4877651152947817e-05, + "loss": 0.0653, + "step": 33695 + }, + { + "epoch": 1.57, + "learning_rate": 1.487686736789303e-05, + "loss": 0.0647, + "step": 33700 + }, + { + "epoch": 1.57, + "learning_rate": 1.4876083582838243e-05, + "loss": 0.0774, + "step": 33705 + }, + { + "epoch": 1.57, + "learning_rate": 1.4875299797783459e-05, + "loss": 0.1084, + "step": 33710 + }, + { + "epoch": 1.57, + "learning_rate": 1.487451601272867e-05, + "loss": 0.1575, + "step": 33715 + }, + { + "epoch": 1.57, + "learning_rate": 1.4873732227673885e-05, + "loss": 0.1553, + "step": 33720 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872948442619097e-05, + "loss": 0.2492, + "step": 33725 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872164657564309e-05, + "loss": 0.2886, + "step": 33730 + }, + { + "epoch": 1.57, + "learning_rate": 1.4871380872509525e-05, + "loss": 0.2957, + "step": 33735 + }, + { + "epoch": 1.57, + "learning_rate": 1.4870597087454737e-05, + "loss": 0.0997, + "step": 33740 + }, + { + "epoch": 1.57, + "learning_rate": 1.486981330239995e-05, + "loss": 0.0764, + "step": 33745 + }, + { + "epoch": 1.57, + "learning_rate": 1.4869029517345163e-05, + "loss": 0.0875, + "step": 33750 + }, + { + "epoch": 1.58, + "learning_rate": 1.4868245732290379e-05, + "loss": 0.1655, + "step": 33755 + }, + { + "epoch": 1.58, + "learning_rate": 1.4867461947235591e-05, + "loss": 0.0998, + "step": 33760 + }, + { + "epoch": 1.58, + "learning_rate": 1.4866678162180805e-05, + "loss": 0.1561, + "step": 33765 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865894377126017e-05, + "loss": 0.1504, + "step": 33770 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865110592071233e-05, + "loss": 0.2526, + "step": 33775 + }, + { + "epoch": 1.58, + "learning_rate": 1.4864326807016445e-05, + "loss": 0.3547, + "step": 33780 + }, + { + "epoch": 1.58, + "learning_rate": 1.4863543021961659e-05, + "loss": 0.2778, + "step": 33785 + }, + { + "epoch": 1.58, + "learning_rate": 1.4862759236906873e-05, + "loss": 0.0651, + "step": 33790 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861975451852085e-05, + "loss": 0.0651, + "step": 33795 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861191666797299e-05, + "loss": 0.1012, + "step": 33800 + }, + { + "epoch": 1.58, + "learning_rate": 1.4860407881742511e-05, + "loss": 0.1282, + "step": 33805 + }, + { + "epoch": 1.58, + "learning_rate": 1.4859624096687727e-05, + "loss": 0.163, + "step": 33810 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858840311632939e-05, + "loss": 0.221, + "step": 33815 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858056526578153e-05, + "loss": 0.1405, + "step": 33820 + }, + { + "epoch": 1.58, + "learning_rate": 1.4857272741523365e-05, + "loss": 0.3016, + "step": 33825 + }, + { + "epoch": 1.58, + "learning_rate": 1.485648895646858e-05, + "loss": 0.4393, + "step": 33830 + }, + { + "epoch": 1.58, + "learning_rate": 1.4855705171413793e-05, + "loss": 0.2873, + "step": 33835 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854921386359007e-05, + "loss": 0.0466, + "step": 33840 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854137601304219e-05, + "loss": 0.0364, + "step": 33845 + }, + { + "epoch": 1.58, + "learning_rate": 1.4853353816249434e-05, + "loss": 0.1092, + "step": 33850 + }, + { + "epoch": 1.58, + "learning_rate": 1.4852570031194647e-05, + "loss": 0.1333, + "step": 33855 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851786246139859e-05, + "loss": 0.1323, + "step": 33860 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851002461085073e-05, + "loss": 0.1203, + "step": 33865 + }, + { + "epoch": 1.58, + "learning_rate": 1.4850218676030285e-05, + "loss": 0.1091, + "step": 33870 + }, + { + "epoch": 1.58, + "learning_rate": 1.48494348909755e-05, + "loss": 0.2064, + "step": 33875 + }, + { + "epoch": 1.58, + "learning_rate": 1.4848651105920713e-05, + "loss": 0.3023, + "step": 33880 + }, + { + "epoch": 1.58, + "learning_rate": 1.4847867320865927e-05, + "loss": 0.3641, + "step": 33885 + }, + { + "epoch": 1.58, + "learning_rate": 1.484708353581114e-05, + "loss": 0.0508, + "step": 33890 + }, + { + "epoch": 1.58, + "learning_rate": 1.4846299750756354e-05, + "loss": 0.0378, + "step": 33895 + }, + { + "epoch": 1.58, + "learning_rate": 1.4845515965701567e-05, + "loss": 0.1599, + "step": 33900 + }, + { + "epoch": 1.58, + "learning_rate": 1.4844732180646782e-05, + "loss": 0.0987, + "step": 33905 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843948395591994e-05, + "loss": 0.0775, + "step": 33910 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843164610537208e-05, + "loss": 0.1141, + "step": 33915 + }, + { + "epoch": 1.58, + "learning_rate": 1.484238082548242e-05, + "loss": 0.1908, + "step": 33920 + }, + { + "epoch": 1.58, + "learning_rate": 1.4841597040427633e-05, + "loss": 0.2086, + "step": 33925 + }, + { + "epoch": 1.58, + "learning_rate": 1.4840813255372848e-05, + "loss": 0.3953, + "step": 33930 + }, + { + "epoch": 1.58, + "learning_rate": 1.484002947031806e-05, + "loss": 0.3082, + "step": 33935 + }, + { + "epoch": 1.58, + "learning_rate": 1.4839245685263275e-05, + "loss": 0.0498, + "step": 33940 + }, + { + "epoch": 1.58, + "learning_rate": 1.4838461900208487e-05, + "loss": 0.0308, + "step": 33945 + }, + { + "epoch": 1.58, + "learning_rate": 1.4837678115153702e-05, + "loss": 0.0608, + "step": 33950 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836894330098915e-05, + "loss": 0.1048, + "step": 33955 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836110545044128e-05, + "loss": 0.1423, + "step": 33960 + }, + { + "epoch": 1.58, + "learning_rate": 1.483532675998934e-05, + "loss": 0.175, + "step": 33965 + }, + { + "epoch": 1.59, + "learning_rate": 1.4834542974934556e-05, + "loss": 0.1559, + "step": 33970 + }, + { + "epoch": 1.59, + "learning_rate": 1.4833759189879768e-05, + "loss": 0.2204, + "step": 33975 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832975404824982e-05, + "loss": 0.3428, + "step": 33980 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832191619770195e-05, + "loss": 0.2474, + "step": 33985 + }, + { + "epoch": 1.59, + "learning_rate": 1.4831407834715409e-05, + "loss": 0.0378, + "step": 33990 + }, + { + "epoch": 1.59, + "learning_rate": 1.4830624049660622e-05, + "loss": 0.0739, + "step": 33995 + }, + { + "epoch": 1.59, + "learning_rate": 1.4829840264605835e-05, + "loss": 0.0936, + "step": 34000 + }, + { + "epoch": 1.59, + "learning_rate": 1.482905647955105e-05, + "loss": 0.0705, + "step": 34005 + }, + { + "epoch": 1.59, + "learning_rate": 1.4828272694496262e-05, + "loss": 0.0965, + "step": 34010 + }, + { + "epoch": 1.59, + "learning_rate": 1.4827488909441476e-05, + "loss": 0.0939, + "step": 34015 + }, + { + "epoch": 1.59, + "learning_rate": 1.4826705124386689e-05, + "loss": 0.1264, + "step": 34020 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825921339331904e-05, + "loss": 0.1568, + "step": 34025 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825137554277116e-05, + "loss": 0.2927, + "step": 34030 + }, + { + "epoch": 1.59, + "learning_rate": 1.482435376922233e-05, + "loss": 0.3461, + "step": 34035 + }, + { + "epoch": 1.59, + "learning_rate": 1.4823569984167542e-05, + "loss": 0.0535, + "step": 34040 + }, + { + "epoch": 1.59, + "learning_rate": 1.4822786199112758e-05, + "loss": 0.0372, + "step": 34045 + }, + { + "epoch": 1.59, + "learning_rate": 1.482200241405797e-05, + "loss": 0.0823, + "step": 34050 + }, + { + "epoch": 1.59, + "learning_rate": 1.4821218629003183e-05, + "loss": 0.0993, + "step": 34055 + }, + { + "epoch": 1.59, + "learning_rate": 1.4820434843948396e-05, + "loss": 0.1109, + "step": 34060 + }, + { + "epoch": 1.59, + "learning_rate": 1.4819651058893609e-05, + "loss": 0.1505, + "step": 34065 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818867273838824e-05, + "loss": 0.2167, + "step": 34070 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818083488784036e-05, + "loss": 0.1975, + "step": 34075 + }, + { + "epoch": 1.59, + "learning_rate": 1.481729970372925e-05, + "loss": 0.3396, + "step": 34080 + }, + { + "epoch": 1.59, + "learning_rate": 1.4816515918674463e-05, + "loss": 0.2306, + "step": 34085 + }, + { + "epoch": 1.59, + "learning_rate": 1.4815732133619678e-05, + "loss": 0.0387, + "step": 34090 + }, + { + "epoch": 1.59, + "learning_rate": 1.481494834856489e-05, + "loss": 0.0714, + "step": 34095 + }, + { + "epoch": 1.59, + "learning_rate": 1.4814164563510104e-05, + "loss": 0.1295, + "step": 34100 + }, + { + "epoch": 1.59, + "learning_rate": 1.4813380778455318e-05, + "loss": 0.1515, + "step": 34105 + }, + { + "epoch": 1.59, + "learning_rate": 1.4812596993400532e-05, + "loss": 0.1432, + "step": 34110 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811813208345744e-05, + "loss": 0.1063, + "step": 34115 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811029423290957e-05, + "loss": 0.175, + "step": 34120 + }, + { + "epoch": 1.59, + "learning_rate": 1.4810245638236172e-05, + "loss": 0.2832, + "step": 34125 + }, + { + "epoch": 1.59, + "learning_rate": 1.4809461853181384e-05, + "loss": 0.4135, + "step": 34130 + }, + { + "epoch": 1.59, + "learning_rate": 1.4808678068126598e-05, + "loss": 0.2442, + "step": 34135 + }, + { + "epoch": 1.59, + "learning_rate": 1.480789428307181e-05, + "loss": 0.0886, + "step": 34140 + }, + { + "epoch": 1.59, + "learning_rate": 1.4807110498017026e-05, + "loss": 0.0458, + "step": 34145 + }, + { + "epoch": 1.59, + "learning_rate": 1.4806326712962238e-05, + "loss": 0.1056, + "step": 34150 + }, + { + "epoch": 1.59, + "learning_rate": 1.4805542927907452e-05, + "loss": 0.0775, + "step": 34155 + }, + { + "epoch": 1.59, + "learning_rate": 1.4804759142852664e-05, + "loss": 0.1509, + "step": 34160 + }, + { + "epoch": 1.59, + "learning_rate": 1.480397535779788e-05, + "loss": 0.1604, + "step": 34165 + }, + { + "epoch": 1.59, + "learning_rate": 1.4803191572743092e-05, + "loss": 0.1765, + "step": 34170 + }, + { + "epoch": 1.59, + "learning_rate": 1.4802407787688306e-05, + "loss": 0.1314, + "step": 34175 + }, + { + "epoch": 1.59, + "learning_rate": 1.4801624002633518e-05, + "loss": 0.428, + "step": 34180 + }, + { + "epoch": 1.6, + "learning_rate": 1.480084021757873e-05, + "loss": 0.2183, + "step": 34185 + }, + { + "epoch": 1.6, + "learning_rate": 1.4800056432523946e-05, + "loss": 0.0748, + "step": 34190 + }, + { + "epoch": 1.6, + "learning_rate": 1.4799272647469158e-05, + "loss": 0.0489, + "step": 34195 + }, + { + "epoch": 1.6, + "learning_rate": 1.4798488862414372e-05, + "loss": 0.0639, + "step": 34200 + }, + { + "epoch": 1.6, + "learning_rate": 1.4797705077359586e-05, + "loss": 0.0929, + "step": 34205 + }, + { + "epoch": 1.6, + "learning_rate": 1.47969212923048e-05, + "loss": 0.187, + "step": 34210 + }, + { + "epoch": 1.6, + "learning_rate": 1.4796137507250012e-05, + "loss": 0.1703, + "step": 34215 + }, + { + "epoch": 1.6, + "learning_rate": 1.4795353722195228e-05, + "loss": 0.1433, + "step": 34220 + }, + { + "epoch": 1.6, + "learning_rate": 1.479456993714044e-05, + "loss": 0.2344, + "step": 34225 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793786152085654e-05, + "loss": 0.3602, + "step": 34230 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793002367030866e-05, + "loss": 0.3009, + "step": 34235 + }, + { + "epoch": 1.6, + "learning_rate": 1.4792218581976082e-05, + "loss": 0.107, + "step": 34240 + }, + { + "epoch": 1.6, + "learning_rate": 1.4791434796921294e-05, + "loss": 0.0895, + "step": 34245 + }, + { + "epoch": 1.6, + "learning_rate": 1.4790651011866506e-05, + "loss": 0.0729, + "step": 34250 + }, + { + "epoch": 1.6, + "learning_rate": 1.478986722681172e-05, + "loss": 0.0705, + "step": 34255 + }, + { + "epoch": 1.6, + "learning_rate": 1.4789083441756932e-05, + "loss": 0.1037, + "step": 34260 + }, + { + "epoch": 1.6, + "learning_rate": 1.4788299656702148e-05, + "loss": 0.1372, + "step": 34265 + }, + { + "epoch": 1.6, + "learning_rate": 1.478751587164736e-05, + "loss": 0.2172, + "step": 34270 + }, + { + "epoch": 1.6, + "learning_rate": 1.4786732086592574e-05, + "loss": 0.2641, + "step": 34275 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785948301537786e-05, + "loss": 0.3562, + "step": 34280 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785164516483002e-05, + "loss": 0.2552, + "step": 34285 + }, + { + "epoch": 1.6, + "learning_rate": 1.4784380731428214e-05, + "loss": 0.0604, + "step": 34290 + }, + { + "epoch": 1.6, + "learning_rate": 1.4783596946373428e-05, + "loss": 0.0537, + "step": 34295 + }, + { + "epoch": 1.6, + "learning_rate": 1.478281316131864e-05, + "loss": 0.1428, + "step": 34300 + }, + { + "epoch": 1.6, + "learning_rate": 1.4782029376263856e-05, + "loss": 0.1001, + "step": 34305 + }, + { + "epoch": 1.6, + "learning_rate": 1.4781245591209068e-05, + "loss": 0.0837, + "step": 34310 + }, + { + "epoch": 1.6, + "learning_rate": 1.478046180615428e-05, + "loss": 0.125, + "step": 34315 + }, + { + "epoch": 1.6, + "learning_rate": 1.4779678021099496e-05, + "loss": 0.1296, + "step": 34320 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778894236044708e-05, + "loss": 0.1319, + "step": 34325 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778110450989922e-05, + "loss": 0.2868, + "step": 34330 + }, + { + "epoch": 1.6, + "learning_rate": 1.4777326665935134e-05, + "loss": 0.3055, + "step": 34335 + }, + { + "epoch": 1.6, + "learning_rate": 1.477654288088035e-05, + "loss": 0.0613, + "step": 34340 + }, + { + "epoch": 1.6, + "learning_rate": 1.4775759095825562e-05, + "loss": 0.1232, + "step": 34345 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774975310770776e-05, + "loss": 0.0745, + "step": 34350 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774191525715988e-05, + "loss": 0.128, + "step": 34355 + }, + { + "epoch": 1.6, + "learning_rate": 1.4773407740661204e-05, + "loss": 0.0547, + "step": 34360 + }, + { + "epoch": 1.6, + "learning_rate": 1.4772623955606416e-05, + "loss": 0.1655, + "step": 34365 + }, + { + "epoch": 1.6, + "learning_rate": 1.477184017055163e-05, + "loss": 0.47, + "step": 34370 + }, + { + "epoch": 1.6, + "learning_rate": 1.4771056385496842e-05, + "loss": 0.2424, + "step": 34375 + }, + { + "epoch": 1.6, + "learning_rate": 1.4770272600442054e-05, + "loss": 0.3795, + "step": 34380 + }, + { + "epoch": 1.6, + "learning_rate": 1.476948881538727e-05, + "loss": 0.292, + "step": 34385 + }, + { + "epoch": 1.6, + "learning_rate": 1.4768705030332482e-05, + "loss": 0.0394, + "step": 34390 + }, + { + "epoch": 1.6, + "learning_rate": 1.4767921245277696e-05, + "loss": 0.0294, + "step": 34395 + }, + { + "epoch": 1.61, + "learning_rate": 1.4767137460222908e-05, + "loss": 0.0748, + "step": 34400 + }, + { + "epoch": 1.61, + "learning_rate": 1.4766353675168124e-05, + "loss": 0.1135, + "step": 34405 + }, + { + "epoch": 1.61, + "learning_rate": 1.4765569890113336e-05, + "loss": 0.1248, + "step": 34410 + }, + { + "epoch": 1.61, + "learning_rate": 1.476478610505855e-05, + "loss": 0.1699, + "step": 34415 + }, + { + "epoch": 1.61, + "learning_rate": 1.4764002320003764e-05, + "loss": 0.1724, + "step": 34420 + }, + { + "epoch": 1.61, + "learning_rate": 1.4763218534948978e-05, + "loss": 0.2934, + "step": 34425 + }, + { + "epoch": 1.61, + "learning_rate": 1.476243474989419e-05, + "loss": 0.3888, + "step": 34430 + }, + { + "epoch": 1.61, + "learning_rate": 1.4761650964839405e-05, + "loss": 0.3509, + "step": 34435 + }, + { + "epoch": 1.61, + "learning_rate": 1.4760867179784618e-05, + "loss": 0.0337, + "step": 34440 + }, + { + "epoch": 1.61, + "learning_rate": 1.476008339472983e-05, + "loss": 0.0399, + "step": 34445 + }, + { + "epoch": 1.61, + "learning_rate": 1.4759299609675044e-05, + "loss": 0.071, + "step": 34450 + }, + { + "epoch": 1.61, + "learning_rate": 1.4758515824620256e-05, + "loss": 0.1536, + "step": 34455 + }, + { + "epoch": 1.61, + "learning_rate": 1.4757732039565472e-05, + "loss": 0.0879, + "step": 34460 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756948254510684e-05, + "loss": 0.256, + "step": 34465 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756164469455898e-05, + "loss": 0.1642, + "step": 34470 + }, + { + "epoch": 1.61, + "learning_rate": 1.475538068440111e-05, + "loss": 0.3665, + "step": 34475 + }, + { + "epoch": 1.61, + "learning_rate": 1.4754596899346326e-05, + "loss": 0.5219, + "step": 34480 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753813114291538e-05, + "loss": 0.24, + "step": 34485 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753029329236752e-05, + "loss": 0.0458, + "step": 34490 + }, + { + "epoch": 1.61, + "learning_rate": 1.4752245544181964e-05, + "loss": 0.0644, + "step": 34495 + }, + { + "epoch": 1.61, + "learning_rate": 1.475146175912718e-05, + "loss": 0.0412, + "step": 34500 + }, + { + "epoch": 1.61, + "learning_rate": 1.4750677974072392e-05, + "loss": 0.0782, + "step": 34505 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749894189017604e-05, + "loss": 0.102, + "step": 34510 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749110403962818e-05, + "loss": 0.1028, + "step": 34515 + }, + { + "epoch": 1.61, + "learning_rate": 1.4748326618908032e-05, + "loss": 0.1788, + "step": 34520 + }, + { + "epoch": 1.61, + "learning_rate": 1.4747542833853246e-05, + "loss": 0.1417, + "step": 34525 + }, + { + "epoch": 1.61, + "learning_rate": 1.4746759048798458e-05, + "loss": 0.2783, + "step": 34530 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745975263743673e-05, + "loss": 0.2968, + "step": 34535 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745191478688886e-05, + "loss": 0.0946, + "step": 34540 + }, + { + "epoch": 1.61, + "learning_rate": 1.47444076936341e-05, + "loss": 0.0437, + "step": 34545 + }, + { + "epoch": 1.61, + "learning_rate": 1.4743623908579312e-05, + "loss": 0.0868, + "step": 34550 + }, + { + "epoch": 1.61, + "learning_rate": 1.4742840123524527e-05, + "loss": 0.105, + "step": 34555 + }, + { + "epoch": 1.61, + "learning_rate": 1.474205633846974e-05, + "loss": 0.1074, + "step": 34560 + }, + { + "epoch": 1.61, + "learning_rate": 1.4741272553414953e-05, + "loss": 0.1464, + "step": 34565 + }, + { + "epoch": 1.61, + "learning_rate": 1.4740488768360166e-05, + "loss": 0.1298, + "step": 34570 + }, + { + "epoch": 1.61, + "learning_rate": 1.4739704983305378e-05, + "loss": 0.2425, + "step": 34575 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738921198250593e-05, + "loss": 0.3058, + "step": 34580 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738137413195806e-05, + "loss": 0.2462, + "step": 34585 + }, + { + "epoch": 1.61, + "learning_rate": 1.473735362814102e-05, + "loss": 0.0695, + "step": 34590 + }, + { + "epoch": 1.61, + "learning_rate": 1.4736569843086232e-05, + "loss": 0.0971, + "step": 34595 + }, + { + "epoch": 1.61, + "learning_rate": 1.4735786058031447e-05, + "loss": 0.1426, + "step": 34600 + }, + { + "epoch": 1.61, + "learning_rate": 1.473500227297666e-05, + "loss": 0.0843, + "step": 34605 + }, + { + "epoch": 1.61, + "learning_rate": 1.4734218487921874e-05, + "loss": 0.1106, + "step": 34610 + }, + { + "epoch": 1.62, + "learning_rate": 1.4733434702867086e-05, + "loss": 0.0959, + "step": 34615 + }, + { + "epoch": 1.62, + "learning_rate": 1.4732650917812301e-05, + "loss": 0.1614, + "step": 34620 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731867132757514e-05, + "loss": 0.2626, + "step": 34625 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731083347702727e-05, + "loss": 0.2758, + "step": 34630 + }, + { + "epoch": 1.62, + "learning_rate": 1.4730299562647941e-05, + "loss": 0.3072, + "step": 34635 + }, + { + "epoch": 1.62, + "learning_rate": 1.4729515777593154e-05, + "loss": 0.0358, + "step": 34640 + }, + { + "epoch": 1.62, + "learning_rate": 1.4728731992538367e-05, + "loss": 0.0405, + "step": 34645 + }, + { + "epoch": 1.62, + "learning_rate": 1.472794820748358e-05, + "loss": 0.0772, + "step": 34650 + }, + { + "epoch": 1.62, + "learning_rate": 1.4727164422428795e-05, + "loss": 0.1179, + "step": 34655 + }, + { + "epoch": 1.62, + "learning_rate": 1.4726380637374008e-05, + "loss": 0.0595, + "step": 34660 + }, + { + "epoch": 1.62, + "learning_rate": 1.4725596852319221e-05, + "loss": 0.2746, + "step": 34665 + }, + { + "epoch": 1.62, + "learning_rate": 1.4724813067264434e-05, + "loss": 0.1401, + "step": 34670 + }, + { + "epoch": 1.62, + "learning_rate": 1.472402928220965e-05, + "loss": 0.1435, + "step": 34675 + }, + { + "epoch": 1.62, + "learning_rate": 1.4723245497154861e-05, + "loss": 0.2242, + "step": 34680 + }, + { + "epoch": 1.62, + "learning_rate": 1.4722461712100075e-05, + "loss": 0.3268, + "step": 34685 + }, + { + "epoch": 1.62, + "learning_rate": 1.4721677927045288e-05, + "loss": 0.034, + "step": 34690 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720894141990503e-05, + "loss": 0.1351, + "step": 34695 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720110356935715e-05, + "loss": 0.0795, + "step": 34700 + }, + { + "epoch": 1.62, + "learning_rate": 1.4719326571880928e-05, + "loss": 0.0898, + "step": 34705 + }, + { + "epoch": 1.62, + "learning_rate": 1.4718542786826141e-05, + "loss": 0.096, + "step": 34710 + }, + { + "epoch": 1.62, + "learning_rate": 1.4717759001771354e-05, + "loss": 0.1264, + "step": 34715 + }, + { + "epoch": 1.62, + "learning_rate": 1.471697521671657e-05, + "loss": 0.1767, + "step": 34720 + }, + { + "epoch": 1.62, + "learning_rate": 1.4716191431661782e-05, + "loss": 0.1141, + "step": 34725 + }, + { + "epoch": 1.62, + "learning_rate": 1.4715407646606995e-05, + "loss": 0.3326, + "step": 34730 + }, + { + "epoch": 1.62, + "learning_rate": 1.471462386155221e-05, + "loss": 0.3716, + "step": 34735 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713840076497423e-05, + "loss": 0.0574, + "step": 34740 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713056291442635e-05, + "loss": 0.0426, + "step": 34745 + }, + { + "epoch": 1.62, + "learning_rate": 1.4712272506387851e-05, + "loss": 0.0909, + "step": 34750 + }, + { + "epoch": 1.62, + "learning_rate": 1.4711488721333063e-05, + "loss": 0.1078, + "step": 34755 + }, + { + "epoch": 1.62, + "learning_rate": 1.4710704936278277e-05, + "loss": 0.1061, + "step": 34760 + }, + { + "epoch": 1.62, + "learning_rate": 1.470992115122349e-05, + "loss": 0.1885, + "step": 34765 + }, + { + "epoch": 1.62, + "learning_rate": 1.4709137366168702e-05, + "loss": 0.176, + "step": 34770 + }, + { + "epoch": 1.62, + "learning_rate": 1.4708353581113917e-05, + "loss": 0.1519, + "step": 34775 + }, + { + "epoch": 1.62, + "learning_rate": 1.470756979605913e-05, + "loss": 0.2766, + "step": 34780 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706786011004343e-05, + "loss": 0.2345, + "step": 34785 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706002225949556e-05, + "loss": 0.0734, + "step": 34790 + }, + { + "epoch": 1.62, + "learning_rate": 1.4705218440894771e-05, + "loss": 0.0545, + "step": 34795 + }, + { + "epoch": 1.62, + "learning_rate": 1.4704434655839983e-05, + "loss": 0.0359, + "step": 34800 + }, + { + "epoch": 1.62, + "learning_rate": 1.4703650870785197e-05, + "loss": 0.08, + "step": 34805 + }, + { + "epoch": 1.62, + "learning_rate": 1.470286708573041e-05, + "loss": 0.1442, + "step": 34810 + }, + { + "epoch": 1.62, + "learning_rate": 1.4702083300675625e-05, + "loss": 0.1107, + "step": 34815 + }, + { + "epoch": 1.62, + "learning_rate": 1.4701299515620837e-05, + "loss": 0.1809, + "step": 34820 + }, + { + "epoch": 1.62, + "learning_rate": 1.4700515730566051e-05, + "loss": 0.1639, + "step": 34825 + }, + { + "epoch": 1.63, + "learning_rate": 1.4699731945511263e-05, + "loss": 0.2368, + "step": 34830 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698948160456477e-05, + "loss": 0.2754, + "step": 34835 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698164375401691e-05, + "loss": 0.1172, + "step": 34840 + }, + { + "epoch": 1.63, + "learning_rate": 1.4697380590346903e-05, + "loss": 0.0575, + "step": 34845 + }, + { + "epoch": 1.63, + "learning_rate": 1.4696596805292119e-05, + "loss": 0.1116, + "step": 34850 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695813020237331e-05, + "loss": 0.1418, + "step": 34855 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695029235182545e-05, + "loss": 0.1506, + "step": 34860 + }, + { + "epoch": 1.63, + "learning_rate": 1.4694245450127757e-05, + "loss": 0.1702, + "step": 34865 + }, + { + "epoch": 1.63, + "learning_rate": 1.4693461665072973e-05, + "loss": 0.2012, + "step": 34870 + }, + { + "epoch": 1.63, + "learning_rate": 1.4692677880018185e-05, + "loss": 0.28, + "step": 34875 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691894094963399e-05, + "loss": 0.2179, + "step": 34880 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691110309908611e-05, + "loss": 0.2453, + "step": 34885 + }, + { + "epoch": 1.63, + "learning_rate": 1.4690326524853827e-05, + "loss": 0.0277, + "step": 34890 + }, + { + "epoch": 1.63, + "learning_rate": 1.4689542739799039e-05, + "loss": 0.1151, + "step": 34895 + }, + { + "epoch": 1.63, + "learning_rate": 1.4688758954744251e-05, + "loss": 0.091, + "step": 34900 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687975169689465e-05, + "loss": 0.1309, + "step": 34905 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687191384634677e-05, + "loss": 0.0805, + "step": 34910 + }, + { + "epoch": 1.63, + "learning_rate": 1.4686407599579893e-05, + "loss": 0.1674, + "step": 34915 + }, + { + "epoch": 1.63, + "learning_rate": 1.4685623814525105e-05, + "loss": 0.1859, + "step": 34920 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684840029470319e-05, + "loss": 0.234, + "step": 34925 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684056244415531e-05, + "loss": 0.3353, + "step": 34930 + }, + { + "epoch": 1.63, + "learning_rate": 1.4683272459360747e-05, + "loss": 0.3759, + "step": 34935 + }, + { + "epoch": 1.63, + "learning_rate": 1.4682488674305959e-05, + "loss": 0.0299, + "step": 34940 + }, + { + "epoch": 1.63, + "learning_rate": 1.4681704889251173e-05, + "loss": 0.0724, + "step": 34945 + }, + { + "epoch": 1.63, + "learning_rate": 1.4680921104196387e-05, + "loss": 0.1143, + "step": 34950 + }, + { + "epoch": 1.63, + "learning_rate": 1.46801373191416e-05, + "loss": 0.1031, + "step": 34955 + }, + { + "epoch": 1.63, + "learning_rate": 1.4679353534086813e-05, + "loss": 0.1564, + "step": 34960 + }, + { + "epoch": 1.63, + "learning_rate": 1.4678569749032025e-05, + "loss": 0.1843, + "step": 34965 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677785963977241e-05, + "loss": 0.1421, + "step": 34970 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677002178922453e-05, + "loss": 0.1495, + "step": 34975 + }, + { + "epoch": 1.63, + "learning_rate": 1.4676218393867667e-05, + "loss": 0.3336, + "step": 34980 + }, + { + "epoch": 1.63, + "learning_rate": 1.467543460881288e-05, + "loss": 0.3336, + "step": 34985 + }, + { + "epoch": 1.63, + "learning_rate": 1.4674650823758095e-05, + "loss": 0.0764, + "step": 34990 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673867038703307e-05, + "loss": 0.0414, + "step": 34995 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673083253648521e-05, + "loss": 0.0867, + "step": 35000 + }, + { + "epoch": 1.63, + "learning_rate": 1.4672299468593733e-05, + "loss": 0.0791, + "step": 35005 + }, + { + "epoch": 1.63, + "learning_rate": 1.4671515683538949e-05, + "loss": 0.1432, + "step": 35010 + }, + { + "epoch": 1.63, + "learning_rate": 1.4670731898484161e-05, + "loss": 0.1849, + "step": 35015 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669948113429375e-05, + "loss": 0.1386, + "step": 35020 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669164328374587e-05, + "loss": 0.1576, + "step": 35025 + }, + { + "epoch": 1.63, + "learning_rate": 1.46683805433198e-05, + "loss": 0.3647, + "step": 35030 + }, + { + "epoch": 1.63, + "learning_rate": 1.4667596758265015e-05, + "loss": 0.3103, + "step": 35035 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666812973210227e-05, + "loss": 0.0494, + "step": 35040 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666029188155441e-05, + "loss": 0.0672, + "step": 35045 + }, + { + "epoch": 1.64, + "learning_rate": 1.4665245403100655e-05, + "loss": 0.0995, + "step": 35050 + }, + { + "epoch": 1.64, + "learning_rate": 1.4664461618045869e-05, + "loss": 0.0589, + "step": 35055 + }, + { + "epoch": 1.64, + "learning_rate": 1.4663677832991081e-05, + "loss": 0.1083, + "step": 35060 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662894047936297e-05, + "loss": 0.1836, + "step": 35065 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662110262881509e-05, + "loss": 0.216, + "step": 35070 + }, + { + "epoch": 1.64, + "learning_rate": 1.4661326477826723e-05, + "loss": 0.2673, + "step": 35075 + }, + { + "epoch": 1.64, + "learning_rate": 1.4660542692771935e-05, + "loss": 0.3442, + "step": 35080 + }, + { + "epoch": 1.64, + "learning_rate": 1.465975890771715e-05, + "loss": 0.3217, + "step": 35085 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658975122662363e-05, + "loss": 0.0211, + "step": 35090 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658191337607575e-05, + "loss": 0.0388, + "step": 35095 + }, + { + "epoch": 1.64, + "learning_rate": 1.4657407552552789e-05, + "loss": 0.044, + "step": 35100 + }, + { + "epoch": 1.64, + "learning_rate": 1.4656623767498001e-05, + "loss": 0.1086, + "step": 35105 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655839982443217e-05, + "loss": 0.1599, + "step": 35110 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655056197388429e-05, + "loss": 0.129, + "step": 35115 + }, + { + "epoch": 1.64, + "learning_rate": 1.4654272412333643e-05, + "loss": 0.1483, + "step": 35120 + }, + { + "epoch": 1.64, + "learning_rate": 1.4653488627278855e-05, + "loss": 0.2187, + "step": 35125 + }, + { + "epoch": 1.64, + "learning_rate": 1.465270484222407e-05, + "loss": 0.4357, + "step": 35130 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651921057169283e-05, + "loss": 0.2683, + "step": 35135 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651137272114497e-05, + "loss": 0.0366, + "step": 35140 + }, + { + "epoch": 1.64, + "learning_rate": 1.4650353487059709e-05, + "loss": 0.0448, + "step": 35145 + }, + { + "epoch": 1.64, + "learning_rate": 1.4649569702004925e-05, + "loss": 0.0861, + "step": 35150 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648785916950137e-05, + "loss": 0.0554, + "step": 35155 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648002131895349e-05, + "loss": 0.143, + "step": 35160 + }, + { + "epoch": 1.64, + "learning_rate": 1.4647218346840565e-05, + "loss": 0.1561, + "step": 35165 + }, + { + "epoch": 1.64, + "learning_rate": 1.4646434561785777e-05, + "loss": 0.1867, + "step": 35170 + }, + { + "epoch": 1.64, + "learning_rate": 1.464565077673099e-05, + "loss": 0.1855, + "step": 35175 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644866991676203e-05, + "loss": 0.4092, + "step": 35180 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644083206621418e-05, + "loss": 0.3768, + "step": 35185 + }, + { + "epoch": 1.64, + "learning_rate": 1.464329942156663e-05, + "loss": 0.1151, + "step": 35190 + }, + { + "epoch": 1.64, + "learning_rate": 1.4642515636511845e-05, + "loss": 0.0626, + "step": 35195 + }, + { + "epoch": 1.64, + "learning_rate": 1.4641731851457057e-05, + "loss": 0.0333, + "step": 35200 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640948066402272e-05, + "loss": 0.0781, + "step": 35205 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640164281347485e-05, + "loss": 0.1316, + "step": 35210 + }, + { + "epoch": 1.64, + "learning_rate": 1.4639380496292699e-05, + "loss": 0.0988, + "step": 35215 + }, + { + "epoch": 1.64, + "learning_rate": 1.463859671123791e-05, + "loss": 0.1637, + "step": 35220 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637812926183123e-05, + "loss": 0.2726, + "step": 35225 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637029141128339e-05, + "loss": 0.336, + "step": 35230 + }, + { + "epoch": 1.64, + "learning_rate": 1.463624535607355e-05, + "loss": 0.1921, + "step": 35235 + }, + { + "epoch": 1.64, + "learning_rate": 1.4635461571018765e-05, + "loss": 0.026, + "step": 35240 + }, + { + "epoch": 1.64, + "learning_rate": 1.4634677785963977e-05, + "loss": 0.0511, + "step": 35245 + }, + { + "epoch": 1.64, + "learning_rate": 1.4633894000909192e-05, + "loss": 0.0613, + "step": 35250 + }, + { + "epoch": 1.65, + "learning_rate": 1.4633110215854405e-05, + "loss": 0.116, + "step": 35255 + }, + { + "epoch": 1.65, + "learning_rate": 1.4632326430799619e-05, + "loss": 0.1376, + "step": 35260 + }, + { + "epoch": 1.65, + "learning_rate": 1.4631542645744833e-05, + "loss": 0.1423, + "step": 35265 + }, + { + "epoch": 1.65, + "learning_rate": 1.4630758860690046e-05, + "loss": 0.0715, + "step": 35270 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629975075635259e-05, + "loss": 0.2406, + "step": 35275 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629191290580474e-05, + "loss": 0.2576, + "step": 35280 + }, + { + "epoch": 1.65, + "learning_rate": 1.4628407505525686e-05, + "loss": 0.1546, + "step": 35285 + }, + { + "epoch": 1.65, + "learning_rate": 1.4627623720470899e-05, + "loss": 0.0603, + "step": 35290 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626839935416113e-05, + "loss": 0.0637, + "step": 35295 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626056150361325e-05, + "loss": 0.0723, + "step": 35300 + }, + { + "epoch": 1.65, + "learning_rate": 1.462527236530654e-05, + "loss": 0.0931, + "step": 35305 + }, + { + "epoch": 1.65, + "learning_rate": 1.4624488580251753e-05, + "loss": 0.163, + "step": 35310 + }, + { + "epoch": 1.65, + "learning_rate": 1.4623704795196966e-05, + "loss": 0.2184, + "step": 35315 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622921010142179e-05, + "loss": 0.0844, + "step": 35320 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622137225087394e-05, + "loss": 0.2475, + "step": 35325 + }, + { + "epoch": 1.65, + "learning_rate": 1.4621353440032607e-05, + "loss": 0.3015, + "step": 35330 + }, + { + "epoch": 1.65, + "learning_rate": 1.462056965497782e-05, + "loss": 0.26, + "step": 35335 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619785869923033e-05, + "loss": 0.0676, + "step": 35340 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619002084868248e-05, + "loss": 0.0765, + "step": 35345 + }, + { + "epoch": 1.65, + "learning_rate": 1.461821829981346e-05, + "loss": 0.0856, + "step": 35350 + }, + { + "epoch": 1.65, + "learning_rate": 1.4617434514758673e-05, + "loss": 0.0801, + "step": 35355 + }, + { + "epoch": 1.65, + "learning_rate": 1.4616650729703887e-05, + "loss": 0.1053, + "step": 35360 + }, + { + "epoch": 1.65, + "learning_rate": 1.46158669446491e-05, + "loss": 0.1531, + "step": 35365 + }, + { + "epoch": 1.65, + "learning_rate": 1.4615083159594314e-05, + "loss": 0.2514, + "step": 35370 + }, + { + "epoch": 1.65, + "learning_rate": 1.4614299374539527e-05, + "loss": 0.1658, + "step": 35375 + }, + { + "epoch": 1.65, + "learning_rate": 1.4613515589484742e-05, + "loss": 0.3044, + "step": 35380 + }, + { + "epoch": 1.65, + "learning_rate": 1.4612731804429954e-05, + "loss": 0.2306, + "step": 35385 + }, + { + "epoch": 1.65, + "learning_rate": 1.4611948019375168e-05, + "loss": 0.0903, + "step": 35390 + }, + { + "epoch": 1.65, + "learning_rate": 1.461116423432038e-05, + "loss": 0.0519, + "step": 35395 + }, + { + "epoch": 1.65, + "learning_rate": 1.4610380449265596e-05, + "loss": 0.0563, + "step": 35400 + }, + { + "epoch": 1.65, + "learning_rate": 1.4609596664210808e-05, + "loss": 0.0959, + "step": 35405 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608812879156022e-05, + "loss": 0.0844, + "step": 35410 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608029094101234e-05, + "loss": 0.1355, + "step": 35415 + }, + { + "epoch": 1.65, + "learning_rate": 1.4607245309046447e-05, + "loss": 0.1931, + "step": 35420 + }, + { + "epoch": 1.65, + "learning_rate": 1.4606461523991662e-05, + "loss": 0.257, + "step": 35425 + }, + { + "epoch": 1.65, + "learning_rate": 1.4605677738936874e-05, + "loss": 0.5352, + "step": 35430 + }, + { + "epoch": 1.65, + "learning_rate": 1.4604893953882088e-05, + "loss": 0.3642, + "step": 35435 + }, + { + "epoch": 1.65, + "learning_rate": 1.46041101688273e-05, + "loss": 0.0393, + "step": 35440 + }, + { + "epoch": 1.65, + "learning_rate": 1.4603326383772516e-05, + "loss": 0.106, + "step": 35445 + }, + { + "epoch": 1.65, + "learning_rate": 1.4602542598717728e-05, + "loss": 0.1245, + "step": 35450 + }, + { + "epoch": 1.65, + "learning_rate": 1.4601758813662942e-05, + "loss": 0.1576, + "step": 35455 + }, + { + "epoch": 1.65, + "learning_rate": 1.4600975028608155e-05, + "loss": 0.1734, + "step": 35460 + }, + { + "epoch": 1.65, + "learning_rate": 1.460019124355337e-05, + "loss": 0.0837, + "step": 35465 + }, + { + "epoch": 1.66, + "learning_rate": 1.4599407458498582e-05, + "loss": 0.1933, + "step": 35470 + }, + { + "epoch": 1.66, + "learning_rate": 1.4598623673443796e-05, + "loss": 0.1611, + "step": 35475 + }, + { + "epoch": 1.66, + "learning_rate": 1.459783988838901e-05, + "loss": 0.2965, + "step": 35480 + }, + { + "epoch": 1.66, + "learning_rate": 1.4597056103334222e-05, + "loss": 0.314, + "step": 35485 + }, + { + "epoch": 1.66, + "learning_rate": 1.4596272318279436e-05, + "loss": 0.0355, + "step": 35490 + }, + { + "epoch": 1.66, + "learning_rate": 1.4595488533224648e-05, + "loss": 0.0476, + "step": 35495 + }, + { + "epoch": 1.66, + "learning_rate": 1.4594704748169864e-05, + "loss": 0.0686, + "step": 35500 + }, + { + "epoch": 1.66, + "learning_rate": 1.4593920963115076e-05, + "loss": 0.1746, + "step": 35505 + }, + { + "epoch": 1.66, + "learning_rate": 1.459313717806029e-05, + "loss": 0.1403, + "step": 35510 + }, + { + "epoch": 1.66, + "learning_rate": 1.4592353393005502e-05, + "loss": 0.1285, + "step": 35515 + }, + { + "epoch": 1.66, + "learning_rate": 1.4591569607950718e-05, + "loss": 0.252, + "step": 35520 + }, + { + "epoch": 1.66, + "learning_rate": 1.459078582289593e-05, + "loss": 0.2358, + "step": 35525 + }, + { + "epoch": 1.66, + "learning_rate": 1.4590002037841144e-05, + "loss": 0.2364, + "step": 35530 + }, + { + "epoch": 1.66, + "learning_rate": 1.4589218252786356e-05, + "loss": 0.2942, + "step": 35535 + }, + { + "epoch": 1.66, + "learning_rate": 1.4588434467731572e-05, + "loss": 0.0387, + "step": 35540 + }, + { + "epoch": 1.66, + "learning_rate": 1.4587650682676784e-05, + "loss": 0.1152, + "step": 35545 + }, + { + "epoch": 1.66, + "learning_rate": 1.4586866897621996e-05, + "loss": 0.0824, + "step": 35550 + }, + { + "epoch": 1.66, + "learning_rate": 1.458608311256721e-05, + "loss": 0.1106, + "step": 35555 + }, + { + "epoch": 1.66, + "learning_rate": 1.4585299327512422e-05, + "loss": 0.1017, + "step": 35560 + }, + { + "epoch": 1.66, + "learning_rate": 1.4584515542457638e-05, + "loss": 0.2116, + "step": 35565 + }, + { + "epoch": 1.66, + "learning_rate": 1.458373175740285e-05, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582947972348064e-05, + "loss": 0.2595, + "step": 35575 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582164187293278e-05, + "loss": 0.3131, + "step": 35580 + }, + { + "epoch": 1.66, + "learning_rate": 1.4581380402238492e-05, + "loss": 0.2581, + "step": 35585 + }, + { + "epoch": 1.66, + "learning_rate": 1.4580596617183704e-05, + "loss": 0.0496, + "step": 35590 + }, + { + "epoch": 1.66, + "learning_rate": 1.457981283212892e-05, + "loss": 0.0473, + "step": 35595 + }, + { + "epoch": 1.66, + "learning_rate": 1.4579029047074132e-05, + "loss": 0.1001, + "step": 35600 + }, + { + "epoch": 1.66, + "learning_rate": 1.4578245262019346e-05, + "loss": 0.053, + "step": 35605 + }, + { + "epoch": 1.66, + "learning_rate": 1.4577461476964558e-05, + "loss": 0.1636, + "step": 35610 + }, + { + "epoch": 1.66, + "learning_rate": 1.457667769190977e-05, + "loss": 0.1338, + "step": 35615 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575893906854986e-05, + "loss": 0.2236, + "step": 35620 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575110121800198e-05, + "loss": 0.17, + "step": 35625 + }, + { + "epoch": 1.66, + "learning_rate": 1.4574326336745412e-05, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4573542551690624e-05, + "loss": 0.1904, + "step": 35635 + }, + { + "epoch": 1.66, + "learning_rate": 1.457275876663584e-05, + "loss": 0.0506, + "step": 35640 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571974981581052e-05, + "loss": 0.0974, + "step": 35645 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571191196526266e-05, + "loss": 0.036, + "step": 35650 + }, + { + "epoch": 1.66, + "learning_rate": 1.4570407411471478e-05, + "loss": 0.1061, + "step": 35655 + }, + { + "epoch": 1.66, + "learning_rate": 1.4569623626416694e-05, + "loss": 0.1198, + "step": 35660 + }, + { + "epoch": 1.66, + "learning_rate": 1.4568839841361906e-05, + "loss": 0.244, + "step": 35665 + }, + { + "epoch": 1.66, + "learning_rate": 1.456805605630712e-05, + "loss": 0.1738, + "step": 35670 + }, + { + "epoch": 1.66, + "learning_rate": 1.4567272271252332e-05, + "loss": 0.1297, + "step": 35675 + }, + { + "epoch": 1.66, + "learning_rate": 1.4566488486197546e-05, + "loss": 0.3234, + "step": 35680 + }, + { + "epoch": 1.67, + "learning_rate": 1.456570470114276e-05, + "loss": 0.3038, + "step": 35685 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564920916087972e-05, + "loss": 0.0216, + "step": 35690 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564137131033188e-05, + "loss": 0.0839, + "step": 35695 + }, + { + "epoch": 1.67, + "learning_rate": 1.45633533459784e-05, + "loss": 0.0911, + "step": 35700 + }, + { + "epoch": 1.67, + "learning_rate": 1.4562569560923614e-05, + "loss": 0.1222, + "step": 35705 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561785775868826e-05, + "loss": 0.0914, + "step": 35710 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561001990814042e-05, + "loss": 0.1321, + "step": 35715 + }, + { + "epoch": 1.67, + "learning_rate": 1.4560218205759254e-05, + "loss": 0.1351, + "step": 35720 + }, + { + "epoch": 1.67, + "learning_rate": 1.4559434420704468e-05, + "loss": 0.2113, + "step": 35725 + }, + { + "epoch": 1.67, + "learning_rate": 1.455865063564968e-05, + "loss": 0.3217, + "step": 35730 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557866850594896e-05, + "loss": 0.3432, + "step": 35735 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557083065540108e-05, + "loss": 0.0595, + "step": 35740 + }, + { + "epoch": 1.67, + "learning_rate": 1.455629928048532e-05, + "loss": 0.0583, + "step": 35745 + }, + { + "epoch": 1.67, + "learning_rate": 1.4555515495430534e-05, + "loss": 0.0955, + "step": 35750 + }, + { + "epoch": 1.67, + "learning_rate": 1.4554731710375746e-05, + "loss": 0.0763, + "step": 35755 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553947925320962e-05, + "loss": 0.159, + "step": 35760 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553164140266174e-05, + "loss": 0.1699, + "step": 35765 + }, + { + "epoch": 1.67, + "learning_rate": 1.4552380355211388e-05, + "loss": 0.1226, + "step": 35770 + }, + { + "epoch": 1.67, + "learning_rate": 1.45515965701566e-05, + "loss": 0.233, + "step": 35775 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550812785101816e-05, + "loss": 0.3687, + "step": 35780 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550029000047028e-05, + "loss": 0.2873, + "step": 35785 + }, + { + "epoch": 1.67, + "learning_rate": 1.4549245214992242e-05, + "loss": 0.0251, + "step": 35790 + }, + { + "epoch": 1.67, + "learning_rate": 1.4548461429937456e-05, + "loss": 0.0768, + "step": 35795 + }, + { + "epoch": 1.67, + "learning_rate": 1.454767764488267e-05, + "loss": 0.1486, + "step": 35800 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546893859827882e-05, + "loss": 0.1071, + "step": 35805 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546110074773094e-05, + "loss": 0.115, + "step": 35810 + }, + { + "epoch": 1.67, + "learning_rate": 1.454532628971831e-05, + "loss": 0.1783, + "step": 35815 + }, + { + "epoch": 1.67, + "learning_rate": 1.4544542504663522e-05, + "loss": 0.1416, + "step": 35820 + }, + { + "epoch": 1.67, + "learning_rate": 1.4543758719608736e-05, + "loss": 0.2712, + "step": 35825 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542974934553948e-05, + "loss": 0.3227, + "step": 35830 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542191149499164e-05, + "loss": 0.3159, + "step": 35835 + }, + { + "epoch": 1.67, + "learning_rate": 1.4541407364444376e-05, + "loss": 0.0305, + "step": 35840 + }, + { + "epoch": 1.67, + "learning_rate": 1.454062357938959e-05, + "loss": 0.0921, + "step": 35845 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539839794334802e-05, + "loss": 0.0471, + "step": 35850 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539056009280017e-05, + "loss": 0.1261, + "step": 35855 + }, + { + "epoch": 1.67, + "learning_rate": 1.453827222422523e-05, + "loss": 0.1288, + "step": 35860 + }, + { + "epoch": 1.67, + "learning_rate": 1.4537488439170444e-05, + "loss": 0.1459, + "step": 35865 + }, + { + "epoch": 1.67, + "learning_rate": 1.4536704654115656e-05, + "loss": 0.141, + "step": 35870 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535920869060868e-05, + "loss": 0.2848, + "step": 35875 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535137084006084e-05, + "loss": 0.3458, + "step": 35880 + }, + { + "epoch": 1.67, + "learning_rate": 1.4534353298951296e-05, + "loss": 0.4117, + "step": 35885 + }, + { + "epoch": 1.67, + "learning_rate": 1.453356951389651e-05, + "loss": 0.0384, + "step": 35890 + }, + { + "epoch": 1.67, + "learning_rate": 1.4532785728841724e-05, + "loss": 0.1041, + "step": 35895 + }, + { + "epoch": 1.68, + "learning_rate": 1.4532001943786938e-05, + "loss": 0.1218, + "step": 35900 + }, + { + "epoch": 1.68, + "learning_rate": 1.453121815873215e-05, + "loss": 0.1158, + "step": 35905 + }, + { + "epoch": 1.68, + "learning_rate": 1.4530434373677365e-05, + "loss": 0.0852, + "step": 35910 + }, + { + "epoch": 1.68, + "learning_rate": 1.4529650588622578e-05, + "loss": 0.081, + "step": 35915 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528866803567791e-05, + "loss": 0.198, + "step": 35920 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528083018513004e-05, + "loss": 0.2399, + "step": 35925 + }, + { + "epoch": 1.68, + "learning_rate": 1.452729923345822e-05, + "loss": 0.2144, + "step": 35930 + }, + { + "epoch": 1.68, + "learning_rate": 1.4526515448403432e-05, + "loss": 0.2149, + "step": 35935 + }, + { + "epoch": 1.68, + "learning_rate": 1.4525731663348644e-05, + "loss": 0.0402, + "step": 35940 + }, + { + "epoch": 1.68, + "learning_rate": 1.4524947878293858e-05, + "loss": 0.0718, + "step": 35945 + }, + { + "epoch": 1.68, + "learning_rate": 1.452416409323907e-05, + "loss": 0.0818, + "step": 35950 + }, + { + "epoch": 1.68, + "learning_rate": 1.4523380308184285e-05, + "loss": 0.0695, + "step": 35955 + }, + { + "epoch": 1.68, + "learning_rate": 1.4522596523129498e-05, + "loss": 0.0995, + "step": 35960 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521812738074712e-05, + "loss": 0.1538, + "step": 35965 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521028953019924e-05, + "loss": 0.2184, + "step": 35970 + }, + { + "epoch": 1.68, + "learning_rate": 1.452024516796514e-05, + "loss": 0.1701, + "step": 35975 + }, + { + "epoch": 1.68, + "learning_rate": 1.4519461382910352e-05, + "loss": 0.401, + "step": 35980 + }, + { + "epoch": 1.68, + "learning_rate": 1.4518677597855565e-05, + "loss": 0.3014, + "step": 35985 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517893812800778e-05, + "loss": 0.047, + "step": 35990 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517110027745993e-05, + "loss": 0.0691, + "step": 35995 + }, + { + "epoch": 1.68, + "learning_rate": 1.4516326242691206e-05, + "loss": 0.1155, + "step": 36000 + }, + { + "epoch": 1.68, + "learning_rate": 1.4515542457636418e-05, + "loss": 0.1304, + "step": 36005 + }, + { + "epoch": 1.68, + "learning_rate": 1.4514758672581633e-05, + "loss": 0.1563, + "step": 36010 + }, + { + "epoch": 1.68, + "learning_rate": 1.4513974887526846e-05, + "loss": 0.1016, + "step": 36015 + }, + { + "epoch": 1.68, + "learning_rate": 1.451319110247206e-05, + "loss": 0.186, + "step": 36020 + }, + { + "epoch": 1.68, + "learning_rate": 1.4512407317417272e-05, + "loss": 0.203, + "step": 36025 + }, + { + "epoch": 1.68, + "learning_rate": 1.4511623532362487e-05, + "loss": 0.3696, + "step": 36030 + }, + { + "epoch": 1.68, + "learning_rate": 1.45108397473077e-05, + "loss": 0.2746, + "step": 36035 + }, + { + "epoch": 1.68, + "learning_rate": 1.4510055962252913e-05, + "loss": 0.0431, + "step": 36040 + }, + { + "epoch": 1.68, + "learning_rate": 1.4509272177198126e-05, + "loss": 0.0458, + "step": 36045 + }, + { + "epoch": 1.68, + "learning_rate": 1.4508488392143341e-05, + "loss": 0.068, + "step": 36050 + }, + { + "epoch": 1.68, + "learning_rate": 1.4507704607088553e-05, + "loss": 0.156, + "step": 36055 + }, + { + "epoch": 1.68, + "learning_rate": 1.4506920822033767e-05, + "loss": 0.0859, + "step": 36060 + }, + { + "epoch": 1.68, + "learning_rate": 1.450613703697898e-05, + "loss": 0.1969, + "step": 36065 + }, + { + "epoch": 1.68, + "learning_rate": 1.4505353251924192e-05, + "loss": 0.1335, + "step": 36070 + }, + { + "epoch": 1.68, + "learning_rate": 1.4504569466869407e-05, + "loss": 0.2158, + "step": 36075 + }, + { + "epoch": 1.68, + "learning_rate": 1.450378568181462e-05, + "loss": 0.2714, + "step": 36080 + }, + { + "epoch": 1.68, + "learning_rate": 1.4503001896759833e-05, + "loss": 0.2969, + "step": 36085 + }, + { + "epoch": 1.68, + "learning_rate": 1.4502218111705046e-05, + "loss": 0.0216, + "step": 36090 + }, + { + "epoch": 1.68, + "learning_rate": 1.4501434326650261e-05, + "loss": 0.0438, + "step": 36095 + }, + { + "epoch": 1.68, + "learning_rate": 1.4500650541595473e-05, + "loss": 0.0988, + "step": 36100 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499866756540687e-05, + "loss": 0.0888, + "step": 36105 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499082971485901e-05, + "loss": 0.0665, + "step": 36110 + }, + { + "epoch": 1.69, + "learning_rate": 1.4498299186431115e-05, + "loss": 0.1506, + "step": 36115 + }, + { + "epoch": 1.69, + "learning_rate": 1.4497515401376327e-05, + "loss": 0.1244, + "step": 36120 + }, + { + "epoch": 1.69, + "learning_rate": 1.4496731616321541e-05, + "loss": 0.1407, + "step": 36125 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495947831266755e-05, + "loss": 0.389, + "step": 36130 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495164046211967e-05, + "loss": 0.2841, + "step": 36135 + }, + { + "epoch": 1.69, + "learning_rate": 1.4494380261157181e-05, + "loss": 0.0355, + "step": 36140 + }, + { + "epoch": 1.69, + "learning_rate": 1.4493596476102394e-05, + "loss": 0.1308, + "step": 36145 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492812691047609e-05, + "loss": 0.0888, + "step": 36150 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492028905992821e-05, + "loss": 0.1226, + "step": 36155 + }, + { + "epoch": 1.69, + "learning_rate": 1.4491245120938035e-05, + "loss": 0.1033, + "step": 36160 + }, + { + "epoch": 1.69, + "learning_rate": 1.4490461335883247e-05, + "loss": 0.1986, + "step": 36165 + }, + { + "epoch": 1.69, + "learning_rate": 1.4489677550828463e-05, + "loss": 0.1476, + "step": 36170 + }, + { + "epoch": 1.69, + "learning_rate": 1.4488893765773675e-05, + "loss": 0.3752, + "step": 36175 + }, + { + "epoch": 1.69, + "learning_rate": 1.448810998071889e-05, + "loss": 0.3593, + "step": 36180 + }, + { + "epoch": 1.69, + "learning_rate": 1.4487326195664101e-05, + "loss": 0.3019, + "step": 36185 + }, + { + "epoch": 1.69, + "learning_rate": 1.4486542410609317e-05, + "loss": 0.0502, + "step": 36190 + }, + { + "epoch": 1.69, + "learning_rate": 1.448575862555453e-05, + "loss": 0.0869, + "step": 36195 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484974840499741e-05, + "loss": 0.0769, + "step": 36200 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484191055444955e-05, + "loss": 0.0761, + "step": 36205 + }, + { + "epoch": 1.69, + "learning_rate": 1.448340727039017e-05, + "loss": 0.1277, + "step": 36210 + }, + { + "epoch": 1.69, + "learning_rate": 1.4482623485335383e-05, + "loss": 0.1541, + "step": 36215 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481839700280595e-05, + "loss": 0.1727, + "step": 36220 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481055915225811e-05, + "loss": 0.2071, + "step": 36225 + }, + { + "epoch": 1.69, + "learning_rate": 1.4480272130171023e-05, + "loss": 0.2472, + "step": 36230 + }, + { + "epoch": 1.69, + "learning_rate": 1.4479488345116237e-05, + "loss": 0.1617, + "step": 36235 + }, + { + "epoch": 1.69, + "learning_rate": 1.447870456006145e-05, + "loss": 0.0486, + "step": 36240 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477920775006665e-05, + "loss": 0.029, + "step": 36245 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477136989951877e-05, + "loss": 0.0703, + "step": 36250 + }, + { + "epoch": 1.69, + "learning_rate": 1.4476353204897091e-05, + "loss": 0.0847, + "step": 36255 + }, + { + "epoch": 1.69, + "learning_rate": 1.4475569419842303e-05, + "loss": 0.1484, + "step": 36260 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474785634787515e-05, + "loss": 0.1272, + "step": 36265 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474001849732731e-05, + "loss": 0.1129, + "step": 36270 + }, + { + "epoch": 1.69, + "learning_rate": 1.4473218064677943e-05, + "loss": 0.1698, + "step": 36275 + }, + { + "epoch": 1.69, + "learning_rate": 1.4472434279623157e-05, + "loss": 0.2721, + "step": 36280 + }, + { + "epoch": 1.69, + "learning_rate": 1.447165049456837e-05, + "loss": 0.2507, + "step": 36285 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470866709513585e-05, + "loss": 0.1443, + "step": 36290 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470082924458797e-05, + "loss": 0.0702, + "step": 36295 + }, + { + "epoch": 1.69, + "learning_rate": 1.4469299139404011e-05, + "loss": 0.0486, + "step": 36300 + }, + { + "epoch": 1.69, + "learning_rate": 1.4468515354349223e-05, + "loss": 0.0962, + "step": 36305 + }, + { + "epoch": 1.69, + "learning_rate": 1.4467731569294439e-05, + "loss": 0.1491, + "step": 36310 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466947784239651e-05, + "loss": 0.1473, + "step": 36315 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466163999184865e-05, + "loss": 0.1861, + "step": 36320 + }, + { + "epoch": 1.69, + "learning_rate": 1.4465380214130079e-05, + "loss": 0.2508, + "step": 36325 + }, + { + "epoch": 1.7, + "learning_rate": 1.4464596429075291e-05, + "loss": 0.1919, + "step": 36330 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463812644020505e-05, + "loss": 0.329, + "step": 36335 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463028858965717e-05, + "loss": 0.0241, + "step": 36340 + }, + { + "epoch": 1.7, + "learning_rate": 1.4462245073910933e-05, + "loss": 0.0493, + "step": 36345 + }, + { + "epoch": 1.7, + "learning_rate": 1.4461461288856145e-05, + "loss": 0.0885, + "step": 36350 + }, + { + "epoch": 1.7, + "learning_rate": 1.4460677503801359e-05, + "loss": 0.0676, + "step": 36355 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459893718746571e-05, + "loss": 0.138, + "step": 36360 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459109933691787e-05, + "loss": 0.1602, + "step": 36365 + }, + { + "epoch": 1.7, + "learning_rate": 1.4458326148636999e-05, + "loss": 0.1771, + "step": 36370 + }, + { + "epoch": 1.7, + "learning_rate": 1.4457542363582213e-05, + "loss": 0.207, + "step": 36375 + }, + { + "epoch": 1.7, + "learning_rate": 1.4456758578527425e-05, + "loss": 0.3872, + "step": 36380 + }, + { + "epoch": 1.7, + "learning_rate": 1.445597479347264e-05, + "loss": 0.4241, + "step": 36385 + }, + { + "epoch": 1.7, + "learning_rate": 1.4455191008417853e-05, + "loss": 0.0311, + "step": 36390 + }, + { + "epoch": 1.7, + "learning_rate": 1.4454407223363065e-05, + "loss": 0.0573, + "step": 36395 + }, + { + "epoch": 1.7, + "learning_rate": 1.4453623438308279e-05, + "loss": 0.0677, + "step": 36400 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452839653253491e-05, + "loss": 0.1657, + "step": 36405 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452055868198707e-05, + "loss": 0.0953, + "step": 36410 + }, + { + "epoch": 1.7, + "learning_rate": 1.4451272083143919e-05, + "loss": 0.1371, + "step": 36415 + }, + { + "epoch": 1.7, + "learning_rate": 1.4450488298089133e-05, + "loss": 0.1549, + "step": 36420 + }, + { + "epoch": 1.7, + "learning_rate": 1.4449704513034347e-05, + "loss": 0.1376, + "step": 36425 + }, + { + "epoch": 1.7, + "learning_rate": 1.444892072797956e-05, + "loss": 0.3851, + "step": 36430 + }, + { + "epoch": 1.7, + "learning_rate": 1.4448136942924773e-05, + "loss": 0.2807, + "step": 36435 + }, + { + "epoch": 1.7, + "learning_rate": 1.4447353157869989e-05, + "loss": 0.0163, + "step": 36440 + }, + { + "epoch": 1.7, + "learning_rate": 1.44465693728152e-05, + "loss": 0.0581, + "step": 36445 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445785587760415e-05, + "loss": 0.056, + "step": 36450 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445001802705627e-05, + "loss": 0.1045, + "step": 36455 + }, + { + "epoch": 1.7, + "learning_rate": 1.4444218017650839e-05, + "loss": 0.0543, + "step": 36460 + }, + { + "epoch": 1.7, + "learning_rate": 1.4443434232596055e-05, + "loss": 0.1655, + "step": 36465 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442807204552225e-05, + "loss": 0.1582, + "step": 36470 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442023419497437e-05, + "loss": 0.2369, + "step": 36475 + }, + { + "epoch": 1.7, + "learning_rate": 1.4441239634442651e-05, + "loss": 0.2963, + "step": 36480 + }, + { + "epoch": 1.7, + "learning_rate": 1.4440455849387865e-05, + "loss": 0.2817, + "step": 36485 + }, + { + "epoch": 1.7, + "learning_rate": 1.4439672064333079e-05, + "loss": 0.0662, + "step": 36490 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438888279278291e-05, + "loss": 0.0917, + "step": 36495 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438104494223505e-05, + "loss": 0.0448, + "step": 36500 + }, + { + "epoch": 1.7, + "learning_rate": 1.4437320709168719e-05, + "loss": 0.1574, + "step": 36505 + }, + { + "epoch": 1.7, + "learning_rate": 1.4436536924113933e-05, + "loss": 0.1734, + "step": 36510 + }, + { + "epoch": 1.7, + "learning_rate": 1.4435753139059145e-05, + "loss": 0.1669, + "step": 36515 + }, + { + "epoch": 1.7, + "learning_rate": 1.443496935400436e-05, + "loss": 0.1831, + "step": 36520 + }, + { + "epoch": 1.7, + "learning_rate": 1.4434185568949573e-05, + "loss": 0.1776, + "step": 36525 + }, + { + "epoch": 1.7, + "learning_rate": 1.4433401783894785e-05, + "loss": 0.3592, + "step": 36530 + }, + { + "epoch": 1.7, + "learning_rate": 1.4432617998839999e-05, + "loss": 0.2814, + "step": 36535 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431834213785211e-05, + "loss": 0.0311, + "step": 36540 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431050428730427e-05, + "loss": 0.091, + "step": 36545 + }, + { + "epoch": 1.71, + "learning_rate": 1.443026664367564e-05, + "loss": 0.0954, + "step": 36550 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429482858620853e-05, + "loss": 0.102, + "step": 36555 + }, + { + "epoch": 1.71, + "learning_rate": 1.4428699073566065e-05, + "loss": 0.1249, + "step": 36560 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427915288511281e-05, + "loss": 0.1781, + "step": 36565 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427131503456493e-05, + "loss": 0.122, + "step": 36570 + }, + { + "epoch": 1.71, + "learning_rate": 1.4426347718401707e-05, + "loss": 0.2721, + "step": 36575 + }, + { + "epoch": 1.71, + "learning_rate": 1.442556393334692e-05, + "loss": 0.3878, + "step": 36580 + }, + { + "epoch": 1.71, + "learning_rate": 1.4424780148292135e-05, + "loss": 0.2806, + "step": 36585 + }, + { + "epoch": 1.71, + "learning_rate": 1.4423996363237347e-05, + "loss": 0.0162, + "step": 36590 + }, + { + "epoch": 1.71, + "learning_rate": 1.442321257818256e-05, + "loss": 0.1438, + "step": 36595 + }, + { + "epoch": 1.71, + "learning_rate": 1.4422428793127775e-05, + "loss": 0.0547, + "step": 36600 + }, + { + "epoch": 1.71, + "learning_rate": 1.4421645008072987e-05, + "loss": 0.0748, + "step": 36605 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420861223018201e-05, + "loss": 0.071, + "step": 36610 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420077437963413e-05, + "loss": 0.0877, + "step": 36615 + }, + { + "epoch": 1.71, + "learning_rate": 1.4419293652908629e-05, + "loss": 0.0916, + "step": 36620 + }, + { + "epoch": 1.71, + "learning_rate": 1.4418509867853841e-05, + "loss": 0.29, + "step": 36625 + }, + { + "epoch": 1.71, + "learning_rate": 1.4417726082799055e-05, + "loss": 0.3652, + "step": 36630 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416942297744267e-05, + "loss": 0.3349, + "step": 36635 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416158512689483e-05, + "loss": 0.0536, + "step": 36640 + }, + { + "epoch": 1.71, + "learning_rate": 1.4415374727634695e-05, + "loss": 0.0458, + "step": 36645 + }, + { + "epoch": 1.71, + "learning_rate": 1.4414590942579909e-05, + "loss": 0.085, + "step": 36650 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413807157525121e-05, + "loss": 0.0993, + "step": 36655 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413023372470333e-05, + "loss": 0.1099, + "step": 36660 + }, + { + "epoch": 1.71, + "learning_rate": 1.4412239587415549e-05, + "loss": 0.112, + "step": 36665 + }, + { + "epoch": 1.71, + "learning_rate": 1.4411455802360761e-05, + "loss": 0.2057, + "step": 36670 + }, + { + "epoch": 1.71, + "learning_rate": 1.4410672017305975e-05, + "loss": 0.3171, + "step": 36675 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409888232251187e-05, + "loss": 0.296, + "step": 36680 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409104447196403e-05, + "loss": 0.2658, + "step": 36685 + }, + { + "epoch": 1.71, + "learning_rate": 1.4408320662141615e-05, + "loss": 0.0218, + "step": 36690 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407536877086829e-05, + "loss": 0.0514, + "step": 36695 + }, + { + "epoch": 1.71, + "learning_rate": 1.4406753092032043e-05, + "loss": 0.081, + "step": 36700 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405969306977257e-05, + "loss": 0.0679, + "step": 36705 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405185521922469e-05, + "loss": 0.1526, + "step": 36710 + }, + { + "epoch": 1.71, + "learning_rate": 1.4404401736867683e-05, + "loss": 0.1244, + "step": 36715 + }, + { + "epoch": 1.71, + "learning_rate": 1.4403617951812897e-05, + "loss": 0.1967, + "step": 36720 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402834166758109e-05, + "loss": 0.2807, + "step": 36725 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402050381703323e-05, + "loss": 0.3164, + "step": 36730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4401266596648535e-05, + "loss": 0.2543, + "step": 36735 + }, + { + "epoch": 1.71, + "learning_rate": 1.440048281159375e-05, + "loss": 0.0474, + "step": 36740 + }, + { + "epoch": 1.71, + "learning_rate": 1.4399699026538963e-05, + "loss": 0.0358, + "step": 36745 + }, + { + "epoch": 1.71, + "learning_rate": 1.4398915241484177e-05, + "loss": 0.0577, + "step": 36750 + }, + { + "epoch": 1.72, + "learning_rate": 1.4398131456429389e-05, + "loss": 0.1247, + "step": 36755 + }, + { + "epoch": 1.72, + "learning_rate": 1.4397347671374605e-05, + "loss": 0.0912, + "step": 36760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4396563886319817e-05, + "loss": 0.1512, + "step": 36765 + }, + { + "epoch": 1.72, + "learning_rate": 1.439578010126503e-05, + "loss": 0.187, + "step": 36770 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394996316210243e-05, + "loss": 0.2044, + "step": 36775 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394212531155459e-05, + "loss": 0.2219, + "step": 36780 + }, + { + "epoch": 1.72, + "learning_rate": 1.439342874610067e-05, + "loss": 0.3031, + "step": 36785 + }, + { + "epoch": 1.72, + "learning_rate": 1.4392644961045883e-05, + "loss": 0.0566, + "step": 36790 + }, + { + "epoch": 1.72, + "learning_rate": 1.4391861175991097e-05, + "loss": 0.05, + "step": 36795 + }, + { + "epoch": 1.72, + "learning_rate": 1.439107739093631e-05, + "loss": 0.1119, + "step": 36800 + }, + { + "epoch": 1.72, + "learning_rate": 1.4390293605881525e-05, + "loss": 0.1107, + "step": 36805 + }, + { + "epoch": 1.72, + "learning_rate": 1.4389509820826737e-05, + "loss": 0.198, + "step": 36810 + }, + { + "epoch": 1.72, + "learning_rate": 1.438872603577195e-05, + "loss": 0.1943, + "step": 36815 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387942250717165e-05, + "loss": 0.1967, + "step": 36820 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387158465662379e-05, + "loss": 0.1664, + "step": 36825 + }, + { + "epoch": 1.72, + "learning_rate": 1.438637468060759e-05, + "loss": 0.3422, + "step": 36830 + }, + { + "epoch": 1.72, + "learning_rate": 1.4385590895552806e-05, + "loss": 0.2327, + "step": 36835 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384807110498019e-05, + "loss": 0.0715, + "step": 36840 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384023325443233e-05, + "loss": 0.0976, + "step": 36845 + }, + { + "epoch": 1.72, + "learning_rate": 1.4383239540388445e-05, + "loss": 0.0848, + "step": 36850 + }, + { + "epoch": 1.72, + "learning_rate": 1.4382455755333657e-05, + "loss": 0.0524, + "step": 36855 + }, + { + "epoch": 1.72, + "learning_rate": 1.4381671970278873e-05, + "loss": 0.1459, + "step": 36860 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380888185224085e-05, + "loss": 0.1363, + "step": 36865 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380104400169299e-05, + "loss": 0.1704, + "step": 36870 + }, + { + "epoch": 1.72, + "learning_rate": 1.4379320615114511e-05, + "loss": 0.3359, + "step": 36875 + }, + { + "epoch": 1.72, + "learning_rate": 1.4378536830059726e-05, + "loss": 0.4377, + "step": 36880 + }, + { + "epoch": 1.72, + "learning_rate": 1.4377753045004939e-05, + "loss": 0.1847, + "step": 36885 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376969259950153e-05, + "loss": 0.0775, + "step": 36890 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376185474895365e-05, + "loss": 0.0722, + "step": 36895 + }, + { + "epoch": 1.72, + "learning_rate": 1.437540168984058e-05, + "loss": 0.106, + "step": 36900 + }, + { + "epoch": 1.72, + "learning_rate": 1.4374617904785793e-05, + "loss": 0.1095, + "step": 36905 + }, + { + "epoch": 1.72, + "learning_rate": 1.4373834119731007e-05, + "loss": 0.1045, + "step": 36910 + }, + { + "epoch": 1.72, + "learning_rate": 1.437305033467622e-05, + "loss": 0.1643, + "step": 36915 + }, + { + "epoch": 1.72, + "learning_rate": 1.4372266549621433e-05, + "loss": 0.1888, + "step": 36920 + }, + { + "epoch": 1.72, + "learning_rate": 1.4371482764566647e-05, + "loss": 0.1461, + "step": 36925 + }, + { + "epoch": 1.72, + "learning_rate": 1.4370698979511859e-05, + "loss": 0.36, + "step": 36930 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369915194457074e-05, + "loss": 0.1914, + "step": 36935 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369131409402287e-05, + "loss": 0.0292, + "step": 36940 + }, + { + "epoch": 1.72, + "learning_rate": 1.43683476243475e-05, + "loss": 0.0758, + "step": 36945 + }, + { + "epoch": 1.72, + "learning_rate": 1.4367563839292713e-05, + "loss": 0.074, + "step": 36950 + }, + { + "epoch": 1.72, + "learning_rate": 1.4366780054237928e-05, + "loss": 0.1499, + "step": 36955 + }, + { + "epoch": 1.72, + "learning_rate": 1.436599626918314e-05, + "loss": 0.1167, + "step": 36960 + }, + { + "epoch": 1.72, + "learning_rate": 1.4365212484128354e-05, + "loss": 0.0796, + "step": 36965 + }, + { + "epoch": 1.73, + "learning_rate": 1.4364428699073567e-05, + "loss": 0.0926, + "step": 36970 + }, + { + "epoch": 1.73, + "learning_rate": 1.4363644914018782e-05, + "loss": 0.2108, + "step": 36975 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362861128963994e-05, + "loss": 0.2106, + "step": 36980 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362077343909207e-05, + "loss": 0.2872, + "step": 36985 + }, + { + "epoch": 1.73, + "learning_rate": 1.436129355885442e-05, + "loss": 0.0477, + "step": 36990 + }, + { + "epoch": 1.73, + "learning_rate": 1.4360509773799633e-05, + "loss": 0.0257, + "step": 36995 + }, + { + "epoch": 1.73, + "learning_rate": 1.4359725988744848e-05, + "loss": 0.1252, + "step": 37000 + }, + { + "epoch": 1.73, + "learning_rate": 1.435894220369006e-05, + "loss": 0.1321, + "step": 37005 + }, + { + "epoch": 1.73, + "learning_rate": 1.4358158418635274e-05, + "loss": 0.1789, + "step": 37010 + }, + { + "epoch": 1.73, + "learning_rate": 1.4357374633580488e-05, + "loss": 0.1004, + "step": 37015 + }, + { + "epoch": 1.73, + "learning_rate": 1.4356590848525702e-05, + "loss": 0.1946, + "step": 37020 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355807063470914e-05, + "loss": 0.1881, + "step": 37025 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355023278416128e-05, + "loss": 0.3375, + "step": 37030 + }, + { + "epoch": 1.73, + "learning_rate": 1.4354239493361342e-05, + "loss": 0.2199, + "step": 37035 + }, + { + "epoch": 1.73, + "learning_rate": 1.4353455708306556e-05, + "loss": 0.0575, + "step": 37040 + }, + { + "epoch": 1.73, + "learning_rate": 1.4352671923251768e-05, + "loss": 0.0641, + "step": 37045 + }, + { + "epoch": 1.73, + "learning_rate": 1.435188813819698e-05, + "loss": 0.1354, + "step": 37050 + }, + { + "epoch": 1.73, + "learning_rate": 1.4351104353142196e-05, + "loss": 0.0793, + "step": 37055 + }, + { + "epoch": 1.73, + "learning_rate": 1.4350320568087408e-05, + "loss": 0.0634, + "step": 37060 + }, + { + "epoch": 1.73, + "learning_rate": 1.4349536783032622e-05, + "loss": 0.1773, + "step": 37065 + }, + { + "epoch": 1.73, + "learning_rate": 1.4348752997977835e-05, + "loss": 0.1752, + "step": 37070 + }, + { + "epoch": 1.73, + "learning_rate": 1.434796921292305e-05, + "loss": 0.1668, + "step": 37075 + }, + { + "epoch": 1.73, + "learning_rate": 1.4347185427868262e-05, + "loss": 0.3402, + "step": 37080 + }, + { + "epoch": 1.73, + "learning_rate": 1.4346401642813476e-05, + "loss": 0.2596, + "step": 37085 + }, + { + "epoch": 1.73, + "learning_rate": 1.4345617857758688e-05, + "loss": 0.0693, + "step": 37090 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344834072703904e-05, + "loss": 0.0821, + "step": 37095 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344050287649116e-05, + "loss": 0.0515, + "step": 37100 + }, + { + "epoch": 1.73, + "learning_rate": 1.434326650259433e-05, + "loss": 0.0693, + "step": 37105 + }, + { + "epoch": 1.73, + "learning_rate": 1.4342482717539542e-05, + "loss": 0.0675, + "step": 37110 + }, + { + "epoch": 1.73, + "learning_rate": 1.4341698932484756e-05, + "loss": 0.0614, + "step": 37115 + }, + { + "epoch": 1.73, + "learning_rate": 1.434091514742997e-05, + "loss": 0.1118, + "step": 37120 + }, + { + "epoch": 1.73, + "learning_rate": 1.4340131362375182e-05, + "loss": 0.1841, + "step": 37125 + }, + { + "epoch": 1.73, + "learning_rate": 1.4339347577320396e-05, + "loss": 0.4163, + "step": 37130 + }, + { + "epoch": 1.73, + "learning_rate": 1.433856379226561e-05, + "loss": 0.2525, + "step": 37135 + }, + { + "epoch": 1.73, + "learning_rate": 1.4337780007210824e-05, + "loss": 0.0489, + "step": 37140 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336996222156036e-05, + "loss": 0.0879, + "step": 37145 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336212437101252e-05, + "loss": 0.0851, + "step": 37150 + }, + { + "epoch": 1.73, + "learning_rate": 1.4335428652046464e-05, + "loss": 0.093, + "step": 37155 + }, + { + "epoch": 1.73, + "learning_rate": 1.4334644866991678e-05, + "loss": 0.161, + "step": 37160 + }, + { + "epoch": 1.73, + "learning_rate": 1.433386108193689e-05, + "loss": 0.1694, + "step": 37165 + }, + { + "epoch": 1.73, + "learning_rate": 1.4333077296882106e-05, + "loss": 0.17, + "step": 37170 + }, + { + "epoch": 1.73, + "learning_rate": 1.4332293511827318e-05, + "loss": 0.2063, + "step": 37175 + }, + { + "epoch": 1.73, + "learning_rate": 1.433150972677253e-05, + "loss": 0.2002, + "step": 37180 + }, + { + "epoch": 1.74, + "learning_rate": 1.4330725941717744e-05, + "loss": 0.2128, + "step": 37185 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329942156662956e-05, + "loss": 0.0525, + "step": 37190 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329158371608172e-05, + "loss": 0.1173, + "step": 37195 + }, + { + "epoch": 1.74, + "learning_rate": 1.4328374586553384e-05, + "loss": 0.1373, + "step": 37200 + }, + { + "epoch": 1.74, + "learning_rate": 1.4327590801498598e-05, + "loss": 0.0733, + "step": 37205 + }, + { + "epoch": 1.74, + "learning_rate": 1.432680701644381e-05, + "loss": 0.118, + "step": 37210 + }, + { + "epoch": 1.74, + "learning_rate": 1.4326023231389026e-05, + "loss": 0.1538, + "step": 37215 + }, + { + "epoch": 1.74, + "learning_rate": 1.4325239446334238e-05, + "loss": 0.1563, + "step": 37220 + }, + { + "epoch": 1.74, + "learning_rate": 1.4324455661279452e-05, + "loss": 0.2196, + "step": 37225 + }, + { + "epoch": 1.74, + "learning_rate": 1.4323671876224666e-05, + "loss": 0.3397, + "step": 37230 + }, + { + "epoch": 1.74, + "learning_rate": 1.432288809116988e-05, + "loss": 0.2154, + "step": 37235 + }, + { + "epoch": 1.74, + "learning_rate": 1.4322104306115092e-05, + "loss": 0.0617, + "step": 37240 + }, + { + "epoch": 1.74, + "learning_rate": 1.4321320521060304e-05, + "loss": 0.0488, + "step": 37245 + }, + { + "epoch": 1.74, + "learning_rate": 1.432053673600552e-05, + "loss": 0.1145, + "step": 37250 + }, + { + "epoch": 1.74, + "learning_rate": 1.4319752950950732e-05, + "loss": 0.1049, + "step": 37255 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318969165895946e-05, + "loss": 0.0923, + "step": 37260 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318185380841158e-05, + "loss": 0.1686, + "step": 37265 + }, + { + "epoch": 1.74, + "learning_rate": 1.4317401595786374e-05, + "loss": 0.1347, + "step": 37270 + }, + { + "epoch": 1.74, + "learning_rate": 1.4316617810731586e-05, + "loss": 0.161, + "step": 37275 + }, + { + "epoch": 1.74, + "learning_rate": 1.43158340256768e-05, + "loss": 0.2883, + "step": 37280 + }, + { + "epoch": 1.74, + "learning_rate": 1.4315050240622012e-05, + "loss": 0.2093, + "step": 37285 + }, + { + "epoch": 1.74, + "learning_rate": 1.4314266455567228e-05, + "loss": 0.0615, + "step": 37290 + }, + { + "epoch": 1.74, + "learning_rate": 1.431348267051244e-05, + "loss": 0.058, + "step": 37295 + }, + { + "epoch": 1.74, + "learning_rate": 1.4312698885457654e-05, + "loss": 0.0953, + "step": 37300 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311915100402866e-05, + "loss": 0.0668, + "step": 37305 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311131315348078e-05, + "loss": 0.1407, + "step": 37310 + }, + { + "epoch": 1.74, + "learning_rate": 1.4310347530293294e-05, + "loss": 0.1212, + "step": 37315 + }, + { + "epoch": 1.74, + "learning_rate": 1.4309563745238506e-05, + "loss": 0.1046, + "step": 37320 + }, + { + "epoch": 1.74, + "learning_rate": 1.430877996018372e-05, + "loss": 0.1611, + "step": 37325 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307996175128934e-05, + "loss": 0.3317, + "step": 37330 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307212390074148e-05, + "loss": 0.2498, + "step": 37335 + }, + { + "epoch": 1.74, + "learning_rate": 1.430642860501936e-05, + "loss": 0.0457, + "step": 37340 + }, + { + "epoch": 1.74, + "learning_rate": 1.4305644819964574e-05, + "loss": 0.0306, + "step": 37345 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304861034909788e-05, + "loss": 0.0676, + "step": 37350 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304077249855002e-05, + "loss": 0.0872, + "step": 37355 + }, + { + "epoch": 1.74, + "learning_rate": 1.4303293464800214e-05, + "loss": 0.1829, + "step": 37360 + }, + { + "epoch": 1.74, + "learning_rate": 1.430250967974543e-05, + "loss": 0.1626, + "step": 37365 + }, + { + "epoch": 1.74, + "learning_rate": 1.4301725894690642e-05, + "loss": 0.1964, + "step": 37370 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300942109635854e-05, + "loss": 0.2105, + "step": 37375 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300158324581068e-05, + "loss": 0.3942, + "step": 37380 + }, + { + "epoch": 1.74, + "learning_rate": 1.429937453952628e-05, + "loss": 0.2594, + "step": 37385 + }, + { + "epoch": 1.74, + "learning_rate": 1.4298590754471496e-05, + "loss": 0.0395, + "step": 37390 + }, + { + "epoch": 1.74, + "learning_rate": 1.4297806969416708e-05, + "loss": 0.0307, + "step": 37395 + }, + { + "epoch": 1.75, + "learning_rate": 1.4297023184361922e-05, + "loss": 0.0373, + "step": 37400 + }, + { + "epoch": 1.75, + "learning_rate": 1.4296239399307134e-05, + "loss": 0.06, + "step": 37405 + }, + { + "epoch": 1.75, + "learning_rate": 1.429545561425235e-05, + "loss": 0.1288, + "step": 37410 + }, + { + "epoch": 1.75, + "learning_rate": 1.4294671829197562e-05, + "loss": 0.1277, + "step": 37415 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293888044142776e-05, + "loss": 0.1357, + "step": 37420 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293104259087988e-05, + "loss": 0.1732, + "step": 37425 + }, + { + "epoch": 1.75, + "learning_rate": 1.4292320474033204e-05, + "loss": 0.2856, + "step": 37430 + }, + { + "epoch": 1.75, + "learning_rate": 1.4291536688978416e-05, + "loss": 0.2645, + "step": 37435 + }, + { + "epoch": 1.75, + "learning_rate": 1.4290752903923628e-05, + "loss": 0.0783, + "step": 37440 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289969118868842e-05, + "loss": 0.0332, + "step": 37445 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289185333814056e-05, + "loss": 0.0371, + "step": 37450 + }, + { + "epoch": 1.75, + "learning_rate": 1.428840154875927e-05, + "loss": 0.1575, + "step": 37455 + }, + { + "epoch": 1.75, + "learning_rate": 1.4287617763704482e-05, + "loss": 0.094, + "step": 37460 + }, + { + "epoch": 1.75, + "learning_rate": 1.4286833978649698e-05, + "loss": 0.0949, + "step": 37465 + }, + { + "epoch": 1.75, + "learning_rate": 1.428605019359491e-05, + "loss": 0.1676, + "step": 37470 + }, + { + "epoch": 1.75, + "learning_rate": 1.4285266408540124e-05, + "loss": 0.1947, + "step": 37475 + }, + { + "epoch": 1.75, + "learning_rate": 1.4284482623485336e-05, + "loss": 0.1995, + "step": 37480 + }, + { + "epoch": 1.75, + "learning_rate": 1.4283698838430551e-05, + "loss": 0.2297, + "step": 37485 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282915053375764e-05, + "loss": 0.0635, + "step": 37490 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282131268320978e-05, + "loss": 0.049, + "step": 37495 + }, + { + "epoch": 1.75, + "learning_rate": 1.428134748326619e-05, + "loss": 0.1047, + "step": 37500 + }, + { + "epoch": 1.75, + "learning_rate": 1.4280563698211402e-05, + "loss": 0.125, + "step": 37505 + }, + { + "epoch": 1.75, + "learning_rate": 1.4279779913156618e-05, + "loss": 0.1367, + "step": 37510 + }, + { + "epoch": 1.75, + "learning_rate": 1.427899612810183e-05, + "loss": 0.16, + "step": 37515 + }, + { + "epoch": 1.75, + "learning_rate": 1.4278212343047044e-05, + "loss": 0.2038, + "step": 37520 + }, + { + "epoch": 1.75, + "learning_rate": 1.4277428557992256e-05, + "loss": 0.2235, + "step": 37525 + }, + { + "epoch": 1.75, + "learning_rate": 1.4276644772937472e-05, + "loss": 0.428, + "step": 37530 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275860987882684e-05, + "loss": 0.3868, + "step": 37535 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275077202827898e-05, + "loss": 0.0283, + "step": 37540 + }, + { + "epoch": 1.75, + "learning_rate": 1.4274293417773112e-05, + "loss": 0.0262, + "step": 37545 + }, + { + "epoch": 1.75, + "learning_rate": 1.4273509632718325e-05, + "loss": 0.0667, + "step": 37550 + }, + { + "epoch": 1.75, + "learning_rate": 1.4272725847663538e-05, + "loss": 0.1034, + "step": 37555 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271942062608752e-05, + "loss": 0.1123, + "step": 37560 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271158277553965e-05, + "loss": 0.1533, + "step": 37565 + }, + { + "epoch": 1.75, + "learning_rate": 1.4270374492499178e-05, + "loss": 0.0854, + "step": 37570 + }, + { + "epoch": 1.75, + "learning_rate": 1.4269590707444392e-05, + "loss": 0.2158, + "step": 37575 + }, + { + "epoch": 1.75, + "learning_rate": 1.4268806922389604e-05, + "loss": 0.3784, + "step": 37580 + }, + { + "epoch": 1.75, + "learning_rate": 1.426802313733482e-05, + "loss": 0.2143, + "step": 37585 + }, + { + "epoch": 1.75, + "learning_rate": 1.4267239352280032e-05, + "loss": 0.0696, + "step": 37590 + }, + { + "epoch": 1.75, + "learning_rate": 1.4266455567225246e-05, + "loss": 0.0339, + "step": 37595 + }, + { + "epoch": 1.75, + "learning_rate": 1.4265671782170458e-05, + "loss": 0.0495, + "step": 37600 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264887997115673e-05, + "loss": 0.1684, + "step": 37605 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264104212060886e-05, + "loss": 0.184, + "step": 37610 + }, + { + "epoch": 1.76, + "learning_rate": 1.42633204270061e-05, + "loss": 0.1755, + "step": 37615 + }, + { + "epoch": 1.76, + "learning_rate": 1.4262536641951312e-05, + "loss": 0.1916, + "step": 37620 + }, + { + "epoch": 1.76, + "learning_rate": 1.4261752856896527e-05, + "loss": 0.178, + "step": 37625 + }, + { + "epoch": 1.76, + "learning_rate": 1.426096907184174e-05, + "loss": 0.2962, + "step": 37630 + }, + { + "epoch": 1.76, + "learning_rate": 1.4260185286786952e-05, + "loss": 0.1308, + "step": 37635 + }, + { + "epoch": 1.76, + "learning_rate": 1.4259401501732166e-05, + "loss": 0.0454, + "step": 37640 + }, + { + "epoch": 1.76, + "learning_rate": 1.425861771667738e-05, + "loss": 0.0986, + "step": 37645 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257833931622593e-05, + "loss": 0.0789, + "step": 37650 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257050146567806e-05, + "loss": 0.1297, + "step": 37655 + }, + { + "epoch": 1.76, + "learning_rate": 1.425626636151302e-05, + "loss": 0.0659, + "step": 37660 + }, + { + "epoch": 1.76, + "learning_rate": 1.4255482576458233e-05, + "loss": 0.1486, + "step": 37665 + }, + { + "epoch": 1.76, + "learning_rate": 1.4254698791403447e-05, + "loss": 0.1687, + "step": 37670 + }, + { + "epoch": 1.76, + "learning_rate": 1.425391500634866e-05, + "loss": 0.1958, + "step": 37675 + }, + { + "epoch": 1.76, + "learning_rate": 1.4253131221293875e-05, + "loss": 0.3865, + "step": 37680 + }, + { + "epoch": 1.76, + "learning_rate": 1.4252347436239087e-05, + "loss": 0.3559, + "step": 37685 + }, + { + "epoch": 1.76, + "learning_rate": 1.4251563651184301e-05, + "loss": 0.0624, + "step": 37690 + }, + { + "epoch": 1.76, + "learning_rate": 1.4250779866129513e-05, + "loss": 0.0917, + "step": 37695 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249996081074726e-05, + "loss": 0.0923, + "step": 37700 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249212296019941e-05, + "loss": 0.1211, + "step": 37705 + }, + { + "epoch": 1.76, + "learning_rate": 1.4248428510965153e-05, + "loss": 0.1117, + "step": 37710 + }, + { + "epoch": 1.76, + "learning_rate": 1.4247644725910367e-05, + "loss": 0.2351, + "step": 37715 + }, + { + "epoch": 1.76, + "learning_rate": 1.424686094085558e-05, + "loss": 0.1649, + "step": 37720 + }, + { + "epoch": 1.76, + "learning_rate": 1.4246077155800795e-05, + "loss": 0.1855, + "step": 37725 + }, + { + "epoch": 1.76, + "learning_rate": 1.4245293370746007e-05, + "loss": 0.3319, + "step": 37730 + }, + { + "epoch": 1.76, + "learning_rate": 1.4244509585691221e-05, + "loss": 0.2802, + "step": 37735 + }, + { + "epoch": 1.76, + "learning_rate": 1.4243725800636434e-05, + "loss": 0.0575, + "step": 37740 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242942015581649e-05, + "loss": 0.0562, + "step": 37745 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242158230526861e-05, + "loss": 0.0816, + "step": 37750 + }, + { + "epoch": 1.76, + "learning_rate": 1.4241374445472075e-05, + "loss": 0.0493, + "step": 37755 + }, + { + "epoch": 1.76, + "learning_rate": 1.424059066041729e-05, + "loss": 0.1505, + "step": 37760 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239806875362501e-05, + "loss": 0.1146, + "step": 37765 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239023090307715e-05, + "loss": 0.2125, + "step": 37770 + }, + { + "epoch": 1.76, + "learning_rate": 1.4238239305252927e-05, + "loss": 0.2498, + "step": 37775 + }, + { + "epoch": 1.76, + "learning_rate": 1.4237455520198143e-05, + "loss": 0.3085, + "step": 37780 + }, + { + "epoch": 1.76, + "learning_rate": 1.4236671735143355e-05, + "loss": 0.4248, + "step": 37785 + }, + { + "epoch": 1.76, + "learning_rate": 1.423588795008857e-05, + "loss": 0.0457, + "step": 37790 + }, + { + "epoch": 1.76, + "learning_rate": 1.4235104165033781e-05, + "loss": 0.0464, + "step": 37795 + }, + { + "epoch": 1.76, + "learning_rate": 1.4234320379978997e-05, + "loss": 0.0329, + "step": 37800 + }, + { + "epoch": 1.76, + "learning_rate": 1.423353659492421e-05, + "loss": 0.1443, + "step": 37805 + }, + { + "epoch": 1.76, + "learning_rate": 1.4232752809869423e-05, + "loss": 0.1504, + "step": 37810 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231969024814635e-05, + "loss": 0.1618, + "step": 37815 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231185239759851e-05, + "loss": 0.1992, + "step": 37820 + }, + { + "epoch": 1.76, + "learning_rate": 1.4230401454705063e-05, + "loss": 0.1979, + "step": 37825 + }, + { + "epoch": 1.77, + "learning_rate": 1.4229617669650275e-05, + "loss": 0.3638, + "step": 37830 + }, + { + "epoch": 1.77, + "learning_rate": 1.422883388459549e-05, + "loss": 0.2355, + "step": 37835 + }, + { + "epoch": 1.77, + "learning_rate": 1.4228050099540701e-05, + "loss": 0.0892, + "step": 37840 + }, + { + "epoch": 1.77, + "learning_rate": 1.4227266314485917e-05, + "loss": 0.0729, + "step": 37845 + }, + { + "epoch": 1.77, + "learning_rate": 1.422648252943113e-05, + "loss": 0.1274, + "step": 37850 + }, + { + "epoch": 1.77, + "learning_rate": 1.4225698744376343e-05, + "loss": 0.0896, + "step": 37855 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224914959321557e-05, + "loss": 0.0717, + "step": 37860 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224131174266771e-05, + "loss": 0.1184, + "step": 37865 + }, + { + "epoch": 1.77, + "learning_rate": 1.4223347389211983e-05, + "loss": 0.1207, + "step": 37870 + }, + { + "epoch": 1.77, + "learning_rate": 1.4222563604157197e-05, + "loss": 0.2518, + "step": 37875 + }, + { + "epoch": 1.77, + "learning_rate": 1.4221779819102411e-05, + "loss": 0.3498, + "step": 37880 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220996034047625e-05, + "loss": 0.3115, + "step": 37885 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220212248992837e-05, + "loss": 0.0583, + "step": 37890 + }, + { + "epoch": 1.77, + "learning_rate": 1.421942846393805e-05, + "loss": 0.0557, + "step": 37895 + }, + { + "epoch": 1.77, + "learning_rate": 1.4218644678883265e-05, + "loss": 0.0753, + "step": 37900 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217860893828477e-05, + "loss": 0.0799, + "step": 37905 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217077108773691e-05, + "loss": 0.0979, + "step": 37910 + }, + { + "epoch": 1.77, + "learning_rate": 1.4216293323718903e-05, + "loss": 0.0891, + "step": 37915 + }, + { + "epoch": 1.77, + "learning_rate": 1.4215509538664119e-05, + "loss": 0.1499, + "step": 37920 + }, + { + "epoch": 1.77, + "learning_rate": 1.4214725753609331e-05, + "loss": 0.1411, + "step": 37925 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213941968554545e-05, + "loss": 0.2998, + "step": 37930 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213158183499757e-05, + "loss": 0.2268, + "step": 37935 + }, + { + "epoch": 1.77, + "learning_rate": 1.4212374398444973e-05, + "loss": 0.0357, + "step": 37940 + }, + { + "epoch": 1.77, + "learning_rate": 1.4211590613390185e-05, + "loss": 0.0426, + "step": 37945 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210806828335399e-05, + "loss": 0.0794, + "step": 37950 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210023043280611e-05, + "loss": 0.1651, + "step": 37955 + }, + { + "epoch": 1.77, + "learning_rate": 1.4209239258225825e-05, + "loss": 0.1167, + "step": 37960 + }, + { + "epoch": 1.77, + "learning_rate": 1.4208455473171039e-05, + "loss": 0.1324, + "step": 37965 + }, + { + "epoch": 1.77, + "learning_rate": 1.4207671688116251e-05, + "loss": 0.1587, + "step": 37970 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206887903061465e-05, + "loss": 0.2, + "step": 37975 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206104118006679e-05, + "loss": 0.3415, + "step": 37980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4205320332951893e-05, + "loss": 0.24, + "step": 37985 + }, + { + "epoch": 1.77, + "learning_rate": 1.4204536547897105e-05, + "loss": 0.0173, + "step": 37990 + }, + { + "epoch": 1.77, + "learning_rate": 1.420375276284232e-05, + "loss": 0.0568, + "step": 37995 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202968977787533e-05, + "loss": 0.0666, + "step": 38000 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202185192732747e-05, + "loss": 0.1213, + "step": 38005 + }, + { + "epoch": 1.77, + "learning_rate": 1.4201401407677959e-05, + "loss": 0.1022, + "step": 38010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4200617622623175e-05, + "loss": 0.1468, + "step": 38015 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199833837568387e-05, + "loss": 0.2344, + "step": 38020 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199050052513599e-05, + "loss": 0.2398, + "step": 38025 + }, + { + "epoch": 1.77, + "learning_rate": 1.4198266267458813e-05, + "loss": 0.3248, + "step": 38030 + }, + { + "epoch": 1.77, + "learning_rate": 1.4197482482404025e-05, + "loss": 0.2467, + "step": 38035 + }, + { + "epoch": 1.77, + "learning_rate": 1.419669869734924e-05, + "loss": 0.0681, + "step": 38040 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195914912294453e-05, + "loss": 0.0525, + "step": 38045 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195131127239667e-05, + "loss": 0.0501, + "step": 38050 + }, + { + "epoch": 1.78, + "learning_rate": 1.4194347342184879e-05, + "loss": 0.0514, + "step": 38055 + }, + { + "epoch": 1.78, + "learning_rate": 1.4193563557130095e-05, + "loss": 0.1218, + "step": 38060 + }, + { + "epoch": 1.78, + "learning_rate": 1.4192779772075307e-05, + "loss": 0.1249, + "step": 38065 + }, + { + "epoch": 1.78, + "learning_rate": 1.419199598702052e-05, + "loss": 0.1505, + "step": 38070 + }, + { + "epoch": 1.78, + "learning_rate": 1.4191212201965735e-05, + "loss": 0.2301, + "step": 38075 + }, + { + "epoch": 1.78, + "learning_rate": 1.4190428416910949e-05, + "loss": 0.3129, + "step": 38080 + }, + { + "epoch": 1.78, + "learning_rate": 1.4189644631856161e-05, + "loss": 0.2717, + "step": 38085 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188860846801373e-05, + "loss": 0.0331, + "step": 38090 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188077061746589e-05, + "loss": 0.0624, + "step": 38095 + }, + { + "epoch": 1.78, + "learning_rate": 1.4187293276691801e-05, + "loss": 0.0583, + "step": 38100 + }, + { + "epoch": 1.78, + "learning_rate": 1.4186509491637015e-05, + "loss": 0.0417, + "step": 38105 + }, + { + "epoch": 1.78, + "learning_rate": 1.4185725706582227e-05, + "loss": 0.1927, + "step": 38110 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184941921527443e-05, + "loss": 0.0895, + "step": 38115 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184158136472655e-05, + "loss": 0.1494, + "step": 38120 + }, + { + "epoch": 1.78, + "learning_rate": 1.4183374351417869e-05, + "loss": 0.2075, + "step": 38125 + }, + { + "epoch": 1.78, + "learning_rate": 1.4182590566363081e-05, + "loss": 0.2841, + "step": 38130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181806781308297e-05, + "loss": 0.2523, + "step": 38135 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181022996253509e-05, + "loss": 0.051, + "step": 38140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4180239211198723e-05, + "loss": 0.0424, + "step": 38145 + }, + { + "epoch": 1.78, + "learning_rate": 1.4179455426143935e-05, + "loss": 0.0772, + "step": 38150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4178671641089147e-05, + "loss": 0.0753, + "step": 38155 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177887856034363e-05, + "loss": 0.1035, + "step": 38160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177104070979575e-05, + "loss": 0.226, + "step": 38165 + }, + { + "epoch": 1.78, + "learning_rate": 1.4176320285924789e-05, + "loss": 0.2246, + "step": 38170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4175536500870003e-05, + "loss": 0.1679, + "step": 38175 + }, + { + "epoch": 1.78, + "learning_rate": 1.4174752715815217e-05, + "loss": 0.3673, + "step": 38180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173968930760429e-05, + "loss": 0.2447, + "step": 38185 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173185145705643e-05, + "loss": 0.1178, + "step": 38190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4172401360650857e-05, + "loss": 0.0845, + "step": 38195 + }, + { + "epoch": 1.78, + "learning_rate": 1.417161757559607e-05, + "loss": 0.0427, + "step": 38200 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170833790541283e-05, + "loss": 0.1022, + "step": 38205 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170050005486498e-05, + "loss": 0.0848, + "step": 38210 + }, + { + "epoch": 1.78, + "learning_rate": 1.416926622043171e-05, + "loss": 0.1151, + "step": 38215 + }, + { + "epoch": 1.78, + "learning_rate": 1.4168482435376923e-05, + "loss": 0.1462, + "step": 38220 + }, + { + "epoch": 1.78, + "learning_rate": 1.4167698650322137e-05, + "loss": 0.2352, + "step": 38225 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166914865267349e-05, + "loss": 0.4185, + "step": 38230 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166131080212564e-05, + "loss": 0.2938, + "step": 38235 + }, + { + "epoch": 1.78, + "learning_rate": 1.4165347295157777e-05, + "loss": 0.0908, + "step": 38240 + }, + { + "epoch": 1.78, + "learning_rate": 1.416456351010299e-05, + "loss": 0.0728, + "step": 38245 + }, + { + "epoch": 1.78, + "learning_rate": 1.4163779725048203e-05, + "loss": 0.0581, + "step": 38250 + }, + { + "epoch": 1.79, + "learning_rate": 1.4162995939993418e-05, + "loss": 0.128, + "step": 38255 + }, + { + "epoch": 1.79, + "learning_rate": 1.416221215493863e-05, + "loss": 0.1307, + "step": 38260 + }, + { + "epoch": 1.79, + "learning_rate": 1.4161428369883845e-05, + "loss": 0.1847, + "step": 38265 + }, + { + "epoch": 1.79, + "learning_rate": 1.4160644584829057e-05, + "loss": 0.1701, + "step": 38270 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159860799774272e-05, + "loss": 0.2065, + "step": 38275 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159077014719485e-05, + "loss": 0.4186, + "step": 38280 + }, + { + "epoch": 1.79, + "learning_rate": 1.4158293229664697e-05, + "loss": 0.2782, + "step": 38285 + }, + { + "epoch": 1.79, + "learning_rate": 1.415750944460991e-05, + "loss": 0.0348, + "step": 38290 + }, + { + "epoch": 1.79, + "learning_rate": 1.4156725659555125e-05, + "loss": 0.023, + "step": 38295 + }, + { + "epoch": 1.79, + "learning_rate": 1.4155941874500338e-05, + "loss": 0.0195, + "step": 38300 + }, + { + "epoch": 1.79, + "learning_rate": 1.415515808944555e-05, + "loss": 0.1097, + "step": 38305 + }, + { + "epoch": 1.79, + "learning_rate": 1.4154374304390766e-05, + "loss": 0.1873, + "step": 38310 + }, + { + "epoch": 1.79, + "learning_rate": 1.4153590519335978e-05, + "loss": 0.1415, + "step": 38315 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152806734281192e-05, + "loss": 0.1651, + "step": 38320 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152022949226405e-05, + "loss": 0.2963, + "step": 38325 + }, + { + "epoch": 1.79, + "learning_rate": 1.415123916417162e-05, + "loss": 0.2719, + "step": 38330 + }, + { + "epoch": 1.79, + "learning_rate": 1.4150455379116832e-05, + "loss": 0.2542, + "step": 38335 + }, + { + "epoch": 1.79, + "learning_rate": 1.4149671594062046e-05, + "loss": 0.0304, + "step": 38340 + }, + { + "epoch": 1.79, + "learning_rate": 1.4148887809007259e-05, + "loss": 0.1187, + "step": 38345 + }, + { + "epoch": 1.79, + "learning_rate": 1.414810402395247e-05, + "loss": 0.1246, + "step": 38350 + }, + { + "epoch": 1.79, + "learning_rate": 1.4147320238897686e-05, + "loss": 0.0625, + "step": 38355 + }, + { + "epoch": 1.79, + "learning_rate": 1.4146536453842899e-05, + "loss": 0.182, + "step": 38360 + }, + { + "epoch": 1.79, + "learning_rate": 1.4145752668788112e-05, + "loss": 0.0816, + "step": 38365 + }, + { + "epoch": 1.79, + "learning_rate": 1.4144968883733325e-05, + "loss": 0.1735, + "step": 38370 + }, + { + "epoch": 1.79, + "learning_rate": 1.414418509867854e-05, + "loss": 0.1373, + "step": 38375 + }, + { + "epoch": 1.79, + "learning_rate": 1.4143401313623752e-05, + "loss": 0.4016, + "step": 38380 + }, + { + "epoch": 1.79, + "learning_rate": 1.4142617528568966e-05, + "loss": 0.2637, + "step": 38385 + }, + { + "epoch": 1.79, + "learning_rate": 1.414183374351418e-05, + "loss": 0.0964, + "step": 38390 + }, + { + "epoch": 1.79, + "learning_rate": 1.4141049958459394e-05, + "loss": 0.0635, + "step": 38395 + }, + { + "epoch": 1.79, + "learning_rate": 1.4140266173404606e-05, + "loss": 0.0482, + "step": 38400 + }, + { + "epoch": 1.79, + "learning_rate": 1.413948238834982e-05, + "loss": 0.0768, + "step": 38405 + }, + { + "epoch": 1.79, + "learning_rate": 1.4138698603295034e-05, + "loss": 0.0976, + "step": 38410 + }, + { + "epoch": 1.79, + "learning_rate": 1.4137914818240246e-05, + "loss": 0.2089, + "step": 38415 + }, + { + "epoch": 1.79, + "learning_rate": 1.413713103318546e-05, + "loss": 0.2171, + "step": 38420 + }, + { + "epoch": 1.79, + "learning_rate": 1.4136347248130673e-05, + "loss": 0.2064, + "step": 38425 + }, + { + "epoch": 1.79, + "learning_rate": 1.4135563463075888e-05, + "loss": 0.4382, + "step": 38430 + }, + { + "epoch": 1.79, + "learning_rate": 1.41347796780211e-05, + "loss": 0.2063, + "step": 38435 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133995892966314e-05, + "loss": 0.0299, + "step": 38440 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133212107911526e-05, + "loss": 0.08, + "step": 38445 + }, + { + "epoch": 1.79, + "learning_rate": 1.4132428322856742e-05, + "loss": 0.1102, + "step": 38450 + }, + { + "epoch": 1.79, + "learning_rate": 1.4131644537801954e-05, + "loss": 0.0615, + "step": 38455 + }, + { + "epoch": 1.79, + "learning_rate": 1.4130860752747168e-05, + "loss": 0.0704, + "step": 38460 + }, + { + "epoch": 1.79, + "learning_rate": 1.413007696769238e-05, + "loss": 0.0914, + "step": 38465 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129293182637596e-05, + "loss": 0.1401, + "step": 38470 + }, + { + "epoch": 1.8, + "learning_rate": 1.4128509397582808e-05, + "loss": 0.2028, + "step": 38475 + }, + { + "epoch": 1.8, + "learning_rate": 1.412772561252802e-05, + "loss": 0.2902, + "step": 38480 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126941827473234e-05, + "loss": 0.2409, + "step": 38485 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126158042418448e-05, + "loss": 0.0215, + "step": 38490 + }, + { + "epoch": 1.8, + "learning_rate": 1.4125374257363662e-05, + "loss": 0.0641, + "step": 38495 + }, + { + "epoch": 1.8, + "learning_rate": 1.4124590472308874e-05, + "loss": 0.1115, + "step": 38500 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123806687254088e-05, + "loss": 0.099, + "step": 38505 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123022902199302e-05, + "loss": 0.0935, + "step": 38510 + }, + { + "epoch": 1.8, + "learning_rate": 1.4122239117144516e-05, + "loss": 0.1694, + "step": 38515 + }, + { + "epoch": 1.8, + "learning_rate": 1.4121455332089728e-05, + "loss": 0.234, + "step": 38520 + }, + { + "epoch": 1.8, + "learning_rate": 1.4120671547034944e-05, + "loss": 0.237, + "step": 38525 + }, + { + "epoch": 1.8, + "learning_rate": 1.4119887761980156e-05, + "loss": 0.3852, + "step": 38530 + }, + { + "epoch": 1.8, + "learning_rate": 1.411910397692537e-05, + "loss": 0.2284, + "step": 38535 + }, + { + "epoch": 1.8, + "learning_rate": 1.4118320191870582e-05, + "loss": 0.0491, + "step": 38540 + }, + { + "epoch": 1.8, + "learning_rate": 1.4117536406815794e-05, + "loss": 0.0762, + "step": 38545 + }, + { + "epoch": 1.8, + "learning_rate": 1.411675262176101e-05, + "loss": 0.1074, + "step": 38550 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115968836706222e-05, + "loss": 0.1303, + "step": 38555 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115185051651436e-05, + "loss": 0.1395, + "step": 38560 + }, + { + "epoch": 1.8, + "learning_rate": 1.4114401266596648e-05, + "loss": 0.156, + "step": 38565 + }, + { + "epoch": 1.8, + "learning_rate": 1.4113617481541864e-05, + "loss": 0.1675, + "step": 38570 + }, + { + "epoch": 1.8, + "learning_rate": 1.4112833696487076e-05, + "loss": 0.1753, + "step": 38575 + }, + { + "epoch": 1.8, + "learning_rate": 1.411204991143229e-05, + "loss": 0.3437, + "step": 38580 + }, + { + "epoch": 1.8, + "learning_rate": 1.4111266126377502e-05, + "loss": 0.3223, + "step": 38585 + }, + { + "epoch": 1.8, + "learning_rate": 1.4110482341322718e-05, + "loss": 0.0702, + "step": 38590 + }, + { + "epoch": 1.8, + "learning_rate": 1.410969855626793e-05, + "loss": 0.0645, + "step": 38595 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108914771213144e-05, + "loss": 0.1382, + "step": 38600 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108130986158356e-05, + "loss": 0.1124, + "step": 38605 + }, + { + "epoch": 1.8, + "learning_rate": 1.410734720110357e-05, + "loss": 0.0886, + "step": 38610 + }, + { + "epoch": 1.8, + "learning_rate": 1.4106563416048784e-05, + "loss": 0.1948, + "step": 38615 + }, + { + "epoch": 1.8, + "learning_rate": 1.4105779630993996e-05, + "loss": 0.2204, + "step": 38620 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104995845939212e-05, + "loss": 0.1517, + "step": 38625 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104212060884424e-05, + "loss": 0.3109, + "step": 38630 + }, + { + "epoch": 1.8, + "learning_rate": 1.4103428275829638e-05, + "loss": 0.1802, + "step": 38635 + }, + { + "epoch": 1.8, + "learning_rate": 1.410264449077485e-05, + "loss": 0.028, + "step": 38640 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101860705720066e-05, + "loss": 0.1063, + "step": 38645 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101076920665278e-05, + "loss": 0.0521, + "step": 38650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4100293135610492e-05, + "loss": 0.091, + "step": 38655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099509350555704e-05, + "loss": 0.0967, + "step": 38660 + }, + { + "epoch": 1.8, + "learning_rate": 1.409872556550092e-05, + "loss": 0.117, + "step": 38665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097941780446132e-05, + "loss": 0.1346, + "step": 38670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097157995391344e-05, + "loss": 0.1494, + "step": 38675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4096374210336558e-05, + "loss": 0.2843, + "step": 38680 + }, + { + "epoch": 1.81, + "learning_rate": 1.409559042528177e-05, + "loss": 0.2728, + "step": 38685 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094806640226986e-05, + "loss": 0.0535, + "step": 38690 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094022855172198e-05, + "loss": 0.0669, + "step": 38695 + }, + { + "epoch": 1.81, + "learning_rate": 1.4093239070117412e-05, + "loss": 0.1214, + "step": 38700 + }, + { + "epoch": 1.81, + "learning_rate": 1.4092455285062626e-05, + "loss": 0.0523, + "step": 38705 + }, + { + "epoch": 1.81, + "learning_rate": 1.409167150000784e-05, + "loss": 0.1108, + "step": 38710 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090887714953052e-05, + "loss": 0.1358, + "step": 38715 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090103929898266e-05, + "loss": 0.1182, + "step": 38720 + }, + { + "epoch": 1.81, + "learning_rate": 1.408932014484348e-05, + "loss": 0.1505, + "step": 38725 + }, + { + "epoch": 1.81, + "learning_rate": 1.4088536359788694e-05, + "loss": 0.3261, + "step": 38730 + }, + { + "epoch": 1.81, + "learning_rate": 1.4087752574733906e-05, + "loss": 0.3337, + "step": 38735 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086968789679118e-05, + "loss": 0.0553, + "step": 38740 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086185004624334e-05, + "loss": 0.0685, + "step": 38745 + }, + { + "epoch": 1.81, + "learning_rate": 1.4085401219569546e-05, + "loss": 0.0481, + "step": 38750 + }, + { + "epoch": 1.81, + "learning_rate": 1.408461743451476e-05, + "loss": 0.095, + "step": 38755 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083833649459972e-05, + "loss": 0.1435, + "step": 38760 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083049864405188e-05, + "loss": 0.0965, + "step": 38765 + }, + { + "epoch": 1.81, + "learning_rate": 1.40822660793504e-05, + "loss": 0.1779, + "step": 38770 + }, + { + "epoch": 1.81, + "learning_rate": 1.4081482294295614e-05, + "loss": 0.2204, + "step": 38775 + }, + { + "epoch": 1.81, + "learning_rate": 1.4080698509240826e-05, + "loss": 0.3111, + "step": 38780 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079914724186042e-05, + "loss": 0.2863, + "step": 38785 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079130939131254e-05, + "loss": 0.0378, + "step": 38790 + }, + { + "epoch": 1.81, + "learning_rate": 1.4078347154076468e-05, + "loss": 0.1025, + "step": 38795 + }, + { + "epoch": 1.81, + "learning_rate": 1.407756336902168e-05, + "loss": 0.0496, + "step": 38800 + }, + { + "epoch": 1.81, + "learning_rate": 1.4076779583966894e-05, + "loss": 0.0819, + "step": 38805 + }, + { + "epoch": 1.81, + "learning_rate": 1.4075995798912108e-05, + "loss": 0.071, + "step": 38810 + }, + { + "epoch": 1.81, + "learning_rate": 1.407521201385732e-05, + "loss": 0.1571, + "step": 38815 + }, + { + "epoch": 1.81, + "learning_rate": 1.4074428228802534e-05, + "loss": 0.262, + "step": 38820 + }, + { + "epoch": 1.81, + "learning_rate": 1.4073644443747748e-05, + "loss": 0.2983, + "step": 38825 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072860658692962e-05, + "loss": 0.228, + "step": 38830 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072076873638174e-05, + "loss": 0.1873, + "step": 38835 + }, + { + "epoch": 1.81, + "learning_rate": 1.407129308858339e-05, + "loss": 0.018, + "step": 38840 + }, + { + "epoch": 1.81, + "learning_rate": 1.4070509303528602e-05, + "loss": 0.0565, + "step": 38845 + }, + { + "epoch": 1.81, + "learning_rate": 1.4069725518473816e-05, + "loss": 0.1004, + "step": 38850 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068941733419028e-05, + "loss": 0.0656, + "step": 38855 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068157948364243e-05, + "loss": 0.2027, + "step": 38860 + }, + { + "epoch": 1.81, + "learning_rate": 1.4067374163309456e-05, + "loss": 0.2106, + "step": 38865 + }, + { + "epoch": 1.81, + "learning_rate": 1.4066590378254668e-05, + "loss": 0.1221, + "step": 38870 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065806593199882e-05, + "loss": 0.2289, + "step": 38875 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065022808145094e-05, + "loss": 0.311, + "step": 38880 + }, + { + "epoch": 1.81, + "learning_rate": 1.406423902309031e-05, + "loss": 0.3, + "step": 38885 + }, + { + "epoch": 1.81, + "learning_rate": 1.4063455238035522e-05, + "loss": 0.0561, + "step": 38890 + }, + { + "epoch": 1.81, + "learning_rate": 1.4062671452980736e-05, + "loss": 0.0676, + "step": 38895 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061887667925948e-05, + "loss": 0.1541, + "step": 38900 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061103882871163e-05, + "loss": 0.0841, + "step": 38905 + }, + { + "epoch": 1.82, + "learning_rate": 1.4060320097816376e-05, + "loss": 0.1486, + "step": 38910 + }, + { + "epoch": 1.82, + "learning_rate": 1.405953631276159e-05, + "loss": 0.1742, + "step": 38915 + }, + { + "epoch": 1.82, + "learning_rate": 1.4058752527706803e-05, + "loss": 0.153, + "step": 38920 + }, + { + "epoch": 1.82, + "learning_rate": 1.4057968742652017e-05, + "loss": 0.201, + "step": 38925 + }, + { + "epoch": 1.82, + "learning_rate": 1.405718495759723e-05, + "loss": 0.3103, + "step": 38930 + }, + { + "epoch": 1.82, + "learning_rate": 1.4056401172542442e-05, + "loss": 0.1888, + "step": 38935 + }, + { + "epoch": 1.82, + "learning_rate": 1.4055617387487657e-05, + "loss": 0.035, + "step": 38940 + }, + { + "epoch": 1.82, + "learning_rate": 1.405483360243287e-05, + "loss": 0.02, + "step": 38945 + }, + { + "epoch": 1.82, + "learning_rate": 1.4054049817378084e-05, + "loss": 0.0971, + "step": 38950 + }, + { + "epoch": 1.82, + "learning_rate": 1.4053266032323296e-05, + "loss": 0.1084, + "step": 38955 + }, + { + "epoch": 1.82, + "learning_rate": 1.4052482247268511e-05, + "loss": 0.0948, + "step": 38960 + }, + { + "epoch": 1.82, + "learning_rate": 1.4051698462213724e-05, + "loss": 0.1653, + "step": 38965 + }, + { + "epoch": 1.82, + "learning_rate": 1.4050914677158937e-05, + "loss": 0.1645, + "step": 38970 + }, + { + "epoch": 1.82, + "learning_rate": 1.405013089210415e-05, + "loss": 0.1154, + "step": 38975 + }, + { + "epoch": 1.82, + "learning_rate": 1.4049347107049365e-05, + "loss": 0.4433, + "step": 38980 + }, + { + "epoch": 1.82, + "learning_rate": 1.4048563321994577e-05, + "loss": 0.2112, + "step": 38985 + }, + { + "epoch": 1.82, + "learning_rate": 1.4047779536939791e-05, + "loss": 0.0622, + "step": 38990 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046995751885004e-05, + "loss": 0.0382, + "step": 38995 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046211966830216e-05, + "loss": 0.1102, + "step": 39000 + }, + { + "epoch": 1.82, + "learning_rate": 1.4045428181775431e-05, + "loss": 0.1516, + "step": 39005 + }, + { + "epoch": 1.82, + "learning_rate": 1.4044644396720644e-05, + "loss": 0.1424, + "step": 39010 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043860611665858e-05, + "loss": 0.1871, + "step": 39015 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043076826611071e-05, + "loss": 0.2085, + "step": 39020 + }, + { + "epoch": 1.82, + "learning_rate": 1.4042293041556285e-05, + "loss": 0.2028, + "step": 39025 + }, + { + "epoch": 1.82, + "learning_rate": 1.4041509256501498e-05, + "loss": 0.3885, + "step": 39030 + }, + { + "epoch": 1.82, + "learning_rate": 1.4040725471446711e-05, + "loss": 0.3049, + "step": 39035 + }, + { + "epoch": 1.82, + "learning_rate": 1.4039941686391925e-05, + "loss": 0.0252, + "step": 39040 + }, + { + "epoch": 1.82, + "learning_rate": 1.403915790133714e-05, + "loss": 0.0376, + "step": 39045 + }, + { + "epoch": 1.82, + "learning_rate": 1.4038374116282351e-05, + "loss": 0.029, + "step": 39050 + }, + { + "epoch": 1.82, + "learning_rate": 1.4037590331227567e-05, + "loss": 0.1269, + "step": 39055 + }, + { + "epoch": 1.82, + "learning_rate": 1.403680654617278e-05, + "loss": 0.1043, + "step": 39060 + }, + { + "epoch": 1.82, + "learning_rate": 1.4036022761117992e-05, + "loss": 0.1261, + "step": 39065 + }, + { + "epoch": 1.82, + "learning_rate": 1.4035238976063205e-05, + "loss": 0.2339, + "step": 39070 + }, + { + "epoch": 1.82, + "learning_rate": 1.4034455191008418e-05, + "loss": 0.1286, + "step": 39075 + }, + { + "epoch": 1.82, + "learning_rate": 1.4033671405953633e-05, + "loss": 0.2755, + "step": 39080 + }, + { + "epoch": 1.82, + "learning_rate": 1.4032887620898845e-05, + "loss": 0.3312, + "step": 39085 + }, + { + "epoch": 1.82, + "learning_rate": 1.403210383584406e-05, + "loss": 0.0087, + "step": 39090 + }, + { + "epoch": 1.82, + "learning_rate": 1.4031320050789272e-05, + "loss": 0.0794, + "step": 39095 + }, + { + "epoch": 1.82, + "learning_rate": 1.4030536265734487e-05, + "loss": 0.0557, + "step": 39100 + }, + { + "epoch": 1.82, + "learning_rate": 1.40297524806797e-05, + "loss": 0.136, + "step": 39105 + }, + { + "epoch": 1.82, + "learning_rate": 1.4028968695624913e-05, + "loss": 0.1002, + "step": 39110 + }, + { + "epoch": 1.83, + "learning_rate": 1.4028184910570125e-05, + "loss": 0.1967, + "step": 39115 + }, + { + "epoch": 1.83, + "learning_rate": 1.4027401125515341e-05, + "loss": 0.1752, + "step": 39120 + }, + { + "epoch": 1.83, + "learning_rate": 1.4026617340460553e-05, + "loss": 0.1226, + "step": 39125 + }, + { + "epoch": 1.83, + "learning_rate": 1.4025833555405766e-05, + "loss": 0.2543, + "step": 39130 + }, + { + "epoch": 1.83, + "learning_rate": 1.402504977035098e-05, + "loss": 0.3523, + "step": 39135 + }, + { + "epoch": 1.83, + "learning_rate": 1.4024265985296193e-05, + "loss": 0.086, + "step": 39140 + }, + { + "epoch": 1.83, + "learning_rate": 1.4023482200241407e-05, + "loss": 0.1465, + "step": 39145 + }, + { + "epoch": 1.83, + "learning_rate": 1.402269841518662e-05, + "loss": 0.1037, + "step": 39150 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021914630131835e-05, + "loss": 0.0747, + "step": 39155 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021130845077047e-05, + "loss": 0.1403, + "step": 39160 + }, + { + "epoch": 1.83, + "learning_rate": 1.4020347060022261e-05, + "loss": 0.1251, + "step": 39165 + }, + { + "epoch": 1.83, + "learning_rate": 1.4019563274967473e-05, + "loss": 0.1805, + "step": 39170 + }, + { + "epoch": 1.83, + "learning_rate": 1.4018779489912689e-05, + "loss": 0.1899, + "step": 39175 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017995704857901e-05, + "loss": 0.3973, + "step": 39180 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017368676814072e-05, + "loss": 0.2354, + "step": 39185 + }, + { + "epoch": 1.83, + "learning_rate": 1.4016584891759286e-05, + "loss": 0.0266, + "step": 39190 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015801106704498e-05, + "loss": 0.0359, + "step": 39195 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015017321649712e-05, + "loss": 0.0703, + "step": 39200 + }, + { + "epoch": 1.83, + "learning_rate": 1.4014233536594926e-05, + "loss": 0.1166, + "step": 39205 + }, + { + "epoch": 1.83, + "learning_rate": 1.4013449751540138e-05, + "loss": 0.1409, + "step": 39210 + }, + { + "epoch": 1.83, + "learning_rate": 1.4012665966485353e-05, + "loss": 0.129, + "step": 39215 + }, + { + "epoch": 1.83, + "learning_rate": 1.4011882181430566e-05, + "loss": 0.1237, + "step": 39220 + }, + { + "epoch": 1.83, + "learning_rate": 1.401109839637578e-05, + "loss": 0.1638, + "step": 39225 + }, + { + "epoch": 1.83, + "learning_rate": 1.4010314611320992e-05, + "loss": 0.2936, + "step": 39230 + }, + { + "epoch": 1.83, + "learning_rate": 1.4009530826266207e-05, + "loss": 0.2029, + "step": 39235 + }, + { + "epoch": 1.83, + "learning_rate": 1.400874704121142e-05, + "loss": 0.0323, + "step": 39240 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007963256156633e-05, + "loss": 0.1204, + "step": 39245 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007179471101846e-05, + "loss": 0.0603, + "step": 39250 + }, + { + "epoch": 1.83, + "learning_rate": 1.4006395686047061e-05, + "loss": 0.1161, + "step": 39255 + }, + { + "epoch": 1.83, + "learning_rate": 1.4005611900992273e-05, + "loss": 0.1051, + "step": 39260 + }, + { + "epoch": 1.83, + "learning_rate": 1.4004828115937486e-05, + "loss": 0.1097, + "step": 39265 + }, + { + "epoch": 1.83, + "learning_rate": 1.40040443308827e-05, + "loss": 0.1569, + "step": 39270 + }, + { + "epoch": 1.83, + "learning_rate": 1.4003260545827912e-05, + "loss": 0.2331, + "step": 39275 + }, + { + "epoch": 1.83, + "learning_rate": 1.4002476760773127e-05, + "loss": 0.4034, + "step": 39280 + }, + { + "epoch": 1.83, + "learning_rate": 1.400169297571834e-05, + "loss": 0.2077, + "step": 39285 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000909190663553e-05, + "loss": 0.0452, + "step": 39290 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000125405608766e-05, + "loss": 0.0357, + "step": 39295 + }, + { + "epoch": 1.83, + "learning_rate": 1.3999341620553981e-05, + "loss": 0.0787, + "step": 39300 + }, + { + "epoch": 1.83, + "learning_rate": 1.3998557835499194e-05, + "loss": 0.1583, + "step": 39305 + }, + { + "epoch": 1.83, + "learning_rate": 1.3997774050444407e-05, + "loss": 0.1058, + "step": 39310 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996990265389621e-05, + "loss": 0.0733, + "step": 39315 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996206480334835e-05, + "loss": 0.1355, + "step": 39320 + }, + { + "epoch": 1.83, + "learning_rate": 1.3995422695280047e-05, + "loss": 0.1527, + "step": 39325 + }, + { + "epoch": 1.84, + "learning_rate": 1.399463891022526e-05, + "loss": 0.2326, + "step": 39330 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993855125170475e-05, + "loss": 0.2507, + "step": 39335 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993071340115687e-05, + "loss": 0.0268, + "step": 39340 + }, + { + "epoch": 1.84, + "learning_rate": 1.3992287555060901e-05, + "loss": 0.0515, + "step": 39345 + }, + { + "epoch": 1.84, + "learning_rate": 1.3991503770006114e-05, + "loss": 0.0863, + "step": 39350 + }, + { + "epoch": 1.84, + "learning_rate": 1.399071998495133e-05, + "loss": 0.1121, + "step": 39355 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989936199896541e-05, + "loss": 0.1218, + "step": 39360 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989152414841755e-05, + "loss": 0.0561, + "step": 39365 + }, + { + "epoch": 1.84, + "learning_rate": 1.3988368629786968e-05, + "loss": 0.1979, + "step": 39370 + }, + { + "epoch": 1.84, + "learning_rate": 1.3987584844732183e-05, + "loss": 0.3349, + "step": 39375 + }, + { + "epoch": 1.84, + "learning_rate": 1.3986801059677395e-05, + "loss": 0.2712, + "step": 39380 + }, + { + "epoch": 1.84, + "learning_rate": 1.398601727462261e-05, + "loss": 0.21, + "step": 39385 + }, + { + "epoch": 1.84, + "learning_rate": 1.3985233489567821e-05, + "loss": 0.0712, + "step": 39390 + }, + { + "epoch": 1.84, + "learning_rate": 1.3984449704513035e-05, + "loss": 0.1168, + "step": 39395 + }, + { + "epoch": 1.84, + "learning_rate": 1.398366591945825e-05, + "loss": 0.0892, + "step": 39400 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982882134403461e-05, + "loss": 0.1336, + "step": 39405 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982098349348675e-05, + "loss": 0.172, + "step": 39410 + }, + { + "epoch": 1.84, + "learning_rate": 1.398131456429389e-05, + "loss": 0.1663, + "step": 39415 + }, + { + "epoch": 1.84, + "learning_rate": 1.3980530779239103e-05, + "loss": 0.1677, + "step": 39420 + }, + { + "epoch": 1.84, + "learning_rate": 1.3979746994184315e-05, + "loss": 0.1407, + "step": 39425 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978963209129531e-05, + "loss": 0.4581, + "step": 39430 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978179424074743e-05, + "loss": 0.4789, + "step": 39435 + }, + { + "epoch": 1.84, + "learning_rate": 1.3977395639019957e-05, + "loss": 0.0543, + "step": 39440 + }, + { + "epoch": 1.84, + "learning_rate": 1.397661185396517e-05, + "loss": 0.0469, + "step": 39445 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975828068910385e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975044283855597e-05, + "loss": 0.0445, + "step": 39455 + }, + { + "epoch": 1.84, + "learning_rate": 1.397426049880081e-05, + "loss": 0.1107, + "step": 39460 + }, + { + "epoch": 1.84, + "learning_rate": 1.3973476713746023e-05, + "loss": 0.0865, + "step": 39465 + }, + { + "epoch": 1.84, + "learning_rate": 1.3972692928691235e-05, + "loss": 0.1738, + "step": 39470 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971909143636451e-05, + "loss": 0.3184, + "step": 39475 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971125358581663e-05, + "loss": 0.2718, + "step": 39480 + }, + { + "epoch": 1.84, + "learning_rate": 1.3970341573526877e-05, + "loss": 0.3997, + "step": 39485 + }, + { + "epoch": 1.84, + "learning_rate": 1.396955778847209e-05, + "loss": 0.0962, + "step": 39490 + }, + { + "epoch": 1.84, + "learning_rate": 1.3968774003417305e-05, + "loss": 0.0429, + "step": 39495 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967990218362517e-05, + "loss": 0.119, + "step": 39500 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967206433307731e-05, + "loss": 0.065, + "step": 39505 + }, + { + "epoch": 1.84, + "learning_rate": 1.3966422648252943e-05, + "loss": 0.2045, + "step": 39510 + }, + { + "epoch": 1.84, + "learning_rate": 1.3965638863198159e-05, + "loss": 0.1923, + "step": 39515 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964855078143371e-05, + "loss": 0.1101, + "step": 39520 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964071293088583e-05, + "loss": 0.201, + "step": 39525 + }, + { + "epoch": 1.84, + "learning_rate": 1.3963287508033799e-05, + "loss": 0.2879, + "step": 39530 + }, + { + "epoch": 1.84, + "learning_rate": 1.3962503722979011e-05, + "loss": 0.1518, + "step": 39535 + }, + { + "epoch": 1.84, + "learning_rate": 1.3961719937924225e-05, + "loss": 0.0882, + "step": 39540 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960936152869437e-05, + "loss": 0.0576, + "step": 39545 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960152367814653e-05, + "loss": 0.0711, + "step": 39550 + }, + { + "epoch": 1.85, + "learning_rate": 1.3959368582759865e-05, + "loss": 0.1663, + "step": 39555 + }, + { + "epoch": 1.85, + "learning_rate": 1.3958584797705079e-05, + "loss": 0.1107, + "step": 39560 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957801012650291e-05, + "loss": 0.1376, + "step": 39565 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957017227595507e-05, + "loss": 0.2258, + "step": 39570 + }, + { + "epoch": 1.85, + "learning_rate": 1.3956233442540719e-05, + "loss": 0.1872, + "step": 39575 + }, + { + "epoch": 1.85, + "learning_rate": 1.3955449657485933e-05, + "loss": 0.3751, + "step": 39580 + }, + { + "epoch": 1.85, + "learning_rate": 1.3954665872431145e-05, + "loss": 0.3203, + "step": 39585 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953882087376357e-05, + "loss": 0.0157, + "step": 39590 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953098302321573e-05, + "loss": 0.0469, + "step": 39595 + }, + { + "epoch": 1.85, + "learning_rate": 1.3952314517266785e-05, + "loss": 0.0631, + "step": 39600 + }, + { + "epoch": 1.85, + "learning_rate": 1.3951530732211999e-05, + "loss": 0.0862, + "step": 39605 + }, + { + "epoch": 1.85, + "learning_rate": 1.3950746947157211e-05, + "loss": 0.1194, + "step": 39610 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949963162102427e-05, + "loss": 0.1458, + "step": 39615 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949179377047639e-05, + "loss": 0.2322, + "step": 39620 + }, + { + "epoch": 1.85, + "learning_rate": 1.3948395591992853e-05, + "loss": 0.363, + "step": 39625 + }, + { + "epoch": 1.85, + "learning_rate": 1.3947611806938067e-05, + "loss": 0.2981, + "step": 39630 + }, + { + "epoch": 1.85, + "learning_rate": 1.394682802188328e-05, + "loss": 0.2648, + "step": 39635 + }, + { + "epoch": 1.85, + "learning_rate": 1.3946044236828493e-05, + "loss": 0.0346, + "step": 39640 + }, + { + "epoch": 1.85, + "learning_rate": 1.3945260451773709e-05, + "loss": 0.0576, + "step": 39645 + }, + { + "epoch": 1.85, + "learning_rate": 1.394447666671892e-05, + "loss": 0.1158, + "step": 39650 + }, + { + "epoch": 1.85, + "learning_rate": 1.3943692881664133e-05, + "loss": 0.0324, + "step": 39655 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942909096609347e-05, + "loss": 0.1207, + "step": 39660 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942125311554559e-05, + "loss": 0.1835, + "step": 39665 + }, + { + "epoch": 1.85, + "learning_rate": 1.3941341526499775e-05, + "loss": 0.1337, + "step": 39670 + }, + { + "epoch": 1.85, + "learning_rate": 1.3940557741444987e-05, + "loss": 0.1672, + "step": 39675 + }, + { + "epoch": 1.85, + "learning_rate": 1.3939773956390201e-05, + "loss": 0.2736, + "step": 39680 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938990171335413e-05, + "loss": 0.2847, + "step": 39685 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938206386280629e-05, + "loss": 0.044, + "step": 39690 + }, + { + "epoch": 1.85, + "learning_rate": 1.3937422601225841e-05, + "loss": 0.0497, + "step": 39695 + }, + { + "epoch": 1.85, + "learning_rate": 1.3936638816171055e-05, + "loss": 0.1422, + "step": 39700 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935855031116267e-05, + "loss": 0.0669, + "step": 39705 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935071246061483e-05, + "loss": 0.0804, + "step": 39710 + }, + { + "epoch": 1.85, + "learning_rate": 1.3934287461006695e-05, + "loss": 0.0735, + "step": 39715 + }, + { + "epoch": 1.85, + "learning_rate": 1.3933503675951907e-05, + "loss": 0.1105, + "step": 39720 + }, + { + "epoch": 1.85, + "learning_rate": 1.3932719890897121e-05, + "loss": 0.1518, + "step": 39725 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931936105842335e-05, + "loss": 0.3293, + "step": 39730 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931152320787549e-05, + "loss": 0.2068, + "step": 39735 + }, + { + "epoch": 1.85, + "learning_rate": 1.3930368535732761e-05, + "loss": 0.0597, + "step": 39740 + }, + { + "epoch": 1.85, + "learning_rate": 1.3929584750677977e-05, + "loss": 0.0848, + "step": 39745 + }, + { + "epoch": 1.85, + "learning_rate": 1.3928800965623189e-05, + "loss": 0.0794, + "step": 39750 + }, + { + "epoch": 1.86, + "learning_rate": 1.3928017180568403e-05, + "loss": 0.0735, + "step": 39755 + }, + { + "epoch": 1.86, + "learning_rate": 1.3927233395513615e-05, + "loss": 0.1662, + "step": 39760 + }, + { + "epoch": 1.86, + "learning_rate": 1.392644961045883e-05, + "loss": 0.1268, + "step": 39765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3925665825404043e-05, + "loss": 0.1259, + "step": 39770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924882040349257e-05, + "loss": 0.2228, + "step": 39775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924098255294469e-05, + "loss": 0.2452, + "step": 39780 + }, + { + "epoch": 1.86, + "learning_rate": 1.3923314470239681e-05, + "loss": 0.3537, + "step": 39785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3922530685184897e-05, + "loss": 0.0579, + "step": 39790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3921746900130109e-05, + "loss": 0.0819, + "step": 39795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920963115075323e-05, + "loss": 0.117, + "step": 39800 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920179330020535e-05, + "loss": 0.0466, + "step": 39805 + }, + { + "epoch": 1.86, + "learning_rate": 1.391939554496575e-05, + "loss": 0.1153, + "step": 39810 + }, + { + "epoch": 1.86, + "learning_rate": 1.3918611759910963e-05, + "loss": 0.1227, + "step": 39815 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917827974856177e-05, + "loss": 0.1416, + "step": 39820 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917044189801389e-05, + "loss": 0.1812, + "step": 39825 + }, + { + "epoch": 1.86, + "learning_rate": 1.3916260404746604e-05, + "loss": 0.3345, + "step": 39830 + }, + { + "epoch": 1.86, + "learning_rate": 1.3915476619691817e-05, + "loss": 0.359, + "step": 39835 + }, + { + "epoch": 1.86, + "learning_rate": 1.391469283463703e-05, + "loss": 0.019, + "step": 39840 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913909049582245e-05, + "loss": 0.037, + "step": 39845 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913125264527457e-05, + "loss": 0.0897, + "step": 39850 + }, + { + "epoch": 1.86, + "learning_rate": 1.391234147947267e-05, + "loss": 0.0943, + "step": 39855 + }, + { + "epoch": 1.86, + "learning_rate": 1.3911557694417883e-05, + "loss": 0.1399, + "step": 39860 + }, + { + "epoch": 1.86, + "learning_rate": 1.3910773909363098e-05, + "loss": 0.1403, + "step": 39865 + }, + { + "epoch": 1.86, + "learning_rate": 1.390999012430831e-05, + "loss": 0.2049, + "step": 39870 + }, + { + "epoch": 1.86, + "learning_rate": 1.3909206339253525e-05, + "loss": 0.2028, + "step": 39875 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908422554198737e-05, + "loss": 0.281, + "step": 39880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3907638769143952e-05, + "loss": 0.2856, + "step": 39885 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906854984089165e-05, + "loss": 0.0383, + "step": 39890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906071199034378e-05, + "loss": 0.0449, + "step": 39895 + }, + { + "epoch": 1.86, + "learning_rate": 1.390528741397959e-05, + "loss": 0.0715, + "step": 39900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3904503628924806e-05, + "loss": 0.0983, + "step": 39905 + }, + { + "epoch": 1.86, + "learning_rate": 1.3903719843870019e-05, + "loss": 0.1431, + "step": 39910 + }, + { + "epoch": 1.86, + "learning_rate": 1.390293605881523e-05, + "loss": 0.1682, + "step": 39915 + }, + { + "epoch": 1.86, + "learning_rate": 1.3902152273760445e-05, + "loss": 0.1312, + "step": 39920 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901368488705659e-05, + "loss": 0.1199, + "step": 39925 + }, + { + "epoch": 1.86, + "learning_rate": 1.3900584703650872e-05, + "loss": 0.1829, + "step": 39930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899800918596085e-05, + "loss": 0.2109, + "step": 39935 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899017133541299e-05, + "loss": 0.0469, + "step": 39940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3898233348486512e-05, + "loss": 0.056, + "step": 39945 + }, + { + "epoch": 1.86, + "learning_rate": 1.3897449563431726e-05, + "loss": 0.0796, + "step": 39950 + }, + { + "epoch": 1.86, + "learning_rate": 1.3896665778376939e-05, + "loss": 0.0767, + "step": 39955 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895881993322154e-05, + "loss": 0.0807, + "step": 39960 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895098208267366e-05, + "loss": 0.1694, + "step": 39965 + }, + { + "epoch": 1.87, + "learning_rate": 1.389431442321258e-05, + "loss": 0.1686, + "step": 39970 + }, + { + "epoch": 1.87, + "learning_rate": 1.3893530638157793e-05, + "loss": 0.1735, + "step": 39975 + }, + { + "epoch": 1.87, + "learning_rate": 1.3892746853103005e-05, + "loss": 0.3052, + "step": 39980 + }, + { + "epoch": 1.87, + "learning_rate": 1.389196306804822e-05, + "loss": 0.315, + "step": 39985 + }, + { + "epoch": 1.87, + "learning_rate": 1.3891179282993433e-05, + "loss": 0.0689, + "step": 39990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3890395497938646e-05, + "loss": 0.0481, + "step": 39995 + }, + { + "epoch": 1.87, + "learning_rate": 1.3889611712883859e-05, + "loss": 0.0813, + "step": 40000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888827927829074e-05, + "loss": 0.0883, + "step": 40005 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888044142774286e-05, + "loss": 0.0955, + "step": 40010 + }, + { + "epoch": 1.87, + "learning_rate": 1.38872603577195e-05, + "loss": 0.1675, + "step": 40015 + }, + { + "epoch": 1.87, + "learning_rate": 1.3886476572664713e-05, + "loss": 0.1361, + "step": 40020 + }, + { + "epoch": 1.87, + "learning_rate": 1.3885692787609928e-05, + "loss": 0.1712, + "step": 40025 + }, + { + "epoch": 1.87, + "learning_rate": 1.388490900255514e-05, + "loss": 0.3516, + "step": 40030 + }, + { + "epoch": 1.87, + "learning_rate": 1.3884125217500354e-05, + "loss": 0.233, + "step": 40035 + }, + { + "epoch": 1.87, + "learning_rate": 1.3883341432445567e-05, + "loss": 0.0227, + "step": 40040 + }, + { + "epoch": 1.87, + "learning_rate": 1.388255764739078e-05, + "loss": 0.0551, + "step": 40045 + }, + { + "epoch": 1.87, + "learning_rate": 1.3881773862335994e-05, + "loss": 0.0955, + "step": 40050 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880990077281207e-05, + "loss": 0.1336, + "step": 40055 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880206292226422e-05, + "loss": 0.0766, + "step": 40060 + }, + { + "epoch": 1.87, + "learning_rate": 1.3879422507171634e-05, + "loss": 0.0847, + "step": 40065 + }, + { + "epoch": 1.87, + "learning_rate": 1.3878638722116848e-05, + "loss": 0.143, + "step": 40070 + }, + { + "epoch": 1.87, + "learning_rate": 1.387785493706206e-05, + "loss": 0.1564, + "step": 40075 + }, + { + "epoch": 1.87, + "learning_rate": 1.3877071152007276e-05, + "loss": 0.3278, + "step": 40080 + }, + { + "epoch": 1.87, + "learning_rate": 1.3876287366952488e-05, + "loss": 0.2336, + "step": 40085 + }, + { + "epoch": 1.87, + "learning_rate": 1.3875503581897702e-05, + "loss": 0.0916, + "step": 40090 + }, + { + "epoch": 1.87, + "learning_rate": 1.3874719796842914e-05, + "loss": 0.11, + "step": 40095 + }, + { + "epoch": 1.87, + "learning_rate": 1.387393601178813e-05, + "loss": 0.0472, + "step": 40100 + }, + { + "epoch": 1.87, + "learning_rate": 1.3873152226733342e-05, + "loss": 0.0719, + "step": 40105 + }, + { + "epoch": 1.87, + "learning_rate": 1.3872368441678554e-05, + "loss": 0.1545, + "step": 40110 + }, + { + "epoch": 1.87, + "learning_rate": 1.3871584656623768e-05, + "loss": 0.1102, + "step": 40115 + }, + { + "epoch": 1.87, + "learning_rate": 1.387080087156898e-05, + "loss": 0.2047, + "step": 40120 + }, + { + "epoch": 1.87, + "learning_rate": 1.3870017086514196e-05, + "loss": 0.241, + "step": 40125 + }, + { + "epoch": 1.87, + "learning_rate": 1.3869233301459408e-05, + "loss": 0.3618, + "step": 40130 + }, + { + "epoch": 1.87, + "learning_rate": 1.3868449516404622e-05, + "loss": 0.3678, + "step": 40135 + }, + { + "epoch": 1.87, + "learning_rate": 1.3867665731349834e-05, + "loss": 0.0394, + "step": 40140 + }, + { + "epoch": 1.87, + "learning_rate": 1.386688194629505e-05, + "loss": 0.0337, + "step": 40145 + }, + { + "epoch": 1.87, + "learning_rate": 1.3866098161240262e-05, + "loss": 0.0898, + "step": 40150 + }, + { + "epoch": 1.87, + "learning_rate": 1.3865314376185476e-05, + "loss": 0.074, + "step": 40155 + }, + { + "epoch": 1.87, + "learning_rate": 1.386453059113069e-05, + "loss": 0.1356, + "step": 40160 + }, + { + "epoch": 1.87, + "learning_rate": 1.3863746806075904e-05, + "loss": 0.1607, + "step": 40165 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862963021021116e-05, + "loss": 0.1426, + "step": 40170 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862179235966328e-05, + "loss": 0.2663, + "step": 40175 + }, + { + "epoch": 1.87, + "learning_rate": 1.3861395450911544e-05, + "loss": 0.3673, + "step": 40180 + }, + { + "epoch": 1.88, + "learning_rate": 1.3860611665856756e-05, + "loss": 0.3103, + "step": 40185 + }, + { + "epoch": 1.88, + "learning_rate": 1.385982788080197e-05, + "loss": 0.0458, + "step": 40190 + }, + { + "epoch": 1.88, + "learning_rate": 1.3859044095747182e-05, + "loss": 0.0491, + "step": 40195 + }, + { + "epoch": 1.88, + "learning_rate": 1.3858260310692398e-05, + "loss": 0.0837, + "step": 40200 + }, + { + "epoch": 1.88, + "learning_rate": 1.385747652563761e-05, + "loss": 0.1077, + "step": 40205 + }, + { + "epoch": 1.88, + "learning_rate": 1.3856692740582824e-05, + "loss": 0.1128, + "step": 40210 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855908955528036e-05, + "loss": 0.2105, + "step": 40215 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855125170473252e-05, + "loss": 0.169, + "step": 40220 + }, + { + "epoch": 1.88, + "learning_rate": 1.3854341385418464e-05, + "loss": 0.1373, + "step": 40225 + }, + { + "epoch": 1.88, + "learning_rate": 1.3853557600363678e-05, + "loss": 0.2803, + "step": 40230 + }, + { + "epoch": 1.88, + "learning_rate": 1.385277381530889e-05, + "loss": 0.2458, + "step": 40235 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851990030254104e-05, + "loss": 0.067, + "step": 40240 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851206245199318e-05, + "loss": 0.0656, + "step": 40245 + }, + { + "epoch": 1.88, + "learning_rate": 1.385042246014453e-05, + "loss": 0.0975, + "step": 40250 + }, + { + "epoch": 1.88, + "learning_rate": 1.3849638675089744e-05, + "loss": 0.0711, + "step": 40255 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848854890034958e-05, + "loss": 0.1479, + "step": 40260 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848071104980172e-05, + "loss": 0.0855, + "step": 40265 + }, + { + "epoch": 1.88, + "learning_rate": 1.3847287319925384e-05, + "loss": 0.2257, + "step": 40270 + }, + { + "epoch": 1.88, + "learning_rate": 1.38465035348706e-05, + "loss": 0.2097, + "step": 40275 + }, + { + "epoch": 1.88, + "learning_rate": 1.3845719749815812e-05, + "loss": 0.4084, + "step": 40280 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844935964761026e-05, + "loss": 0.2848, + "step": 40285 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844152179706238e-05, + "loss": 0.104, + "step": 40290 + }, + { + "epoch": 1.88, + "learning_rate": 1.3843368394651454e-05, + "loss": 0.0429, + "step": 40295 + }, + { + "epoch": 1.88, + "learning_rate": 1.3842584609596666e-05, + "loss": 0.086, + "step": 40300 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841800824541878e-05, + "loss": 0.0811, + "step": 40305 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841017039487092e-05, + "loss": 0.058, + "step": 40310 + }, + { + "epoch": 1.88, + "learning_rate": 1.3840233254432304e-05, + "loss": 0.1156, + "step": 40315 + }, + { + "epoch": 1.88, + "learning_rate": 1.383944946937752e-05, + "loss": 0.2159, + "step": 40320 + }, + { + "epoch": 1.88, + "learning_rate": 1.3838665684322732e-05, + "loss": 0.2478, + "step": 40325 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837881899267946e-05, + "loss": 0.2005, + "step": 40330 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837098114213158e-05, + "loss": 0.3357, + "step": 40335 + }, + { + "epoch": 1.88, + "learning_rate": 1.3836314329158374e-05, + "loss": 0.0394, + "step": 40340 + }, + { + "epoch": 1.88, + "learning_rate": 1.3835530544103586e-05, + "loss": 0.0648, + "step": 40345 + }, + { + "epoch": 1.88, + "learning_rate": 1.38347467590488e-05, + "loss": 0.0823, + "step": 40350 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833962973994012e-05, + "loss": 0.0483, + "step": 40355 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833179188939228e-05, + "loss": 0.193, + "step": 40360 + }, + { + "epoch": 1.88, + "learning_rate": 1.383239540388444e-05, + "loss": 0.1007, + "step": 40365 + }, + { + "epoch": 1.88, + "learning_rate": 1.3831611618829652e-05, + "loss": 0.1092, + "step": 40370 + }, + { + "epoch": 1.88, + "learning_rate": 1.3830827833774868e-05, + "loss": 0.2738, + "step": 40375 + }, + { + "epoch": 1.88, + "learning_rate": 1.383004404872008e-05, + "loss": 0.2871, + "step": 40380 + }, + { + "epoch": 1.88, + "learning_rate": 1.3829260263665294e-05, + "loss": 0.2501, + "step": 40385 + }, + { + "epoch": 1.88, + "learning_rate": 1.3828476478610506e-05, + "loss": 0.0473, + "step": 40390 + }, + { + "epoch": 1.88, + "learning_rate": 1.3827692693555722e-05, + "loss": 0.0597, + "step": 40395 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826908908500934e-05, + "loss": 0.0646, + "step": 40400 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826125123446148e-05, + "loss": 0.1314, + "step": 40405 + }, + { + "epoch": 1.89, + "learning_rate": 1.382534133839136e-05, + "loss": 0.1628, + "step": 40410 + }, + { + "epoch": 1.89, + "learning_rate": 1.3824557553336576e-05, + "loss": 0.1858, + "step": 40415 + }, + { + "epoch": 1.89, + "learning_rate": 1.3823773768281788e-05, + "loss": 0.2056, + "step": 40420 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822989983227002e-05, + "loss": 0.1649, + "step": 40425 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822206198172214e-05, + "loss": 0.1464, + "step": 40430 + }, + { + "epoch": 1.89, + "learning_rate": 1.3821422413117426e-05, + "loss": 0.3024, + "step": 40435 + }, + { + "epoch": 1.89, + "learning_rate": 1.3820638628062642e-05, + "loss": 0.0702, + "step": 40440 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819854843007854e-05, + "loss": 0.0468, + "step": 40445 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819071057953068e-05, + "loss": 0.0472, + "step": 40450 + }, + { + "epoch": 1.89, + "learning_rate": 1.381828727289828e-05, + "loss": 0.0957, + "step": 40455 + }, + { + "epoch": 1.89, + "learning_rate": 1.3817503487843496e-05, + "loss": 0.1398, + "step": 40460 + }, + { + "epoch": 1.89, + "learning_rate": 1.3816719702788708e-05, + "loss": 0.109, + "step": 40465 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815935917733922e-05, + "loss": 0.2193, + "step": 40470 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815152132679136e-05, + "loss": 0.2995, + "step": 40475 + }, + { + "epoch": 1.89, + "learning_rate": 1.381436834762435e-05, + "loss": 0.336, + "step": 40480 + }, + { + "epoch": 1.89, + "learning_rate": 1.3813584562569562e-05, + "loss": 0.2809, + "step": 40485 + }, + { + "epoch": 1.89, + "learning_rate": 1.3812800777514777e-05, + "loss": 0.0376, + "step": 40490 + }, + { + "epoch": 1.89, + "learning_rate": 1.381201699245999e-05, + "loss": 0.0265, + "step": 40495 + }, + { + "epoch": 1.89, + "learning_rate": 1.3811233207405202e-05, + "loss": 0.0517, + "step": 40500 + }, + { + "epoch": 1.89, + "learning_rate": 1.3810449422350416e-05, + "loss": 0.0746, + "step": 40505 + }, + { + "epoch": 1.89, + "learning_rate": 1.3809665637295628e-05, + "loss": 0.0954, + "step": 40510 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808881852240844e-05, + "loss": 0.1229, + "step": 40515 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808098067186056e-05, + "loss": 0.1508, + "step": 40520 + }, + { + "epoch": 1.89, + "learning_rate": 1.380731428213127e-05, + "loss": 0.2266, + "step": 40525 + }, + { + "epoch": 1.89, + "learning_rate": 1.3806530497076482e-05, + "loss": 0.3108, + "step": 40530 + }, + { + "epoch": 1.89, + "learning_rate": 1.3805746712021697e-05, + "loss": 0.2541, + "step": 40535 + }, + { + "epoch": 1.89, + "learning_rate": 1.380496292696691e-05, + "loss": 0.0478, + "step": 40540 + }, + { + "epoch": 1.89, + "learning_rate": 1.3804179141912124e-05, + "loss": 0.0496, + "step": 40545 + }, + { + "epoch": 1.89, + "learning_rate": 1.3803395356857336e-05, + "loss": 0.0953, + "step": 40550 + }, + { + "epoch": 1.89, + "learning_rate": 1.3802611571802551e-05, + "loss": 0.1024, + "step": 40555 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801827786747764e-05, + "loss": 0.0996, + "step": 40560 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801044001692976e-05, + "loss": 0.1564, + "step": 40565 + }, + { + "epoch": 1.89, + "learning_rate": 1.380026021663819e-05, + "loss": 0.2166, + "step": 40570 + }, + { + "epoch": 1.89, + "learning_rate": 1.3799476431583404e-05, + "loss": 0.226, + "step": 40575 + }, + { + "epoch": 1.89, + "learning_rate": 1.3798692646528618e-05, + "loss": 0.219, + "step": 40580 + }, + { + "epoch": 1.89, + "learning_rate": 1.379790886147383e-05, + "loss": 0.1555, + "step": 40585 + }, + { + "epoch": 1.89, + "learning_rate": 1.3797125076419045e-05, + "loss": 0.0567, + "step": 40590 + }, + { + "epoch": 1.89, + "learning_rate": 1.3796341291364258e-05, + "loss": 0.0816, + "step": 40595 + }, + { + "epoch": 1.89, + "learning_rate": 1.3795557506309471e-05, + "loss": 0.0672, + "step": 40600 + }, + { + "epoch": 1.89, + "learning_rate": 1.3794773721254684e-05, + "loss": 0.0293, + "step": 40605 + }, + { + "epoch": 1.89, + "learning_rate": 1.37939899361999e-05, + "loss": 0.1691, + "step": 40610 + }, + { + "epoch": 1.9, + "learning_rate": 1.3793206151145111e-05, + "loss": 0.1475, + "step": 40615 + }, + { + "epoch": 1.9, + "learning_rate": 1.3792422366090325e-05, + "loss": 0.1439, + "step": 40620 + }, + { + "epoch": 1.9, + "learning_rate": 1.3791638581035538e-05, + "loss": 0.2063, + "step": 40625 + }, + { + "epoch": 1.9, + "learning_rate": 1.379085479598075e-05, + "loss": 0.2501, + "step": 40630 + }, + { + "epoch": 1.9, + "learning_rate": 1.3790071010925965e-05, + "loss": 0.24, + "step": 40635 + }, + { + "epoch": 1.9, + "learning_rate": 1.3789287225871178e-05, + "loss": 0.0474, + "step": 40640 + }, + { + "epoch": 1.9, + "learning_rate": 1.3788503440816392e-05, + "loss": 0.0137, + "step": 40645 + }, + { + "epoch": 1.9, + "learning_rate": 1.3787719655761604e-05, + "loss": 0.0644, + "step": 40650 + }, + { + "epoch": 1.9, + "learning_rate": 1.378693587070682e-05, + "loss": 0.0847, + "step": 40655 + }, + { + "epoch": 1.9, + "learning_rate": 1.3786152085652032e-05, + "loss": 0.136, + "step": 40660 + }, + { + "epoch": 1.9, + "learning_rate": 1.3785368300597245e-05, + "loss": 0.1225, + "step": 40665 + }, + { + "epoch": 1.9, + "learning_rate": 1.3784584515542458e-05, + "loss": 0.1019, + "step": 40670 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783800730487673e-05, + "loss": 0.2475, + "step": 40675 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783016945432885e-05, + "loss": 0.2234, + "step": 40680 + }, + { + "epoch": 1.9, + "learning_rate": 1.37822331603781e-05, + "loss": 0.2204, + "step": 40685 + }, + { + "epoch": 1.9, + "learning_rate": 1.3781449375323313e-05, + "loss": 0.0354, + "step": 40690 + }, + { + "epoch": 1.9, + "learning_rate": 1.3780665590268525e-05, + "loss": 0.0904, + "step": 40695 + }, + { + "epoch": 1.9, + "learning_rate": 1.377988180521374e-05, + "loss": 0.0707, + "step": 40700 + }, + { + "epoch": 1.9, + "learning_rate": 1.3779098020158952e-05, + "loss": 0.0884, + "step": 40705 + }, + { + "epoch": 1.9, + "learning_rate": 1.3778314235104167e-05, + "loss": 0.1006, + "step": 40710 + }, + { + "epoch": 1.9, + "learning_rate": 1.377753045004938e-05, + "loss": 0.0914, + "step": 40715 + }, + { + "epoch": 1.9, + "learning_rate": 1.3776746664994593e-05, + "loss": 0.2008, + "step": 40720 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775962879939806e-05, + "loss": 0.1749, + "step": 40725 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775179094885021e-05, + "loss": 0.3868, + "step": 40730 + }, + { + "epoch": 1.9, + "learning_rate": 1.3774395309830233e-05, + "loss": 0.3199, + "step": 40735 + }, + { + "epoch": 1.9, + "learning_rate": 1.3773611524775447e-05, + "loss": 0.065, + "step": 40740 + }, + { + "epoch": 1.9, + "learning_rate": 1.377282773972066e-05, + "loss": 0.071, + "step": 40745 + }, + { + "epoch": 1.9, + "learning_rate": 1.3772043954665875e-05, + "loss": 0.0642, + "step": 40750 + }, + { + "epoch": 1.9, + "learning_rate": 1.3771260169611087e-05, + "loss": 0.1533, + "step": 40755 + }, + { + "epoch": 1.9, + "learning_rate": 1.37704763845563e-05, + "loss": 0.0883, + "step": 40760 + }, + { + "epoch": 1.9, + "learning_rate": 1.3769692599501513e-05, + "loss": 0.2085, + "step": 40765 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768908814446726e-05, + "loss": 0.1509, + "step": 40770 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768125029391941e-05, + "loss": 0.1984, + "step": 40775 + }, + { + "epoch": 1.9, + "learning_rate": 1.3767341244337153e-05, + "loss": 0.4016, + "step": 40780 + }, + { + "epoch": 1.9, + "learning_rate": 1.3766557459282367e-05, + "loss": 0.2388, + "step": 40785 + }, + { + "epoch": 1.9, + "learning_rate": 1.3765773674227581e-05, + "loss": 0.0885, + "step": 40790 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764989889172795e-05, + "loss": 0.0829, + "step": 40795 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764206104118007e-05, + "loss": 0.0609, + "step": 40800 + }, + { + "epoch": 1.9, + "learning_rate": 1.3763422319063223e-05, + "loss": 0.0371, + "step": 40805 + }, + { + "epoch": 1.9, + "learning_rate": 1.3762638534008435e-05, + "loss": 0.0782, + "step": 40810 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761854748953649e-05, + "loss": 0.1198, + "step": 40815 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761070963898861e-05, + "loss": 0.1064, + "step": 40820 + }, + { + "epoch": 1.9, + "learning_rate": 1.3760287178844073e-05, + "loss": 0.2126, + "step": 40825 + }, + { + "epoch": 1.91, + "learning_rate": 1.3759503393789289e-05, + "loss": 0.2644, + "step": 40830 + }, + { + "epoch": 1.91, + "learning_rate": 1.3758719608734501e-05, + "loss": 0.2017, + "step": 40835 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757935823679715e-05, + "loss": 0.0589, + "step": 40840 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757152038624927e-05, + "loss": 0.0665, + "step": 40845 + }, + { + "epoch": 1.91, + "learning_rate": 1.3756368253570143e-05, + "loss": 0.0637, + "step": 40850 + }, + { + "epoch": 1.91, + "learning_rate": 1.3755584468515355e-05, + "loss": 0.1055, + "step": 40855 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754800683460569e-05, + "loss": 0.0967, + "step": 40860 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754016898405781e-05, + "loss": 0.1057, + "step": 40865 + }, + { + "epoch": 1.91, + "learning_rate": 1.3753233113350997e-05, + "loss": 0.1226, + "step": 40870 + }, + { + "epoch": 1.91, + "learning_rate": 1.3752449328296209e-05, + "loss": 0.1455, + "step": 40875 + }, + { + "epoch": 1.91, + "learning_rate": 1.3751665543241423e-05, + "loss": 0.4124, + "step": 40880 + }, + { + "epoch": 1.91, + "learning_rate": 1.3750881758186635e-05, + "loss": 0.2321, + "step": 40885 + }, + { + "epoch": 1.91, + "learning_rate": 1.375009797313185e-05, + "loss": 0.0824, + "step": 40890 + }, + { + "epoch": 1.91, + "learning_rate": 1.3749314188077063e-05, + "loss": 0.0691, + "step": 40895 + }, + { + "epoch": 1.91, + "learning_rate": 1.3748530403022275e-05, + "loss": 0.0824, + "step": 40900 + }, + { + "epoch": 1.91, + "learning_rate": 1.3747746617967491e-05, + "loss": 0.1582, + "step": 40905 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746962832912703e-05, + "loss": 0.104, + "step": 40910 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746179047857917e-05, + "loss": 0.1756, + "step": 40915 + }, + { + "epoch": 1.91, + "learning_rate": 1.374539526280313e-05, + "loss": 0.1663, + "step": 40920 + }, + { + "epoch": 1.91, + "learning_rate": 1.3744611477748345e-05, + "loss": 0.2132, + "step": 40925 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743827692693557e-05, + "loss": 0.2881, + "step": 40930 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743043907638771e-05, + "loss": 0.1533, + "step": 40935 + }, + { + "epoch": 1.91, + "learning_rate": 1.3742260122583983e-05, + "loss": 0.034, + "step": 40940 + }, + { + "epoch": 1.91, + "learning_rate": 1.3741476337529199e-05, + "loss": 0.0692, + "step": 40945 + }, + { + "epoch": 1.91, + "learning_rate": 1.3740692552474411e-05, + "loss": 0.0526, + "step": 40950 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739908767419623e-05, + "loss": 0.1364, + "step": 40955 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739124982364837e-05, + "loss": 0.0757, + "step": 40960 + }, + { + "epoch": 1.91, + "learning_rate": 1.373834119731005e-05, + "loss": 0.1443, + "step": 40965 + }, + { + "epoch": 1.91, + "learning_rate": 1.3737557412255265e-05, + "loss": 0.2183, + "step": 40970 + }, + { + "epoch": 1.91, + "learning_rate": 1.3736773627200477e-05, + "loss": 0.2046, + "step": 40975 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735989842145691e-05, + "loss": 0.3461, + "step": 40980 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735206057090903e-05, + "loss": 0.268, + "step": 40985 + }, + { + "epoch": 1.91, + "learning_rate": 1.3734422272036119e-05, + "loss": 0.0231, + "step": 40990 + }, + { + "epoch": 1.91, + "learning_rate": 1.3733638486981331e-05, + "loss": 0.1039, + "step": 40995 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732854701926545e-05, + "loss": 0.0878, + "step": 41000 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732070916871759e-05, + "loss": 0.0818, + "step": 41005 + }, + { + "epoch": 1.91, + "learning_rate": 1.3731287131816973e-05, + "loss": 0.108, + "step": 41010 + }, + { + "epoch": 1.91, + "learning_rate": 1.3730503346762185e-05, + "loss": 0.1776, + "step": 41015 + }, + { + "epoch": 1.91, + "learning_rate": 1.3729719561707397e-05, + "loss": 0.0533, + "step": 41020 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728935776652613e-05, + "loss": 0.1883, + "step": 41025 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728151991597825e-05, + "loss": 0.3926, + "step": 41030 + }, + { + "epoch": 1.91, + "learning_rate": 1.3727368206543039e-05, + "loss": 0.2241, + "step": 41035 + }, + { + "epoch": 1.91, + "learning_rate": 1.3726584421488251e-05, + "loss": 0.0445, + "step": 41040 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725800636433467e-05, + "loss": 0.087, + "step": 41045 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725016851378679e-05, + "loss": 0.0322, + "step": 41050 + }, + { + "epoch": 1.92, + "learning_rate": 1.3724233066323893e-05, + "loss": 0.0478, + "step": 41055 + }, + { + "epoch": 1.92, + "learning_rate": 1.3723449281269105e-05, + "loss": 0.1294, + "step": 41060 + }, + { + "epoch": 1.92, + "learning_rate": 1.372266549621432e-05, + "loss": 0.1646, + "step": 41065 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721881711159533e-05, + "loss": 0.1664, + "step": 41070 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721097926104747e-05, + "loss": 0.2329, + "step": 41075 + }, + { + "epoch": 1.92, + "learning_rate": 1.3720314141049959e-05, + "loss": 0.2623, + "step": 41080 + }, + { + "epoch": 1.92, + "learning_rate": 1.3719530355995173e-05, + "loss": 0.2138, + "step": 41085 + }, + { + "epoch": 1.92, + "learning_rate": 1.3718746570940387e-05, + "loss": 0.0504, + "step": 41090 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717962785885599e-05, + "loss": 0.0839, + "step": 41095 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717179000830813e-05, + "loss": 0.0589, + "step": 41100 + }, + { + "epoch": 1.92, + "learning_rate": 1.3716395215776027e-05, + "loss": 0.1023, + "step": 41105 + }, + { + "epoch": 1.92, + "learning_rate": 1.371561143072124e-05, + "loss": 0.1489, + "step": 41110 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714827645666453e-05, + "loss": 0.1497, + "step": 41115 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714043860611669e-05, + "loss": 0.1494, + "step": 41120 + }, + { + "epoch": 1.92, + "learning_rate": 1.371326007555688e-05, + "loss": 0.2351, + "step": 41125 + }, + { + "epoch": 1.92, + "learning_rate": 1.3712476290502095e-05, + "loss": 0.3537, + "step": 41130 + }, + { + "epoch": 1.92, + "learning_rate": 1.3711692505447307e-05, + "loss": 0.3639, + "step": 41135 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710908720392522e-05, + "loss": 0.0352, + "step": 41140 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710124935337735e-05, + "loss": 0.0505, + "step": 41145 + }, + { + "epoch": 1.92, + "learning_rate": 1.3709341150282947e-05, + "loss": 0.0559, + "step": 41150 + }, + { + "epoch": 1.92, + "learning_rate": 1.370855736522816e-05, + "loss": 0.0432, + "step": 41155 + }, + { + "epoch": 1.92, + "learning_rate": 1.3707773580173373e-05, + "loss": 0.2254, + "step": 41160 + }, + { + "epoch": 1.92, + "learning_rate": 1.3706989795118589e-05, + "loss": 0.1569, + "step": 41165 + }, + { + "epoch": 1.92, + "learning_rate": 1.37062060100638e-05, + "loss": 0.0941, + "step": 41170 + }, + { + "epoch": 1.92, + "learning_rate": 1.3705422225009015e-05, + "loss": 0.2558, + "step": 41175 + }, + { + "epoch": 1.92, + "learning_rate": 1.3704638439954227e-05, + "loss": 0.2511, + "step": 41180 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703854654899443e-05, + "loss": 0.3308, + "step": 41185 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703070869844655e-05, + "loss": 0.0499, + "step": 41190 + }, + { + "epoch": 1.92, + "learning_rate": 1.3702287084789869e-05, + "loss": 0.0471, + "step": 41195 + }, + { + "epoch": 1.92, + "learning_rate": 1.370150329973508e-05, + "loss": 0.063, + "step": 41200 + }, + { + "epoch": 1.92, + "learning_rate": 1.3700719514680296e-05, + "loss": 0.0849, + "step": 41205 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699935729625509e-05, + "loss": 0.1009, + "step": 41210 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699151944570721e-05, + "loss": 0.1324, + "step": 41215 + }, + { + "epoch": 1.92, + "learning_rate": 1.3698368159515936e-05, + "loss": 0.145, + "step": 41220 + }, + { + "epoch": 1.92, + "learning_rate": 1.3697584374461149e-05, + "loss": 0.206, + "step": 41225 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696800589406363e-05, + "loss": 0.3881, + "step": 41230 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696016804351575e-05, + "loss": 0.3881, + "step": 41235 + }, + { + "epoch": 1.92, + "learning_rate": 1.369523301929679e-05, + "loss": 0.1123, + "step": 41240 + }, + { + "epoch": 1.92, + "learning_rate": 1.3694449234242003e-05, + "loss": 0.0195, + "step": 41245 + }, + { + "epoch": 1.92, + "learning_rate": 1.3693665449187217e-05, + "loss": 0.1247, + "step": 41250 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692881664132429e-05, + "loss": 0.0557, + "step": 41255 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692097879077644e-05, + "loss": 0.0885, + "step": 41260 + }, + { + "epoch": 1.93, + "learning_rate": 1.3691314094022857e-05, + "loss": 0.1122, + "step": 41265 + }, + { + "epoch": 1.93, + "learning_rate": 1.369053030896807e-05, + "loss": 0.2121, + "step": 41270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3689746523913283e-05, + "loss": 0.1881, + "step": 41275 + }, + { + "epoch": 1.93, + "learning_rate": 1.3688962738858495e-05, + "loss": 0.2472, + "step": 41280 + }, + { + "epoch": 1.93, + "learning_rate": 1.368817895380371e-05, + "loss": 0.1782, + "step": 41285 + }, + { + "epoch": 1.93, + "learning_rate": 1.3687395168748923e-05, + "loss": 0.0595, + "step": 41290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3686611383694137e-05, + "loss": 0.0678, + "step": 41295 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685827598639349e-05, + "loss": 0.1161, + "step": 41300 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685043813584564e-05, + "loss": 0.0635, + "step": 41305 + }, + { + "epoch": 1.93, + "learning_rate": 1.3684260028529777e-05, + "loss": 0.1394, + "step": 41310 + }, + { + "epoch": 1.93, + "learning_rate": 1.368347624347499e-05, + "loss": 0.1814, + "step": 41315 + }, + { + "epoch": 1.93, + "learning_rate": 1.3682692458420204e-05, + "loss": 0.3026, + "step": 41320 + }, + { + "epoch": 1.93, + "learning_rate": 1.3681908673365418e-05, + "loss": 0.2154, + "step": 41325 + }, + { + "epoch": 1.93, + "learning_rate": 1.368112488831063e-05, + "loss": 0.2504, + "step": 41330 + }, + { + "epoch": 1.93, + "learning_rate": 1.3680341103255846e-05, + "loss": 0.3073, + "step": 41335 + }, + { + "epoch": 1.93, + "learning_rate": 1.3679557318201058e-05, + "loss": 0.0638, + "step": 41340 + }, + { + "epoch": 1.93, + "learning_rate": 1.367877353314627e-05, + "loss": 0.0279, + "step": 41345 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677989748091484e-05, + "loss": 0.1001, + "step": 41350 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677205963036697e-05, + "loss": 0.0529, + "step": 41355 + }, + { + "epoch": 1.93, + "learning_rate": 1.3676422177981912e-05, + "loss": 0.1054, + "step": 41360 + }, + { + "epoch": 1.93, + "learning_rate": 1.3675638392927124e-05, + "loss": 0.111, + "step": 41365 + }, + { + "epoch": 1.93, + "learning_rate": 1.3674854607872338e-05, + "loss": 0.0676, + "step": 41370 + }, + { + "epoch": 1.93, + "learning_rate": 1.367407082281755e-05, + "loss": 0.243, + "step": 41375 + }, + { + "epoch": 1.93, + "learning_rate": 1.3673287037762766e-05, + "loss": 0.4077, + "step": 41380 + }, + { + "epoch": 1.93, + "learning_rate": 1.3672503252707978e-05, + "loss": 0.3552, + "step": 41385 + }, + { + "epoch": 1.93, + "learning_rate": 1.3671719467653192e-05, + "loss": 0.0411, + "step": 41390 + }, + { + "epoch": 1.93, + "learning_rate": 1.3670935682598405e-05, + "loss": 0.039, + "step": 41395 + }, + { + "epoch": 1.93, + "learning_rate": 1.367015189754362e-05, + "loss": 0.0688, + "step": 41400 + }, + { + "epoch": 1.93, + "learning_rate": 1.3669368112488832e-05, + "loss": 0.1222, + "step": 41405 + }, + { + "epoch": 1.93, + "learning_rate": 1.3668584327434045e-05, + "loss": 0.073, + "step": 41410 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667800542379258e-05, + "loss": 0.0939, + "step": 41415 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667016757324472e-05, + "loss": 0.1917, + "step": 41420 + }, + { + "epoch": 1.93, + "learning_rate": 1.3666232972269686e-05, + "loss": 0.1409, + "step": 41425 + }, + { + "epoch": 1.93, + "learning_rate": 1.3665449187214898e-05, + "loss": 0.3746, + "step": 41430 + }, + { + "epoch": 1.93, + "learning_rate": 1.3664665402160114e-05, + "loss": 0.3048, + "step": 41435 + }, + { + "epoch": 1.93, + "learning_rate": 1.3663881617105326e-05, + "loss": 0.0518, + "step": 41440 + }, + { + "epoch": 1.93, + "learning_rate": 1.366309783205054e-05, + "loss": 0.0805, + "step": 41445 + }, + { + "epoch": 1.93, + "learning_rate": 1.3662314046995752e-05, + "loss": 0.0876, + "step": 41450 + }, + { + "epoch": 1.93, + "learning_rate": 1.3661530261940968e-05, + "loss": 0.0906, + "step": 41455 + }, + { + "epoch": 1.93, + "learning_rate": 1.366074647688618e-05, + "loss": 0.0911, + "step": 41460 + }, + { + "epoch": 1.93, + "learning_rate": 1.3659962691831394e-05, + "loss": 0.1121, + "step": 41465 + }, + { + "epoch": 1.94, + "learning_rate": 1.3659178906776606e-05, + "loss": 0.1549, + "step": 41470 + }, + { + "epoch": 1.94, + "learning_rate": 1.3658395121721819e-05, + "loss": 0.2158, + "step": 41475 + }, + { + "epoch": 1.94, + "learning_rate": 1.3657611336667034e-05, + "loss": 0.3177, + "step": 41480 + }, + { + "epoch": 1.94, + "learning_rate": 1.3656827551612246e-05, + "loss": 0.2616, + "step": 41485 + }, + { + "epoch": 1.94, + "learning_rate": 1.365604376655746e-05, + "loss": 0.0809, + "step": 41490 + }, + { + "epoch": 1.94, + "learning_rate": 1.3655259981502672e-05, + "loss": 0.0507, + "step": 41495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3654476196447888e-05, + "loss": 0.0408, + "step": 41500 + }, + { + "epoch": 1.94, + "learning_rate": 1.36536924113931e-05, + "loss": 0.079, + "step": 41505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652908626338314e-05, + "loss": 0.1017, + "step": 41510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652124841283526e-05, + "loss": 0.0745, + "step": 41515 + }, + { + "epoch": 1.94, + "learning_rate": 1.3651341056228742e-05, + "loss": 0.1234, + "step": 41520 + }, + { + "epoch": 1.94, + "learning_rate": 1.3650557271173954e-05, + "loss": 0.23, + "step": 41525 + }, + { + "epoch": 1.94, + "learning_rate": 1.3649773486119168e-05, + "loss": 0.2216, + "step": 41530 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648989701064382e-05, + "loss": 0.2246, + "step": 41535 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648205916009594e-05, + "loss": 0.0395, + "step": 41540 + }, + { + "epoch": 1.94, + "learning_rate": 1.3647422130954808e-05, + "loss": 0.0559, + "step": 41545 + }, + { + "epoch": 1.94, + "learning_rate": 1.364663834590002e-05, + "loss": 0.0517, + "step": 41550 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645854560845236e-05, + "loss": 0.1465, + "step": 41555 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645070775790448e-05, + "loss": 0.2362, + "step": 41560 + }, + { + "epoch": 1.94, + "learning_rate": 1.3644286990735662e-05, + "loss": 0.1821, + "step": 41565 + }, + { + "epoch": 1.94, + "learning_rate": 1.3643503205680874e-05, + "loss": 0.1984, + "step": 41570 + }, + { + "epoch": 1.94, + "learning_rate": 1.364271942062609e-05, + "loss": 0.2761, + "step": 41575 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641935635571302e-05, + "loss": 0.4789, + "step": 41580 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641151850516516e-05, + "loss": 0.2259, + "step": 41585 + }, + { + "epoch": 1.94, + "learning_rate": 1.3640368065461728e-05, + "loss": 0.0482, + "step": 41590 + }, + { + "epoch": 1.94, + "learning_rate": 1.3639584280406944e-05, + "loss": 0.0146, + "step": 41595 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638800495352156e-05, + "loss": 0.1254, + "step": 41600 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638016710297368e-05, + "loss": 0.0778, + "step": 41605 + }, + { + "epoch": 1.94, + "learning_rate": 1.3637232925242582e-05, + "loss": 0.0651, + "step": 41610 + }, + { + "epoch": 1.94, + "learning_rate": 1.3636449140187794e-05, + "loss": 0.1359, + "step": 41615 + }, + { + "epoch": 1.94, + "learning_rate": 1.363566535513301e-05, + "loss": 0.2115, + "step": 41620 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634881570078222e-05, + "loss": 0.1439, + "step": 41625 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634097785023436e-05, + "loss": 0.3997, + "step": 41630 + }, + { + "epoch": 1.94, + "learning_rate": 1.363331399996865e-05, + "loss": 0.2421, + "step": 41635 + }, + { + "epoch": 1.94, + "learning_rate": 1.3632530214913864e-05, + "loss": 0.0331, + "step": 41640 + }, + { + "epoch": 1.94, + "learning_rate": 1.3631746429859076e-05, + "loss": 0.1243, + "step": 41645 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630962644804292e-05, + "loss": 0.147, + "step": 41650 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630178859749504e-05, + "loss": 0.0483, + "step": 41655 + }, + { + "epoch": 1.94, + "learning_rate": 1.3629395074694718e-05, + "loss": 0.0896, + "step": 41660 + }, + { + "epoch": 1.94, + "learning_rate": 1.362861128963993e-05, + "loss": 0.1532, + "step": 41665 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627827504585142e-05, + "loss": 0.1593, + "step": 41670 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627043719530358e-05, + "loss": 0.1917, + "step": 41675 + }, + { + "epoch": 1.94, + "learning_rate": 1.362625993447557e-05, + "loss": 0.318, + "step": 41680 + }, + { + "epoch": 1.95, + "learning_rate": 1.3625476149420784e-05, + "loss": 0.3726, + "step": 41685 + }, + { + "epoch": 1.95, + "learning_rate": 1.3624692364365996e-05, + "loss": 0.0432, + "step": 41690 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623908579311212e-05, + "loss": 0.0398, + "step": 41695 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623124794256424e-05, + "loss": 0.0579, + "step": 41700 + }, + { + "epoch": 1.95, + "learning_rate": 1.3622341009201638e-05, + "loss": 0.0813, + "step": 41705 + }, + { + "epoch": 1.95, + "learning_rate": 1.362155722414685e-05, + "loss": 0.0798, + "step": 41710 + }, + { + "epoch": 1.95, + "learning_rate": 1.3620773439092066e-05, + "loss": 0.0768, + "step": 41715 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619989654037278e-05, + "loss": 0.1886, + "step": 41720 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619205868982492e-05, + "loss": 0.1185, + "step": 41725 + }, + { + "epoch": 1.95, + "learning_rate": 1.3618422083927704e-05, + "loss": 0.2939, + "step": 41730 + }, + { + "epoch": 1.95, + "learning_rate": 1.3617638298872918e-05, + "loss": 0.295, + "step": 41735 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616854513818132e-05, + "loss": 0.028, + "step": 41740 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616070728763344e-05, + "loss": 0.0461, + "step": 41745 + }, + { + "epoch": 1.95, + "learning_rate": 1.361528694370856e-05, + "loss": 0.0892, + "step": 41750 + }, + { + "epoch": 1.95, + "learning_rate": 1.3614503158653772e-05, + "loss": 0.0907, + "step": 41755 + }, + { + "epoch": 1.95, + "learning_rate": 1.3613719373598986e-05, + "loss": 0.0892, + "step": 41760 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612935588544198e-05, + "loss": 0.1427, + "step": 41765 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612151803489414e-05, + "loss": 0.108, + "step": 41770 + }, + { + "epoch": 1.95, + "learning_rate": 1.3611368018434626e-05, + "loss": 0.206, + "step": 41775 + }, + { + "epoch": 1.95, + "learning_rate": 1.361058423337984e-05, + "loss": 0.2341, + "step": 41780 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609800448325052e-05, + "loss": 0.2855, + "step": 41785 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609016663270268e-05, + "loss": 0.0509, + "step": 41790 + }, + { + "epoch": 1.95, + "learning_rate": 1.360823287821548e-05, + "loss": 0.0388, + "step": 41795 + }, + { + "epoch": 1.95, + "learning_rate": 1.3607449093160692e-05, + "loss": 0.0668, + "step": 41800 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606665308105906e-05, + "loss": 0.067, + "step": 41805 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605881523051118e-05, + "loss": 0.1153, + "step": 41810 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605097737996334e-05, + "loss": 0.183, + "step": 41815 + }, + { + "epoch": 1.95, + "learning_rate": 1.3604313952941546e-05, + "loss": 0.2191, + "step": 41820 + }, + { + "epoch": 1.95, + "learning_rate": 1.360353016788676e-05, + "loss": 0.1736, + "step": 41825 + }, + { + "epoch": 1.95, + "learning_rate": 1.3602746382831972e-05, + "loss": 0.3504, + "step": 41830 + }, + { + "epoch": 1.95, + "learning_rate": 1.3601962597777188e-05, + "loss": 0.2775, + "step": 41835 + }, + { + "epoch": 1.95, + "learning_rate": 1.36011788127224e-05, + "loss": 0.0406, + "step": 41840 + }, + { + "epoch": 1.95, + "learning_rate": 1.3600395027667614e-05, + "loss": 0.0676, + "step": 41845 + }, + { + "epoch": 1.95, + "learning_rate": 1.3599611242612828e-05, + "loss": 0.0566, + "step": 41850 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598827457558042e-05, + "loss": 0.0765, + "step": 41855 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598043672503254e-05, + "loss": 0.1108, + "step": 41860 + }, + { + "epoch": 1.95, + "learning_rate": 1.3597259887448466e-05, + "loss": 0.1186, + "step": 41865 + }, + { + "epoch": 1.95, + "learning_rate": 1.3596476102393682e-05, + "loss": 0.1556, + "step": 41870 + }, + { + "epoch": 1.95, + "learning_rate": 1.3595692317338894e-05, + "loss": 0.16, + "step": 41875 + }, + { + "epoch": 1.95, + "learning_rate": 1.3594908532284108e-05, + "loss": 0.3715, + "step": 41880 + }, + { + "epoch": 1.95, + "learning_rate": 1.359412474722932e-05, + "loss": 0.272, + "step": 41885 + }, + { + "epoch": 1.95, + "learning_rate": 1.3593340962174535e-05, + "loss": 0.0219, + "step": 41890 + }, + { + "epoch": 1.95, + "learning_rate": 1.3592557177119748e-05, + "loss": 0.0154, + "step": 41895 + }, + { + "epoch": 1.96, + "learning_rate": 1.3591773392064962e-05, + "loss": 0.1363, + "step": 41900 + }, + { + "epoch": 1.96, + "learning_rate": 1.3590989607010174e-05, + "loss": 0.074, + "step": 41905 + }, + { + "epoch": 1.96, + "learning_rate": 1.359020582195539e-05, + "loss": 0.1535, + "step": 41910 + }, + { + "epoch": 1.96, + "learning_rate": 1.3589422036900602e-05, + "loss": 0.0951, + "step": 41915 + }, + { + "epoch": 1.96, + "learning_rate": 1.3588638251845816e-05, + "loss": 0.1372, + "step": 41920 + }, + { + "epoch": 1.96, + "learning_rate": 1.3587854466791028e-05, + "loss": 0.2103, + "step": 41925 + }, + { + "epoch": 1.96, + "learning_rate": 1.358707068173624e-05, + "loss": 0.1851, + "step": 41930 + }, + { + "epoch": 1.96, + "learning_rate": 1.3586286896681456e-05, + "loss": 0.3416, + "step": 41935 + }, + { + "epoch": 1.96, + "learning_rate": 1.3585503111626668e-05, + "loss": 0.0271, + "step": 41940 + }, + { + "epoch": 1.96, + "learning_rate": 1.3584719326571882e-05, + "loss": 0.0605, + "step": 41945 + }, + { + "epoch": 1.96, + "learning_rate": 1.3583935541517096e-05, + "loss": 0.0437, + "step": 41950 + }, + { + "epoch": 1.96, + "learning_rate": 1.358315175646231e-05, + "loss": 0.0778, + "step": 41955 + }, + { + "epoch": 1.96, + "learning_rate": 1.3582367971407522e-05, + "loss": 0.0917, + "step": 41960 + }, + { + "epoch": 1.96, + "learning_rate": 1.3581584186352737e-05, + "loss": 0.1232, + "step": 41965 + }, + { + "epoch": 1.96, + "learning_rate": 1.358080040129795e-05, + "loss": 0.236, + "step": 41970 + }, + { + "epoch": 1.96, + "learning_rate": 1.3580016616243163e-05, + "loss": 0.2185, + "step": 41975 + }, + { + "epoch": 1.96, + "learning_rate": 1.3579232831188376e-05, + "loss": 0.393, + "step": 41980 + }, + { + "epoch": 1.96, + "learning_rate": 1.3578449046133591e-05, + "loss": 0.3043, + "step": 41985 + }, + { + "epoch": 1.96, + "learning_rate": 1.3577665261078803e-05, + "loss": 0.0506, + "step": 41990 + }, + { + "epoch": 1.96, + "learning_rate": 1.3576881476024016e-05, + "loss": 0.0796, + "step": 41995 + }, + { + "epoch": 1.96, + "learning_rate": 1.357609769096923e-05, + "loss": 0.0657, + "step": 42000 + }, + { + "epoch": 1.96, + "learning_rate": 1.3575313905914442e-05, + "loss": 0.0712, + "step": 42005 + }, + { + "epoch": 1.96, + "learning_rate": 1.3574530120859657e-05, + "loss": 0.1189, + "step": 42010 + }, + { + "epoch": 1.96, + "learning_rate": 1.357374633580487e-05, + "loss": 0.157, + "step": 42015 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572962550750083e-05, + "loss": 0.1085, + "step": 42020 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572178765695296e-05, + "loss": 0.2305, + "step": 42025 + }, + { + "epoch": 1.96, + "learning_rate": 1.3571394980640511e-05, + "loss": 0.3907, + "step": 42030 + }, + { + "epoch": 1.96, + "learning_rate": 1.3570611195585723e-05, + "loss": 0.2425, + "step": 42035 + }, + { + "epoch": 1.96, + "learning_rate": 1.3569827410530937e-05, + "loss": 0.039, + "step": 42040 + }, + { + "epoch": 1.96, + "learning_rate": 1.356904362547615e-05, + "loss": 0.0644, + "step": 42045 + }, + { + "epoch": 1.96, + "learning_rate": 1.3568259840421365e-05, + "loss": 0.0367, + "step": 42050 + }, + { + "epoch": 1.96, + "learning_rate": 1.3567476055366577e-05, + "loss": 0.0325, + "step": 42055 + }, + { + "epoch": 1.96, + "learning_rate": 1.356669227031179e-05, + "loss": 0.1135, + "step": 42060 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565908485257005e-05, + "loss": 0.1177, + "step": 42065 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565124700202217e-05, + "loss": 0.1744, + "step": 42070 + }, + { + "epoch": 1.96, + "learning_rate": 1.3564340915147431e-05, + "loss": 0.267, + "step": 42075 + }, + { + "epoch": 1.96, + "learning_rate": 1.3563557130092644e-05, + "loss": 0.2601, + "step": 42080 + }, + { + "epoch": 1.96, + "learning_rate": 1.3562773345037859e-05, + "loss": 0.3815, + "step": 42085 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561989559983071e-05, + "loss": 0.0435, + "step": 42090 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561205774928285e-05, + "loss": 0.0452, + "step": 42095 + }, + { + "epoch": 1.96, + "learning_rate": 1.3560421989873497e-05, + "loss": 0.1247, + "step": 42100 + }, + { + "epoch": 1.96, + "learning_rate": 1.3559638204818713e-05, + "loss": 0.0745, + "step": 42105 + }, + { + "epoch": 1.96, + "learning_rate": 1.3558854419763925e-05, + "loss": 0.1184, + "step": 42110 + }, + { + "epoch": 1.97, + "learning_rate": 1.355807063470914e-05, + "loss": 0.1522, + "step": 42115 + }, + { + "epoch": 1.97, + "learning_rate": 1.3557286849654351e-05, + "loss": 0.1299, + "step": 42120 + }, + { + "epoch": 1.97, + "learning_rate": 1.3556503064599564e-05, + "loss": 0.2057, + "step": 42125 + }, + { + "epoch": 1.97, + "learning_rate": 1.355571927954478e-05, + "loss": 0.2441, + "step": 42130 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554935494489991e-05, + "loss": 0.2413, + "step": 42135 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554151709435205e-05, + "loss": 0.063, + "step": 42140 + }, + { + "epoch": 1.97, + "learning_rate": 1.3553367924380418e-05, + "loss": 0.0286, + "step": 42145 + }, + { + "epoch": 1.97, + "learning_rate": 1.3552584139325633e-05, + "loss": 0.0437, + "step": 42150 + }, + { + "epoch": 1.97, + "learning_rate": 1.3551800354270845e-05, + "loss": 0.0742, + "step": 42155 + }, + { + "epoch": 1.97, + "learning_rate": 1.355101656921606e-05, + "loss": 0.1081, + "step": 42160 + }, + { + "epoch": 1.97, + "learning_rate": 1.3550232784161273e-05, + "loss": 0.1142, + "step": 42165 + }, + { + "epoch": 1.97, + "learning_rate": 1.3549448999106487e-05, + "loss": 0.1755, + "step": 42170 + }, + { + "epoch": 1.97, + "learning_rate": 1.35486652140517e-05, + "loss": 0.2252, + "step": 42175 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547881428996915e-05, + "loss": 0.2728, + "step": 42180 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547097643942127e-05, + "loss": 0.3548, + "step": 42185 + }, + { + "epoch": 1.97, + "learning_rate": 1.354631385888734e-05, + "loss": 0.1084, + "step": 42190 + }, + { + "epoch": 1.97, + "learning_rate": 1.3545530073832553e-05, + "loss": 0.0751, + "step": 42195 + }, + { + "epoch": 1.97, + "learning_rate": 1.3544746288777765e-05, + "loss": 0.1289, + "step": 42200 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543962503722981e-05, + "loss": 0.1643, + "step": 42205 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543178718668193e-05, + "loss": 0.1434, + "step": 42210 + }, + { + "epoch": 1.97, + "learning_rate": 1.3542394933613407e-05, + "loss": 0.1457, + "step": 42215 + }, + { + "epoch": 1.97, + "learning_rate": 1.354161114855862e-05, + "loss": 0.1197, + "step": 42220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540827363503835e-05, + "loss": 0.3008, + "step": 42225 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540043578449047e-05, + "loss": 0.2925, + "step": 42230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3539259793394261e-05, + "loss": 0.3225, + "step": 42235 + }, + { + "epoch": 1.97, + "learning_rate": 1.3538476008339473e-05, + "loss": 0.0174, + "step": 42240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3537692223284689e-05, + "loss": 0.0662, + "step": 42245 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536908438229901e-05, + "loss": 0.0883, + "step": 42250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536124653175113e-05, + "loss": 0.1134, + "step": 42255 + }, + { + "epoch": 1.97, + "learning_rate": 1.3535340868120327e-05, + "loss": 0.0576, + "step": 42260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3534557083065541e-05, + "loss": 0.1248, + "step": 42265 + }, + { + "epoch": 1.97, + "learning_rate": 1.3533773298010755e-05, + "loss": 0.1739, + "step": 42270 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532989512955967e-05, + "loss": 0.1615, + "step": 42275 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532205727901183e-05, + "loss": 0.2108, + "step": 42280 + }, + { + "epoch": 1.97, + "learning_rate": 1.3531421942846395e-05, + "loss": 0.1906, + "step": 42285 + }, + { + "epoch": 1.97, + "learning_rate": 1.3530638157791609e-05, + "loss": 0.0301, + "step": 42290 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529854372736821e-05, + "loss": 0.0248, + "step": 42295 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529070587682037e-05, + "loss": 0.0653, + "step": 42300 + }, + { + "epoch": 1.97, + "learning_rate": 1.3528286802627249e-05, + "loss": 0.1097, + "step": 42305 + }, + { + "epoch": 1.97, + "learning_rate": 1.3527503017572463e-05, + "loss": 0.1149, + "step": 42310 + }, + { + "epoch": 1.97, + "learning_rate": 1.3526719232517675e-05, + "loss": 0.1353, + "step": 42315 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525935447462887e-05, + "loss": 0.1043, + "step": 42320 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525151662408103e-05, + "loss": 0.3242, + "step": 42325 + }, + { + "epoch": 1.98, + "learning_rate": 1.3524367877353315e-05, + "loss": 0.2849, + "step": 42330 + }, + { + "epoch": 1.98, + "learning_rate": 1.3523584092298529e-05, + "loss": 0.3416, + "step": 42335 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522800307243741e-05, + "loss": 0.0547, + "step": 42340 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522016522188957e-05, + "loss": 0.0568, + "step": 42345 + }, + { + "epoch": 1.98, + "learning_rate": 1.3521232737134169e-05, + "loss": 0.0978, + "step": 42350 + }, + { + "epoch": 1.98, + "learning_rate": 1.3520448952079383e-05, + "loss": 0.1457, + "step": 42355 + }, + { + "epoch": 1.98, + "learning_rate": 1.3519665167024595e-05, + "loss": 0.0819, + "step": 42360 + }, + { + "epoch": 1.98, + "learning_rate": 1.351888138196981e-05, + "loss": 0.1787, + "step": 42365 + }, + { + "epoch": 1.98, + "learning_rate": 1.3518097596915023e-05, + "loss": 0.1941, + "step": 42370 + }, + { + "epoch": 1.98, + "learning_rate": 1.3517313811860237e-05, + "loss": 0.2267, + "step": 42375 + }, + { + "epoch": 1.98, + "learning_rate": 1.351653002680545e-05, + "loss": 0.377, + "step": 42380 + }, + { + "epoch": 1.98, + "learning_rate": 1.3515746241750663e-05, + "loss": 0.2612, + "step": 42385 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514962456695877e-05, + "loss": 0.0406, + "step": 42390 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514178671641089e-05, + "loss": 0.0252, + "step": 42395 + }, + { + "epoch": 1.98, + "learning_rate": 1.3513394886586305e-05, + "loss": 0.0102, + "step": 42400 + }, + { + "epoch": 1.98, + "learning_rate": 1.3512611101531517e-05, + "loss": 0.1246, + "step": 42405 + }, + { + "epoch": 1.98, + "learning_rate": 1.351182731647673e-05, + "loss": 0.1436, + "step": 42410 + }, + { + "epoch": 1.98, + "learning_rate": 1.3511043531421943e-05, + "loss": 0.1284, + "step": 42415 + }, + { + "epoch": 1.98, + "learning_rate": 1.3510259746367159e-05, + "loss": 0.1715, + "step": 42420 + }, + { + "epoch": 1.98, + "learning_rate": 1.3509475961312371e-05, + "loss": 0.2565, + "step": 42425 + }, + { + "epoch": 1.98, + "learning_rate": 1.3508692176257585e-05, + "loss": 0.2379, + "step": 42430 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507908391202797e-05, + "loss": 0.1811, + "step": 42435 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507124606148013e-05, + "loss": 0.0796, + "step": 42440 + }, + { + "epoch": 1.98, + "learning_rate": 1.3506340821093225e-05, + "loss": 0.0504, + "step": 42445 + }, + { + "epoch": 1.98, + "learning_rate": 1.3505557036038437e-05, + "loss": 0.1026, + "step": 42450 + }, + { + "epoch": 1.98, + "learning_rate": 1.3504773250983651e-05, + "loss": 0.0687, + "step": 42455 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503989465928863e-05, + "loss": 0.0637, + "step": 42460 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503205680874079e-05, + "loss": 0.0864, + "step": 42465 + }, + { + "epoch": 1.98, + "learning_rate": 1.3502421895819291e-05, + "loss": 0.1525, + "step": 42470 + }, + { + "epoch": 1.98, + "learning_rate": 1.3501638110764505e-05, + "loss": 0.1946, + "step": 42475 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500854325709719e-05, + "loss": 0.4185, + "step": 42480 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500070540654933e-05, + "loss": 0.2679, + "step": 42485 + }, + { + "epoch": 1.98, + "learning_rate": 1.3499286755600145e-05, + "loss": 0.0422, + "step": 42490 + }, + { + "epoch": 1.98, + "learning_rate": 1.349850297054536e-05, + "loss": 0.1082, + "step": 42495 + }, + { + "epoch": 1.98, + "learning_rate": 1.3497719185490573e-05, + "loss": 0.0823, + "step": 42500 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496935400435787e-05, + "loss": 0.0491, + "step": 42505 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496151615380999e-05, + "loss": 0.1773, + "step": 42510 + }, + { + "epoch": 1.98, + "learning_rate": 1.3495367830326211e-05, + "loss": 0.1344, + "step": 42515 + }, + { + "epoch": 1.98, + "learning_rate": 1.3494584045271427e-05, + "loss": 0.0834, + "step": 42520 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493800260216639e-05, + "loss": 0.2854, + "step": 42525 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493016475161853e-05, + "loss": 0.3165, + "step": 42530 + }, + { + "epoch": 1.98, + "learning_rate": 1.3492232690107065e-05, + "loss": 0.2865, + "step": 42535 + }, + { + "epoch": 1.98, + "learning_rate": 1.349144890505228e-05, + "loss": 0.0551, + "step": 42540 + }, + { + "epoch": 1.99, + "learning_rate": 1.3490665119997493e-05, + "loss": 0.0634, + "step": 42545 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489881334942707e-05, + "loss": 0.0897, + "step": 42550 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489097549887919e-05, + "loss": 0.1488, + "step": 42555 + }, + { + "epoch": 1.99, + "learning_rate": 1.3488313764833134e-05, + "loss": 0.1682, + "step": 42560 + }, + { + "epoch": 1.99, + "learning_rate": 1.3487529979778347e-05, + "loss": 0.1566, + "step": 42565 + }, + { + "epoch": 1.99, + "learning_rate": 1.348674619472356e-05, + "loss": 0.1352, + "step": 42570 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485962409668773e-05, + "loss": 0.2381, + "step": 42575 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485178624613987e-05, + "loss": 0.3136, + "step": 42580 + }, + { + "epoch": 1.99, + "learning_rate": 1.34843948395592e-05, + "loss": 0.2361, + "step": 42585 + }, + { + "epoch": 1.99, + "learning_rate": 1.3483611054504413e-05, + "loss": 0.0656, + "step": 42590 + }, + { + "epoch": 1.99, + "learning_rate": 1.3482827269449628e-05, + "loss": 0.0353, + "step": 42595 + }, + { + "epoch": 1.99, + "learning_rate": 1.348204348439484e-05, + "loss": 0.0433, + "step": 42600 + }, + { + "epoch": 1.99, + "learning_rate": 1.3481259699340055e-05, + "loss": 0.0876, + "step": 42605 + }, + { + "epoch": 1.99, + "learning_rate": 1.3480475914285267e-05, + "loss": 0.1082, + "step": 42610 + }, + { + "epoch": 1.99, + "learning_rate": 1.3479692129230482e-05, + "loss": 0.1904, + "step": 42615 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478908344175695e-05, + "loss": 0.2017, + "step": 42620 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478124559120908e-05, + "loss": 0.1698, + "step": 42625 + }, + { + "epoch": 1.99, + "learning_rate": 1.347734077406612e-05, + "loss": 0.4722, + "step": 42630 + }, + { + "epoch": 1.99, + "learning_rate": 1.3476556989011336e-05, + "loss": 0.2895, + "step": 42635 + }, + { + "epoch": 1.99, + "learning_rate": 1.3475773203956548e-05, + "loss": 0.0633, + "step": 42640 + }, + { + "epoch": 1.99, + "learning_rate": 1.347498941890176e-05, + "loss": 0.0354, + "step": 42645 + }, + { + "epoch": 1.99, + "learning_rate": 1.3474205633846975e-05, + "loss": 0.0921, + "step": 42650 + }, + { + "epoch": 1.99, + "learning_rate": 1.3473421848792187e-05, + "loss": 0.0774, + "step": 42655 + }, + { + "epoch": 1.99, + "learning_rate": 1.3472638063737402e-05, + "loss": 0.1163, + "step": 42660 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471854278682615e-05, + "loss": 0.1109, + "step": 42665 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471070493627829e-05, + "loss": 0.1748, + "step": 42670 + }, + { + "epoch": 1.99, + "learning_rate": 1.347028670857304e-05, + "loss": 0.1627, + "step": 42675 + }, + { + "epoch": 1.99, + "learning_rate": 1.3469502923518256e-05, + "loss": 0.3621, + "step": 42680 + }, + { + "epoch": 1.99, + "learning_rate": 1.3468719138463469e-05, + "loss": 0.193, + "step": 42685 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467935353408682e-05, + "loss": 0.0512, + "step": 42690 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467151568353896e-05, + "loss": 0.0666, + "step": 42695 + }, + { + "epoch": 1.99, + "learning_rate": 1.346636778329911e-05, + "loss": 0.1495, + "step": 42700 + }, + { + "epoch": 1.99, + "learning_rate": 1.3465583998244322e-05, + "loss": 0.13, + "step": 42705 + }, + { + "epoch": 1.99, + "learning_rate": 1.3464800213189535e-05, + "loss": 0.0941, + "step": 42710 + }, + { + "epoch": 1.99, + "learning_rate": 1.346401642813475e-05, + "loss": 0.1118, + "step": 42715 + }, + { + "epoch": 1.99, + "learning_rate": 1.3463232643079963e-05, + "loss": 0.1672, + "step": 42720 + }, + { + "epoch": 1.99, + "learning_rate": 1.3462448858025176e-05, + "loss": 0.2903, + "step": 42725 + }, + { + "epoch": 1.99, + "learning_rate": 1.3461665072970389e-05, + "loss": 0.5016, + "step": 42730 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460881287915604e-05, + "loss": 0.3474, + "step": 42735 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460097502860816e-05, + "loss": 0.0593, + "step": 42740 + }, + { + "epoch": 1.99, + "learning_rate": 1.345931371780603e-05, + "loss": 0.0545, + "step": 42745 + }, + { + "epoch": 1.99, + "learning_rate": 1.3458529932751243e-05, + "loss": 0.0924, + "step": 42750 + }, + { + "epoch": 2.0, + "learning_rate": 1.3457746147696458e-05, + "loss": 0.0801, + "step": 42755 + }, + { + "epoch": 2.0, + "learning_rate": 1.345696236264167e-05, + "loss": 0.0866, + "step": 42760 + }, + { + "epoch": 2.0, + "learning_rate": 1.3456178577586884e-05, + "loss": 0.1346, + "step": 42765 + }, + { + "epoch": 2.0, + "learning_rate": 1.3455394792532096e-05, + "loss": 0.1853, + "step": 42770 + }, + { + "epoch": 2.0, + "learning_rate": 1.3454611007477309e-05, + "loss": 0.1966, + "step": 42775 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453827222422524e-05, + "loss": 0.238, + "step": 42780 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453043437367737e-05, + "loss": 0.2965, + "step": 42785 + }, + { + "epoch": 2.0, + "learning_rate": 1.345225965231295e-05, + "loss": 0.1616, + "step": 42790 + }, + { + "epoch": 2.0, + "learning_rate": 1.3451475867258164e-05, + "loss": 0.0177, + "step": 42795 + }, + { + "epoch": 2.0, + "learning_rate": 1.3450692082203378e-05, + "loss": 0.0763, + "step": 42800 + }, + { + "epoch": 2.0, + "learning_rate": 1.344990829714859e-05, + "loss": 0.108, + "step": 42805 + }, + { + "epoch": 2.0, + "learning_rate": 1.3449124512093806e-05, + "loss": 0.0828, + "step": 42810 + }, + { + "epoch": 2.0, + "learning_rate": 1.3448340727039018e-05, + "loss": 0.0873, + "step": 42815 + }, + { + "epoch": 2.0, + "learning_rate": 1.3447556941984232e-05, + "loss": 0.1054, + "step": 42820 + }, + { + "epoch": 2.0, + "learning_rate": 1.3446773156929444e-05, + "loss": 0.2872, + "step": 42825 + }, + { + "epoch": 2.0, + "learning_rate": 1.344598937187466e-05, + "loss": 0.2569, + "step": 42830 + }, + { + "epoch": 2.0, + "learning_rate": 1.3445205586819872e-05, + "loss": 0.2157, + "step": 42835 + }, + { + "epoch": 2.0, + "learning_rate": 1.3444421801765084e-05, + "loss": 0.074, + "step": 42840 + }, + { + "epoch": 2.0, + "learning_rate": 1.3443638016710298e-05, + "loss": 0.1238, + "step": 42845 + }, + { + "epoch": 2.0, + "learning_rate": 1.344285423165551e-05, + "loss": 0.1821, + "step": 42850 + }, + { + "epoch": 2.0, + "learning_rate": 1.3442070446600726e-05, + "loss": 0.0898, + "step": 42855 + }, + { + "epoch": 2.0, + "learning_rate": 1.3441286661545938e-05, + "loss": 0.269, + "step": 42860 + }, + { + "epoch": 2.0, + "eval_cer": 0.015201216458283512, + "eval_loss": 0.34811559319496155, + "eval_runtime": 472.4357, + "eval_samples_per_second": 40.323, + "eval_steps_per_second": 5.042, + "eval_wer": 0.12934863064396743, + "step": 42862 + }, + { + "epoch": 2.0, + "learning_rate": 1.3440502876491152e-05, + "loss": 0.3587, + "step": 42865 + }, + { + "epoch": 2.0, + "learning_rate": 1.3439719091436364e-05, + "loss": 0.1501, + "step": 42870 + }, + { + "epoch": 2.0, + "learning_rate": 1.343893530638158e-05, + "loss": 0.0647, + "step": 42875 + }, + { + "epoch": 2.0, + "learning_rate": 1.3438151521326792e-05, + "loss": 0.049, + "step": 42880 + }, + { + "epoch": 2.0, + "learning_rate": 1.3437367736272006e-05, + "loss": 0.0998, + "step": 42885 + }, + { + "epoch": 2.0, + "learning_rate": 1.3436583951217218e-05, + "loss": 0.0791, + "step": 42890 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435800166162434e-05, + "loss": 0.2884, + "step": 42895 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435016381107646e-05, + "loss": 0.1765, + "step": 42900 + }, + { + "epoch": 2.0, + "learning_rate": 1.3434232596052858e-05, + "loss": 0.1953, + "step": 42905 + }, + { + "epoch": 2.0, + "learning_rate": 1.3433448810998074e-05, + "loss": 0.2493, + "step": 42910 + }, + { + "epoch": 2.0, + "learning_rate": 1.3432665025943286e-05, + "loss": 0.2425, + "step": 42915 + }, + { + "epoch": 2.0, + "learning_rate": 1.34318812408885e-05, + "loss": 0.0484, + "step": 42920 + }, + { + "epoch": 2.0, + "learning_rate": 1.3431097455833712e-05, + "loss": 0.0687, + "step": 42925 + }, + { + "epoch": 2.0, + "learning_rate": 1.3430313670778928e-05, + "loss": 0.0949, + "step": 42930 + }, + { + "epoch": 2.0, + "learning_rate": 1.342952988572414e-05, + "loss": 0.0677, + "step": 42935 + }, + { + "epoch": 2.0, + "learning_rate": 1.3428746100669354e-05, + "loss": 0.1255, + "step": 42940 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427962315614566e-05, + "loss": 0.1747, + "step": 42945 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427178530559782e-05, + "loss": 0.1615, + "step": 42950 + }, + { + "epoch": 2.0, + "learning_rate": 1.3426394745504994e-05, + "loss": 0.2272, + "step": 42955 + }, + { + "epoch": 2.0, + "learning_rate": 1.3425610960450208e-05, + "loss": 0.4022, + "step": 42960 + }, + { + "epoch": 2.0, + "learning_rate": 1.342482717539542e-05, + "loss": 0.3077, + "step": 42965 + }, + { + "epoch": 2.01, + "learning_rate": 1.3424043390340632e-05, + "loss": 0.0336, + "step": 42970 + }, + { + "epoch": 2.01, + "learning_rate": 1.3423259605285848e-05, + "loss": 0.0245, + "step": 42975 + }, + { + "epoch": 2.01, + "learning_rate": 1.342247582023106e-05, + "loss": 0.0466, + "step": 42980 + }, + { + "epoch": 2.01, + "learning_rate": 1.3421692035176274e-05, + "loss": 0.049, + "step": 42985 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420908250121486e-05, + "loss": 0.081, + "step": 42990 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420124465066702e-05, + "loss": 0.2433, + "step": 42995 + }, + { + "epoch": 2.01, + "learning_rate": 1.3419340680011914e-05, + "loss": 0.0825, + "step": 43000 + }, + { + "epoch": 2.01, + "learning_rate": 1.3418556894957128e-05, + "loss": 0.176, + "step": 43005 + }, + { + "epoch": 2.01, + "learning_rate": 1.3417773109902342e-05, + "loss": 0.3964, + "step": 43010 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416989324847556e-05, + "loss": 0.2797, + "step": 43015 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416205539792768e-05, + "loss": 0.0541, + "step": 43020 + }, + { + "epoch": 2.01, + "learning_rate": 1.3415421754737984e-05, + "loss": 0.1003, + "step": 43025 + }, + { + "epoch": 2.01, + "learning_rate": 1.3414637969683196e-05, + "loss": 0.0925, + "step": 43030 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413854184628408e-05, + "loss": 0.0643, + "step": 43035 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413070399573622e-05, + "loss": 0.0862, + "step": 43040 + }, + { + "epoch": 2.01, + "learning_rate": 1.3412286614518834e-05, + "loss": 0.1016, + "step": 43045 + }, + { + "epoch": 2.01, + "learning_rate": 1.341150282946405e-05, + "loss": 0.1259, + "step": 43050 + }, + { + "epoch": 2.01, + "learning_rate": 1.3410719044409262e-05, + "loss": 0.1814, + "step": 43055 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409935259354476e-05, + "loss": 0.2568, + "step": 43060 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409151474299688e-05, + "loss": 0.3472, + "step": 43065 + }, + { + "epoch": 2.01, + "learning_rate": 1.3408367689244904e-05, + "loss": 0.0966, + "step": 43070 + }, + { + "epoch": 2.01, + "learning_rate": 1.3407583904190116e-05, + "loss": 0.0378, + "step": 43075 + }, + { + "epoch": 2.01, + "learning_rate": 1.340680011913533e-05, + "loss": 0.066, + "step": 43080 + }, + { + "epoch": 2.01, + "learning_rate": 1.3406016334080542e-05, + "loss": 0.1326, + "step": 43085 + }, + { + "epoch": 2.01, + "learning_rate": 1.3405232549025758e-05, + "loss": 0.0976, + "step": 43090 + }, + { + "epoch": 2.01, + "learning_rate": 1.340444876397097e-05, + "loss": 0.1264, + "step": 43095 + }, + { + "epoch": 2.01, + "learning_rate": 1.3403664978916182e-05, + "loss": 0.1619, + "step": 43100 + }, + { + "epoch": 2.01, + "learning_rate": 1.3402881193861396e-05, + "loss": 0.2066, + "step": 43105 + }, + { + "epoch": 2.01, + "learning_rate": 1.340209740880661e-05, + "loss": 0.2903, + "step": 43110 + }, + { + "epoch": 2.01, + "learning_rate": 1.3401313623751824e-05, + "loss": 0.2423, + "step": 43115 + }, + { + "epoch": 2.01, + "learning_rate": 1.3400529838697036e-05, + "loss": 0.0823, + "step": 43120 + }, + { + "epoch": 2.01, + "learning_rate": 1.3399746053642252e-05, + "loss": 0.0537, + "step": 43125 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398962268587464e-05, + "loss": 0.061, + "step": 43130 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398178483532678e-05, + "loss": 0.1014, + "step": 43135 + }, + { + "epoch": 2.01, + "learning_rate": 1.339739469847789e-05, + "loss": 0.1187, + "step": 43140 + }, + { + "epoch": 2.01, + "learning_rate": 1.3396610913423106e-05, + "loss": 0.0802, + "step": 43145 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395827128368318e-05, + "loss": 0.1449, + "step": 43150 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395043343313532e-05, + "loss": 0.2398, + "step": 43155 + }, + { + "epoch": 2.01, + "learning_rate": 1.3394259558258744e-05, + "loss": 0.1875, + "step": 43160 + }, + { + "epoch": 2.01, + "learning_rate": 1.3393475773203956e-05, + "loss": 0.3293, + "step": 43165 + }, + { + "epoch": 2.01, + "learning_rate": 1.3392691988149172e-05, + "loss": 0.0328, + "step": 43170 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391908203094384e-05, + "loss": 0.0352, + "step": 43175 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391124418039598e-05, + "loss": 0.0326, + "step": 43180 + }, + { + "epoch": 2.02, + "learning_rate": 1.339034063298481e-05, + "loss": 0.1078, + "step": 43185 + }, + { + "epoch": 2.02, + "learning_rate": 1.3389556847930026e-05, + "loss": 0.0485, + "step": 43190 + }, + { + "epoch": 2.02, + "learning_rate": 1.3388773062875238e-05, + "loss": 0.1721, + "step": 43195 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387989277820452e-05, + "loss": 0.2037, + "step": 43200 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387205492765664e-05, + "loss": 0.1506, + "step": 43205 + }, + { + "epoch": 2.02, + "learning_rate": 1.338642170771088e-05, + "loss": 0.2595, + "step": 43210 + }, + { + "epoch": 2.02, + "learning_rate": 1.3385637922656092e-05, + "loss": 0.2567, + "step": 43215 + }, + { + "epoch": 2.02, + "learning_rate": 1.3384854137601306e-05, + "loss": 0.0354, + "step": 43220 + }, + { + "epoch": 2.02, + "learning_rate": 1.338407035254652e-05, + "loss": 0.0445, + "step": 43225 + }, + { + "epoch": 2.02, + "learning_rate": 1.3383286567491732e-05, + "loss": 0.1386, + "step": 43230 + }, + { + "epoch": 2.02, + "learning_rate": 1.3382502782436946e-05, + "loss": 0.1286, + "step": 43235 + }, + { + "epoch": 2.02, + "learning_rate": 1.3381718997382158e-05, + "loss": 0.1383, + "step": 43240 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380935212327373e-05, + "loss": 0.164, + "step": 43245 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380151427272586e-05, + "loss": 0.1469, + "step": 43250 + }, + { + "epoch": 2.02, + "learning_rate": 1.33793676422178e-05, + "loss": 0.1615, + "step": 43255 + }, + { + "epoch": 2.02, + "learning_rate": 1.3378583857163012e-05, + "loss": 0.2817, + "step": 43260 + }, + { + "epoch": 2.02, + "learning_rate": 1.3377800072108227e-05, + "loss": 0.3087, + "step": 43265 + }, + { + "epoch": 2.02, + "learning_rate": 1.337701628705344e-05, + "loss": 0.0857, + "step": 43270 + }, + { + "epoch": 2.02, + "learning_rate": 1.3376232501998654e-05, + "loss": 0.0569, + "step": 43275 + }, + { + "epoch": 2.02, + "learning_rate": 1.3375448716943866e-05, + "loss": 0.0329, + "step": 43280 + }, + { + "epoch": 2.02, + "learning_rate": 1.3374664931889081e-05, + "loss": 0.0912, + "step": 43285 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373881146834294e-05, + "loss": 0.0769, + "step": 43290 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373097361779506e-05, + "loss": 0.0714, + "step": 43295 + }, + { + "epoch": 2.02, + "learning_rate": 1.337231357672472e-05, + "loss": 0.1705, + "step": 43300 + }, + { + "epoch": 2.02, + "learning_rate": 1.3371529791669932e-05, + "loss": 0.3426, + "step": 43305 + }, + { + "epoch": 2.02, + "learning_rate": 1.3370746006615147e-05, + "loss": 0.3034, + "step": 43310 + }, + { + "epoch": 2.02, + "learning_rate": 1.336996222156036e-05, + "loss": 0.2828, + "step": 43315 + }, + { + "epoch": 2.02, + "learning_rate": 1.3369178436505574e-05, + "loss": 0.0257, + "step": 43320 + }, + { + "epoch": 2.02, + "learning_rate": 1.3368394651450788e-05, + "loss": 0.0729, + "step": 43325 + }, + { + "epoch": 2.02, + "learning_rate": 1.3367610866396001e-05, + "loss": 0.132, + "step": 43330 + }, + { + "epoch": 2.02, + "learning_rate": 1.3366827081341214e-05, + "loss": 0.1212, + "step": 43335 + }, + { + "epoch": 2.02, + "learning_rate": 1.336604329628643e-05, + "loss": 0.1476, + "step": 43340 + }, + { + "epoch": 2.02, + "learning_rate": 1.3365259511231641e-05, + "loss": 0.1749, + "step": 43345 + }, + { + "epoch": 2.02, + "learning_rate": 1.3364475726176855e-05, + "loss": 0.2094, + "step": 43350 + }, + { + "epoch": 2.02, + "learning_rate": 1.3363691941122068e-05, + "loss": 0.1269, + "step": 43355 + }, + { + "epoch": 2.02, + "learning_rate": 1.336290815606728e-05, + "loss": 0.397, + "step": 43360 + }, + { + "epoch": 2.02, + "learning_rate": 1.3362124371012495e-05, + "loss": 0.3699, + "step": 43365 + }, + { + "epoch": 2.02, + "learning_rate": 1.3361340585957708e-05, + "loss": 0.0477, + "step": 43370 + }, + { + "epoch": 2.02, + "learning_rate": 1.3360556800902921e-05, + "loss": 0.1156, + "step": 43375 + }, + { + "epoch": 2.02, + "learning_rate": 1.3359773015848134e-05, + "loss": 0.0311, + "step": 43380 + }, + { + "epoch": 2.02, + "learning_rate": 1.335898923079335e-05, + "loss": 0.3633, + "step": 43385 + }, + { + "epoch": 2.02, + "learning_rate": 1.3358205445738562e-05, + "loss": 0.1132, + "step": 43390 + }, + { + "epoch": 2.02, + "learning_rate": 1.3357421660683775e-05, + "loss": 0.0839, + "step": 43395 + }, + { + "epoch": 2.03, + "learning_rate": 1.3356637875628988e-05, + "loss": 0.1844, + "step": 43400 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355854090574203e-05, + "loss": 0.2091, + "step": 43405 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355070305519415e-05, + "loss": 0.2798, + "step": 43410 + }, + { + "epoch": 2.03, + "learning_rate": 1.335428652046463e-05, + "loss": 0.2263, + "step": 43415 + }, + { + "epoch": 2.03, + "learning_rate": 1.3353502735409842e-05, + "loss": 0.0277, + "step": 43420 + }, + { + "epoch": 2.03, + "learning_rate": 1.3352718950355055e-05, + "loss": 0.0383, + "step": 43425 + }, + { + "epoch": 2.03, + "learning_rate": 1.335193516530027e-05, + "loss": 0.0466, + "step": 43430 + }, + { + "epoch": 2.03, + "learning_rate": 1.3351151380245482e-05, + "loss": 0.0454, + "step": 43435 + }, + { + "epoch": 2.03, + "learning_rate": 1.3350367595190697e-05, + "loss": 0.1619, + "step": 43440 + }, + { + "epoch": 2.03, + "learning_rate": 1.3349740567146868e-05, + "loss": 0.0808, + "step": 43445 + }, + { + "epoch": 2.03, + "learning_rate": 1.334895678209208e-05, + "loss": 0.1816, + "step": 43450 + }, + { + "epoch": 2.03, + "learning_rate": 1.3348172997037294e-05, + "loss": 0.2489, + "step": 43455 + }, + { + "epoch": 2.03, + "learning_rate": 1.3347389211982506e-05, + "loss": 0.2021, + "step": 43460 + }, + { + "epoch": 2.03, + "learning_rate": 1.3346762183938678e-05, + "loss": 0.3291, + "step": 43465 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345978398883892e-05, + "loss": 0.0624, + "step": 43470 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345194613829104e-05, + "loss": 0.0649, + "step": 43475 + }, + { + "epoch": 2.03, + "learning_rate": 1.334441082877432e-05, + "loss": 0.093, + "step": 43480 + }, + { + "epoch": 2.03, + "learning_rate": 1.3343627043719532e-05, + "loss": 0.124, + "step": 43485 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342843258664744e-05, + "loss": 0.0791, + "step": 43490 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342059473609958e-05, + "loss": 0.1554, + "step": 43495 + }, + { + "epoch": 2.03, + "learning_rate": 1.334127568855517e-05, + "loss": 0.1531, + "step": 43500 + }, + { + "epoch": 2.03, + "learning_rate": 1.3340491903500386e-05, + "loss": 0.1726, + "step": 43505 + }, + { + "epoch": 2.03, + "learning_rate": 1.3339708118445598e-05, + "loss": 0.3568, + "step": 43510 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338924333390812e-05, + "loss": 0.3171, + "step": 43515 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338140548336024e-05, + "loss": 0.0196, + "step": 43520 + }, + { + "epoch": 2.03, + "learning_rate": 1.333735676328124e-05, + "loss": 0.0832, + "step": 43525 + }, + { + "epoch": 2.03, + "learning_rate": 1.3336572978226452e-05, + "loss": 0.0747, + "step": 43530 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335789193171666e-05, + "loss": 0.0519, + "step": 43535 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335005408116878e-05, + "loss": 0.1477, + "step": 43540 + }, + { + "epoch": 2.03, + "learning_rate": 1.3334221623062094e-05, + "loss": 0.231, + "step": 43545 + }, + { + "epoch": 2.03, + "learning_rate": 1.3333437838007306e-05, + "loss": 0.2418, + "step": 43550 + }, + { + "epoch": 2.03, + "learning_rate": 1.3332654052952518e-05, + "loss": 0.2191, + "step": 43555 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331870267897732e-05, + "loss": 0.396, + "step": 43560 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331086482842946e-05, + "loss": 0.3743, + "step": 43565 + }, + { + "epoch": 2.03, + "learning_rate": 1.333030269778816e-05, + "loss": 0.0503, + "step": 43570 + }, + { + "epoch": 2.03, + "learning_rate": 1.3329518912733372e-05, + "loss": 0.0244, + "step": 43575 + }, + { + "epoch": 2.03, + "learning_rate": 1.3328735127678588e-05, + "loss": 0.0543, + "step": 43580 + }, + { + "epoch": 2.03, + "learning_rate": 1.33279513426238e-05, + "loss": 0.0519, + "step": 43585 + }, + { + "epoch": 2.03, + "learning_rate": 1.3327167557569014e-05, + "loss": 0.0916, + "step": 43590 + }, + { + "epoch": 2.03, + "learning_rate": 1.3326383772514226e-05, + "loss": 0.167, + "step": 43595 + }, + { + "epoch": 2.03, + "learning_rate": 1.3325599987459442e-05, + "loss": 0.1703, + "step": 43600 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324816202404654e-05, + "loss": 0.1012, + "step": 43605 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324032417349868e-05, + "loss": 0.1885, + "step": 43610 + }, + { + "epoch": 2.04, + "learning_rate": 1.332324863229508e-05, + "loss": 0.2534, + "step": 43615 + }, + { + "epoch": 2.04, + "learning_rate": 1.3322464847240292e-05, + "loss": 0.0711, + "step": 43620 + }, + { + "epoch": 2.04, + "learning_rate": 1.3321681062185508e-05, + "loss": 0.0885, + "step": 43625 + }, + { + "epoch": 2.04, + "learning_rate": 1.332089727713072e-05, + "loss": 0.0919, + "step": 43630 + }, + { + "epoch": 2.04, + "learning_rate": 1.3320113492075934e-05, + "loss": 0.0972, + "step": 43635 + }, + { + "epoch": 2.04, + "learning_rate": 1.3319329707021146e-05, + "loss": 0.0946, + "step": 43640 + }, + { + "epoch": 2.04, + "learning_rate": 1.3318545921966362e-05, + "loss": 0.0836, + "step": 43645 + }, + { + "epoch": 2.04, + "learning_rate": 1.3317762136911574e-05, + "loss": 0.1282, + "step": 43650 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316978351856788e-05, + "loss": 0.1321, + "step": 43655 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316194566802002e-05, + "loss": 0.266, + "step": 43660 + }, + { + "epoch": 2.04, + "learning_rate": 1.3315410781747216e-05, + "loss": 0.3727, + "step": 43665 + }, + { + "epoch": 2.04, + "learning_rate": 1.3314626996692428e-05, + "loss": 0.0841, + "step": 43670 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313843211637642e-05, + "loss": 0.0567, + "step": 43675 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313059426582856e-05, + "loss": 0.0792, + "step": 43680 + }, + { + "epoch": 2.04, + "learning_rate": 1.3312275641528068e-05, + "loss": 0.0857, + "step": 43685 + }, + { + "epoch": 2.04, + "learning_rate": 1.3311491856473282e-05, + "loss": 0.0897, + "step": 43690 + }, + { + "epoch": 2.04, + "learning_rate": 1.3310708071418494e-05, + "loss": 0.1323, + "step": 43695 + }, + { + "epoch": 2.04, + "learning_rate": 1.330992428636371e-05, + "loss": 0.3455, + "step": 43700 + }, + { + "epoch": 2.04, + "learning_rate": 1.3309140501308922e-05, + "loss": 0.1129, + "step": 43705 + }, + { + "epoch": 2.04, + "learning_rate": 1.3308356716254136e-05, + "loss": 0.2136, + "step": 43710 + }, + { + "epoch": 2.04, + "learning_rate": 1.3307572931199348e-05, + "loss": 0.3236, + "step": 43715 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306789146144564e-05, + "loss": 0.0699, + "step": 43720 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306005361089776e-05, + "loss": 0.063, + "step": 43725 + }, + { + "epoch": 2.04, + "learning_rate": 1.330522157603499e-05, + "loss": 0.0609, + "step": 43730 + }, + { + "epoch": 2.04, + "learning_rate": 1.3304437790980202e-05, + "loss": 0.0902, + "step": 43735 + }, + { + "epoch": 2.04, + "learning_rate": 1.3303654005925417e-05, + "loss": 0.0967, + "step": 43740 + }, + { + "epoch": 2.04, + "learning_rate": 1.330287022087063e-05, + "loss": 0.1716, + "step": 43745 + }, + { + "epoch": 2.04, + "learning_rate": 1.3302086435815842e-05, + "loss": 0.117, + "step": 43750 + }, + { + "epoch": 2.04, + "learning_rate": 1.3301302650761056e-05, + "loss": 0.2584, + "step": 43755 + }, + { + "epoch": 2.04, + "learning_rate": 1.330051886570627e-05, + "loss": 0.3534, + "step": 43760 + }, + { + "epoch": 2.04, + "learning_rate": 1.3299735080651484e-05, + "loss": 0.2967, + "step": 43765 + }, + { + "epoch": 2.04, + "learning_rate": 1.3298951295596696e-05, + "loss": 0.0437, + "step": 43770 + }, + { + "epoch": 2.04, + "learning_rate": 1.329816751054191e-05, + "loss": 0.0362, + "step": 43775 + }, + { + "epoch": 2.04, + "learning_rate": 1.3297383725487124e-05, + "loss": 0.0681, + "step": 43780 + }, + { + "epoch": 2.04, + "learning_rate": 1.3296599940432338e-05, + "loss": 0.0636, + "step": 43785 + }, + { + "epoch": 2.04, + "learning_rate": 1.329581615537755e-05, + "loss": 0.1009, + "step": 43790 + }, + { + "epoch": 2.04, + "learning_rate": 1.3295032370322765e-05, + "loss": 0.1697, + "step": 43795 + }, + { + "epoch": 2.04, + "learning_rate": 1.3294248585267978e-05, + "loss": 0.1545, + "step": 43800 + }, + { + "epoch": 2.04, + "learning_rate": 1.3293464800213191e-05, + "loss": 0.1749, + "step": 43805 + }, + { + "epoch": 2.04, + "learning_rate": 1.3292681015158404e-05, + "loss": 0.2246, + "step": 43810 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291897230103616e-05, + "loss": 0.2266, + "step": 43815 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291113445048832e-05, + "loss": 0.0738, + "step": 43820 + }, + { + "epoch": 2.04, + "learning_rate": 1.3290329659994044e-05, + "loss": 0.0333, + "step": 43825 + }, + { + "epoch": 2.05, + "learning_rate": 1.3289545874939258e-05, + "loss": 0.0439, + "step": 43830 + }, + { + "epoch": 2.05, + "learning_rate": 1.328876208988447e-05, + "loss": 0.0997, + "step": 43835 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287978304829685e-05, + "loss": 0.0818, + "step": 43840 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287194519774898e-05, + "loss": 0.0876, + "step": 43845 + }, + { + "epoch": 2.05, + "learning_rate": 1.3286410734720112e-05, + "loss": 0.1397, + "step": 43850 + }, + { + "epoch": 2.05, + "learning_rate": 1.3285626949665324e-05, + "loss": 0.1035, + "step": 43855 + }, + { + "epoch": 2.05, + "learning_rate": 1.328484316461054e-05, + "loss": 0.3954, + "step": 43860 + }, + { + "epoch": 2.05, + "learning_rate": 1.3284059379555752e-05, + "loss": 0.4025, + "step": 43865 + }, + { + "epoch": 2.05, + "learning_rate": 1.3283275594500965e-05, + "loss": 0.0292, + "step": 43870 + }, + { + "epoch": 2.05, + "learning_rate": 1.328249180944618e-05, + "loss": 0.091, + "step": 43875 + }, + { + "epoch": 2.05, + "learning_rate": 1.3281708024391392e-05, + "loss": 0.0692, + "step": 43880 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280924239336606e-05, + "loss": 0.0826, + "step": 43885 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280140454281818e-05, + "loss": 0.1142, + "step": 43890 + }, + { + "epoch": 2.05, + "learning_rate": 1.3279356669227033e-05, + "loss": 0.1234, + "step": 43895 + }, + { + "epoch": 2.05, + "learning_rate": 1.3278572884172246e-05, + "loss": 0.1834, + "step": 43900 + }, + { + "epoch": 2.05, + "learning_rate": 1.327778909911746e-05, + "loss": 0.227, + "step": 43905 + }, + { + "epoch": 2.05, + "learning_rate": 1.3277005314062672e-05, + "loss": 0.336, + "step": 43910 + }, + { + "epoch": 2.05, + "learning_rate": 1.3276221529007887e-05, + "loss": 0.2137, + "step": 43915 + }, + { + "epoch": 2.05, + "learning_rate": 1.32754377439531e-05, + "loss": 0.08, + "step": 43920 + }, + { + "epoch": 2.05, + "learning_rate": 1.3274653958898313e-05, + "loss": 0.0813, + "step": 43925 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273870173843526e-05, + "loss": 0.0463, + "step": 43930 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273086388788741e-05, + "loss": 0.0811, + "step": 43935 + }, + { + "epoch": 2.05, + "learning_rate": 1.3272302603733953e-05, + "loss": 0.0861, + "step": 43940 + }, + { + "epoch": 2.05, + "learning_rate": 1.3271518818679166e-05, + "loss": 0.1037, + "step": 43945 + }, + { + "epoch": 2.05, + "learning_rate": 1.327073503362438e-05, + "loss": 0.2025, + "step": 43950 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269951248569592e-05, + "loss": 0.2942, + "step": 43955 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269167463514807e-05, + "loss": 0.1693, + "step": 43960 + }, + { + "epoch": 2.05, + "learning_rate": 1.326838367846002e-05, + "loss": 0.269, + "step": 43965 + }, + { + "epoch": 2.05, + "learning_rate": 1.3267599893405233e-05, + "loss": 0.1356, + "step": 43970 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266816108350447e-05, + "loss": 0.016, + "step": 43975 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266032323295661e-05, + "loss": 0.0608, + "step": 43980 + }, + { + "epoch": 2.05, + "learning_rate": 1.3265248538240873e-05, + "loss": 0.0614, + "step": 43985 + }, + { + "epoch": 2.05, + "learning_rate": 1.3264464753186087e-05, + "loss": 0.0645, + "step": 43990 + }, + { + "epoch": 2.05, + "learning_rate": 1.3263680968131301e-05, + "loss": 0.1768, + "step": 43995 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262897183076515e-05, + "loss": 0.0816, + "step": 44000 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262113398021727e-05, + "loss": 0.121, + "step": 44005 + }, + { + "epoch": 2.05, + "learning_rate": 1.326132961296694e-05, + "loss": 0.1856, + "step": 44010 + }, + { + "epoch": 2.05, + "learning_rate": 1.3260545827912155e-05, + "loss": 0.2582, + "step": 44015 + }, + { + "epoch": 2.05, + "learning_rate": 1.3259762042857367e-05, + "loss": 0.0342, + "step": 44020 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258978257802581e-05, + "loss": 0.0482, + "step": 44025 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258194472747794e-05, + "loss": 0.066, + "step": 44030 + }, + { + "epoch": 2.05, + "learning_rate": 1.3257410687693009e-05, + "loss": 0.1002, + "step": 44035 + }, + { + "epoch": 2.05, + "learning_rate": 1.3256626902638221e-05, + "loss": 0.0774, + "step": 44040 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255843117583435e-05, + "loss": 0.2158, + "step": 44045 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255059332528647e-05, + "loss": 0.1574, + "step": 44050 + }, + { + "epoch": 2.06, + "learning_rate": 1.3254275547473863e-05, + "loss": 0.2037, + "step": 44055 + }, + { + "epoch": 2.06, + "learning_rate": 1.3253491762419075e-05, + "loss": 0.3914, + "step": 44060 + }, + { + "epoch": 2.06, + "learning_rate": 1.325270797736429e-05, + "loss": 0.299, + "step": 44065 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251924192309501e-05, + "loss": 0.0783, + "step": 44070 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251140407254715e-05, + "loss": 0.0358, + "step": 44075 + }, + { + "epoch": 2.06, + "learning_rate": 1.325035662219993e-05, + "loss": 0.0511, + "step": 44080 + }, + { + "epoch": 2.06, + "learning_rate": 1.3249572837145141e-05, + "loss": 0.0737, + "step": 44085 + }, + { + "epoch": 2.06, + "learning_rate": 1.3248789052090355e-05, + "loss": 0.0319, + "step": 44090 + }, + { + "epoch": 2.06, + "learning_rate": 1.324800526703557e-05, + "loss": 0.0438, + "step": 44095 + }, + { + "epoch": 2.06, + "learning_rate": 1.3247221481980783e-05, + "loss": 0.1289, + "step": 44100 + }, + { + "epoch": 2.06, + "learning_rate": 1.3246437696925995e-05, + "loss": 0.2054, + "step": 44105 + }, + { + "epoch": 2.06, + "learning_rate": 1.3245653911871211e-05, + "loss": 0.2511, + "step": 44110 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244870126816423e-05, + "loss": 0.3578, + "step": 44115 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244086341761637e-05, + "loss": 0.0513, + "step": 44120 + }, + { + "epoch": 2.06, + "learning_rate": 1.324330255670685e-05, + "loss": 0.0485, + "step": 44125 + }, + { + "epoch": 2.06, + "learning_rate": 1.3242518771652065e-05, + "loss": 0.0457, + "step": 44130 + }, + { + "epoch": 2.06, + "learning_rate": 1.3241734986597277e-05, + "loss": 0.0965, + "step": 44135 + }, + { + "epoch": 2.06, + "learning_rate": 1.324095120154249e-05, + "loss": 0.0919, + "step": 44140 + }, + { + "epoch": 2.06, + "learning_rate": 1.3240167416487703e-05, + "loss": 0.1236, + "step": 44145 + }, + { + "epoch": 2.06, + "learning_rate": 1.3239383631432915e-05, + "loss": 0.1037, + "step": 44150 + }, + { + "epoch": 2.06, + "learning_rate": 1.3238599846378131e-05, + "loss": 0.1805, + "step": 44155 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237816061323343e-05, + "loss": 0.2902, + "step": 44160 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237032276268557e-05, + "loss": 0.2851, + "step": 44165 + }, + { + "epoch": 2.06, + "learning_rate": 1.323624849121377e-05, + "loss": 0.0588, + "step": 44170 + }, + { + "epoch": 2.06, + "learning_rate": 1.3235464706158985e-05, + "loss": 0.1677, + "step": 44175 + }, + { + "epoch": 2.06, + "learning_rate": 1.3234680921104197e-05, + "loss": 0.0701, + "step": 44180 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233897136049411e-05, + "loss": 0.0714, + "step": 44185 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233113350994625e-05, + "loss": 0.0526, + "step": 44190 + }, + { + "epoch": 2.06, + "learning_rate": 1.3232329565939839e-05, + "loss": 0.0875, + "step": 44195 + }, + { + "epoch": 2.06, + "learning_rate": 1.3231545780885051e-05, + "loss": 0.1067, + "step": 44200 + }, + { + "epoch": 2.06, + "learning_rate": 1.3230761995830263e-05, + "loss": 0.1859, + "step": 44205 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229978210775479e-05, + "loss": 0.2862, + "step": 44210 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229194425720691e-05, + "loss": 0.3919, + "step": 44215 + }, + { + "epoch": 2.06, + "learning_rate": 1.3228410640665905e-05, + "loss": 0.0165, + "step": 44220 + }, + { + "epoch": 2.06, + "learning_rate": 1.3227626855611117e-05, + "loss": 0.0282, + "step": 44225 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226843070556333e-05, + "loss": 0.1177, + "step": 44230 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226059285501545e-05, + "loss": 0.0937, + "step": 44235 + }, + { + "epoch": 2.06, + "learning_rate": 1.3225275500446759e-05, + "loss": 0.1239, + "step": 44240 + }, + { + "epoch": 2.06, + "learning_rate": 1.3224491715391971e-05, + "loss": 0.1821, + "step": 44245 + }, + { + "epoch": 2.06, + "learning_rate": 1.3223707930337187e-05, + "loss": 0.205, + "step": 44250 + }, + { + "epoch": 2.06, + "learning_rate": 1.3222924145282399e-05, + "loss": 0.1652, + "step": 44255 + }, + { + "epoch": 2.07, + "learning_rate": 1.3222140360227613e-05, + "loss": 0.1785, + "step": 44260 + }, + { + "epoch": 2.07, + "learning_rate": 1.3221356575172825e-05, + "loss": 0.2491, + "step": 44265 + }, + { + "epoch": 2.07, + "learning_rate": 1.3220572790118037e-05, + "loss": 0.026, + "step": 44270 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219789005063253e-05, + "loss": 0.0235, + "step": 44275 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219005220008465e-05, + "loss": 0.0774, + "step": 44280 + }, + { + "epoch": 2.07, + "learning_rate": 1.3218221434953679e-05, + "loss": 0.08, + "step": 44285 + }, + { + "epoch": 2.07, + "learning_rate": 1.3217437649898893e-05, + "loss": 0.0742, + "step": 44290 + }, + { + "epoch": 2.07, + "learning_rate": 1.3216653864844107e-05, + "loss": 0.1891, + "step": 44295 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215870079789319e-05, + "loss": 0.1566, + "step": 44300 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215086294734533e-05, + "loss": 0.2266, + "step": 44305 + }, + { + "epoch": 2.07, + "learning_rate": 1.3214302509679747e-05, + "loss": 0.2815, + "step": 44310 + }, + { + "epoch": 2.07, + "learning_rate": 1.321351872462496e-05, + "loss": 0.2934, + "step": 44315 + }, + { + "epoch": 2.07, + "learning_rate": 1.3212734939570173e-05, + "loss": 0.0386, + "step": 44320 + }, + { + "epoch": 2.07, + "learning_rate": 1.3211951154515389e-05, + "loss": 0.079, + "step": 44325 + }, + { + "epoch": 2.07, + "learning_rate": 1.32111673694606e-05, + "loss": 0.0483, + "step": 44330 + }, + { + "epoch": 2.07, + "learning_rate": 1.3210383584405813e-05, + "loss": 0.12, + "step": 44335 + }, + { + "epoch": 2.07, + "learning_rate": 1.3209599799351027e-05, + "loss": 0.1331, + "step": 44340 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208816014296239e-05, + "loss": 0.1102, + "step": 44345 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208032229241455e-05, + "loss": 0.116, + "step": 44350 + }, + { + "epoch": 2.07, + "learning_rate": 1.3207248444186667e-05, + "loss": 0.1937, + "step": 44355 + }, + { + "epoch": 2.07, + "learning_rate": 1.320646465913188e-05, + "loss": 0.2692, + "step": 44360 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205837631088051e-05, + "loss": 0.3699, + "step": 44365 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205053846033265e-05, + "loss": 0.0153, + "step": 44370 + }, + { + "epoch": 2.07, + "learning_rate": 1.3204270060978479e-05, + "loss": 0.0688, + "step": 44375 + }, + { + "epoch": 2.07, + "learning_rate": 1.3203486275923691e-05, + "loss": 0.0807, + "step": 44380 + }, + { + "epoch": 2.07, + "learning_rate": 1.3202702490868907e-05, + "loss": 0.0753, + "step": 44385 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201918705814119e-05, + "loss": 0.083, + "step": 44390 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201134920759333e-05, + "loss": 0.165, + "step": 44395 + }, + { + "epoch": 2.07, + "learning_rate": 1.3200351135704545e-05, + "loss": 0.169, + "step": 44400 + }, + { + "epoch": 2.07, + "learning_rate": 1.3199567350649757e-05, + "loss": 0.2277, + "step": 44405 + }, + { + "epoch": 2.07, + "learning_rate": 1.3198783565594973e-05, + "loss": 0.3473, + "step": 44410 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197999780540185e-05, + "loss": 0.2985, + "step": 44415 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197215995485399e-05, + "loss": 0.0606, + "step": 44420 + }, + { + "epoch": 2.07, + "learning_rate": 1.3196432210430611e-05, + "loss": 0.0483, + "step": 44425 + }, + { + "epoch": 2.07, + "learning_rate": 1.3195648425375827e-05, + "loss": 0.0787, + "step": 44430 + }, + { + "epoch": 2.07, + "learning_rate": 1.319486464032104e-05, + "loss": 0.1182, + "step": 44435 + }, + { + "epoch": 2.07, + "learning_rate": 1.3194080855266253e-05, + "loss": 0.152, + "step": 44440 + }, + { + "epoch": 2.07, + "learning_rate": 1.3193297070211465e-05, + "loss": 0.1204, + "step": 44445 + }, + { + "epoch": 2.07, + "learning_rate": 1.3192513285156681e-05, + "loss": 0.0873, + "step": 44450 + }, + { + "epoch": 2.07, + "learning_rate": 1.3191729500101893e-05, + "loss": 0.1337, + "step": 44455 + }, + { + "epoch": 2.07, + "learning_rate": 1.3190945715047107e-05, + "loss": 0.2727, + "step": 44460 + }, + { + "epoch": 2.07, + "learning_rate": 1.319016192999232e-05, + "loss": 0.2811, + "step": 44465 + }, + { + "epoch": 2.08, + "learning_rate": 1.3189378144937533e-05, + "loss": 0.0753, + "step": 44470 + }, + { + "epoch": 2.08, + "learning_rate": 1.3188594359882747e-05, + "loss": 0.0589, + "step": 44475 + }, + { + "epoch": 2.08, + "learning_rate": 1.318781057482796e-05, + "loss": 0.1141, + "step": 44480 + }, + { + "epoch": 2.08, + "learning_rate": 1.3187026789773175e-05, + "loss": 0.0495, + "step": 44485 + }, + { + "epoch": 2.08, + "learning_rate": 1.3186243004718387e-05, + "loss": 0.0716, + "step": 44490 + }, + { + "epoch": 2.08, + "learning_rate": 1.3185459219663601e-05, + "loss": 0.1369, + "step": 44495 + }, + { + "epoch": 2.08, + "learning_rate": 1.3184675434608813e-05, + "loss": 0.1567, + "step": 44500 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183891649554029e-05, + "loss": 0.1988, + "step": 44505 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183107864499241e-05, + "loss": 0.3542, + "step": 44510 + }, + { + "epoch": 2.08, + "learning_rate": 1.3182324079444455e-05, + "loss": 0.3402, + "step": 44515 + }, + { + "epoch": 2.08, + "learning_rate": 1.3181540294389667e-05, + "loss": 0.0784, + "step": 44520 + }, + { + "epoch": 2.08, + "learning_rate": 1.3180756509334883e-05, + "loss": 0.0768, + "step": 44525 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179972724280095e-05, + "loss": 0.0386, + "step": 44530 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179188939225307e-05, + "loss": 0.0284, + "step": 44535 + }, + { + "epoch": 2.08, + "learning_rate": 1.3178405154170521e-05, + "loss": 0.0668, + "step": 44540 + }, + { + "epoch": 2.08, + "learning_rate": 1.3177621369115733e-05, + "loss": 0.1517, + "step": 44545 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176837584060949e-05, + "loss": 0.1934, + "step": 44550 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176053799006161e-05, + "loss": 0.321, + "step": 44555 + }, + { + "epoch": 2.08, + "learning_rate": 1.3175270013951375e-05, + "loss": 0.3407, + "step": 44560 + }, + { + "epoch": 2.08, + "learning_rate": 1.3174486228896587e-05, + "loss": 0.2514, + "step": 44565 + }, + { + "epoch": 2.08, + "learning_rate": 1.3173702443841803e-05, + "loss": 0.041, + "step": 44570 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172918658787015e-05, + "loss": 0.0766, + "step": 44575 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172134873732229e-05, + "loss": 0.0465, + "step": 44580 + }, + { + "epoch": 2.08, + "learning_rate": 1.3171351088677443e-05, + "loss": 0.1087, + "step": 44585 + }, + { + "epoch": 2.08, + "learning_rate": 1.3170567303622657e-05, + "loss": 0.1005, + "step": 44590 + }, + { + "epoch": 2.08, + "learning_rate": 1.3169783518567869e-05, + "loss": 0.1296, + "step": 44595 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168999733513081e-05, + "loss": 0.1509, + "step": 44600 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168215948458297e-05, + "loss": 0.1615, + "step": 44605 + }, + { + "epoch": 2.08, + "learning_rate": 1.3167432163403509e-05, + "loss": 0.1695, + "step": 44610 + }, + { + "epoch": 2.08, + "learning_rate": 1.3166648378348723e-05, + "loss": 0.2675, + "step": 44615 + }, + { + "epoch": 2.08, + "learning_rate": 1.3165864593293935e-05, + "loss": 0.0326, + "step": 44620 + }, + { + "epoch": 2.08, + "learning_rate": 1.316508080823915e-05, + "loss": 0.065, + "step": 44625 + }, + { + "epoch": 2.08, + "learning_rate": 1.3164297023184363e-05, + "loss": 0.0504, + "step": 44630 + }, + { + "epoch": 2.08, + "learning_rate": 1.3163513238129577e-05, + "loss": 0.0747, + "step": 44635 + }, + { + "epoch": 2.08, + "learning_rate": 1.3162729453074789e-05, + "loss": 0.064, + "step": 44640 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161945668020005e-05, + "loss": 0.1414, + "step": 44645 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161161882965217e-05, + "loss": 0.2267, + "step": 44650 + }, + { + "epoch": 2.08, + "learning_rate": 1.316037809791043e-05, + "loss": 0.1493, + "step": 44655 + }, + { + "epoch": 2.08, + "learning_rate": 1.3159594312855643e-05, + "loss": 0.3172, + "step": 44660 + }, + { + "epoch": 2.08, + "learning_rate": 1.3158810527800857e-05, + "loss": 0.2474, + "step": 44665 + }, + { + "epoch": 2.08, + "learning_rate": 1.315802674274607e-05, + "loss": 0.0665, + "step": 44670 + }, + { + "epoch": 2.08, + "learning_rate": 1.3157242957691283e-05, + "loss": 0.0464, + "step": 44675 + }, + { + "epoch": 2.08, + "learning_rate": 1.3156459172636497e-05, + "loss": 0.0733, + "step": 44680 + }, + { + "epoch": 2.09, + "learning_rate": 1.315567538758171e-05, + "loss": 0.1709, + "step": 44685 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154891602526925e-05, + "loss": 0.0666, + "step": 44690 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154107817472137e-05, + "loss": 0.0916, + "step": 44695 + }, + { + "epoch": 2.09, + "learning_rate": 1.3153324032417352e-05, + "loss": 0.0939, + "step": 44700 + }, + { + "epoch": 2.09, + "learning_rate": 1.3152540247362565e-05, + "loss": 0.155, + "step": 44705 + }, + { + "epoch": 2.09, + "learning_rate": 1.3151756462307779e-05, + "loss": 0.2603, + "step": 44710 + }, + { + "epoch": 2.09, + "learning_rate": 1.315097267725299e-05, + "loss": 0.2802, + "step": 44715 + }, + { + "epoch": 2.09, + "learning_rate": 1.3150188892198206e-05, + "loss": 0.0517, + "step": 44720 + }, + { + "epoch": 2.09, + "learning_rate": 1.3149405107143419e-05, + "loss": 0.0623, + "step": 44725 + }, + { + "epoch": 2.09, + "learning_rate": 1.314862132208863e-05, + "loss": 0.0714, + "step": 44730 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147837537033845e-05, + "loss": 0.0585, + "step": 44735 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147053751979057e-05, + "loss": 0.1013, + "step": 44740 + }, + { + "epoch": 2.09, + "learning_rate": 1.3146269966924273e-05, + "loss": 0.0922, + "step": 44745 + }, + { + "epoch": 2.09, + "learning_rate": 1.3145486181869485e-05, + "loss": 0.1085, + "step": 44750 + }, + { + "epoch": 2.09, + "learning_rate": 1.3144702396814699e-05, + "loss": 0.1422, + "step": 44755 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143918611759911e-05, + "loss": 0.2372, + "step": 44760 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143134826705126e-05, + "loss": 0.3727, + "step": 44765 + }, + { + "epoch": 2.09, + "learning_rate": 1.3142351041650339e-05, + "loss": 0.1013, + "step": 44770 + }, + { + "epoch": 2.09, + "learning_rate": 1.3141567256595553e-05, + "loss": 0.0361, + "step": 44775 + }, + { + "epoch": 2.09, + "learning_rate": 1.3140783471540765e-05, + "loss": 0.088, + "step": 44780 + }, + { + "epoch": 2.09, + "learning_rate": 1.313999968648598e-05, + "loss": 0.0731, + "step": 44785 + }, + { + "epoch": 2.09, + "learning_rate": 1.3139215901431193e-05, + "loss": 0.0661, + "step": 44790 + }, + { + "epoch": 2.09, + "learning_rate": 1.3138432116376405e-05, + "loss": 0.1273, + "step": 44795 + }, + { + "epoch": 2.09, + "learning_rate": 1.313764833132162e-05, + "loss": 0.1032, + "step": 44800 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136864546266833e-05, + "loss": 0.2088, + "step": 44805 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136080761212047e-05, + "loss": 0.3149, + "step": 44810 + }, + { + "epoch": 2.09, + "learning_rate": 1.3135296976157259e-05, + "loss": 0.3955, + "step": 44815 + }, + { + "epoch": 2.09, + "learning_rate": 1.3134513191102474e-05, + "loss": 0.0629, + "step": 44820 + }, + { + "epoch": 2.09, + "learning_rate": 1.3133729406047687e-05, + "loss": 0.0434, + "step": 44825 + }, + { + "epoch": 2.09, + "learning_rate": 1.31329456209929e-05, + "loss": 0.1228, + "step": 44830 + }, + { + "epoch": 2.09, + "learning_rate": 1.3132161835938113e-05, + "loss": 0.0403, + "step": 44835 + }, + { + "epoch": 2.09, + "learning_rate": 1.3131378050883328e-05, + "loss": 0.0699, + "step": 44840 + }, + { + "epoch": 2.09, + "learning_rate": 1.313059426582854e-05, + "loss": 0.1441, + "step": 44845 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129810480773754e-05, + "loss": 0.1262, + "step": 44850 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129026695718967e-05, + "loss": 0.1689, + "step": 44855 + }, + { + "epoch": 2.09, + "learning_rate": 1.3128242910664179e-05, + "loss": 0.2219, + "step": 44860 + }, + { + "epoch": 2.09, + "learning_rate": 1.3127459125609394e-05, + "loss": 0.3262, + "step": 44865 + }, + { + "epoch": 2.09, + "learning_rate": 1.3126675340554607e-05, + "loss": 0.0307, + "step": 44870 + }, + { + "epoch": 2.09, + "learning_rate": 1.312589155549982e-05, + "loss": 0.0441, + "step": 44875 + }, + { + "epoch": 2.09, + "learning_rate": 1.3125107770445034e-05, + "loss": 0.0356, + "step": 44880 + }, + { + "epoch": 2.09, + "learning_rate": 1.3124323985390248e-05, + "loss": 0.0829, + "step": 44885 + }, + { + "epoch": 2.09, + "learning_rate": 1.312354020033546e-05, + "loss": 0.0608, + "step": 44890 + }, + { + "epoch": 2.09, + "learning_rate": 1.3122756415280674e-05, + "loss": 0.1429, + "step": 44895 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121972630225888e-05, + "loss": 0.125, + "step": 44900 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121188845171102e-05, + "loss": 0.1752, + "step": 44905 + }, + { + "epoch": 2.1, + "learning_rate": 1.3120405060116314e-05, + "loss": 0.1865, + "step": 44910 + }, + { + "epoch": 2.1, + "learning_rate": 1.311962127506153e-05, + "loss": 0.358, + "step": 44915 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118837490006742e-05, + "loss": 0.0342, + "step": 44920 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118053704951955e-05, + "loss": 0.0759, + "step": 44925 + }, + { + "epoch": 2.1, + "learning_rate": 1.3117269919897168e-05, + "loss": 0.0653, + "step": 44930 + }, + { + "epoch": 2.1, + "learning_rate": 1.311648613484238e-05, + "loss": 0.0713, + "step": 44935 + }, + { + "epoch": 2.1, + "learning_rate": 1.3115702349787596e-05, + "loss": 0.0867, + "step": 44940 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114918564732808e-05, + "loss": 0.0849, + "step": 44945 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114134779678022e-05, + "loss": 0.175, + "step": 44950 + }, + { + "epoch": 2.1, + "learning_rate": 1.3113350994623235e-05, + "loss": 0.1791, + "step": 44955 + }, + { + "epoch": 2.1, + "learning_rate": 1.311256720956845e-05, + "loss": 0.1874, + "step": 44960 + }, + { + "epoch": 2.1, + "learning_rate": 1.3111783424513662e-05, + "loss": 0.2178, + "step": 44965 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110999639458876e-05, + "loss": 0.0445, + "step": 44970 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110215854404088e-05, + "loss": 0.0422, + "step": 44975 + }, + { + "epoch": 2.1, + "learning_rate": 1.3109432069349304e-05, + "loss": 0.0326, + "step": 44980 + }, + { + "epoch": 2.1, + "learning_rate": 1.3108648284294516e-05, + "loss": 0.0743, + "step": 44985 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107864499239729e-05, + "loss": 0.1022, + "step": 44990 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107080714184942e-05, + "loss": 0.1278, + "step": 44995 + }, + { + "epoch": 2.1, + "learning_rate": 1.3106296929130156e-05, + "loss": 0.1631, + "step": 45000 + }, + { + "epoch": 2.1, + "learning_rate": 1.310551314407537e-05, + "loss": 0.1475, + "step": 45005 + }, + { + "epoch": 2.1, + "learning_rate": 1.3104729359020582e-05, + "loss": 0.2213, + "step": 45010 + }, + { + "epoch": 2.1, + "learning_rate": 1.3103945573965798e-05, + "loss": 0.3505, + "step": 45015 + }, + { + "epoch": 2.1, + "learning_rate": 1.310316178891101e-05, + "loss": 0.0454, + "step": 45020 + }, + { + "epoch": 2.1, + "learning_rate": 1.3102378003856224e-05, + "loss": 0.0718, + "step": 45025 + }, + { + "epoch": 2.1, + "learning_rate": 1.3101594218801436e-05, + "loss": 0.0849, + "step": 45030 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100810433746652e-05, + "loss": 0.0966, + "step": 45035 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100026648691864e-05, + "loss": 0.1208, + "step": 45040 + }, + { + "epoch": 2.1, + "learning_rate": 1.3099242863637078e-05, + "loss": 0.1753, + "step": 45045 + }, + { + "epoch": 2.1, + "learning_rate": 1.309845907858229e-05, + "loss": 0.2323, + "step": 45050 + }, + { + "epoch": 2.1, + "learning_rate": 1.3097675293527503e-05, + "loss": 0.2094, + "step": 45055 + }, + { + "epoch": 2.1, + "learning_rate": 1.3096891508472718e-05, + "loss": 0.2613, + "step": 45060 + }, + { + "epoch": 2.1, + "learning_rate": 1.309610772341793e-05, + "loss": 0.3646, + "step": 45065 + }, + { + "epoch": 2.1, + "learning_rate": 1.3095323938363144e-05, + "loss": 0.0468, + "step": 45070 + }, + { + "epoch": 2.1, + "learning_rate": 1.3094540153308356e-05, + "loss": 0.0398, + "step": 45075 + }, + { + "epoch": 2.1, + "learning_rate": 1.3093756368253572e-05, + "loss": 0.0819, + "step": 45080 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092972583198784e-05, + "loss": 0.0739, + "step": 45085 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092188798143998e-05, + "loss": 0.1802, + "step": 45090 + }, + { + "epoch": 2.1, + "learning_rate": 1.309140501308921e-05, + "loss": 0.1384, + "step": 45095 + }, + { + "epoch": 2.1, + "learning_rate": 1.3090621228034426e-05, + "loss": 0.1445, + "step": 45100 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089837442979638e-05, + "loss": 0.1752, + "step": 45105 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089053657924852e-05, + "loss": 0.4362, + "step": 45110 + }, + { + "epoch": 2.11, + "learning_rate": 1.3088269872870066e-05, + "loss": 0.3223, + "step": 45115 + }, + { + "epoch": 2.11, + "learning_rate": 1.3087486087815278e-05, + "loss": 0.0303, + "step": 45120 + }, + { + "epoch": 2.11, + "learning_rate": 1.3086702302760492e-05, + "loss": 0.047, + "step": 45125 + }, + { + "epoch": 2.11, + "learning_rate": 1.3085918517705704e-05, + "loss": 0.0458, + "step": 45130 + }, + { + "epoch": 2.11, + "learning_rate": 1.308513473265092e-05, + "loss": 0.097, + "step": 45135 + }, + { + "epoch": 2.11, + "learning_rate": 1.3084350947596132e-05, + "loss": 0.1045, + "step": 45140 + }, + { + "epoch": 2.11, + "learning_rate": 1.3083567162541346e-05, + "loss": 0.1566, + "step": 45145 + }, + { + "epoch": 2.11, + "learning_rate": 1.3082783377486558e-05, + "loss": 0.098, + "step": 45150 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081999592431774e-05, + "loss": 0.1235, + "step": 45155 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081215807376986e-05, + "loss": 0.2617, + "step": 45160 + }, + { + "epoch": 2.11, + "learning_rate": 1.30804320223222e-05, + "loss": 0.2604, + "step": 45165 + }, + { + "epoch": 2.11, + "learning_rate": 1.3079648237267412e-05, + "loss": 0.0638, + "step": 45170 + }, + { + "epoch": 2.11, + "learning_rate": 1.3078864452212628e-05, + "loss": 0.0437, + "step": 45175 + }, + { + "epoch": 2.11, + "learning_rate": 1.307808066715784e-05, + "loss": 0.125, + "step": 45180 + }, + { + "epoch": 2.11, + "learning_rate": 1.3077296882103052e-05, + "loss": 0.0794, + "step": 45185 + }, + { + "epoch": 2.11, + "learning_rate": 1.3076513097048266e-05, + "loss": 0.0686, + "step": 45190 + }, + { + "epoch": 2.11, + "learning_rate": 1.307572931199348e-05, + "loss": 0.1249, + "step": 45195 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074945526938694e-05, + "loss": 0.1045, + "step": 45200 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074161741883906e-05, + "loss": 0.151, + "step": 45205 + }, + { + "epoch": 2.11, + "learning_rate": 1.307337795682912e-05, + "loss": 0.2116, + "step": 45210 + }, + { + "epoch": 2.11, + "learning_rate": 1.3072594171774334e-05, + "loss": 0.3223, + "step": 45215 + }, + { + "epoch": 2.11, + "learning_rate": 1.3071810386719548e-05, + "loss": 0.1557, + "step": 45220 + }, + { + "epoch": 2.11, + "learning_rate": 1.307102660166476e-05, + "loss": 0.0382, + "step": 45225 + }, + { + "epoch": 2.11, + "learning_rate": 1.3070242816609976e-05, + "loss": 0.0262, + "step": 45230 + }, + { + "epoch": 2.11, + "learning_rate": 1.3069459031555188e-05, + "loss": 0.1138, + "step": 45235 + }, + { + "epoch": 2.11, + "learning_rate": 1.3068675246500402e-05, + "loss": 0.0946, + "step": 45240 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067891461445614e-05, + "loss": 0.0577, + "step": 45245 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067107676390826e-05, + "loss": 0.125, + "step": 45250 + }, + { + "epoch": 2.11, + "learning_rate": 1.3066323891336042e-05, + "loss": 0.0984, + "step": 45255 + }, + { + "epoch": 2.11, + "learning_rate": 1.3065540106281254e-05, + "loss": 0.3112, + "step": 45260 + }, + { + "epoch": 2.11, + "learning_rate": 1.3064756321226468e-05, + "loss": 0.4456, + "step": 45265 + }, + { + "epoch": 2.11, + "learning_rate": 1.306397253617168e-05, + "loss": 0.1024, + "step": 45270 + }, + { + "epoch": 2.11, + "learning_rate": 1.3063188751116896e-05, + "loss": 0.0654, + "step": 45275 + }, + { + "epoch": 2.11, + "learning_rate": 1.3062404966062108e-05, + "loss": 0.063, + "step": 45280 + }, + { + "epoch": 2.11, + "learning_rate": 1.3061621181007322e-05, + "loss": 0.073, + "step": 45285 + }, + { + "epoch": 2.11, + "learning_rate": 1.3060837395952534e-05, + "loss": 0.0433, + "step": 45290 + }, + { + "epoch": 2.11, + "learning_rate": 1.306005361089775e-05, + "loss": 0.0807, + "step": 45295 + }, + { + "epoch": 2.11, + "learning_rate": 1.3059269825842962e-05, + "loss": 0.1388, + "step": 45300 + }, + { + "epoch": 2.11, + "learning_rate": 1.3058486040788176e-05, + "loss": 0.1288, + "step": 45305 + }, + { + "epoch": 2.11, + "learning_rate": 1.3057702255733388e-05, + "loss": 0.1973, + "step": 45310 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056918470678602e-05, + "loss": 0.2249, + "step": 45315 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056134685623816e-05, + "loss": 0.0417, + "step": 45320 + }, + { + "epoch": 2.11, + "learning_rate": 1.3055350900569028e-05, + "loss": 0.0328, + "step": 45325 + }, + { + "epoch": 2.12, + "learning_rate": 1.3054567115514244e-05, + "loss": 0.092, + "step": 45330 + }, + { + "epoch": 2.12, + "learning_rate": 1.3053783330459456e-05, + "loss": 0.1243, + "step": 45335 + }, + { + "epoch": 2.12, + "learning_rate": 1.305299954540467e-05, + "loss": 0.056, + "step": 45340 + }, + { + "epoch": 2.12, + "learning_rate": 1.3052215760349882e-05, + "loss": 0.1449, + "step": 45345 + }, + { + "epoch": 2.12, + "learning_rate": 1.3051431975295098e-05, + "loss": 0.2336, + "step": 45350 + }, + { + "epoch": 2.12, + "learning_rate": 1.305064819024031e-05, + "loss": 0.1807, + "step": 45355 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049864405185524e-05, + "loss": 0.373, + "step": 45360 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049080620130736e-05, + "loss": 0.3324, + "step": 45365 + }, + { + "epoch": 2.12, + "learning_rate": 1.3048296835075951e-05, + "loss": 0.0863, + "step": 45370 + }, + { + "epoch": 2.12, + "learning_rate": 1.3047513050021164e-05, + "loss": 0.0827, + "step": 45375 + }, + { + "epoch": 2.12, + "learning_rate": 1.3046729264966376e-05, + "loss": 0.053, + "step": 45380 + }, + { + "epoch": 2.12, + "learning_rate": 1.304594547991159e-05, + "loss": 0.0841, + "step": 45385 + }, + { + "epoch": 2.12, + "learning_rate": 1.3045161694856802e-05, + "loss": 0.1131, + "step": 45390 + }, + { + "epoch": 2.12, + "learning_rate": 1.3044377909802018e-05, + "loss": 0.2027, + "step": 45395 + }, + { + "epoch": 2.12, + "learning_rate": 1.304359412474723e-05, + "loss": 0.1405, + "step": 45400 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042810339692444e-05, + "loss": 0.1859, + "step": 45405 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042026554637656e-05, + "loss": 0.2386, + "step": 45410 + }, + { + "epoch": 2.12, + "learning_rate": 1.3041242769582872e-05, + "loss": 0.3155, + "step": 45415 + }, + { + "epoch": 2.12, + "learning_rate": 1.3040458984528084e-05, + "loss": 0.0233, + "step": 45420 + }, + { + "epoch": 2.12, + "learning_rate": 1.3039675199473298e-05, + "loss": 0.0834, + "step": 45425 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038891414418512e-05, + "loss": 0.0897, + "step": 45430 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038107629363725e-05, + "loss": 0.0855, + "step": 45435 + }, + { + "epoch": 2.12, + "learning_rate": 1.3037323844308938e-05, + "loss": 0.0508, + "step": 45440 + }, + { + "epoch": 2.12, + "learning_rate": 1.303654005925415e-05, + "loss": 0.0879, + "step": 45445 + }, + { + "epoch": 2.12, + "learning_rate": 1.3035756274199365e-05, + "loss": 0.1952, + "step": 45450 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034972489144578e-05, + "loss": 0.1725, + "step": 45455 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034188704089792e-05, + "loss": 0.3064, + "step": 45460 + }, + { + "epoch": 2.12, + "learning_rate": 1.3033404919035004e-05, + "loss": 0.2897, + "step": 45465 + }, + { + "epoch": 2.12, + "learning_rate": 1.303262113398022e-05, + "loss": 0.0456, + "step": 45470 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031837348925432e-05, + "loss": 0.0907, + "step": 45475 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031053563870646e-05, + "loss": 0.0953, + "step": 45480 + }, + { + "epoch": 2.12, + "learning_rate": 1.3030269778815858e-05, + "loss": 0.0623, + "step": 45485 + }, + { + "epoch": 2.12, + "learning_rate": 1.3029485993761073e-05, + "loss": 0.0422, + "step": 45490 + }, + { + "epoch": 2.12, + "learning_rate": 1.3028702208706286e-05, + "loss": 0.1251, + "step": 45495 + }, + { + "epoch": 2.12, + "learning_rate": 1.30279184236515e-05, + "loss": 0.2241, + "step": 45500 + }, + { + "epoch": 2.12, + "learning_rate": 1.3027134638596712e-05, + "loss": 0.206, + "step": 45505 + }, + { + "epoch": 2.12, + "learning_rate": 1.3026350853541926e-05, + "loss": 0.1835, + "step": 45510 + }, + { + "epoch": 2.12, + "learning_rate": 1.302556706848714e-05, + "loss": 0.2396, + "step": 45515 + }, + { + "epoch": 2.12, + "learning_rate": 1.3024783283432352e-05, + "loss": 0.0602, + "step": 45520 + }, + { + "epoch": 2.12, + "learning_rate": 1.3023999498377566e-05, + "loss": 0.0384, + "step": 45525 + }, + { + "epoch": 2.12, + "learning_rate": 1.302321571332278e-05, + "loss": 0.0227, + "step": 45530 + }, + { + "epoch": 2.12, + "learning_rate": 1.3022431928267993e-05, + "loss": 0.0598, + "step": 45535 + }, + { + "epoch": 2.12, + "learning_rate": 1.3021648143213206e-05, + "loss": 0.1411, + "step": 45540 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020864358158421e-05, + "loss": 0.1612, + "step": 45545 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020080573103633e-05, + "loss": 0.218, + "step": 45550 + }, + { + "epoch": 2.13, + "learning_rate": 1.3019296788048847e-05, + "loss": 0.2523, + "step": 45555 + }, + { + "epoch": 2.13, + "learning_rate": 1.301851300299406e-05, + "loss": 0.1978, + "step": 45560 + }, + { + "epoch": 2.13, + "learning_rate": 1.3017729217939275e-05, + "loss": 0.3117, + "step": 45565 + }, + { + "epoch": 2.13, + "learning_rate": 1.3016945432884487e-05, + "loss": 0.1062, + "step": 45570 + }, + { + "epoch": 2.13, + "learning_rate": 1.30161616478297e-05, + "loss": 0.1145, + "step": 45575 + }, + { + "epoch": 2.13, + "learning_rate": 1.3015377862774913e-05, + "loss": 0.1031, + "step": 45580 + }, + { + "epoch": 2.13, + "learning_rate": 1.3014594077720126e-05, + "loss": 0.0727, + "step": 45585 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013810292665341e-05, + "loss": 0.0766, + "step": 45590 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013026507610554e-05, + "loss": 0.118, + "step": 45595 + }, + { + "epoch": 2.13, + "learning_rate": 1.3012242722555767e-05, + "loss": 0.0794, + "step": 45600 + }, + { + "epoch": 2.13, + "learning_rate": 1.301145893750098e-05, + "loss": 0.2502, + "step": 45605 + }, + { + "epoch": 2.13, + "learning_rate": 1.3010675152446195e-05, + "loss": 0.2556, + "step": 45610 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009891367391407e-05, + "loss": 0.2407, + "step": 45615 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009107582336621e-05, + "loss": 0.0709, + "step": 45620 + }, + { + "epoch": 2.13, + "learning_rate": 1.3008323797281834e-05, + "loss": 0.0743, + "step": 45625 + }, + { + "epoch": 2.13, + "learning_rate": 1.3007540012227049e-05, + "loss": 0.0364, + "step": 45630 + }, + { + "epoch": 2.13, + "learning_rate": 1.3006756227172261e-05, + "loss": 0.0777, + "step": 45635 + }, + { + "epoch": 2.13, + "learning_rate": 1.3005972442117474e-05, + "loss": 0.0727, + "step": 45640 + }, + { + "epoch": 2.13, + "learning_rate": 1.300518865706269e-05, + "loss": 0.1977, + "step": 45645 + }, + { + "epoch": 2.13, + "learning_rate": 1.3004404872007901e-05, + "loss": 0.1681, + "step": 45650 + }, + { + "epoch": 2.13, + "learning_rate": 1.3003621086953115e-05, + "loss": 0.2623, + "step": 45655 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002837301898328e-05, + "loss": 0.3067, + "step": 45660 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002053516843543e-05, + "loss": 0.3166, + "step": 45665 + }, + { + "epoch": 2.13, + "learning_rate": 1.3001269731788755e-05, + "loss": 0.1028, + "step": 45670 + }, + { + "epoch": 2.13, + "learning_rate": 1.300048594673397e-05, + "loss": 0.0572, + "step": 45675 + }, + { + "epoch": 2.13, + "learning_rate": 1.2999702161679181e-05, + "loss": 0.0974, + "step": 45680 + }, + { + "epoch": 2.13, + "learning_rate": 1.2998918376624397e-05, + "loss": 0.1084, + "step": 45685 + }, + { + "epoch": 2.13, + "learning_rate": 1.299813459156961e-05, + "loss": 0.1394, + "step": 45690 + }, + { + "epoch": 2.13, + "learning_rate": 1.2997350806514823e-05, + "loss": 0.1306, + "step": 45695 + }, + { + "epoch": 2.13, + "learning_rate": 1.2996567021460035e-05, + "loss": 0.1705, + "step": 45700 + }, + { + "epoch": 2.13, + "learning_rate": 1.2995783236405248e-05, + "loss": 0.1305, + "step": 45705 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994999451350463e-05, + "loss": 0.3169, + "step": 45710 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994215666295675e-05, + "loss": 0.2909, + "step": 45715 + }, + { + "epoch": 2.13, + "learning_rate": 1.299343188124089e-05, + "loss": 0.0292, + "step": 45720 + }, + { + "epoch": 2.13, + "learning_rate": 1.2992648096186102e-05, + "loss": 0.0243, + "step": 45725 + }, + { + "epoch": 2.13, + "learning_rate": 1.2991864311131317e-05, + "loss": 0.0665, + "step": 45730 + }, + { + "epoch": 2.13, + "learning_rate": 1.299108052607653e-05, + "loss": 0.0384, + "step": 45735 + }, + { + "epoch": 2.13, + "learning_rate": 1.2990296741021743e-05, + "loss": 0.0984, + "step": 45740 + }, + { + "epoch": 2.13, + "learning_rate": 1.2989512955966957e-05, + "loss": 0.0985, + "step": 45745 + }, + { + "epoch": 2.13, + "learning_rate": 1.2988729170912171e-05, + "loss": 0.1137, + "step": 45750 + }, + { + "epoch": 2.13, + "learning_rate": 1.2987945385857383e-05, + "loss": 0.2239, + "step": 45755 + }, + { + "epoch": 2.14, + "learning_rate": 1.2987161600802599e-05, + "loss": 0.1643, + "step": 45760 + }, + { + "epoch": 2.14, + "learning_rate": 1.2986377815747811e-05, + "loss": 0.4238, + "step": 45765 + }, + { + "epoch": 2.14, + "learning_rate": 1.2985594030693023e-05, + "loss": 0.0549, + "step": 45770 + }, + { + "epoch": 2.14, + "learning_rate": 1.2984810245638237e-05, + "loss": 0.0319, + "step": 45775 + }, + { + "epoch": 2.14, + "learning_rate": 1.298402646058345e-05, + "loss": 0.0423, + "step": 45780 + }, + { + "epoch": 2.14, + "learning_rate": 1.2983242675528665e-05, + "loss": 0.0657, + "step": 45785 + }, + { + "epoch": 2.14, + "learning_rate": 1.2982458890473877e-05, + "loss": 0.065, + "step": 45790 + }, + { + "epoch": 2.14, + "learning_rate": 1.2981675105419091e-05, + "loss": 0.1418, + "step": 45795 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980891320364303e-05, + "loss": 0.1648, + "step": 45800 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980107535309519e-05, + "loss": 0.1982, + "step": 45805 + }, + { + "epoch": 2.14, + "learning_rate": 1.2979323750254731e-05, + "loss": 0.196, + "step": 45810 + }, + { + "epoch": 2.14, + "learning_rate": 1.2978539965199945e-05, + "loss": 0.3101, + "step": 45815 + }, + { + "epoch": 2.14, + "learning_rate": 1.2977756180145157e-05, + "loss": 0.0599, + "step": 45820 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976972395090373e-05, + "loss": 0.0809, + "step": 45825 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976188610035585e-05, + "loss": 0.1309, + "step": 45830 + }, + { + "epoch": 2.14, + "learning_rate": 1.2975404824980797e-05, + "loss": 0.0561, + "step": 45835 + }, + { + "epoch": 2.14, + "learning_rate": 1.2974621039926011e-05, + "loss": 0.0666, + "step": 45840 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973837254871225e-05, + "loss": 0.1195, + "step": 45845 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973053469816439e-05, + "loss": 0.1992, + "step": 45850 + }, + { + "epoch": 2.14, + "learning_rate": 1.2972269684761651e-05, + "loss": 0.1405, + "step": 45855 + }, + { + "epoch": 2.14, + "learning_rate": 1.2971485899706867e-05, + "loss": 0.2539, + "step": 45860 + }, + { + "epoch": 2.14, + "learning_rate": 1.2970702114652079e-05, + "loss": 0.3003, + "step": 45865 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969918329597293e-05, + "loss": 0.0268, + "step": 45870 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969134544542505e-05, + "loss": 0.029, + "step": 45875 + }, + { + "epoch": 2.14, + "learning_rate": 1.296835075948772e-05, + "loss": 0.0899, + "step": 45880 + }, + { + "epoch": 2.14, + "learning_rate": 1.2967566974432933e-05, + "loss": 0.0464, + "step": 45885 + }, + { + "epoch": 2.14, + "learning_rate": 1.2966783189378147e-05, + "loss": 0.1088, + "step": 45890 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965999404323359e-05, + "loss": 0.1264, + "step": 45895 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965215619268571e-05, + "loss": 0.1743, + "step": 45900 + }, + { + "epoch": 2.14, + "learning_rate": 1.2964431834213787e-05, + "loss": 0.2256, + "step": 45905 + }, + { + "epoch": 2.14, + "learning_rate": 1.2963648049158999e-05, + "loss": 0.235, + "step": 45910 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962864264104213e-05, + "loss": 0.4299, + "step": 45915 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962080479049425e-05, + "loss": 0.0647, + "step": 45920 + }, + { + "epoch": 2.14, + "learning_rate": 1.296129669399464e-05, + "loss": 0.0445, + "step": 45925 + }, + { + "epoch": 2.14, + "learning_rate": 1.2960512908939853e-05, + "loss": 0.0616, + "step": 45930 + }, + { + "epoch": 2.14, + "learning_rate": 1.2959729123885067e-05, + "loss": 0.0822, + "step": 45935 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958945338830279e-05, + "loss": 0.1084, + "step": 45940 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958161553775495e-05, + "loss": 0.1383, + "step": 45945 + }, + { + "epoch": 2.14, + "learning_rate": 1.2957377768720707e-05, + "loss": 0.309, + "step": 45950 + }, + { + "epoch": 2.14, + "learning_rate": 1.295659398366592e-05, + "loss": 0.1302, + "step": 45955 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955810198611135e-05, + "loss": 0.4292, + "step": 45960 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955026413556347e-05, + "loss": 0.3065, + "step": 45965 + }, + { + "epoch": 2.15, + "learning_rate": 1.2954242628501561e-05, + "loss": 0.0508, + "step": 45970 + }, + { + "epoch": 2.15, + "learning_rate": 1.2953458843446773e-05, + "loss": 0.0602, + "step": 45975 + }, + { + "epoch": 2.15, + "learning_rate": 1.2952675058391989e-05, + "loss": 0.08, + "step": 45980 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951891273337201e-05, + "loss": 0.0903, + "step": 45985 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951107488282415e-05, + "loss": 0.0842, + "step": 45990 + }, + { + "epoch": 2.15, + "learning_rate": 1.2950323703227627e-05, + "loss": 0.115, + "step": 45995 + }, + { + "epoch": 2.15, + "learning_rate": 1.2949539918172843e-05, + "loss": 0.0675, + "step": 46000 + }, + { + "epoch": 2.15, + "learning_rate": 1.2948756133118055e-05, + "loss": 0.1959, + "step": 46005 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947972348063269e-05, + "loss": 0.2618, + "step": 46010 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947188563008481e-05, + "loss": 0.291, + "step": 46015 + }, + { + "epoch": 2.15, + "learning_rate": 1.2946404777953697e-05, + "loss": 0.0569, + "step": 46020 + }, + { + "epoch": 2.15, + "learning_rate": 1.2945620992898909e-05, + "loss": 0.0237, + "step": 46025 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944837207844121e-05, + "loss": 0.0394, + "step": 46030 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944053422789335e-05, + "loss": 0.029, + "step": 46035 + }, + { + "epoch": 2.15, + "learning_rate": 1.2943269637734549e-05, + "loss": 0.069, + "step": 46040 + }, + { + "epoch": 2.15, + "learning_rate": 1.2942485852679763e-05, + "loss": 0.0995, + "step": 46045 + }, + { + "epoch": 2.15, + "learning_rate": 1.2941702067624975e-05, + "loss": 0.0975, + "step": 46050 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940918282570189e-05, + "loss": 0.1855, + "step": 46055 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940134497515403e-05, + "loss": 0.2035, + "step": 46060 + }, + { + "epoch": 2.15, + "learning_rate": 1.2939350712460617e-05, + "loss": 0.3937, + "step": 46065 + }, + { + "epoch": 2.15, + "learning_rate": 1.2938566927405829e-05, + "loss": 0.1026, + "step": 46070 + }, + { + "epoch": 2.15, + "learning_rate": 1.2937783142351044e-05, + "loss": 0.0271, + "step": 46075 + }, + { + "epoch": 2.15, + "learning_rate": 1.2936999357296257e-05, + "loss": 0.0701, + "step": 46080 + }, + { + "epoch": 2.15, + "learning_rate": 1.293621557224147e-05, + "loss": 0.0962, + "step": 46085 + }, + { + "epoch": 2.15, + "learning_rate": 1.2935431787186683e-05, + "loss": 0.0695, + "step": 46090 + }, + { + "epoch": 2.15, + "learning_rate": 1.2934648002131895e-05, + "loss": 0.0941, + "step": 46095 + }, + { + "epoch": 2.15, + "learning_rate": 1.293386421707711e-05, + "loss": 0.1758, + "step": 46100 + }, + { + "epoch": 2.15, + "learning_rate": 1.2933080432022323e-05, + "loss": 0.2329, + "step": 46105 + }, + { + "epoch": 2.15, + "learning_rate": 1.2932296646967537e-05, + "loss": 0.257, + "step": 46110 + }, + { + "epoch": 2.15, + "learning_rate": 1.2931512861912749e-05, + "loss": 0.1914, + "step": 46115 + }, + { + "epoch": 2.15, + "learning_rate": 1.2930729076857964e-05, + "loss": 0.0743, + "step": 46120 + }, + { + "epoch": 2.15, + "learning_rate": 1.2929945291803177e-05, + "loss": 0.0692, + "step": 46125 + }, + { + "epoch": 2.15, + "learning_rate": 1.292916150674839e-05, + "loss": 0.0397, + "step": 46130 + }, + { + "epoch": 2.15, + "learning_rate": 1.2928377721693603e-05, + "loss": 0.0593, + "step": 46135 + }, + { + "epoch": 2.15, + "learning_rate": 1.2927593936638818e-05, + "loss": 0.1479, + "step": 46140 + }, + { + "epoch": 2.15, + "learning_rate": 1.292681015158403e-05, + "loss": 0.0926, + "step": 46145 + }, + { + "epoch": 2.15, + "learning_rate": 1.2926026366529245e-05, + "loss": 0.1446, + "step": 46150 + }, + { + "epoch": 2.15, + "learning_rate": 1.2925242581474457e-05, + "loss": 0.1745, + "step": 46155 + }, + { + "epoch": 2.15, + "learning_rate": 1.292445879641967e-05, + "loss": 0.2465, + "step": 46160 + }, + { + "epoch": 2.15, + "learning_rate": 1.2923675011364885e-05, + "loss": 0.2131, + "step": 46165 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922891226310097e-05, + "loss": 0.0662, + "step": 46170 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922107441255312e-05, + "loss": 0.0392, + "step": 46175 + }, + { + "epoch": 2.15, + "learning_rate": 1.2921323656200525e-05, + "loss": 0.1003, + "step": 46180 + }, + { + "epoch": 2.16, + "learning_rate": 1.2920539871145738e-05, + "loss": 0.1232, + "step": 46185 + }, + { + "epoch": 2.16, + "learning_rate": 1.291975608609095e-05, + "loss": 0.1044, + "step": 46190 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918972301036166e-05, + "loss": 0.0763, + "step": 46195 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918188515981379e-05, + "loss": 0.1681, + "step": 46200 + }, + { + "epoch": 2.16, + "learning_rate": 1.2917404730926592e-05, + "loss": 0.1915, + "step": 46205 + }, + { + "epoch": 2.16, + "learning_rate": 1.2916620945871805e-05, + "loss": 0.2404, + "step": 46210 + }, + { + "epoch": 2.16, + "learning_rate": 1.291583716081702e-05, + "loss": 0.2095, + "step": 46215 + }, + { + "epoch": 2.16, + "learning_rate": 1.2915053375762232e-05, + "loss": 0.0508, + "step": 46220 + }, + { + "epoch": 2.16, + "learning_rate": 1.2914269590707445e-05, + "loss": 0.0292, + "step": 46225 + }, + { + "epoch": 2.16, + "learning_rate": 1.2913485805652659e-05, + "loss": 0.0615, + "step": 46230 + }, + { + "epoch": 2.16, + "learning_rate": 1.291270202059787e-05, + "loss": 0.0614, + "step": 46235 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911918235543086e-05, + "loss": 0.1748, + "step": 46240 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911134450488299e-05, + "loss": 0.0778, + "step": 46245 + }, + { + "epoch": 2.16, + "learning_rate": 1.2910350665433512e-05, + "loss": 0.1938, + "step": 46250 + }, + { + "epoch": 2.16, + "learning_rate": 1.2909566880378725e-05, + "loss": 0.22, + "step": 46255 + }, + { + "epoch": 2.16, + "learning_rate": 1.290878309532394e-05, + "loss": 0.2227, + "step": 46260 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907999310269153e-05, + "loss": 0.2721, + "step": 46265 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907215525214366e-05, + "loss": 0.0752, + "step": 46270 + }, + { + "epoch": 2.16, + "learning_rate": 1.290643174015958e-05, + "loss": 0.0638, + "step": 46275 + }, + { + "epoch": 2.16, + "learning_rate": 1.2905647955104794e-05, + "loss": 0.0867, + "step": 46280 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904864170050006e-05, + "loss": 0.0472, + "step": 46285 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904080384995219e-05, + "loss": 0.0876, + "step": 46290 + }, + { + "epoch": 2.16, + "learning_rate": 1.2903296599940434e-05, + "loss": 0.1803, + "step": 46295 + }, + { + "epoch": 2.16, + "learning_rate": 1.2902512814885646e-05, + "loss": 0.1082, + "step": 46300 + }, + { + "epoch": 2.16, + "learning_rate": 1.290172902983086e-05, + "loss": 0.203, + "step": 46305 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900945244776073e-05, + "loss": 0.3482, + "step": 46310 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900161459721288e-05, + "loss": 0.31, + "step": 46315 + }, + { + "epoch": 2.16, + "learning_rate": 1.28993776746665e-05, + "loss": 0.0936, + "step": 46320 + }, + { + "epoch": 2.16, + "learning_rate": 1.2898593889611714e-05, + "loss": 0.0706, + "step": 46325 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897810104556927e-05, + "loss": 0.0416, + "step": 46330 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897026319502142e-05, + "loss": 0.1093, + "step": 46335 + }, + { + "epoch": 2.16, + "learning_rate": 1.2896242534447354e-05, + "loss": 0.1354, + "step": 46340 + }, + { + "epoch": 2.16, + "learning_rate": 1.2895458749392568e-05, + "loss": 0.0767, + "step": 46345 + }, + { + "epoch": 2.16, + "learning_rate": 1.289467496433778e-05, + "loss": 0.1904, + "step": 46350 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893891179282994e-05, + "loss": 0.1699, + "step": 46355 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893107394228208e-05, + "loss": 0.1509, + "step": 46360 + }, + { + "epoch": 2.16, + "learning_rate": 1.289232360917342e-05, + "loss": 0.3889, + "step": 46365 + }, + { + "epoch": 2.16, + "learning_rate": 1.2891539824118634e-05, + "loss": 0.0727, + "step": 46370 + }, + { + "epoch": 2.16, + "learning_rate": 1.2890756039063848e-05, + "loss": 0.0465, + "step": 46375 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889972254009062e-05, + "loss": 0.0655, + "step": 46380 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889188468954274e-05, + "loss": 0.0616, + "step": 46385 + }, + { + "epoch": 2.16, + "learning_rate": 1.288840468389949e-05, + "loss": 0.1318, + "step": 46390 + }, + { + "epoch": 2.16, + "learning_rate": 1.2887620898844702e-05, + "loss": 0.1106, + "step": 46395 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886837113789916e-05, + "loss": 0.175, + "step": 46400 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886053328735128e-05, + "loss": 0.2635, + "step": 46405 + }, + { + "epoch": 2.17, + "learning_rate": 1.2885269543680344e-05, + "loss": 0.2759, + "step": 46410 + }, + { + "epoch": 2.17, + "learning_rate": 1.2884485758625556e-05, + "loss": 0.2409, + "step": 46415 + }, + { + "epoch": 2.17, + "learning_rate": 1.2883701973570768e-05, + "loss": 0.0379, + "step": 46420 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882918188515982e-05, + "loss": 0.0526, + "step": 46425 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882134403461194e-05, + "loss": 0.0494, + "step": 46430 + }, + { + "epoch": 2.17, + "learning_rate": 1.288135061840641e-05, + "loss": 0.0765, + "step": 46435 + }, + { + "epoch": 2.17, + "learning_rate": 1.2880566833351622e-05, + "loss": 0.1189, + "step": 46440 + }, + { + "epoch": 2.17, + "learning_rate": 1.2879783048296836e-05, + "loss": 0.1203, + "step": 46445 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878999263242048e-05, + "loss": 0.1393, + "step": 46450 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878215478187264e-05, + "loss": 0.1572, + "step": 46455 + }, + { + "epoch": 2.17, + "learning_rate": 1.2877431693132476e-05, + "loss": 0.3012, + "step": 46460 + }, + { + "epoch": 2.17, + "learning_rate": 1.287664790807769e-05, + "loss": 0.2512, + "step": 46465 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875864123022902e-05, + "loss": 0.0924, + "step": 46470 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875080337968118e-05, + "loss": 0.0359, + "step": 46475 + }, + { + "epoch": 2.17, + "learning_rate": 1.287429655291333e-05, + "loss": 0.079, + "step": 46480 + }, + { + "epoch": 2.17, + "learning_rate": 1.2873512767858542e-05, + "loss": 0.0759, + "step": 46485 + }, + { + "epoch": 2.17, + "learning_rate": 1.2872728982803758e-05, + "loss": 0.103, + "step": 46490 + }, + { + "epoch": 2.17, + "learning_rate": 1.287194519774897e-05, + "loss": 0.0971, + "step": 46495 + }, + { + "epoch": 2.17, + "learning_rate": 1.2871161412694184e-05, + "loss": 0.1746, + "step": 46500 + }, + { + "epoch": 2.17, + "learning_rate": 1.2870377627639396e-05, + "loss": 0.1998, + "step": 46505 + }, + { + "epoch": 2.17, + "learning_rate": 1.2869593842584612e-05, + "loss": 0.2048, + "step": 46510 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868810057529824e-05, + "loss": 0.3011, + "step": 46515 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868026272475038e-05, + "loss": 0.0584, + "step": 46520 + }, + { + "epoch": 2.17, + "learning_rate": 1.286724248742025e-05, + "loss": 0.064, + "step": 46525 + }, + { + "epoch": 2.17, + "learning_rate": 1.2866458702365466e-05, + "loss": 0.0913, + "step": 46530 + }, + { + "epoch": 2.17, + "learning_rate": 1.2865674917310678e-05, + "loss": 0.1153, + "step": 46535 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864891132255892e-05, + "loss": 0.069, + "step": 46540 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864107347201104e-05, + "loss": 0.068, + "step": 46545 + }, + { + "epoch": 2.17, + "learning_rate": 1.2863323562146316e-05, + "loss": 0.1295, + "step": 46550 + }, + { + "epoch": 2.17, + "learning_rate": 1.2862539777091532e-05, + "loss": 0.229, + "step": 46555 + }, + { + "epoch": 2.17, + "learning_rate": 1.2861755992036744e-05, + "loss": 0.2234, + "step": 46560 + }, + { + "epoch": 2.17, + "learning_rate": 1.2860972206981958e-05, + "loss": 0.2418, + "step": 46565 + }, + { + "epoch": 2.17, + "learning_rate": 1.286018842192717e-05, + "loss": 0.0448, + "step": 46570 + }, + { + "epoch": 2.17, + "learning_rate": 1.2859404636872386e-05, + "loss": 0.0333, + "step": 46575 + }, + { + "epoch": 2.17, + "learning_rate": 1.2858620851817598e-05, + "loss": 0.0544, + "step": 46580 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857837066762812e-05, + "loss": 0.126, + "step": 46585 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857053281708026e-05, + "loss": 0.0373, + "step": 46590 + }, + { + "epoch": 2.17, + "learning_rate": 1.285626949665324e-05, + "loss": 0.0796, + "step": 46595 + }, + { + "epoch": 2.17, + "learning_rate": 1.2855485711598452e-05, + "loss": 0.1411, + "step": 46600 + }, + { + "epoch": 2.17, + "learning_rate": 1.2854701926543668e-05, + "loss": 0.1499, + "step": 46605 + }, + { + "epoch": 2.17, + "learning_rate": 1.285391814148888e-05, + "loss": 0.259, + "step": 46610 + }, + { + "epoch": 2.18, + "learning_rate": 1.2853134356434092e-05, + "loss": 0.2371, + "step": 46615 + }, + { + "epoch": 2.18, + "learning_rate": 1.2852350571379306e-05, + "loss": 0.0754, + "step": 46620 + }, + { + "epoch": 2.18, + "learning_rate": 1.2851566786324518e-05, + "loss": 0.0368, + "step": 46625 + }, + { + "epoch": 2.18, + "learning_rate": 1.2850783001269734e-05, + "loss": 0.0324, + "step": 46630 + }, + { + "epoch": 2.18, + "learning_rate": 1.2849999216214946e-05, + "loss": 0.0577, + "step": 46635 + }, + { + "epoch": 2.18, + "learning_rate": 1.284921543116016e-05, + "loss": 0.0888, + "step": 46640 + }, + { + "epoch": 2.18, + "learning_rate": 1.2848431646105372e-05, + "loss": 0.1034, + "step": 46645 + }, + { + "epoch": 2.18, + "learning_rate": 1.2847647861050588e-05, + "loss": 0.1378, + "step": 46650 + }, + { + "epoch": 2.18, + "learning_rate": 1.28468640759958e-05, + "loss": 0.2142, + "step": 46655 + }, + { + "epoch": 2.18, + "learning_rate": 1.2846080290941014e-05, + "loss": 0.3083, + "step": 46660 + }, + { + "epoch": 2.18, + "learning_rate": 1.2845296505886226e-05, + "loss": 0.193, + "step": 46665 + }, + { + "epoch": 2.18, + "learning_rate": 1.2844512720831442e-05, + "loss": 0.0664, + "step": 46670 + }, + { + "epoch": 2.18, + "learning_rate": 1.2843728935776654e-05, + "loss": 0.04, + "step": 46675 + }, + { + "epoch": 2.18, + "learning_rate": 1.2842945150721866e-05, + "loss": 0.0385, + "step": 46680 + }, + { + "epoch": 2.18, + "learning_rate": 1.284216136566708e-05, + "loss": 0.0412, + "step": 46685 + }, + { + "epoch": 2.18, + "learning_rate": 1.2841377580612294e-05, + "loss": 0.0962, + "step": 46690 + }, + { + "epoch": 2.18, + "learning_rate": 1.2840593795557508e-05, + "loss": 0.1109, + "step": 46695 + }, + { + "epoch": 2.18, + "learning_rate": 1.283981001050272e-05, + "loss": 0.1748, + "step": 46700 + }, + { + "epoch": 2.18, + "learning_rate": 1.2839026225447936e-05, + "loss": 0.1363, + "step": 46705 + }, + { + "epoch": 2.18, + "learning_rate": 1.2838242440393148e-05, + "loss": 0.3242, + "step": 46710 + }, + { + "epoch": 2.18, + "learning_rate": 1.2837458655338362e-05, + "loss": 0.2394, + "step": 46715 + }, + { + "epoch": 2.18, + "learning_rate": 1.2836674870283574e-05, + "loss": 0.0867, + "step": 46720 + }, + { + "epoch": 2.18, + "learning_rate": 1.283589108522879e-05, + "loss": 0.053, + "step": 46725 + }, + { + "epoch": 2.18, + "learning_rate": 1.2835107300174002e-05, + "loss": 0.0816, + "step": 46730 + }, + { + "epoch": 2.18, + "learning_rate": 1.2834323515119216e-05, + "loss": 0.0848, + "step": 46735 + }, + { + "epoch": 2.18, + "learning_rate": 1.2833539730064428e-05, + "loss": 0.1253, + "step": 46740 + }, + { + "epoch": 2.18, + "learning_rate": 1.283275594500964e-05, + "loss": 0.1279, + "step": 46745 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831972159954856e-05, + "loss": 0.1643, + "step": 46750 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831188374900068e-05, + "loss": 0.1052, + "step": 46755 + }, + { + "epoch": 2.18, + "learning_rate": 1.2830404589845282e-05, + "loss": 0.414, + "step": 46760 + }, + { + "epoch": 2.18, + "learning_rate": 1.2829620804790494e-05, + "loss": 0.3846, + "step": 46765 + }, + { + "epoch": 2.18, + "learning_rate": 1.282883701973571e-05, + "loss": 0.0487, + "step": 46770 + }, + { + "epoch": 2.18, + "learning_rate": 1.2828053234680922e-05, + "loss": 0.049, + "step": 46775 + }, + { + "epoch": 2.18, + "learning_rate": 1.2827269449626136e-05, + "loss": 0.0539, + "step": 46780 + }, + { + "epoch": 2.18, + "learning_rate": 1.2826485664571348e-05, + "loss": 0.0454, + "step": 46785 + }, + { + "epoch": 2.18, + "learning_rate": 1.2825701879516563e-05, + "loss": 0.1597, + "step": 46790 + }, + { + "epoch": 2.18, + "learning_rate": 1.2824918094461776e-05, + "loss": 0.1002, + "step": 46795 + }, + { + "epoch": 2.18, + "learning_rate": 1.282413430940699e-05, + "loss": 0.1373, + "step": 46800 + }, + { + "epoch": 2.18, + "learning_rate": 1.2823350524352204e-05, + "loss": 0.2601, + "step": 46805 + }, + { + "epoch": 2.18, + "learning_rate": 1.2822566739297416e-05, + "loss": 0.2318, + "step": 46810 + }, + { + "epoch": 2.18, + "learning_rate": 1.282178295424263e-05, + "loss": 0.3056, + "step": 46815 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820999169187842e-05, + "loss": 0.026, + "step": 46820 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820215384133057e-05, + "loss": 0.103, + "step": 46825 + }, + { + "epoch": 2.19, + "learning_rate": 1.281943159907827e-05, + "loss": 0.0383, + "step": 46830 + }, + { + "epoch": 2.19, + "learning_rate": 1.2818647814023484e-05, + "loss": 0.0978, + "step": 46835 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817864028968696e-05, + "loss": 0.0945, + "step": 46840 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817080243913911e-05, + "loss": 0.1017, + "step": 46845 + }, + { + "epoch": 2.19, + "learning_rate": 1.2816296458859124e-05, + "loss": 0.0924, + "step": 46850 + }, + { + "epoch": 2.19, + "learning_rate": 1.2815512673804337e-05, + "loss": 0.156, + "step": 46855 + }, + { + "epoch": 2.19, + "learning_rate": 1.281472888874955e-05, + "loss": 0.2734, + "step": 46860 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813945103694765e-05, + "loss": 0.3109, + "step": 46865 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813161318639978e-05, + "loss": 0.0688, + "step": 46870 + }, + { + "epoch": 2.19, + "learning_rate": 1.281237753358519e-05, + "loss": 0.0376, + "step": 46875 + }, + { + "epoch": 2.19, + "learning_rate": 1.2811593748530404e-05, + "loss": 0.0537, + "step": 46880 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810809963475616e-05, + "loss": 0.084, + "step": 46885 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810026178420831e-05, + "loss": 0.1507, + "step": 46890 + }, + { + "epoch": 2.19, + "learning_rate": 1.2809242393366044e-05, + "loss": 0.144, + "step": 46895 + }, + { + "epoch": 2.19, + "learning_rate": 1.2808458608311258e-05, + "loss": 0.1558, + "step": 46900 + }, + { + "epoch": 2.19, + "learning_rate": 1.2807674823256471e-05, + "loss": 0.1787, + "step": 46905 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806891038201685e-05, + "loss": 0.2776, + "step": 46910 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806107253146898e-05, + "loss": 0.3282, + "step": 46915 + }, + { + "epoch": 2.19, + "learning_rate": 1.2805323468092113e-05, + "loss": 0.0975, + "step": 46920 + }, + { + "epoch": 2.19, + "learning_rate": 1.2804539683037325e-05, + "loss": 0.0326, + "step": 46925 + }, + { + "epoch": 2.19, + "learning_rate": 1.280375589798254e-05, + "loss": 0.0807, + "step": 46930 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802972112927752e-05, + "loss": 0.1076, + "step": 46935 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802188327872964e-05, + "loss": 0.123, + "step": 46940 + }, + { + "epoch": 2.19, + "learning_rate": 1.280140454281818e-05, + "loss": 0.1297, + "step": 46945 + }, + { + "epoch": 2.19, + "learning_rate": 1.2800620757763392e-05, + "loss": 0.13, + "step": 46950 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799836972708605e-05, + "loss": 0.1437, + "step": 46955 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799053187653818e-05, + "loss": 0.1865, + "step": 46960 + }, + { + "epoch": 2.19, + "learning_rate": 1.2798269402599033e-05, + "loss": 0.2441, + "step": 46965 + }, + { + "epoch": 2.19, + "learning_rate": 1.2797485617544245e-05, + "loss": 0.0758, + "step": 46970 + }, + { + "epoch": 2.19, + "learning_rate": 1.279670183248946e-05, + "loss": 0.0861, + "step": 46975 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795918047434672e-05, + "loss": 0.0704, + "step": 46980 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795134262379887e-05, + "loss": 0.0482, + "step": 46985 + }, + { + "epoch": 2.19, + "learning_rate": 1.27943504773251e-05, + "loss": 0.0407, + "step": 46990 + }, + { + "epoch": 2.19, + "learning_rate": 1.2793566692270313e-05, + "loss": 0.1309, + "step": 46995 + }, + { + "epoch": 2.19, + "learning_rate": 1.2792782907215526e-05, + "loss": 0.1658, + "step": 47000 + }, + { + "epoch": 2.19, + "learning_rate": 1.279199912216074e-05, + "loss": 0.1571, + "step": 47005 + }, + { + "epoch": 2.19, + "learning_rate": 1.2791215337105953e-05, + "loss": 0.2098, + "step": 47010 + }, + { + "epoch": 2.19, + "learning_rate": 1.2790431552051166e-05, + "loss": 0.2532, + "step": 47015 + }, + { + "epoch": 2.19, + "learning_rate": 1.2789647766996381e-05, + "loss": 0.0152, + "step": 47020 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788863981941593e-05, + "loss": 0.0505, + "step": 47025 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788080196886807e-05, + "loss": 0.0752, + "step": 47030 + }, + { + "epoch": 2.19, + "learning_rate": 1.278729641183202e-05, + "loss": 0.0727, + "step": 47035 + }, + { + "epoch": 2.19, + "learning_rate": 1.2786512626777235e-05, + "loss": 0.1866, + "step": 47040 + }, + { + "epoch": 2.2, + "learning_rate": 1.2785728841722447e-05, + "loss": 0.0927, + "step": 47045 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784945056667661e-05, + "loss": 0.0914, + "step": 47050 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784161271612873e-05, + "loss": 0.2837, + "step": 47055 + }, + { + "epoch": 2.2, + "learning_rate": 1.2783377486558089e-05, + "loss": 0.3236, + "step": 47060 + }, + { + "epoch": 2.2, + "learning_rate": 1.2782593701503301e-05, + "loss": 0.3262, + "step": 47065 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781809916448513e-05, + "loss": 0.0784, + "step": 47070 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781026131393727e-05, + "loss": 0.0751, + "step": 47075 + }, + { + "epoch": 2.2, + "learning_rate": 1.278024234633894e-05, + "loss": 0.0486, + "step": 47080 + }, + { + "epoch": 2.2, + "learning_rate": 1.2779458561284155e-05, + "loss": 0.1281, + "step": 47085 + }, + { + "epoch": 2.2, + "learning_rate": 1.2778674776229367e-05, + "loss": 0.1138, + "step": 47090 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777890991174581e-05, + "loss": 0.1101, + "step": 47095 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777107206119793e-05, + "loss": 0.1547, + "step": 47100 + }, + { + "epoch": 2.2, + "learning_rate": 1.2776323421065009e-05, + "loss": 0.1095, + "step": 47105 + }, + { + "epoch": 2.2, + "learning_rate": 1.2775539636010221e-05, + "loss": 0.3924, + "step": 47110 + }, + { + "epoch": 2.2, + "learning_rate": 1.2774755850955435e-05, + "loss": 0.314, + "step": 47115 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773972065900649e-05, + "loss": 0.0539, + "step": 47120 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773188280845863e-05, + "loss": 0.039, + "step": 47125 + }, + { + "epoch": 2.2, + "learning_rate": 1.2772404495791075e-05, + "loss": 0.0658, + "step": 47130 + }, + { + "epoch": 2.2, + "learning_rate": 1.2771620710736287e-05, + "loss": 0.0625, + "step": 47135 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770836925681503e-05, + "loss": 0.115, + "step": 47140 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770053140626715e-05, + "loss": 0.1128, + "step": 47145 + }, + { + "epoch": 2.2, + "learning_rate": 1.2769269355571929e-05, + "loss": 0.1195, + "step": 47150 + }, + { + "epoch": 2.2, + "learning_rate": 1.2768485570517141e-05, + "loss": 0.1494, + "step": 47155 + }, + { + "epoch": 2.2, + "learning_rate": 1.2767701785462357e-05, + "loss": 0.1993, + "step": 47160 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766918000407569e-05, + "loss": 0.2361, + "step": 47165 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766134215352783e-05, + "loss": 0.0823, + "step": 47170 + }, + { + "epoch": 2.2, + "learning_rate": 1.2765350430297995e-05, + "loss": 0.0557, + "step": 47175 + }, + { + "epoch": 2.2, + "learning_rate": 1.2764566645243211e-05, + "loss": 0.0295, + "step": 47180 + }, + { + "epoch": 2.2, + "learning_rate": 1.2763782860188423e-05, + "loss": 0.0804, + "step": 47185 + }, + { + "epoch": 2.2, + "learning_rate": 1.2762999075133637e-05, + "loss": 0.0986, + "step": 47190 + }, + { + "epoch": 2.2, + "learning_rate": 1.276221529007885e-05, + "loss": 0.1192, + "step": 47195 + }, + { + "epoch": 2.2, + "learning_rate": 1.2761431505024063e-05, + "loss": 0.1397, + "step": 47200 + }, + { + "epoch": 2.2, + "learning_rate": 1.2760647719969277e-05, + "loss": 0.2456, + "step": 47205 + }, + { + "epoch": 2.2, + "learning_rate": 1.275986393491449e-05, + "loss": 0.267, + "step": 47210 + }, + { + "epoch": 2.2, + "learning_rate": 1.2759080149859703e-05, + "loss": 0.3058, + "step": 47215 + }, + { + "epoch": 2.2, + "learning_rate": 1.2758296364804917e-05, + "loss": 0.0419, + "step": 47220 + }, + { + "epoch": 2.2, + "learning_rate": 1.2757512579750131e-05, + "loss": 0.0535, + "step": 47225 + }, + { + "epoch": 2.2, + "learning_rate": 1.2756728794695343e-05, + "loss": 0.0522, + "step": 47230 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755945009640559e-05, + "loss": 0.0544, + "step": 47235 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755161224585771e-05, + "loss": 0.1465, + "step": 47240 + }, + { + "epoch": 2.2, + "learning_rate": 1.2754377439530985e-05, + "loss": 0.1576, + "step": 47245 + }, + { + "epoch": 2.2, + "learning_rate": 1.2753593654476197e-05, + "loss": 0.1764, + "step": 47250 + }, + { + "epoch": 2.2, + "learning_rate": 1.2752809869421413e-05, + "loss": 0.2072, + "step": 47255 + }, + { + "epoch": 2.21, + "learning_rate": 1.2752026084366625e-05, + "loss": 0.2698, + "step": 47260 + }, + { + "epoch": 2.21, + "learning_rate": 1.2751242299311837e-05, + "loss": 0.2404, + "step": 47265 + }, + { + "epoch": 2.21, + "learning_rate": 1.2750458514257051e-05, + "loss": 0.1002, + "step": 47270 + }, + { + "epoch": 2.21, + "learning_rate": 1.2749674729202263e-05, + "loss": 0.0465, + "step": 47275 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748890944147479e-05, + "loss": 0.0432, + "step": 47280 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748107159092691e-05, + "loss": 0.0784, + "step": 47285 + }, + { + "epoch": 2.21, + "learning_rate": 1.2747323374037905e-05, + "loss": 0.0798, + "step": 47290 + }, + { + "epoch": 2.21, + "learning_rate": 1.2746539588983117e-05, + "loss": 0.0903, + "step": 47295 + }, + { + "epoch": 2.21, + "learning_rate": 1.2745755803928333e-05, + "loss": 0.197, + "step": 47300 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744972018873545e-05, + "loss": 0.2127, + "step": 47305 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744188233818759e-05, + "loss": 0.2387, + "step": 47310 + }, + { + "epoch": 2.21, + "learning_rate": 1.2743404448763971e-05, + "loss": 0.2397, + "step": 47315 + }, + { + "epoch": 2.21, + "learning_rate": 1.2742620663709187e-05, + "loss": 0.0321, + "step": 47320 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741836878654399e-05, + "loss": 0.0406, + "step": 47325 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741053093599611e-05, + "loss": 0.0561, + "step": 47330 + }, + { + "epoch": 2.21, + "learning_rate": 1.2740269308544827e-05, + "loss": 0.1, + "step": 47335 + }, + { + "epoch": 2.21, + "learning_rate": 1.2739485523490039e-05, + "loss": 0.0816, + "step": 47340 + }, + { + "epoch": 2.21, + "learning_rate": 1.2738701738435253e-05, + "loss": 0.1594, + "step": 47345 + }, + { + "epoch": 2.21, + "learning_rate": 1.2737917953380465e-05, + "loss": 0.1174, + "step": 47350 + }, + { + "epoch": 2.21, + "learning_rate": 1.273713416832568e-05, + "loss": 0.2226, + "step": 47355 + }, + { + "epoch": 2.21, + "learning_rate": 1.2736350383270893e-05, + "loss": 0.2778, + "step": 47360 + }, + { + "epoch": 2.21, + "learning_rate": 1.2735566598216107e-05, + "loss": 0.3245, + "step": 47365 + }, + { + "epoch": 2.21, + "learning_rate": 1.2734782813161319e-05, + "loss": 0.0526, + "step": 47370 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733999028106535e-05, + "loss": 0.0418, + "step": 47375 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733215243051747e-05, + "loss": 0.1414, + "step": 47380 + }, + { + "epoch": 2.21, + "learning_rate": 1.273243145799696e-05, + "loss": 0.0651, + "step": 47385 + }, + { + "epoch": 2.21, + "learning_rate": 1.2731647672942173e-05, + "loss": 0.1081, + "step": 47390 + }, + { + "epoch": 2.21, + "learning_rate": 1.2730863887887385e-05, + "loss": 0.1513, + "step": 47395 + }, + { + "epoch": 2.21, + "learning_rate": 1.27300801028326e-05, + "loss": 0.1419, + "step": 47400 + }, + { + "epoch": 2.21, + "learning_rate": 1.2729296317777813e-05, + "loss": 0.3379, + "step": 47405 + }, + { + "epoch": 2.21, + "learning_rate": 1.2728512532723027e-05, + "loss": 0.2274, + "step": 47410 + }, + { + "epoch": 2.21, + "learning_rate": 1.2727728747668239e-05, + "loss": 0.3456, + "step": 47415 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726944962613455e-05, + "loss": 0.0683, + "step": 47420 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726161177558667e-05, + "loss": 0.0611, + "step": 47425 + }, + { + "epoch": 2.21, + "learning_rate": 1.272537739250388e-05, + "loss": 0.0673, + "step": 47430 + }, + { + "epoch": 2.21, + "learning_rate": 1.2724593607449095e-05, + "loss": 0.1342, + "step": 47435 + }, + { + "epoch": 2.21, + "learning_rate": 1.2723809822394309e-05, + "loss": 0.0678, + "step": 47440 + }, + { + "epoch": 2.21, + "learning_rate": 1.272302603733952e-05, + "loss": 0.254, + "step": 47445 + }, + { + "epoch": 2.21, + "learning_rate": 1.2722242252284736e-05, + "loss": 0.1418, + "step": 47450 + }, + { + "epoch": 2.21, + "learning_rate": 1.2721458467229949e-05, + "loss": 0.1771, + "step": 47455 + }, + { + "epoch": 2.21, + "learning_rate": 1.272067468217516e-05, + "loss": 0.2067, + "step": 47460 + }, + { + "epoch": 2.21, + "learning_rate": 1.2719890897120375e-05, + "loss": 0.302, + "step": 47465 + }, + { + "epoch": 2.22, + "learning_rate": 1.2719107112065587e-05, + "loss": 0.0339, + "step": 47470 + }, + { + "epoch": 2.22, + "learning_rate": 1.2718323327010803e-05, + "loss": 0.068, + "step": 47475 + }, + { + "epoch": 2.22, + "learning_rate": 1.2717539541956015e-05, + "loss": 0.101, + "step": 47480 + }, + { + "epoch": 2.22, + "learning_rate": 1.2716755756901229e-05, + "loss": 0.0721, + "step": 47485 + }, + { + "epoch": 2.22, + "learning_rate": 1.271597197184644e-05, + "loss": 0.0679, + "step": 47490 + }, + { + "epoch": 2.22, + "learning_rate": 1.2715188186791656e-05, + "loss": 0.155, + "step": 47495 + }, + { + "epoch": 2.22, + "learning_rate": 1.2714404401736869e-05, + "loss": 0.0995, + "step": 47500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2713620616682083e-05, + "loss": 0.2467, + "step": 47505 + }, + { + "epoch": 2.22, + "learning_rate": 1.2712836831627295e-05, + "loss": 0.2413, + "step": 47510 + }, + { + "epoch": 2.22, + "learning_rate": 1.271205304657251e-05, + "loss": 0.2587, + "step": 47515 + }, + { + "epoch": 2.22, + "learning_rate": 1.2711269261517723e-05, + "loss": 0.0649, + "step": 47520 + }, + { + "epoch": 2.22, + "learning_rate": 1.2710485476462935e-05, + "loss": 0.0333, + "step": 47525 + }, + { + "epoch": 2.22, + "learning_rate": 1.2709701691408149e-05, + "loss": 0.0575, + "step": 47530 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708917906353363e-05, + "loss": 0.1551, + "step": 47535 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708134121298577e-05, + "loss": 0.2555, + "step": 47540 + }, + { + "epoch": 2.22, + "learning_rate": 1.2707350336243789e-05, + "loss": 0.0763, + "step": 47545 + }, + { + "epoch": 2.22, + "learning_rate": 1.2706566551189004e-05, + "loss": 0.1238, + "step": 47550 + }, + { + "epoch": 2.22, + "learning_rate": 1.2705782766134217e-05, + "loss": 0.1277, + "step": 47555 + }, + { + "epoch": 2.22, + "learning_rate": 1.270499898107943e-05, + "loss": 0.2487, + "step": 47560 + }, + { + "epoch": 2.22, + "learning_rate": 1.2704215196024643e-05, + "loss": 0.2534, + "step": 47565 + }, + { + "epoch": 2.22, + "learning_rate": 1.2703431410969858e-05, + "loss": 0.0228, + "step": 47570 + }, + { + "epoch": 2.22, + "learning_rate": 1.270264762591507e-05, + "loss": 0.0238, + "step": 47575 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701863840860284e-05, + "loss": 0.0532, + "step": 47580 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701080055805497e-05, + "loss": 0.1343, + "step": 47585 + }, + { + "epoch": 2.22, + "learning_rate": 1.2700296270750709e-05, + "loss": 0.1061, + "step": 47590 + }, + { + "epoch": 2.22, + "learning_rate": 1.2699512485695924e-05, + "loss": 0.1548, + "step": 47595 + }, + { + "epoch": 2.22, + "learning_rate": 1.2698728700641137e-05, + "loss": 0.2443, + "step": 47600 + }, + { + "epoch": 2.22, + "learning_rate": 1.269794491558635e-05, + "loss": 0.1995, + "step": 47605 + }, + { + "epoch": 2.22, + "learning_rate": 1.2697161130531563e-05, + "loss": 0.4085, + "step": 47610 + }, + { + "epoch": 2.22, + "learning_rate": 1.2696377345476778e-05, + "loss": 0.2198, + "step": 47615 + }, + { + "epoch": 2.22, + "learning_rate": 1.269559356042199e-05, + "loss": 0.0253, + "step": 47620 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694809775367204e-05, + "loss": 0.0511, + "step": 47625 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694025990312417e-05, + "loss": 0.0841, + "step": 47630 + }, + { + "epoch": 2.22, + "learning_rate": 1.2693242205257632e-05, + "loss": 0.0448, + "step": 47635 + }, + { + "epoch": 2.22, + "learning_rate": 1.2692458420202844e-05, + "loss": 0.0793, + "step": 47640 + }, + { + "epoch": 2.22, + "learning_rate": 1.2691674635148058e-05, + "loss": 0.0846, + "step": 47645 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690890850093272e-05, + "loss": 0.1674, + "step": 47650 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690107065038484e-05, + "loss": 0.129, + "step": 47655 + }, + { + "epoch": 2.22, + "learning_rate": 1.2689323279983698e-05, + "loss": 0.2383, + "step": 47660 + }, + { + "epoch": 2.22, + "learning_rate": 1.268853949492891e-05, + "loss": 0.3317, + "step": 47665 + }, + { + "epoch": 2.22, + "learning_rate": 1.2687755709874126e-05, + "loss": 0.0892, + "step": 47670 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686971924819338e-05, + "loss": 0.0223, + "step": 47675 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686188139764552e-05, + "loss": 0.077, + "step": 47680 + }, + { + "epoch": 2.23, + "learning_rate": 1.2685404354709765e-05, + "loss": 0.0558, + "step": 47685 + }, + { + "epoch": 2.23, + "learning_rate": 1.268462056965498e-05, + "loss": 0.0593, + "step": 47690 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683836784600192e-05, + "loss": 0.0725, + "step": 47695 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683052999545406e-05, + "loss": 0.2018, + "step": 47700 + }, + { + "epoch": 2.23, + "learning_rate": 1.2682269214490618e-05, + "loss": 0.1803, + "step": 47705 + }, + { + "epoch": 2.23, + "learning_rate": 1.2681485429435834e-05, + "loss": 0.2223, + "step": 47710 + }, + { + "epoch": 2.23, + "learning_rate": 1.2680701644381046e-05, + "loss": 0.4095, + "step": 47715 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679917859326258e-05, + "loss": 0.0528, + "step": 47720 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679134074271472e-05, + "loss": 0.0447, + "step": 47725 + }, + { + "epoch": 2.23, + "learning_rate": 1.2678350289216685e-05, + "loss": 0.0256, + "step": 47730 + }, + { + "epoch": 2.23, + "learning_rate": 1.26775665041619e-05, + "loss": 0.0814, + "step": 47735 + }, + { + "epoch": 2.23, + "learning_rate": 1.2676782719107112e-05, + "loss": 0.1283, + "step": 47740 + }, + { + "epoch": 2.23, + "learning_rate": 1.2675998934052326e-05, + "loss": 0.0823, + "step": 47745 + }, + { + "epoch": 2.23, + "learning_rate": 1.267521514899754e-05, + "loss": 0.0804, + "step": 47750 + }, + { + "epoch": 2.23, + "learning_rate": 1.2674431363942754e-05, + "loss": 0.1365, + "step": 47755 + }, + { + "epoch": 2.23, + "learning_rate": 1.2673647578887966e-05, + "loss": 0.2962, + "step": 47760 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672863793833182e-05, + "loss": 0.3267, + "step": 47765 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672080008778394e-05, + "loss": 0.0293, + "step": 47770 + }, + { + "epoch": 2.23, + "learning_rate": 1.2671296223723608e-05, + "loss": 0.0746, + "step": 47775 + }, + { + "epoch": 2.23, + "learning_rate": 1.267051243866882e-05, + "loss": 0.0665, + "step": 47780 + }, + { + "epoch": 2.23, + "learning_rate": 1.2669728653614032e-05, + "loss": 0.0971, + "step": 47785 + }, + { + "epoch": 2.23, + "learning_rate": 1.2668944868559248e-05, + "loss": 0.0418, + "step": 47790 + }, + { + "epoch": 2.23, + "learning_rate": 1.266816108350446e-05, + "loss": 0.1123, + "step": 47795 + }, + { + "epoch": 2.23, + "learning_rate": 1.2667377298449674e-05, + "loss": 0.0995, + "step": 47800 + }, + { + "epoch": 2.23, + "learning_rate": 1.2666593513394886e-05, + "loss": 0.1129, + "step": 47805 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665809728340102e-05, + "loss": 0.2582, + "step": 47810 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665025943285314e-05, + "loss": 0.3506, + "step": 47815 + }, + { + "epoch": 2.23, + "learning_rate": 1.2664242158230528e-05, + "loss": 0.0258, + "step": 47820 + }, + { + "epoch": 2.23, + "learning_rate": 1.266345837317574e-05, + "loss": 0.0343, + "step": 47825 + }, + { + "epoch": 2.23, + "learning_rate": 1.2662674588120956e-05, + "loss": 0.0366, + "step": 47830 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661890803066168e-05, + "loss": 0.1132, + "step": 47835 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661107018011382e-05, + "loss": 0.0819, + "step": 47840 + }, + { + "epoch": 2.23, + "learning_rate": 1.2660323232956594e-05, + "loss": 0.1235, + "step": 47845 + }, + { + "epoch": 2.23, + "learning_rate": 1.2659539447901808e-05, + "loss": 0.1482, + "step": 47850 + }, + { + "epoch": 2.23, + "learning_rate": 1.2658755662847022e-05, + "loss": 0.1346, + "step": 47855 + }, + { + "epoch": 2.23, + "learning_rate": 1.2657971877792234e-05, + "loss": 0.1981, + "step": 47860 + }, + { + "epoch": 2.23, + "learning_rate": 1.265718809273745e-05, + "loss": 0.2022, + "step": 47865 + }, + { + "epoch": 2.23, + "learning_rate": 1.2656404307682662e-05, + "loss": 0.0667, + "step": 47870 + }, + { + "epoch": 2.23, + "learning_rate": 1.2655620522627876e-05, + "loss": 0.0178, + "step": 47875 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654836737573088e-05, + "loss": 0.1099, + "step": 47880 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654052952518304e-05, + "loss": 0.0823, + "step": 47885 + }, + { + "epoch": 2.23, + "learning_rate": 1.2653269167463516e-05, + "loss": 0.0406, + "step": 47890 + }, + { + "epoch": 2.23, + "learning_rate": 1.265248538240873e-05, + "loss": 0.1551, + "step": 47895 + }, + { + "epoch": 2.24, + "learning_rate": 1.2651701597353942e-05, + "loss": 0.1111, + "step": 47900 + }, + { + "epoch": 2.24, + "learning_rate": 1.2650917812299158e-05, + "loss": 0.1663, + "step": 47905 + }, + { + "epoch": 2.24, + "learning_rate": 1.265013402724437e-05, + "loss": 0.2386, + "step": 47910 + }, + { + "epoch": 2.24, + "learning_rate": 1.2649350242189582e-05, + "loss": 0.2496, + "step": 47915 + }, + { + "epoch": 2.24, + "learning_rate": 1.2648566457134796e-05, + "loss": 0.0195, + "step": 47920 + }, + { + "epoch": 2.24, + "learning_rate": 1.2647782672080008e-05, + "loss": 0.0671, + "step": 47925 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646998887025224e-05, + "loss": 0.0558, + "step": 47930 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646215101970436e-05, + "loss": 0.0917, + "step": 47935 + }, + { + "epoch": 2.24, + "learning_rate": 1.264543131691565e-05, + "loss": 0.1752, + "step": 47940 + }, + { + "epoch": 2.24, + "learning_rate": 1.2644647531860862e-05, + "loss": 0.1103, + "step": 47945 + }, + { + "epoch": 2.24, + "learning_rate": 1.2643863746806078e-05, + "loss": 0.1002, + "step": 47950 + }, + { + "epoch": 2.24, + "learning_rate": 1.264307996175129e-05, + "loss": 0.1335, + "step": 47955 + }, + { + "epoch": 2.24, + "learning_rate": 1.2642296176696504e-05, + "loss": 0.3335, + "step": 47960 + }, + { + "epoch": 2.24, + "learning_rate": 1.2641512391641718e-05, + "loss": 0.3093, + "step": 47965 + }, + { + "epoch": 2.24, + "learning_rate": 1.2640728606586932e-05, + "loss": 0.0686, + "step": 47970 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639944821532144e-05, + "loss": 0.0387, + "step": 47975 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639161036477356e-05, + "loss": 0.0357, + "step": 47980 + }, + { + "epoch": 2.24, + "learning_rate": 1.2638377251422572e-05, + "loss": 0.0849, + "step": 47985 + }, + { + "epoch": 2.24, + "learning_rate": 1.2637593466367784e-05, + "loss": 0.1276, + "step": 47990 + }, + { + "epoch": 2.24, + "learning_rate": 1.2636809681312998e-05, + "loss": 0.1694, + "step": 47995 + }, + { + "epoch": 2.24, + "learning_rate": 1.263602589625821e-05, + "loss": 0.1722, + "step": 48000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2635242111203426e-05, + "loss": 0.1639, + "step": 48005 + }, + { + "epoch": 2.24, + "learning_rate": 1.2634458326148638e-05, + "loss": 0.326, + "step": 48010 + }, + { + "epoch": 2.24, + "learning_rate": 1.2633674541093852e-05, + "loss": 0.3992, + "step": 48015 + }, + { + "epoch": 2.24, + "learning_rate": 1.2632890756039064e-05, + "loss": 0.0436, + "step": 48020 + }, + { + "epoch": 2.24, + "learning_rate": 1.263210697098428e-05, + "loss": 0.0531, + "step": 48025 + }, + { + "epoch": 2.24, + "learning_rate": 1.2631323185929492e-05, + "loss": 0.0581, + "step": 48030 + }, + { + "epoch": 2.24, + "learning_rate": 1.2630539400874706e-05, + "loss": 0.0561, + "step": 48035 + }, + { + "epoch": 2.24, + "learning_rate": 1.2629755615819918e-05, + "loss": 0.1014, + "step": 48040 + }, + { + "epoch": 2.24, + "learning_rate": 1.262897183076513e-05, + "loss": 0.1809, + "step": 48045 + }, + { + "epoch": 2.24, + "learning_rate": 1.2628188045710346e-05, + "loss": 0.1019, + "step": 48050 + }, + { + "epoch": 2.24, + "learning_rate": 1.2627404260655558e-05, + "loss": 0.1508, + "step": 48055 + }, + { + "epoch": 2.24, + "learning_rate": 1.2626620475600772e-05, + "loss": 0.309, + "step": 48060 + }, + { + "epoch": 2.24, + "learning_rate": 1.2625836690545986e-05, + "loss": 0.2341, + "step": 48065 + }, + { + "epoch": 2.24, + "learning_rate": 1.26250529054912e-05, + "loss": 0.0517, + "step": 48070 + }, + { + "epoch": 2.24, + "learning_rate": 1.2624269120436412e-05, + "loss": 0.039, + "step": 48075 + }, + { + "epoch": 2.24, + "learning_rate": 1.2623485335381628e-05, + "loss": 0.0392, + "step": 48080 + }, + { + "epoch": 2.24, + "learning_rate": 1.262270155032684e-05, + "loss": 0.0337, + "step": 48085 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621917765272054e-05, + "loss": 0.0574, + "step": 48090 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621133980217266e-05, + "loss": 0.0952, + "step": 48095 + }, + { + "epoch": 2.24, + "learning_rate": 1.2620350195162481e-05, + "loss": 0.1344, + "step": 48100 + }, + { + "epoch": 2.24, + "learning_rate": 1.2619566410107694e-05, + "loss": 0.235, + "step": 48105 + }, + { + "epoch": 2.24, + "learning_rate": 1.2618782625052906e-05, + "loss": 0.2454, + "step": 48110 + }, + { + "epoch": 2.25, + "learning_rate": 1.261799883999812e-05, + "loss": 0.26, + "step": 48115 + }, + { + "epoch": 2.25, + "learning_rate": 1.2617215054943332e-05, + "loss": 0.0369, + "step": 48120 + }, + { + "epoch": 2.25, + "learning_rate": 1.2616431269888548e-05, + "loss": 0.0334, + "step": 48125 + }, + { + "epoch": 2.25, + "learning_rate": 1.261564748483376e-05, + "loss": 0.0459, + "step": 48130 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614863699778974e-05, + "loss": 0.0905, + "step": 48135 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614079914724186e-05, + "loss": 0.1017, + "step": 48140 + }, + { + "epoch": 2.25, + "learning_rate": 1.2613296129669402e-05, + "loss": 0.1289, + "step": 48145 + }, + { + "epoch": 2.25, + "learning_rate": 1.2612512344614614e-05, + "loss": 0.1654, + "step": 48150 + }, + { + "epoch": 2.25, + "learning_rate": 1.2611728559559828e-05, + "loss": 0.1671, + "step": 48155 + }, + { + "epoch": 2.25, + "learning_rate": 1.261094477450504e-05, + "loss": 0.2027, + "step": 48160 + }, + { + "epoch": 2.25, + "learning_rate": 1.2610160989450255e-05, + "loss": 0.2967, + "step": 48165 + }, + { + "epoch": 2.25, + "learning_rate": 1.2609377204395468e-05, + "loss": 0.0546, + "step": 48170 + }, + { + "epoch": 2.25, + "learning_rate": 1.260859341934068e-05, + "loss": 0.0423, + "step": 48175 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607809634285895e-05, + "loss": 0.0787, + "step": 48180 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607025849231108e-05, + "loss": 0.1554, + "step": 48185 + }, + { + "epoch": 2.25, + "learning_rate": 1.2606242064176322e-05, + "loss": 0.0672, + "step": 48190 + }, + { + "epoch": 2.25, + "learning_rate": 1.2605458279121534e-05, + "loss": 0.1474, + "step": 48195 + }, + { + "epoch": 2.25, + "learning_rate": 1.260467449406675e-05, + "loss": 0.0994, + "step": 48200 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603890709011962e-05, + "loss": 0.1914, + "step": 48205 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603106923957176e-05, + "loss": 0.2273, + "step": 48210 + }, + { + "epoch": 2.25, + "learning_rate": 1.2602323138902388e-05, + "loss": 0.3731, + "step": 48215 + }, + { + "epoch": 2.25, + "learning_rate": 1.2601539353847603e-05, + "loss": 0.0643, + "step": 48220 + }, + { + "epoch": 2.25, + "learning_rate": 1.2600755568792816e-05, + "loss": 0.0238, + "step": 48225 + }, + { + "epoch": 2.25, + "learning_rate": 1.259997178373803e-05, + "loss": 0.0542, + "step": 48230 + }, + { + "epoch": 2.25, + "learning_rate": 1.2599187998683242e-05, + "loss": 0.0509, + "step": 48235 + }, + { + "epoch": 2.25, + "learning_rate": 1.2598404213628454e-05, + "loss": 0.1615, + "step": 48240 + }, + { + "epoch": 2.25, + "learning_rate": 1.259762042857367e-05, + "loss": 0.0994, + "step": 48245 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596836643518882e-05, + "loss": 0.1109, + "step": 48250 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596052858464096e-05, + "loss": 0.1553, + "step": 48255 + }, + { + "epoch": 2.25, + "learning_rate": 1.2595269073409308e-05, + "loss": 0.3124, + "step": 48260 + }, + { + "epoch": 2.25, + "learning_rate": 1.2594485288354523e-05, + "loss": 0.2899, + "step": 48265 + }, + { + "epoch": 2.25, + "learning_rate": 1.2593701503299736e-05, + "loss": 0.0501, + "step": 48270 + }, + { + "epoch": 2.25, + "learning_rate": 1.259291771824495e-05, + "loss": 0.0855, + "step": 48275 + }, + { + "epoch": 2.25, + "learning_rate": 1.2592133933190163e-05, + "loss": 0.0823, + "step": 48280 + }, + { + "epoch": 2.25, + "learning_rate": 1.2591350148135377e-05, + "loss": 0.145, + "step": 48285 + }, + { + "epoch": 2.25, + "learning_rate": 1.259056636308059e-05, + "loss": 0.0703, + "step": 48290 + }, + { + "epoch": 2.25, + "learning_rate": 1.2589782578025805e-05, + "loss": 0.143, + "step": 48295 + }, + { + "epoch": 2.25, + "learning_rate": 1.2588998792971017e-05, + "loss": 0.1223, + "step": 48300 + }, + { + "epoch": 2.25, + "learning_rate": 1.258821500791623e-05, + "loss": 0.1272, + "step": 48305 + }, + { + "epoch": 2.25, + "learning_rate": 1.2587431222861443e-05, + "loss": 0.1794, + "step": 48310 + }, + { + "epoch": 2.25, + "learning_rate": 1.2586647437806656e-05, + "loss": 0.2009, + "step": 48315 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585863652751871e-05, + "loss": 0.0941, + "step": 48320 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585079867697083e-05, + "loss": 0.0456, + "step": 48325 + }, + { + "epoch": 2.26, + "learning_rate": 1.2584296082642297e-05, + "loss": 0.0458, + "step": 48330 + }, + { + "epoch": 2.26, + "learning_rate": 1.258351229758751e-05, + "loss": 0.0858, + "step": 48335 + }, + { + "epoch": 2.26, + "learning_rate": 1.2582728512532725e-05, + "loss": 0.1233, + "step": 48340 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581944727477937e-05, + "loss": 0.0849, + "step": 48345 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581160942423151e-05, + "loss": 0.1166, + "step": 48350 + }, + { + "epoch": 2.26, + "learning_rate": 1.2580377157368364e-05, + "loss": 0.2759, + "step": 48355 + }, + { + "epoch": 2.26, + "learning_rate": 1.2579593372313579e-05, + "loss": 0.2935, + "step": 48360 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578809587258791e-05, + "loss": 0.3687, + "step": 48365 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578025802204004e-05, + "loss": 0.0169, + "step": 48370 + }, + { + "epoch": 2.26, + "learning_rate": 1.2577242017149217e-05, + "loss": 0.0272, + "step": 48375 + }, + { + "epoch": 2.26, + "learning_rate": 1.2576458232094431e-05, + "loss": 0.0452, + "step": 48380 + }, + { + "epoch": 2.26, + "learning_rate": 1.2575674447039645e-05, + "loss": 0.0606, + "step": 48385 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574890661984857e-05, + "loss": 0.1621, + "step": 48390 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574106876930073e-05, + "loss": 0.0805, + "step": 48395 + }, + { + "epoch": 2.26, + "learning_rate": 1.2573323091875285e-05, + "loss": 0.1328, + "step": 48400 + }, + { + "epoch": 2.26, + "learning_rate": 1.25725393068205e-05, + "loss": 0.1362, + "step": 48405 + }, + { + "epoch": 2.26, + "learning_rate": 1.2571755521765711e-05, + "loss": 0.1753, + "step": 48410 + }, + { + "epoch": 2.26, + "learning_rate": 1.2570971736710927e-05, + "loss": 0.2823, + "step": 48415 + }, + { + "epoch": 2.26, + "learning_rate": 1.257018795165614e-05, + "loss": 0.0633, + "step": 48420 + }, + { + "epoch": 2.26, + "learning_rate": 1.2569404166601353e-05, + "loss": 0.0347, + "step": 48425 + }, + { + "epoch": 2.26, + "learning_rate": 1.2568620381546565e-05, + "loss": 0.0578, + "step": 48430 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567836596491778e-05, + "loss": 0.0631, + "step": 48435 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567052811436993e-05, + "loss": 0.0828, + "step": 48440 + }, + { + "epoch": 2.26, + "learning_rate": 1.2566269026382205e-05, + "loss": 0.1166, + "step": 48445 + }, + { + "epoch": 2.26, + "learning_rate": 1.256548524132742e-05, + "loss": 0.1153, + "step": 48450 + }, + { + "epoch": 2.26, + "learning_rate": 1.2564701456272631e-05, + "loss": 0.1634, + "step": 48455 + }, + { + "epoch": 2.26, + "learning_rate": 1.2563917671217847e-05, + "loss": 0.2301, + "step": 48460 + }, + { + "epoch": 2.26, + "learning_rate": 1.256313388616306e-05, + "loss": 0.2951, + "step": 48465 + }, + { + "epoch": 2.26, + "learning_rate": 1.2562350101108273e-05, + "loss": 0.046, + "step": 48470 + }, + { + "epoch": 2.26, + "learning_rate": 1.2561566316053485e-05, + "loss": 0.0414, + "step": 48475 + }, + { + "epoch": 2.26, + "learning_rate": 1.2560782530998701e-05, + "loss": 0.0748, + "step": 48480 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559998745943913e-05, + "loss": 0.0706, + "step": 48485 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559214960889127e-05, + "loss": 0.1279, + "step": 48490 + }, + { + "epoch": 2.26, + "learning_rate": 1.2558431175834341e-05, + "loss": 0.0743, + "step": 48495 + }, + { + "epoch": 2.26, + "learning_rate": 1.2557647390779553e-05, + "loss": 0.1205, + "step": 48500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2556863605724767e-05, + "loss": 0.1911, + "step": 48505 + }, + { + "epoch": 2.26, + "learning_rate": 1.255607982066998e-05, + "loss": 0.1648, + "step": 48510 + }, + { + "epoch": 2.26, + "learning_rate": 1.2555296035615195e-05, + "loss": 0.2687, + "step": 48515 + }, + { + "epoch": 2.26, + "learning_rate": 1.2554512250560407e-05, + "loss": 0.0847, + "step": 48520 + }, + { + "epoch": 2.26, + "learning_rate": 1.2553728465505621e-05, + "loss": 0.1152, + "step": 48525 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552944680450833e-05, + "loss": 0.0977, + "step": 48530 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552160895396049e-05, + "loss": 0.0585, + "step": 48535 + }, + { + "epoch": 2.26, + "learning_rate": 1.2551377110341261e-05, + "loss": 0.1386, + "step": 48540 + }, + { + "epoch": 2.27, + "learning_rate": 1.2550593325286475e-05, + "loss": 0.1517, + "step": 48545 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549809540231687e-05, + "loss": 0.0852, + "step": 48550 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549025755176903e-05, + "loss": 0.1604, + "step": 48555 + }, + { + "epoch": 2.27, + "learning_rate": 1.2548241970122115e-05, + "loss": 0.3075, + "step": 48560 + }, + { + "epoch": 2.27, + "learning_rate": 1.2547458185067327e-05, + "loss": 0.3901, + "step": 48565 + }, + { + "epoch": 2.27, + "learning_rate": 1.2546674400012541e-05, + "loss": 0.0896, + "step": 48570 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545890614957753e-05, + "loss": 0.0526, + "step": 48575 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545106829902969e-05, + "loss": 0.0586, + "step": 48580 + }, + { + "epoch": 2.27, + "learning_rate": 1.2544323044848181e-05, + "loss": 0.0928, + "step": 48585 + }, + { + "epoch": 2.27, + "learning_rate": 1.2543539259793395e-05, + "loss": 0.1047, + "step": 48590 + }, + { + "epoch": 2.27, + "learning_rate": 1.2542755474738609e-05, + "loss": 0.144, + "step": 48595 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541971689683823e-05, + "loss": 0.1413, + "step": 48600 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541187904629035e-05, + "loss": 0.1345, + "step": 48605 + }, + { + "epoch": 2.27, + "learning_rate": 1.254040411957425e-05, + "loss": 0.2603, + "step": 48610 + }, + { + "epoch": 2.27, + "learning_rate": 1.2539620334519463e-05, + "loss": 0.3291, + "step": 48615 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538836549464677e-05, + "loss": 0.0751, + "step": 48620 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538052764409889e-05, + "loss": 0.0066, + "step": 48625 + }, + { + "epoch": 2.27, + "learning_rate": 1.2537268979355101e-05, + "loss": 0.076, + "step": 48630 + }, + { + "epoch": 2.27, + "learning_rate": 1.2536485194300317e-05, + "loss": 0.131, + "step": 48635 + }, + { + "epoch": 2.27, + "learning_rate": 1.2535701409245529e-05, + "loss": 0.0862, + "step": 48640 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534917624190743e-05, + "loss": 0.0589, + "step": 48645 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534133839135955e-05, + "loss": 0.1927, + "step": 48650 + }, + { + "epoch": 2.27, + "learning_rate": 1.253335005408117e-05, + "loss": 0.103, + "step": 48655 + }, + { + "epoch": 2.27, + "learning_rate": 1.2532566269026383e-05, + "loss": 0.305, + "step": 48660 + }, + { + "epoch": 2.27, + "learning_rate": 1.2531782483971597e-05, + "loss": 0.3637, + "step": 48665 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530998698916809e-05, + "loss": 0.0596, + "step": 48670 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530214913862025e-05, + "loss": 0.0414, + "step": 48675 + }, + { + "epoch": 2.27, + "learning_rate": 1.2529431128807237e-05, + "loss": 0.0615, + "step": 48680 + }, + { + "epoch": 2.27, + "learning_rate": 1.252864734375245e-05, + "loss": 0.1236, + "step": 48685 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527863558697663e-05, + "loss": 0.1301, + "step": 48690 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527079773642877e-05, + "loss": 0.0993, + "step": 48695 + }, + { + "epoch": 2.27, + "learning_rate": 1.252629598858809e-05, + "loss": 0.1616, + "step": 48700 + }, + { + "epoch": 2.27, + "learning_rate": 1.2525512203533303e-05, + "loss": 0.1623, + "step": 48705 + }, + { + "epoch": 2.27, + "learning_rate": 1.2524728418478519e-05, + "loss": 0.3241, + "step": 48710 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523944633423731e-05, + "loss": 0.2883, + "step": 48715 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523160848368945e-05, + "loss": 0.1131, + "step": 48720 + }, + { + "epoch": 2.27, + "learning_rate": 1.2522377063314157e-05, + "loss": 0.0587, + "step": 48725 + }, + { + "epoch": 2.27, + "learning_rate": 1.2521593278259373e-05, + "loss": 0.0835, + "step": 48730 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520809493204585e-05, + "loss": 0.0798, + "step": 48735 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520025708149799e-05, + "loss": 0.112, + "step": 48740 + }, + { + "epoch": 2.27, + "learning_rate": 1.2519241923095011e-05, + "loss": 0.0651, + "step": 48745 + }, + { + "epoch": 2.27, + "learning_rate": 1.2518458138040227e-05, + "loss": 0.248, + "step": 48750 + }, + { + "epoch": 2.27, + "learning_rate": 1.2517674352985439e-05, + "loss": 0.2368, + "step": 48755 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516890567930651e-05, + "loss": 0.1879, + "step": 48760 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516106782875865e-05, + "loss": 0.267, + "step": 48765 + }, + { + "epoch": 2.28, + "learning_rate": 1.2515322997821077e-05, + "loss": 0.0323, + "step": 48770 + }, + { + "epoch": 2.28, + "learning_rate": 1.2514539212766293e-05, + "loss": 0.0736, + "step": 48775 + }, + { + "epoch": 2.28, + "learning_rate": 1.2513755427711505e-05, + "loss": 0.0625, + "step": 48780 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512971642656719e-05, + "loss": 0.089, + "step": 48785 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512187857601931e-05, + "loss": 0.0695, + "step": 48790 + }, + { + "epoch": 2.28, + "learning_rate": 1.2511404072547147e-05, + "loss": 0.1172, + "step": 48795 + }, + { + "epoch": 2.28, + "learning_rate": 1.2510620287492359e-05, + "loss": 0.2187, + "step": 48800 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509836502437573e-05, + "loss": 0.1755, + "step": 48805 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509052717382787e-05, + "loss": 0.3494, + "step": 48810 + }, + { + "epoch": 2.28, + "learning_rate": 1.2508268932328e-05, + "loss": 0.3253, + "step": 48815 + }, + { + "epoch": 2.28, + "learning_rate": 1.2507485147273213e-05, + "loss": 0.1197, + "step": 48820 + }, + { + "epoch": 2.28, + "learning_rate": 1.2506701362218425e-05, + "loss": 0.0672, + "step": 48825 + }, + { + "epoch": 2.28, + "learning_rate": 1.250591757716364e-05, + "loss": 0.0997, + "step": 48830 + }, + { + "epoch": 2.28, + "learning_rate": 1.2505133792108853e-05, + "loss": 0.0682, + "step": 48835 + }, + { + "epoch": 2.28, + "learning_rate": 1.2504350007054067e-05, + "loss": 0.0378, + "step": 48840 + }, + { + "epoch": 2.28, + "learning_rate": 1.2503566221999279e-05, + "loss": 0.0569, + "step": 48845 + }, + { + "epoch": 2.28, + "learning_rate": 1.2502782436944494e-05, + "loss": 0.0817, + "step": 48850 + }, + { + "epoch": 2.28, + "learning_rate": 1.2501998651889707e-05, + "loss": 0.1632, + "step": 48855 + }, + { + "epoch": 2.28, + "learning_rate": 1.250121486683492e-05, + "loss": 0.2643, + "step": 48860 + }, + { + "epoch": 2.28, + "learning_rate": 1.2500431081780133e-05, + "loss": 0.419, + "step": 48865 + }, + { + "epoch": 2.28, + "learning_rate": 1.2499647296725348e-05, + "loss": 0.0458, + "step": 48870 + }, + { + "epoch": 2.28, + "learning_rate": 1.249886351167056e-05, + "loss": 0.0664, + "step": 48875 + }, + { + "epoch": 2.28, + "learning_rate": 1.2498079726615774e-05, + "loss": 0.0878, + "step": 48880 + }, + { + "epoch": 2.28, + "learning_rate": 1.2497295941560987e-05, + "loss": 0.069, + "step": 48885 + }, + { + "epoch": 2.28, + "learning_rate": 1.2496512156506199e-05, + "loss": 0.1751, + "step": 48890 + }, + { + "epoch": 2.28, + "learning_rate": 1.2495728371451415e-05, + "loss": 0.1207, + "step": 48895 + }, + { + "epoch": 2.28, + "learning_rate": 1.2494944586396627e-05, + "loss": 0.1929, + "step": 48900 + }, + { + "epoch": 2.28, + "learning_rate": 1.249416080134184e-05, + "loss": 0.2412, + "step": 48905 + }, + { + "epoch": 2.28, + "learning_rate": 1.2493377016287055e-05, + "loss": 0.2499, + "step": 48910 + }, + { + "epoch": 2.28, + "learning_rate": 1.2492593231232268e-05, + "loss": 0.3758, + "step": 48915 + }, + { + "epoch": 2.28, + "learning_rate": 1.249180944617748e-05, + "loss": 0.0861, + "step": 48920 + }, + { + "epoch": 2.28, + "learning_rate": 1.2491025661122696e-05, + "loss": 0.051, + "step": 48925 + }, + { + "epoch": 2.28, + "learning_rate": 1.2490241876067908e-05, + "loss": 0.0319, + "step": 48930 + }, + { + "epoch": 2.28, + "learning_rate": 1.2489458091013122e-05, + "loss": 0.0455, + "step": 48935 + }, + { + "epoch": 2.28, + "learning_rate": 1.2488674305958335e-05, + "loss": 0.0583, + "step": 48940 + }, + { + "epoch": 2.28, + "learning_rate": 1.248789052090355e-05, + "loss": 0.0818, + "step": 48945 + }, + { + "epoch": 2.28, + "learning_rate": 1.2487106735848762e-05, + "loss": 0.1859, + "step": 48950 + }, + { + "epoch": 2.28, + "learning_rate": 1.2486322950793975e-05, + "loss": 0.1899, + "step": 48955 + }, + { + "epoch": 2.28, + "learning_rate": 1.2485539165739189e-05, + "loss": 0.4047, + "step": 48960 + }, + { + "epoch": 2.28, + "learning_rate": 1.24847553806844e-05, + "loss": 0.3151, + "step": 48965 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483971595629616e-05, + "loss": 0.0545, + "step": 48970 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483187810574829e-05, + "loss": 0.0155, + "step": 48975 + }, + { + "epoch": 2.29, + "learning_rate": 1.2482404025520042e-05, + "loss": 0.0754, + "step": 48980 + }, + { + "epoch": 2.29, + "learning_rate": 1.2481620240465255e-05, + "loss": 0.039, + "step": 48985 + }, + { + "epoch": 2.29, + "learning_rate": 1.248083645541047e-05, + "loss": 0.08, + "step": 48990 + }, + { + "epoch": 2.29, + "learning_rate": 1.2480052670355682e-05, + "loss": 0.1233, + "step": 48995 + }, + { + "epoch": 2.29, + "learning_rate": 1.2479268885300896e-05, + "loss": 0.1165, + "step": 49000 + }, + { + "epoch": 2.29, + "learning_rate": 1.2478485100246109e-05, + "loss": 0.1813, + "step": 49005 + }, + { + "epoch": 2.29, + "learning_rate": 1.2477701315191324e-05, + "loss": 0.2639, + "step": 49010 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476917530136536e-05, + "loss": 0.3061, + "step": 49015 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476133745081749e-05, + "loss": 0.0448, + "step": 49020 + }, + { + "epoch": 2.29, + "learning_rate": 1.2475349960026964e-05, + "loss": 0.0651, + "step": 49025 + }, + { + "epoch": 2.29, + "learning_rate": 1.2474566174972176e-05, + "loss": 0.0556, + "step": 49030 + }, + { + "epoch": 2.29, + "learning_rate": 1.247378238991739e-05, + "loss": 0.0711, + "step": 49035 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472998604862603e-05, + "loss": 0.0944, + "step": 49040 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472214819807818e-05, + "loss": 0.1723, + "step": 49045 + }, + { + "epoch": 2.29, + "learning_rate": 1.247143103475303e-05, + "loss": 0.1437, + "step": 49050 + }, + { + "epoch": 2.29, + "learning_rate": 1.2470647249698244e-05, + "loss": 0.1611, + "step": 49055 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469863464643456e-05, + "loss": 0.2474, + "step": 49060 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469079679588672e-05, + "loss": 0.3262, + "step": 49065 + }, + { + "epoch": 2.29, + "learning_rate": 1.2468295894533884e-05, + "loss": 0.0379, + "step": 49070 + }, + { + "epoch": 2.29, + "learning_rate": 1.2467512109479098e-05, + "loss": 0.0395, + "step": 49075 + }, + { + "epoch": 2.29, + "learning_rate": 1.246672832442431e-05, + "loss": 0.0432, + "step": 49080 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465944539369523e-05, + "loss": 0.075, + "step": 49085 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465160754314738e-05, + "loss": 0.051, + "step": 49090 + }, + { + "epoch": 2.29, + "learning_rate": 1.246437696925995e-05, + "loss": 0.1434, + "step": 49095 + }, + { + "epoch": 2.29, + "learning_rate": 1.2463593184205164e-05, + "loss": 0.1986, + "step": 49100 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462809399150377e-05, + "loss": 0.1596, + "step": 49105 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462025614095592e-05, + "loss": 0.3813, + "step": 49110 + }, + { + "epoch": 2.29, + "learning_rate": 1.2461241829040804e-05, + "loss": 0.2625, + "step": 49115 + }, + { + "epoch": 2.29, + "learning_rate": 1.2460458043986018e-05, + "loss": 0.0344, + "step": 49120 + }, + { + "epoch": 2.29, + "learning_rate": 1.2459674258931232e-05, + "loss": 0.034, + "step": 49125 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458890473876446e-05, + "loss": 0.0638, + "step": 49130 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458106688821658e-05, + "loss": 0.045, + "step": 49135 + }, + { + "epoch": 2.29, + "learning_rate": 1.2457322903766872e-05, + "loss": 0.0743, + "step": 49140 + }, + { + "epoch": 2.29, + "learning_rate": 1.2456539118712086e-05, + "loss": 0.1166, + "step": 49145 + }, + { + "epoch": 2.29, + "learning_rate": 1.2455755333657298e-05, + "loss": 0.1078, + "step": 49150 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454971548602512e-05, + "loss": 0.1802, + "step": 49155 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454187763547724e-05, + "loss": 0.3078, + "step": 49160 + }, + { + "epoch": 2.29, + "learning_rate": 1.245340397849294e-05, + "loss": 0.3489, + "step": 49165 + }, + { + "epoch": 2.29, + "learning_rate": 1.2452620193438152e-05, + "loss": 0.0332, + "step": 49170 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451836408383366e-05, + "loss": 0.0592, + "step": 49175 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451052623328578e-05, + "loss": 0.0291, + "step": 49180 + }, + { + "epoch": 2.3, + "learning_rate": 1.2450268838273794e-05, + "loss": 0.0695, + "step": 49185 + }, + { + "epoch": 2.3, + "learning_rate": 1.2449485053219006e-05, + "loss": 0.081, + "step": 49190 + }, + { + "epoch": 2.3, + "learning_rate": 1.244870126816422e-05, + "loss": 0.1311, + "step": 49195 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447917483109432e-05, + "loss": 0.1058, + "step": 49200 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447133698054648e-05, + "loss": 0.1388, + "step": 49205 + }, + { + "epoch": 2.3, + "learning_rate": 1.244634991299986e-05, + "loss": 0.3073, + "step": 49210 + }, + { + "epoch": 2.3, + "learning_rate": 1.2445566127945072e-05, + "loss": 0.2426, + "step": 49215 + }, + { + "epoch": 2.3, + "learning_rate": 1.2444782342890286e-05, + "loss": 0.0957, + "step": 49220 + }, + { + "epoch": 2.3, + "learning_rate": 1.24439985578355e-05, + "loss": 0.055, + "step": 49225 + }, + { + "epoch": 2.3, + "learning_rate": 1.2443214772780714e-05, + "loss": 0.0305, + "step": 49230 + }, + { + "epoch": 2.3, + "learning_rate": 1.2442430987725926e-05, + "loss": 0.1408, + "step": 49235 + }, + { + "epoch": 2.3, + "learning_rate": 1.2441647202671142e-05, + "loss": 0.0908, + "step": 49240 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440863417616354e-05, + "loss": 0.0978, + "step": 49245 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440079632561568e-05, + "loss": 0.1482, + "step": 49250 + }, + { + "epoch": 2.3, + "learning_rate": 1.243929584750678e-05, + "loss": 0.2039, + "step": 49255 + }, + { + "epoch": 2.3, + "learning_rate": 1.2438512062451996e-05, + "loss": 0.3077, + "step": 49260 + }, + { + "epoch": 2.3, + "learning_rate": 1.2437728277397208e-05, + "loss": 0.1668, + "step": 49265 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436944492342422e-05, + "loss": 0.0675, + "step": 49270 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436160707287634e-05, + "loss": 0.0325, + "step": 49275 + }, + { + "epoch": 2.3, + "learning_rate": 1.2435376922232846e-05, + "loss": 0.0352, + "step": 49280 + }, + { + "epoch": 2.3, + "learning_rate": 1.2434593137178062e-05, + "loss": 0.0273, + "step": 49285 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433809352123274e-05, + "loss": 0.1357, + "step": 49290 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433025567068488e-05, + "loss": 0.1806, + "step": 49295 + }, + { + "epoch": 2.3, + "learning_rate": 1.24322417820137e-05, + "loss": 0.1102, + "step": 49300 + }, + { + "epoch": 2.3, + "learning_rate": 1.2431457996958916e-05, + "loss": 0.2882, + "step": 49305 + }, + { + "epoch": 2.3, + "learning_rate": 1.2430674211904128e-05, + "loss": 0.301, + "step": 49310 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429890426849342e-05, + "loss": 0.2907, + "step": 49315 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429106641794554e-05, + "loss": 0.0655, + "step": 49320 + }, + { + "epoch": 2.3, + "learning_rate": 1.242832285673977e-05, + "loss": 0.0572, + "step": 49325 + }, + { + "epoch": 2.3, + "learning_rate": 1.2427539071684982e-05, + "loss": 0.0607, + "step": 49330 + }, + { + "epoch": 2.3, + "learning_rate": 1.2426755286630196e-05, + "loss": 0.0779, + "step": 49335 + }, + { + "epoch": 2.3, + "learning_rate": 1.242597150157541e-05, + "loss": 0.0683, + "step": 49340 + }, + { + "epoch": 2.3, + "learning_rate": 1.2425187716520622e-05, + "loss": 0.1186, + "step": 49345 + }, + { + "epoch": 2.3, + "learning_rate": 1.2424403931465836e-05, + "loss": 0.1542, + "step": 49350 + }, + { + "epoch": 2.3, + "learning_rate": 1.2423620146411048e-05, + "loss": 0.11, + "step": 49355 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422836361356264e-05, + "loss": 0.2608, + "step": 49360 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422052576301476e-05, + "loss": 0.3212, + "step": 49365 + }, + { + "epoch": 2.3, + "learning_rate": 1.242126879124669e-05, + "loss": 0.031, + "step": 49370 + }, + { + "epoch": 2.3, + "learning_rate": 1.2420485006191902e-05, + "loss": 0.0297, + "step": 49375 + }, + { + "epoch": 2.3, + "learning_rate": 1.2419701221137118e-05, + "loss": 0.1169, + "step": 49380 + }, + { + "epoch": 2.3, + "learning_rate": 1.241891743608233e-05, + "loss": 0.0557, + "step": 49385 + }, + { + "epoch": 2.3, + "learning_rate": 1.2418133651027544e-05, + "loss": 0.1283, + "step": 49390 + }, + { + "epoch": 2.3, + "learning_rate": 1.2417349865972756e-05, + "loss": 0.114, + "step": 49395 + }, + { + "epoch": 2.31, + "learning_rate": 1.2416566080917972e-05, + "loss": 0.0548, + "step": 49400 + }, + { + "epoch": 2.31, + "learning_rate": 1.2415782295863184e-05, + "loss": 0.1379, + "step": 49405 + }, + { + "epoch": 2.31, + "learning_rate": 1.2414998510808396e-05, + "loss": 0.2058, + "step": 49410 + }, + { + "epoch": 2.31, + "learning_rate": 1.241421472575361e-05, + "loss": 0.2836, + "step": 49415 + }, + { + "epoch": 2.31, + "learning_rate": 1.2413430940698822e-05, + "loss": 0.0669, + "step": 49420 + }, + { + "epoch": 2.31, + "learning_rate": 1.2412647155644038e-05, + "loss": 0.0242, + "step": 49425 + }, + { + "epoch": 2.31, + "learning_rate": 1.241186337058925e-05, + "loss": 0.0977, + "step": 49430 + }, + { + "epoch": 2.31, + "learning_rate": 1.2411079585534464e-05, + "loss": 0.0507, + "step": 49435 + }, + { + "epoch": 2.31, + "learning_rate": 1.2410295800479678e-05, + "loss": 0.063, + "step": 49440 + }, + { + "epoch": 2.31, + "learning_rate": 1.2409512015424892e-05, + "loss": 0.1527, + "step": 49445 + }, + { + "epoch": 2.31, + "learning_rate": 1.2408728230370104e-05, + "loss": 0.0736, + "step": 49450 + }, + { + "epoch": 2.31, + "learning_rate": 1.240794444531532e-05, + "loss": 0.2219, + "step": 49455 + }, + { + "epoch": 2.31, + "learning_rate": 1.2407160660260532e-05, + "loss": 0.2611, + "step": 49460 + }, + { + "epoch": 2.31, + "learning_rate": 1.2406376875205746e-05, + "loss": 0.3693, + "step": 49465 + }, + { + "epoch": 2.31, + "learning_rate": 1.2405593090150958e-05, + "loss": 0.1247, + "step": 49470 + }, + { + "epoch": 2.31, + "learning_rate": 1.240480930509617e-05, + "loss": 0.0177, + "step": 49475 + }, + { + "epoch": 2.31, + "learning_rate": 1.2404025520041386e-05, + "loss": 0.0873, + "step": 49480 + }, + { + "epoch": 2.31, + "learning_rate": 1.2403241734986598e-05, + "loss": 0.0888, + "step": 49485 + }, + { + "epoch": 2.31, + "learning_rate": 1.2402457949931812e-05, + "loss": 0.0582, + "step": 49490 + }, + { + "epoch": 2.31, + "learning_rate": 1.2401674164877024e-05, + "loss": 0.1075, + "step": 49495 + }, + { + "epoch": 2.31, + "learning_rate": 1.240089037982224e-05, + "loss": 0.1859, + "step": 49500 + }, + { + "epoch": 2.31, + "learning_rate": 1.2400106594767452e-05, + "loss": 0.1537, + "step": 49505 + }, + { + "epoch": 2.31, + "learning_rate": 1.2399322809712666e-05, + "loss": 0.2297, + "step": 49510 + }, + { + "epoch": 2.31, + "learning_rate": 1.2398695781668836e-05, + "loss": 0.2882, + "step": 49515 + }, + { + "epoch": 2.31, + "learning_rate": 1.239791199661405e-05, + "loss": 0.0205, + "step": 49520 + }, + { + "epoch": 2.31, + "learning_rate": 1.2397128211559264e-05, + "loss": 0.0498, + "step": 49525 + }, + { + "epoch": 2.31, + "learning_rate": 1.2396344426504476e-05, + "loss": 0.0858, + "step": 49530 + }, + { + "epoch": 2.31, + "learning_rate": 1.2395560641449692e-05, + "loss": 0.1835, + "step": 49535 + }, + { + "epoch": 2.31, + "learning_rate": 1.2394776856394904e-05, + "loss": 0.1171, + "step": 49540 + }, + { + "epoch": 2.31, + "learning_rate": 1.2393993071340116e-05, + "loss": 0.0876, + "step": 49545 + }, + { + "epoch": 2.31, + "learning_rate": 1.239320928628533e-05, + "loss": 0.0962, + "step": 49550 + }, + { + "epoch": 2.31, + "learning_rate": 1.2392425501230542e-05, + "loss": 0.0861, + "step": 49555 + }, + { + "epoch": 2.31, + "learning_rate": 1.2391641716175758e-05, + "loss": 0.2315, + "step": 49560 + }, + { + "epoch": 2.31, + "learning_rate": 1.239085793112097e-05, + "loss": 0.278, + "step": 49565 + }, + { + "epoch": 2.31, + "learning_rate": 1.2390074146066184e-05, + "loss": 0.0506, + "step": 49570 + }, + { + "epoch": 2.31, + "learning_rate": 1.2389290361011396e-05, + "loss": 0.0443, + "step": 49575 + }, + { + "epoch": 2.31, + "learning_rate": 1.2388506575956612e-05, + "loss": 0.1289, + "step": 49580 + }, + { + "epoch": 2.31, + "learning_rate": 1.2387722790901824e-05, + "loss": 0.0433, + "step": 49585 + }, + { + "epoch": 2.31, + "learning_rate": 1.2386939005847038e-05, + "loss": 0.1379, + "step": 49590 + }, + { + "epoch": 2.31, + "learning_rate": 1.238615522079225e-05, + "loss": 0.108, + "step": 49595 + }, + { + "epoch": 2.31, + "learning_rate": 1.2385371435737466e-05, + "loss": 0.1276, + "step": 49600 + }, + { + "epoch": 2.31, + "learning_rate": 1.2384587650682678e-05, + "loss": 0.1717, + "step": 49605 + }, + { + "epoch": 2.31, + "learning_rate": 1.238380386562789e-05, + "loss": 0.2584, + "step": 49610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2383020080573106e-05, + "loss": 0.2265, + "step": 49615 + }, + { + "epoch": 2.32, + "learning_rate": 1.2382236295518318e-05, + "loss": 0.1069, + "step": 49620 + }, + { + "epoch": 2.32, + "learning_rate": 1.2381452510463532e-05, + "loss": 0.0404, + "step": 49625 + }, + { + "epoch": 2.32, + "learning_rate": 1.2380668725408744e-05, + "loss": 0.0704, + "step": 49630 + }, + { + "epoch": 2.32, + "learning_rate": 1.237988494035396e-05, + "loss": 0.0985, + "step": 49635 + }, + { + "epoch": 2.32, + "learning_rate": 1.2379101155299172e-05, + "loss": 0.084, + "step": 49640 + }, + { + "epoch": 2.32, + "learning_rate": 1.2378317370244386e-05, + "loss": 0.2373, + "step": 49645 + }, + { + "epoch": 2.32, + "learning_rate": 1.2377533585189598e-05, + "loss": 0.0689, + "step": 49650 + }, + { + "epoch": 2.32, + "learning_rate": 1.2376749800134814e-05, + "loss": 0.1609, + "step": 49655 + }, + { + "epoch": 2.32, + "learning_rate": 1.2375966015080026e-05, + "loss": 0.2887, + "step": 49660 + }, + { + "epoch": 2.32, + "learning_rate": 1.237518223002524e-05, + "loss": 0.2715, + "step": 49665 + }, + { + "epoch": 2.32, + "learning_rate": 1.2374398444970452e-05, + "loss": 0.0749, + "step": 49670 + }, + { + "epoch": 2.32, + "learning_rate": 1.2373614659915664e-05, + "loss": 0.0717, + "step": 49675 + }, + { + "epoch": 2.32, + "learning_rate": 1.237283087486088e-05, + "loss": 0.0777, + "step": 49680 + }, + { + "epoch": 2.32, + "learning_rate": 1.2372047089806092e-05, + "loss": 0.0669, + "step": 49685 + }, + { + "epoch": 2.32, + "learning_rate": 1.2371263304751306e-05, + "loss": 0.0836, + "step": 49690 + }, + { + "epoch": 2.32, + "learning_rate": 1.2370479519696518e-05, + "loss": 0.2553, + "step": 49695 + }, + { + "epoch": 2.32, + "learning_rate": 1.2369695734641734e-05, + "loss": 0.1672, + "step": 49700 + }, + { + "epoch": 2.32, + "learning_rate": 1.2368911949586946e-05, + "loss": 0.1128, + "step": 49705 + }, + { + "epoch": 2.32, + "learning_rate": 1.236812816453216e-05, + "loss": 0.2423, + "step": 49710 + }, + { + "epoch": 2.32, + "learning_rate": 1.2367344379477374e-05, + "loss": 0.3124, + "step": 49715 + }, + { + "epoch": 2.32, + "learning_rate": 1.2366560594422588e-05, + "loss": 0.0414, + "step": 49720 + }, + { + "epoch": 2.32, + "learning_rate": 1.23657768093678e-05, + "loss": 0.0981, + "step": 49725 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364993024313014e-05, + "loss": 0.1131, + "step": 49730 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364209239258228e-05, + "loss": 0.0668, + "step": 49735 + }, + { + "epoch": 2.32, + "learning_rate": 1.236342545420344e-05, + "loss": 0.1405, + "step": 49740 + }, + { + "epoch": 2.32, + "learning_rate": 1.2362641669148654e-05, + "loss": 0.0965, + "step": 49745 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361857884093866e-05, + "loss": 0.1463, + "step": 49750 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361074099039082e-05, + "loss": 0.2175, + "step": 49755 + }, + { + "epoch": 2.32, + "learning_rate": 1.2360290313984294e-05, + "loss": 0.1888, + "step": 49760 + }, + { + "epoch": 2.32, + "learning_rate": 1.2359506528929508e-05, + "loss": 0.3132, + "step": 49765 + }, + { + "epoch": 2.32, + "learning_rate": 1.235872274387472e-05, + "loss": 0.1664, + "step": 49770 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357938958819935e-05, + "loss": 0.0261, + "step": 49775 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357155173765148e-05, + "loss": 0.015, + "step": 49780 + }, + { + "epoch": 2.32, + "learning_rate": 1.2356371388710362e-05, + "loss": 0.0923, + "step": 49785 + }, + { + "epoch": 2.32, + "learning_rate": 1.2355587603655574e-05, + "loss": 0.0871, + "step": 49790 + }, + { + "epoch": 2.32, + "learning_rate": 1.235480381860079e-05, + "loss": 0.1367, + "step": 49795 + }, + { + "epoch": 2.32, + "learning_rate": 1.2354020033546002e-05, + "loss": 0.1528, + "step": 49800 + }, + { + "epoch": 2.32, + "learning_rate": 1.2353236248491214e-05, + "loss": 0.1766, + "step": 49805 + }, + { + "epoch": 2.32, + "learning_rate": 1.2352452463436428e-05, + "loss": 0.2554, + "step": 49810 + }, + { + "epoch": 2.32, + "learning_rate": 1.2351668678381642e-05, + "loss": 0.3166, + "step": 49815 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350884893326856e-05, + "loss": 0.0502, + "step": 49820 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350101108272068e-05, + "loss": 0.0437, + "step": 49825 + }, + { + "epoch": 2.33, + "learning_rate": 1.2349317323217282e-05, + "loss": 0.0936, + "step": 49830 + }, + { + "epoch": 2.33, + "learning_rate": 1.2348533538162496e-05, + "loss": 0.0694, + "step": 49835 + }, + { + "epoch": 2.33, + "learning_rate": 1.234774975310771e-05, + "loss": 0.0366, + "step": 49840 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346965968052922e-05, + "loss": 0.0825, + "step": 49845 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346182182998137e-05, + "loss": 0.1329, + "step": 49850 + }, + { + "epoch": 2.33, + "learning_rate": 1.234539839794335e-05, + "loss": 0.0959, + "step": 49855 + }, + { + "epoch": 2.33, + "learning_rate": 1.2344614612888563e-05, + "loss": 0.1717, + "step": 49860 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343830827833776e-05, + "loss": 0.2998, + "step": 49865 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343047042778988e-05, + "loss": 0.123, + "step": 49870 + }, + { + "epoch": 2.33, + "learning_rate": 1.2342263257724203e-05, + "loss": 0.0147, + "step": 49875 + }, + { + "epoch": 2.33, + "learning_rate": 1.2341479472669416e-05, + "loss": 0.0361, + "step": 49880 + }, + { + "epoch": 2.33, + "learning_rate": 1.234069568761463e-05, + "loss": 0.089, + "step": 49885 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339911902559842e-05, + "loss": 0.057, + "step": 49890 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339128117505057e-05, + "loss": 0.066, + "step": 49895 + }, + { + "epoch": 2.33, + "learning_rate": 1.233834433245027e-05, + "loss": 0.2305, + "step": 49900 + }, + { + "epoch": 2.33, + "learning_rate": 1.2337560547395483e-05, + "loss": 0.2467, + "step": 49905 + }, + { + "epoch": 2.33, + "learning_rate": 1.2336776762340696e-05, + "loss": 0.226, + "step": 49910 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335992977285911e-05, + "loss": 0.2258, + "step": 49915 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335209192231123e-05, + "loss": 0.0611, + "step": 49920 + }, + { + "epoch": 2.33, + "learning_rate": 1.2334425407176337e-05, + "loss": 0.0755, + "step": 49925 + }, + { + "epoch": 2.33, + "learning_rate": 1.2333641622121551e-05, + "loss": 0.0437, + "step": 49930 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332857837066764e-05, + "loss": 0.1033, + "step": 49935 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332074052011977e-05, + "loss": 0.0961, + "step": 49940 + }, + { + "epoch": 2.33, + "learning_rate": 1.233129026695719e-05, + "loss": 0.0845, + "step": 49945 + }, + { + "epoch": 2.33, + "learning_rate": 1.2330506481902405e-05, + "loss": 0.1404, + "step": 49950 + }, + { + "epoch": 2.33, + "learning_rate": 1.2329722696847617e-05, + "loss": 0.2093, + "step": 49955 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328938911792831e-05, + "loss": 0.1984, + "step": 49960 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328155126738044e-05, + "loss": 0.2746, + "step": 49965 + }, + { + "epoch": 2.33, + "learning_rate": 1.232737134168326e-05, + "loss": 0.0631, + "step": 49970 + }, + { + "epoch": 2.33, + "learning_rate": 1.2326587556628471e-05, + "loss": 0.0268, + "step": 49975 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325803771573685e-05, + "loss": 0.0344, + "step": 49980 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325019986518897e-05, + "loss": 0.0861, + "step": 49985 + }, + { + "epoch": 2.33, + "learning_rate": 1.2324236201464113e-05, + "loss": 0.1821, + "step": 49990 + }, + { + "epoch": 2.33, + "learning_rate": 1.2323452416409325e-05, + "loss": 0.1076, + "step": 49995 + }, + { + "epoch": 2.33, + "learning_rate": 1.2322668631354538e-05, + "loss": 0.1184, + "step": 50000 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321884846299751e-05, + "loss": 0.0798, + "step": 50005 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321101061244964e-05, + "loss": 0.2843, + "step": 50010 + }, + { + "epoch": 2.33, + "learning_rate": 1.232031727619018e-05, + "loss": 0.1617, + "step": 50015 + }, + { + "epoch": 2.33, + "learning_rate": 1.2319533491135391e-05, + "loss": 0.0382, + "step": 50020 + }, + { + "epoch": 2.33, + "learning_rate": 1.2318749706080605e-05, + "loss": 0.0547, + "step": 50025 + }, + { + "epoch": 2.33, + "learning_rate": 1.231796592102582e-05, + "loss": 0.0349, + "step": 50030 + }, + { + "epoch": 2.33, + "learning_rate": 1.2317182135971033e-05, + "loss": 0.0498, + "step": 50035 + }, + { + "epoch": 2.33, + "learning_rate": 1.2316398350916245e-05, + "loss": 0.0729, + "step": 50040 + }, + { + "epoch": 2.34, + "learning_rate": 1.231561456586146e-05, + "loss": 0.1601, + "step": 50045 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314830780806673e-05, + "loss": 0.1798, + "step": 50050 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314046995751887e-05, + "loss": 0.1981, + "step": 50055 + }, + { + "epoch": 2.34, + "learning_rate": 1.23132632106971e-05, + "loss": 0.3657, + "step": 50060 + }, + { + "epoch": 2.34, + "learning_rate": 1.2312479425642312e-05, + "loss": 0.3816, + "step": 50065 + }, + { + "epoch": 2.34, + "learning_rate": 1.2311695640587527e-05, + "loss": 0.0421, + "step": 50070 + }, + { + "epoch": 2.34, + "learning_rate": 1.231091185553274e-05, + "loss": 0.0836, + "step": 50075 + }, + { + "epoch": 2.34, + "learning_rate": 1.2310128070477953e-05, + "loss": 0.0849, + "step": 50080 + }, + { + "epoch": 2.34, + "learning_rate": 1.2309344285423165e-05, + "loss": 0.1037, + "step": 50085 + }, + { + "epoch": 2.34, + "learning_rate": 1.2308560500368381e-05, + "loss": 0.1266, + "step": 50090 + }, + { + "epoch": 2.34, + "learning_rate": 1.2307776715313593e-05, + "loss": 0.1167, + "step": 50095 + }, + { + "epoch": 2.34, + "learning_rate": 1.2306992930258807e-05, + "loss": 0.1929, + "step": 50100 + }, + { + "epoch": 2.34, + "learning_rate": 1.230620914520402e-05, + "loss": 0.2002, + "step": 50105 + }, + { + "epoch": 2.34, + "learning_rate": 1.2305425360149235e-05, + "loss": 0.3381, + "step": 50110 + }, + { + "epoch": 2.34, + "learning_rate": 1.2304641575094447e-05, + "loss": 0.3687, + "step": 50115 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303857790039661e-05, + "loss": 0.0699, + "step": 50120 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303074004984873e-05, + "loss": 0.0607, + "step": 50125 + }, + { + "epoch": 2.34, + "learning_rate": 1.2302290219930087e-05, + "loss": 0.0523, + "step": 50130 + }, + { + "epoch": 2.34, + "learning_rate": 1.2301506434875301e-05, + "loss": 0.0631, + "step": 50135 + }, + { + "epoch": 2.34, + "learning_rate": 1.2300722649820513e-05, + "loss": 0.1482, + "step": 50140 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299938864765727e-05, + "loss": 0.1021, + "step": 50145 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299155079710941e-05, + "loss": 0.1413, + "step": 50150 + }, + { + "epoch": 2.34, + "learning_rate": 1.2298371294656155e-05, + "loss": 0.1853, + "step": 50155 + }, + { + "epoch": 2.34, + "learning_rate": 1.2297587509601367e-05, + "loss": 0.2019, + "step": 50160 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296803724546583e-05, + "loss": 0.3643, + "step": 50165 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296019939491795e-05, + "loss": 0.0704, + "step": 50170 + }, + { + "epoch": 2.34, + "learning_rate": 1.2295236154437009e-05, + "loss": 0.0761, + "step": 50175 + }, + { + "epoch": 2.34, + "learning_rate": 1.2294452369382221e-05, + "loss": 0.0657, + "step": 50180 + }, + { + "epoch": 2.34, + "learning_rate": 1.2293668584327437e-05, + "loss": 0.0956, + "step": 50185 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292884799272649e-05, + "loss": 0.057, + "step": 50190 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292101014217861e-05, + "loss": 0.0977, + "step": 50195 + }, + { + "epoch": 2.34, + "learning_rate": 1.2291317229163075e-05, + "loss": 0.1418, + "step": 50200 + }, + { + "epoch": 2.34, + "learning_rate": 1.2290533444108287e-05, + "loss": 0.2145, + "step": 50205 + }, + { + "epoch": 2.34, + "learning_rate": 1.2289749659053503e-05, + "loss": 0.1492, + "step": 50210 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288965873998715e-05, + "loss": 0.3733, + "step": 50215 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288182088943929e-05, + "loss": 0.0474, + "step": 50220 + }, + { + "epoch": 2.34, + "learning_rate": 1.2287398303889141e-05, + "loss": 0.0178, + "step": 50225 + }, + { + "epoch": 2.34, + "learning_rate": 1.2286614518834357e-05, + "loss": 0.1194, + "step": 50230 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285830733779569e-05, + "loss": 0.0799, + "step": 50235 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285046948724783e-05, + "loss": 0.1206, + "step": 50240 + }, + { + "epoch": 2.34, + "learning_rate": 1.2284263163669997e-05, + "loss": 0.1841, + "step": 50245 + }, + { + "epoch": 2.34, + "learning_rate": 1.228347937861521e-05, + "loss": 0.1276, + "step": 50250 + }, + { + "epoch": 2.34, + "learning_rate": 1.2282695593560423e-05, + "loss": 0.2027, + "step": 50255 + }, + { + "epoch": 2.35, + "learning_rate": 1.2281911808505635e-05, + "loss": 0.1799, + "step": 50260 + }, + { + "epoch": 2.35, + "learning_rate": 1.228112802345085e-05, + "loss": 0.375, + "step": 50265 + }, + { + "epoch": 2.35, + "learning_rate": 1.2280344238396063e-05, + "loss": 0.0396, + "step": 50270 + }, + { + "epoch": 2.35, + "learning_rate": 1.2279560453341277e-05, + "loss": 0.0545, + "step": 50275 + }, + { + "epoch": 2.35, + "learning_rate": 1.2278776668286489e-05, + "loss": 0.0606, + "step": 50280 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277992883231705e-05, + "loss": 0.0914, + "step": 50285 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277209098176917e-05, + "loss": 0.0412, + "step": 50290 + }, + { + "epoch": 2.35, + "learning_rate": 1.2276425313122131e-05, + "loss": 0.1434, + "step": 50295 + }, + { + "epoch": 2.35, + "learning_rate": 1.2275641528067343e-05, + "loss": 0.2036, + "step": 50300 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274857743012559e-05, + "loss": 0.219, + "step": 50305 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274073957957771e-05, + "loss": 0.263, + "step": 50310 + }, + { + "epoch": 2.35, + "learning_rate": 1.2273290172902985e-05, + "loss": 0.2972, + "step": 50315 + }, + { + "epoch": 2.35, + "learning_rate": 1.2272506387848197e-05, + "loss": 0.0396, + "step": 50320 + }, + { + "epoch": 2.35, + "learning_rate": 1.227172260279341e-05, + "loss": 0.0763, + "step": 50325 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270938817738625e-05, + "loss": 0.0389, + "step": 50330 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270155032683837e-05, + "loss": 0.0813, + "step": 50335 + }, + { + "epoch": 2.35, + "learning_rate": 1.2269371247629051e-05, + "loss": 0.1306, + "step": 50340 + }, + { + "epoch": 2.35, + "learning_rate": 1.2268587462574265e-05, + "loss": 0.0779, + "step": 50345 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267803677519479e-05, + "loss": 0.187, + "step": 50350 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267019892464691e-05, + "loss": 0.1718, + "step": 50355 + }, + { + "epoch": 2.35, + "learning_rate": 1.2266236107409905e-05, + "loss": 0.3207, + "step": 50360 + }, + { + "epoch": 2.35, + "learning_rate": 1.2265452322355119e-05, + "loss": 0.2648, + "step": 50365 + }, + { + "epoch": 2.35, + "learning_rate": 1.2264668537300333e-05, + "loss": 0.0264, + "step": 50370 + }, + { + "epoch": 2.35, + "learning_rate": 1.2263884752245545e-05, + "loss": 0.0418, + "step": 50375 + }, + { + "epoch": 2.35, + "learning_rate": 1.226310096719076e-05, + "loss": 0.0412, + "step": 50380 + }, + { + "epoch": 2.35, + "learning_rate": 1.2262317182135973e-05, + "loss": 0.0641, + "step": 50385 + }, + { + "epoch": 2.35, + "learning_rate": 1.2261533397081185e-05, + "loss": 0.1354, + "step": 50390 + }, + { + "epoch": 2.35, + "learning_rate": 1.2260749612026399e-05, + "loss": 0.0955, + "step": 50395 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259965826971611e-05, + "loss": 0.1032, + "step": 50400 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259182041916827e-05, + "loss": 0.2132, + "step": 50405 + }, + { + "epoch": 2.35, + "learning_rate": 1.2258398256862039e-05, + "loss": 0.2025, + "step": 50410 + }, + { + "epoch": 2.35, + "learning_rate": 1.2257614471807253e-05, + "loss": 0.2278, + "step": 50415 + }, + { + "epoch": 2.35, + "learning_rate": 1.2256830686752465e-05, + "loss": 0.0977, + "step": 50420 + }, + { + "epoch": 2.35, + "learning_rate": 1.225604690169768e-05, + "loss": 0.0473, + "step": 50425 + }, + { + "epoch": 2.35, + "learning_rate": 1.2255263116642893e-05, + "loss": 0.0606, + "step": 50430 + }, + { + "epoch": 2.35, + "learning_rate": 1.2254479331588107e-05, + "loss": 0.0981, + "step": 50435 + }, + { + "epoch": 2.35, + "learning_rate": 1.2253695546533319e-05, + "loss": 0.0818, + "step": 50440 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252911761478534e-05, + "loss": 0.1135, + "step": 50445 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252127976423747e-05, + "loss": 0.1628, + "step": 50450 + }, + { + "epoch": 2.35, + "learning_rate": 1.2251344191368959e-05, + "loss": 0.1521, + "step": 50455 + }, + { + "epoch": 2.35, + "learning_rate": 1.2250560406314174e-05, + "loss": 0.3401, + "step": 50460 + }, + { + "epoch": 2.35, + "learning_rate": 1.2249776621259387e-05, + "loss": 0.2756, + "step": 50465 + }, + { + "epoch": 2.35, + "learning_rate": 1.22489928362046e-05, + "loss": 0.0554, + "step": 50470 + }, + { + "epoch": 2.36, + "learning_rate": 1.2248209051149813e-05, + "loss": 0.0253, + "step": 50475 + }, + { + "epoch": 2.36, + "learning_rate": 1.2247425266095028e-05, + "loss": 0.1059, + "step": 50480 + }, + { + "epoch": 2.36, + "learning_rate": 1.224664148104024e-05, + "loss": 0.1775, + "step": 50485 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245857695985455e-05, + "loss": 0.0703, + "step": 50490 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245073910930667e-05, + "loss": 0.0463, + "step": 50495 + }, + { + "epoch": 2.36, + "learning_rate": 1.2244290125875882e-05, + "loss": 0.1483, + "step": 50500 + }, + { + "epoch": 2.36, + "learning_rate": 1.2243506340821095e-05, + "loss": 0.2113, + "step": 50505 + }, + { + "epoch": 2.36, + "learning_rate": 1.2242722555766308e-05, + "loss": 0.2143, + "step": 50510 + }, + { + "epoch": 2.36, + "learning_rate": 1.224193877071152e-05, + "loss": 0.3792, + "step": 50515 + }, + { + "epoch": 2.36, + "learning_rate": 1.2241154985656733e-05, + "loss": 0.0368, + "step": 50520 + }, + { + "epoch": 2.36, + "learning_rate": 1.2240371200601948e-05, + "loss": 0.0232, + "step": 50525 + }, + { + "epoch": 2.36, + "learning_rate": 1.223958741554716e-05, + "loss": 0.0197, + "step": 50530 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238803630492375e-05, + "loss": 0.1343, + "step": 50535 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238019845437587e-05, + "loss": 0.1147, + "step": 50540 + }, + { + "epoch": 2.36, + "learning_rate": 1.2237236060382802e-05, + "loss": 0.135, + "step": 50545 + }, + { + "epoch": 2.36, + "learning_rate": 1.2236452275328015e-05, + "loss": 0.1731, + "step": 50550 + }, + { + "epoch": 2.36, + "learning_rate": 1.2235668490273229e-05, + "loss": 0.1098, + "step": 50555 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234884705218442e-05, + "loss": 0.2273, + "step": 50560 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234100920163656e-05, + "loss": 0.2623, + "step": 50565 + }, + { + "epoch": 2.36, + "learning_rate": 1.2233317135108869e-05, + "loss": 0.0419, + "step": 50570 + }, + { + "epoch": 2.36, + "learning_rate": 1.2232533350054082e-05, + "loss": 0.0451, + "step": 50575 + }, + { + "epoch": 2.36, + "learning_rate": 1.2231749564999296e-05, + "loss": 0.0982, + "step": 50580 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230965779944509e-05, + "loss": 0.0518, + "step": 50585 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230181994889722e-05, + "loss": 0.0991, + "step": 50590 + }, + { + "epoch": 2.36, + "learning_rate": 1.2229398209834935e-05, + "loss": 0.0705, + "step": 50595 + }, + { + "epoch": 2.36, + "learning_rate": 1.222861442478015e-05, + "loss": 0.1053, + "step": 50600 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227830639725363e-05, + "loss": 0.1624, + "step": 50605 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227046854670576e-05, + "loss": 0.2793, + "step": 50610 + }, + { + "epoch": 2.36, + "learning_rate": 1.2226263069615789e-05, + "loss": 0.2738, + "step": 50615 + }, + { + "epoch": 2.36, + "learning_rate": 1.2225479284561004e-05, + "loss": 0.0397, + "step": 50620 + }, + { + "epoch": 2.36, + "learning_rate": 1.2224695499506216e-05, + "loss": 0.0552, + "step": 50625 + }, + { + "epoch": 2.36, + "learning_rate": 1.222391171445143e-05, + "loss": 0.1016, + "step": 50630 + }, + { + "epoch": 2.36, + "learning_rate": 1.2223127929396643e-05, + "loss": 0.1722, + "step": 50635 + }, + { + "epoch": 2.36, + "learning_rate": 1.2222344144341858e-05, + "loss": 0.1134, + "step": 50640 + }, + { + "epoch": 2.36, + "learning_rate": 1.222156035928707e-05, + "loss": 0.1319, + "step": 50645 + }, + { + "epoch": 2.36, + "learning_rate": 1.2220776574232283e-05, + "loss": 0.1036, + "step": 50650 + }, + { + "epoch": 2.36, + "learning_rate": 1.2219992789177496e-05, + "loss": 0.1984, + "step": 50655 + }, + { + "epoch": 2.36, + "learning_rate": 1.221920900412271e-05, + "loss": 0.2497, + "step": 50660 + }, + { + "epoch": 2.36, + "learning_rate": 1.2218425219067924e-05, + "loss": 0.3259, + "step": 50665 + }, + { + "epoch": 2.36, + "learning_rate": 1.2217641434013137e-05, + "loss": 0.0318, + "step": 50670 + }, + { + "epoch": 2.36, + "learning_rate": 1.221685764895835e-05, + "loss": 0.0643, + "step": 50675 + }, + { + "epoch": 2.36, + "learning_rate": 1.2216073863903564e-05, + "loss": 0.1131, + "step": 50680 + }, + { + "epoch": 2.37, + "learning_rate": 1.2215290078848778e-05, + "loss": 0.073, + "step": 50685 + }, + { + "epoch": 2.37, + "learning_rate": 1.221450629379399e-05, + "loss": 0.1513, + "step": 50690 + }, + { + "epoch": 2.37, + "learning_rate": 1.2213722508739206e-05, + "loss": 0.1274, + "step": 50695 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212938723684418e-05, + "loss": 0.1174, + "step": 50700 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212154938629632e-05, + "loss": 0.196, + "step": 50705 + }, + { + "epoch": 2.37, + "learning_rate": 1.2211371153574844e-05, + "loss": 0.2789, + "step": 50710 + }, + { + "epoch": 2.37, + "learning_rate": 1.2210587368520057e-05, + "loss": 0.2517, + "step": 50715 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209803583465272e-05, + "loss": 0.0358, + "step": 50720 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209019798410484e-05, + "loss": 0.0353, + "step": 50725 + }, + { + "epoch": 2.37, + "learning_rate": 1.2208236013355698e-05, + "loss": 0.116, + "step": 50730 + }, + { + "epoch": 2.37, + "learning_rate": 1.220745222830091e-05, + "loss": 0.0738, + "step": 50735 + }, + { + "epoch": 2.37, + "learning_rate": 1.2206668443246126e-05, + "loss": 0.0956, + "step": 50740 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205884658191338e-05, + "loss": 0.0734, + "step": 50745 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205100873136552e-05, + "loss": 0.1592, + "step": 50750 + }, + { + "epoch": 2.37, + "learning_rate": 1.2204317088081764e-05, + "loss": 0.2333, + "step": 50755 + }, + { + "epoch": 2.37, + "learning_rate": 1.220353330302698e-05, + "loss": 0.1929, + "step": 50760 + }, + { + "epoch": 2.37, + "learning_rate": 1.2202749517972192e-05, + "loss": 0.5826, + "step": 50765 + }, + { + "epoch": 2.37, + "learning_rate": 1.2201965732917406e-05, + "loss": 0.0678, + "step": 50770 + }, + { + "epoch": 2.37, + "learning_rate": 1.220118194786262e-05, + "loss": 0.0588, + "step": 50775 + }, + { + "epoch": 2.37, + "learning_rate": 1.2200398162807832e-05, + "loss": 0.0301, + "step": 50780 + }, + { + "epoch": 2.37, + "learning_rate": 1.2199614377753046e-05, + "loss": 0.0743, + "step": 50785 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198830592698258e-05, + "loss": 0.0594, + "step": 50790 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198046807643474e-05, + "loss": 0.0979, + "step": 50795 + }, + { + "epoch": 2.37, + "learning_rate": 1.2197263022588686e-05, + "loss": 0.0678, + "step": 50800 + }, + { + "epoch": 2.37, + "learning_rate": 1.21964792375339e-05, + "loss": 0.1468, + "step": 50805 + }, + { + "epoch": 2.37, + "learning_rate": 1.2195695452479112e-05, + "loss": 0.238, + "step": 50810 + }, + { + "epoch": 2.37, + "learning_rate": 1.2194911667424328e-05, + "loss": 0.185, + "step": 50815 + }, + { + "epoch": 2.37, + "learning_rate": 1.219412788236954e-05, + "loss": 0.0528, + "step": 50820 + }, + { + "epoch": 2.37, + "learning_rate": 1.2193344097314754e-05, + "loss": 0.0391, + "step": 50825 + }, + { + "epoch": 2.37, + "learning_rate": 1.2192560312259966e-05, + "loss": 0.0602, + "step": 50830 + }, + { + "epoch": 2.37, + "learning_rate": 1.2191776527205182e-05, + "loss": 0.0745, + "step": 50835 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190992742150394e-05, + "loss": 0.0926, + "step": 50840 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190208957095606e-05, + "loss": 0.1335, + "step": 50845 + }, + { + "epoch": 2.37, + "learning_rate": 1.218942517204082e-05, + "loss": 0.1373, + "step": 50850 + }, + { + "epoch": 2.37, + "learning_rate": 1.2188641386986032e-05, + "loss": 0.204, + "step": 50855 + }, + { + "epoch": 2.37, + "learning_rate": 1.2187857601931248e-05, + "loss": 0.2939, + "step": 50860 + }, + { + "epoch": 2.37, + "learning_rate": 1.218707381687646e-05, + "loss": 0.2599, + "step": 50865 + }, + { + "epoch": 2.37, + "learning_rate": 1.2186290031821674e-05, + "loss": 0.1391, + "step": 50870 + }, + { + "epoch": 2.37, + "learning_rate": 1.2185506246766888e-05, + "loss": 0.0675, + "step": 50875 + }, + { + "epoch": 2.37, + "learning_rate": 1.2184722461712102e-05, + "loss": 0.0415, + "step": 50880 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183938676657314e-05, + "loss": 0.0662, + "step": 50885 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183154891602528e-05, + "loss": 0.0667, + "step": 50890 + }, + { + "epoch": 2.37, + "learning_rate": 1.2182371106547742e-05, + "loss": 0.0808, + "step": 50895 + }, + { + "epoch": 2.38, + "learning_rate": 1.2181587321492956e-05, + "loss": 0.1179, + "step": 50900 + }, + { + "epoch": 2.38, + "learning_rate": 1.2180803536438168e-05, + "loss": 0.1444, + "step": 50905 + }, + { + "epoch": 2.38, + "learning_rate": 1.218001975138338e-05, + "loss": 0.3121, + "step": 50910 + }, + { + "epoch": 2.38, + "learning_rate": 1.2179235966328596e-05, + "loss": 0.1573, + "step": 50915 + }, + { + "epoch": 2.38, + "learning_rate": 1.2178452181273808e-05, + "loss": 0.1081, + "step": 50920 + }, + { + "epoch": 2.38, + "learning_rate": 1.2177668396219022e-05, + "loss": 0.0486, + "step": 50925 + }, + { + "epoch": 2.38, + "learning_rate": 1.2176884611164234e-05, + "loss": 0.0354, + "step": 50930 + }, + { + "epoch": 2.38, + "learning_rate": 1.217610082610945e-05, + "loss": 0.1187, + "step": 50935 + }, + { + "epoch": 2.38, + "learning_rate": 1.2175317041054662e-05, + "loss": 0.0868, + "step": 50940 + }, + { + "epoch": 2.38, + "learning_rate": 1.2174533255999876e-05, + "loss": 0.1052, + "step": 50945 + }, + { + "epoch": 2.38, + "learning_rate": 1.2173749470945088e-05, + "loss": 0.1715, + "step": 50950 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172965685890304e-05, + "loss": 0.2318, + "step": 50955 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172181900835516e-05, + "loss": 0.2057, + "step": 50960 + }, + { + "epoch": 2.38, + "learning_rate": 1.217139811578073e-05, + "loss": 0.2589, + "step": 50965 + }, + { + "epoch": 2.38, + "learning_rate": 1.2170614330725942e-05, + "loss": 0.0572, + "step": 50970 + }, + { + "epoch": 2.38, + "learning_rate": 1.2169830545671156e-05, + "loss": 0.0639, + "step": 50975 + }, + { + "epoch": 2.38, + "learning_rate": 1.216904676061637e-05, + "loss": 0.0619, + "step": 50980 + }, + { + "epoch": 2.38, + "learning_rate": 1.2168262975561582e-05, + "loss": 0.0854, + "step": 50985 + }, + { + "epoch": 2.38, + "learning_rate": 1.2167479190506796e-05, + "loss": 0.0625, + "step": 50990 + }, + { + "epoch": 2.38, + "learning_rate": 1.216669540545201e-05, + "loss": 0.0908, + "step": 50995 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165911620397224e-05, + "loss": 0.1432, + "step": 51000 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165127835342436e-05, + "loss": 0.1444, + "step": 51005 + }, + { + "epoch": 2.38, + "learning_rate": 1.2164344050287652e-05, + "loss": 0.1782, + "step": 51010 + }, + { + "epoch": 2.38, + "learning_rate": 1.2163560265232864e-05, + "loss": 0.2457, + "step": 51015 + }, + { + "epoch": 2.38, + "learning_rate": 1.2162776480178078e-05, + "loss": 0.0601, + "step": 51020 + }, + { + "epoch": 2.38, + "learning_rate": 1.216199269512329e-05, + "loss": 0.1218, + "step": 51025 + }, + { + "epoch": 2.38, + "learning_rate": 1.2161208910068506e-05, + "loss": 0.059, + "step": 51030 + }, + { + "epoch": 2.38, + "learning_rate": 1.2160425125013718e-05, + "loss": 0.0545, + "step": 51035 + }, + { + "epoch": 2.38, + "learning_rate": 1.215964133995893e-05, + "loss": 0.1509, + "step": 51040 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158857554904144e-05, + "loss": 0.0776, + "step": 51045 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158073769849356e-05, + "loss": 0.2111, + "step": 51050 + }, + { + "epoch": 2.38, + "learning_rate": 1.2157289984794572e-05, + "loss": 0.1845, + "step": 51055 + }, + { + "epoch": 2.38, + "learning_rate": 1.2156506199739784e-05, + "loss": 0.295, + "step": 51060 + }, + { + "epoch": 2.38, + "learning_rate": 1.2155722414684998e-05, + "loss": 0.3386, + "step": 51065 + }, + { + "epoch": 2.38, + "learning_rate": 1.215493862963021e-05, + "loss": 0.0308, + "step": 51070 + }, + { + "epoch": 2.38, + "learning_rate": 1.2154154844575426e-05, + "loss": 0.0535, + "step": 51075 + }, + { + "epoch": 2.38, + "learning_rate": 1.2153371059520638e-05, + "loss": 0.0747, + "step": 51080 + }, + { + "epoch": 2.38, + "learning_rate": 1.2152587274465852e-05, + "loss": 0.0781, + "step": 51085 + }, + { + "epoch": 2.38, + "learning_rate": 1.2151803489411066e-05, + "loss": 0.074, + "step": 51090 + }, + { + "epoch": 2.38, + "learning_rate": 1.215101970435628e-05, + "loss": 0.0889, + "step": 51095 + }, + { + "epoch": 2.38, + "learning_rate": 1.2150235919301492e-05, + "loss": 0.0867, + "step": 51100 + }, + { + "epoch": 2.38, + "learning_rate": 1.2149452134246704e-05, + "loss": 0.1321, + "step": 51105 + }, + { + "epoch": 2.38, + "learning_rate": 1.214866834919192e-05, + "loss": 0.1562, + "step": 51110 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147884564137132e-05, + "loss": 0.632, + "step": 51115 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147100779082346e-05, + "loss": 0.0545, + "step": 51120 + }, + { + "epoch": 2.39, + "learning_rate": 1.2146316994027558e-05, + "loss": 0.0554, + "step": 51125 + }, + { + "epoch": 2.39, + "learning_rate": 1.2145533208972773e-05, + "loss": 0.0916, + "step": 51130 + }, + { + "epoch": 2.39, + "learning_rate": 1.2144749423917986e-05, + "loss": 0.0778, + "step": 51135 + }, + { + "epoch": 2.39, + "learning_rate": 1.21439656388632e-05, + "loss": 0.1447, + "step": 51140 + }, + { + "epoch": 2.39, + "learning_rate": 1.2143181853808412e-05, + "loss": 0.062, + "step": 51145 + }, + { + "epoch": 2.39, + "learning_rate": 1.2142398068753627e-05, + "loss": 0.1337, + "step": 51150 + }, + { + "epoch": 2.39, + "learning_rate": 1.214161428369884e-05, + "loss": 0.1622, + "step": 51155 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140830498644054e-05, + "loss": 0.2186, + "step": 51160 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140046713589266e-05, + "loss": 0.2537, + "step": 51165 + }, + { + "epoch": 2.39, + "learning_rate": 1.2139262928534478e-05, + "loss": 0.0788, + "step": 51170 + }, + { + "epoch": 2.39, + "learning_rate": 1.2138479143479694e-05, + "loss": 0.0364, + "step": 51175 + }, + { + "epoch": 2.39, + "learning_rate": 1.2137695358424906e-05, + "loss": 0.1155, + "step": 51180 + }, + { + "epoch": 2.39, + "learning_rate": 1.213691157337012e-05, + "loss": 0.0683, + "step": 51185 + }, + { + "epoch": 2.39, + "learning_rate": 1.2136127788315334e-05, + "loss": 0.1416, + "step": 51190 + }, + { + "epoch": 2.39, + "learning_rate": 1.2135344003260547e-05, + "loss": 0.0975, + "step": 51195 + }, + { + "epoch": 2.39, + "learning_rate": 1.213456021820576e-05, + "loss": 0.0998, + "step": 51200 + }, + { + "epoch": 2.39, + "learning_rate": 1.2133776433150974e-05, + "loss": 0.2315, + "step": 51205 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132992648096188e-05, + "loss": 0.2483, + "step": 51210 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132208863041401e-05, + "loss": 0.2255, + "step": 51215 + }, + { + "epoch": 2.39, + "learning_rate": 1.2131425077986614e-05, + "loss": 0.1239, + "step": 51220 + }, + { + "epoch": 2.39, + "learning_rate": 1.213064129293183e-05, + "loss": 0.0421, + "step": 51225 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129857507877041e-05, + "loss": 0.0431, + "step": 51230 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129073722822254e-05, + "loss": 0.0604, + "step": 51235 + }, + { + "epoch": 2.39, + "learning_rate": 1.2128289937767468e-05, + "loss": 0.0692, + "step": 51240 + }, + { + "epoch": 2.39, + "learning_rate": 1.212750615271268e-05, + "loss": 0.0819, + "step": 51245 + }, + { + "epoch": 2.39, + "learning_rate": 1.2126722367657895e-05, + "loss": 0.0985, + "step": 51250 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125938582603108e-05, + "loss": 0.1348, + "step": 51255 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125154797548321e-05, + "loss": 0.184, + "step": 51260 + }, + { + "epoch": 2.39, + "learning_rate": 1.2124371012493534e-05, + "loss": 0.3057, + "step": 51265 + }, + { + "epoch": 2.39, + "learning_rate": 1.212358722743875e-05, + "loss": 0.088, + "step": 51270 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122803442383962e-05, + "loss": 0.0724, + "step": 51275 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122019657329175e-05, + "loss": 0.0378, + "step": 51280 + }, + { + "epoch": 2.39, + "learning_rate": 1.2121235872274388e-05, + "loss": 0.0781, + "step": 51285 + }, + { + "epoch": 2.39, + "learning_rate": 1.2120452087219603e-05, + "loss": 0.1908, + "step": 51290 + }, + { + "epoch": 2.39, + "learning_rate": 1.2119668302164815e-05, + "loss": 0.1843, + "step": 51295 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118884517110028e-05, + "loss": 0.085, + "step": 51300 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118100732055242e-05, + "loss": 0.1861, + "step": 51305 + }, + { + "epoch": 2.39, + "learning_rate": 1.2117316947000455e-05, + "loss": 0.252, + "step": 51310 + }, + { + "epoch": 2.39, + "learning_rate": 1.211653316194567e-05, + "loss": 0.341, + "step": 51315 + }, + { + "epoch": 2.39, + "learning_rate": 1.2115749376890882e-05, + "loss": 0.054, + "step": 51320 + }, + { + "epoch": 2.39, + "learning_rate": 1.2114965591836097e-05, + "loss": 0.0834, + "step": 51325 + }, + { + "epoch": 2.4, + "learning_rate": 1.211418180678131e-05, + "loss": 0.0253, + "step": 51330 + }, + { + "epoch": 2.4, + "learning_rate": 1.2113398021726523e-05, + "loss": 0.0387, + "step": 51335 + }, + { + "epoch": 2.4, + "learning_rate": 1.2112614236671736e-05, + "loss": 0.058, + "step": 51340 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111830451616951e-05, + "loss": 0.0874, + "step": 51345 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111046666562163e-05, + "loss": 0.0858, + "step": 51350 + }, + { + "epoch": 2.4, + "learning_rate": 1.2110262881507377e-05, + "loss": 0.1828, + "step": 51355 + }, + { + "epoch": 2.4, + "learning_rate": 1.210947909645259e-05, + "loss": 0.2306, + "step": 51360 + }, + { + "epoch": 2.4, + "learning_rate": 1.2108695311397802e-05, + "loss": 0.3866, + "step": 51365 + }, + { + "epoch": 2.4, + "learning_rate": 1.2107911526343017e-05, + "loss": 0.0208, + "step": 51370 + }, + { + "epoch": 2.4, + "learning_rate": 1.210712774128823e-05, + "loss": 0.0327, + "step": 51375 + }, + { + "epoch": 2.4, + "learning_rate": 1.2106343956233443e-05, + "loss": 0.0503, + "step": 51380 + }, + { + "epoch": 2.4, + "learning_rate": 1.2105560171178656e-05, + "loss": 0.1202, + "step": 51385 + }, + { + "epoch": 2.4, + "learning_rate": 1.2104776386123871e-05, + "loss": 0.1188, + "step": 51390 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103992601069083e-05, + "loss": 0.0703, + "step": 51395 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103208816014297e-05, + "loss": 0.1258, + "step": 51400 + }, + { + "epoch": 2.4, + "learning_rate": 1.2102425030959511e-05, + "loss": 0.2325, + "step": 51405 + }, + { + "epoch": 2.4, + "learning_rate": 1.2101641245904725e-05, + "loss": 0.1977, + "step": 51410 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100857460849937e-05, + "loss": 0.3474, + "step": 51415 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100073675795151e-05, + "loss": 0.077, + "step": 51420 + }, + { + "epoch": 2.4, + "learning_rate": 1.2099289890740365e-05, + "loss": 0.0395, + "step": 51425 + }, + { + "epoch": 2.4, + "learning_rate": 1.2098506105685577e-05, + "loss": 0.0626, + "step": 51430 + }, + { + "epoch": 2.4, + "learning_rate": 1.2097722320630791e-05, + "loss": 0.0527, + "step": 51435 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096938535576003e-05, + "loss": 0.1469, + "step": 51440 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096154750521219e-05, + "loss": 0.1648, + "step": 51445 + }, + { + "epoch": 2.4, + "learning_rate": 1.2095370965466431e-05, + "loss": 0.1252, + "step": 51450 + }, + { + "epoch": 2.4, + "learning_rate": 1.2094587180411645e-05, + "loss": 0.199, + "step": 51455 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093803395356857e-05, + "loss": 0.252, + "step": 51460 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093019610302073e-05, + "loss": 0.2616, + "step": 51465 + }, + { + "epoch": 2.4, + "learning_rate": 1.2092235825247285e-05, + "loss": 0.0359, + "step": 51470 + }, + { + "epoch": 2.4, + "learning_rate": 1.2091452040192499e-05, + "loss": 0.0569, + "step": 51475 + }, + { + "epoch": 2.4, + "learning_rate": 1.2090668255137711e-05, + "loss": 0.0824, + "step": 51480 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089884470082927e-05, + "loss": 0.065, + "step": 51485 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089100685028139e-05, + "loss": 0.0876, + "step": 51490 + }, + { + "epoch": 2.4, + "learning_rate": 1.2088316899973351e-05, + "loss": 0.1345, + "step": 51495 + }, + { + "epoch": 2.4, + "learning_rate": 1.2087533114918565e-05, + "loss": 0.0907, + "step": 51500 + }, + { + "epoch": 2.4, + "learning_rate": 1.2086749329863779e-05, + "loss": 0.3917, + "step": 51505 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085965544808993e-05, + "loss": 0.3124, + "step": 51510 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085181759754205e-05, + "loss": 0.3424, + "step": 51515 + }, + { + "epoch": 2.4, + "learning_rate": 1.208439797469942e-05, + "loss": 0.0339, + "step": 51520 + }, + { + "epoch": 2.4, + "learning_rate": 1.2083614189644633e-05, + "loss": 0.0568, + "step": 51525 + }, + { + "epoch": 2.4, + "learning_rate": 1.2082830404589847e-05, + "loss": 0.1319, + "step": 51530 + }, + { + "epoch": 2.4, + "learning_rate": 1.208204661953506e-05, + "loss": 0.0855, + "step": 51535 + }, + { + "epoch": 2.4, + "learning_rate": 1.2081262834480275e-05, + "loss": 0.0986, + "step": 51540 + }, + { + "epoch": 2.41, + "learning_rate": 1.2080479049425487e-05, + "loss": 0.1848, + "step": 51545 + }, + { + "epoch": 2.41, + "learning_rate": 1.2079695264370701e-05, + "loss": 0.1471, + "step": 51550 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078911479315913e-05, + "loss": 0.1345, + "step": 51555 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078127694261125e-05, + "loss": 0.1926, + "step": 51560 + }, + { + "epoch": 2.41, + "learning_rate": 1.2077343909206341e-05, + "loss": 0.3924, + "step": 51565 + }, + { + "epoch": 2.41, + "learning_rate": 1.2076560124151553e-05, + "loss": 0.0675, + "step": 51570 + }, + { + "epoch": 2.41, + "learning_rate": 1.2075776339096767e-05, + "loss": 0.0411, + "step": 51575 + }, + { + "epoch": 2.41, + "learning_rate": 1.207499255404198e-05, + "loss": 0.0543, + "step": 51580 + }, + { + "epoch": 2.41, + "learning_rate": 1.2074208768987195e-05, + "loss": 0.148, + "step": 51585 + }, + { + "epoch": 2.41, + "learning_rate": 1.2073424983932407e-05, + "loss": 0.0893, + "step": 51590 + }, + { + "epoch": 2.41, + "learning_rate": 1.2072641198877621e-05, + "loss": 0.1574, + "step": 51595 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071857413822833e-05, + "loss": 0.1502, + "step": 51600 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071073628768049e-05, + "loss": 0.2346, + "step": 51605 + }, + { + "epoch": 2.41, + "learning_rate": 1.2070289843713261e-05, + "loss": 0.2326, + "step": 51610 + }, + { + "epoch": 2.41, + "learning_rate": 1.2069506058658475e-05, + "loss": 0.3411, + "step": 51615 + }, + { + "epoch": 2.41, + "learning_rate": 1.2068722273603689e-05, + "loss": 0.0766, + "step": 51620 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067938488548901e-05, + "loss": 0.022, + "step": 51625 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067154703494115e-05, + "loss": 0.0682, + "step": 51630 + }, + { + "epoch": 2.41, + "learning_rate": 1.2066370918439327e-05, + "loss": 0.0439, + "step": 51635 + }, + { + "epoch": 2.41, + "learning_rate": 1.2065587133384543e-05, + "loss": 0.0751, + "step": 51640 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064803348329755e-05, + "loss": 0.0661, + "step": 51645 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064019563274969e-05, + "loss": 0.215, + "step": 51650 + }, + { + "epoch": 2.41, + "learning_rate": 1.2063235778220181e-05, + "loss": 0.2804, + "step": 51655 + }, + { + "epoch": 2.41, + "learning_rate": 1.2062451993165397e-05, + "loss": 0.3444, + "step": 51660 + }, + { + "epoch": 2.41, + "learning_rate": 1.2061668208110609e-05, + "loss": 0.2795, + "step": 51665 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060884423055823e-05, + "loss": 0.0677, + "step": 51670 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060100638001035e-05, + "loss": 0.0437, + "step": 51675 + }, + { + "epoch": 2.41, + "learning_rate": 1.205931685294625e-05, + "loss": 0.0713, + "step": 51680 + }, + { + "epoch": 2.41, + "learning_rate": 1.2058533067891463e-05, + "loss": 0.032, + "step": 51685 + }, + { + "epoch": 2.41, + "learning_rate": 1.2057749282836675e-05, + "loss": 0.0629, + "step": 51690 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056965497781889e-05, + "loss": 0.1758, + "step": 51695 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056181712727101e-05, + "loss": 0.0785, + "step": 51700 + }, + { + "epoch": 2.41, + "learning_rate": 1.2055397927672317e-05, + "loss": 0.1533, + "step": 51705 + }, + { + "epoch": 2.41, + "learning_rate": 1.2054614142617529e-05, + "loss": 0.249, + "step": 51710 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053830357562743e-05, + "loss": 0.186, + "step": 51715 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053046572507957e-05, + "loss": 0.01, + "step": 51720 + }, + { + "epoch": 2.41, + "learning_rate": 1.205226278745317e-05, + "loss": 0.0311, + "step": 51725 + }, + { + "epoch": 2.41, + "learning_rate": 1.2051479002398383e-05, + "loss": 0.0707, + "step": 51730 + }, + { + "epoch": 2.41, + "learning_rate": 1.2050695217343597e-05, + "loss": 0.0547, + "step": 51735 + }, + { + "epoch": 2.41, + "learning_rate": 1.204991143228881e-05, + "loss": 0.1202, + "step": 51740 + }, + { + "epoch": 2.41, + "learning_rate": 1.2049127647234025e-05, + "loss": 0.0915, + "step": 51745 + }, + { + "epoch": 2.41, + "learning_rate": 1.2048343862179237e-05, + "loss": 0.2305, + "step": 51750 + }, + { + "epoch": 2.41, + "learning_rate": 1.2047560077124449e-05, + "loss": 0.2337, + "step": 51755 + }, + { + "epoch": 2.42, + "learning_rate": 1.2046776292069665e-05, + "loss": 0.2104, + "step": 51760 + }, + { + "epoch": 2.42, + "learning_rate": 1.2045992507014877e-05, + "loss": 0.2833, + "step": 51765 + }, + { + "epoch": 2.42, + "learning_rate": 1.204520872196009e-05, + "loss": 0.022, + "step": 51770 + }, + { + "epoch": 2.42, + "learning_rate": 1.2044424936905303e-05, + "loss": 0.0689, + "step": 51775 + }, + { + "epoch": 2.42, + "learning_rate": 1.2043641151850519e-05, + "loss": 0.142, + "step": 51780 + }, + { + "epoch": 2.42, + "learning_rate": 1.204285736679573e-05, + "loss": 0.1489, + "step": 51785 + }, + { + "epoch": 2.42, + "learning_rate": 1.2042073581740945e-05, + "loss": 0.0738, + "step": 51790 + }, + { + "epoch": 2.42, + "learning_rate": 1.2041289796686157e-05, + "loss": 0.114, + "step": 51795 + }, + { + "epoch": 2.42, + "learning_rate": 1.2040506011631372e-05, + "loss": 0.1686, + "step": 51800 + }, + { + "epoch": 2.42, + "learning_rate": 1.2039722226576585e-05, + "loss": 0.1656, + "step": 51805 + }, + { + "epoch": 2.42, + "learning_rate": 1.2038938441521799e-05, + "loss": 0.1686, + "step": 51810 + }, + { + "epoch": 2.42, + "learning_rate": 1.203815465646701e-05, + "loss": 0.2587, + "step": 51815 + }, + { + "epoch": 2.42, + "learning_rate": 1.2037370871412225e-05, + "loss": 0.0502, + "step": 51820 + }, + { + "epoch": 2.42, + "learning_rate": 1.2036587086357439e-05, + "loss": 0.0239, + "step": 51825 + }, + { + "epoch": 2.42, + "learning_rate": 1.203580330130265e-05, + "loss": 0.0367, + "step": 51830 + }, + { + "epoch": 2.42, + "learning_rate": 1.2035019516247865e-05, + "loss": 0.0598, + "step": 51835 + }, + { + "epoch": 2.42, + "learning_rate": 1.2034235731193079e-05, + "loss": 0.1056, + "step": 51840 + }, + { + "epoch": 2.42, + "learning_rate": 1.2033451946138293e-05, + "loss": 0.1451, + "step": 51845 + }, + { + "epoch": 2.42, + "learning_rate": 1.2032668161083505e-05, + "loss": 0.174, + "step": 51850 + }, + { + "epoch": 2.42, + "learning_rate": 1.203188437602872e-05, + "loss": 0.199, + "step": 51855 + }, + { + "epoch": 2.42, + "learning_rate": 1.2031100590973933e-05, + "loss": 0.3578, + "step": 51860 + }, + { + "epoch": 2.42, + "learning_rate": 1.2030316805919146e-05, + "loss": 0.3888, + "step": 51865 + }, + { + "epoch": 2.42, + "learning_rate": 1.2029533020864359e-05, + "loss": 0.0432, + "step": 51870 + }, + { + "epoch": 2.42, + "learning_rate": 1.2028749235809574e-05, + "loss": 0.0301, + "step": 51875 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027965450754787e-05, + "loss": 0.0577, + "step": 51880 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027181665699999e-05, + "loss": 0.0879, + "step": 51885 + }, + { + "epoch": 2.42, + "learning_rate": 1.2026397880645213e-05, + "loss": 0.1147, + "step": 51890 + }, + { + "epoch": 2.42, + "learning_rate": 1.2025614095590425e-05, + "loss": 0.1604, + "step": 51895 + }, + { + "epoch": 2.42, + "learning_rate": 1.202483031053564e-05, + "loss": 0.0468, + "step": 51900 + }, + { + "epoch": 2.42, + "learning_rate": 1.2024046525480853e-05, + "loss": 0.1913, + "step": 51905 + }, + { + "epoch": 2.42, + "learning_rate": 1.2023262740426067e-05, + "loss": 0.2669, + "step": 51910 + }, + { + "epoch": 2.42, + "learning_rate": 1.2022478955371279e-05, + "loss": 0.17, + "step": 51915 + }, + { + "epoch": 2.42, + "learning_rate": 1.2021695170316494e-05, + "loss": 0.1219, + "step": 51920 + }, + { + "epoch": 2.42, + "learning_rate": 1.2020911385261707e-05, + "loss": 0.0375, + "step": 51925 + }, + { + "epoch": 2.42, + "learning_rate": 1.202012760020692e-05, + "loss": 0.0667, + "step": 51930 + }, + { + "epoch": 2.42, + "learning_rate": 1.2019343815152134e-05, + "loss": 0.0615, + "step": 51935 + }, + { + "epoch": 2.42, + "learning_rate": 1.2018560030097348e-05, + "loss": 0.0947, + "step": 51940 + }, + { + "epoch": 2.42, + "learning_rate": 1.201777624504256e-05, + "loss": 0.1589, + "step": 51945 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016992459987773e-05, + "loss": 0.0904, + "step": 51950 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016208674932988e-05, + "loss": 0.2414, + "step": 51955 + }, + { + "epoch": 2.42, + "learning_rate": 1.20154248898782e-05, + "loss": 0.3085, + "step": 51960 + }, + { + "epoch": 2.42, + "learning_rate": 1.2014641104823414e-05, + "loss": 0.3275, + "step": 51965 + }, + { + "epoch": 2.42, + "learning_rate": 1.2013857319768627e-05, + "loss": 0.0334, + "step": 51970 + }, + { + "epoch": 2.43, + "learning_rate": 1.2013073534713842e-05, + "loss": 0.0268, + "step": 51975 + }, + { + "epoch": 2.43, + "learning_rate": 1.2012289749659054e-05, + "loss": 0.1292, + "step": 51980 + }, + { + "epoch": 2.43, + "learning_rate": 1.2011505964604268e-05, + "loss": 0.0277, + "step": 51985 + }, + { + "epoch": 2.43, + "learning_rate": 1.201072217954948e-05, + "loss": 0.0855, + "step": 51990 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009938394494696e-05, + "loss": 0.0944, + "step": 51995 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009154609439908e-05, + "loss": 0.0794, + "step": 52000 + }, + { + "epoch": 2.43, + "learning_rate": 1.2008370824385122e-05, + "loss": 0.1485, + "step": 52005 + }, + { + "epoch": 2.43, + "learning_rate": 1.2007587039330335e-05, + "loss": 0.2271, + "step": 52010 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006803254275547e-05, + "loss": 0.2456, + "step": 52015 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006019469220762e-05, + "loss": 0.085, + "step": 52020 + }, + { + "epoch": 2.43, + "learning_rate": 1.2005235684165975e-05, + "loss": 0.0676, + "step": 52025 + }, + { + "epoch": 2.43, + "learning_rate": 1.2004451899111188e-05, + "loss": 0.0165, + "step": 52030 + }, + { + "epoch": 2.43, + "learning_rate": 1.2003668114056402e-05, + "loss": 0.0606, + "step": 52035 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002884329001616e-05, + "loss": 0.1106, + "step": 52040 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002100543946828e-05, + "loss": 0.1506, + "step": 52045 + }, + { + "epoch": 2.43, + "learning_rate": 1.2001316758892042e-05, + "loss": 0.0898, + "step": 52050 + }, + { + "epoch": 2.43, + "learning_rate": 1.2000532973837256e-05, + "loss": 0.1746, + "step": 52055 + }, + { + "epoch": 2.43, + "learning_rate": 1.199974918878247e-05, + "loss": 0.251, + "step": 52060 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998965403727682e-05, + "loss": 0.2887, + "step": 52065 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998181618672898e-05, + "loss": 0.0429, + "step": 52070 + }, + { + "epoch": 2.43, + "learning_rate": 1.199739783361811e-05, + "loss": 0.0352, + "step": 52075 + }, + { + "epoch": 2.43, + "learning_rate": 1.1996614048563322e-05, + "loss": 0.1168, + "step": 52080 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995830263508536e-05, + "loss": 0.0959, + "step": 52085 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995046478453749e-05, + "loss": 0.154, + "step": 52090 + }, + { + "epoch": 2.43, + "learning_rate": 1.1994262693398964e-05, + "loss": 0.0716, + "step": 52095 + }, + { + "epoch": 2.43, + "learning_rate": 1.1993478908344176e-05, + "loss": 0.1781, + "step": 52100 + }, + { + "epoch": 2.43, + "learning_rate": 1.199269512328939e-05, + "loss": 0.2264, + "step": 52105 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991911338234602e-05, + "loss": 0.2278, + "step": 52110 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991127553179818e-05, + "loss": 0.3195, + "step": 52115 + }, + { + "epoch": 2.43, + "learning_rate": 1.199034376812503e-05, + "loss": 0.0321, + "step": 52120 + }, + { + "epoch": 2.43, + "learning_rate": 1.1989559983070244e-05, + "loss": 0.0277, + "step": 52125 + }, + { + "epoch": 2.43, + "learning_rate": 1.1988776198015456e-05, + "loss": 0.0288, + "step": 52130 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987992412960672e-05, + "loss": 0.0316, + "step": 52135 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987208627905884e-05, + "loss": 0.0851, + "step": 52140 + }, + { + "epoch": 2.43, + "learning_rate": 1.1986424842851096e-05, + "loss": 0.1352, + "step": 52145 + }, + { + "epoch": 2.43, + "learning_rate": 1.198564105779631e-05, + "loss": 0.1331, + "step": 52150 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984857272741524e-05, + "loss": 0.1579, + "step": 52155 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984073487686738e-05, + "loss": 0.2474, + "step": 52160 + }, + { + "epoch": 2.43, + "learning_rate": 1.198328970263195e-05, + "loss": 0.2848, + "step": 52165 + }, + { + "epoch": 2.43, + "learning_rate": 1.1982505917577166e-05, + "loss": 0.0673, + "step": 52170 + }, + { + "epoch": 2.43, + "learning_rate": 1.1981722132522378e-05, + "loss": 0.0426, + "step": 52175 + }, + { + "epoch": 2.43, + "learning_rate": 1.1980938347467592e-05, + "loss": 0.0501, + "step": 52180 + }, + { + "epoch": 2.44, + "learning_rate": 1.1980154562412804e-05, + "loss": 0.0576, + "step": 52185 + }, + { + "epoch": 2.44, + "learning_rate": 1.197937077735802e-05, + "loss": 0.105, + "step": 52190 + }, + { + "epoch": 2.44, + "learning_rate": 1.1978586992303232e-05, + "loss": 0.1487, + "step": 52195 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977803207248446e-05, + "loss": 0.063, + "step": 52200 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977019422193658e-05, + "loss": 0.1358, + "step": 52205 + }, + { + "epoch": 2.44, + "learning_rate": 1.197623563713887e-05, + "loss": 0.4102, + "step": 52210 + }, + { + "epoch": 2.44, + "learning_rate": 1.1975451852084086e-05, + "loss": 0.2808, + "step": 52215 + }, + { + "epoch": 2.44, + "learning_rate": 1.1974668067029298e-05, + "loss": 0.1002, + "step": 52220 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973884281974512e-05, + "loss": 0.0695, + "step": 52225 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973100496919724e-05, + "loss": 0.0389, + "step": 52230 + }, + { + "epoch": 2.44, + "learning_rate": 1.197231671186494e-05, + "loss": 0.0591, + "step": 52235 + }, + { + "epoch": 2.44, + "learning_rate": 1.1971532926810152e-05, + "loss": 0.1046, + "step": 52240 + }, + { + "epoch": 2.44, + "learning_rate": 1.1970749141755366e-05, + "loss": 0.0821, + "step": 52245 + }, + { + "epoch": 2.44, + "learning_rate": 1.196996535670058e-05, + "loss": 0.1596, + "step": 52250 + }, + { + "epoch": 2.44, + "learning_rate": 1.1969181571645794e-05, + "loss": 0.0936, + "step": 52255 + }, + { + "epoch": 2.44, + "learning_rate": 1.1968397786591006e-05, + "loss": 0.1988, + "step": 52260 + }, + { + "epoch": 2.44, + "learning_rate": 1.196761400153622e-05, + "loss": 0.3196, + "step": 52265 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966830216481434e-05, + "loss": 0.0698, + "step": 52270 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966046431426646e-05, + "loss": 0.0393, + "step": 52275 + }, + { + "epoch": 2.44, + "learning_rate": 1.196526264637186e-05, + "loss": 0.0488, + "step": 52280 + }, + { + "epoch": 2.44, + "learning_rate": 1.1964478861317072e-05, + "loss": 0.096, + "step": 52285 + }, + { + "epoch": 2.44, + "learning_rate": 1.1963695076262288e-05, + "loss": 0.0349, + "step": 52290 + }, + { + "epoch": 2.44, + "learning_rate": 1.19629112912075e-05, + "loss": 0.2553, + "step": 52295 + }, + { + "epoch": 2.44, + "learning_rate": 1.1962127506152714e-05, + "loss": 0.1327, + "step": 52300 + }, + { + "epoch": 2.44, + "learning_rate": 1.1961343721097926e-05, + "loss": 0.1643, + "step": 52305 + }, + { + "epoch": 2.44, + "learning_rate": 1.1960559936043142e-05, + "loss": 0.2194, + "step": 52310 + }, + { + "epoch": 2.44, + "learning_rate": 1.1959776150988354e-05, + "loss": 0.3497, + "step": 52315 + }, + { + "epoch": 2.44, + "learning_rate": 1.1958992365933568e-05, + "loss": 0.0641, + "step": 52320 + }, + { + "epoch": 2.44, + "learning_rate": 1.195820858087878e-05, + "loss": 0.0347, + "step": 52325 + }, + { + "epoch": 2.44, + "learning_rate": 1.1957424795823996e-05, + "loss": 0.0945, + "step": 52330 + }, + { + "epoch": 2.44, + "learning_rate": 1.1956641010769208e-05, + "loss": 0.0739, + "step": 52335 + }, + { + "epoch": 2.44, + "learning_rate": 1.195585722571442e-05, + "loss": 0.1243, + "step": 52340 + }, + { + "epoch": 2.44, + "learning_rate": 1.1955073440659634e-05, + "loss": 0.1303, + "step": 52345 + }, + { + "epoch": 2.44, + "learning_rate": 1.1954289655604848e-05, + "loss": 0.1913, + "step": 52350 + }, + { + "epoch": 2.44, + "learning_rate": 1.1953505870550062e-05, + "loss": 0.1298, + "step": 52355 + }, + { + "epoch": 2.44, + "learning_rate": 1.1952722085495274e-05, + "loss": 0.2293, + "step": 52360 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951938300440488e-05, + "loss": 0.2134, + "step": 52365 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951154515385702e-05, + "loss": 0.0677, + "step": 52370 + }, + { + "epoch": 2.44, + "learning_rate": 1.1950370730330916e-05, + "loss": 0.0779, + "step": 52375 + }, + { + "epoch": 2.44, + "learning_rate": 1.1949586945276128e-05, + "loss": 0.0791, + "step": 52380 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948803160221344e-05, + "loss": 0.086, + "step": 52385 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948019375166556e-05, + "loss": 0.1275, + "step": 52390 + }, + { + "epoch": 2.44, + "learning_rate": 1.194723559011177e-05, + "loss": 0.0879, + "step": 52395 + }, + { + "epoch": 2.45, + "learning_rate": 1.1946451805056982e-05, + "loss": 0.133, + "step": 52400 + }, + { + "epoch": 2.45, + "learning_rate": 1.1945668020002194e-05, + "loss": 0.2451, + "step": 52405 + }, + { + "epoch": 2.45, + "learning_rate": 1.194488423494741e-05, + "loss": 0.2477, + "step": 52410 + }, + { + "epoch": 2.45, + "learning_rate": 1.1944100449892622e-05, + "loss": 0.4061, + "step": 52415 + }, + { + "epoch": 2.45, + "learning_rate": 1.1943316664837836e-05, + "loss": 0.0401, + "step": 52420 + }, + { + "epoch": 2.45, + "learning_rate": 1.1942532879783048e-05, + "loss": 0.0241, + "step": 52425 + }, + { + "epoch": 2.45, + "learning_rate": 1.1941749094728264e-05, + "loss": 0.0777, + "step": 52430 + }, + { + "epoch": 2.45, + "learning_rate": 1.1940965309673476e-05, + "loss": 0.065, + "step": 52435 + }, + { + "epoch": 2.45, + "learning_rate": 1.194018152461869e-05, + "loss": 0.1047, + "step": 52440 + }, + { + "epoch": 2.45, + "learning_rate": 1.1939397739563902e-05, + "loss": 0.0401, + "step": 52445 + }, + { + "epoch": 2.45, + "learning_rate": 1.1938613954509118e-05, + "loss": 0.0681, + "step": 52450 + }, + { + "epoch": 2.45, + "learning_rate": 1.193783016945433e-05, + "loss": 0.1758, + "step": 52455 + }, + { + "epoch": 2.45, + "learning_rate": 1.1937046384399544e-05, + "loss": 0.1733, + "step": 52460 + }, + { + "epoch": 2.45, + "learning_rate": 1.1936262599344756e-05, + "loss": 0.3287, + "step": 52465 + }, + { + "epoch": 2.45, + "learning_rate": 1.193547881428997e-05, + "loss": 0.049, + "step": 52470 + }, + { + "epoch": 2.45, + "learning_rate": 1.1934695029235184e-05, + "loss": 0.0322, + "step": 52475 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933911244180396e-05, + "loss": 0.069, + "step": 52480 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933127459125612e-05, + "loss": 0.0482, + "step": 52485 + }, + { + "epoch": 2.45, + "learning_rate": 1.1932343674070824e-05, + "loss": 0.1658, + "step": 52490 + }, + { + "epoch": 2.45, + "learning_rate": 1.1931559889016038e-05, + "loss": 0.1137, + "step": 52495 + }, + { + "epoch": 2.45, + "learning_rate": 1.193077610396125e-05, + "loss": 0.2187, + "step": 52500 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929992318906465e-05, + "loss": 0.1515, + "step": 52505 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929208533851678e-05, + "loss": 0.307, + "step": 52510 + }, + { + "epoch": 2.45, + "learning_rate": 1.1928424748796892e-05, + "loss": 0.3527, + "step": 52515 + }, + { + "epoch": 2.45, + "learning_rate": 1.1927640963742104e-05, + "loss": 0.0675, + "step": 52520 + }, + { + "epoch": 2.45, + "learning_rate": 1.192685717868732e-05, + "loss": 0.0585, + "step": 52525 + }, + { + "epoch": 2.45, + "learning_rate": 1.1926073393632532e-05, + "loss": 0.0977, + "step": 52530 + }, + { + "epoch": 2.45, + "learning_rate": 1.1925289608577744e-05, + "loss": 0.0608, + "step": 52535 + }, + { + "epoch": 2.45, + "learning_rate": 1.1924505823522958e-05, + "loss": 0.061, + "step": 52540 + }, + { + "epoch": 2.45, + "learning_rate": 1.192372203846817e-05, + "loss": 0.0563, + "step": 52545 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922938253413386e-05, + "loss": 0.1041, + "step": 52550 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922154468358598e-05, + "loss": 0.1776, + "step": 52555 + }, + { + "epoch": 2.45, + "learning_rate": 1.1921370683303812e-05, + "loss": 0.2721, + "step": 52560 + }, + { + "epoch": 2.45, + "learning_rate": 1.1920586898249026e-05, + "loss": 0.2447, + "step": 52565 + }, + { + "epoch": 2.45, + "learning_rate": 1.191980311319424e-05, + "loss": 0.0822, + "step": 52570 + }, + { + "epoch": 2.45, + "learning_rate": 1.1919019328139452e-05, + "loss": 0.0929, + "step": 52575 + }, + { + "epoch": 2.45, + "learning_rate": 1.1918235543084666e-05, + "loss": 0.0734, + "step": 52580 + }, + { + "epoch": 2.45, + "learning_rate": 1.191745175802988e-05, + "loss": 0.0762, + "step": 52585 + }, + { + "epoch": 2.45, + "learning_rate": 1.1916667972975093e-05, + "loss": 0.1376, + "step": 52590 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915884187920306e-05, + "loss": 0.1607, + "step": 52595 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915100402865518e-05, + "loss": 0.2222, + "step": 52600 + }, + { + "epoch": 2.45, + "learning_rate": 1.1914316617810733e-05, + "loss": 0.2216, + "step": 52605 + }, + { + "epoch": 2.45, + "learning_rate": 1.1913532832755946e-05, + "loss": 0.2515, + "step": 52610 + }, + { + "epoch": 2.46, + "learning_rate": 1.191274904770116e-05, + "loss": 0.3711, + "step": 52615 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911965262646372e-05, + "loss": 0.0333, + "step": 52620 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911181477591587e-05, + "loss": 0.0515, + "step": 52625 + }, + { + "epoch": 2.46, + "learning_rate": 1.19103976925368e-05, + "loss": 0.0941, + "step": 52630 + }, + { + "epoch": 2.46, + "learning_rate": 1.1909613907482013e-05, + "loss": 0.1234, + "step": 52635 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908830122427226e-05, + "loss": 0.0329, + "step": 52640 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908046337372441e-05, + "loss": 0.1261, + "step": 52645 + }, + { + "epoch": 2.46, + "learning_rate": 1.1907262552317653e-05, + "loss": 0.1531, + "step": 52650 + }, + { + "epoch": 2.46, + "learning_rate": 1.1906478767262867e-05, + "loss": 0.1486, + "step": 52655 + }, + { + "epoch": 2.46, + "learning_rate": 1.190569498220808e-05, + "loss": 0.2458, + "step": 52660 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904911197153293e-05, + "loss": 0.3166, + "step": 52665 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904127412098507e-05, + "loss": 0.0304, + "step": 52670 + }, + { + "epoch": 2.46, + "learning_rate": 1.190334362704372e-05, + "loss": 0.0736, + "step": 52675 + }, + { + "epoch": 2.46, + "learning_rate": 1.1902559841988934e-05, + "loss": 0.0754, + "step": 52680 + }, + { + "epoch": 2.46, + "learning_rate": 1.1901776056934147e-05, + "loss": 0.0731, + "step": 52685 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900992271879361e-05, + "loss": 0.0482, + "step": 52690 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900208486824574e-05, + "loss": 0.0744, + "step": 52695 + }, + { + "epoch": 2.46, + "learning_rate": 1.1899424701769789e-05, + "loss": 0.1076, + "step": 52700 + }, + { + "epoch": 2.46, + "learning_rate": 1.1898640916715001e-05, + "loss": 0.1539, + "step": 52705 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897857131660215e-05, + "loss": 0.2656, + "step": 52710 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897073346605427e-05, + "loss": 0.2979, + "step": 52715 + }, + { + "epoch": 2.46, + "learning_rate": 1.1896289561550643e-05, + "loss": 0.0254, + "step": 52720 + }, + { + "epoch": 2.46, + "learning_rate": 1.1895505776495855e-05, + "loss": 0.0355, + "step": 52725 + }, + { + "epoch": 2.46, + "learning_rate": 1.1894721991441067e-05, + "loss": 0.0842, + "step": 52730 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893938206386281e-05, + "loss": 0.075, + "step": 52735 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893154421331494e-05, + "loss": 0.122, + "step": 52740 + }, + { + "epoch": 2.46, + "learning_rate": 1.189237063627671e-05, + "loss": 0.1795, + "step": 52745 + }, + { + "epoch": 2.46, + "learning_rate": 1.1891586851221921e-05, + "loss": 0.1504, + "step": 52750 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890803066167135e-05, + "loss": 0.2475, + "step": 52755 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890019281112348e-05, + "loss": 0.1841, + "step": 52760 + }, + { + "epoch": 2.46, + "learning_rate": 1.1889235496057563e-05, + "loss": 0.3067, + "step": 52765 + }, + { + "epoch": 2.46, + "learning_rate": 1.1888451711002775e-05, + "loss": 0.0693, + "step": 52770 + }, + { + "epoch": 2.46, + "learning_rate": 1.188766792594799e-05, + "loss": 0.0174, + "step": 52775 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886884140893203e-05, + "loss": 0.0503, + "step": 52780 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886100355838417e-05, + "loss": 0.1001, + "step": 52785 + }, + { + "epoch": 2.46, + "learning_rate": 1.188531657078363e-05, + "loss": 0.1064, + "step": 52790 + }, + { + "epoch": 2.46, + "learning_rate": 1.1884532785728841e-05, + "loss": 0.0935, + "step": 52795 + }, + { + "epoch": 2.46, + "learning_rate": 1.1883749000674057e-05, + "loss": 0.1237, + "step": 52800 + }, + { + "epoch": 2.46, + "learning_rate": 1.188296521561927e-05, + "loss": 0.2529, + "step": 52805 + }, + { + "epoch": 2.46, + "learning_rate": 1.1882181430564483e-05, + "loss": 0.2904, + "step": 52810 + }, + { + "epoch": 2.46, + "learning_rate": 1.1881397645509695e-05, + "loss": 0.2634, + "step": 52815 + }, + { + "epoch": 2.46, + "learning_rate": 1.1880613860454911e-05, + "loss": 0.0563, + "step": 52820 + }, + { + "epoch": 2.46, + "learning_rate": 1.1879830075400123e-05, + "loss": 0.1206, + "step": 52825 + }, + { + "epoch": 2.47, + "learning_rate": 1.1879046290345337e-05, + "loss": 0.0276, + "step": 52830 + }, + { + "epoch": 2.47, + "learning_rate": 1.187826250529055e-05, + "loss": 0.0621, + "step": 52835 + }, + { + "epoch": 2.47, + "learning_rate": 1.1877478720235765e-05, + "loss": 0.1221, + "step": 52840 + }, + { + "epoch": 2.47, + "learning_rate": 1.1876694935180977e-05, + "loss": 0.1302, + "step": 52845 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875911150126191e-05, + "loss": 0.1104, + "step": 52850 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875127365071403e-05, + "loss": 0.2365, + "step": 52855 + }, + { + "epoch": 2.47, + "learning_rate": 1.1874343580016615e-05, + "loss": 0.2957, + "step": 52860 + }, + { + "epoch": 2.47, + "learning_rate": 1.1873559794961831e-05, + "loss": 0.1747, + "step": 52865 + }, + { + "epoch": 2.47, + "learning_rate": 1.1872776009907043e-05, + "loss": 0.0363, + "step": 52870 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871992224852257e-05, + "loss": 0.0558, + "step": 52875 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871208439797471e-05, + "loss": 0.1079, + "step": 52880 + }, + { + "epoch": 2.47, + "learning_rate": 1.1870424654742685e-05, + "loss": 0.0892, + "step": 52885 + }, + { + "epoch": 2.47, + "learning_rate": 1.1869640869687897e-05, + "loss": 0.0586, + "step": 52890 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868857084633111e-05, + "loss": 0.0733, + "step": 52895 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868073299578325e-05, + "loss": 0.1407, + "step": 52900 + }, + { + "epoch": 2.47, + "learning_rate": 1.1867289514523539e-05, + "loss": 0.1468, + "step": 52905 + }, + { + "epoch": 2.47, + "learning_rate": 1.1866505729468751e-05, + "loss": 0.312, + "step": 52910 + }, + { + "epoch": 2.47, + "learning_rate": 1.1865721944413967e-05, + "loss": 0.1312, + "step": 52915 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864938159359179e-05, + "loss": 0.074, + "step": 52920 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864154374304391e-05, + "loss": 0.0478, + "step": 52925 + }, + { + "epoch": 2.47, + "learning_rate": 1.1863370589249605e-05, + "loss": 0.0973, + "step": 52930 + }, + { + "epoch": 2.47, + "learning_rate": 1.1862586804194817e-05, + "loss": 0.0613, + "step": 52935 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861803019140033e-05, + "loss": 0.0627, + "step": 52940 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861019234085245e-05, + "loss": 0.0968, + "step": 52945 + }, + { + "epoch": 2.47, + "learning_rate": 1.1860235449030459e-05, + "loss": 0.0754, + "step": 52950 + }, + { + "epoch": 2.47, + "learning_rate": 1.1859451663975671e-05, + "loss": 0.1461, + "step": 52955 + }, + { + "epoch": 2.47, + "learning_rate": 1.1858667878920887e-05, + "loss": 0.2842, + "step": 52960 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857884093866099e-05, + "loss": 0.2588, + "step": 52965 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857100308811313e-05, + "loss": 0.044, + "step": 52970 + }, + { + "epoch": 2.47, + "learning_rate": 1.1856473280767485e-05, + "loss": 0.0901, + "step": 52975 + }, + { + "epoch": 2.47, + "learning_rate": 1.1855689495712697e-05, + "loss": 0.098, + "step": 52980 + }, + { + "epoch": 2.47, + "learning_rate": 1.185490571065791e-05, + "loss": 0.1027, + "step": 52985 + }, + { + "epoch": 2.47, + "learning_rate": 1.1854121925603123e-05, + "loss": 0.0727, + "step": 52990 + }, + { + "epoch": 2.47, + "learning_rate": 1.1853338140548336e-05, + "loss": 0.086, + "step": 52995 + }, + { + "epoch": 2.47, + "learning_rate": 1.1852554355493551e-05, + "loss": 0.1336, + "step": 53000 + }, + { + "epoch": 2.47, + "learning_rate": 1.1851770570438763e-05, + "loss": 0.2022, + "step": 53005 + }, + { + "epoch": 2.47, + "learning_rate": 1.1850986785383977e-05, + "loss": 0.2916, + "step": 53010 + }, + { + "epoch": 2.47, + "learning_rate": 1.185020300032919e-05, + "loss": 0.1668, + "step": 53015 + }, + { + "epoch": 2.47, + "learning_rate": 1.1849419215274405e-05, + "loss": 0.0279, + "step": 53020 + }, + { + "epoch": 2.47, + "learning_rate": 1.1848635430219617e-05, + "loss": 0.0569, + "step": 53025 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847851645164831e-05, + "loss": 0.0159, + "step": 53030 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847067860110043e-05, + "loss": 0.0532, + "step": 53035 + }, + { + "epoch": 2.47, + "learning_rate": 1.1846284075055259e-05, + "loss": 0.0931, + "step": 53040 + }, + { + "epoch": 2.48, + "learning_rate": 1.1845500290000471e-05, + "loss": 0.1704, + "step": 53045 + }, + { + "epoch": 2.48, + "learning_rate": 1.1844716504945683e-05, + "loss": 0.1275, + "step": 53050 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843932719890897e-05, + "loss": 0.204, + "step": 53055 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843148934836111e-05, + "loss": 0.2639, + "step": 53060 + }, + { + "epoch": 2.48, + "learning_rate": 1.1842365149781325e-05, + "loss": 0.3573, + "step": 53065 + }, + { + "epoch": 2.48, + "learning_rate": 1.1841581364726537e-05, + "loss": 0.0625, + "step": 53070 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840797579671753e-05, + "loss": 0.0354, + "step": 53075 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840013794616965e-05, + "loss": 0.0453, + "step": 53080 + }, + { + "epoch": 2.48, + "learning_rate": 1.1839230009562179e-05, + "loss": 0.0939, + "step": 53085 + }, + { + "epoch": 2.48, + "learning_rate": 1.1838446224507391e-05, + "loss": 0.0633, + "step": 53090 + }, + { + "epoch": 2.48, + "learning_rate": 1.1837662439452607e-05, + "loss": 0.1074, + "step": 53095 + }, + { + "epoch": 2.48, + "learning_rate": 1.183687865439782e-05, + "loss": 0.0843, + "step": 53100 + }, + { + "epoch": 2.48, + "learning_rate": 1.1836094869343033e-05, + "loss": 0.1816, + "step": 53105 + }, + { + "epoch": 2.48, + "learning_rate": 1.1835311084288245e-05, + "loss": 0.283, + "step": 53110 + }, + { + "epoch": 2.48, + "learning_rate": 1.1834527299233457e-05, + "loss": 0.4184, + "step": 53115 + }, + { + "epoch": 2.48, + "learning_rate": 1.1833743514178673e-05, + "loss": 0.0683, + "step": 53120 + }, + { + "epoch": 2.48, + "learning_rate": 1.1832959729123885e-05, + "loss": 0.0495, + "step": 53125 + }, + { + "epoch": 2.48, + "learning_rate": 1.18321759440691e-05, + "loss": 0.0605, + "step": 53130 + }, + { + "epoch": 2.48, + "learning_rate": 1.1831392159014311e-05, + "loss": 0.1192, + "step": 53135 + }, + { + "epoch": 2.48, + "learning_rate": 1.1830608373959527e-05, + "loss": 0.1119, + "step": 53140 + }, + { + "epoch": 2.48, + "learning_rate": 1.182982458890474e-05, + "loss": 0.1132, + "step": 53145 + }, + { + "epoch": 2.48, + "learning_rate": 1.1829040803849953e-05, + "loss": 0.1361, + "step": 53150 + }, + { + "epoch": 2.48, + "learning_rate": 1.1828257018795165e-05, + "loss": 0.2086, + "step": 53155 + }, + { + "epoch": 2.48, + "learning_rate": 1.1827473233740381e-05, + "loss": 0.192, + "step": 53160 + }, + { + "epoch": 2.48, + "learning_rate": 1.1826689448685593e-05, + "loss": 0.2593, + "step": 53165 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825905663630807e-05, + "loss": 0.0378, + "step": 53170 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825121878576021e-05, + "loss": 0.0689, + "step": 53175 + }, + { + "epoch": 2.48, + "learning_rate": 1.1824338093521233e-05, + "loss": 0.0995, + "step": 53180 + }, + { + "epoch": 2.48, + "learning_rate": 1.1823554308466447e-05, + "loss": 0.0625, + "step": 53185 + }, + { + "epoch": 2.48, + "learning_rate": 1.182277052341166e-05, + "loss": 0.0522, + "step": 53190 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821986738356875e-05, + "loss": 0.1119, + "step": 53195 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821202953302087e-05, + "loss": 0.2608, + "step": 53200 + }, + { + "epoch": 2.48, + "learning_rate": 1.1820419168247301e-05, + "loss": 0.1296, + "step": 53205 + }, + { + "epoch": 2.48, + "learning_rate": 1.1819635383192513e-05, + "loss": 0.2392, + "step": 53210 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818851598137729e-05, + "loss": 0.3229, + "step": 53215 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818067813082941e-05, + "loss": 0.0728, + "step": 53220 + }, + { + "epoch": 2.48, + "learning_rate": 1.1817284028028155e-05, + "loss": 0.0496, + "step": 53225 + }, + { + "epoch": 2.48, + "learning_rate": 1.1816500242973367e-05, + "loss": 0.1219, + "step": 53230 + }, + { + "epoch": 2.48, + "learning_rate": 1.1815716457918583e-05, + "loss": 0.0571, + "step": 53235 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814932672863795e-05, + "loss": 0.0712, + "step": 53240 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814148887809007e-05, + "loss": 0.1081, + "step": 53245 + }, + { + "epoch": 2.48, + "learning_rate": 1.1813365102754221e-05, + "loss": 0.1176, + "step": 53250 + }, + { + "epoch": 2.48, + "learning_rate": 1.1812581317699435e-05, + "loss": 0.1212, + "step": 53255 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811797532644649e-05, + "loss": 0.3415, + "step": 53260 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811013747589861e-05, + "loss": 0.2234, + "step": 53265 + }, + { + "epoch": 2.49, + "learning_rate": 1.1810229962535075e-05, + "loss": 0.0399, + "step": 53270 + }, + { + "epoch": 2.49, + "learning_rate": 1.1809446177480289e-05, + "loss": 0.0437, + "step": 53275 + }, + { + "epoch": 2.49, + "learning_rate": 1.1808662392425503e-05, + "loss": 0.0732, + "step": 53280 + }, + { + "epoch": 2.49, + "learning_rate": 1.1807878607370715e-05, + "loss": 0.068, + "step": 53285 + }, + { + "epoch": 2.49, + "learning_rate": 1.180709482231593e-05, + "loss": 0.1564, + "step": 53290 + }, + { + "epoch": 2.49, + "learning_rate": 1.1806311037261143e-05, + "loss": 0.1242, + "step": 53295 + }, + { + "epoch": 2.49, + "learning_rate": 1.1805527252206357e-05, + "loss": 0.0852, + "step": 53300 + }, + { + "epoch": 2.49, + "learning_rate": 1.1804743467151569e-05, + "loss": 0.1913, + "step": 53305 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803959682096781e-05, + "loss": 0.2867, + "step": 53310 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803175897041997e-05, + "loss": 0.2384, + "step": 53315 + }, + { + "epoch": 2.49, + "learning_rate": 1.1802392111987209e-05, + "loss": 0.0679, + "step": 53320 + }, + { + "epoch": 2.49, + "learning_rate": 1.1801608326932423e-05, + "loss": 0.015, + "step": 53325 + }, + { + "epoch": 2.49, + "learning_rate": 1.1800824541877635e-05, + "loss": 0.0466, + "step": 53330 + }, + { + "epoch": 2.49, + "learning_rate": 1.180004075682285e-05, + "loss": 0.0421, + "step": 53335 + }, + { + "epoch": 2.49, + "learning_rate": 1.1799256971768063e-05, + "loss": 0.0758, + "step": 53340 + }, + { + "epoch": 2.49, + "learning_rate": 1.1798473186713277e-05, + "loss": 0.062, + "step": 53345 + }, + { + "epoch": 2.49, + "learning_rate": 1.1797689401658489e-05, + "loss": 0.1198, + "step": 53350 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796905616603705e-05, + "loss": 0.2479, + "step": 53355 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796121831548917e-05, + "loss": 0.2566, + "step": 53360 + }, + { + "epoch": 2.49, + "learning_rate": 1.179533804649413e-05, + "loss": 0.5469, + "step": 53365 + }, + { + "epoch": 2.49, + "learning_rate": 1.1794554261439343e-05, + "loss": 0.0849, + "step": 53370 + }, + { + "epoch": 2.49, + "learning_rate": 1.1793770476384557e-05, + "loss": 0.0693, + "step": 53375 + }, + { + "epoch": 2.49, + "learning_rate": 1.179298669132977e-05, + "loss": 0.0148, + "step": 53380 + }, + { + "epoch": 2.49, + "learning_rate": 1.1792202906274983e-05, + "loss": 0.0683, + "step": 53385 + }, + { + "epoch": 2.49, + "learning_rate": 1.1791419121220199e-05, + "loss": 0.0983, + "step": 53390 + }, + { + "epoch": 2.49, + "learning_rate": 1.179063533616541e-05, + "loss": 0.1328, + "step": 53395 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789851551110625e-05, + "loss": 0.089, + "step": 53400 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789067766055837e-05, + "loss": 0.2294, + "step": 53405 + }, + { + "epoch": 2.49, + "learning_rate": 1.1788283981001053e-05, + "loss": 0.216, + "step": 53410 + }, + { + "epoch": 2.49, + "learning_rate": 1.1787500195946265e-05, + "loss": 0.3105, + "step": 53415 + }, + { + "epoch": 2.49, + "learning_rate": 1.1786716410891479e-05, + "loss": 0.0386, + "step": 53420 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785932625836691e-05, + "loss": 0.0221, + "step": 53425 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785148840781906e-05, + "loss": 0.0662, + "step": 53430 + }, + { + "epoch": 2.49, + "learning_rate": 1.1784365055727119e-05, + "loss": 0.1186, + "step": 53435 + }, + { + "epoch": 2.49, + "learning_rate": 1.1783581270672331e-05, + "loss": 0.1842, + "step": 53440 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782797485617545e-05, + "loss": 0.0644, + "step": 53445 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782013700562757e-05, + "loss": 0.1558, + "step": 53450 + }, + { + "epoch": 2.49, + "learning_rate": 1.1781229915507973e-05, + "loss": 0.1776, + "step": 53455 + }, + { + "epoch": 2.49, + "learning_rate": 1.1780446130453185e-05, + "loss": 0.1429, + "step": 53460 + }, + { + "epoch": 2.49, + "learning_rate": 1.1779662345398399e-05, + "loss": 0.2067, + "step": 53465 + }, + { + "epoch": 2.49, + "learning_rate": 1.1778878560343611e-05, + "loss": 0.0238, + "step": 53470 + }, + { + "epoch": 2.5, + "learning_rate": 1.1778094775288827e-05, + "loss": 0.0882, + "step": 53475 + }, + { + "epoch": 2.5, + "learning_rate": 1.1777310990234039e-05, + "loss": 0.0437, + "step": 53480 + }, + { + "epoch": 2.5, + "learning_rate": 1.1776527205179253e-05, + "loss": 0.1085, + "step": 53485 + }, + { + "epoch": 2.5, + "learning_rate": 1.1775743420124467e-05, + "loss": 0.082, + "step": 53490 + }, + { + "epoch": 2.5, + "learning_rate": 1.177495963506968e-05, + "loss": 0.0515, + "step": 53495 + }, + { + "epoch": 2.5, + "learning_rate": 1.1774175850014893e-05, + "loss": 0.1574, + "step": 53500 + }, + { + "epoch": 2.5, + "learning_rate": 1.1773392064960105e-05, + "loss": 0.176, + "step": 53505 + }, + { + "epoch": 2.5, + "learning_rate": 1.177260827990532e-05, + "loss": 0.3283, + "step": 53510 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771824494850533e-05, + "loss": 0.2003, + "step": 53515 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771040709795747e-05, + "loss": 0.0773, + "step": 53520 + }, + { + "epoch": 2.5, + "learning_rate": 1.1770256924740959e-05, + "loss": 0.0402, + "step": 53525 + }, + { + "epoch": 2.5, + "learning_rate": 1.1769473139686174e-05, + "loss": 0.0686, + "step": 53530 + }, + { + "epoch": 2.5, + "learning_rate": 1.1768689354631387e-05, + "loss": 0.1132, + "step": 53535 + }, + { + "epoch": 2.5, + "learning_rate": 1.17679055695766e-05, + "loss": 0.109, + "step": 53540 + }, + { + "epoch": 2.5, + "learning_rate": 1.1767121784521813e-05, + "loss": 0.1463, + "step": 53545 + }, + { + "epoch": 2.5, + "learning_rate": 1.1766337999467028e-05, + "loss": 0.1783, + "step": 53550 + }, + { + "epoch": 2.5, + "learning_rate": 1.176555421441224e-05, + "loss": 0.1049, + "step": 53555 + }, + { + "epoch": 2.5, + "learning_rate": 1.1764770429357454e-05, + "loss": 0.4038, + "step": 53560 + }, + { + "epoch": 2.5, + "learning_rate": 1.1763986644302667e-05, + "loss": 0.2925, + "step": 53565 + }, + { + "epoch": 2.5, + "learning_rate": 1.176320285924788e-05, + "loss": 0.0268, + "step": 53570 + }, + { + "epoch": 2.5, + "learning_rate": 1.1762419074193094e-05, + "loss": 0.0987, + "step": 53575 + }, + { + "epoch": 2.5, + "learning_rate": 1.1761635289138307e-05, + "loss": 0.0748, + "step": 53580 + }, + { + "epoch": 2.5, + "learning_rate": 1.176085150408352e-05, + "loss": 0.0521, + "step": 53585 + }, + { + "epoch": 2.5, + "learning_rate": 1.1760067719028734e-05, + "loss": 0.1107, + "step": 53590 + }, + { + "epoch": 2.5, + "learning_rate": 1.1759283933973948e-05, + "loss": 0.1081, + "step": 53595 + }, + { + "epoch": 2.5, + "learning_rate": 1.175850014891916e-05, + "loss": 0.0851, + "step": 53600 + }, + { + "epoch": 2.5, + "learning_rate": 1.1757716363864376e-05, + "loss": 0.1295, + "step": 53605 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756932578809588e-05, + "loss": 0.3218, + "step": 53610 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756148793754802e-05, + "loss": 0.3039, + "step": 53615 + }, + { + "epoch": 2.5, + "learning_rate": 1.1755365008700015e-05, + "loss": 0.0713, + "step": 53620 + }, + { + "epoch": 2.5, + "learning_rate": 1.175458122364523e-05, + "loss": 0.0448, + "step": 53625 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753797438590442e-05, + "loss": 0.0533, + "step": 53630 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753013653535655e-05, + "loss": 0.0662, + "step": 53635 + }, + { + "epoch": 2.5, + "learning_rate": 1.1752229868480868e-05, + "loss": 0.0825, + "step": 53640 + }, + { + "epoch": 2.5, + "learning_rate": 1.175144608342608e-05, + "loss": 0.1356, + "step": 53645 + }, + { + "epoch": 2.5, + "learning_rate": 1.1750662298371296e-05, + "loss": 0.119, + "step": 53650 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749878513316508e-05, + "loss": 0.177, + "step": 53655 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749094728261722e-05, + "loss": 0.2674, + "step": 53660 + }, + { + "epoch": 2.5, + "learning_rate": 1.1748310943206935e-05, + "loss": 0.2447, + "step": 53665 + }, + { + "epoch": 2.5, + "learning_rate": 1.174752715815215e-05, + "loss": 0.0238, + "step": 53670 + }, + { + "epoch": 2.5, + "learning_rate": 1.1746743373097362e-05, + "loss": 0.0325, + "step": 53675 + }, + { + "epoch": 2.5, + "learning_rate": 1.1745959588042576e-05, + "loss": 0.0444, + "step": 53680 + }, + { + "epoch": 2.51, + "learning_rate": 1.1745175802987789e-05, + "loss": 0.0893, + "step": 53685 + }, + { + "epoch": 2.51, + "learning_rate": 1.1744392017933004e-05, + "loss": 0.0845, + "step": 53690 + }, + { + "epoch": 2.51, + "learning_rate": 1.1743608232878216e-05, + "loss": 0.0906, + "step": 53695 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742824447823429e-05, + "loss": 0.1417, + "step": 53700 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742040662768644e-05, + "loss": 0.1703, + "step": 53705 + }, + { + "epoch": 2.51, + "learning_rate": 1.1741256877713856e-05, + "loss": 0.2382, + "step": 53710 + }, + { + "epoch": 2.51, + "learning_rate": 1.174047309265907e-05, + "loss": 0.3102, + "step": 53715 + }, + { + "epoch": 2.51, + "learning_rate": 1.1739689307604282e-05, + "loss": 0.0984, + "step": 53720 + }, + { + "epoch": 2.51, + "learning_rate": 1.1738905522549498e-05, + "loss": 0.0297, + "step": 53725 + }, + { + "epoch": 2.51, + "learning_rate": 1.173812173749471e-05, + "loss": 0.0989, + "step": 53730 + }, + { + "epoch": 2.51, + "learning_rate": 1.1737337952439924e-05, + "loss": 0.076, + "step": 53735 + }, + { + "epoch": 2.51, + "learning_rate": 1.1736554167385136e-05, + "loss": 0.1314, + "step": 53740 + }, + { + "epoch": 2.51, + "learning_rate": 1.1735770382330352e-05, + "loss": 0.1822, + "step": 53745 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734986597275564e-05, + "loss": 0.2006, + "step": 53750 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734202812220778e-05, + "loss": 0.1828, + "step": 53755 + }, + { + "epoch": 2.51, + "learning_rate": 1.173341902716599e-05, + "loss": 0.3078, + "step": 53760 + }, + { + "epoch": 2.51, + "learning_rate": 1.1732635242111203e-05, + "loss": 0.3112, + "step": 53765 + }, + { + "epoch": 2.51, + "learning_rate": 1.1731851457056418e-05, + "loss": 0.0143, + "step": 53770 + }, + { + "epoch": 2.51, + "learning_rate": 1.173106767200163e-05, + "loss": 0.0488, + "step": 53775 + }, + { + "epoch": 2.51, + "learning_rate": 1.1730283886946844e-05, + "loss": 0.0628, + "step": 53780 + }, + { + "epoch": 2.51, + "learning_rate": 1.1729500101892058e-05, + "loss": 0.162, + "step": 53785 + }, + { + "epoch": 2.51, + "learning_rate": 1.1728716316837272e-05, + "loss": 0.1037, + "step": 53790 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727932531782484e-05, + "loss": 0.0923, + "step": 53795 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727148746727698e-05, + "loss": 0.0755, + "step": 53800 + }, + { + "epoch": 2.51, + "learning_rate": 1.1726364961672912e-05, + "loss": 0.1833, + "step": 53805 + }, + { + "epoch": 2.51, + "learning_rate": 1.1725581176618126e-05, + "loss": 0.1893, + "step": 53810 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724797391563338e-05, + "loss": 0.3448, + "step": 53815 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724013606508554e-05, + "loss": 0.0322, + "step": 53820 + }, + { + "epoch": 2.51, + "learning_rate": 1.1723229821453766e-05, + "loss": 0.0322, + "step": 53825 + }, + { + "epoch": 2.51, + "learning_rate": 1.1722446036398978e-05, + "loss": 0.0736, + "step": 53830 + }, + { + "epoch": 2.51, + "learning_rate": 1.1721662251344192e-05, + "loss": 0.0983, + "step": 53835 + }, + { + "epoch": 2.51, + "learning_rate": 1.1720878466289404e-05, + "loss": 0.0753, + "step": 53840 + }, + { + "epoch": 2.51, + "learning_rate": 1.172009468123462e-05, + "loss": 0.0771, + "step": 53845 + }, + { + "epoch": 2.51, + "learning_rate": 1.1719310896179832e-05, + "loss": 0.1376, + "step": 53850 + }, + { + "epoch": 2.51, + "learning_rate": 1.1718527111125046e-05, + "loss": 0.1836, + "step": 53855 + }, + { + "epoch": 2.51, + "learning_rate": 1.1717743326070258e-05, + "loss": 0.2587, + "step": 53860 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716959541015474e-05, + "loss": 0.2983, + "step": 53865 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716175755960686e-05, + "loss": 0.0732, + "step": 53870 + }, + { + "epoch": 2.51, + "learning_rate": 1.17153919709059e-05, + "loss": 0.0571, + "step": 53875 + }, + { + "epoch": 2.51, + "learning_rate": 1.1714608185851112e-05, + "loss": 0.0792, + "step": 53880 + }, + { + "epoch": 2.51, + "learning_rate": 1.1713824400796328e-05, + "loss": 0.0731, + "step": 53885 + }, + { + "epoch": 2.51, + "learning_rate": 1.171304061574154e-05, + "loss": 0.0635, + "step": 53890 + }, + { + "epoch": 2.51, + "learning_rate": 1.1712256830686752e-05, + "loss": 0.1637, + "step": 53895 + }, + { + "epoch": 2.52, + "learning_rate": 1.1711473045631966e-05, + "loss": 0.2193, + "step": 53900 + }, + { + "epoch": 2.52, + "learning_rate": 1.171068926057718e-05, + "loss": 0.2715, + "step": 53905 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709905475522394e-05, + "loss": 0.3451, + "step": 53910 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709121690467606e-05, + "loss": 0.2351, + "step": 53915 + }, + { + "epoch": 2.52, + "learning_rate": 1.1708337905412822e-05, + "loss": 0.0587, + "step": 53920 + }, + { + "epoch": 2.52, + "learning_rate": 1.1707554120358034e-05, + "loss": 0.0571, + "step": 53925 + }, + { + "epoch": 2.52, + "learning_rate": 1.1706770335303248e-05, + "loss": 0.0329, + "step": 53930 + }, + { + "epoch": 2.52, + "learning_rate": 1.170598655024846e-05, + "loss": 0.0982, + "step": 53935 + }, + { + "epoch": 2.52, + "learning_rate": 1.1705202765193676e-05, + "loss": 0.056, + "step": 53940 + }, + { + "epoch": 2.52, + "learning_rate": 1.1704418980138888e-05, + "loss": 0.1438, + "step": 53945 + }, + { + "epoch": 2.52, + "learning_rate": 1.1703635195084102e-05, + "loss": 0.1759, + "step": 53950 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702851410029314e-05, + "loss": 0.2695, + "step": 53955 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702067624974526e-05, + "loss": 0.2936, + "step": 53960 + }, + { + "epoch": 2.52, + "learning_rate": 1.1701283839919742e-05, + "loss": 0.2662, + "step": 53965 + }, + { + "epoch": 2.52, + "learning_rate": 1.1700500054864954e-05, + "loss": 0.0413, + "step": 53970 + }, + { + "epoch": 2.52, + "learning_rate": 1.1699716269810168e-05, + "loss": 0.0348, + "step": 53975 + }, + { + "epoch": 2.52, + "learning_rate": 1.169893248475538e-05, + "loss": 0.0487, + "step": 53980 + }, + { + "epoch": 2.52, + "learning_rate": 1.1698148699700596e-05, + "loss": 0.0561, + "step": 53985 + }, + { + "epoch": 2.52, + "learning_rate": 1.1697364914645808e-05, + "loss": 0.1206, + "step": 53990 + }, + { + "epoch": 2.52, + "learning_rate": 1.1696581129591022e-05, + "loss": 0.1703, + "step": 53995 + }, + { + "epoch": 2.52, + "learning_rate": 1.1695797344536234e-05, + "loss": 0.1549, + "step": 54000 + }, + { + "epoch": 2.52, + "learning_rate": 1.169501355948145e-05, + "loss": 0.209, + "step": 54005 + }, + { + "epoch": 2.52, + "learning_rate": 1.1694229774426662e-05, + "loss": 0.4151, + "step": 54010 + }, + { + "epoch": 2.52, + "learning_rate": 1.1693445989371876e-05, + "loss": 0.1933, + "step": 54015 + }, + { + "epoch": 2.52, + "learning_rate": 1.169266220431709e-05, + "loss": 0.0544, + "step": 54020 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691878419262302e-05, + "loss": 0.0166, + "step": 54025 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691094634207516e-05, + "loss": 0.0557, + "step": 54030 + }, + { + "epoch": 2.52, + "learning_rate": 1.1690310849152728e-05, + "loss": 0.0669, + "step": 54035 + }, + { + "epoch": 2.52, + "learning_rate": 1.1689527064097944e-05, + "loss": 0.0985, + "step": 54040 + }, + { + "epoch": 2.52, + "learning_rate": 1.1688743279043156e-05, + "loss": 0.1685, + "step": 54045 + }, + { + "epoch": 2.52, + "learning_rate": 1.168795949398837e-05, + "loss": 0.1963, + "step": 54050 + }, + { + "epoch": 2.52, + "learning_rate": 1.1687175708933582e-05, + "loss": 0.192, + "step": 54055 + }, + { + "epoch": 2.52, + "learning_rate": 1.1686391923878798e-05, + "loss": 0.3119, + "step": 54060 + }, + { + "epoch": 2.52, + "learning_rate": 1.168560813882401e-05, + "loss": 0.3424, + "step": 54065 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684824353769224e-05, + "loss": 0.0345, + "step": 54070 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684040568714436e-05, + "loss": 0.0528, + "step": 54075 + }, + { + "epoch": 2.52, + "learning_rate": 1.1683256783659652e-05, + "loss": 0.053, + "step": 54080 + }, + { + "epoch": 2.52, + "learning_rate": 1.1682472998604864e-05, + "loss": 0.0567, + "step": 54085 + }, + { + "epoch": 2.52, + "learning_rate": 1.1681689213550076e-05, + "loss": 0.1253, + "step": 54090 + }, + { + "epoch": 2.52, + "learning_rate": 1.168090542849529e-05, + "loss": 0.1384, + "step": 54095 + }, + { + "epoch": 2.52, + "learning_rate": 1.1680121643440504e-05, + "loss": 0.1149, + "step": 54100 + }, + { + "epoch": 2.52, + "learning_rate": 1.1679337858385718e-05, + "loss": 0.1669, + "step": 54105 + }, + { + "epoch": 2.52, + "learning_rate": 1.1678710830341888e-05, + "loss": 0.1833, + "step": 54110 + }, + { + "epoch": 2.53, + "learning_rate": 1.16779270452871e-05, + "loss": 0.2386, + "step": 54115 + }, + { + "epoch": 2.53, + "learning_rate": 1.1677143260232316e-05, + "loss": 0.0729, + "step": 54120 + }, + { + "epoch": 2.53, + "learning_rate": 1.1676359475177528e-05, + "loss": 0.0523, + "step": 54125 + }, + { + "epoch": 2.53, + "learning_rate": 1.1675575690122742e-05, + "loss": 0.0632, + "step": 54130 + }, + { + "epoch": 2.53, + "learning_rate": 1.1674791905067954e-05, + "loss": 0.0702, + "step": 54135 + }, + { + "epoch": 2.53, + "learning_rate": 1.167400812001317e-05, + "loss": 0.0453, + "step": 54140 + }, + { + "epoch": 2.53, + "learning_rate": 1.1673224334958382e-05, + "loss": 0.1348, + "step": 54145 + }, + { + "epoch": 2.53, + "learning_rate": 1.1672440549903596e-05, + "loss": 0.156, + "step": 54150 + }, + { + "epoch": 2.53, + "learning_rate": 1.1671656764848808e-05, + "loss": 0.1055, + "step": 54155 + }, + { + "epoch": 2.53, + "learning_rate": 1.167087297979402e-05, + "loss": 0.1431, + "step": 54160 + }, + { + "epoch": 2.53, + "learning_rate": 1.1670089194739236e-05, + "loss": 0.3631, + "step": 54165 + }, + { + "epoch": 2.53, + "learning_rate": 1.1669305409684448e-05, + "loss": 0.0497, + "step": 54170 + }, + { + "epoch": 2.53, + "learning_rate": 1.1668521624629662e-05, + "loss": 0.0897, + "step": 54175 + }, + { + "epoch": 2.53, + "learning_rate": 1.1667737839574876e-05, + "loss": 0.0819, + "step": 54180 + }, + { + "epoch": 2.53, + "learning_rate": 1.166695405452009e-05, + "loss": 0.0764, + "step": 54185 + }, + { + "epoch": 2.53, + "learning_rate": 1.1666170269465302e-05, + "loss": 0.0894, + "step": 54190 + }, + { + "epoch": 2.53, + "learning_rate": 1.1665386484410518e-05, + "loss": 0.0866, + "step": 54195 + }, + { + "epoch": 2.53, + "learning_rate": 1.166460269935573e-05, + "loss": 0.1523, + "step": 54200 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663818914300944e-05, + "loss": 0.1929, + "step": 54205 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663035129246156e-05, + "loss": 0.2861, + "step": 54210 + }, + { + "epoch": 2.53, + "learning_rate": 1.1662251344191372e-05, + "loss": 0.2864, + "step": 54215 + }, + { + "epoch": 2.53, + "learning_rate": 1.1661467559136584e-05, + "loss": 0.0513, + "step": 54220 + }, + { + "epoch": 2.53, + "learning_rate": 1.1660683774081796e-05, + "loss": 0.0342, + "step": 54225 + }, + { + "epoch": 2.53, + "learning_rate": 1.165989998902701e-05, + "loss": 0.078, + "step": 54230 + }, + { + "epoch": 2.53, + "learning_rate": 1.1659116203972222e-05, + "loss": 0.0786, + "step": 54235 + }, + { + "epoch": 2.53, + "learning_rate": 1.1658332418917438e-05, + "loss": 0.1127, + "step": 54240 + }, + { + "epoch": 2.53, + "learning_rate": 1.165754863386265e-05, + "loss": 0.1296, + "step": 54245 + }, + { + "epoch": 2.53, + "learning_rate": 1.1656764848807864e-05, + "loss": 0.134, + "step": 54250 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655981063753076e-05, + "loss": 0.2205, + "step": 54255 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655197278698292e-05, + "loss": 0.1846, + "step": 54260 + }, + { + "epoch": 2.53, + "learning_rate": 1.1654413493643504e-05, + "loss": 0.3259, + "step": 54265 + }, + { + "epoch": 2.53, + "learning_rate": 1.1653629708588718e-05, + "loss": 0.0222, + "step": 54270 + }, + { + "epoch": 2.53, + "learning_rate": 1.165284592353393e-05, + "loss": 0.0677, + "step": 54275 + }, + { + "epoch": 2.53, + "learning_rate": 1.1652062138479146e-05, + "loss": 0.0542, + "step": 54280 + }, + { + "epoch": 2.53, + "learning_rate": 1.1651278353424358e-05, + "loss": 0.0699, + "step": 54285 + }, + { + "epoch": 2.53, + "learning_rate": 1.165049456836957e-05, + "loss": 0.0722, + "step": 54290 + }, + { + "epoch": 2.53, + "learning_rate": 1.1649710783314786e-05, + "loss": 0.1666, + "step": 54295 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648926998259998e-05, + "loss": 0.2113, + "step": 54300 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648143213205212e-05, + "loss": 0.1151, + "step": 54305 + }, + { + "epoch": 2.53, + "learning_rate": 1.1647359428150424e-05, + "loss": 0.2646, + "step": 54310 + }, + { + "epoch": 2.53, + "learning_rate": 1.164657564309564e-05, + "loss": 0.2441, + "step": 54315 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645791858040852e-05, + "loss": 0.0764, + "step": 54320 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645008072986066e-05, + "loss": 0.0424, + "step": 54325 + }, + { + "epoch": 2.54, + "learning_rate": 1.1644224287931278e-05, + "loss": 0.0538, + "step": 54330 + }, + { + "epoch": 2.54, + "learning_rate": 1.1643440502876494e-05, + "loss": 0.0319, + "step": 54335 + }, + { + "epoch": 2.54, + "learning_rate": 1.1642656717821706e-05, + "loss": 0.1016, + "step": 54340 + }, + { + "epoch": 2.54, + "learning_rate": 1.164187293276692e-05, + "loss": 0.0941, + "step": 54345 + }, + { + "epoch": 2.54, + "learning_rate": 1.1641089147712132e-05, + "loss": 0.0697, + "step": 54350 + }, + { + "epoch": 2.54, + "learning_rate": 1.1640305362657344e-05, + "loss": 0.1258, + "step": 54355 + }, + { + "epoch": 2.54, + "learning_rate": 1.163952157760256e-05, + "loss": 0.2775, + "step": 54360 + }, + { + "epoch": 2.54, + "learning_rate": 1.1638737792547772e-05, + "loss": 0.3991, + "step": 54365 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637954007492986e-05, + "loss": 0.098, + "step": 54370 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637170222438198e-05, + "loss": 0.0404, + "step": 54375 + }, + { + "epoch": 2.54, + "learning_rate": 1.1636386437383414e-05, + "loss": 0.0601, + "step": 54380 + }, + { + "epoch": 2.54, + "learning_rate": 1.1635602652328626e-05, + "loss": 0.0452, + "step": 54385 + }, + { + "epoch": 2.54, + "learning_rate": 1.163481886727384e-05, + "loss": 0.1061, + "step": 54390 + }, + { + "epoch": 2.54, + "learning_rate": 1.1634035082219054e-05, + "loss": 0.0819, + "step": 54395 + }, + { + "epoch": 2.54, + "learning_rate": 1.1633251297164268e-05, + "loss": 0.1277, + "step": 54400 + }, + { + "epoch": 2.54, + "learning_rate": 1.163246751210948e-05, + "loss": 0.2061, + "step": 54405 + }, + { + "epoch": 2.54, + "learning_rate": 1.1631683727054695e-05, + "loss": 0.3015, + "step": 54410 + }, + { + "epoch": 2.54, + "learning_rate": 1.1630899941999908e-05, + "loss": 0.4022, + "step": 54415 + }, + { + "epoch": 2.54, + "learning_rate": 1.163011615694512e-05, + "loss": 0.0536, + "step": 54420 + }, + { + "epoch": 2.54, + "learning_rate": 1.1629332371890334e-05, + "loss": 0.0426, + "step": 54425 + }, + { + "epoch": 2.54, + "learning_rate": 1.1628548586835546e-05, + "loss": 0.046, + "step": 54430 + }, + { + "epoch": 2.54, + "learning_rate": 1.1627764801780761e-05, + "loss": 0.0336, + "step": 54435 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626981016725974e-05, + "loss": 0.0846, + "step": 54440 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626197231671188e-05, + "loss": 0.0895, + "step": 54445 + }, + { + "epoch": 2.54, + "learning_rate": 1.16254134466164e-05, + "loss": 0.0969, + "step": 54450 + }, + { + "epoch": 2.54, + "learning_rate": 1.1624629661561615e-05, + "loss": 0.1187, + "step": 54455 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623845876506828e-05, + "loss": 0.2181, + "step": 54460 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623062091452042e-05, + "loss": 0.2948, + "step": 54465 + }, + { + "epoch": 2.54, + "learning_rate": 1.1622278306397254e-05, + "loss": 0.0419, + "step": 54470 + }, + { + "epoch": 2.54, + "learning_rate": 1.162149452134247e-05, + "loss": 0.0756, + "step": 54475 + }, + { + "epoch": 2.54, + "learning_rate": 1.1620710736287682e-05, + "loss": 0.0746, + "step": 54480 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619926951232894e-05, + "loss": 0.0578, + "step": 54485 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619143166178108e-05, + "loss": 0.047, + "step": 54490 + }, + { + "epoch": 2.54, + "learning_rate": 1.1618359381123322e-05, + "loss": 0.1197, + "step": 54495 + }, + { + "epoch": 2.54, + "learning_rate": 1.1617575596068535e-05, + "loss": 0.0707, + "step": 54500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616791811013748e-05, + "loss": 0.1846, + "step": 54505 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616008025958963e-05, + "loss": 0.3005, + "step": 54510 + }, + { + "epoch": 2.54, + "learning_rate": 1.1615224240904176e-05, + "loss": 0.3327, + "step": 54515 + }, + { + "epoch": 2.54, + "learning_rate": 1.161444045584939e-05, + "loss": 0.0754, + "step": 54520 + }, + { + "epoch": 2.54, + "learning_rate": 1.1613656670794602e-05, + "loss": 0.0423, + "step": 54525 + }, + { + "epoch": 2.54, + "learning_rate": 1.1612872885739817e-05, + "loss": 0.0361, + "step": 54530 + }, + { + "epoch": 2.54, + "learning_rate": 1.161208910068503e-05, + "loss": 0.0925, + "step": 54535 + }, + { + "epoch": 2.54, + "learning_rate": 1.1611305315630243e-05, + "loss": 0.0593, + "step": 54540 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610521530575456e-05, + "loss": 0.1092, + "step": 54545 + }, + { + "epoch": 2.55, + "learning_rate": 1.1609737745520668e-05, + "loss": 0.1291, + "step": 54550 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608953960465883e-05, + "loss": 0.2171, + "step": 54555 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608170175411096e-05, + "loss": 0.1797, + "step": 54560 + }, + { + "epoch": 2.55, + "learning_rate": 1.160738639035631e-05, + "loss": 0.3179, + "step": 54565 + }, + { + "epoch": 2.55, + "learning_rate": 1.1606602605301522e-05, + "loss": 0.0331, + "step": 54570 + }, + { + "epoch": 2.55, + "learning_rate": 1.1605818820246737e-05, + "loss": 0.0527, + "step": 54575 + }, + { + "epoch": 2.55, + "learning_rate": 1.160503503519195e-05, + "loss": 0.0389, + "step": 54580 + }, + { + "epoch": 2.55, + "learning_rate": 1.1604251250137163e-05, + "loss": 0.0399, + "step": 54585 + }, + { + "epoch": 2.55, + "learning_rate": 1.1603467465082376e-05, + "loss": 0.0983, + "step": 54590 + }, + { + "epoch": 2.55, + "learning_rate": 1.1602683680027591e-05, + "loss": 0.0581, + "step": 54595 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601899894972803e-05, + "loss": 0.1263, + "step": 54600 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601116109918017e-05, + "loss": 0.1091, + "step": 54605 + }, + { + "epoch": 2.55, + "learning_rate": 1.1600332324863231e-05, + "loss": 0.3447, + "step": 54610 + }, + { + "epoch": 2.55, + "learning_rate": 1.1599548539808443e-05, + "loss": 0.2702, + "step": 54615 + }, + { + "epoch": 2.55, + "learning_rate": 1.1598764754753657e-05, + "loss": 0.1182, + "step": 54620 + }, + { + "epoch": 2.55, + "learning_rate": 1.159798096969887e-05, + "loss": 0.0375, + "step": 54625 + }, + { + "epoch": 2.55, + "learning_rate": 1.1597197184644085e-05, + "loss": 0.0246, + "step": 54630 + }, + { + "epoch": 2.55, + "learning_rate": 1.1596413399589297e-05, + "loss": 0.0543, + "step": 54635 + }, + { + "epoch": 2.55, + "learning_rate": 1.1595629614534511e-05, + "loss": 0.0953, + "step": 54640 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594845829479724e-05, + "loss": 0.206, + "step": 54645 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594062044424939e-05, + "loss": 0.1144, + "step": 54650 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593278259370151e-05, + "loss": 0.1205, + "step": 54655 + }, + { + "epoch": 2.55, + "learning_rate": 1.1592494474315365e-05, + "loss": 0.2163, + "step": 54660 + }, + { + "epoch": 2.55, + "learning_rate": 1.1591710689260577e-05, + "loss": 0.34, + "step": 54665 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590926904205793e-05, + "loss": 0.0405, + "step": 54670 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590143119151005e-05, + "loss": 0.0285, + "step": 54675 + }, + { + "epoch": 2.55, + "learning_rate": 1.1589359334096217e-05, + "loss": 0.0436, + "step": 54680 + }, + { + "epoch": 2.55, + "learning_rate": 1.1588575549041431e-05, + "loss": 0.036, + "step": 54685 + }, + { + "epoch": 2.55, + "learning_rate": 1.1587791763986644e-05, + "loss": 0.0615, + "step": 54690 + }, + { + "epoch": 2.55, + "learning_rate": 1.158700797893186e-05, + "loss": 0.073, + "step": 54695 + }, + { + "epoch": 2.55, + "learning_rate": 1.1586224193877071e-05, + "loss": 0.0988, + "step": 54700 + }, + { + "epoch": 2.55, + "learning_rate": 1.1585440408822285e-05, + "loss": 0.1362, + "step": 54705 + }, + { + "epoch": 2.55, + "learning_rate": 1.15846566237675e-05, + "loss": 0.2456, + "step": 54710 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583872838712713e-05, + "loss": 0.2314, + "step": 54715 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583089053657925e-05, + "loss": 0.1112, + "step": 54720 + }, + { + "epoch": 2.55, + "learning_rate": 1.1582305268603141e-05, + "loss": 0.125, + "step": 54725 + }, + { + "epoch": 2.55, + "learning_rate": 1.1581521483548353e-05, + "loss": 0.0651, + "step": 54730 + }, + { + "epoch": 2.55, + "learning_rate": 1.1580737698493567e-05, + "loss": 0.1076, + "step": 54735 + }, + { + "epoch": 2.55, + "learning_rate": 1.157995391343878e-05, + "loss": 0.0838, + "step": 54740 + }, + { + "epoch": 2.55, + "learning_rate": 1.1579170128383991e-05, + "loss": 0.0998, + "step": 54745 + }, + { + "epoch": 2.55, + "learning_rate": 1.1578386343329207e-05, + "loss": 0.0908, + "step": 54750 + }, + { + "epoch": 2.55, + "learning_rate": 1.157760255827442e-05, + "loss": 0.1566, + "step": 54755 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576818773219633e-05, + "loss": 0.325, + "step": 54760 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576034988164845e-05, + "loss": 0.3982, + "step": 54765 + }, + { + "epoch": 2.56, + "learning_rate": 1.1575251203110061e-05, + "loss": 0.0304, + "step": 54770 + }, + { + "epoch": 2.56, + "learning_rate": 1.1574467418055273e-05, + "loss": 0.0603, + "step": 54775 + }, + { + "epoch": 2.56, + "learning_rate": 1.1573683633000487e-05, + "loss": 0.0578, + "step": 54780 + }, + { + "epoch": 2.56, + "learning_rate": 1.15728998479457e-05, + "loss": 0.0566, + "step": 54785 + }, + { + "epoch": 2.56, + "learning_rate": 1.1572116062890915e-05, + "loss": 0.0771, + "step": 54790 + }, + { + "epoch": 2.56, + "learning_rate": 1.1571332277836127e-05, + "loss": 0.1294, + "step": 54795 + }, + { + "epoch": 2.56, + "learning_rate": 1.1570548492781341e-05, + "loss": 0.1138, + "step": 54800 + }, + { + "epoch": 2.56, + "learning_rate": 1.1569764707726553e-05, + "loss": 0.1218, + "step": 54805 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568980922671767e-05, + "loss": 0.256, + "step": 54810 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568197137616981e-05, + "loss": 0.3657, + "step": 54815 + }, + { + "epoch": 2.56, + "learning_rate": 1.1567413352562193e-05, + "loss": 0.0775, + "step": 54820 + }, + { + "epoch": 2.56, + "learning_rate": 1.1566629567507409e-05, + "loss": 0.0329, + "step": 54825 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565845782452621e-05, + "loss": 0.02, + "step": 54830 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565061997397835e-05, + "loss": 0.0346, + "step": 54835 + }, + { + "epoch": 2.56, + "learning_rate": 1.1564278212343047e-05, + "loss": 0.0832, + "step": 54840 + }, + { + "epoch": 2.56, + "learning_rate": 1.1563494427288263e-05, + "loss": 0.0888, + "step": 54845 + }, + { + "epoch": 2.56, + "learning_rate": 1.1562710642233475e-05, + "loss": 0.1356, + "step": 54850 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561926857178689e-05, + "loss": 0.1574, + "step": 54855 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561143072123901e-05, + "loss": 0.2213, + "step": 54860 + }, + { + "epoch": 2.56, + "learning_rate": 1.1560359287069117e-05, + "loss": 0.2773, + "step": 54865 + }, + { + "epoch": 2.56, + "learning_rate": 1.1559575502014329e-05, + "loss": 0.073, + "step": 54870 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558791716959541e-05, + "loss": 0.0188, + "step": 54875 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558007931904755e-05, + "loss": 0.0895, + "step": 54880 + }, + { + "epoch": 2.56, + "learning_rate": 1.1557224146849967e-05, + "loss": 0.0839, + "step": 54885 + }, + { + "epoch": 2.56, + "learning_rate": 1.1556440361795183e-05, + "loss": 0.0635, + "step": 54890 + }, + { + "epoch": 2.56, + "learning_rate": 1.1555656576740395e-05, + "loss": 0.0701, + "step": 54895 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554872791685609e-05, + "loss": 0.1132, + "step": 54900 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554089006630821e-05, + "loss": 0.1373, + "step": 54905 + }, + { + "epoch": 2.56, + "learning_rate": 1.1553305221576037e-05, + "loss": 0.1604, + "step": 54910 + }, + { + "epoch": 2.56, + "learning_rate": 1.1552521436521249e-05, + "loss": 0.3499, + "step": 54915 + }, + { + "epoch": 2.56, + "learning_rate": 1.1551737651466463e-05, + "loss": 0.0524, + "step": 54920 + }, + { + "epoch": 2.56, + "learning_rate": 1.1550953866411677e-05, + "loss": 0.026, + "step": 54925 + }, + { + "epoch": 2.56, + "learning_rate": 1.155017008135689e-05, + "loss": 0.0591, + "step": 54930 + }, + { + "epoch": 2.56, + "learning_rate": 1.1549386296302103e-05, + "loss": 0.1202, + "step": 54935 + }, + { + "epoch": 2.56, + "learning_rate": 1.1548602511247315e-05, + "loss": 0.0974, + "step": 54940 + }, + { + "epoch": 2.56, + "learning_rate": 1.154781872619253e-05, + "loss": 0.1075, + "step": 54945 + }, + { + "epoch": 2.56, + "learning_rate": 1.1547034941137743e-05, + "loss": 0.588, + "step": 54950 + }, + { + "epoch": 2.56, + "learning_rate": 1.1546251156082957e-05, + "loss": 0.1737, + "step": 54955 + }, + { + "epoch": 2.56, + "learning_rate": 1.1545467371028169e-05, + "loss": 0.2966, + "step": 54960 + }, + { + "epoch": 2.56, + "learning_rate": 1.1544683585973385e-05, + "loss": 0.1925, + "step": 54965 + }, + { + "epoch": 2.56, + "learning_rate": 1.1543899800918597e-05, + "loss": 0.0461, + "step": 54970 + }, + { + "epoch": 2.57, + "learning_rate": 1.154311601586381e-05, + "loss": 0.0533, + "step": 54975 + }, + { + "epoch": 2.57, + "learning_rate": 1.1542332230809023e-05, + "loss": 0.0946, + "step": 54980 + }, + { + "epoch": 2.57, + "learning_rate": 1.1541548445754239e-05, + "loss": 0.0699, + "step": 54985 + }, + { + "epoch": 2.57, + "learning_rate": 1.154076466069945e-05, + "loss": 0.125, + "step": 54990 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539980875644665e-05, + "loss": 0.0972, + "step": 54995 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539197090589877e-05, + "loss": 0.2018, + "step": 55000 + }, + { + "epoch": 2.57, + "learning_rate": 1.153841330553509e-05, + "loss": 0.2578, + "step": 55005 + }, + { + "epoch": 2.57, + "learning_rate": 1.1537629520480305e-05, + "loss": 0.2174, + "step": 55010 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536845735425517e-05, + "loss": 0.2747, + "step": 55015 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536061950370731e-05, + "loss": 0.1615, + "step": 55020 + }, + { + "epoch": 2.57, + "learning_rate": 1.1535278165315945e-05, + "loss": 0.034, + "step": 55025 + }, + { + "epoch": 2.57, + "learning_rate": 1.1534494380261159e-05, + "loss": 0.101, + "step": 55030 + }, + { + "epoch": 2.57, + "learning_rate": 1.1533710595206371e-05, + "loss": 0.1131, + "step": 55035 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532926810151586e-05, + "loss": 0.142, + "step": 55040 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532143025096799e-05, + "loss": 0.0432, + "step": 55045 + }, + { + "epoch": 2.57, + "learning_rate": 1.1531359240042013e-05, + "loss": 0.1238, + "step": 55050 + }, + { + "epoch": 2.57, + "learning_rate": 1.1530575454987225e-05, + "loss": 0.1882, + "step": 55055 + }, + { + "epoch": 2.57, + "learning_rate": 1.152979166993244e-05, + "loss": 0.1863, + "step": 55060 + }, + { + "epoch": 2.57, + "learning_rate": 1.1529007884877653e-05, + "loss": 0.3653, + "step": 55065 + }, + { + "epoch": 2.57, + "learning_rate": 1.1528224099822865e-05, + "loss": 0.043, + "step": 55070 + }, + { + "epoch": 2.57, + "learning_rate": 1.1527440314768079e-05, + "loss": 0.0351, + "step": 55075 + }, + { + "epoch": 2.57, + "learning_rate": 1.1526656529713291e-05, + "loss": 0.0739, + "step": 55080 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525872744658507e-05, + "loss": 0.0644, + "step": 55085 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525088959603719e-05, + "loss": 0.1164, + "step": 55090 + }, + { + "epoch": 2.57, + "learning_rate": 1.1524305174548933e-05, + "loss": 0.1265, + "step": 55095 + }, + { + "epoch": 2.57, + "learning_rate": 1.1523521389494145e-05, + "loss": 0.0871, + "step": 55100 + }, + { + "epoch": 2.57, + "learning_rate": 1.152273760443936e-05, + "loss": 0.1264, + "step": 55105 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521953819384573e-05, + "loss": 0.3587, + "step": 55110 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521170034329787e-05, + "loss": 0.4438, + "step": 55115 + }, + { + "epoch": 2.57, + "learning_rate": 1.1520386249274999e-05, + "loss": 0.0607, + "step": 55120 + }, + { + "epoch": 2.57, + "learning_rate": 1.1519602464220214e-05, + "loss": 0.0342, + "step": 55125 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518818679165427e-05, + "loss": 0.022, + "step": 55130 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518034894110639e-05, + "loss": 0.0265, + "step": 55135 + }, + { + "epoch": 2.57, + "learning_rate": 1.1517251109055854e-05, + "loss": 0.0946, + "step": 55140 + }, + { + "epoch": 2.57, + "learning_rate": 1.1516467324001067e-05, + "loss": 0.1235, + "step": 55145 + }, + { + "epoch": 2.57, + "learning_rate": 1.151568353894628e-05, + "loss": 0.2051, + "step": 55150 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514899753891493e-05, + "loss": 0.2828, + "step": 55155 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514115968836708e-05, + "loss": 0.2089, + "step": 55160 + }, + { + "epoch": 2.57, + "learning_rate": 1.151333218378192e-05, + "loss": 0.2756, + "step": 55165 + }, + { + "epoch": 2.57, + "learning_rate": 1.1512548398727134e-05, + "loss": 0.0753, + "step": 55170 + }, + { + "epoch": 2.57, + "learning_rate": 1.1511764613672347e-05, + "loss": 0.0189, + "step": 55175 + }, + { + "epoch": 2.57, + "learning_rate": 1.1510980828617562e-05, + "loss": 0.0486, + "step": 55180 + }, + { + "epoch": 2.58, + "learning_rate": 1.1510197043562775e-05, + "loss": 0.0668, + "step": 55185 + }, + { + "epoch": 2.58, + "learning_rate": 1.1509413258507988e-05, + "loss": 0.0484, + "step": 55190 + }, + { + "epoch": 2.58, + "learning_rate": 1.15086294734532e-05, + "loss": 0.2344, + "step": 55195 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507845688398413e-05, + "loss": 0.1022, + "step": 55200 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507061903343628e-05, + "loss": 0.2039, + "step": 55205 + }, + { + "epoch": 2.58, + "learning_rate": 1.150627811828884e-05, + "loss": 0.3759, + "step": 55210 + }, + { + "epoch": 2.58, + "learning_rate": 1.1505494333234055e-05, + "loss": 0.3593, + "step": 55215 + }, + { + "epoch": 2.58, + "learning_rate": 1.1504710548179267e-05, + "loss": 0.0577, + "step": 55220 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503926763124482e-05, + "loss": 0.0329, + "step": 55225 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503142978069695e-05, + "loss": 0.0644, + "step": 55230 + }, + { + "epoch": 2.58, + "learning_rate": 1.1502359193014908e-05, + "loss": 0.0507, + "step": 55235 + }, + { + "epoch": 2.58, + "learning_rate": 1.1501575407960122e-05, + "loss": 0.0605, + "step": 55240 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500791622905336e-05, + "loss": 0.1677, + "step": 55245 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500007837850549e-05, + "loss": 0.1497, + "step": 55250 + }, + { + "epoch": 2.58, + "learning_rate": 1.1499224052795762e-05, + "loss": 0.1236, + "step": 55255 + }, + { + "epoch": 2.58, + "learning_rate": 1.1498440267740976e-05, + "loss": 0.2027, + "step": 55260 + }, + { + "epoch": 2.58, + "learning_rate": 1.1497656482686189e-05, + "loss": 0.3551, + "step": 55265 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496872697631402e-05, + "loss": 0.1194, + "step": 55270 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496088912576615e-05, + "loss": 0.0431, + "step": 55275 + }, + { + "epoch": 2.58, + "learning_rate": 1.149530512752183e-05, + "loss": 0.0716, + "step": 55280 + }, + { + "epoch": 2.58, + "learning_rate": 1.1494521342467042e-05, + "loss": 0.0789, + "step": 55285 + }, + { + "epoch": 2.58, + "learning_rate": 1.1493737557412256e-05, + "loss": 0.0781, + "step": 55290 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492953772357469e-05, + "loss": 0.0786, + "step": 55295 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492169987302684e-05, + "loss": 0.1363, + "step": 55300 + }, + { + "epoch": 2.58, + "learning_rate": 1.1491386202247896e-05, + "loss": 0.2028, + "step": 55305 + }, + { + "epoch": 2.58, + "learning_rate": 1.149060241719311e-05, + "loss": 0.1638, + "step": 55310 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489818632138323e-05, + "loss": 0.2609, + "step": 55315 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489034847083538e-05, + "loss": 0.0372, + "step": 55320 + }, + { + "epoch": 2.58, + "learning_rate": 1.148825106202875e-05, + "loss": 0.0139, + "step": 55325 + }, + { + "epoch": 2.58, + "learning_rate": 1.1487467276973963e-05, + "loss": 0.0547, + "step": 55330 + }, + { + "epoch": 2.58, + "learning_rate": 1.1486683491919176e-05, + "loss": 0.0491, + "step": 55335 + }, + { + "epoch": 2.58, + "learning_rate": 1.148589970686439e-05, + "loss": 0.0403, + "step": 55340 + }, + { + "epoch": 2.58, + "learning_rate": 1.1485115921809604e-05, + "loss": 0.1877, + "step": 55345 + }, + { + "epoch": 2.58, + "learning_rate": 1.1484332136754816e-05, + "loss": 0.1706, + "step": 55350 + }, + { + "epoch": 2.58, + "learning_rate": 1.1483548351700032e-05, + "loss": 0.1843, + "step": 55355 + }, + { + "epoch": 2.58, + "learning_rate": 1.1482764566645244e-05, + "loss": 0.2677, + "step": 55360 + }, + { + "epoch": 2.58, + "learning_rate": 1.1481980781590458e-05, + "loss": 0.1672, + "step": 55365 + }, + { + "epoch": 2.58, + "learning_rate": 1.148119699653567e-05, + "loss": 0.0489, + "step": 55370 + }, + { + "epoch": 2.58, + "learning_rate": 1.1480413211480886e-05, + "loss": 0.0291, + "step": 55375 + }, + { + "epoch": 2.58, + "learning_rate": 1.1479629426426098e-05, + "loss": 0.034, + "step": 55380 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478845641371312e-05, + "loss": 0.1155, + "step": 55385 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478061856316524e-05, + "loss": 0.1148, + "step": 55390 + }, + { + "epoch": 2.58, + "learning_rate": 1.1477278071261737e-05, + "loss": 0.1079, + "step": 55395 + }, + { + "epoch": 2.59, + "learning_rate": 1.1476494286206952e-05, + "loss": 0.1543, + "step": 55400 + }, + { + "epoch": 2.59, + "learning_rate": 1.1475710501152164e-05, + "loss": 0.1308, + "step": 55405 + }, + { + "epoch": 2.59, + "learning_rate": 1.1474926716097378e-05, + "loss": 0.3201, + "step": 55410 + }, + { + "epoch": 2.59, + "learning_rate": 1.147414293104259e-05, + "loss": 0.1922, + "step": 55415 + }, + { + "epoch": 2.59, + "learning_rate": 1.1473359145987806e-05, + "loss": 0.056, + "step": 55420 + }, + { + "epoch": 2.59, + "learning_rate": 1.1472575360933018e-05, + "loss": 0.0477, + "step": 55425 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471791575878232e-05, + "loss": 0.0765, + "step": 55430 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471007790823444e-05, + "loss": 0.0625, + "step": 55435 + }, + { + "epoch": 2.59, + "learning_rate": 1.147022400576866e-05, + "loss": 0.0535, + "step": 55440 + }, + { + "epoch": 2.59, + "learning_rate": 1.1469440220713872e-05, + "loss": 0.1265, + "step": 55445 + }, + { + "epoch": 2.59, + "learning_rate": 1.1468656435659086e-05, + "loss": 0.0814, + "step": 55450 + }, + { + "epoch": 2.59, + "learning_rate": 1.14678726506043e-05, + "loss": 0.1643, + "step": 55455 + }, + { + "epoch": 2.59, + "learning_rate": 1.1467088865549512e-05, + "loss": 0.3128, + "step": 55460 + }, + { + "epoch": 2.59, + "learning_rate": 1.1466305080494726e-05, + "loss": 0.2313, + "step": 55465 + }, + { + "epoch": 2.59, + "learning_rate": 1.1465521295439938e-05, + "loss": 0.0514, + "step": 55470 + }, + { + "epoch": 2.59, + "learning_rate": 1.1464737510385154e-05, + "loss": 0.0917, + "step": 55475 + }, + { + "epoch": 2.59, + "learning_rate": 1.1463953725330366e-05, + "loss": 0.0453, + "step": 55480 + }, + { + "epoch": 2.59, + "learning_rate": 1.146316994027558e-05, + "loss": 0.0833, + "step": 55485 + }, + { + "epoch": 2.59, + "learning_rate": 1.1462386155220792e-05, + "loss": 0.1032, + "step": 55490 + }, + { + "epoch": 2.59, + "learning_rate": 1.1461602370166008e-05, + "loss": 0.1346, + "step": 55495 + }, + { + "epoch": 2.59, + "learning_rate": 1.146081858511122e-05, + "loss": 0.1739, + "step": 55500 + }, + { + "epoch": 2.59, + "learning_rate": 1.1460034800056434e-05, + "loss": 0.2011, + "step": 55505 + }, + { + "epoch": 2.59, + "learning_rate": 1.1459251015001646e-05, + "loss": 0.1148, + "step": 55510 + }, + { + "epoch": 2.59, + "learning_rate": 1.1458467229946862e-05, + "loss": 0.2026, + "step": 55515 + }, + { + "epoch": 2.59, + "learning_rate": 1.1457683444892074e-05, + "loss": 0.0575, + "step": 55520 + }, + { + "epoch": 2.59, + "learning_rate": 1.1456899659837286e-05, + "loss": 0.0359, + "step": 55525 + }, + { + "epoch": 2.59, + "learning_rate": 1.14561158747825e-05, + "loss": 0.0556, + "step": 55530 + }, + { + "epoch": 2.59, + "learning_rate": 1.1455332089727712e-05, + "loss": 0.0612, + "step": 55535 + }, + { + "epoch": 2.59, + "learning_rate": 1.1454548304672928e-05, + "loss": 0.0526, + "step": 55540 + }, + { + "epoch": 2.59, + "learning_rate": 1.145376451961814e-05, + "loss": 0.0315, + "step": 55545 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452980734563354e-05, + "loss": 0.144, + "step": 55550 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452196949508568e-05, + "loss": 0.1281, + "step": 55555 + }, + { + "epoch": 2.59, + "learning_rate": 1.1451413164453782e-05, + "loss": 0.2331, + "step": 55560 + }, + { + "epoch": 2.59, + "learning_rate": 1.1450629379398994e-05, + "loss": 0.2845, + "step": 55565 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449845594344208e-05, + "loss": 0.0317, + "step": 55570 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449061809289422e-05, + "loss": 0.03, + "step": 55575 + }, + { + "epoch": 2.59, + "learning_rate": 1.1448278024234636e-05, + "loss": 0.0739, + "step": 55580 + }, + { + "epoch": 2.59, + "learning_rate": 1.1447494239179848e-05, + "loss": 0.1004, + "step": 55585 + }, + { + "epoch": 2.59, + "learning_rate": 1.144671045412506e-05, + "loss": 0.1453, + "step": 55590 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445926669070276e-05, + "loss": 0.2057, + "step": 55595 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445142884015488e-05, + "loss": 0.2203, + "step": 55600 + }, + { + "epoch": 2.59, + "learning_rate": 1.1444359098960702e-05, + "loss": 0.1848, + "step": 55605 + }, + { + "epoch": 2.59, + "learning_rate": 1.1443575313905914e-05, + "loss": 0.2693, + "step": 55610 + }, + { + "epoch": 2.6, + "learning_rate": 1.144279152885113e-05, + "loss": 0.2025, + "step": 55615 + }, + { + "epoch": 2.6, + "learning_rate": 1.1442007743796342e-05, + "loss": 0.0353, + "step": 55620 + }, + { + "epoch": 2.6, + "learning_rate": 1.1441223958741556e-05, + "loss": 0.0815, + "step": 55625 + }, + { + "epoch": 2.6, + "learning_rate": 1.1440440173686768e-05, + "loss": 0.0724, + "step": 55630 + }, + { + "epoch": 2.6, + "learning_rate": 1.1439656388631984e-05, + "loss": 0.0461, + "step": 55635 + }, + { + "epoch": 2.6, + "learning_rate": 1.1438872603577196e-05, + "loss": 0.1578, + "step": 55640 + }, + { + "epoch": 2.6, + "learning_rate": 1.143808881852241e-05, + "loss": 0.091, + "step": 55645 + }, + { + "epoch": 2.6, + "learning_rate": 1.1437305033467622e-05, + "loss": 0.1293, + "step": 55650 + }, + { + "epoch": 2.6, + "learning_rate": 1.1436521248412836e-05, + "loss": 0.2032, + "step": 55655 + }, + { + "epoch": 2.6, + "learning_rate": 1.143573746335805e-05, + "loss": 0.2329, + "step": 55660 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434953678303262e-05, + "loss": 0.246, + "step": 55665 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434169893248478e-05, + "loss": 0.0633, + "step": 55670 + }, + { + "epoch": 2.6, + "learning_rate": 1.143338610819369e-05, + "loss": 0.0702, + "step": 55675 + }, + { + "epoch": 2.6, + "learning_rate": 1.1432602323138904e-05, + "loss": 0.0746, + "step": 55680 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431818538084116e-05, + "loss": 0.0994, + "step": 55685 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431034753029332e-05, + "loss": 0.0673, + "step": 55690 + }, + { + "epoch": 2.6, + "learning_rate": 1.1430250967974544e-05, + "loss": 0.1014, + "step": 55695 + }, + { + "epoch": 2.6, + "learning_rate": 1.1429467182919758e-05, + "loss": 0.1598, + "step": 55700 + }, + { + "epoch": 2.6, + "learning_rate": 1.142868339786497e-05, + "loss": 0.1542, + "step": 55705 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427899612810185e-05, + "loss": 0.1683, + "step": 55710 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427115827755398e-05, + "loss": 0.3066, + "step": 55715 + }, + { + "epoch": 2.6, + "learning_rate": 1.142633204270061e-05, + "loss": 0.0337, + "step": 55720 + }, + { + "epoch": 2.6, + "learning_rate": 1.1425548257645824e-05, + "loss": 0.0113, + "step": 55725 + }, + { + "epoch": 2.6, + "learning_rate": 1.1424764472591036e-05, + "loss": 0.0901, + "step": 55730 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423980687536252e-05, + "loss": 0.0709, + "step": 55735 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423196902481464e-05, + "loss": 0.0917, + "step": 55740 + }, + { + "epoch": 2.6, + "learning_rate": 1.1422413117426678e-05, + "loss": 0.0923, + "step": 55745 + }, + { + "epoch": 2.6, + "learning_rate": 1.142162933237189e-05, + "loss": 0.1011, + "step": 55750 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420845547317106e-05, + "loss": 0.2065, + "step": 55755 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420061762262318e-05, + "loss": 0.2262, + "step": 55760 + }, + { + "epoch": 2.6, + "learning_rate": 1.1419277977207532e-05, + "loss": 0.3833, + "step": 55765 + }, + { + "epoch": 2.6, + "learning_rate": 1.1418494192152746e-05, + "loss": 0.0298, + "step": 55770 + }, + { + "epoch": 2.6, + "learning_rate": 1.141771040709796e-05, + "loss": 0.0199, + "step": 55775 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416926622043172e-05, + "loss": 0.0608, + "step": 55780 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416142836988384e-05, + "loss": 0.0674, + "step": 55785 + }, + { + "epoch": 2.6, + "learning_rate": 1.14153590519336e-05, + "loss": 0.05, + "step": 55790 + }, + { + "epoch": 2.6, + "learning_rate": 1.1414575266878812e-05, + "loss": 0.4276, + "step": 55795 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413791481824026e-05, + "loss": 0.1672, + "step": 55800 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413007696769238e-05, + "loss": 0.2332, + "step": 55805 + }, + { + "epoch": 2.6, + "learning_rate": 1.1412223911714453e-05, + "loss": 0.2378, + "step": 55810 + }, + { + "epoch": 2.6, + "learning_rate": 1.1411440126659666e-05, + "loss": 0.3594, + "step": 55815 + }, + { + "epoch": 2.6, + "learning_rate": 1.141065634160488e-05, + "loss": 0.0743, + "step": 55820 + }, + { + "epoch": 2.6, + "learning_rate": 1.1409872556550092e-05, + "loss": 0.0567, + "step": 55825 + }, + { + "epoch": 2.61, + "learning_rate": 1.1409088771495307e-05, + "loss": 0.0386, + "step": 55830 + }, + { + "epoch": 2.61, + "learning_rate": 1.140830498644052e-05, + "loss": 0.0708, + "step": 55835 + }, + { + "epoch": 2.61, + "learning_rate": 1.1407521201385733e-05, + "loss": 0.1147, + "step": 55840 + }, + { + "epoch": 2.61, + "learning_rate": 1.1406737416330946e-05, + "loss": 0.107, + "step": 55845 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405953631276158e-05, + "loss": 0.1496, + "step": 55850 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405169846221374e-05, + "loss": 0.1711, + "step": 55855 + }, + { + "epoch": 2.61, + "learning_rate": 1.1404386061166586e-05, + "loss": 0.1991, + "step": 55860 + }, + { + "epoch": 2.61, + "learning_rate": 1.14036022761118e-05, + "loss": 0.2534, + "step": 55865 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402818491057014e-05, + "loss": 0.0475, + "step": 55870 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402034706002227e-05, + "loss": 0.0586, + "step": 55875 + }, + { + "epoch": 2.61, + "learning_rate": 1.140125092094744e-05, + "loss": 0.0615, + "step": 55880 + }, + { + "epoch": 2.61, + "learning_rate": 1.1400467135892655e-05, + "loss": 0.0973, + "step": 55885 + }, + { + "epoch": 2.61, + "learning_rate": 1.1399683350837867e-05, + "loss": 0.0871, + "step": 55890 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398899565783081e-05, + "loss": 0.0739, + "step": 55895 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398115780728294e-05, + "loss": 0.1811, + "step": 55900 + }, + { + "epoch": 2.61, + "learning_rate": 1.139733199567351e-05, + "loss": 0.1679, + "step": 55905 + }, + { + "epoch": 2.61, + "learning_rate": 1.1396548210618721e-05, + "loss": 0.2377, + "step": 55910 + }, + { + "epoch": 2.61, + "learning_rate": 1.1395764425563934e-05, + "loss": 0.29, + "step": 55915 + }, + { + "epoch": 2.61, + "learning_rate": 1.1394980640509148e-05, + "loss": 0.0498, + "step": 55920 + }, + { + "epoch": 2.61, + "learning_rate": 1.139419685545436e-05, + "loss": 0.0492, + "step": 55925 + }, + { + "epoch": 2.61, + "learning_rate": 1.1393413070399575e-05, + "loss": 0.0697, + "step": 55930 + }, + { + "epoch": 2.61, + "learning_rate": 1.1392629285344788e-05, + "loss": 0.0466, + "step": 55935 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391845500290001e-05, + "loss": 0.0723, + "step": 55940 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391061715235214e-05, + "loss": 0.0772, + "step": 55945 + }, + { + "epoch": 2.61, + "learning_rate": 1.139027793018043e-05, + "loss": 0.1073, + "step": 55950 + }, + { + "epoch": 2.61, + "learning_rate": 1.1389494145125641e-05, + "loss": 0.1866, + "step": 55955 + }, + { + "epoch": 2.61, + "learning_rate": 1.1388710360070855e-05, + "loss": 0.3105, + "step": 55960 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387926575016068e-05, + "loss": 0.3204, + "step": 55965 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387142789961283e-05, + "loss": 0.0536, + "step": 55970 + }, + { + "epoch": 2.61, + "learning_rate": 1.1386359004906495e-05, + "loss": 0.0331, + "step": 55975 + }, + { + "epoch": 2.61, + "learning_rate": 1.1385575219851708e-05, + "loss": 0.0336, + "step": 55980 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384791434796923e-05, + "loss": 0.0781, + "step": 55985 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384007649742135e-05, + "loss": 0.0902, + "step": 55990 + }, + { + "epoch": 2.61, + "learning_rate": 1.138322386468735e-05, + "loss": 0.1057, + "step": 55995 + }, + { + "epoch": 2.61, + "learning_rate": 1.1382440079632562e-05, + "loss": 0.1152, + "step": 56000 + }, + { + "epoch": 2.61, + "learning_rate": 1.1381656294577777e-05, + "loss": 0.1095, + "step": 56005 + }, + { + "epoch": 2.61, + "learning_rate": 1.138087250952299e-05, + "loss": 0.2313, + "step": 56010 + }, + { + "epoch": 2.61, + "learning_rate": 1.1380088724468203e-05, + "loss": 0.3165, + "step": 56015 + }, + { + "epoch": 2.61, + "learning_rate": 1.1379304939413415e-05, + "loss": 0.0596, + "step": 56020 + }, + { + "epoch": 2.61, + "learning_rate": 1.1378521154358631e-05, + "loss": 0.0576, + "step": 56025 + }, + { + "epoch": 2.61, + "learning_rate": 1.1377737369303843e-05, + "loss": 0.0679, + "step": 56030 + }, + { + "epoch": 2.61, + "learning_rate": 1.1376953584249057e-05, + "loss": 0.0265, + "step": 56035 + }, + { + "epoch": 2.61, + "learning_rate": 1.137616979919427e-05, + "loss": 0.0645, + "step": 56040 + }, + { + "epoch": 2.62, + "learning_rate": 1.1375386014139482e-05, + "loss": 0.1304, + "step": 56045 + }, + { + "epoch": 2.62, + "learning_rate": 1.1374602229084697e-05, + "loss": 0.2927, + "step": 56050 + }, + { + "epoch": 2.62, + "learning_rate": 1.137381844402991e-05, + "loss": 0.1191, + "step": 56055 + }, + { + "epoch": 2.62, + "learning_rate": 1.1373034658975123e-05, + "loss": 0.2488, + "step": 56060 + }, + { + "epoch": 2.62, + "learning_rate": 1.1372250873920336e-05, + "loss": 0.1988, + "step": 56065 + }, + { + "epoch": 2.62, + "learning_rate": 1.1371467088865551e-05, + "loss": 0.0326, + "step": 56070 + }, + { + "epoch": 2.62, + "learning_rate": 1.1370683303810763e-05, + "loss": 0.0501, + "step": 56075 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369899518755977e-05, + "loss": 0.0934, + "step": 56080 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369115733701191e-05, + "loss": 0.0529, + "step": 56085 + }, + { + "epoch": 2.62, + "learning_rate": 1.1368331948646405e-05, + "loss": 0.1376, + "step": 56090 + }, + { + "epoch": 2.62, + "learning_rate": 1.1367548163591617e-05, + "loss": 0.0725, + "step": 56095 + }, + { + "epoch": 2.62, + "learning_rate": 1.1366764378536831e-05, + "loss": 0.1274, + "step": 56100 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365980593482045e-05, + "loss": 0.1481, + "step": 56105 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365196808427257e-05, + "loss": 0.1959, + "step": 56110 + }, + { + "epoch": 2.62, + "learning_rate": 1.1364413023372471e-05, + "loss": 0.1771, + "step": 56115 + }, + { + "epoch": 2.62, + "learning_rate": 1.1363629238317683e-05, + "loss": 0.0172, + "step": 56120 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362845453262899e-05, + "loss": 0.0531, + "step": 56125 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362061668208111e-05, + "loss": 0.0984, + "step": 56130 + }, + { + "epoch": 2.62, + "learning_rate": 1.1361277883153325e-05, + "loss": 0.074, + "step": 56135 + }, + { + "epoch": 2.62, + "learning_rate": 1.1360494098098537e-05, + "loss": 0.0676, + "step": 56140 + }, + { + "epoch": 2.62, + "learning_rate": 1.1359710313043753e-05, + "loss": 0.1362, + "step": 56145 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358926527988965e-05, + "loss": 0.1122, + "step": 56150 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358142742934179e-05, + "loss": 0.1515, + "step": 56155 + }, + { + "epoch": 2.62, + "learning_rate": 1.1357358957879391e-05, + "loss": 0.261, + "step": 56160 + }, + { + "epoch": 2.62, + "learning_rate": 1.1356575172824607e-05, + "loss": 0.293, + "step": 56165 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355791387769819e-05, + "loss": 0.0781, + "step": 56170 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355007602715031e-05, + "loss": 0.0546, + "step": 56175 + }, + { + "epoch": 2.62, + "learning_rate": 1.1354223817660245e-05, + "loss": 0.0495, + "step": 56180 + }, + { + "epoch": 2.62, + "learning_rate": 1.1353440032605459e-05, + "loss": 0.0399, + "step": 56185 + }, + { + "epoch": 2.62, + "learning_rate": 1.1352656247550673e-05, + "loss": 0.1285, + "step": 56190 + }, + { + "epoch": 2.62, + "learning_rate": 1.1351872462495885e-05, + "loss": 0.1656, + "step": 56195 + }, + { + "epoch": 2.62, + "learning_rate": 1.13510886774411e-05, + "loss": 0.1061, + "step": 56200 + }, + { + "epoch": 2.62, + "learning_rate": 1.1350304892386313e-05, + "loss": 0.1784, + "step": 56205 + }, + { + "epoch": 2.62, + "learning_rate": 1.1349521107331527e-05, + "loss": 0.254, + "step": 56210 + }, + { + "epoch": 2.62, + "learning_rate": 1.134873732227674e-05, + "loss": 0.3493, + "step": 56215 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347953537221955e-05, + "loss": 0.0536, + "step": 56220 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347169752167167e-05, + "loss": 0.0224, + "step": 56225 + }, + { + "epoch": 2.62, + "learning_rate": 1.1346385967112381e-05, + "loss": 0.06, + "step": 56230 + }, + { + "epoch": 2.62, + "learning_rate": 1.1345602182057593e-05, + "loss": 0.1365, + "step": 56235 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344818397002805e-05, + "loss": 0.0783, + "step": 56240 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344034611948021e-05, + "loss": 0.1582, + "step": 56245 + }, + { + "epoch": 2.62, + "learning_rate": 1.1343250826893233e-05, + "loss": 0.0864, + "step": 56250 + }, + { + "epoch": 2.62, + "learning_rate": 1.1342467041838447e-05, + "loss": 0.1631, + "step": 56255 + }, + { + "epoch": 2.63, + "learning_rate": 1.134168325678366e-05, + "loss": 0.2896, + "step": 56260 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340899471728875e-05, + "loss": 0.2259, + "step": 56265 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340115686674087e-05, + "loss": 0.083, + "step": 56270 + }, + { + "epoch": 2.63, + "learning_rate": 1.1339331901619301e-05, + "loss": 0.0676, + "step": 56275 + }, + { + "epoch": 2.63, + "learning_rate": 1.1338548116564513e-05, + "loss": 0.0453, + "step": 56280 + }, + { + "epoch": 2.63, + "learning_rate": 1.1337764331509729e-05, + "loss": 0.0732, + "step": 56285 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336980546454941e-05, + "loss": 0.0857, + "step": 56290 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336196761400155e-05, + "loss": 0.0953, + "step": 56295 + }, + { + "epoch": 2.63, + "learning_rate": 1.1335412976345369e-05, + "loss": 0.1977, + "step": 56300 + }, + { + "epoch": 2.63, + "learning_rate": 1.1334629191290581e-05, + "loss": 0.1663, + "step": 56305 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333845406235795e-05, + "loss": 0.2465, + "step": 56310 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333061621181007e-05, + "loss": 0.2748, + "step": 56315 + }, + { + "epoch": 2.63, + "learning_rate": 1.1332277836126223e-05, + "loss": 0.0505, + "step": 56320 + }, + { + "epoch": 2.63, + "learning_rate": 1.1331494051071435e-05, + "loss": 0.0276, + "step": 56325 + }, + { + "epoch": 2.63, + "learning_rate": 1.1330710266016649e-05, + "loss": 0.0824, + "step": 56330 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329926480961861e-05, + "loss": 0.1261, + "step": 56335 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329142695907077e-05, + "loss": 0.074, + "step": 56340 + }, + { + "epoch": 2.63, + "learning_rate": 1.1328358910852289e-05, + "loss": 0.206, + "step": 56345 + }, + { + "epoch": 2.63, + "learning_rate": 1.1327575125797503e-05, + "loss": 0.1365, + "step": 56350 + }, + { + "epoch": 2.63, + "learning_rate": 1.1326791340742715e-05, + "loss": 0.1942, + "step": 56355 + }, + { + "epoch": 2.63, + "learning_rate": 1.132600755568793e-05, + "loss": 0.2217, + "step": 56360 + }, + { + "epoch": 2.63, + "learning_rate": 1.1325223770633143e-05, + "loss": 0.3364, + "step": 56365 + }, + { + "epoch": 2.63, + "learning_rate": 1.1324439985578355e-05, + "loss": 0.0938, + "step": 56370 + }, + { + "epoch": 2.63, + "learning_rate": 1.1323656200523569e-05, + "loss": 0.0528, + "step": 56375 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322872415468781e-05, + "loss": 0.0529, + "step": 56380 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322088630413997e-05, + "loss": 0.0723, + "step": 56385 + }, + { + "epoch": 2.63, + "learning_rate": 1.1321304845359209e-05, + "loss": 0.0862, + "step": 56390 + }, + { + "epoch": 2.63, + "learning_rate": 1.1320521060304423e-05, + "loss": 0.1117, + "step": 56395 + }, + { + "epoch": 2.63, + "learning_rate": 1.1319737275249637e-05, + "loss": 0.1197, + "step": 56400 + }, + { + "epoch": 2.63, + "learning_rate": 1.131895349019485e-05, + "loss": 0.2187, + "step": 56405 + }, + { + "epoch": 2.63, + "learning_rate": 1.1318169705140063e-05, + "loss": 0.119, + "step": 56410 + }, + { + "epoch": 2.63, + "learning_rate": 1.1317385920085277e-05, + "loss": 0.4055, + "step": 56415 + }, + { + "epoch": 2.63, + "learning_rate": 1.131660213503049e-05, + "loss": 0.0653, + "step": 56420 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315818349975705e-05, + "loss": 0.0624, + "step": 56425 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315034564920917e-05, + "loss": 0.1205, + "step": 56430 + }, + { + "epoch": 2.63, + "learning_rate": 1.1314250779866129e-05, + "loss": 0.0533, + "step": 56435 + }, + { + "epoch": 2.63, + "learning_rate": 1.1313466994811345e-05, + "loss": 0.1112, + "step": 56440 + }, + { + "epoch": 2.63, + "learning_rate": 1.1312683209756557e-05, + "loss": 0.0813, + "step": 56445 + }, + { + "epoch": 2.63, + "learning_rate": 1.131189942470177e-05, + "loss": 0.1179, + "step": 56450 + }, + { + "epoch": 2.63, + "learning_rate": 1.1311115639646983e-05, + "loss": 0.108, + "step": 56455 + }, + { + "epoch": 2.63, + "learning_rate": 1.1310331854592199e-05, + "loss": 0.3539, + "step": 56460 + }, + { + "epoch": 2.63, + "learning_rate": 1.130954806953741e-05, + "loss": 0.3225, + "step": 56465 + }, + { + "epoch": 2.63, + "learning_rate": 1.1308764284482625e-05, + "loss": 0.1074, + "step": 56470 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307980499427837e-05, + "loss": 0.0635, + "step": 56475 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307196714373052e-05, + "loss": 0.0487, + "step": 56480 + }, + { + "epoch": 2.64, + "learning_rate": 1.1306412929318265e-05, + "loss": 0.0477, + "step": 56485 + }, + { + "epoch": 2.64, + "learning_rate": 1.1305629144263479e-05, + "loss": 0.0545, + "step": 56490 + }, + { + "epoch": 2.64, + "learning_rate": 1.130484535920869e-05, + "loss": 0.1771, + "step": 56495 + }, + { + "epoch": 2.64, + "learning_rate": 1.1304061574153905e-05, + "loss": 0.1594, + "step": 56500 + }, + { + "epoch": 2.64, + "learning_rate": 1.1303277789099119e-05, + "loss": 0.1938, + "step": 56505 + }, + { + "epoch": 2.64, + "learning_rate": 1.130249400404433e-05, + "loss": 0.2132, + "step": 56510 + }, + { + "epoch": 2.64, + "learning_rate": 1.1301710218989546e-05, + "loss": 0.2514, + "step": 56515 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300926433934759e-05, + "loss": 0.042, + "step": 56520 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300142648879973e-05, + "loss": 0.0904, + "step": 56525 + }, + { + "epoch": 2.64, + "learning_rate": 1.1299358863825185e-05, + "loss": 0.0356, + "step": 56530 + }, + { + "epoch": 2.64, + "learning_rate": 1.12985750787704e-05, + "loss": 0.1147, + "step": 56535 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297791293715613e-05, + "loss": 0.0365, + "step": 56540 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297007508660826e-05, + "loss": 0.1268, + "step": 56545 + }, + { + "epoch": 2.64, + "learning_rate": 1.1296223723606039e-05, + "loss": 0.1831, + "step": 56550 + }, + { + "epoch": 2.64, + "learning_rate": 1.1295439938551254e-05, + "loss": 0.1668, + "step": 56555 + }, + { + "epoch": 2.64, + "learning_rate": 1.1294656153496466e-05, + "loss": 0.3163, + "step": 56560 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293872368441679e-05, + "loss": 0.2783, + "step": 56565 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293088583386893e-05, + "loss": 0.0947, + "step": 56570 + }, + { + "epoch": 2.64, + "learning_rate": 1.1292304798332105e-05, + "loss": 0.0528, + "step": 56575 + }, + { + "epoch": 2.64, + "learning_rate": 1.129152101327732e-05, + "loss": 0.0567, + "step": 56580 + }, + { + "epoch": 2.64, + "learning_rate": 1.1290737228222533e-05, + "loss": 0.0607, + "step": 56585 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289953443167747e-05, + "loss": 0.0598, + "step": 56590 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289169658112959e-05, + "loss": 0.151, + "step": 56595 + }, + { + "epoch": 2.64, + "learning_rate": 1.1288385873058174e-05, + "loss": 0.2271, + "step": 56600 + }, + { + "epoch": 2.64, + "learning_rate": 1.1287602088003387e-05, + "loss": 0.1683, + "step": 56605 + }, + { + "epoch": 2.64, + "learning_rate": 1.12868183029486e-05, + "loss": 0.2022, + "step": 56610 + }, + { + "epoch": 2.64, + "learning_rate": 1.1286034517893814e-05, + "loss": 0.2549, + "step": 56615 + }, + { + "epoch": 2.64, + "learning_rate": 1.1285250732839028e-05, + "loss": 0.0524, + "step": 56620 + }, + { + "epoch": 2.64, + "learning_rate": 1.128446694778424e-05, + "loss": 0.0105, + "step": 56625 + }, + { + "epoch": 2.64, + "learning_rate": 1.1283683162729453e-05, + "loss": 0.0215, + "step": 56630 + }, + { + "epoch": 2.64, + "learning_rate": 1.1282899377674668e-05, + "loss": 0.0492, + "step": 56635 + }, + { + "epoch": 2.64, + "learning_rate": 1.128211559261988e-05, + "loss": 0.1595, + "step": 56640 + }, + { + "epoch": 2.64, + "learning_rate": 1.1281331807565094e-05, + "loss": 0.0684, + "step": 56645 + }, + { + "epoch": 2.64, + "learning_rate": 1.1280548022510307e-05, + "loss": 0.1465, + "step": 56650 + }, + { + "epoch": 2.64, + "learning_rate": 1.1279764237455522e-05, + "loss": 0.1494, + "step": 56655 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278980452400734e-05, + "loss": 0.2374, + "step": 56660 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278196667345948e-05, + "loss": 0.2635, + "step": 56665 + }, + { + "epoch": 2.64, + "learning_rate": 1.127741288229116e-05, + "loss": 0.0529, + "step": 56670 + }, + { + "epoch": 2.64, + "learning_rate": 1.1276629097236376e-05, + "loss": 0.0205, + "step": 56675 + }, + { + "epoch": 2.64, + "learning_rate": 1.1275845312181588e-05, + "loss": 0.1151, + "step": 56680 + }, + { + "epoch": 2.65, + "learning_rate": 1.1275061527126802e-05, + "loss": 0.0793, + "step": 56685 + }, + { + "epoch": 2.65, + "learning_rate": 1.1274277742072014e-05, + "loss": 0.058, + "step": 56690 + }, + { + "epoch": 2.65, + "learning_rate": 1.1273493957017227e-05, + "loss": 0.0678, + "step": 56695 + }, + { + "epoch": 2.65, + "learning_rate": 1.1272710171962442e-05, + "loss": 0.1245, + "step": 56700 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271926386907654e-05, + "loss": 0.3038, + "step": 56705 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271142601852868e-05, + "loss": 0.2822, + "step": 56710 + }, + { + "epoch": 2.65, + "learning_rate": 1.1270358816798082e-05, + "loss": 0.2808, + "step": 56715 + }, + { + "epoch": 2.65, + "learning_rate": 1.1269575031743296e-05, + "loss": 0.0484, + "step": 56720 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268791246688508e-05, + "loss": 0.0473, + "step": 56725 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268007461633722e-05, + "loss": 0.0651, + "step": 56730 + }, + { + "epoch": 2.65, + "learning_rate": 1.1267223676578936e-05, + "loss": 0.0608, + "step": 56735 + }, + { + "epoch": 2.65, + "learning_rate": 1.126643989152415e-05, + "loss": 0.0815, + "step": 56740 + }, + { + "epoch": 2.65, + "learning_rate": 1.1265656106469362e-05, + "loss": 0.0831, + "step": 56745 + }, + { + "epoch": 2.65, + "learning_rate": 1.1264872321414578e-05, + "loss": 0.1264, + "step": 56750 + }, + { + "epoch": 2.65, + "learning_rate": 1.126408853635979e-05, + "loss": 0.196, + "step": 56755 + }, + { + "epoch": 2.65, + "learning_rate": 1.1263304751305002e-05, + "loss": 0.1935, + "step": 56760 + }, + { + "epoch": 2.65, + "learning_rate": 1.1262520966250216e-05, + "loss": 0.2181, + "step": 56765 + }, + { + "epoch": 2.65, + "learning_rate": 1.1261737181195428e-05, + "loss": 0.0669, + "step": 56770 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260953396140644e-05, + "loss": 0.0351, + "step": 56775 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260169611085856e-05, + "loss": 0.031, + "step": 56780 + }, + { + "epoch": 2.65, + "learning_rate": 1.125938582603107e-05, + "loss": 0.1046, + "step": 56785 + }, + { + "epoch": 2.65, + "learning_rate": 1.1258602040976282e-05, + "loss": 0.0576, + "step": 56790 + }, + { + "epoch": 2.65, + "learning_rate": 1.1257818255921498e-05, + "loss": 0.1757, + "step": 56795 + }, + { + "epoch": 2.65, + "learning_rate": 1.125703447086671e-05, + "loss": 0.0646, + "step": 56800 + }, + { + "epoch": 2.65, + "learning_rate": 1.1256250685811924e-05, + "loss": 0.0942, + "step": 56805 + }, + { + "epoch": 2.65, + "learning_rate": 1.1255466900757136e-05, + "loss": 0.2081, + "step": 56810 + }, + { + "epoch": 2.65, + "learning_rate": 1.1254683115702352e-05, + "loss": 0.2665, + "step": 56815 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253899330647564e-05, + "loss": 0.1061, + "step": 56820 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253115545592776e-05, + "loss": 0.0643, + "step": 56825 + }, + { + "epoch": 2.65, + "learning_rate": 1.1252331760537992e-05, + "loss": 0.0954, + "step": 56830 + }, + { + "epoch": 2.65, + "learning_rate": 1.1251547975483204e-05, + "loss": 0.1031, + "step": 56835 + }, + { + "epoch": 2.65, + "learning_rate": 1.1250764190428418e-05, + "loss": 0.0669, + "step": 56840 + }, + { + "epoch": 2.65, + "learning_rate": 1.124998040537363e-05, + "loss": 0.102, + "step": 56845 + }, + { + "epoch": 2.65, + "learning_rate": 1.1249196620318846e-05, + "loss": 0.0921, + "step": 56850 + }, + { + "epoch": 2.65, + "learning_rate": 1.1248412835264058e-05, + "loss": 0.2177, + "step": 56855 + }, + { + "epoch": 2.65, + "learning_rate": 1.1247629050209272e-05, + "loss": 0.2624, + "step": 56860 + }, + { + "epoch": 2.65, + "learning_rate": 1.1246845265154484e-05, + "loss": 0.2272, + "step": 56865 + }, + { + "epoch": 2.65, + "learning_rate": 1.12460614800997e-05, + "loss": 0.0577, + "step": 56870 + }, + { + "epoch": 2.65, + "learning_rate": 1.1245277695044912e-05, + "loss": 0.0705, + "step": 56875 + }, + { + "epoch": 2.65, + "learning_rate": 1.1244493909990126e-05, + "loss": 0.0784, + "step": 56880 + }, + { + "epoch": 2.65, + "learning_rate": 1.1243710124935338e-05, + "loss": 0.0725, + "step": 56885 + }, + { + "epoch": 2.65, + "learning_rate": 1.124292633988055e-05, + "loss": 0.079, + "step": 56890 + }, + { + "epoch": 2.65, + "learning_rate": 1.1242142554825766e-05, + "loss": 0.0578, + "step": 56895 + }, + { + "epoch": 2.66, + "learning_rate": 1.1241358769770978e-05, + "loss": 0.1903, + "step": 56900 + }, + { + "epoch": 2.66, + "learning_rate": 1.1240574984716192e-05, + "loss": 0.1061, + "step": 56905 + }, + { + "epoch": 2.66, + "learning_rate": 1.1239791199661404e-05, + "loss": 0.3914, + "step": 56910 + }, + { + "epoch": 2.66, + "learning_rate": 1.123900741460662e-05, + "loss": 0.4026, + "step": 56915 + }, + { + "epoch": 2.66, + "learning_rate": 1.1238223629551832e-05, + "loss": 0.0493, + "step": 56920 + }, + { + "epoch": 2.66, + "learning_rate": 1.1237439844497046e-05, + "loss": 0.066, + "step": 56925 + }, + { + "epoch": 2.66, + "learning_rate": 1.123665605944226e-05, + "loss": 0.0681, + "step": 56930 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235872274387474e-05, + "loss": 0.0228, + "step": 56935 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235088489332686e-05, + "loss": 0.0556, + "step": 56940 + }, + { + "epoch": 2.66, + "learning_rate": 1.12343047042779e-05, + "loss": 0.1144, + "step": 56945 + }, + { + "epoch": 2.66, + "learning_rate": 1.1233520919223114e-05, + "loss": 0.0883, + "step": 56950 + }, + { + "epoch": 2.66, + "learning_rate": 1.1232737134168326e-05, + "loss": 0.156, + "step": 56955 + }, + { + "epoch": 2.66, + "learning_rate": 1.123195334911354e-05, + "loss": 0.2826, + "step": 56960 + }, + { + "epoch": 2.66, + "learning_rate": 1.1231169564058752e-05, + "loss": 0.2467, + "step": 56965 + }, + { + "epoch": 2.66, + "learning_rate": 1.1230385779003968e-05, + "loss": 0.0382, + "step": 56970 + }, + { + "epoch": 2.66, + "learning_rate": 1.122960199394918e-05, + "loss": 0.068, + "step": 56975 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228818208894394e-05, + "loss": 0.0677, + "step": 56980 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228034423839606e-05, + "loss": 0.1175, + "step": 56985 + }, + { + "epoch": 2.66, + "learning_rate": 1.1227250638784822e-05, + "loss": 0.0899, + "step": 56990 + }, + { + "epoch": 2.66, + "learning_rate": 1.1226466853730034e-05, + "loss": 0.1353, + "step": 56995 + }, + { + "epoch": 2.66, + "learning_rate": 1.1225683068675248e-05, + "loss": 0.1473, + "step": 57000 + }, + { + "epoch": 2.66, + "learning_rate": 1.122489928362046e-05, + "loss": 0.1321, + "step": 57005 + }, + { + "epoch": 2.66, + "learning_rate": 1.1224115498565676e-05, + "loss": 0.3038, + "step": 57010 + }, + { + "epoch": 2.66, + "learning_rate": 1.1223331713510888e-05, + "loss": 0.4092, + "step": 57015 + }, + { + "epoch": 2.66, + "learning_rate": 1.12225479284561e-05, + "loss": 0.0233, + "step": 57020 + }, + { + "epoch": 2.66, + "learning_rate": 1.1221764143401314e-05, + "loss": 0.0531, + "step": 57025 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220980358346528e-05, + "loss": 0.011, + "step": 57030 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220196573291742e-05, + "loss": 0.0896, + "step": 57035 + }, + { + "epoch": 2.66, + "learning_rate": 1.1219412788236954e-05, + "loss": 0.038, + "step": 57040 + }, + { + "epoch": 2.66, + "learning_rate": 1.121862900318217e-05, + "loss": 0.1052, + "step": 57045 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217845218127382e-05, + "loss": 0.1334, + "step": 57050 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217061433072596e-05, + "loss": 0.1431, + "step": 57055 + }, + { + "epoch": 2.66, + "learning_rate": 1.1216277648017808e-05, + "loss": 0.1915, + "step": 57060 + }, + { + "epoch": 2.66, + "learning_rate": 1.1215493862963024e-05, + "loss": 0.287, + "step": 57065 + }, + { + "epoch": 2.66, + "learning_rate": 1.1214710077908236e-05, + "loss": 0.1064, + "step": 57070 + }, + { + "epoch": 2.66, + "learning_rate": 1.121392629285345e-05, + "loss": 0.0665, + "step": 57075 + }, + { + "epoch": 2.66, + "learning_rate": 1.1213142507798662e-05, + "loss": 0.0537, + "step": 57080 + }, + { + "epoch": 2.66, + "learning_rate": 1.1212358722743874e-05, + "loss": 0.0913, + "step": 57085 + }, + { + "epoch": 2.66, + "learning_rate": 1.121157493768909e-05, + "loss": 0.0652, + "step": 57090 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210791152634302e-05, + "loss": 0.0595, + "step": 57095 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210007367579516e-05, + "loss": 0.0666, + "step": 57100 + }, + { + "epoch": 2.66, + "learning_rate": 1.1209223582524728e-05, + "loss": 0.2029, + "step": 57105 + }, + { + "epoch": 2.66, + "learning_rate": 1.1208439797469944e-05, + "loss": 0.2374, + "step": 57110 + }, + { + "epoch": 2.67, + "learning_rate": 1.1207656012415156e-05, + "loss": 0.2624, + "step": 57115 + }, + { + "epoch": 2.67, + "learning_rate": 1.120687222736037e-05, + "loss": 0.0789, + "step": 57120 + }, + { + "epoch": 2.67, + "learning_rate": 1.1206088442305582e-05, + "loss": 0.0434, + "step": 57125 + }, + { + "epoch": 2.67, + "learning_rate": 1.1205304657250798e-05, + "loss": 0.0571, + "step": 57130 + }, + { + "epoch": 2.67, + "learning_rate": 1.120452087219601e-05, + "loss": 0.0856, + "step": 57135 + }, + { + "epoch": 2.67, + "learning_rate": 1.1203737087141224e-05, + "loss": 0.1147, + "step": 57140 + }, + { + "epoch": 2.67, + "learning_rate": 1.1202953302086438e-05, + "loss": 0.1438, + "step": 57145 + }, + { + "epoch": 2.67, + "learning_rate": 1.120216951703165e-05, + "loss": 0.1312, + "step": 57150 + }, + { + "epoch": 2.67, + "learning_rate": 1.1201385731976864e-05, + "loss": 0.1851, + "step": 57155 + }, + { + "epoch": 2.67, + "learning_rate": 1.1200601946922076e-05, + "loss": 0.1552, + "step": 57160 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199818161867291e-05, + "loss": 0.2446, + "step": 57165 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199034376812504e-05, + "loss": 0.0227, + "step": 57170 + }, + { + "epoch": 2.67, + "learning_rate": 1.1198250591757718e-05, + "loss": 0.071, + "step": 57175 + }, + { + "epoch": 2.67, + "learning_rate": 1.119746680670293e-05, + "loss": 0.0333, + "step": 57180 + }, + { + "epoch": 2.67, + "learning_rate": 1.1196683021648145e-05, + "loss": 0.0713, + "step": 57185 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195899236593358e-05, + "loss": 0.0717, + "step": 57190 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195115451538572e-05, + "loss": 0.0648, + "step": 57195 + }, + { + "epoch": 2.67, + "learning_rate": 1.1194331666483784e-05, + "loss": 0.1173, + "step": 57200 + }, + { + "epoch": 2.67, + "learning_rate": 1.1193547881429e-05, + "loss": 0.09, + "step": 57205 + }, + { + "epoch": 2.67, + "learning_rate": 1.1192764096374212e-05, + "loss": 0.2192, + "step": 57210 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191980311319424e-05, + "loss": 0.3397, + "step": 57215 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191196526264638e-05, + "loss": 0.0862, + "step": 57220 + }, + { + "epoch": 2.67, + "learning_rate": 1.119041274120985e-05, + "loss": 0.0396, + "step": 57225 + }, + { + "epoch": 2.67, + "learning_rate": 1.1189628956155065e-05, + "loss": 0.0291, + "step": 57230 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188845171100278e-05, + "loss": 0.0333, + "step": 57235 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188061386045492e-05, + "loss": 0.0601, + "step": 57240 + }, + { + "epoch": 2.67, + "learning_rate": 1.1187277600990705e-05, + "loss": 0.0811, + "step": 57245 + }, + { + "epoch": 2.67, + "learning_rate": 1.118649381593592e-05, + "loss": 0.0969, + "step": 57250 + }, + { + "epoch": 2.67, + "learning_rate": 1.1185710030881132e-05, + "loss": 0.229, + "step": 57255 + }, + { + "epoch": 2.67, + "learning_rate": 1.1184926245826346e-05, + "loss": 0.294, + "step": 57260 + }, + { + "epoch": 2.67, + "learning_rate": 1.118414246077156e-05, + "loss": 0.3216, + "step": 57265 + }, + { + "epoch": 2.67, + "learning_rate": 1.1183358675716773e-05, + "loss": 0.0457, + "step": 57270 + }, + { + "epoch": 2.67, + "learning_rate": 1.1182574890661986e-05, + "loss": 0.0488, + "step": 57275 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181791105607198e-05, + "loss": 0.0694, + "step": 57280 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181007320552413e-05, + "loss": 0.0691, + "step": 57285 + }, + { + "epoch": 2.67, + "learning_rate": 1.1180223535497626e-05, + "loss": 0.1437, + "step": 57290 + }, + { + "epoch": 2.67, + "learning_rate": 1.117943975044284e-05, + "loss": 0.1237, + "step": 57295 + }, + { + "epoch": 2.67, + "learning_rate": 1.1178655965388052e-05, + "loss": 0.1094, + "step": 57300 + }, + { + "epoch": 2.67, + "learning_rate": 1.1177872180333267e-05, + "loss": 0.3414, + "step": 57305 + }, + { + "epoch": 2.67, + "learning_rate": 1.117708839527848e-05, + "loss": 0.2766, + "step": 57310 + }, + { + "epoch": 2.67, + "learning_rate": 1.1176304610223693e-05, + "loss": 0.2673, + "step": 57315 + }, + { + "epoch": 2.67, + "learning_rate": 1.1175520825168906e-05, + "loss": 0.0421, + "step": 57320 + }, + { + "epoch": 2.67, + "learning_rate": 1.1174737040114121e-05, + "loss": 0.0769, + "step": 57325 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173953255059333e-05, + "loss": 0.042, + "step": 57330 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173169470004547e-05, + "loss": 0.0679, + "step": 57335 + }, + { + "epoch": 2.68, + "learning_rate": 1.117238568494976e-05, + "loss": 0.0895, + "step": 57340 + }, + { + "epoch": 2.68, + "learning_rate": 1.1171601899894973e-05, + "loss": 0.1201, + "step": 57345 + }, + { + "epoch": 2.68, + "learning_rate": 1.1170818114840187e-05, + "loss": 0.1044, + "step": 57350 + }, + { + "epoch": 2.68, + "learning_rate": 1.11700343297854e-05, + "loss": 0.0649, + "step": 57355 + }, + { + "epoch": 2.68, + "learning_rate": 1.1169250544730615e-05, + "loss": 0.224, + "step": 57360 + }, + { + "epoch": 2.68, + "learning_rate": 1.1168466759675827e-05, + "loss": 0.3896, + "step": 57365 + }, + { + "epoch": 2.68, + "learning_rate": 1.1167682974621041e-05, + "loss": 0.0307, + "step": 57370 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166899189566253e-05, + "loss": 0.0542, + "step": 57375 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166115404511469e-05, + "loss": 0.0674, + "step": 57380 + }, + { + "epoch": 2.68, + "learning_rate": 1.1165331619456681e-05, + "loss": 0.0919, + "step": 57385 + }, + { + "epoch": 2.68, + "learning_rate": 1.1164547834401895e-05, + "loss": 0.0887, + "step": 57390 + }, + { + "epoch": 2.68, + "learning_rate": 1.1163764049347107e-05, + "loss": 0.1729, + "step": 57395 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162980264292323e-05, + "loss": 0.1892, + "step": 57400 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162196479237535e-05, + "loss": 0.1451, + "step": 57405 + }, + { + "epoch": 2.68, + "learning_rate": 1.1161412694182747e-05, + "loss": 0.3484, + "step": 57410 + }, + { + "epoch": 2.68, + "learning_rate": 1.1160628909127961e-05, + "loss": 0.3337, + "step": 57415 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159845124073174e-05, + "loss": 0.0925, + "step": 57420 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159061339018389e-05, + "loss": 0.0863, + "step": 57425 + }, + { + "epoch": 2.68, + "learning_rate": 1.1158277553963601e-05, + "loss": 0.0389, + "step": 57430 + }, + { + "epoch": 2.68, + "learning_rate": 1.1157493768908815e-05, + "loss": 0.0606, + "step": 57435 + }, + { + "epoch": 2.68, + "learning_rate": 1.1156709983854027e-05, + "loss": 0.1619, + "step": 57440 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155926198799243e-05, + "loss": 0.177, + "step": 57445 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155142413744455e-05, + "loss": 0.1052, + "step": 57450 + }, + { + "epoch": 2.68, + "learning_rate": 1.115435862868967e-05, + "loss": 0.2065, + "step": 57455 + }, + { + "epoch": 2.68, + "learning_rate": 1.1153574843634883e-05, + "loss": 0.1876, + "step": 57460 + }, + { + "epoch": 2.68, + "learning_rate": 1.1152791058580097e-05, + "loss": 0.1923, + "step": 57465 + }, + { + "epoch": 2.68, + "learning_rate": 1.115200727352531e-05, + "loss": 0.0589, + "step": 57470 + }, + { + "epoch": 2.68, + "learning_rate": 1.1151223488470521e-05, + "loss": 0.0346, + "step": 57475 + }, + { + "epoch": 2.68, + "learning_rate": 1.1150439703415737e-05, + "loss": 0.0486, + "step": 57480 + }, + { + "epoch": 2.68, + "learning_rate": 1.114965591836095e-05, + "loss": 0.0673, + "step": 57485 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148872133306163e-05, + "loss": 0.0864, + "step": 57490 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148088348251375e-05, + "loss": 0.103, + "step": 57495 + }, + { + "epoch": 2.68, + "learning_rate": 1.1147304563196591e-05, + "loss": 0.2384, + "step": 57500 + }, + { + "epoch": 2.68, + "learning_rate": 1.1146520778141803e-05, + "loss": 0.151, + "step": 57505 + }, + { + "epoch": 2.68, + "learning_rate": 1.1145736993087017e-05, + "loss": 0.1685, + "step": 57510 + }, + { + "epoch": 2.68, + "learning_rate": 1.114495320803223e-05, + "loss": 0.2351, + "step": 57515 + }, + { + "epoch": 2.68, + "learning_rate": 1.1144169422977445e-05, + "loss": 0.0842, + "step": 57520 + }, + { + "epoch": 2.68, + "learning_rate": 1.1143385637922657e-05, + "loss": 0.0256, + "step": 57525 + }, + { + "epoch": 2.68, + "learning_rate": 1.1142601852867871e-05, + "loss": 0.0696, + "step": 57530 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141818067813083e-05, + "loss": 0.0371, + "step": 57535 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141034282758295e-05, + "loss": 0.0474, + "step": 57540 + }, + { + "epoch": 2.69, + "learning_rate": 1.1140250497703511e-05, + "loss": 0.0927, + "step": 57545 + }, + { + "epoch": 2.69, + "learning_rate": 1.1139466712648723e-05, + "loss": 0.0978, + "step": 57550 + }, + { + "epoch": 2.69, + "learning_rate": 1.1138682927593937e-05, + "loss": 0.223, + "step": 57555 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137899142539151e-05, + "loss": 0.2131, + "step": 57560 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137115357484365e-05, + "loss": 0.2394, + "step": 57565 + }, + { + "epoch": 2.69, + "learning_rate": 1.1136331572429577e-05, + "loss": 0.0255, + "step": 57570 + }, + { + "epoch": 2.69, + "learning_rate": 1.1135547787374791e-05, + "loss": 0.0629, + "step": 57575 + }, + { + "epoch": 2.69, + "learning_rate": 1.1134764002320005e-05, + "loss": 0.0524, + "step": 57580 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133980217265219e-05, + "loss": 0.0692, + "step": 57585 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133196432210431e-05, + "loss": 0.0791, + "step": 57590 + }, + { + "epoch": 2.69, + "learning_rate": 1.1132412647155647e-05, + "loss": 0.0576, + "step": 57595 + }, + { + "epoch": 2.69, + "learning_rate": 1.1131628862100859e-05, + "loss": 0.1245, + "step": 57600 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130845077046071e-05, + "loss": 0.1853, + "step": 57605 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130061291991285e-05, + "loss": 0.4984, + "step": 57610 + }, + { + "epoch": 2.69, + "learning_rate": 1.1129277506936497e-05, + "loss": 0.306, + "step": 57615 + }, + { + "epoch": 2.69, + "learning_rate": 1.1128493721881713e-05, + "loss": 0.0546, + "step": 57620 + }, + { + "epoch": 2.69, + "learning_rate": 1.1127709936826925e-05, + "loss": 0.0659, + "step": 57625 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126926151772139e-05, + "loss": 0.043, + "step": 57630 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126142366717351e-05, + "loss": 0.0508, + "step": 57635 + }, + { + "epoch": 2.69, + "learning_rate": 1.1125358581662567e-05, + "loss": 0.0316, + "step": 57640 + }, + { + "epoch": 2.69, + "learning_rate": 1.1124574796607779e-05, + "loss": 0.0663, + "step": 57645 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123791011552993e-05, + "loss": 0.098, + "step": 57650 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123007226498205e-05, + "loss": 0.1989, + "step": 57655 + }, + { + "epoch": 2.69, + "learning_rate": 1.112222344144342e-05, + "loss": 0.25, + "step": 57660 + }, + { + "epoch": 2.69, + "learning_rate": 1.1121439656388633e-05, + "loss": 0.3534, + "step": 57665 + }, + { + "epoch": 2.69, + "learning_rate": 1.1120655871333845e-05, + "loss": 0.018, + "step": 57670 + }, + { + "epoch": 2.69, + "learning_rate": 1.111987208627906e-05, + "loss": 0.0352, + "step": 57675 + }, + { + "epoch": 2.69, + "learning_rate": 1.1119088301224273e-05, + "loss": 0.0879, + "step": 57680 + }, + { + "epoch": 2.69, + "learning_rate": 1.1118304516169487e-05, + "loss": 0.1559, + "step": 57685 + }, + { + "epoch": 2.69, + "learning_rate": 1.1117520731114699e-05, + "loss": 0.1489, + "step": 57690 + }, + { + "epoch": 2.69, + "learning_rate": 1.1116736946059915e-05, + "loss": 0.0735, + "step": 57695 + }, + { + "epoch": 2.69, + "learning_rate": 1.1115953161005127e-05, + "loss": 0.1344, + "step": 57700 + }, + { + "epoch": 2.69, + "learning_rate": 1.111516937595034e-05, + "loss": 0.1133, + "step": 57705 + }, + { + "epoch": 2.69, + "learning_rate": 1.1114385590895553e-05, + "loss": 0.1911, + "step": 57710 + }, + { + "epoch": 2.69, + "learning_rate": 1.1113601805840769e-05, + "loss": 0.3633, + "step": 57715 + }, + { + "epoch": 2.69, + "learning_rate": 1.111281802078598e-05, + "loss": 0.036, + "step": 57720 + }, + { + "epoch": 2.69, + "learning_rate": 1.1112034235731195e-05, + "loss": 0.0589, + "step": 57725 + }, + { + "epoch": 2.69, + "learning_rate": 1.1111250450676407e-05, + "loss": 0.0351, + "step": 57730 + }, + { + "epoch": 2.69, + "learning_rate": 1.1110466665621619e-05, + "loss": 0.0914, + "step": 57735 + }, + { + "epoch": 2.69, + "learning_rate": 1.1109682880566835e-05, + "loss": 0.1147, + "step": 57740 + }, + { + "epoch": 2.69, + "learning_rate": 1.1108899095512047e-05, + "loss": 0.0828, + "step": 57745 + }, + { + "epoch": 2.69, + "learning_rate": 1.110811531045726e-05, + "loss": 0.1332, + "step": 57750 + }, + { + "epoch": 2.69, + "learning_rate": 1.1107331525402473e-05, + "loss": 0.2566, + "step": 57755 + }, + { + "epoch": 2.7, + "learning_rate": 1.1106547740347689e-05, + "loss": 0.2272, + "step": 57760 + }, + { + "epoch": 2.7, + "learning_rate": 1.1105763955292901e-05, + "loss": 0.2749, + "step": 57765 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104980170238115e-05, + "loss": 0.0733, + "step": 57770 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104196385183329e-05, + "loss": 0.0124, + "step": 57775 + }, + { + "epoch": 2.7, + "learning_rate": 1.1103412600128543e-05, + "loss": 0.0851, + "step": 57780 + }, + { + "epoch": 2.7, + "learning_rate": 1.1102628815073755e-05, + "loss": 0.0767, + "step": 57785 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101845030018969e-05, + "loss": 0.0738, + "step": 57790 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101061244964183e-05, + "loss": 0.0922, + "step": 57795 + }, + { + "epoch": 2.7, + "learning_rate": 1.1100277459909395e-05, + "loss": 0.121, + "step": 57800 + }, + { + "epoch": 2.7, + "learning_rate": 1.1099493674854609e-05, + "loss": 0.2056, + "step": 57805 + }, + { + "epoch": 2.7, + "learning_rate": 1.1098709889799821e-05, + "loss": 0.1976, + "step": 57810 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097926104745037e-05, + "loss": 0.3544, + "step": 57815 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097142319690249e-05, + "loss": 0.0921, + "step": 57820 + }, + { + "epoch": 2.7, + "learning_rate": 1.1096358534635463e-05, + "loss": 0.0193, + "step": 57825 + }, + { + "epoch": 2.7, + "learning_rate": 1.1095574749580675e-05, + "loss": 0.0806, + "step": 57830 + }, + { + "epoch": 2.7, + "learning_rate": 1.109479096452589e-05, + "loss": 0.0968, + "step": 57835 + }, + { + "epoch": 2.7, + "learning_rate": 1.1094007179471103e-05, + "loss": 0.0627, + "step": 57840 + }, + { + "epoch": 2.7, + "learning_rate": 1.1093223394416317e-05, + "loss": 0.1318, + "step": 57845 + }, + { + "epoch": 2.7, + "learning_rate": 1.1092439609361529e-05, + "loss": 0.1039, + "step": 57850 + }, + { + "epoch": 2.7, + "learning_rate": 1.1091655824306744e-05, + "loss": 0.1956, + "step": 57855 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090872039251957e-05, + "loss": 0.1967, + "step": 57860 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090088254197169e-05, + "loss": 0.4312, + "step": 57865 + }, + { + "epoch": 2.7, + "learning_rate": 1.1089304469142383e-05, + "loss": 0.054, + "step": 57870 + }, + { + "epoch": 2.7, + "learning_rate": 1.1088520684087597e-05, + "loss": 0.0584, + "step": 57875 + }, + { + "epoch": 2.7, + "learning_rate": 1.108773689903281e-05, + "loss": 0.0745, + "step": 57880 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086953113978023e-05, + "loss": 0.0903, + "step": 57885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086169328923237e-05, + "loss": 0.0683, + "step": 57890 + }, + { + "epoch": 2.7, + "learning_rate": 1.108538554386845e-05, + "loss": 0.1025, + "step": 57895 + }, + { + "epoch": 2.7, + "learning_rate": 1.1084601758813664e-05, + "loss": 0.129, + "step": 57900 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083817973758877e-05, + "loss": 0.0918, + "step": 57905 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083034188704092e-05, + "loss": 0.1841, + "step": 57910 + }, + { + "epoch": 2.7, + "learning_rate": 1.1082250403649304e-05, + "loss": 0.237, + "step": 57915 + }, + { + "epoch": 2.7, + "learning_rate": 1.1081466618594518e-05, + "loss": 0.1046, + "step": 57920 + }, + { + "epoch": 2.7, + "learning_rate": 1.108068283353973e-05, + "loss": 0.0991, + "step": 57925 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079899048484943e-05, + "loss": 0.0322, + "step": 57930 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079115263430158e-05, + "loss": 0.0464, + "step": 57935 + }, + { + "epoch": 2.7, + "learning_rate": 1.107833147837537e-05, + "loss": 0.0611, + "step": 57940 + }, + { + "epoch": 2.7, + "learning_rate": 1.1077547693320585e-05, + "loss": 0.1024, + "step": 57945 + }, + { + "epoch": 2.7, + "learning_rate": 1.1076763908265797e-05, + "loss": 0.2626, + "step": 57950 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075980123211012e-05, + "loss": 0.1249, + "step": 57955 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075196338156225e-05, + "loss": 0.2525, + "step": 57960 + }, + { + "epoch": 2.7, + "learning_rate": 1.1074412553101438e-05, + "loss": 0.2988, + "step": 57965 + }, + { + "epoch": 2.7, + "learning_rate": 1.107362876804665e-05, + "loss": 0.0208, + "step": 57970 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072844982991866e-05, + "loss": 0.0614, + "step": 57975 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072061197937078e-05, + "loss": 0.056, + "step": 57980 + }, + { + "epoch": 2.71, + "learning_rate": 1.1071277412882292e-05, + "loss": 0.0867, + "step": 57985 + }, + { + "epoch": 2.71, + "learning_rate": 1.1070493627827506e-05, + "loss": 0.0771, + "step": 57990 + }, + { + "epoch": 2.71, + "learning_rate": 1.1069709842772719e-05, + "loss": 0.1148, + "step": 57995 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068926057717932e-05, + "loss": 0.1713, + "step": 58000 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068142272663145e-05, + "loss": 0.1785, + "step": 58005 + }, + { + "epoch": 2.71, + "learning_rate": 1.106735848760836e-05, + "loss": 0.1555, + "step": 58010 + }, + { + "epoch": 2.71, + "learning_rate": 1.1066574702553572e-05, + "loss": 0.3208, + "step": 58015 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065790917498786e-05, + "loss": 0.0433, + "step": 58020 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065007132443999e-05, + "loss": 0.0256, + "step": 58025 + }, + { + "epoch": 2.71, + "learning_rate": 1.1064223347389214e-05, + "loss": 0.09, + "step": 58030 + }, + { + "epoch": 2.71, + "learning_rate": 1.1063439562334426e-05, + "loss": 0.0716, + "step": 58035 + }, + { + "epoch": 2.71, + "learning_rate": 1.106265577727964e-05, + "loss": 0.0298, + "step": 58040 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061871992224852e-05, + "loss": 0.0745, + "step": 58045 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061088207170068e-05, + "loss": 0.1625, + "step": 58050 + }, + { + "epoch": 2.71, + "learning_rate": 1.106030442211528e-05, + "loss": 0.1818, + "step": 58055 + }, + { + "epoch": 2.71, + "learning_rate": 1.1059520637060493e-05, + "loss": 0.2606, + "step": 58060 + }, + { + "epoch": 2.71, + "learning_rate": 1.1058736852005706e-05, + "loss": 0.2315, + "step": 58065 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057953066950919e-05, + "loss": 0.0346, + "step": 58070 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057169281896134e-05, + "loss": 0.0731, + "step": 58075 + }, + { + "epoch": 2.71, + "learning_rate": 1.1056385496841346e-05, + "loss": 0.0567, + "step": 58080 + }, + { + "epoch": 2.71, + "learning_rate": 1.105560171178656e-05, + "loss": 0.0793, + "step": 58085 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054817926731774e-05, + "loss": 0.0618, + "step": 58090 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054034141676988e-05, + "loss": 0.1303, + "step": 58095 + }, + { + "epoch": 2.71, + "learning_rate": 1.10532503566222e-05, + "loss": 0.1785, + "step": 58100 + }, + { + "epoch": 2.71, + "learning_rate": 1.1052466571567414e-05, + "loss": 0.197, + "step": 58105 + }, + { + "epoch": 2.71, + "learning_rate": 1.1051682786512628e-05, + "loss": 0.3032, + "step": 58110 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050899001457842e-05, + "loss": 0.3066, + "step": 58115 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050115216403054e-05, + "loss": 0.0911, + "step": 58120 + }, + { + "epoch": 2.71, + "learning_rate": 1.1049331431348267e-05, + "loss": 0.0323, + "step": 58125 + }, + { + "epoch": 2.71, + "learning_rate": 1.1048547646293482e-05, + "loss": 0.0549, + "step": 58130 + }, + { + "epoch": 2.71, + "learning_rate": 1.1047763861238694e-05, + "loss": 0.1821, + "step": 58135 + }, + { + "epoch": 2.71, + "learning_rate": 1.1046980076183908e-05, + "loss": 0.0566, + "step": 58140 + }, + { + "epoch": 2.71, + "learning_rate": 1.104619629112912e-05, + "loss": 0.0821, + "step": 58145 + }, + { + "epoch": 2.71, + "learning_rate": 1.1045412506074336e-05, + "loss": 0.1769, + "step": 58150 + }, + { + "epoch": 2.71, + "learning_rate": 1.1044628721019548e-05, + "loss": 0.1196, + "step": 58155 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043844935964762e-05, + "loss": 0.1464, + "step": 58160 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043061150909974e-05, + "loss": 0.2286, + "step": 58165 + }, + { + "epoch": 2.71, + "learning_rate": 1.104227736585519e-05, + "loss": 0.0928, + "step": 58170 + }, + { + "epoch": 2.71, + "learning_rate": 1.1041493580800402e-05, + "loss": 0.0066, + "step": 58175 + }, + { + "epoch": 2.71, + "learning_rate": 1.1040709795745616e-05, + "loss": 0.0367, + "step": 58180 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039926010690828e-05, + "loss": 0.0453, + "step": 58185 + }, + { + "epoch": 2.72, + "learning_rate": 1.1039142225636042e-05, + "loss": 0.0471, + "step": 58190 + }, + { + "epoch": 2.72, + "learning_rate": 1.1038358440581256e-05, + "loss": 0.1814, + "step": 58195 + }, + { + "epoch": 2.72, + "learning_rate": 1.1037574655526468e-05, + "loss": 0.1526, + "step": 58200 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036790870471684e-05, + "loss": 0.2725, + "step": 58205 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036007085416896e-05, + "loss": 0.1844, + "step": 58210 + }, + { + "epoch": 2.72, + "learning_rate": 1.103522330036211e-05, + "loss": 0.1978, + "step": 58215 + }, + { + "epoch": 2.72, + "learning_rate": 1.1034439515307322e-05, + "loss": 0.1005, + "step": 58220 + }, + { + "epoch": 2.72, + "learning_rate": 1.1033655730252538e-05, + "loss": 0.0466, + "step": 58225 + }, + { + "epoch": 2.72, + "learning_rate": 1.103287194519775e-05, + "loss": 0.0683, + "step": 58230 + }, + { + "epoch": 2.72, + "learning_rate": 1.1032088160142964e-05, + "loss": 0.0518, + "step": 58235 + }, + { + "epoch": 2.72, + "learning_rate": 1.1031304375088176e-05, + "loss": 0.0813, + "step": 58240 + }, + { + "epoch": 2.72, + "learning_rate": 1.1030520590033392e-05, + "loss": 0.0502, + "step": 58245 + }, + { + "epoch": 2.72, + "learning_rate": 1.1029736804978604e-05, + "loss": 0.1395, + "step": 58250 + }, + { + "epoch": 2.72, + "learning_rate": 1.1028953019923816e-05, + "loss": 0.0872, + "step": 58255 + }, + { + "epoch": 2.72, + "learning_rate": 1.102816923486903e-05, + "loss": 0.3588, + "step": 58260 + }, + { + "epoch": 2.72, + "learning_rate": 1.1027385449814242e-05, + "loss": 0.2024, + "step": 58265 + }, + { + "epoch": 2.72, + "learning_rate": 1.1026601664759458e-05, + "loss": 0.1045, + "step": 58270 + }, + { + "epoch": 2.72, + "learning_rate": 1.102581787970467e-05, + "loss": 0.0733, + "step": 58275 + }, + { + "epoch": 2.72, + "learning_rate": 1.1025034094649884e-05, + "loss": 0.0158, + "step": 58280 + }, + { + "epoch": 2.72, + "learning_rate": 1.1024250309595096e-05, + "loss": 0.0791, + "step": 58285 + }, + { + "epoch": 2.72, + "learning_rate": 1.1023466524540312e-05, + "loss": 0.0812, + "step": 58290 + }, + { + "epoch": 2.72, + "learning_rate": 1.1022682739485524e-05, + "loss": 0.0696, + "step": 58295 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021898954430738e-05, + "loss": 0.1509, + "step": 58300 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021115169375952e-05, + "loss": 0.1556, + "step": 58305 + }, + { + "epoch": 2.72, + "learning_rate": 1.1020331384321166e-05, + "loss": 0.2344, + "step": 58310 + }, + { + "epoch": 2.72, + "learning_rate": 1.1019547599266378e-05, + "loss": 0.3865, + "step": 58315 + }, + { + "epoch": 2.72, + "learning_rate": 1.101876381421159e-05, + "loss": 0.0351, + "step": 58320 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017980029156806e-05, + "loss": 0.0247, + "step": 58325 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017196244102018e-05, + "loss": 0.034, + "step": 58330 + }, + { + "epoch": 2.72, + "learning_rate": 1.1016412459047232e-05, + "loss": 0.0443, + "step": 58335 + }, + { + "epoch": 2.72, + "learning_rate": 1.1015628673992444e-05, + "loss": 0.0533, + "step": 58340 + }, + { + "epoch": 2.72, + "learning_rate": 1.101484488893766e-05, + "loss": 0.113, + "step": 58345 + }, + { + "epoch": 2.72, + "learning_rate": 1.1014061103882872e-05, + "loss": 0.1294, + "step": 58350 + }, + { + "epoch": 2.72, + "learning_rate": 1.1013277318828086e-05, + "loss": 0.2372, + "step": 58355 + }, + { + "epoch": 2.72, + "learning_rate": 1.1012493533773298e-05, + "loss": 0.1815, + "step": 58360 + }, + { + "epoch": 2.72, + "learning_rate": 1.1011709748718514e-05, + "loss": 0.4188, + "step": 58365 + }, + { + "epoch": 2.72, + "learning_rate": 1.1010925963663726e-05, + "loss": 0.0725, + "step": 58370 + }, + { + "epoch": 2.72, + "learning_rate": 1.101014217860894e-05, + "loss": 0.0558, + "step": 58375 + }, + { + "epoch": 2.72, + "learning_rate": 1.1009358393554152e-05, + "loss": 0.0533, + "step": 58380 + }, + { + "epoch": 2.72, + "learning_rate": 1.1008574608499364e-05, + "loss": 0.0649, + "step": 58385 + }, + { + "epoch": 2.72, + "learning_rate": 1.100779082344458e-05, + "loss": 0.0457, + "step": 58390 + }, + { + "epoch": 2.72, + "learning_rate": 1.1007007038389792e-05, + "loss": 0.063, + "step": 58395 + }, + { + "epoch": 2.73, + "learning_rate": 1.1006223253335006e-05, + "loss": 0.2506, + "step": 58400 + }, + { + "epoch": 2.73, + "learning_rate": 1.100543946828022e-05, + "loss": 0.1326, + "step": 58405 + }, + { + "epoch": 2.73, + "learning_rate": 1.1004655683225434e-05, + "loss": 0.2099, + "step": 58410 + }, + { + "epoch": 2.73, + "learning_rate": 1.1003871898170646e-05, + "loss": 0.2704, + "step": 58415 + }, + { + "epoch": 2.73, + "learning_rate": 1.100308811311586e-05, + "loss": 0.0142, + "step": 58420 + }, + { + "epoch": 2.73, + "learning_rate": 1.1002304328061074e-05, + "loss": 0.0844, + "step": 58425 + }, + { + "epoch": 2.73, + "learning_rate": 1.1001520543006288e-05, + "loss": 0.0416, + "step": 58430 + }, + { + "epoch": 2.73, + "learning_rate": 1.10007367579515e-05, + "loss": 0.0186, + "step": 58435 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999952972896715e-05, + "loss": 0.0636, + "step": 58440 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999169187841928e-05, + "loss": 0.1073, + "step": 58445 + }, + { + "epoch": 2.73, + "learning_rate": 1.099838540278714e-05, + "loss": 0.1895, + "step": 58450 + }, + { + "epoch": 2.73, + "learning_rate": 1.0997601617732354e-05, + "loss": 0.157, + "step": 58455 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996817832677566e-05, + "loss": 0.2624, + "step": 58460 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996034047622782e-05, + "loss": 0.2066, + "step": 58465 + }, + { + "epoch": 2.73, + "learning_rate": 1.0995250262567994e-05, + "loss": 0.0556, + "step": 58470 + }, + { + "epoch": 2.73, + "learning_rate": 1.0994466477513208e-05, + "loss": 0.0346, + "step": 58475 + }, + { + "epoch": 2.73, + "learning_rate": 1.099368269245842e-05, + "loss": 0.0719, + "step": 58480 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992898907403636e-05, + "loss": 0.064, + "step": 58485 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992115122348848e-05, + "loss": 0.0563, + "step": 58490 + }, + { + "epoch": 2.73, + "learning_rate": 1.0991331337294062e-05, + "loss": 0.1556, + "step": 58495 + }, + { + "epoch": 2.73, + "learning_rate": 1.0990547552239274e-05, + "loss": 0.1548, + "step": 58500 + }, + { + "epoch": 2.73, + "learning_rate": 1.098976376718449e-05, + "loss": 0.1101, + "step": 58505 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988979982129702e-05, + "loss": 0.3026, + "step": 58510 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988196197074914e-05, + "loss": 0.3281, + "step": 58515 + }, + { + "epoch": 2.73, + "learning_rate": 1.098741241202013e-05, + "loss": 0.1004, + "step": 58520 + }, + { + "epoch": 2.73, + "learning_rate": 1.0986628626965342e-05, + "loss": 0.0659, + "step": 58525 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985844841910556e-05, + "loss": 0.0501, + "step": 58530 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985061056855768e-05, + "loss": 0.0952, + "step": 58535 + }, + { + "epoch": 2.73, + "learning_rate": 1.0984277271800983e-05, + "loss": 0.1421, + "step": 58540 + }, + { + "epoch": 2.73, + "learning_rate": 1.0983493486746196e-05, + "loss": 0.0672, + "step": 58545 + }, + { + "epoch": 2.73, + "learning_rate": 1.098270970169141e-05, + "loss": 0.1645, + "step": 58550 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981925916636622e-05, + "loss": 0.2002, + "step": 58555 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981142131581837e-05, + "loss": 0.2722, + "step": 58560 + }, + { + "epoch": 2.73, + "learning_rate": 1.098035834652705e-05, + "loss": 0.3582, + "step": 58565 + }, + { + "epoch": 2.73, + "learning_rate": 1.0979574561472263e-05, + "loss": 0.0799, + "step": 58570 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978790776417476e-05, + "loss": 0.0616, + "step": 58575 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978006991362688e-05, + "loss": 0.0691, + "step": 58580 + }, + { + "epoch": 2.73, + "learning_rate": 1.0977223206307903e-05, + "loss": 0.0437, + "step": 58585 + }, + { + "epoch": 2.73, + "learning_rate": 1.0976439421253116e-05, + "loss": 0.0749, + "step": 58590 + }, + { + "epoch": 2.73, + "learning_rate": 1.097565563619833e-05, + "loss": 0.0765, + "step": 58595 + }, + { + "epoch": 2.73, + "learning_rate": 1.0975028608154502e-05, + "loss": 0.0997, + "step": 58600 + }, + { + "epoch": 2.73, + "learning_rate": 1.0974244823099714e-05, + "loss": 0.1803, + "step": 58605 + }, + { + "epoch": 2.73, + "learning_rate": 1.0973461038044928e-05, + "loss": 0.0981, + "step": 58610 + }, + { + "epoch": 2.74, + "learning_rate": 1.097267725299014e-05, + "loss": 0.3792, + "step": 58615 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971893467935356e-05, + "loss": 0.0301, + "step": 58620 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971109682880568e-05, + "loss": 0.0599, + "step": 58625 + }, + { + "epoch": 2.74, + "learning_rate": 1.0970325897825782e-05, + "loss": 0.0407, + "step": 58630 + }, + { + "epoch": 2.74, + "learning_rate": 1.0969542112770994e-05, + "loss": 0.0282, + "step": 58635 + }, + { + "epoch": 2.74, + "learning_rate": 1.096875832771621e-05, + "loss": 0.1213, + "step": 58640 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967974542661422e-05, + "loss": 0.0466, + "step": 58645 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967190757606634e-05, + "loss": 0.1724, + "step": 58650 + }, + { + "epoch": 2.74, + "learning_rate": 1.0966406972551848e-05, + "loss": 0.127, + "step": 58655 + }, + { + "epoch": 2.74, + "learning_rate": 1.096562318749706e-05, + "loss": 0.1452, + "step": 58660 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964839402442276e-05, + "loss": 0.1929, + "step": 58665 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964055617387488e-05, + "loss": 0.0849, + "step": 58670 + }, + { + "epoch": 2.74, + "learning_rate": 1.0963271832332702e-05, + "loss": 0.0543, + "step": 58675 + }, + { + "epoch": 2.74, + "learning_rate": 1.0962488047277916e-05, + "loss": 0.0736, + "step": 58680 + }, + { + "epoch": 2.74, + "learning_rate": 1.096170426222313e-05, + "loss": 0.0518, + "step": 58685 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960920477168342e-05, + "loss": 0.0852, + "step": 58690 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960136692113556e-05, + "loss": 0.1975, + "step": 58695 + }, + { + "epoch": 2.74, + "learning_rate": 1.095935290705877e-05, + "loss": 0.1414, + "step": 58700 + }, + { + "epoch": 2.74, + "learning_rate": 1.0958569122003984e-05, + "loss": 0.2228, + "step": 58705 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957785336949196e-05, + "loss": 0.1912, + "step": 58710 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957001551894408e-05, + "loss": 0.2865, + "step": 58715 + }, + { + "epoch": 2.74, + "learning_rate": 1.0956217766839624e-05, + "loss": 0.0588, + "step": 58720 + }, + { + "epoch": 2.74, + "learning_rate": 1.0955433981784836e-05, + "loss": 0.0375, + "step": 58725 + }, + { + "epoch": 2.74, + "learning_rate": 1.095465019673005e-05, + "loss": 0.1023, + "step": 58730 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953866411675262e-05, + "loss": 0.0744, + "step": 58735 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953082626620478e-05, + "loss": 0.114, + "step": 58740 + }, + { + "epoch": 2.74, + "learning_rate": 1.095229884156569e-05, + "loss": 0.1044, + "step": 58745 + }, + { + "epoch": 2.74, + "learning_rate": 1.0951515056510904e-05, + "loss": 0.1161, + "step": 58750 + }, + { + "epoch": 2.74, + "learning_rate": 1.0950731271456116e-05, + "loss": 0.1975, + "step": 58755 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949947486401331e-05, + "loss": 0.2124, + "step": 58760 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949163701346544e-05, + "loss": 0.4417, + "step": 58765 + }, + { + "epoch": 2.74, + "learning_rate": 1.0948379916291758e-05, + "loss": 0.0748, + "step": 58770 + }, + { + "epoch": 2.74, + "learning_rate": 1.094759613123697e-05, + "loss": 0.0129, + "step": 58775 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946812346182184e-05, + "loss": 0.0311, + "step": 58780 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946028561127398e-05, + "loss": 0.1198, + "step": 58785 + }, + { + "epoch": 2.74, + "learning_rate": 1.094524477607261e-05, + "loss": 0.0663, + "step": 58790 + }, + { + "epoch": 2.74, + "learning_rate": 1.0944460991017824e-05, + "loss": 0.0381, + "step": 58795 + }, + { + "epoch": 2.74, + "learning_rate": 1.0943677205963038e-05, + "loss": 0.1238, + "step": 58800 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942893420908252e-05, + "loss": 0.213, + "step": 58805 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942109635853464e-05, + "loss": 0.1942, + "step": 58810 + }, + { + "epoch": 2.74, + "learning_rate": 1.094132585079868e-05, + "loss": 0.349, + "step": 58815 + }, + { + "epoch": 2.74, + "learning_rate": 1.0940542065743892e-05, + "loss": 0.0369, + "step": 58820 + }, + { + "epoch": 2.74, + "learning_rate": 1.0939758280689105e-05, + "loss": 0.0544, + "step": 58825 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938974495634318e-05, + "loss": 0.0355, + "step": 58830 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938190710579533e-05, + "loss": 0.0572, + "step": 58835 + }, + { + "epoch": 2.75, + "learning_rate": 1.0937406925524746e-05, + "loss": 0.1182, + "step": 58840 + }, + { + "epoch": 2.75, + "learning_rate": 1.0936623140469958e-05, + "loss": 0.1305, + "step": 58845 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935839355415172e-05, + "loss": 0.1772, + "step": 58850 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935055570360384e-05, + "loss": 0.1283, + "step": 58855 + }, + { + "epoch": 2.75, + "learning_rate": 1.09342717853056e-05, + "loss": 0.1314, + "step": 58860 + }, + { + "epoch": 2.75, + "learning_rate": 1.0933488000250812e-05, + "loss": 0.2568, + "step": 58865 + }, + { + "epoch": 2.75, + "learning_rate": 1.0932704215196026e-05, + "loss": 0.0676, + "step": 58870 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931920430141238e-05, + "loss": 0.0313, + "step": 58875 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931136645086453e-05, + "loss": 0.0603, + "step": 58880 + }, + { + "epoch": 2.75, + "learning_rate": 1.0930352860031666e-05, + "loss": 0.0634, + "step": 58885 + }, + { + "epoch": 2.75, + "learning_rate": 1.092956907497688e-05, + "loss": 0.0677, + "step": 58890 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928785289922092e-05, + "loss": 0.0863, + "step": 58895 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928001504867307e-05, + "loss": 0.063, + "step": 58900 + }, + { + "epoch": 2.75, + "learning_rate": 1.092721771981252e-05, + "loss": 0.17, + "step": 58905 + }, + { + "epoch": 2.75, + "learning_rate": 1.0926433934757732e-05, + "loss": 0.1749, + "step": 58910 + }, + { + "epoch": 2.75, + "learning_rate": 1.0925650149702947e-05, + "loss": 0.2382, + "step": 58915 + }, + { + "epoch": 2.75, + "learning_rate": 1.092486636464816e-05, + "loss": 0.0386, + "step": 58920 + }, + { + "epoch": 2.75, + "learning_rate": 1.0924082579593373e-05, + "loss": 0.0822, + "step": 58925 + }, + { + "epoch": 2.75, + "learning_rate": 1.0923298794538586e-05, + "loss": 0.0393, + "step": 58930 + }, + { + "epoch": 2.75, + "learning_rate": 1.0922515009483801e-05, + "loss": 0.0475, + "step": 58935 + }, + { + "epoch": 2.75, + "learning_rate": 1.0921731224429013e-05, + "loss": 0.0143, + "step": 58940 + }, + { + "epoch": 2.75, + "learning_rate": 1.0920947439374227e-05, + "loss": 0.1933, + "step": 58945 + }, + { + "epoch": 2.75, + "learning_rate": 1.092016365431944e-05, + "loss": 0.1644, + "step": 58950 + }, + { + "epoch": 2.75, + "learning_rate": 1.0919379869264655e-05, + "loss": 0.1502, + "step": 58955 + }, + { + "epoch": 2.75, + "learning_rate": 1.0918596084209867e-05, + "loss": 0.1974, + "step": 58960 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917812299155081e-05, + "loss": 0.2703, + "step": 58965 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917028514100293e-05, + "loss": 0.0824, + "step": 58970 + }, + { + "epoch": 2.75, + "learning_rate": 1.0916244729045506e-05, + "loss": 0.0104, + "step": 58975 + }, + { + "epoch": 2.75, + "learning_rate": 1.0915460943990721e-05, + "loss": 0.0975, + "step": 58980 + }, + { + "epoch": 2.75, + "learning_rate": 1.0914677158935934e-05, + "loss": 0.0575, + "step": 58985 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913893373881147e-05, + "loss": 0.059, + "step": 58990 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913109588826361e-05, + "loss": 0.1536, + "step": 58995 + }, + { + "epoch": 2.75, + "learning_rate": 1.0912325803771575e-05, + "loss": 0.1253, + "step": 59000 + }, + { + "epoch": 2.75, + "learning_rate": 1.0911542018716787e-05, + "loss": 0.198, + "step": 59005 + }, + { + "epoch": 2.75, + "learning_rate": 1.0910758233662001e-05, + "loss": 0.2212, + "step": 59010 + }, + { + "epoch": 2.75, + "learning_rate": 1.0909974448607215e-05, + "loss": 0.3376, + "step": 59015 + }, + { + "epoch": 2.75, + "learning_rate": 1.090919066355243e-05, + "loss": 0.0407, + "step": 59020 + }, + { + "epoch": 2.75, + "learning_rate": 1.0908406878497641e-05, + "loss": 0.0796, + "step": 59025 + }, + { + "epoch": 2.75, + "learning_rate": 1.0907623093442857e-05, + "loss": 0.0662, + "step": 59030 + }, + { + "epoch": 2.75, + "learning_rate": 1.090683930838807e-05, + "loss": 0.0887, + "step": 59035 + }, + { + "epoch": 2.75, + "learning_rate": 1.0906055523333281e-05, + "loss": 0.0557, + "step": 59040 + }, + { + "epoch": 2.76, + "learning_rate": 1.0905271738278495e-05, + "loss": 0.1039, + "step": 59045 + }, + { + "epoch": 2.76, + "learning_rate": 1.0904487953223708e-05, + "loss": 0.1408, + "step": 59050 + }, + { + "epoch": 2.76, + "learning_rate": 1.0903704168168923e-05, + "loss": 0.1745, + "step": 59055 + }, + { + "epoch": 2.76, + "learning_rate": 1.0902920383114135e-05, + "loss": 0.2041, + "step": 59060 + }, + { + "epoch": 2.76, + "learning_rate": 1.090213659805935e-05, + "loss": 0.3081, + "step": 59065 + }, + { + "epoch": 2.76, + "learning_rate": 1.0901352813004561e-05, + "loss": 0.0728, + "step": 59070 + }, + { + "epoch": 2.76, + "learning_rate": 1.0900569027949777e-05, + "loss": 0.0263, + "step": 59075 + }, + { + "epoch": 2.76, + "learning_rate": 1.089978524289499e-05, + "loss": 0.0564, + "step": 59080 + }, + { + "epoch": 2.76, + "learning_rate": 1.0899001457840203e-05, + "loss": 0.0237, + "step": 59085 + }, + { + "epoch": 2.76, + "learning_rate": 1.0898217672785415e-05, + "loss": 0.1018, + "step": 59090 + }, + { + "epoch": 2.76, + "learning_rate": 1.0897433887730631e-05, + "loss": 0.1151, + "step": 59095 + }, + { + "epoch": 2.76, + "learning_rate": 1.0896650102675843e-05, + "loss": 0.147, + "step": 59100 + }, + { + "epoch": 2.76, + "learning_rate": 1.0895866317621055e-05, + "loss": 0.1815, + "step": 59105 + }, + { + "epoch": 2.76, + "learning_rate": 1.089508253256627e-05, + "loss": 0.1446, + "step": 59110 + }, + { + "epoch": 2.76, + "learning_rate": 1.0894298747511483e-05, + "loss": 0.2181, + "step": 59115 + }, + { + "epoch": 2.76, + "learning_rate": 1.0893514962456697e-05, + "loss": 0.0203, + "step": 59120 + }, + { + "epoch": 2.76, + "learning_rate": 1.089273117740191e-05, + "loss": 0.0237, + "step": 59125 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891947392347125e-05, + "loss": 0.0479, + "step": 59130 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891163607292337e-05, + "loss": 0.0397, + "step": 59135 + }, + { + "epoch": 2.76, + "learning_rate": 1.0890379822237551e-05, + "loss": 0.0788, + "step": 59140 + }, + { + "epoch": 2.76, + "learning_rate": 1.0889596037182763e-05, + "loss": 0.1114, + "step": 59145 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888812252127979e-05, + "loss": 0.1694, + "step": 59150 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888028467073191e-05, + "loss": 0.2413, + "step": 59155 + }, + { + "epoch": 2.76, + "learning_rate": 1.0887244682018405e-05, + "loss": 0.2377, + "step": 59160 + }, + { + "epoch": 2.76, + "learning_rate": 1.0886460896963617e-05, + "loss": 0.2335, + "step": 59165 + }, + { + "epoch": 2.76, + "learning_rate": 1.088567711190883e-05, + "loss": 0.0697, + "step": 59170 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884893326854045e-05, + "loss": 0.0551, + "step": 59175 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884109541799257e-05, + "loss": 0.0589, + "step": 59180 + }, + { + "epoch": 2.76, + "learning_rate": 1.0883325756744471e-05, + "loss": 0.0893, + "step": 59185 + }, + { + "epoch": 2.76, + "learning_rate": 1.0882541971689683e-05, + "loss": 0.1181, + "step": 59190 + }, + { + "epoch": 2.76, + "learning_rate": 1.0881758186634899e-05, + "loss": 0.0627, + "step": 59195 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880974401580111e-05, + "loss": 0.0549, + "step": 59200 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880190616525325e-05, + "loss": 0.1895, + "step": 59205 + }, + { + "epoch": 2.76, + "learning_rate": 1.0879406831470539e-05, + "loss": 0.1951, + "step": 59210 + }, + { + "epoch": 2.76, + "learning_rate": 1.0878623046415753e-05, + "loss": 0.2903, + "step": 59215 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877839261360965e-05, + "loss": 0.0543, + "step": 59220 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877055476306179e-05, + "loss": 0.0562, + "step": 59225 + }, + { + "epoch": 2.76, + "learning_rate": 1.0876271691251393e-05, + "loss": 0.0212, + "step": 59230 + }, + { + "epoch": 2.76, + "learning_rate": 1.0875487906196605e-05, + "loss": 0.0372, + "step": 59235 + }, + { + "epoch": 2.76, + "learning_rate": 1.0874704121141819e-05, + "loss": 0.0557, + "step": 59240 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873920336087031e-05, + "loss": 0.0941, + "step": 59245 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873136551032247e-05, + "loss": 0.1335, + "step": 59250 + }, + { + "epoch": 2.76, + "learning_rate": 1.0872352765977459e-05, + "loss": 0.1388, + "step": 59255 + }, + { + "epoch": 2.77, + "learning_rate": 1.0871568980922673e-05, + "loss": 0.3518, + "step": 59260 + }, + { + "epoch": 2.77, + "learning_rate": 1.0870785195867885e-05, + "loss": 0.2424, + "step": 59265 + }, + { + "epoch": 2.77, + "learning_rate": 1.08700014108131e-05, + "loss": 0.0594, + "step": 59270 + }, + { + "epoch": 2.77, + "learning_rate": 1.0869217625758313e-05, + "loss": 0.0566, + "step": 59275 + }, + { + "epoch": 2.77, + "learning_rate": 1.0868433840703527e-05, + "loss": 0.0809, + "step": 59280 + }, + { + "epoch": 2.77, + "learning_rate": 1.0867650055648739e-05, + "loss": 0.0813, + "step": 59285 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866866270593955e-05, + "loss": 0.1093, + "step": 59290 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866082485539167e-05, + "loss": 0.0765, + "step": 59295 + }, + { + "epoch": 2.77, + "learning_rate": 1.0865298700484379e-05, + "loss": 0.1189, + "step": 59300 + }, + { + "epoch": 2.77, + "learning_rate": 1.0864514915429593e-05, + "loss": 0.2332, + "step": 59305 + }, + { + "epoch": 2.77, + "learning_rate": 1.0863731130374807e-05, + "loss": 0.2367, + "step": 59310 + }, + { + "epoch": 2.77, + "learning_rate": 1.086294734532002e-05, + "loss": 0.3552, + "step": 59315 + }, + { + "epoch": 2.77, + "learning_rate": 1.0862163560265233e-05, + "loss": 0.0216, + "step": 59320 + }, + { + "epoch": 2.77, + "learning_rate": 1.0861379775210447e-05, + "loss": 0.0542, + "step": 59325 + }, + { + "epoch": 2.77, + "learning_rate": 1.086059599015566e-05, + "loss": 0.0626, + "step": 59330 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859812205100875e-05, + "loss": 0.0793, + "step": 59335 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859028420046087e-05, + "loss": 0.0943, + "step": 59340 + }, + { + "epoch": 2.77, + "learning_rate": 1.0858244634991303e-05, + "loss": 0.1106, + "step": 59345 + }, + { + "epoch": 2.77, + "learning_rate": 1.0857460849936515e-05, + "loss": 0.1345, + "step": 59350 + }, + { + "epoch": 2.77, + "learning_rate": 1.0856677064881729e-05, + "loss": 0.2081, + "step": 59355 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855893279826941e-05, + "loss": 0.3029, + "step": 59360 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855109494772153e-05, + "loss": 0.2904, + "step": 59365 + }, + { + "epoch": 2.77, + "learning_rate": 1.0854325709717369e-05, + "loss": 0.0358, + "step": 59370 + }, + { + "epoch": 2.77, + "learning_rate": 1.0853541924662581e-05, + "loss": 0.0264, + "step": 59375 + }, + { + "epoch": 2.77, + "learning_rate": 1.0852758139607795e-05, + "loss": 0.0394, + "step": 59380 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851974354553007e-05, + "loss": 0.0479, + "step": 59385 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851190569498223e-05, + "loss": 0.0999, + "step": 59390 + }, + { + "epoch": 2.77, + "learning_rate": 1.0850406784443435e-05, + "loss": 0.0661, + "step": 59395 + }, + { + "epoch": 2.77, + "learning_rate": 1.0849622999388649e-05, + "loss": 0.0937, + "step": 59400 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848839214333861e-05, + "loss": 0.1815, + "step": 59405 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848055429279077e-05, + "loss": 0.3394, + "step": 59410 + }, + { + "epoch": 2.77, + "learning_rate": 1.0847271644224289e-05, + "loss": 0.3475, + "step": 59415 + }, + { + "epoch": 2.77, + "learning_rate": 1.0846487859169503e-05, + "loss": 0.0382, + "step": 59420 + }, + { + "epoch": 2.77, + "learning_rate": 1.0845704074114715e-05, + "loss": 0.0969, + "step": 59425 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844920289059929e-05, + "loss": 0.0548, + "step": 59430 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844136504005143e-05, + "loss": 0.0841, + "step": 59435 + }, + { + "epoch": 2.77, + "learning_rate": 1.0843352718950355e-05, + "loss": 0.0781, + "step": 59440 + }, + { + "epoch": 2.77, + "learning_rate": 1.084256893389557e-05, + "loss": 0.1071, + "step": 59445 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841785148840783e-05, + "loss": 0.1219, + "step": 59450 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841001363785997e-05, + "loss": 0.1605, + "step": 59455 + }, + { + "epoch": 2.77, + "learning_rate": 1.0840217578731209e-05, + "loss": 0.3221, + "step": 59460 + }, + { + "epoch": 2.77, + "learning_rate": 1.0839433793676424e-05, + "loss": 0.2631, + "step": 59465 + }, + { + "epoch": 2.77, + "learning_rate": 1.0838650008621637e-05, + "loss": 0.0423, + "step": 59470 + }, + { + "epoch": 2.78, + "learning_rate": 1.083786622356685e-05, + "loss": 0.0269, + "step": 59475 + }, + { + "epoch": 2.78, + "learning_rate": 1.0837082438512063e-05, + "loss": 0.0213, + "step": 59480 + }, + { + "epoch": 2.78, + "learning_rate": 1.0836298653457278e-05, + "loss": 0.0742, + "step": 59485 + }, + { + "epoch": 2.78, + "learning_rate": 1.083551486840249e-05, + "loss": 0.0317, + "step": 59490 + }, + { + "epoch": 2.78, + "learning_rate": 1.0834731083347703e-05, + "loss": 0.1428, + "step": 59495 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833947298292917e-05, + "loss": 0.1145, + "step": 59500 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833163513238129e-05, + "loss": 0.1002, + "step": 59505 + }, + { + "epoch": 2.78, + "learning_rate": 1.0832379728183344e-05, + "loss": 0.2298, + "step": 59510 + }, + { + "epoch": 2.78, + "learning_rate": 1.0831595943128557e-05, + "loss": 0.3871, + "step": 59515 + }, + { + "epoch": 2.78, + "learning_rate": 1.083081215807377e-05, + "loss": 0.03, + "step": 59520 + }, + { + "epoch": 2.78, + "learning_rate": 1.0830028373018985e-05, + "loss": 0.0571, + "step": 59525 + }, + { + "epoch": 2.78, + "learning_rate": 1.0829244587964198e-05, + "loss": 0.0582, + "step": 59530 + }, + { + "epoch": 2.78, + "learning_rate": 1.082846080290941e-05, + "loss": 0.0255, + "step": 59535 + }, + { + "epoch": 2.78, + "learning_rate": 1.0827677017854625e-05, + "loss": 0.0613, + "step": 59540 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826893232799838e-05, + "loss": 0.0576, + "step": 59545 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826109447745052e-05, + "loss": 0.172, + "step": 59550 + }, + { + "epoch": 2.78, + "learning_rate": 1.0825325662690265e-05, + "loss": 0.1161, + "step": 59555 + }, + { + "epoch": 2.78, + "learning_rate": 1.0824541877635477e-05, + "loss": 0.3, + "step": 59560 + }, + { + "epoch": 2.78, + "learning_rate": 1.0823758092580692e-05, + "loss": 0.3196, + "step": 59565 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822974307525905e-05, + "loss": 0.0401, + "step": 59570 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822190522471118e-05, + "loss": 0.0501, + "step": 59575 + }, + { + "epoch": 2.78, + "learning_rate": 1.082140673741633e-05, + "loss": 0.03, + "step": 59580 + }, + { + "epoch": 2.78, + "learning_rate": 1.0820622952361546e-05, + "loss": 0.1153, + "step": 59585 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819839167306759e-05, + "loss": 0.095, + "step": 59590 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819055382251972e-05, + "loss": 0.0778, + "step": 59595 + }, + { + "epoch": 2.78, + "learning_rate": 1.0818271597197185e-05, + "loss": 0.0841, + "step": 59600 + }, + { + "epoch": 2.78, + "learning_rate": 1.08174878121424e-05, + "loss": 0.1166, + "step": 59605 + }, + { + "epoch": 2.78, + "learning_rate": 1.0816704027087612e-05, + "loss": 0.202, + "step": 59610 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815920242032826e-05, + "loss": 0.2211, + "step": 59615 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815136456978039e-05, + "loss": 0.0165, + "step": 59620 + }, + { + "epoch": 2.78, + "learning_rate": 1.0814352671923252e-05, + "loss": 0.0745, + "step": 59625 + }, + { + "epoch": 2.78, + "learning_rate": 1.0813568886868466e-05, + "loss": 0.0653, + "step": 59630 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812785101813679e-05, + "loss": 0.0922, + "step": 59635 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812001316758892e-05, + "loss": 0.1051, + "step": 59640 + }, + { + "epoch": 2.78, + "learning_rate": 1.0811217531704106e-05, + "loss": 0.1219, + "step": 59645 + }, + { + "epoch": 2.78, + "learning_rate": 1.081043374664932e-05, + "loss": 0.2201, + "step": 59650 + }, + { + "epoch": 2.78, + "learning_rate": 1.0809649961594533e-05, + "loss": 0.1585, + "step": 59655 + }, + { + "epoch": 2.78, + "learning_rate": 1.0808866176539748e-05, + "loss": 0.2065, + "step": 59660 + }, + { + "epoch": 2.78, + "learning_rate": 1.080808239148496e-05, + "loss": 0.3347, + "step": 59665 + }, + { + "epoch": 2.78, + "learning_rate": 1.0807298606430174e-05, + "loss": 0.0825, + "step": 59670 + }, + { + "epoch": 2.78, + "learning_rate": 1.0806514821375386e-05, + "loss": 0.0217, + "step": 59675 + }, + { + "epoch": 2.78, + "learning_rate": 1.0805731036320602e-05, + "loss": 0.0387, + "step": 59680 + }, + { + "epoch": 2.78, + "learning_rate": 1.0804947251265814e-05, + "loss": 0.0708, + "step": 59685 + }, + { + "epoch": 2.79, + "learning_rate": 1.0804163466211026e-05, + "loss": 0.1803, + "step": 59690 + }, + { + "epoch": 2.79, + "learning_rate": 1.080337968115624e-05, + "loss": 0.0929, + "step": 59695 + }, + { + "epoch": 2.79, + "learning_rate": 1.0802595896101453e-05, + "loss": 0.0763, + "step": 59700 + }, + { + "epoch": 2.79, + "learning_rate": 1.0801812111046668e-05, + "loss": 0.1241, + "step": 59705 + }, + { + "epoch": 2.79, + "learning_rate": 1.080102832599188e-05, + "loss": 0.2267, + "step": 59710 + }, + { + "epoch": 2.79, + "learning_rate": 1.0800244540937094e-05, + "loss": 0.1755, + "step": 59715 + }, + { + "epoch": 2.79, + "learning_rate": 1.0799460755882307e-05, + "loss": 0.0372, + "step": 59720 + }, + { + "epoch": 2.79, + "learning_rate": 1.0798676970827522e-05, + "loss": 0.0253, + "step": 59725 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797893185772734e-05, + "loss": 0.1087, + "step": 59730 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797109400717948e-05, + "loss": 0.0811, + "step": 59735 + }, + { + "epoch": 2.79, + "learning_rate": 1.079632561566316e-05, + "loss": 0.0563, + "step": 59740 + }, + { + "epoch": 2.79, + "learning_rate": 1.0795541830608376e-05, + "loss": 0.1129, + "step": 59745 + }, + { + "epoch": 2.79, + "learning_rate": 1.0794758045553588e-05, + "loss": 0.1649, + "step": 59750 + }, + { + "epoch": 2.79, + "learning_rate": 1.07939742604988e-05, + "loss": 0.1641, + "step": 59755 + }, + { + "epoch": 2.79, + "learning_rate": 1.0793190475444016e-05, + "loss": 0.2485, + "step": 59760 + }, + { + "epoch": 2.79, + "learning_rate": 1.0792406690389228e-05, + "loss": 0.2066, + "step": 59765 + }, + { + "epoch": 2.79, + "learning_rate": 1.0791622905334442e-05, + "loss": 0.0445, + "step": 59770 + }, + { + "epoch": 2.79, + "learning_rate": 1.0790839120279654e-05, + "loss": 0.0544, + "step": 59775 + }, + { + "epoch": 2.79, + "learning_rate": 1.079005533522487e-05, + "loss": 0.0232, + "step": 59780 + }, + { + "epoch": 2.79, + "learning_rate": 1.0789271550170082e-05, + "loss": 0.0835, + "step": 59785 + }, + { + "epoch": 2.79, + "learning_rate": 1.0788487765115296e-05, + "loss": 0.1308, + "step": 59790 + }, + { + "epoch": 2.79, + "learning_rate": 1.0787703980060508e-05, + "loss": 0.0412, + "step": 59795 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786920195005724e-05, + "loss": 0.2067, + "step": 59800 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786136409950936e-05, + "loss": 0.1801, + "step": 59805 + }, + { + "epoch": 2.79, + "learning_rate": 1.078535262489615e-05, + "loss": 0.2654, + "step": 59810 + }, + { + "epoch": 2.79, + "learning_rate": 1.0784568839841362e-05, + "loss": 0.2404, + "step": 59815 + }, + { + "epoch": 2.79, + "learning_rate": 1.0783785054786574e-05, + "loss": 0.0479, + "step": 59820 + }, + { + "epoch": 2.79, + "learning_rate": 1.078300126973179e-05, + "loss": 0.0855, + "step": 59825 + }, + { + "epoch": 2.79, + "learning_rate": 1.0782217484677002e-05, + "loss": 0.0729, + "step": 59830 + }, + { + "epoch": 2.79, + "learning_rate": 1.0781433699622216e-05, + "loss": 0.0526, + "step": 59835 + }, + { + "epoch": 2.79, + "learning_rate": 1.078064991456743e-05, + "loss": 0.0608, + "step": 59840 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779866129512644e-05, + "loss": 0.0754, + "step": 59845 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779082344457856e-05, + "loss": 0.1885, + "step": 59850 + }, + { + "epoch": 2.79, + "learning_rate": 1.077829855940307e-05, + "loss": 0.1078, + "step": 59855 + }, + { + "epoch": 2.79, + "learning_rate": 1.0777514774348284e-05, + "loss": 0.2044, + "step": 59860 + }, + { + "epoch": 2.79, + "learning_rate": 1.0776730989293498e-05, + "loss": 0.3295, + "step": 59865 + }, + { + "epoch": 2.79, + "learning_rate": 1.077594720423871e-05, + "loss": 0.0507, + "step": 59870 + }, + { + "epoch": 2.79, + "learning_rate": 1.0775163419183926e-05, + "loss": 0.0369, + "step": 59875 + }, + { + "epoch": 2.79, + "learning_rate": 1.0774379634129138e-05, + "loss": 0.0298, + "step": 59880 + }, + { + "epoch": 2.79, + "learning_rate": 1.077359584907435e-05, + "loss": 0.0492, + "step": 59885 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772812064019564e-05, + "loss": 0.0656, + "step": 59890 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772028278964776e-05, + "loss": 0.0895, + "step": 59895 + }, + { + "epoch": 2.8, + "learning_rate": 1.0771244493909992e-05, + "loss": 0.1175, + "step": 59900 + }, + { + "epoch": 2.8, + "learning_rate": 1.0770460708855204e-05, + "loss": 0.1537, + "step": 59905 + }, + { + "epoch": 2.8, + "learning_rate": 1.0769676923800418e-05, + "loss": 0.3598, + "step": 59910 + }, + { + "epoch": 2.8, + "learning_rate": 1.076889313874563e-05, + "loss": 0.2923, + "step": 59915 + }, + { + "epoch": 2.8, + "learning_rate": 1.0768109353690846e-05, + "loss": 0.0608, + "step": 59920 + }, + { + "epoch": 2.8, + "learning_rate": 1.0767325568636058e-05, + "loss": 0.0519, + "step": 59925 + }, + { + "epoch": 2.8, + "learning_rate": 1.0766541783581272e-05, + "loss": 0.0307, + "step": 59930 + }, + { + "epoch": 2.8, + "learning_rate": 1.0765757998526484e-05, + "loss": 0.1519, + "step": 59935 + }, + { + "epoch": 2.8, + "learning_rate": 1.07649742134717e-05, + "loss": 0.0559, + "step": 59940 + }, + { + "epoch": 2.8, + "learning_rate": 1.0764190428416912e-05, + "loss": 0.1429, + "step": 59945 + }, + { + "epoch": 2.8, + "learning_rate": 1.0763406643362124e-05, + "loss": 0.085, + "step": 59950 + }, + { + "epoch": 2.8, + "learning_rate": 1.0762622858307338e-05, + "loss": 0.2346, + "step": 59955 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761839073252552e-05, + "loss": 0.2619, + "step": 59960 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761055288197766e-05, + "loss": 0.1942, + "step": 59965 + }, + { + "epoch": 2.8, + "learning_rate": 1.0760271503142978e-05, + "loss": 0.0377, + "step": 59970 + }, + { + "epoch": 2.8, + "learning_rate": 1.0759487718088194e-05, + "loss": 0.0325, + "step": 59975 + }, + { + "epoch": 2.8, + "learning_rate": 1.0758703933033406e-05, + "loss": 0.0534, + "step": 59980 + }, + { + "epoch": 2.8, + "learning_rate": 1.075792014797862e-05, + "loss": 0.0849, + "step": 59985 + }, + { + "epoch": 2.8, + "learning_rate": 1.0757136362923832e-05, + "loss": 0.0567, + "step": 59990 + }, + { + "epoch": 2.8, + "learning_rate": 1.0756352577869048e-05, + "loss": 0.0723, + "step": 59995 + }, + { + "epoch": 2.8, + "learning_rate": 1.075556879281426e-05, + "loss": 0.1649, + "step": 60000 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754785007759474e-05, + "loss": 0.1065, + "step": 60005 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754001222704686e-05, + "loss": 0.0981, + "step": 60010 + }, + { + "epoch": 2.8, + "learning_rate": 1.0753217437649898e-05, + "loss": 0.3571, + "step": 60015 + }, + { + "epoch": 2.8, + "learning_rate": 1.0752433652595114e-05, + "loss": 0.0691, + "step": 60020 + }, + { + "epoch": 2.8, + "learning_rate": 1.0751649867540326e-05, + "loss": 0.0567, + "step": 60025 + }, + { + "epoch": 2.8, + "learning_rate": 1.075086608248554e-05, + "loss": 0.0616, + "step": 60030 + }, + { + "epoch": 2.8, + "learning_rate": 1.0750082297430752e-05, + "loss": 0.0686, + "step": 60035 + }, + { + "epoch": 2.8, + "learning_rate": 1.0749298512375968e-05, + "loss": 0.0687, + "step": 60040 + }, + { + "epoch": 2.8, + "learning_rate": 1.074851472732118e-05, + "loss": 0.1136, + "step": 60045 + }, + { + "epoch": 2.8, + "learning_rate": 1.0747730942266394e-05, + "loss": 0.1254, + "step": 60050 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746947157211606e-05, + "loss": 0.2474, + "step": 60055 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746163372156822e-05, + "loss": 0.1883, + "step": 60060 + }, + { + "epoch": 2.8, + "learning_rate": 1.0745379587102034e-05, + "loss": 0.4343, + "step": 60065 + }, + { + "epoch": 2.8, + "learning_rate": 1.0744595802047248e-05, + "loss": 0.0974, + "step": 60070 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743812016992462e-05, + "loss": 0.0394, + "step": 60075 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743028231937674e-05, + "loss": 0.0907, + "step": 60080 + }, + { + "epoch": 2.8, + "learning_rate": 1.0742244446882888e-05, + "loss": 0.1173, + "step": 60085 + }, + { + "epoch": 2.8, + "learning_rate": 1.07414606618281e-05, + "loss": 0.096, + "step": 60090 + }, + { + "epoch": 2.8, + "learning_rate": 1.0740676876773316e-05, + "loss": 0.1801, + "step": 60095 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739893091718528e-05, + "loss": 0.2232, + "step": 60100 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739109306663742e-05, + "loss": 0.1869, + "step": 60105 + }, + { + "epoch": 2.8, + "learning_rate": 1.0738325521608954e-05, + "loss": 0.2526, + "step": 60110 + }, + { + "epoch": 2.81, + "learning_rate": 1.073754173655417e-05, + "loss": 0.1866, + "step": 60115 + }, + { + "epoch": 2.81, + "learning_rate": 1.0736757951499382e-05, + "loss": 0.088, + "step": 60120 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735974166444596e-05, + "loss": 0.0488, + "step": 60125 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735190381389808e-05, + "loss": 0.0262, + "step": 60130 + }, + { + "epoch": 2.81, + "learning_rate": 1.0734406596335023e-05, + "loss": 0.0383, + "step": 60135 + }, + { + "epoch": 2.81, + "learning_rate": 1.0733622811280236e-05, + "loss": 0.1139, + "step": 60140 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732839026225448e-05, + "loss": 0.0991, + "step": 60145 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732055241170662e-05, + "loss": 0.08, + "step": 60150 + }, + { + "epoch": 2.81, + "learning_rate": 1.0731271456115876e-05, + "loss": 0.1972, + "step": 60155 + }, + { + "epoch": 2.81, + "learning_rate": 1.073048767106109e-05, + "loss": 0.157, + "step": 60160 + }, + { + "epoch": 2.81, + "learning_rate": 1.0729703886006302e-05, + "loss": 0.3585, + "step": 60165 + }, + { + "epoch": 2.81, + "learning_rate": 1.0728920100951516e-05, + "loss": 0.0269, + "step": 60170 + }, + { + "epoch": 2.81, + "learning_rate": 1.072813631589673e-05, + "loss": 0.0937, + "step": 60175 + }, + { + "epoch": 2.81, + "learning_rate": 1.0727352530841943e-05, + "loss": 0.0532, + "step": 60180 + }, + { + "epoch": 2.81, + "learning_rate": 1.0726568745787156e-05, + "loss": 0.065, + "step": 60185 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725784960732371e-05, + "loss": 0.1078, + "step": 60190 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725001175677584e-05, + "loss": 0.0757, + "step": 60195 + }, + { + "epoch": 2.81, + "learning_rate": 1.0724217390622797e-05, + "loss": 0.1246, + "step": 60200 + }, + { + "epoch": 2.81, + "learning_rate": 1.072343360556801e-05, + "loss": 0.1834, + "step": 60205 + }, + { + "epoch": 2.81, + "learning_rate": 1.0722649820513222e-05, + "loss": 0.3636, + "step": 60210 + }, + { + "epoch": 2.81, + "learning_rate": 1.0721866035458437e-05, + "loss": 0.2649, + "step": 60215 + }, + { + "epoch": 2.81, + "learning_rate": 1.072108225040365e-05, + "loss": 0.0994, + "step": 60220 + }, + { + "epoch": 2.81, + "learning_rate": 1.0720298465348864e-05, + "loss": 0.0371, + "step": 60225 + }, + { + "epoch": 2.81, + "learning_rate": 1.0719514680294076e-05, + "loss": 0.0265, + "step": 60230 + }, + { + "epoch": 2.81, + "learning_rate": 1.0718730895239291e-05, + "loss": 0.0794, + "step": 60235 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717947110184504e-05, + "loss": 0.0882, + "step": 60240 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717163325129717e-05, + "loss": 0.0821, + "step": 60245 + }, + { + "epoch": 2.81, + "learning_rate": 1.071637954007493e-05, + "loss": 0.0938, + "step": 60250 + }, + { + "epoch": 2.81, + "learning_rate": 1.0715595755020145e-05, + "loss": 0.0977, + "step": 60255 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714811969965358e-05, + "loss": 0.165, + "step": 60260 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714028184910571e-05, + "loss": 0.3129, + "step": 60265 + }, + { + "epoch": 2.81, + "learning_rate": 1.0713244399855784e-05, + "loss": 0.0767, + "step": 60270 + }, + { + "epoch": 2.81, + "learning_rate": 1.0712460614800998e-05, + "loss": 0.0686, + "step": 60275 + }, + { + "epoch": 2.81, + "learning_rate": 1.0711676829746211e-05, + "loss": 0.0351, + "step": 60280 + }, + { + "epoch": 2.81, + "learning_rate": 1.0710893044691424e-05, + "loss": 0.0881, + "step": 60285 + }, + { + "epoch": 2.81, + "learning_rate": 1.071010925963664e-05, + "loss": 0.0737, + "step": 60290 + }, + { + "epoch": 2.81, + "learning_rate": 1.0709325474581851e-05, + "loss": 0.125, + "step": 60295 + }, + { + "epoch": 2.81, + "learning_rate": 1.0708541689527065e-05, + "loss": 0.1237, + "step": 60300 + }, + { + "epoch": 2.81, + "learning_rate": 1.0707757904472278e-05, + "loss": 0.1589, + "step": 60305 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706974119417493e-05, + "loss": 0.1822, + "step": 60310 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706190334362705e-05, + "loss": 0.2297, + "step": 60315 + }, + { + "epoch": 2.81, + "learning_rate": 1.070540654930792e-05, + "loss": 0.0529, + "step": 60320 + }, + { + "epoch": 2.81, + "learning_rate": 1.0704622764253132e-05, + "loss": 0.0159, + "step": 60325 + }, + { + "epoch": 2.82, + "learning_rate": 1.0703838979198347e-05, + "loss": 0.0392, + "step": 60330 + }, + { + "epoch": 2.82, + "learning_rate": 1.070305519414356e-05, + "loss": 0.0952, + "step": 60335 + }, + { + "epoch": 2.82, + "learning_rate": 1.0702271409088772e-05, + "loss": 0.1019, + "step": 60340 + }, + { + "epoch": 2.82, + "learning_rate": 1.0701487624033985e-05, + "loss": 0.1498, + "step": 60345 + }, + { + "epoch": 2.82, + "learning_rate": 1.0700703838979198e-05, + "loss": 0.1059, + "step": 60350 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699920053924413e-05, + "loss": 0.1739, + "step": 60355 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699136268869625e-05, + "loss": 0.2294, + "step": 60360 + }, + { + "epoch": 2.82, + "learning_rate": 1.069835248381484e-05, + "loss": 0.3432, + "step": 60365 + }, + { + "epoch": 2.82, + "learning_rate": 1.0697568698760053e-05, + "loss": 0.0413, + "step": 60370 + }, + { + "epoch": 2.82, + "learning_rate": 1.0696784913705267e-05, + "loss": 0.0329, + "step": 60375 + }, + { + "epoch": 2.82, + "learning_rate": 1.069600112865048e-05, + "loss": 0.0649, + "step": 60380 + }, + { + "epoch": 2.82, + "learning_rate": 1.0695217343595693e-05, + "loss": 0.0388, + "step": 60385 + }, + { + "epoch": 2.82, + "learning_rate": 1.0694433558540907e-05, + "loss": 0.1039, + "step": 60390 + }, + { + "epoch": 2.82, + "learning_rate": 1.0693649773486121e-05, + "loss": 0.0985, + "step": 60395 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692865988431333e-05, + "loss": 0.152, + "step": 60400 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692082203376546e-05, + "loss": 0.1796, + "step": 60405 + }, + { + "epoch": 2.82, + "learning_rate": 1.0691298418321761e-05, + "loss": 0.1797, + "step": 60410 + }, + { + "epoch": 2.82, + "learning_rate": 1.0690514633266973e-05, + "loss": 0.2262, + "step": 60415 + }, + { + "epoch": 2.82, + "learning_rate": 1.0689730848212187e-05, + "loss": 0.0712, + "step": 60420 + }, + { + "epoch": 2.82, + "learning_rate": 1.06889470631574e-05, + "loss": 0.0351, + "step": 60425 + }, + { + "epoch": 2.82, + "learning_rate": 1.0688163278102615e-05, + "loss": 0.0411, + "step": 60430 + }, + { + "epoch": 2.82, + "learning_rate": 1.0687379493047827e-05, + "loss": 0.0386, + "step": 60435 + }, + { + "epoch": 2.82, + "learning_rate": 1.0686595707993041e-05, + "loss": 0.0963, + "step": 60440 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685811922938253e-05, + "loss": 0.1084, + "step": 60445 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685028137883469e-05, + "loss": 0.1291, + "step": 60450 + }, + { + "epoch": 2.82, + "learning_rate": 1.0684244352828681e-05, + "loss": 0.2619, + "step": 60455 + }, + { + "epoch": 2.82, + "learning_rate": 1.0683460567773895e-05, + "loss": 0.2443, + "step": 60460 + }, + { + "epoch": 2.82, + "learning_rate": 1.0682676782719107e-05, + "loss": 0.2697, + "step": 60465 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681892997664321e-05, + "loss": 0.0326, + "step": 60470 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681109212609535e-05, + "loss": 0.0153, + "step": 60475 + }, + { + "epoch": 2.82, + "learning_rate": 1.0680325427554747e-05, + "loss": 0.0179, + "step": 60480 + }, + { + "epoch": 2.82, + "learning_rate": 1.0679541642499961e-05, + "loss": 0.036, + "step": 60485 + }, + { + "epoch": 2.82, + "learning_rate": 1.0678757857445175e-05, + "loss": 0.057, + "step": 60490 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677974072390389e-05, + "loss": 0.0842, + "step": 60495 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677190287335601e-05, + "loss": 0.1821, + "step": 60500 + }, + { + "epoch": 2.82, + "learning_rate": 1.0676406502280817e-05, + "loss": 0.1145, + "step": 60505 + }, + { + "epoch": 2.82, + "learning_rate": 1.0675622717226029e-05, + "loss": 0.1985, + "step": 60510 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674838932171243e-05, + "loss": 0.202, + "step": 60515 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674055147116455e-05, + "loss": 0.0571, + "step": 60520 + }, + { + "epoch": 2.82, + "learning_rate": 1.067327136206167e-05, + "loss": 0.046, + "step": 60525 + }, + { + "epoch": 2.82, + "learning_rate": 1.0672487577006883e-05, + "loss": 0.0769, + "step": 60530 + }, + { + "epoch": 2.82, + "learning_rate": 1.0671703791952095e-05, + "loss": 0.0421, + "step": 60535 + }, + { + "epoch": 2.82, + "learning_rate": 1.0670920006897309e-05, + "loss": 0.121, + "step": 60540 + }, + { + "epoch": 2.83, + "learning_rate": 1.0670136221842521e-05, + "loss": 0.099, + "step": 60545 + }, + { + "epoch": 2.83, + "learning_rate": 1.0669352436787737e-05, + "loss": 0.1578, + "step": 60550 + }, + { + "epoch": 2.83, + "learning_rate": 1.066856865173295e-05, + "loss": 0.1956, + "step": 60555 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667784866678163e-05, + "loss": 0.4068, + "step": 60560 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667001081623375e-05, + "loss": 0.3239, + "step": 60565 + }, + { + "epoch": 2.83, + "learning_rate": 1.0666217296568591e-05, + "loss": 0.0318, + "step": 60570 + }, + { + "epoch": 2.83, + "learning_rate": 1.0665433511513803e-05, + "loss": 0.0266, + "step": 60575 + }, + { + "epoch": 2.83, + "learning_rate": 1.0664649726459017e-05, + "loss": 0.0429, + "step": 60580 + }, + { + "epoch": 2.83, + "learning_rate": 1.066386594140423e-05, + "loss": 0.0474, + "step": 60585 + }, + { + "epoch": 2.83, + "learning_rate": 1.0663082156349445e-05, + "loss": 0.0555, + "step": 60590 + }, + { + "epoch": 2.83, + "learning_rate": 1.0662298371294657e-05, + "loss": 0.0826, + "step": 60595 + }, + { + "epoch": 2.83, + "learning_rate": 1.066151458623987e-05, + "loss": 0.1561, + "step": 60600 + }, + { + "epoch": 2.83, + "learning_rate": 1.0660730801185085e-05, + "loss": 0.1313, + "step": 60605 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659947016130297e-05, + "loss": 0.2536, + "step": 60610 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659163231075511e-05, + "loss": 0.2586, + "step": 60615 + }, + { + "epoch": 2.83, + "learning_rate": 1.0658379446020723e-05, + "loss": 0.0328, + "step": 60620 + }, + { + "epoch": 2.83, + "learning_rate": 1.0657595660965939e-05, + "loss": 0.0261, + "step": 60625 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656811875911151e-05, + "loss": 0.0413, + "step": 60630 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656028090856365e-05, + "loss": 0.0651, + "step": 60635 + }, + { + "epoch": 2.83, + "learning_rate": 1.0655244305801577e-05, + "loss": 0.1421, + "step": 60640 + }, + { + "epoch": 2.83, + "learning_rate": 1.0654460520746793e-05, + "loss": 0.1075, + "step": 60645 + }, + { + "epoch": 2.83, + "learning_rate": 1.0653676735692005e-05, + "loss": 0.1013, + "step": 60650 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652892950637219e-05, + "loss": 0.2221, + "step": 60655 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652109165582431e-05, + "loss": 0.1437, + "step": 60660 + }, + { + "epoch": 2.83, + "learning_rate": 1.0651325380527643e-05, + "loss": 0.2729, + "step": 60665 + }, + { + "epoch": 2.83, + "learning_rate": 1.0650541595472859e-05, + "loss": 0.0612, + "step": 60670 + }, + { + "epoch": 2.83, + "learning_rate": 1.0649757810418071e-05, + "loss": 0.0464, + "step": 60675 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648974025363285e-05, + "loss": 0.03, + "step": 60680 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648190240308499e-05, + "loss": 0.0097, + "step": 60685 + }, + { + "epoch": 2.83, + "learning_rate": 1.0647406455253713e-05, + "loss": 0.1497, + "step": 60690 + }, + { + "epoch": 2.83, + "learning_rate": 1.0646622670198925e-05, + "loss": 0.0659, + "step": 60695 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645838885144139e-05, + "loss": 0.1278, + "step": 60700 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645055100089353e-05, + "loss": 0.1808, + "step": 60705 + }, + { + "epoch": 2.83, + "learning_rate": 1.0644271315034567e-05, + "loss": 0.3674, + "step": 60710 + }, + { + "epoch": 2.83, + "learning_rate": 1.0643487529979779e-05, + "loss": 0.395, + "step": 60715 + }, + { + "epoch": 2.83, + "learning_rate": 1.0642703744924994e-05, + "loss": 0.0519, + "step": 60720 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641919959870207e-05, + "loss": 0.0121, + "step": 60725 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641136174815419e-05, + "loss": 0.0334, + "step": 60730 + }, + { + "epoch": 2.83, + "learning_rate": 1.0640352389760633e-05, + "loss": 0.0302, + "step": 60735 + }, + { + "epoch": 2.83, + "learning_rate": 1.0639568604705845e-05, + "loss": 0.2201, + "step": 60740 + }, + { + "epoch": 2.83, + "learning_rate": 1.063878481965106e-05, + "loss": 0.1408, + "step": 60745 + }, + { + "epoch": 2.83, + "learning_rate": 1.0638001034596273e-05, + "loss": 0.1301, + "step": 60750 + }, + { + "epoch": 2.83, + "learning_rate": 1.0637217249541487e-05, + "loss": 0.1961, + "step": 60755 + }, + { + "epoch": 2.84, + "learning_rate": 1.0636433464486699e-05, + "loss": 0.1496, + "step": 60760 + }, + { + "epoch": 2.84, + "learning_rate": 1.0635649679431915e-05, + "loss": 0.2515, + "step": 60765 + }, + { + "epoch": 2.84, + "learning_rate": 1.0634865894377127e-05, + "loss": 0.0856, + "step": 60770 + }, + { + "epoch": 2.84, + "learning_rate": 1.063408210932234e-05, + "loss": 0.0536, + "step": 60775 + }, + { + "epoch": 2.84, + "learning_rate": 1.0633298324267553e-05, + "loss": 0.0576, + "step": 60780 + }, + { + "epoch": 2.84, + "learning_rate": 1.0632514539212768e-05, + "loss": 0.0704, + "step": 60785 + }, + { + "epoch": 2.84, + "learning_rate": 1.063173075415798e-05, + "loss": 0.068, + "step": 60790 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630946969103193e-05, + "loss": 0.1093, + "step": 60795 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630163184048407e-05, + "loss": 0.1841, + "step": 60800 + }, + { + "epoch": 2.84, + "learning_rate": 1.062937939899362e-05, + "loss": 0.1482, + "step": 60805 + }, + { + "epoch": 2.84, + "learning_rate": 1.0628595613938835e-05, + "loss": 0.1913, + "step": 60810 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627811828884047e-05, + "loss": 0.2599, + "step": 60815 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627028043829262e-05, + "loss": 0.0773, + "step": 60820 + }, + { + "epoch": 2.84, + "learning_rate": 1.0626244258774475e-05, + "loss": 0.0442, + "step": 60825 + }, + { + "epoch": 2.84, + "learning_rate": 1.0625460473719689e-05, + "loss": 0.0988, + "step": 60830 + }, + { + "epoch": 2.84, + "learning_rate": 1.06246766886649e-05, + "loss": 0.1, + "step": 60835 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623892903610116e-05, + "loss": 0.0715, + "step": 60840 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623109118555329e-05, + "loss": 0.0782, + "step": 60845 + }, + { + "epoch": 2.84, + "learning_rate": 1.0622325333500542e-05, + "loss": 0.1205, + "step": 60850 + }, + { + "epoch": 2.84, + "learning_rate": 1.0621541548445755e-05, + "loss": 0.1423, + "step": 60855 + }, + { + "epoch": 2.84, + "learning_rate": 1.0620757763390967e-05, + "loss": 0.2899, + "step": 60860 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619973978336183e-05, + "loss": 0.2523, + "step": 60865 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619190193281395e-05, + "loss": 0.0619, + "step": 60870 + }, + { + "epoch": 2.84, + "learning_rate": 1.0618406408226609e-05, + "loss": 0.044, + "step": 60875 + }, + { + "epoch": 2.84, + "learning_rate": 1.0617622623171821e-05, + "loss": 0.0412, + "step": 60880 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616838838117036e-05, + "loss": 0.0697, + "step": 60885 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616055053062249e-05, + "loss": 0.0875, + "step": 60890 + }, + { + "epoch": 2.84, + "learning_rate": 1.0615271268007463e-05, + "loss": 0.1086, + "step": 60895 + }, + { + "epoch": 2.84, + "learning_rate": 1.0614487482952675e-05, + "loss": 0.1613, + "step": 60900 + }, + { + "epoch": 2.84, + "learning_rate": 1.061370369789789e-05, + "loss": 0.1262, + "step": 60905 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612919912843103e-05, + "loss": 0.181, + "step": 60910 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612136127788316e-05, + "loss": 0.2297, + "step": 60915 + }, + { + "epoch": 2.84, + "learning_rate": 1.061135234273353e-05, + "loss": 0.0381, + "step": 60920 + }, + { + "epoch": 2.84, + "learning_rate": 1.0610568557678743e-05, + "loss": 0.0273, + "step": 60925 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609784772623957e-05, + "loss": 0.0332, + "step": 60930 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609000987569169e-05, + "loss": 0.0649, + "step": 60935 + }, + { + "epoch": 2.84, + "learning_rate": 1.0608217202514384e-05, + "loss": 0.0916, + "step": 60940 + }, + { + "epoch": 2.84, + "learning_rate": 1.0607433417459597e-05, + "loss": 0.0944, + "step": 60945 + }, + { + "epoch": 2.84, + "learning_rate": 1.060664963240481e-05, + "loss": 0.1064, + "step": 60950 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605865847350023e-05, + "loss": 0.1578, + "step": 60955 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605082062295238e-05, + "loss": 0.1276, + "step": 60960 + }, + { + "epoch": 2.84, + "learning_rate": 1.060429827724045e-05, + "loss": 0.3312, + "step": 60965 + }, + { + "epoch": 2.84, + "learning_rate": 1.0603514492185664e-05, + "loss": 0.0179, + "step": 60970 + }, + { + "epoch": 2.85, + "learning_rate": 1.0602730707130877e-05, + "loss": 0.062, + "step": 60975 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601946922076092e-05, + "loss": 0.0659, + "step": 60980 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601163137021304e-05, + "loss": 0.1297, + "step": 60985 + }, + { + "epoch": 2.85, + "learning_rate": 1.0600379351966517e-05, + "loss": 0.0375, + "step": 60990 + }, + { + "epoch": 2.85, + "learning_rate": 1.059959556691173e-05, + "loss": 0.118, + "step": 60995 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598811781856944e-05, + "loss": 0.0791, + "step": 61000 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598027996802158e-05, + "loss": 0.1074, + "step": 61005 + }, + { + "epoch": 2.85, + "learning_rate": 1.059724421174737e-05, + "loss": 0.2143, + "step": 61010 + }, + { + "epoch": 2.85, + "learning_rate": 1.0596460426692584e-05, + "loss": 0.2168, + "step": 61015 + }, + { + "epoch": 2.85, + "learning_rate": 1.0595676641637798e-05, + "loss": 0.07, + "step": 61020 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594892856583012e-05, + "loss": 0.0378, + "step": 61025 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594109071528224e-05, + "loss": 0.0538, + "step": 61030 + }, + { + "epoch": 2.85, + "learning_rate": 1.059332528647344e-05, + "loss": 0.058, + "step": 61035 + }, + { + "epoch": 2.85, + "learning_rate": 1.0592541501418652e-05, + "loss": 0.1057, + "step": 61040 + }, + { + "epoch": 2.85, + "learning_rate": 1.0591757716363866e-05, + "loss": 0.114, + "step": 61045 + }, + { + "epoch": 2.85, + "learning_rate": 1.0590973931309078e-05, + "loss": 0.1695, + "step": 61050 + }, + { + "epoch": 2.85, + "learning_rate": 1.059019014625429e-05, + "loss": 0.171, + "step": 61055 + }, + { + "epoch": 2.85, + "learning_rate": 1.0589406361199506e-05, + "loss": 0.3068, + "step": 61060 + }, + { + "epoch": 2.85, + "learning_rate": 1.0588622576144718e-05, + "loss": 0.2925, + "step": 61065 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587838791089932e-05, + "loss": 0.0337, + "step": 61070 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587055006035145e-05, + "loss": 0.0352, + "step": 61075 + }, + { + "epoch": 2.85, + "learning_rate": 1.058627122098036e-05, + "loss": 0.0502, + "step": 61080 + }, + { + "epoch": 2.85, + "learning_rate": 1.0585487435925572e-05, + "loss": 0.0615, + "step": 61085 + }, + { + "epoch": 2.85, + "learning_rate": 1.0584703650870786e-05, + "loss": 0.1042, + "step": 61090 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583919865815998e-05, + "loss": 0.1052, + "step": 61095 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583136080761214e-05, + "loss": 0.1772, + "step": 61100 + }, + { + "epoch": 2.85, + "learning_rate": 1.0582352295706426e-05, + "loss": 0.208, + "step": 61105 + }, + { + "epoch": 2.85, + "learning_rate": 1.058156851065164e-05, + "loss": 0.2459, + "step": 61110 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580784725596852e-05, + "loss": 0.432, + "step": 61115 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580000940542066e-05, + "loss": 0.064, + "step": 61120 + }, + { + "epoch": 2.85, + "learning_rate": 1.057921715548728e-05, + "loss": 0.0289, + "step": 61125 + }, + { + "epoch": 2.85, + "learning_rate": 1.0578433370432492e-05, + "loss": 0.0404, + "step": 61130 + }, + { + "epoch": 2.85, + "learning_rate": 1.0577649585377708e-05, + "loss": 0.0537, + "step": 61135 + }, + { + "epoch": 2.85, + "learning_rate": 1.057686580032292e-05, + "loss": 0.0917, + "step": 61140 + }, + { + "epoch": 2.85, + "learning_rate": 1.0576082015268134e-05, + "loss": 0.0957, + "step": 61145 + }, + { + "epoch": 2.85, + "learning_rate": 1.0575298230213346e-05, + "loss": 0.0822, + "step": 61150 + }, + { + "epoch": 2.85, + "learning_rate": 1.0574514445158562e-05, + "loss": 0.1236, + "step": 61155 + }, + { + "epoch": 2.85, + "learning_rate": 1.0573730660103774e-05, + "loss": 0.1939, + "step": 61160 + }, + { + "epoch": 2.85, + "learning_rate": 1.0572946875048988e-05, + "loss": 0.1853, + "step": 61165 + }, + { + "epoch": 2.85, + "learning_rate": 1.05721630899942e-05, + "loss": 0.028, + "step": 61170 + }, + { + "epoch": 2.85, + "learning_rate": 1.0571379304939416e-05, + "loss": 0.0427, + "step": 61175 + }, + { + "epoch": 2.85, + "learning_rate": 1.0570595519884628e-05, + "loss": 0.0422, + "step": 61180 + }, + { + "epoch": 2.85, + "learning_rate": 1.056981173482984e-05, + "loss": 0.0487, + "step": 61185 + }, + { + "epoch": 2.86, + "learning_rate": 1.0569027949775054e-05, + "loss": 0.0449, + "step": 61190 + }, + { + "epoch": 2.86, + "learning_rate": 1.0568244164720266e-05, + "loss": 0.1407, + "step": 61195 + }, + { + "epoch": 2.86, + "learning_rate": 1.0567460379665482e-05, + "loss": 0.1246, + "step": 61200 + }, + { + "epoch": 2.86, + "learning_rate": 1.0566676594610694e-05, + "loss": 0.085, + "step": 61205 + }, + { + "epoch": 2.86, + "learning_rate": 1.0565892809555908e-05, + "loss": 0.2521, + "step": 61210 + }, + { + "epoch": 2.86, + "learning_rate": 1.056510902450112e-05, + "loss": 0.1783, + "step": 61215 + }, + { + "epoch": 2.86, + "learning_rate": 1.0564325239446336e-05, + "loss": 0.1145, + "step": 61220 + }, + { + "epoch": 2.86, + "learning_rate": 1.0563541454391548e-05, + "loss": 0.03, + "step": 61225 + }, + { + "epoch": 2.86, + "learning_rate": 1.0562757669336762e-05, + "loss": 0.0302, + "step": 61230 + }, + { + "epoch": 2.86, + "learning_rate": 1.0561973884281976e-05, + "loss": 0.0758, + "step": 61235 + }, + { + "epoch": 2.86, + "learning_rate": 1.056119009922719e-05, + "loss": 0.0925, + "step": 61240 + }, + { + "epoch": 2.86, + "learning_rate": 1.0560406314172402e-05, + "loss": 0.0622, + "step": 61245 + }, + { + "epoch": 2.86, + "learning_rate": 1.0559622529117614e-05, + "loss": 0.0614, + "step": 61250 + }, + { + "epoch": 2.86, + "learning_rate": 1.055883874406283e-05, + "loss": 0.1648, + "step": 61255 + }, + { + "epoch": 2.86, + "learning_rate": 1.0558054959008042e-05, + "loss": 0.2341, + "step": 61260 + }, + { + "epoch": 2.86, + "learning_rate": 1.0557271173953256e-05, + "loss": 0.269, + "step": 61265 + }, + { + "epoch": 2.86, + "learning_rate": 1.0556487388898468e-05, + "loss": 0.0684, + "step": 61270 + }, + { + "epoch": 2.86, + "learning_rate": 1.0555703603843684e-05, + "loss": 0.107, + "step": 61275 + }, + { + "epoch": 2.86, + "learning_rate": 1.0554919818788896e-05, + "loss": 0.0388, + "step": 61280 + }, + { + "epoch": 2.86, + "learning_rate": 1.055413603373411e-05, + "loss": 0.0633, + "step": 61285 + }, + { + "epoch": 2.86, + "learning_rate": 1.0553352248679322e-05, + "loss": 0.1063, + "step": 61290 + }, + { + "epoch": 2.86, + "learning_rate": 1.0552568463624538e-05, + "loss": 0.1456, + "step": 61295 + }, + { + "epoch": 2.86, + "learning_rate": 1.055178467856975e-05, + "loss": 0.1461, + "step": 61300 + }, + { + "epoch": 2.86, + "learning_rate": 1.0551000893514964e-05, + "loss": 0.1618, + "step": 61305 + }, + { + "epoch": 2.86, + "learning_rate": 1.0550217108460176e-05, + "loss": 0.2129, + "step": 61310 + }, + { + "epoch": 2.86, + "learning_rate": 1.054943332340539e-05, + "loss": 0.2407, + "step": 61315 + }, + { + "epoch": 2.86, + "learning_rate": 1.0548649538350604e-05, + "loss": 0.0771, + "step": 61320 + }, + { + "epoch": 2.86, + "learning_rate": 1.0547865753295816e-05, + "loss": 0.0186, + "step": 61325 + }, + { + "epoch": 2.86, + "learning_rate": 1.054708196824103e-05, + "loss": 0.0309, + "step": 61330 + }, + { + "epoch": 2.86, + "learning_rate": 1.0546298183186244e-05, + "loss": 0.1447, + "step": 61335 + }, + { + "epoch": 2.86, + "learning_rate": 1.0545514398131458e-05, + "loss": 0.0258, + "step": 61340 + }, + { + "epoch": 2.86, + "learning_rate": 1.054473061307667e-05, + "loss": 0.1747, + "step": 61345 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543946828021886e-05, + "loss": 0.1391, + "step": 61350 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543163042967098e-05, + "loss": 0.217, + "step": 61355 + }, + { + "epoch": 2.86, + "learning_rate": 1.0542379257912312e-05, + "loss": 0.2874, + "step": 61360 + }, + { + "epoch": 2.86, + "learning_rate": 1.0541595472857524e-05, + "loss": 0.2921, + "step": 61365 + }, + { + "epoch": 2.86, + "learning_rate": 1.054081168780274e-05, + "loss": 0.0604, + "step": 61370 + }, + { + "epoch": 2.86, + "learning_rate": 1.0540027902747952e-05, + "loss": 0.0352, + "step": 61375 + }, + { + "epoch": 2.86, + "learning_rate": 1.0539244117693164e-05, + "loss": 0.0296, + "step": 61380 + }, + { + "epoch": 2.86, + "learning_rate": 1.0538460332638378e-05, + "loss": 0.0616, + "step": 61385 + }, + { + "epoch": 2.86, + "learning_rate": 1.053767654758359e-05, + "loss": 0.094, + "step": 61390 + }, + { + "epoch": 2.86, + "learning_rate": 1.0536892762528806e-05, + "loss": 0.1211, + "step": 61395 + }, + { + "epoch": 2.87, + "learning_rate": 1.0536108977474018e-05, + "loss": 0.104, + "step": 61400 + }, + { + "epoch": 2.87, + "learning_rate": 1.0535325192419232e-05, + "loss": 0.154, + "step": 61405 + }, + { + "epoch": 2.87, + "learning_rate": 1.0534541407364444e-05, + "loss": 0.1931, + "step": 61410 + }, + { + "epoch": 2.87, + "learning_rate": 1.053375762230966e-05, + "loss": 0.1805, + "step": 61415 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532973837254872e-05, + "loss": 0.0324, + "step": 61420 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532190052200086e-05, + "loss": 0.0607, + "step": 61425 + }, + { + "epoch": 2.87, + "learning_rate": 1.0531406267145298e-05, + "loss": 0.0146, + "step": 61430 + }, + { + "epoch": 2.87, + "learning_rate": 1.0530622482090514e-05, + "loss": 0.0607, + "step": 61435 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529838697035726e-05, + "loss": 0.0477, + "step": 61440 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529054911980938e-05, + "loss": 0.0923, + "step": 61445 + }, + { + "epoch": 2.87, + "learning_rate": 1.0528271126926154e-05, + "loss": 0.0744, + "step": 61450 + }, + { + "epoch": 2.87, + "learning_rate": 1.0527487341871366e-05, + "loss": 0.1322, + "step": 61455 + }, + { + "epoch": 2.87, + "learning_rate": 1.052670355681658e-05, + "loss": 0.2138, + "step": 61460 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525919771761792e-05, + "loss": 0.2974, + "step": 61465 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525135986707008e-05, + "loss": 0.0482, + "step": 61470 + }, + { + "epoch": 2.87, + "learning_rate": 1.052435220165222e-05, + "loss": 0.0448, + "step": 61475 + }, + { + "epoch": 2.87, + "learning_rate": 1.0523568416597434e-05, + "loss": 0.05, + "step": 61480 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522784631542646e-05, + "loss": 0.0529, + "step": 61485 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522000846487861e-05, + "loss": 0.1533, + "step": 61490 + }, + { + "epoch": 2.87, + "learning_rate": 1.0521217061433074e-05, + "loss": 0.1484, + "step": 61495 + }, + { + "epoch": 2.87, + "learning_rate": 1.0520433276378288e-05, + "loss": 0.1626, + "step": 61500 + }, + { + "epoch": 2.87, + "learning_rate": 1.05196494913235e-05, + "loss": 0.1731, + "step": 61505 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518865706268712e-05, + "loss": 0.33, + "step": 61510 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518081921213928e-05, + "loss": 0.3039, + "step": 61515 + }, + { + "epoch": 2.87, + "learning_rate": 1.051729813615914e-05, + "loss": 0.01, + "step": 61520 + }, + { + "epoch": 2.87, + "learning_rate": 1.0516514351104354e-05, + "loss": 0.0286, + "step": 61525 + }, + { + "epoch": 2.87, + "learning_rate": 1.0515730566049568e-05, + "loss": 0.0298, + "step": 61530 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514946780994782e-05, + "loss": 0.077, + "step": 61535 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514162995939994e-05, + "loss": 0.1129, + "step": 61540 + }, + { + "epoch": 2.87, + "learning_rate": 1.0513379210885208e-05, + "loss": 0.0934, + "step": 61545 + }, + { + "epoch": 2.87, + "learning_rate": 1.0512595425830422e-05, + "loss": 0.1048, + "step": 61550 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511811640775635e-05, + "loss": 0.1929, + "step": 61555 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511027855720848e-05, + "loss": 0.281, + "step": 61560 + }, + { + "epoch": 2.87, + "learning_rate": 1.0510244070666063e-05, + "loss": 0.345, + "step": 61565 + }, + { + "epoch": 2.87, + "learning_rate": 1.0509460285611275e-05, + "loss": 0.0306, + "step": 61570 + }, + { + "epoch": 2.87, + "learning_rate": 1.0508676500556488e-05, + "loss": 0.0636, + "step": 61575 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507892715501702e-05, + "loss": 0.0506, + "step": 61580 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507108930446914e-05, + "loss": 0.112, + "step": 61585 + }, + { + "epoch": 2.87, + "learning_rate": 1.050632514539213e-05, + "loss": 0.0215, + "step": 61590 + }, + { + "epoch": 2.87, + "learning_rate": 1.0505541360337342e-05, + "loss": 0.0534, + "step": 61595 + }, + { + "epoch": 2.87, + "learning_rate": 1.0504757575282556e-05, + "loss": 0.0811, + "step": 61600 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503973790227768e-05, + "loss": 0.1887, + "step": 61605 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503190005172983e-05, + "loss": 0.2217, + "step": 61610 + }, + { + "epoch": 2.88, + "learning_rate": 1.0502406220118196e-05, + "loss": 0.3228, + "step": 61615 + }, + { + "epoch": 2.88, + "learning_rate": 1.050162243506341e-05, + "loss": 0.052, + "step": 61620 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500838650008622e-05, + "loss": 0.0327, + "step": 61625 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500054864953837e-05, + "loss": 0.0629, + "step": 61630 + }, + { + "epoch": 2.88, + "learning_rate": 1.049927107989905e-05, + "loss": 0.0539, + "step": 61635 + }, + { + "epoch": 2.88, + "learning_rate": 1.0498487294844262e-05, + "loss": 0.0616, + "step": 61640 + }, + { + "epoch": 2.88, + "learning_rate": 1.0497703509789476e-05, + "loss": 0.0863, + "step": 61645 + }, + { + "epoch": 2.88, + "learning_rate": 1.049691972473469e-05, + "loss": 0.1395, + "step": 61650 + }, + { + "epoch": 2.88, + "learning_rate": 1.0496135939679903e-05, + "loss": 0.0981, + "step": 61655 + }, + { + "epoch": 2.88, + "learning_rate": 1.0495352154625116e-05, + "loss": 0.2278, + "step": 61660 + }, + { + "epoch": 2.88, + "learning_rate": 1.0494568369570331e-05, + "loss": 0.2109, + "step": 61665 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493784584515543e-05, + "loss": 0.0107, + "step": 61670 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493000799460757e-05, + "loss": 0.085, + "step": 61675 + }, + { + "epoch": 2.88, + "learning_rate": 1.049221701440597e-05, + "loss": 0.0593, + "step": 61680 + }, + { + "epoch": 2.88, + "learning_rate": 1.0491433229351185e-05, + "loss": 0.1071, + "step": 61685 + }, + { + "epoch": 2.88, + "learning_rate": 1.0490649444296397e-05, + "loss": 0.0502, + "step": 61690 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489865659241611e-05, + "loss": 0.0725, + "step": 61695 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489081874186823e-05, + "loss": 0.1771, + "step": 61700 + }, + { + "epoch": 2.88, + "learning_rate": 1.0488298089132036e-05, + "loss": 0.1795, + "step": 61705 + }, + { + "epoch": 2.88, + "learning_rate": 1.0487514304077251e-05, + "loss": 0.2024, + "step": 61710 + }, + { + "epoch": 2.88, + "learning_rate": 1.0486730519022463e-05, + "loss": 0.3146, + "step": 61715 + }, + { + "epoch": 2.88, + "learning_rate": 1.0485946733967677e-05, + "loss": 0.0512, + "step": 61720 + }, + { + "epoch": 2.88, + "learning_rate": 1.048516294891289e-05, + "loss": 0.0375, + "step": 61725 + }, + { + "epoch": 2.88, + "learning_rate": 1.0484379163858105e-05, + "loss": 0.0448, + "step": 61730 + }, + { + "epoch": 2.88, + "learning_rate": 1.0483595378803317e-05, + "loss": 0.062, + "step": 61735 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482811593748531e-05, + "loss": 0.1169, + "step": 61740 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482027808693744e-05, + "loss": 0.0705, + "step": 61745 + }, + { + "epoch": 2.88, + "learning_rate": 1.0481244023638959e-05, + "loss": 0.0729, + "step": 61750 + }, + { + "epoch": 2.88, + "learning_rate": 1.0480460238584171e-05, + "loss": 0.1679, + "step": 61755 + }, + { + "epoch": 2.88, + "learning_rate": 1.0479676453529385e-05, + "loss": 0.188, + "step": 61760 + }, + { + "epoch": 2.88, + "learning_rate": 1.04788926684746e-05, + "loss": 0.2839, + "step": 61765 + }, + { + "epoch": 2.88, + "learning_rate": 1.0478108883419811e-05, + "loss": 0.0484, + "step": 61770 + }, + { + "epoch": 2.88, + "learning_rate": 1.0477325098365025e-05, + "loss": 0.0209, + "step": 61775 + }, + { + "epoch": 2.88, + "learning_rate": 1.0476541313310237e-05, + "loss": 0.072, + "step": 61780 + }, + { + "epoch": 2.88, + "learning_rate": 1.0475757528255453e-05, + "loss": 0.043, + "step": 61785 + }, + { + "epoch": 2.88, + "learning_rate": 1.0474973743200665e-05, + "loss": 0.1533, + "step": 61790 + }, + { + "epoch": 2.88, + "learning_rate": 1.047418995814588e-05, + "loss": 0.062, + "step": 61795 + }, + { + "epoch": 2.88, + "learning_rate": 1.0473406173091091e-05, + "loss": 0.0718, + "step": 61800 + }, + { + "epoch": 2.88, + "learning_rate": 1.0472622388036307e-05, + "loss": 0.1603, + "step": 61805 + }, + { + "epoch": 2.88, + "learning_rate": 1.047183860298152e-05, + "loss": 0.2909, + "step": 61810 + }, + { + "epoch": 2.88, + "learning_rate": 1.0471054817926733e-05, + "loss": 0.182, + "step": 61815 + }, + { + "epoch": 2.88, + "learning_rate": 1.0470271032871945e-05, + "loss": 0.0543, + "step": 61820 + }, + { + "epoch": 2.88, + "learning_rate": 1.0469487247817161e-05, + "loss": 0.039, + "step": 61825 + }, + { + "epoch": 2.89, + "learning_rate": 1.0468703462762373e-05, + "loss": 0.0647, + "step": 61830 + }, + { + "epoch": 2.89, + "learning_rate": 1.0467919677707585e-05, + "loss": 0.0652, + "step": 61835 + }, + { + "epoch": 2.89, + "learning_rate": 1.04671358926528e-05, + "loss": 0.0565, + "step": 61840 + }, + { + "epoch": 2.89, + "learning_rate": 1.0466352107598013e-05, + "loss": 0.1336, + "step": 61845 + }, + { + "epoch": 2.89, + "learning_rate": 1.0465568322543227e-05, + "loss": 0.1115, + "step": 61850 + }, + { + "epoch": 2.89, + "learning_rate": 1.046478453748844e-05, + "loss": 0.1073, + "step": 61855 + }, + { + "epoch": 2.89, + "learning_rate": 1.046415750944461e-05, + "loss": 0.2446, + "step": 61860 + }, + { + "epoch": 2.89, + "learning_rate": 1.046353048140078e-05, + "loss": 0.3299, + "step": 61865 + }, + { + "epoch": 2.89, + "learning_rate": 1.0462746696345996e-05, + "loss": 0.0518, + "step": 61870 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461962911291208e-05, + "loss": 0.0517, + "step": 61875 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461179126236422e-05, + "loss": 0.0442, + "step": 61880 + }, + { + "epoch": 2.89, + "learning_rate": 1.0460395341181636e-05, + "loss": 0.0964, + "step": 61885 + }, + { + "epoch": 2.89, + "learning_rate": 1.045961155612685e-05, + "loss": 0.0607, + "step": 61890 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458827771072062e-05, + "loss": 0.1274, + "step": 61895 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458043986017274e-05, + "loss": 0.2033, + "step": 61900 + }, + { + "epoch": 2.89, + "learning_rate": 1.045726020096249e-05, + "loss": 0.2348, + "step": 61905 + }, + { + "epoch": 2.89, + "learning_rate": 1.0456476415907702e-05, + "loss": 0.1433, + "step": 61910 + }, + { + "epoch": 2.89, + "learning_rate": 1.0455692630852916e-05, + "loss": 0.2228, + "step": 61915 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454908845798128e-05, + "loss": 0.0411, + "step": 61920 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454125060743344e-05, + "loss": 0.0441, + "step": 61925 + }, + { + "epoch": 2.89, + "learning_rate": 1.0453341275688556e-05, + "loss": 0.0757, + "step": 61930 + }, + { + "epoch": 2.89, + "learning_rate": 1.045255749063377e-05, + "loss": 0.0323, + "step": 61935 + }, + { + "epoch": 2.89, + "learning_rate": 1.0451773705578982e-05, + "loss": 0.0426, + "step": 61940 + }, + { + "epoch": 2.89, + "learning_rate": 1.0450989920524198e-05, + "loss": 0.0863, + "step": 61945 + }, + { + "epoch": 2.89, + "learning_rate": 1.045020613546941e-05, + "loss": 0.184, + "step": 61950 + }, + { + "epoch": 2.89, + "learning_rate": 1.0449422350414624e-05, + "loss": 0.162, + "step": 61955 + }, + { + "epoch": 2.89, + "learning_rate": 1.0448638565359836e-05, + "loss": 0.1879, + "step": 61960 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447854780305048e-05, + "loss": 0.2687, + "step": 61965 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447070995250264e-05, + "loss": 0.0468, + "step": 61970 + }, + { + "epoch": 2.89, + "learning_rate": 1.0446287210195476e-05, + "loss": 0.0404, + "step": 61975 + }, + { + "epoch": 2.89, + "learning_rate": 1.044550342514069e-05, + "loss": 0.0753, + "step": 61980 + }, + { + "epoch": 2.89, + "learning_rate": 1.0444719640085904e-05, + "loss": 0.0453, + "step": 61985 + }, + { + "epoch": 2.89, + "learning_rate": 1.0443935855031118e-05, + "loss": 0.1476, + "step": 61990 + }, + { + "epoch": 2.89, + "learning_rate": 1.044315206997633e-05, + "loss": 0.0593, + "step": 61995 + }, + { + "epoch": 2.89, + "learning_rate": 1.0442368284921545e-05, + "loss": 0.1595, + "step": 62000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0441584499866758e-05, + "loss": 0.1356, + "step": 62005 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440800714811972e-05, + "loss": 0.2253, + "step": 62010 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440016929757184e-05, + "loss": 0.2739, + "step": 62015 + }, + { + "epoch": 2.89, + "learning_rate": 1.04392331447024e-05, + "loss": 0.0676, + "step": 62020 + }, + { + "epoch": 2.89, + "learning_rate": 1.0438449359647612e-05, + "loss": 0.076, + "step": 62025 + }, + { + "epoch": 2.89, + "learning_rate": 1.0437665574592824e-05, + "loss": 0.0512, + "step": 62030 + }, + { + "epoch": 2.89, + "learning_rate": 1.0436881789538038e-05, + "loss": 0.0411, + "step": 62035 + }, + { + "epoch": 2.89, + "learning_rate": 1.043609800448325e-05, + "loss": 0.2422, + "step": 62040 + }, + { + "epoch": 2.9, + "learning_rate": 1.0435314219428466e-05, + "loss": 0.1052, + "step": 62045 + }, + { + "epoch": 2.9, + "learning_rate": 1.0434530434373678e-05, + "loss": 0.1557, + "step": 62050 + }, + { + "epoch": 2.9, + "learning_rate": 1.0433746649318892e-05, + "loss": 0.217, + "step": 62055 + }, + { + "epoch": 2.9, + "learning_rate": 1.0432962864264104e-05, + "loss": 0.1938, + "step": 62060 + }, + { + "epoch": 2.9, + "learning_rate": 1.043217907920932e-05, + "loss": 0.1747, + "step": 62065 + }, + { + "epoch": 2.9, + "learning_rate": 1.0431395294154532e-05, + "loss": 0.0406, + "step": 62070 + }, + { + "epoch": 2.9, + "learning_rate": 1.0430611509099746e-05, + "loss": 0.0092, + "step": 62075 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429827724044958e-05, + "loss": 0.0618, + "step": 62080 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429043938990173e-05, + "loss": 0.0918, + "step": 62085 + }, + { + "epoch": 2.9, + "learning_rate": 1.0428260153935386e-05, + "loss": 0.1263, + "step": 62090 + }, + { + "epoch": 2.9, + "learning_rate": 1.0427476368880598e-05, + "loss": 0.1332, + "step": 62095 + }, + { + "epoch": 2.9, + "learning_rate": 1.0426692583825813e-05, + "loss": 0.1137, + "step": 62100 + }, + { + "epoch": 2.9, + "learning_rate": 1.0425908798771026e-05, + "loss": 0.0945, + "step": 62105 + }, + { + "epoch": 2.9, + "learning_rate": 1.042512501371624e-05, + "loss": 0.1955, + "step": 62110 + }, + { + "epoch": 2.9, + "learning_rate": 1.0424341228661452e-05, + "loss": 0.226, + "step": 62115 + }, + { + "epoch": 2.9, + "learning_rate": 1.0423557443606667e-05, + "loss": 0.0538, + "step": 62120 + }, + { + "epoch": 2.9, + "learning_rate": 1.042277365855188e-05, + "loss": 0.0342, + "step": 62125 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421989873497093e-05, + "loss": 0.0623, + "step": 62130 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421206088442306e-05, + "loss": 0.0489, + "step": 62135 + }, + { + "epoch": 2.9, + "learning_rate": 1.0420422303387521e-05, + "loss": 0.108, + "step": 62140 + }, + { + "epoch": 2.9, + "learning_rate": 1.0419638518332734e-05, + "loss": 0.0872, + "step": 62145 + }, + { + "epoch": 2.9, + "learning_rate": 1.0418854733277947e-05, + "loss": 0.0748, + "step": 62150 + }, + { + "epoch": 2.9, + "learning_rate": 1.041807094822316e-05, + "loss": 0.1508, + "step": 62155 + }, + { + "epoch": 2.9, + "learning_rate": 1.0417287163168372e-05, + "loss": 0.2324, + "step": 62160 + }, + { + "epoch": 2.9, + "learning_rate": 1.0416503378113587e-05, + "loss": 0.3346, + "step": 62165 + }, + { + "epoch": 2.9, + "learning_rate": 1.04157195930588e-05, + "loss": 0.0784, + "step": 62170 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414935808004014e-05, + "loss": 0.0446, + "step": 62175 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414152022949226e-05, + "loss": 0.0434, + "step": 62180 + }, + { + "epoch": 2.9, + "learning_rate": 1.0413368237894441e-05, + "loss": 0.0521, + "step": 62185 + }, + { + "epoch": 2.9, + "learning_rate": 1.0412584452839654e-05, + "loss": 0.0605, + "step": 62190 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411800667784867e-05, + "loss": 0.1037, + "step": 62195 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411016882730081e-05, + "loss": 0.0748, + "step": 62200 + }, + { + "epoch": 2.9, + "learning_rate": 1.0410233097675295e-05, + "loss": 0.2214, + "step": 62205 + }, + { + "epoch": 2.9, + "learning_rate": 1.0409449312620508e-05, + "loss": 0.1377, + "step": 62210 + }, + { + "epoch": 2.9, + "learning_rate": 1.0408665527565721e-05, + "loss": 0.2861, + "step": 62215 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407881742510935e-05, + "loss": 0.0281, + "step": 62220 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407097957456148e-05, + "loss": 0.0211, + "step": 62225 + }, + { + "epoch": 2.9, + "learning_rate": 1.0406314172401361e-05, + "loss": 0.0767, + "step": 62230 + }, + { + "epoch": 2.9, + "learning_rate": 1.0405530387346574e-05, + "loss": 0.0752, + "step": 62235 + }, + { + "epoch": 2.9, + "learning_rate": 1.040474660229179e-05, + "loss": 0.0622, + "step": 62240 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403962817237001e-05, + "loss": 0.0847, + "step": 62245 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403179032182215e-05, + "loss": 0.0918, + "step": 62250 + }, + { + "epoch": 2.9, + "learning_rate": 1.0402395247127428e-05, + "loss": 0.0615, + "step": 62255 + }, + { + "epoch": 2.91, + "learning_rate": 1.0401611462072643e-05, + "loss": 0.1679, + "step": 62260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0400827677017855e-05, + "loss": 0.3511, + "step": 62265 + }, + { + "epoch": 2.91, + "learning_rate": 1.040004389196307e-05, + "loss": 0.0629, + "step": 62270 + }, + { + "epoch": 2.91, + "learning_rate": 1.0399260106908282e-05, + "loss": 0.0183, + "step": 62275 + }, + { + "epoch": 2.91, + "learning_rate": 1.0398476321853497e-05, + "loss": 0.0467, + "step": 62280 + }, + { + "epoch": 2.91, + "learning_rate": 1.039769253679871e-05, + "loss": 0.0157, + "step": 62285 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396908751743922e-05, + "loss": 0.0978, + "step": 62290 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396124966689135e-05, + "loss": 0.0457, + "step": 62295 + }, + { + "epoch": 2.91, + "learning_rate": 1.039534118163435e-05, + "loss": 0.1463, + "step": 62300 + }, + { + "epoch": 2.91, + "learning_rate": 1.0394557396579563e-05, + "loss": 0.1993, + "step": 62305 + }, + { + "epoch": 2.91, + "learning_rate": 1.0393773611524775e-05, + "loss": 0.1728, + "step": 62310 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392989826469991e-05, + "loss": 0.163, + "step": 62315 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392206041415203e-05, + "loss": 0.0391, + "step": 62320 + }, + { + "epoch": 2.91, + "learning_rate": 1.0391422256360417e-05, + "loss": 0.054, + "step": 62325 + }, + { + "epoch": 2.91, + "learning_rate": 1.039063847130563e-05, + "loss": 0.0313, + "step": 62330 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389854686250845e-05, + "loss": 0.0868, + "step": 62335 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389070901196057e-05, + "loss": 0.0782, + "step": 62340 + }, + { + "epoch": 2.91, + "learning_rate": 1.0388287116141271e-05, + "loss": 0.0603, + "step": 62345 + }, + { + "epoch": 2.91, + "learning_rate": 1.0387503331086483e-05, + "loss": 0.1835, + "step": 62350 + }, + { + "epoch": 2.91, + "learning_rate": 1.0386719546031696e-05, + "loss": 0.1227, + "step": 62355 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385935760976911e-05, + "loss": 0.4369, + "step": 62360 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385151975922123e-05, + "loss": 0.3164, + "step": 62365 + }, + { + "epoch": 2.91, + "learning_rate": 1.0384368190867337e-05, + "loss": 0.0913, + "step": 62370 + }, + { + "epoch": 2.91, + "learning_rate": 1.038358440581255e-05, + "loss": 0.0212, + "step": 62375 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382800620757765e-05, + "loss": 0.0354, + "step": 62380 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382016835702977e-05, + "loss": 0.0694, + "step": 62385 + }, + { + "epoch": 2.91, + "learning_rate": 1.0381233050648191e-05, + "loss": 0.1128, + "step": 62390 + }, + { + "epoch": 2.91, + "learning_rate": 1.0380449265593403e-05, + "loss": 0.1718, + "step": 62395 + }, + { + "epoch": 2.91, + "learning_rate": 1.0379665480538619e-05, + "loss": 0.1168, + "step": 62400 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378881695483831e-05, + "loss": 0.2419, + "step": 62405 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378097910429045e-05, + "loss": 0.2319, + "step": 62410 + }, + { + "epoch": 2.91, + "learning_rate": 1.0377314125374259e-05, + "loss": 0.261, + "step": 62415 + }, + { + "epoch": 2.91, + "learning_rate": 1.0376530340319471e-05, + "loss": 0.0797, + "step": 62420 + }, + { + "epoch": 2.91, + "learning_rate": 1.0375746555264685e-05, + "loss": 0.0227, + "step": 62425 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374962770209897e-05, + "loss": 0.0566, + "step": 62430 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374178985155113e-05, + "loss": 0.1145, + "step": 62435 + }, + { + "epoch": 2.91, + "learning_rate": 1.0373395200100325e-05, + "loss": 0.0866, + "step": 62440 + }, + { + "epoch": 2.91, + "learning_rate": 1.0372611415045539e-05, + "loss": 0.0935, + "step": 62445 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371827629990751e-05, + "loss": 0.1222, + "step": 62450 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371043844935967e-05, + "loss": 0.1236, + "step": 62455 + }, + { + "epoch": 2.91, + "learning_rate": 1.0370260059881179e-05, + "loss": 0.2584, + "step": 62460 + }, + { + "epoch": 2.91, + "learning_rate": 1.0369476274826393e-05, + "loss": 0.3033, + "step": 62465 + }, + { + "epoch": 2.91, + "learning_rate": 1.0368692489771605e-05, + "loss": 0.0242, + "step": 62470 + }, + { + "epoch": 2.92, + "learning_rate": 1.036790870471682e-05, + "loss": 0.036, + "step": 62475 + }, + { + "epoch": 2.92, + "learning_rate": 1.0367124919662033e-05, + "loss": 0.0251, + "step": 62480 + }, + { + "epoch": 2.92, + "learning_rate": 1.0366341134607245e-05, + "loss": 0.0294, + "step": 62485 + }, + { + "epoch": 2.92, + "learning_rate": 1.0365557349552459e-05, + "loss": 0.0555, + "step": 62490 + }, + { + "epoch": 2.92, + "learning_rate": 1.0364773564497671e-05, + "loss": 0.0958, + "step": 62495 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363989779442887e-05, + "loss": 0.1204, + "step": 62500 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363205994388099e-05, + "loss": 0.0712, + "step": 62505 + }, + { + "epoch": 2.92, + "learning_rate": 1.0362422209333313e-05, + "loss": 0.1334, + "step": 62510 + }, + { + "epoch": 2.92, + "learning_rate": 1.0361638424278527e-05, + "loss": 0.2207, + "step": 62515 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360854639223741e-05, + "loss": 0.0133, + "step": 62520 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360070854168953e-05, + "loss": 0.0209, + "step": 62525 + }, + { + "epoch": 2.92, + "learning_rate": 1.0359287069114167e-05, + "loss": 0.0494, + "step": 62530 + }, + { + "epoch": 2.92, + "learning_rate": 1.0358503284059381e-05, + "loss": 0.0147, + "step": 62535 + }, + { + "epoch": 2.92, + "learning_rate": 1.0357719499004595e-05, + "loss": 0.1232, + "step": 62540 + }, + { + "epoch": 2.92, + "learning_rate": 1.0356935713949807e-05, + "loss": 0.0512, + "step": 62545 + }, + { + "epoch": 2.92, + "learning_rate": 1.035615192889502e-05, + "loss": 0.2343, + "step": 62550 + }, + { + "epoch": 2.92, + "learning_rate": 1.0355368143840235e-05, + "loss": 0.1384, + "step": 62555 + }, + { + "epoch": 2.92, + "learning_rate": 1.0354584358785447e-05, + "loss": 0.2172, + "step": 62560 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353800573730661e-05, + "loss": 0.2982, + "step": 62565 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353016788675873e-05, + "loss": 0.0251, + "step": 62570 + }, + { + "epoch": 2.92, + "learning_rate": 1.0352233003621089e-05, + "loss": 0.1112, + "step": 62575 + }, + { + "epoch": 2.92, + "learning_rate": 1.0351449218566301e-05, + "loss": 0.0654, + "step": 62580 + }, + { + "epoch": 2.92, + "learning_rate": 1.0350665433511515e-05, + "loss": 0.0838, + "step": 62585 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349881648456727e-05, + "loss": 0.1541, + "step": 62590 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349097863401943e-05, + "loss": 0.0852, + "step": 62595 + }, + { + "epoch": 2.92, + "learning_rate": 1.0348314078347155e-05, + "loss": 0.2073, + "step": 62600 + }, + { + "epoch": 2.92, + "learning_rate": 1.0347530293292369e-05, + "loss": 0.1479, + "step": 62605 + }, + { + "epoch": 2.92, + "learning_rate": 1.0346746508237581e-05, + "loss": 0.2269, + "step": 62610 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345962723182795e-05, + "loss": 0.1799, + "step": 62615 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345178938128009e-05, + "loss": 0.0507, + "step": 62620 + }, + { + "epoch": 2.92, + "learning_rate": 1.0344395153073221e-05, + "loss": 0.0694, + "step": 62625 + }, + { + "epoch": 2.92, + "learning_rate": 1.0343611368018437e-05, + "loss": 0.0565, + "step": 62630 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342827582963649e-05, + "loss": 0.0774, + "step": 62635 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342043797908863e-05, + "loss": 0.1522, + "step": 62640 + }, + { + "epoch": 2.92, + "learning_rate": 1.0341260012854075e-05, + "loss": 0.1715, + "step": 62645 + }, + { + "epoch": 2.92, + "learning_rate": 1.034047622779929e-05, + "loss": 0.1394, + "step": 62650 + }, + { + "epoch": 2.92, + "learning_rate": 1.0339692442744503e-05, + "loss": 0.2182, + "step": 62655 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338908657689717e-05, + "loss": 0.3029, + "step": 62660 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338124872634929e-05, + "loss": 0.2364, + "step": 62665 + }, + { + "epoch": 2.92, + "learning_rate": 1.0337341087580144e-05, + "loss": 0.036, + "step": 62670 + }, + { + "epoch": 2.92, + "learning_rate": 1.0336557302525357e-05, + "loss": 0.0416, + "step": 62675 + }, + { + "epoch": 2.92, + "learning_rate": 1.0335773517470569e-05, + "loss": 0.0485, + "step": 62680 + }, + { + "epoch": 2.92, + "learning_rate": 1.0334989732415783e-05, + "loss": 0.0772, + "step": 62685 + }, + { + "epoch": 2.93, + "learning_rate": 1.0334205947360995e-05, + "loss": 0.0545, + "step": 62690 + }, + { + "epoch": 2.93, + "learning_rate": 1.033342216230621e-05, + "loss": 0.0758, + "step": 62695 + }, + { + "epoch": 2.93, + "learning_rate": 1.0332638377251423e-05, + "loss": 0.1142, + "step": 62700 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331854592196637e-05, + "loss": 0.1031, + "step": 62705 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331070807141849e-05, + "loss": 0.3682, + "step": 62710 + }, + { + "epoch": 2.93, + "learning_rate": 1.0330287022087065e-05, + "loss": 0.3997, + "step": 62715 + }, + { + "epoch": 2.93, + "learning_rate": 1.0329503237032277e-05, + "loss": 0.052, + "step": 62720 + }, + { + "epoch": 2.93, + "learning_rate": 1.032871945197749e-05, + "loss": 0.0213, + "step": 62725 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327935666922705e-05, + "loss": 0.0501, + "step": 62730 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327151881867918e-05, + "loss": 0.0711, + "step": 62735 + }, + { + "epoch": 2.93, + "learning_rate": 1.032636809681313e-05, + "loss": 0.1498, + "step": 62740 + }, + { + "epoch": 2.93, + "learning_rate": 1.0325584311758343e-05, + "loss": 0.1179, + "step": 62745 + }, + { + "epoch": 2.93, + "learning_rate": 1.0324800526703559e-05, + "loss": 0.1803, + "step": 62750 + }, + { + "epoch": 2.93, + "learning_rate": 1.032401674164877e-05, + "loss": 0.1619, + "step": 62755 + }, + { + "epoch": 2.93, + "learning_rate": 1.0323232956593985e-05, + "loss": 0.2089, + "step": 62760 + }, + { + "epoch": 2.93, + "learning_rate": 1.0322449171539197e-05, + "loss": 0.4382, + "step": 62765 + }, + { + "epoch": 2.93, + "learning_rate": 1.0321665386484412e-05, + "loss": 0.0535, + "step": 62770 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320881601429625e-05, + "loss": 0.029, + "step": 62775 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320097816374839e-05, + "loss": 0.0429, + "step": 62780 + }, + { + "epoch": 2.93, + "learning_rate": 1.031931403132005e-05, + "loss": 0.0829, + "step": 62785 + }, + { + "epoch": 2.93, + "learning_rate": 1.0318530246265266e-05, + "loss": 0.0953, + "step": 62790 + }, + { + "epoch": 2.93, + "learning_rate": 1.0317746461210479e-05, + "loss": 0.1233, + "step": 62795 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316962676155692e-05, + "loss": 0.0843, + "step": 62800 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316178891100905e-05, + "loss": 0.1054, + "step": 62805 + }, + { + "epoch": 2.93, + "learning_rate": 1.0315395106046117e-05, + "loss": 0.277, + "step": 62810 + }, + { + "epoch": 2.93, + "learning_rate": 1.0314611320991333e-05, + "loss": 0.4017, + "step": 62815 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313827535936545e-05, + "loss": 0.031, + "step": 62820 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313043750881759e-05, + "loss": 0.0184, + "step": 62825 + }, + { + "epoch": 2.93, + "learning_rate": 1.0312259965826973e-05, + "loss": 0.1057, + "step": 62830 + }, + { + "epoch": 2.93, + "learning_rate": 1.0311476180772186e-05, + "loss": 0.1594, + "step": 62835 + }, + { + "epoch": 2.93, + "learning_rate": 1.0310692395717399e-05, + "loss": 0.1388, + "step": 62840 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309908610662613e-05, + "loss": 0.0698, + "step": 62845 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309124825607826e-05, + "loss": 0.2027, + "step": 62850 + }, + { + "epoch": 2.93, + "learning_rate": 1.030834104055304e-05, + "loss": 0.0898, + "step": 62855 + }, + { + "epoch": 2.93, + "learning_rate": 1.0307557255498253e-05, + "loss": 0.2479, + "step": 62860 + }, + { + "epoch": 2.93, + "learning_rate": 1.0306773470443468e-05, + "loss": 0.2501, + "step": 62865 + }, + { + "epoch": 2.93, + "learning_rate": 1.030598968538868e-05, + "loss": 0.0262, + "step": 62870 + }, + { + "epoch": 2.93, + "learning_rate": 1.0305205900333893e-05, + "loss": 0.0397, + "step": 62875 + }, + { + "epoch": 2.93, + "learning_rate": 1.0304422115279107e-05, + "loss": 0.074, + "step": 62880 + }, + { + "epoch": 2.93, + "learning_rate": 1.0303638330224319e-05, + "loss": 0.0701, + "step": 62885 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302854545169534e-05, + "loss": 0.0451, + "step": 62890 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302070760114747e-05, + "loss": 0.1437, + "step": 62895 + }, + { + "epoch": 2.94, + "learning_rate": 1.030128697505996e-05, + "loss": 0.0555, + "step": 62900 + }, + { + "epoch": 2.94, + "learning_rate": 1.0300503190005173e-05, + "loss": 0.1305, + "step": 62905 + }, + { + "epoch": 2.94, + "learning_rate": 1.0299719404950388e-05, + "loss": 0.2862, + "step": 62910 + }, + { + "epoch": 2.94, + "learning_rate": 1.02989356198956e-05, + "loss": 0.2047, + "step": 62915 + }, + { + "epoch": 2.94, + "learning_rate": 1.0298151834840814e-05, + "loss": 0.0395, + "step": 62920 + }, + { + "epoch": 2.94, + "learning_rate": 1.0297368049786027e-05, + "loss": 0.0727, + "step": 62925 + }, + { + "epoch": 2.94, + "learning_rate": 1.0296584264731242e-05, + "loss": 0.0484, + "step": 62930 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295800479676454e-05, + "loss": 0.0634, + "step": 62935 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295016694621667e-05, + "loss": 0.1216, + "step": 62940 + }, + { + "epoch": 2.94, + "learning_rate": 1.0294232909566882e-05, + "loss": 0.0631, + "step": 62945 + }, + { + "epoch": 2.94, + "learning_rate": 1.0293449124512094e-05, + "loss": 0.0701, + "step": 62950 + }, + { + "epoch": 2.94, + "learning_rate": 1.0292665339457308e-05, + "loss": 0.1508, + "step": 62955 + }, + { + "epoch": 2.94, + "learning_rate": 1.029188155440252e-05, + "loss": 0.1583, + "step": 62960 + }, + { + "epoch": 2.94, + "learning_rate": 1.0291097769347736e-05, + "loss": 0.2747, + "step": 62965 + }, + { + "epoch": 2.94, + "learning_rate": 1.0290313984292948e-05, + "loss": 0.027, + "step": 62970 + }, + { + "epoch": 2.94, + "learning_rate": 1.0289530199238162e-05, + "loss": 0.0262, + "step": 62975 + }, + { + "epoch": 2.94, + "learning_rate": 1.0288746414183374e-05, + "loss": 0.0405, + "step": 62980 + }, + { + "epoch": 2.94, + "learning_rate": 1.028796262912859e-05, + "loss": 0.0914, + "step": 62985 + }, + { + "epoch": 2.94, + "learning_rate": 1.0287178844073802e-05, + "loss": 0.0421, + "step": 62990 + }, + { + "epoch": 2.94, + "learning_rate": 1.0286395059019016e-05, + "loss": 0.1202, + "step": 62995 + }, + { + "epoch": 2.94, + "learning_rate": 1.0285611273964228e-05, + "loss": 0.1285, + "step": 63000 + }, + { + "epoch": 2.94, + "learning_rate": 1.028482748890944e-05, + "loss": 0.169, + "step": 63005 + }, + { + "epoch": 2.94, + "learning_rate": 1.0284043703854656e-05, + "loss": 0.1355, + "step": 63010 + }, + { + "epoch": 2.94, + "learning_rate": 1.0283259918799868e-05, + "loss": 0.2761, + "step": 63015 + }, + { + "epoch": 2.94, + "learning_rate": 1.0282476133745082e-05, + "loss": 0.089, + "step": 63020 + }, + { + "epoch": 2.94, + "learning_rate": 1.0281692348690295e-05, + "loss": 0.0571, + "step": 63025 + }, + { + "epoch": 2.94, + "learning_rate": 1.028090856363551e-05, + "loss": 0.0232, + "step": 63030 + }, + { + "epoch": 2.94, + "learning_rate": 1.0280124778580722e-05, + "loss": 0.0644, + "step": 63035 + }, + { + "epoch": 2.94, + "learning_rate": 1.0279340993525936e-05, + "loss": 0.0646, + "step": 63040 + }, + { + "epoch": 2.94, + "learning_rate": 1.027855720847115e-05, + "loss": 0.1479, + "step": 63045 + }, + { + "epoch": 2.94, + "learning_rate": 1.0277773423416364e-05, + "loss": 0.1911, + "step": 63050 + }, + { + "epoch": 2.94, + "learning_rate": 1.0276989638361576e-05, + "loss": 0.1927, + "step": 63055 + }, + { + "epoch": 2.94, + "learning_rate": 1.027620585330679e-05, + "loss": 0.1748, + "step": 63060 + }, + { + "epoch": 2.94, + "learning_rate": 1.0275422068252004e-05, + "loss": 0.2667, + "step": 63065 + }, + { + "epoch": 2.94, + "learning_rate": 1.0274638283197216e-05, + "loss": 0.0506, + "step": 63070 + }, + { + "epoch": 2.94, + "learning_rate": 1.027385449814243e-05, + "loss": 0.0323, + "step": 63075 + }, + { + "epoch": 2.94, + "learning_rate": 1.0273070713087642e-05, + "loss": 0.0294, + "step": 63080 + }, + { + "epoch": 2.94, + "learning_rate": 1.0272286928032858e-05, + "loss": 0.0357, + "step": 63085 + }, + { + "epoch": 2.94, + "learning_rate": 1.027150314297807e-05, + "loss": 0.0781, + "step": 63090 + }, + { + "epoch": 2.94, + "learning_rate": 1.0270719357923284e-05, + "loss": 0.1175, + "step": 63095 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269935572868496e-05, + "loss": 0.1468, + "step": 63100 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269151787813712e-05, + "loss": 0.1011, + "step": 63105 + }, + { + "epoch": 2.94, + "learning_rate": 1.0268368002758924e-05, + "loss": 0.2504, + "step": 63110 + }, + { + "epoch": 2.95, + "learning_rate": 1.0267584217704138e-05, + "loss": 0.3333, + "step": 63115 + }, + { + "epoch": 2.95, + "learning_rate": 1.026680043264935e-05, + "loss": 0.03, + "step": 63120 + }, + { + "epoch": 2.95, + "learning_rate": 1.0266016647594566e-05, + "loss": 0.0314, + "step": 63125 + }, + { + "epoch": 2.95, + "learning_rate": 1.0265232862539778e-05, + "loss": 0.0549, + "step": 63130 + }, + { + "epoch": 2.95, + "learning_rate": 1.026444907748499e-05, + "loss": 0.0711, + "step": 63135 + }, + { + "epoch": 2.95, + "learning_rate": 1.0263665292430204e-05, + "loss": 0.0903, + "step": 63140 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262881507375418e-05, + "loss": 0.1095, + "step": 63145 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262097722320632e-05, + "loss": 0.1305, + "step": 63150 + }, + { + "epoch": 2.95, + "learning_rate": 1.0261313937265844e-05, + "loss": 0.1326, + "step": 63155 + }, + { + "epoch": 2.95, + "learning_rate": 1.026053015221106e-05, + "loss": 0.2379, + "step": 63160 + }, + { + "epoch": 2.95, + "learning_rate": 1.0259746367156272e-05, + "loss": 0.3484, + "step": 63165 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258962582101486e-05, + "loss": 0.0702, + "step": 63170 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258178797046698e-05, + "loss": 0.0214, + "step": 63175 + }, + { + "epoch": 2.95, + "learning_rate": 1.0257395011991914e-05, + "loss": 0.0748, + "step": 63180 + }, + { + "epoch": 2.95, + "learning_rate": 1.0256611226937126e-05, + "loss": 0.046, + "step": 63185 + }, + { + "epoch": 2.95, + "learning_rate": 1.025582744188234e-05, + "loss": 0.0891, + "step": 63190 + }, + { + "epoch": 2.95, + "learning_rate": 1.0255043656827552e-05, + "loss": 0.0839, + "step": 63195 + }, + { + "epoch": 2.95, + "learning_rate": 1.0254259871772764e-05, + "loss": 0.1694, + "step": 63200 + }, + { + "epoch": 2.95, + "learning_rate": 1.025347608671798e-05, + "loss": 0.1068, + "step": 63205 + }, + { + "epoch": 2.95, + "learning_rate": 1.0252692301663192e-05, + "loss": 0.3257, + "step": 63210 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251908516608406e-05, + "loss": 0.21, + "step": 63215 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251124731553618e-05, + "loss": 0.0523, + "step": 63220 + }, + { + "epoch": 2.95, + "learning_rate": 1.0250340946498834e-05, + "loss": 0.0437, + "step": 63225 + }, + { + "epoch": 2.95, + "learning_rate": 1.0249557161444046e-05, + "loss": 0.0957, + "step": 63230 + }, + { + "epoch": 2.95, + "learning_rate": 1.024877337638926e-05, + "loss": 0.0554, + "step": 63235 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247989591334472e-05, + "loss": 0.0969, + "step": 63240 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247205806279688e-05, + "loss": 0.1589, + "step": 63245 + }, + { + "epoch": 2.95, + "learning_rate": 1.02464220212249e-05, + "loss": 0.1167, + "step": 63250 + }, + { + "epoch": 2.95, + "learning_rate": 1.0245638236170114e-05, + "loss": 0.1879, + "step": 63255 + }, + { + "epoch": 2.95, + "learning_rate": 1.0244854451115328e-05, + "loss": 0.1869, + "step": 63260 + }, + { + "epoch": 2.95, + "learning_rate": 1.024407066606054e-05, + "loss": 0.1766, + "step": 63265 + }, + { + "epoch": 2.95, + "learning_rate": 1.0243286881005754e-05, + "loss": 0.0105, + "step": 63270 + }, + { + "epoch": 2.95, + "learning_rate": 1.0242503095950966e-05, + "loss": 0.0834, + "step": 63275 + }, + { + "epoch": 2.95, + "learning_rate": 1.0241719310896182e-05, + "loss": 0.0791, + "step": 63280 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240935525841394e-05, + "loss": 0.0993, + "step": 63285 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240151740786608e-05, + "loss": 0.0776, + "step": 63290 + }, + { + "epoch": 2.95, + "learning_rate": 1.023936795573182e-05, + "loss": 0.1271, + "step": 63295 + }, + { + "epoch": 2.95, + "learning_rate": 1.0238584170677036e-05, + "loss": 0.0777, + "step": 63300 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237800385622248e-05, + "loss": 0.2114, + "step": 63305 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237016600567462e-05, + "loss": 0.2023, + "step": 63310 + }, + { + "epoch": 2.95, + "learning_rate": 1.0236232815512674e-05, + "loss": 0.2993, + "step": 63315 + }, + { + "epoch": 2.95, + "learning_rate": 1.023544903045789e-05, + "loss": 0.0464, + "step": 63320 + }, + { + "epoch": 2.95, + "learning_rate": 1.0234665245403102e-05, + "loss": 0.0378, + "step": 63325 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233881460348314e-05, + "loss": 0.0215, + "step": 63330 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233097675293528e-05, + "loss": 0.0999, + "step": 63335 + }, + { + "epoch": 2.96, + "learning_rate": 1.023231389023874e-05, + "loss": 0.0813, + "step": 63340 + }, + { + "epoch": 2.96, + "learning_rate": 1.0231530105183956e-05, + "loss": 0.0367, + "step": 63345 + }, + { + "epoch": 2.96, + "learning_rate": 1.0230746320129168e-05, + "loss": 0.1402, + "step": 63350 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229962535074382e-05, + "loss": 0.2003, + "step": 63355 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229178750019596e-05, + "loss": 0.1149, + "step": 63360 + }, + { + "epoch": 2.96, + "learning_rate": 1.022839496496481e-05, + "loss": 0.3584, + "step": 63365 + }, + { + "epoch": 2.96, + "learning_rate": 1.0227611179910022e-05, + "loss": 0.1109, + "step": 63370 + }, + { + "epoch": 2.96, + "learning_rate": 1.0226827394855236e-05, + "loss": 0.0422, + "step": 63375 + }, + { + "epoch": 2.96, + "learning_rate": 1.022604360980045e-05, + "loss": 0.0409, + "step": 63380 + }, + { + "epoch": 2.96, + "learning_rate": 1.0225259824745664e-05, + "loss": 0.0294, + "step": 63385 + }, + { + "epoch": 2.96, + "learning_rate": 1.0224476039690876e-05, + "loss": 0.0393, + "step": 63390 + }, + { + "epoch": 2.96, + "learning_rate": 1.0223692254636088e-05, + "loss": 0.0275, + "step": 63395 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222908469581304e-05, + "loss": 0.1272, + "step": 63400 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222124684526516e-05, + "loss": 0.1611, + "step": 63405 + }, + { + "epoch": 2.96, + "learning_rate": 1.022134089947173e-05, + "loss": 0.3513, + "step": 63410 + }, + { + "epoch": 2.96, + "learning_rate": 1.0220557114416942e-05, + "loss": 0.2599, + "step": 63415 + }, + { + "epoch": 2.96, + "learning_rate": 1.0219773329362158e-05, + "loss": 0.0898, + "step": 63420 + }, + { + "epoch": 2.96, + "learning_rate": 1.021898954430737e-05, + "loss": 0.0381, + "step": 63425 + }, + { + "epoch": 2.96, + "learning_rate": 1.0218205759252584e-05, + "loss": 0.0469, + "step": 63430 + }, + { + "epoch": 2.96, + "learning_rate": 1.0217421974197796e-05, + "loss": 0.0635, + "step": 63435 + }, + { + "epoch": 2.96, + "learning_rate": 1.0216638189143011e-05, + "loss": 0.0756, + "step": 63440 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215854404088224e-05, + "loss": 0.0884, + "step": 63445 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215070619033438e-05, + "loss": 0.1282, + "step": 63450 + }, + { + "epoch": 2.96, + "learning_rate": 1.021428683397865e-05, + "loss": 0.1419, + "step": 63455 + }, + { + "epoch": 2.96, + "learning_rate": 1.0213503048923864e-05, + "loss": 0.2619, + "step": 63460 + }, + { + "epoch": 2.96, + "learning_rate": 1.0212719263869078e-05, + "loss": 0.2593, + "step": 63465 + }, + { + "epoch": 2.96, + "learning_rate": 1.021193547881429e-05, + "loss": 0.0369, + "step": 63470 + }, + { + "epoch": 2.96, + "learning_rate": 1.0211151693759505e-05, + "loss": 0.0356, + "step": 63475 + }, + { + "epoch": 2.96, + "learning_rate": 1.0210367908704718e-05, + "loss": 0.0794, + "step": 63480 + }, + { + "epoch": 2.96, + "learning_rate": 1.0209584123649932e-05, + "loss": 0.0407, + "step": 63485 + }, + { + "epoch": 2.96, + "learning_rate": 1.0208800338595144e-05, + "loss": 0.0817, + "step": 63490 + }, + { + "epoch": 2.96, + "learning_rate": 1.020801655354036e-05, + "loss": 0.0535, + "step": 63495 + }, + { + "epoch": 2.96, + "learning_rate": 1.0207232768485572e-05, + "loss": 0.1159, + "step": 63500 + }, + { + "epoch": 2.96, + "learning_rate": 1.0206448983430785e-05, + "loss": 0.2946, + "step": 63505 + }, + { + "epoch": 2.96, + "learning_rate": 1.0205665198375998e-05, + "loss": 0.2417, + "step": 63510 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204881413321213e-05, + "loss": 0.226, + "step": 63515 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204097628266425e-05, + "loss": 0.0084, + "step": 63520 + }, + { + "epoch": 2.96, + "learning_rate": 1.0203313843211638e-05, + "loss": 0.041, + "step": 63525 + }, + { + "epoch": 2.96, + "learning_rate": 1.0202530058156852e-05, + "loss": 0.0369, + "step": 63530 + }, + { + "epoch": 2.96, + "learning_rate": 1.0201746273102064e-05, + "loss": 0.0958, + "step": 63535 + }, + { + "epoch": 2.96, + "learning_rate": 1.020096248804728e-05, + "loss": 0.1259, + "step": 63540 + }, + { + "epoch": 2.97, + "learning_rate": 1.0200178702992492e-05, + "loss": 0.1039, + "step": 63545 + }, + { + "epoch": 2.97, + "learning_rate": 1.0199394917937706e-05, + "loss": 0.1852, + "step": 63550 + }, + { + "epoch": 2.97, + "learning_rate": 1.0198611132882918e-05, + "loss": 0.1922, + "step": 63555 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197827347828133e-05, + "loss": 0.2911, + "step": 63560 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197043562773346e-05, + "loss": 0.2036, + "step": 63565 + }, + { + "epoch": 2.97, + "learning_rate": 1.019625977771856e-05, + "loss": 0.0676, + "step": 63570 + }, + { + "epoch": 2.97, + "learning_rate": 1.0195475992663773e-05, + "loss": 0.0323, + "step": 63575 + }, + { + "epoch": 2.97, + "learning_rate": 1.0194692207608987e-05, + "loss": 0.0438, + "step": 63580 + }, + { + "epoch": 2.97, + "learning_rate": 1.01939084225542e-05, + "loss": 0.0627, + "step": 63585 + }, + { + "epoch": 2.97, + "learning_rate": 1.0193124637499412e-05, + "loss": 0.1465, + "step": 63590 + }, + { + "epoch": 2.97, + "learning_rate": 1.0192340852444627e-05, + "loss": 0.0959, + "step": 63595 + }, + { + "epoch": 2.97, + "learning_rate": 1.019155706738984e-05, + "loss": 0.0919, + "step": 63600 + }, + { + "epoch": 2.97, + "learning_rate": 1.0190773282335053e-05, + "loss": 0.194, + "step": 63605 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189989497280266e-05, + "loss": 0.1797, + "step": 63610 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189205712225481e-05, + "loss": 0.2537, + "step": 63615 + }, + { + "epoch": 2.97, + "learning_rate": 1.0188421927170693e-05, + "loss": 0.0516, + "step": 63620 + }, + { + "epoch": 2.97, + "learning_rate": 1.0187638142115907e-05, + "loss": 0.0706, + "step": 63625 + }, + { + "epoch": 2.97, + "learning_rate": 1.018685435706112e-05, + "loss": 0.0674, + "step": 63630 + }, + { + "epoch": 2.97, + "learning_rate": 1.0186070572006335e-05, + "loss": 0.0511, + "step": 63635 + }, + { + "epoch": 2.97, + "learning_rate": 1.0185286786951547e-05, + "loss": 0.0479, + "step": 63640 + }, + { + "epoch": 2.97, + "learning_rate": 1.0184503001896761e-05, + "loss": 0.1024, + "step": 63645 + }, + { + "epoch": 2.97, + "learning_rate": 1.0183719216841973e-05, + "loss": 0.0979, + "step": 63650 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182935431787186e-05, + "loss": 0.1912, + "step": 63655 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182151646732401e-05, + "loss": 0.23, + "step": 63660 + }, + { + "epoch": 2.97, + "learning_rate": 1.0181367861677613e-05, + "loss": 0.3174, + "step": 63665 + }, + { + "epoch": 2.97, + "learning_rate": 1.0180584076622827e-05, + "loss": 0.0348, + "step": 63670 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179800291568041e-05, + "loss": 0.0672, + "step": 63675 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179016506513255e-05, + "loss": 0.0229, + "step": 63680 + }, + { + "epoch": 2.97, + "learning_rate": 1.0178232721458467e-05, + "loss": 0.0771, + "step": 63685 + }, + { + "epoch": 2.97, + "learning_rate": 1.0177448936403681e-05, + "loss": 0.114, + "step": 63690 + }, + { + "epoch": 2.97, + "learning_rate": 1.0176665151348895e-05, + "loss": 0.0955, + "step": 63695 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175881366294109e-05, + "loss": 0.0727, + "step": 63700 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175097581239321e-05, + "loss": 0.0753, + "step": 63705 + }, + { + "epoch": 2.97, + "learning_rate": 1.0174313796184537e-05, + "loss": 0.1925, + "step": 63710 + }, + { + "epoch": 2.97, + "learning_rate": 1.0173530011129749e-05, + "loss": 0.2355, + "step": 63715 + }, + { + "epoch": 2.97, + "learning_rate": 1.0172746226074961e-05, + "loss": 0.0492, + "step": 63720 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171962441020175e-05, + "loss": 0.033, + "step": 63725 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171178655965387e-05, + "loss": 0.0612, + "step": 63730 + }, + { + "epoch": 2.97, + "learning_rate": 1.0170394870910603e-05, + "loss": 0.0285, + "step": 63735 + }, + { + "epoch": 2.97, + "learning_rate": 1.0169611085855815e-05, + "loss": 0.0306, + "step": 63740 + }, + { + "epoch": 2.97, + "learning_rate": 1.016882730080103e-05, + "loss": 0.0759, + "step": 63745 + }, + { + "epoch": 2.97, + "learning_rate": 1.0168043515746241e-05, + "loss": 0.1336, + "step": 63750 + }, + { + "epoch": 2.97, + "learning_rate": 1.0167259730691457e-05, + "loss": 0.2051, + "step": 63755 + }, + { + "epoch": 2.98, + "learning_rate": 1.016647594563667e-05, + "loss": 0.1794, + "step": 63760 + }, + { + "epoch": 2.98, + "learning_rate": 1.0165692160581883e-05, + "loss": 0.2222, + "step": 63765 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164908375527095e-05, + "loss": 0.0448, + "step": 63770 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164124590472311e-05, + "loss": 0.1489, + "step": 63775 + }, + { + "epoch": 2.98, + "learning_rate": 1.0163340805417523e-05, + "loss": 0.0714, + "step": 63780 + }, + { + "epoch": 2.98, + "learning_rate": 1.0162557020362735e-05, + "loss": 0.1082, + "step": 63785 + }, + { + "epoch": 2.98, + "learning_rate": 1.0161773235307951e-05, + "loss": 0.0898, + "step": 63790 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160989450253163e-05, + "loss": 0.0668, + "step": 63795 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160205665198377e-05, + "loss": 0.0864, + "step": 63800 + }, + { + "epoch": 2.98, + "learning_rate": 1.015942188014359e-05, + "loss": 0.1504, + "step": 63805 + }, + { + "epoch": 2.98, + "learning_rate": 1.0158638095088805e-05, + "loss": 0.1931, + "step": 63810 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157854310034017e-05, + "loss": 0.1701, + "step": 63815 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157070524979231e-05, + "loss": 0.0198, + "step": 63820 + }, + { + "epoch": 2.98, + "learning_rate": 1.0156286739924443e-05, + "loss": 0.064, + "step": 63825 + }, + { + "epoch": 2.98, + "learning_rate": 1.0155502954869659e-05, + "loss": 0.0866, + "step": 63830 + }, + { + "epoch": 2.98, + "learning_rate": 1.0154719169814871e-05, + "loss": 0.0517, + "step": 63835 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153935384760085e-05, + "loss": 0.0199, + "step": 63840 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153151599705297e-05, + "loss": 0.1171, + "step": 63845 + }, + { + "epoch": 2.98, + "learning_rate": 1.015236781465051e-05, + "loss": 0.1281, + "step": 63850 + }, + { + "epoch": 2.98, + "learning_rate": 1.0151584029595725e-05, + "loss": 0.1711, + "step": 63855 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150800244540937e-05, + "loss": 0.1402, + "step": 63860 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150016459486151e-05, + "loss": 0.2753, + "step": 63865 + }, + { + "epoch": 2.98, + "learning_rate": 1.0149232674431363e-05, + "loss": 0.0264, + "step": 63870 + }, + { + "epoch": 2.98, + "learning_rate": 1.0148448889376579e-05, + "loss": 0.0742, + "step": 63875 + }, + { + "epoch": 2.98, + "learning_rate": 1.0147665104321791e-05, + "loss": 0.0883, + "step": 63880 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146881319267005e-05, + "loss": 0.059, + "step": 63885 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146097534212219e-05, + "loss": 0.0566, + "step": 63890 + }, + { + "epoch": 2.98, + "learning_rate": 1.0145313749157433e-05, + "loss": 0.2492, + "step": 63895 + }, + { + "epoch": 2.98, + "learning_rate": 1.0144529964102645e-05, + "loss": 0.1694, + "step": 63900 + }, + { + "epoch": 2.98, + "learning_rate": 1.0143746179047859e-05, + "loss": 0.1478, + "step": 63905 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142962393993073e-05, + "loss": 0.2933, + "step": 63910 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142178608938285e-05, + "loss": 0.3151, + "step": 63915 + }, + { + "epoch": 2.98, + "learning_rate": 1.0141394823883499e-05, + "loss": 0.0621, + "step": 63920 + }, + { + "epoch": 2.98, + "learning_rate": 1.0140611038828711e-05, + "loss": 0.0457, + "step": 63925 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139827253773927e-05, + "loss": 0.0642, + "step": 63930 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139043468719139e-05, + "loss": 0.091, + "step": 63935 + }, + { + "epoch": 2.98, + "learning_rate": 1.0138259683664353e-05, + "loss": 0.078, + "step": 63940 + }, + { + "epoch": 2.98, + "learning_rate": 1.0137475898609565e-05, + "loss": 0.0961, + "step": 63945 + }, + { + "epoch": 2.98, + "learning_rate": 1.013669211355478e-05, + "loss": 0.212, + "step": 63950 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135908328499993e-05, + "loss": 0.1122, + "step": 63955 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135124543445207e-05, + "loss": 0.1807, + "step": 63960 + }, + { + "epoch": 2.98, + "learning_rate": 1.0134340758390419e-05, + "loss": 0.216, + "step": 63965 + }, + { + "epoch": 2.98, + "learning_rate": 1.0133556973335635e-05, + "loss": 0.0358, + "step": 63970 + }, + { + "epoch": 2.99, + "learning_rate": 1.0132773188280847e-05, + "loss": 0.291, + "step": 63975 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131989403226059e-05, + "loss": 0.0242, + "step": 63980 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131205618171273e-05, + "loss": 0.1145, + "step": 63985 + }, + { + "epoch": 2.99, + "learning_rate": 1.0130421833116487e-05, + "loss": 0.0473, + "step": 63990 + }, + { + "epoch": 2.99, + "learning_rate": 1.01296380480617e-05, + "loss": 0.1068, + "step": 63995 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128854263006913e-05, + "loss": 0.0515, + "step": 64000 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128070477952127e-05, + "loss": 0.1869, + "step": 64005 + }, + { + "epoch": 2.99, + "learning_rate": 1.012728669289734e-05, + "loss": 0.2932, + "step": 64010 + }, + { + "epoch": 2.99, + "learning_rate": 1.0126502907842555e-05, + "loss": 0.3333, + "step": 64015 + }, + { + "epoch": 2.99, + "learning_rate": 1.0125719122787767e-05, + "loss": 0.0225, + "step": 64020 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124935337732983e-05, + "loss": 0.0358, + "step": 64025 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124151552678195e-05, + "loss": 0.0832, + "step": 64030 + }, + { + "epoch": 2.99, + "learning_rate": 1.0123367767623409e-05, + "loss": 0.0553, + "step": 64035 + }, + { + "epoch": 2.99, + "learning_rate": 1.012258398256862e-05, + "loss": 0.0834, + "step": 64040 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121800197513833e-05, + "loss": 0.1044, + "step": 64045 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121016412459049e-05, + "loss": 0.1887, + "step": 64050 + }, + { + "epoch": 2.99, + "learning_rate": 1.0120232627404261e-05, + "loss": 0.2406, + "step": 64055 + }, + { + "epoch": 2.99, + "learning_rate": 1.0119448842349475e-05, + "loss": 0.1824, + "step": 64060 + }, + { + "epoch": 2.99, + "learning_rate": 1.0118665057294687e-05, + "loss": 0.3448, + "step": 64065 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117881272239903e-05, + "loss": 0.0214, + "step": 64070 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117097487185115e-05, + "loss": 0.0121, + "step": 64075 + }, + { + "epoch": 2.99, + "learning_rate": 1.0116313702130329e-05, + "loss": 0.0553, + "step": 64080 + }, + { + "epoch": 2.99, + "learning_rate": 1.0115529917075541e-05, + "loss": 0.0827, + "step": 64085 + }, + { + "epoch": 2.99, + "learning_rate": 1.0114746132020757e-05, + "loss": 0.0886, + "step": 64090 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113962346965969e-05, + "loss": 0.0533, + "step": 64095 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113178561911183e-05, + "loss": 0.143, + "step": 64100 + }, + { + "epoch": 2.99, + "learning_rate": 1.0112394776856397e-05, + "loss": 0.1127, + "step": 64105 + }, + { + "epoch": 2.99, + "learning_rate": 1.0111610991801609e-05, + "loss": 0.2126, + "step": 64110 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110827206746823e-05, + "loss": 0.2579, + "step": 64115 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110043421692035e-05, + "loss": 0.0479, + "step": 64120 + }, + { + "epoch": 2.99, + "learning_rate": 1.010925963663725e-05, + "loss": 0.0203, + "step": 64125 + }, + { + "epoch": 2.99, + "learning_rate": 1.0108475851582463e-05, + "loss": 0.031, + "step": 64130 + }, + { + "epoch": 2.99, + "learning_rate": 1.0107692066527677e-05, + "loss": 0.1182, + "step": 64135 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106908281472889e-05, + "loss": 0.059, + "step": 64140 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106124496418104e-05, + "loss": 0.1079, + "step": 64145 + }, + { + "epoch": 2.99, + "learning_rate": 1.0105340711363317e-05, + "loss": 0.0826, + "step": 64150 + }, + { + "epoch": 2.99, + "learning_rate": 1.010455692630853e-05, + "loss": 0.1238, + "step": 64155 + }, + { + "epoch": 2.99, + "learning_rate": 1.0103773141253743e-05, + "loss": 0.1699, + "step": 64160 + }, + { + "epoch": 2.99, + "learning_rate": 1.0102989356198958e-05, + "loss": 0.224, + "step": 64165 + }, + { + "epoch": 2.99, + "learning_rate": 1.010220557114417e-05, + "loss": 0.0326, + "step": 64170 + }, + { + "epoch": 2.99, + "learning_rate": 1.0101421786089383e-05, + "loss": 0.061, + "step": 64175 + }, + { + "epoch": 2.99, + "learning_rate": 1.0100638001034597e-05, + "loss": 0.0567, + "step": 64180 + }, + { + "epoch": 2.99, + "learning_rate": 1.0099854215979809e-05, + "loss": 0.0547, + "step": 64185 + }, + { + "epoch": 3.0, + "learning_rate": 1.0099070430925024e-05, + "loss": 0.0667, + "step": 64190 + }, + { + "epoch": 3.0, + "learning_rate": 1.0098286645870237e-05, + "loss": 0.1063, + "step": 64195 + }, + { + "epoch": 3.0, + "learning_rate": 1.009750286081545e-05, + "loss": 0.1263, + "step": 64200 + }, + { + "epoch": 3.0, + "learning_rate": 1.0096719075760664e-05, + "loss": 0.1766, + "step": 64205 + }, + { + "epoch": 3.0, + "learning_rate": 1.0095935290705878e-05, + "loss": 0.2074, + "step": 64210 + }, + { + "epoch": 3.0, + "learning_rate": 1.009515150565109e-05, + "loss": 0.2975, + "step": 64215 + }, + { + "epoch": 3.0, + "learning_rate": 1.0094367720596305e-05, + "loss": 0.0328, + "step": 64220 + }, + { + "epoch": 3.0, + "learning_rate": 1.0093583935541518e-05, + "loss": 0.0301, + "step": 64225 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092800150486732e-05, + "loss": 0.051, + "step": 64230 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092016365431945e-05, + "loss": 0.0794, + "step": 64235 + }, + { + "epoch": 3.0, + "learning_rate": 1.0091232580377157e-05, + "loss": 0.2081, + "step": 64240 + }, + { + "epoch": 3.0, + "learning_rate": 1.0090448795322372e-05, + "loss": 0.0887, + "step": 64245 + }, + { + "epoch": 3.0, + "learning_rate": 1.0089665010267585e-05, + "loss": 0.0939, + "step": 64250 + }, + { + "epoch": 3.0, + "learning_rate": 1.0088881225212798e-05, + "loss": 0.152, + "step": 64255 + }, + { + "epoch": 3.0, + "learning_rate": 1.008809744015801e-05, + "loss": 0.1754, + "step": 64260 + }, + { + "epoch": 3.0, + "learning_rate": 1.0087313655103226e-05, + "loss": 0.2495, + "step": 64265 + }, + { + "epoch": 3.0, + "learning_rate": 1.0086529870048438e-05, + "loss": 0.0218, + "step": 64270 + }, + { + "epoch": 3.0, + "learning_rate": 1.0085746084993652e-05, + "loss": 0.0405, + "step": 64275 + }, + { + "epoch": 3.0, + "learning_rate": 1.0084962299938865e-05, + "loss": 0.0995, + "step": 64280 + }, + { + "epoch": 3.0, + "learning_rate": 1.008417851488408e-05, + "loss": 0.1217, + "step": 64285 + }, + { + "epoch": 3.0, + "learning_rate": 1.0083394729829292e-05, + "loss": 0.1712, + "step": 64290 + }, + { + "epoch": 3.0, + "eval_cer": 0.013233947127380689, + "eval_loss": 0.03575053811073303, + "eval_runtime": 477.3319, + "eval_samples_per_second": 39.909, + "eval_steps_per_second": 4.99, + "eval_wer": 0.11354552183567727, + "step": 64293 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 7.718671206987888e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-64293/training_args.bin b/checkpoint-64293/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-64293/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/checkpoint-85724/config.json b/checkpoint-85724/config.json new file mode 100644 index 0000000000000000000000000000000000000000..382a3e79497e514ac876eee8114c7079c255a204 --- /dev/null +++ b/checkpoint-85724/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "facebook/wav2vec2-base-960h", + "activation_dropout": 0.1, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.18.0", + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} diff --git a/checkpoint-85724/optimizer.pt b/checkpoint-85724/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..97f00a57b411a4fe67eeb8f4f5bb501bab6b737e --- /dev/null +++ b/checkpoint-85724/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a235f877a16cc3cc481fb007efabb23402c955930dd66740e578cafe95e626e +size 1847865 diff --git a/checkpoint-85724/preprocessor_config.json b/checkpoint-85724/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a0b7227fc1d916e469b14f6c154ad6dfea1e6891 --- /dev/null +++ b/checkpoint-85724/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-85724/pytorch_model.bin b/checkpoint-85724/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0523ca92902bf02a8bbec420436b52502b46e9f2 --- /dev/null +++ b/checkpoint-85724/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e9058ea24ca21d517ab6f94e25cf3eb81964c10fead6db6a2ac15ce0811969 +size 377656855 diff --git a/checkpoint-85724/rng_state.pth b/checkpoint-85724/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0dfc6303f4839139646d656a51b1581dda538ce7 --- /dev/null +++ b/checkpoint-85724/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1df3868dc7c2780c025b5eb7d132a5557c1ecd5bea563e8b403134ca549d3d +size 14503 diff --git a/checkpoint-85724/scaler.pt b/checkpoint-85724/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2f8acbc584b4f88f0efc89e112eb7cb3f3e8851 --- /dev/null +++ b/checkpoint-85724/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7c2b08ac7276e96be9a632cacd23f52961a7ac34826dce1e6336577087994d +size 559 diff --git a/checkpoint-85724/scheduler.pt b/checkpoint-85724/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d8b8e4c4218795db9e7da6c97753d35432b4e06 --- /dev/null +++ b/checkpoint-85724/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c3f795e18f84e343b6335c3d818ecfcf1febb50f75f65f332a004fb0807c5ec +size 623 diff --git a/checkpoint-85724/trainer_state.json b/checkpoint-85724/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f332b3a997c7a13e3aad7bbee8076a85c8316f7 --- /dev/null +++ b/checkpoint-85724/trainer_state.json @@ -0,0 +1,102920 @@ +{ + "best_metric": 0.03575053811073303, + "best_model_checkpoint": "wav2vec2-base-pem123-960h-la/checkpoint-64293", + "epoch": 4.0, + "global_step": 85724, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 2.6531, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.4824, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.2682, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.2567, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.5979, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.3327, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.7519, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.5748, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.6e-07, + "loss": 3.5357, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 9.400000000000001e-07, + "loss": 3.4531, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 2.5381, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.14e-06, + "loss": 2.9048, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.0937, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.34e-06, + "loss": 3.0091, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.44e-06, + "loss": 2.8452, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.54e-06, + "loss": 2.6674, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.6400000000000002e-06, + "loss": 2.9619, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.74e-06, + "loss": 2.7327, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.8400000000000002e-06, + "loss": 2.7925, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 1.94e-06, + "loss": 3.0929, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.04e-06, + "loss": 1.7821, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 1.9388, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 2.1683, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 1.8805, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 2.0734, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.5400000000000002e-06, + "loss": 2.0576, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-06, + "loss": 1.778, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.7400000000000004e-06, + "loss": 1.866, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.84e-06, + "loss": 2.0255, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.9400000000000002e-06, + "loss": 2.1399, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 1.4145, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.1400000000000004e-06, + "loss": 1.2365, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.2400000000000003e-06, + "loss": 1.5569, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.3400000000000006e-06, + "loss": 1.6138, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.44e-06, + "loss": 1.3237, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.54e-06, + "loss": 1.3709, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.6400000000000003e-06, + "loss": 1.475, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.74e-06, + "loss": 1.5188, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.7965, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 3.94e-06, + "loss": 1.9079, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.04e-06, + "loss": 1.1918, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.14e-06, + "loss": 0.9466, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.24e-06, + "loss": 1.186, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.34e-06, + "loss": 1.1864, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.440000000000001e-06, + "loss": 1.1844, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.540000000000001e-06, + "loss": 1.2449, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.6400000000000005e-06, + "loss": 1.5866, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.74e-06, + "loss": 1.3059, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.84e-06, + "loss": 1.4398, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 4.94e-06, + "loss": 1.8654, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.04e-06, + "loss": 1.2339, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.140000000000001e-06, + "loss": 0.8202, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.240000000000001e-06, + "loss": 1.151, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.3400000000000005e-06, + "loss": 1.1299, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.154, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.540000000000001e-06, + "loss": 1.2657, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.64e-06, + "loss": 1.3412, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.72e-06, + "loss": 1.2532, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.82e-06, + "loss": 1.5254, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 5.92e-06, + "loss": 1.9021, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.02e-06, + "loss": 1.2932, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.120000000000001e-06, + "loss": 0.882, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.220000000000001e-06, + "loss": 0.8607, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.3200000000000005e-06, + "loss": 0.9375, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.42e-06, + "loss": 1.0688, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.520000000000001e-06, + "loss": 1.0282, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.620000000000001e-06, + "loss": 1.1712, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.720000000000001e-06, + "loss": 1.3186, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.820000000000001e-06, + "loss": 1.3102, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 2.0291, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.0834, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 0.7925, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.9559, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.9066, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 1.0408, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 1.0672, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 1.3249, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.3579, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.4037, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.5432, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9596, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.6342, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.8461, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.9826, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.9279, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8814, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 1.1263, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.0968, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.2043, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.5603, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.9924, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.7293, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.7576, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.7923, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.8264, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8031, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2293, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9651, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.3314, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.4383, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.9384, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.6344, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.8903, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.8112, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.9797, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.7961, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 1.0021, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0700000000000001e-05, + "loss": 1.111, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0121, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.3162, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8775, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.6268, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6093, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6371, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.7299, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.8892, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.8902, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 1.1263, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 1.2628, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 1.4236, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 0.8066, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.6171, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.6193, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.7038, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.7382, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.8153, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.8639, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.985, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.9144, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2459, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8517, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.4846, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.5826, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7343, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7328, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.6546, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.8793, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.8999, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 1.1491, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 1.377, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 0.7843, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.622, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5346, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6517, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6661, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.7379, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7839, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0163, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.9786, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 1.2756, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8289, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.5909, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.5347, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.7078, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.6262, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.8401, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.6788, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.9063, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9448, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 1.3078, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.7461, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.4522, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5883, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.5923, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.7269, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.6976, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.953, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.952, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 1.4978, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 0.8314, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.5123, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.6553, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.8417, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.7153, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6923, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.8491, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1041, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 0.6685, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.467, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.478, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.6318, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.5477, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.8122, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.7658, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.8465, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.8287, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 1.1613, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 0.5815, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.3932, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3984, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.5436, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.5992, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.6758, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.6634, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.8048, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 1.0593, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 1.3275, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 0.745, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999216214945216e-05, + "loss": 0.4186, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999843242989043e-05, + "loss": 0.4657, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999764864483564e-05, + "loss": 0.3849, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996864859780857e-05, + "loss": 0.6111, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996081074726068e-05, + "loss": 0.5342, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.999529728967128e-05, + "loss": 0.6535, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994513504616496e-05, + "loss": 0.7321, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.999372971956171e-05, + "loss": 0.7966, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992945934506923e-05, + "loss": 1.1703, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9992162149452137e-05, + "loss": 0.6399, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9991378364397348e-05, + "loss": 0.3462, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990594579342565e-05, + "loss": 0.4746, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989810794287776e-05, + "loss": 0.4348, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.998902700923299e-05, + "loss": 0.5812, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988243224178203e-05, + "loss": 0.5924, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987459439123414e-05, + "loss": 0.6898, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.998667565406863e-05, + "loss": 0.7083, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985891869013842e-05, + "loss": 0.9508, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985108083959056e-05, + "loss": 1.2479, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.998432429890427e-05, + "loss": 0.6903, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983540513849483e-05, + "loss": 0.3722, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982756728794697e-05, + "loss": 0.4207, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.998197294373991e-05, + "loss": 0.5722, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981189158685125e-05, + "loss": 0.5865, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.998040537363034e-05, + "loss": 0.4989, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.997962158857555e-05, + "loss": 0.5998, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978837803520764e-05, + "loss": 0.9028, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978054018465977e-05, + "loss": 0.8579, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.997727023341119e-05, + "loss": 1.2055, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976486448356405e-05, + "loss": 0.6244, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975702663301616e-05, + "loss": 0.3915, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974918878246833e-05, + "loss": 0.4356, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974135093192044e-05, + "loss": 0.5114, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973351308137257e-05, + "loss": 0.5035, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.997256752308247e-05, + "loss": 0.6821, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971783738027685e-05, + "loss": 0.5494, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.99709999529729e-05, + "loss": 0.6005, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970216167918113e-05, + "loss": 0.8884, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969432382863324e-05, + "loss": 0.9246, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968648597808538e-05, + "loss": 0.5223, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.996786481275375e-05, + "loss": 0.3661, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967081027698965e-05, + "loss": 0.5004, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.996629724264418e-05, + "loss": 0.4138, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965513457589393e-05, + "loss": 0.6478, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964729672534607e-05, + "loss": 0.5358, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963945887479818e-05, + "loss": 0.664, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963162102425035e-05, + "loss": 0.5272, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962378317370245e-05, + "loss": 0.9186, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.996159453231546e-05, + "loss": 1.262, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960810747260673e-05, + "loss": 0.5889, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960026962205887e-05, + "loss": 0.4323, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.99592431771511e-05, + "loss": 0.3947, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.995845939209631e-05, + "loss": 0.5637, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957675607041525e-05, + "loss": 0.4977, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.995689182198674e-05, + "loss": 0.5272, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956108036931953e-05, + "loss": 0.5806, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955324251877167e-05, + "loss": 0.7725, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.995454046682238e-05, + "loss": 1.0007, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.995375668176759e-05, + "loss": 1.2275, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.995297289671281e-05, + "loss": 0.5902, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.995218911165802e-05, + "loss": 0.3678, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951405326603233e-05, + "loss": 0.3997, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950621541548447e-05, + "loss": 0.3824, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.994983775649366e-05, + "loss": 0.45, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949053971438875e-05, + "loss": 0.4312, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948270186384086e-05, + "loss": 0.6568, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947486401329303e-05, + "loss": 0.7326, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946702616274513e-05, + "loss": 1.6595, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945918831219727e-05, + "loss": 1.1587, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.994513504616494e-05, + "loss": 0.5287, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944351261110155e-05, + "loss": 0.4248, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.994356747605537e-05, + "loss": 0.4569, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942783691000583e-05, + "loss": 0.4401, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941999905945793e-05, + "loss": 0.475, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.994121612089101e-05, + "loss": 0.5074, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.994043233583622e-05, + "loss": 0.6305, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939648550781435e-05, + "loss": 0.6242, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.993886476572665e-05, + "loss": 0.6831, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.993808098067186e-05, + "loss": 0.9001, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937297195617077e-05, + "loss": 0.5865, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9936513410562287e-05, + "loss": 0.332, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.99357296255075e-05, + "loss": 0.4441, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934945840452715e-05, + "loss": 0.3415, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.993416205539793e-05, + "loss": 0.4253, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933378270343143e-05, + "loss": 0.594, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932594485288357e-05, + "loss": 0.7563, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.993181070023357e-05, + "loss": 0.7389, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931026915178785e-05, + "loss": 0.8635, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930243130123995e-05, + "loss": 1.0338, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929459345069212e-05, + "loss": 0.4737, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928675560014423e-05, + "loss": 0.2898, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927891774959637e-05, + "loss": 0.3928, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992710798990485e-05, + "loss": 0.4041, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.992632420485006e-05, + "loss": 0.4331, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.992554041979528e-05, + "loss": 0.5609, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.992475663474049e-05, + "loss": 0.545, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923972849685703e-05, + "loss": 0.6846, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923189064630917e-05, + "loss": 0.7526, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240527957613e-05, + "loss": 1.0243, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921621494521345e-05, + "loss": 0.5665, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.992083770946656e-05, + "loss": 0.3, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.992005392441177e-05, + "loss": 0.3819, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919270139356986e-05, + "loss": 0.4012, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918486354302197e-05, + "loss": 0.4596, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.991770256924741e-05, + "loss": 0.5021, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916918784192625e-05, + "loss": 0.4451, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.991613499913784e-05, + "loss": 0.704, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915351214083053e-05, + "loss": 0.6392, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914567429028263e-05, + "loss": 1.1391, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.991378364397348e-05, + "loss": 0.5388, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.991299985891869e-05, + "loss": 0.3753, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912216073863905e-05, + "loss": 0.3058, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.991143228880912e-05, + "loss": 0.4206, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910648503754333e-05, + "loss": 0.3922, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909864718699547e-05, + "loss": 0.4388, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.990908093364476e-05, + "loss": 0.485, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.990829714858997e-05, + "loss": 0.7441, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907513363535185e-05, + "loss": 0.7177, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.99067295784804e-05, + "loss": 1.129, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905945793425613e-05, + "loss": 0.4943, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9905162008370827e-05, + "loss": 0.299, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9904378223316037e-05, + "loss": 0.4898, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9903594438261254e-05, + "loss": 0.3973, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9902810653206465e-05, + "loss": 0.4336, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.990202686815168e-05, + "loss": 0.4543, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.9901243083096893e-05, + "loss": 0.4828, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.9900459298042107e-05, + "loss": 0.7448, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.989967551298732e-05, + "loss": 0.6852, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.9898891727932534e-05, + "loss": 0.9936, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.989810794287775e-05, + "loss": 0.4738, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.989732415782296e-05, + "loss": 0.2747, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896540372768173e-05, + "loss": 0.2839, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895756587713387e-05, + "loss": 0.3596, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.98949728026586e-05, + "loss": 0.4797, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9894189017603815e-05, + "loss": 0.4667, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.989340523254903e-05, + "loss": 0.5085, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.989262144749424e-05, + "loss": 0.6464, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891837662439456e-05, + "loss": 0.7383, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891053877384667e-05, + "loss": 1.1763, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.989027009232988e-05, + "loss": 0.4347, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889486307275095e-05, + "loss": 0.3092, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.988870252222031e-05, + "loss": 0.3977, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887918737165522e-05, + "loss": 0.3769, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887134952110733e-05, + "loss": 0.4335, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886351167055947e-05, + "loss": 0.5907, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.988556738200116e-05, + "loss": 0.5505, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884783596946375e-05, + "loss": 0.515, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.988399981189159e-05, + "loss": 0.7169, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883216026836802e-05, + "loss": 1.006, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882432241782016e-05, + "loss": 0.5365, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.988164845672723e-05, + "loss": 0.2144, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.988086467167244e-05, + "loss": 0.4321, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880080886617658e-05, + "loss": 0.4422, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.987929710156287e-05, + "loss": 0.3549, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878513316508082e-05, + "loss": 0.418, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877729531453296e-05, + "loss": 0.5311, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876945746398507e-05, + "loss": 0.5819, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876161961343724e-05, + "loss": 0.7497, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875378176288935e-05, + "loss": 1.267, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.987459439123415e-05, + "loss": 0.5148, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873810606179363e-05, + "loss": 0.3745, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873026821124576e-05, + "loss": 0.3499, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.987224303606979e-05, + "loss": 0.3857, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9871459251015004e-05, + "loss": 0.3592, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.9870675465960215e-05, + "loss": 0.4503, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869891680905432e-05, + "loss": 0.5993, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9869107895850643e-05, + "loss": 0.7032, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9868324110795856e-05, + "loss": 0.7298, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.986754032574107e-05, + "loss": 1.0798, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866756540686284e-05, + "loss": 0.5357, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9865972755631498e-05, + "loss": 0.3394, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.986518897057671e-05, + "loss": 0.2815, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9864405185521926e-05, + "loss": 0.3833, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863621400467137e-05, + "loss": 0.3657, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.986283761541235e-05, + "loss": 0.4627, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9862053830357564e-05, + "loss": 0.5369, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9861270045302778e-05, + "loss": 0.6602, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9860486260247992e-05, + "loss": 0.5755, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9859702475193206e-05, + "loss": 1.0243, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858918690138417e-05, + "loss": 0.6036, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9858134905083634e-05, + "loss": 0.2902, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9857351120028844e-05, + "loss": 0.3318, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.9856567334974058e-05, + "loss": 0.3193, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.9855783549919272e-05, + "loss": 0.4973, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.9854999764864483e-05, + "loss": 0.5275, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.98542159798097e-05, + "loss": 0.4504, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.985343219475491e-05, + "loss": 0.528, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.9852648409700124e-05, + "loss": 0.8864, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.985186462464534e-05, + "loss": 0.9087, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9851080839590552e-05, + "loss": 0.5253, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9850297054535766e-05, + "loss": 0.3372, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.984951326948098e-05, + "loss": 0.3391, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9848729484426194e-05, + "loss": 0.3418, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9847945699371408e-05, + "loss": 0.3695, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.984716191431662e-05, + "loss": 0.4408, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9846378129261832e-05, + "loss": 0.4869, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9845594344207046e-05, + "loss": 0.4849, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.984481055915226e-05, + "loss": 0.6679, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844026774097474e-05, + "loss": 0.9504, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9843242989042685e-05, + "loss": 0.4752, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9842459203987902e-05, + "loss": 0.2645, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841675418933112e-05, + "loss": 0.2796, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840891633878326e-05, + "loss": 0.4134, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010784882354e-05, + "loss": 0.4873, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839324063768754e-05, + "loss": 0.5061, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838540278713968e-05, + "loss": 0.5597, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.9837756493659182e-05, + "loss": 0.5494, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836972708604392e-05, + "loss": 0.7516, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9836188923549606e-05, + "loss": 1.0119, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.983540513849482e-05, + "loss": 0.4939, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9834621353440034e-05, + "loss": 0.3384, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833837568385248e-05, + "loss": 0.292, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9833053783330462e-05, + "loss": 0.3244, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9832269998275676e-05, + "loss": 0.3806, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9831486213220886e-05, + "loss": 0.4527, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9830702428166104e-05, + "loss": 0.5149, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829918643111314e-05, + "loss": 0.7397, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.9829134858056528e-05, + "loss": 0.7535, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.9828351073001742e-05, + "loss": 0.9471, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.9827567287946956e-05, + "loss": 0.4706, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.982678350289217e-05, + "loss": 0.3369, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.982599971783738e-05, + "loss": 0.3418, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9825215932782594e-05, + "loss": 0.3639, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9824432147727808e-05, + "loss": 0.4024, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9823648362673022e-05, + "loss": 0.4048, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9822864577618236e-05, + "loss": 0.6293, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.982208079256345e-05, + "loss": 0.6106, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.982129700750866e-05, + "loss": 0.6771, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9820513222453878e-05, + "loss": 1.2044, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9819729437399088e-05, + "loss": 0.5152, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818945652344302e-05, + "loss": 0.2559, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9818161867289516e-05, + "loss": 0.2496, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.981737808223473e-05, + "loss": 0.2577, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9816594297179944e-05, + "loss": 0.5568, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9815810512125154e-05, + "loss": 0.4338, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.981502672707037e-05, + "loss": 0.6521, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9814242942015582e-05, + "loss": 0.5643, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9813459156960796e-05, + "loss": 0.4993, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.981267537190601e-05, + "loss": 1.2342, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811891586851224e-05, + "loss": 0.4659, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9811107801796438e-05, + "loss": 0.2984, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.981032401674165e-05, + "loss": 0.3362, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9809540231686862e-05, + "loss": 0.2982, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.980875644663208e-05, + "loss": 0.3995, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.980797266157729e-05, + "loss": 0.4959, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9807188876522504e-05, + "loss": 0.5604, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9806405091467718e-05, + "loss": 0.6278, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9805621306412928e-05, + "loss": 0.751, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9804837521358146e-05, + "loss": 1.2158, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9804053736303356e-05, + "loss": 0.441, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.980326995124857e-05, + "loss": 0.2743, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9802486166193784e-05, + "loss": 0.4457, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9801702381138998e-05, + "loss": 0.4269, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800918596084212e-05, + "loss": 0.3709, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9800134811029426e-05, + "loss": 0.4595, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.979935102597464e-05, + "loss": 0.6209, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.9798567240919853e-05, + "loss": 0.5724, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9797783455865064e-05, + "loss": 0.7098, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.979699967081028e-05, + "loss": 1.0224, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9796215885755492e-05, + "loss": 0.4748, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9795432100700706e-05, + "loss": 0.188, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.979464831564592e-05, + "loss": 0.2832, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.979386453059113e-05, + "loss": 0.3772, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9793080745536347e-05, + "loss": 0.3791, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9792296960481558e-05, + "loss": 0.4206, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9791513175426772e-05, + "loss": 0.591, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9790729390371986e-05, + "loss": 0.5566, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.97899456053172e-05, + "loss": 0.7117, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9789161820262414e-05, + "loss": 0.9647, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9788378035207627e-05, + "loss": 0.4322, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9787594250152838e-05, + "loss": 0.3008, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786810465098055e-05, + "loss": 0.3233, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.9786026680043266e-05, + "loss": 0.4009, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.978524289498848e-05, + "loss": 0.378, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9784459109933694e-05, + "loss": 0.4942, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.9783675324878907e-05, + "loss": 0.3688, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.978289153982412e-05, + "loss": 0.6061, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.9782107754769332e-05, + "loss": 0.5694, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.978132396971455e-05, + "loss": 0.9538, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.978054018465976e-05, + "loss": 0.4312, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9779756399604974e-05, + "loss": 0.2543, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9778972614550188e-05, + "loss": 0.231, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.97781888294954e-05, + "loss": 0.2848, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9777405044440615e-05, + "loss": 0.3575, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.977662125938583e-05, + "loss": 0.4481, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.977583747433104e-05, + "loss": 0.4305, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9775053689276254e-05, + "loss": 0.7923, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9774269904221468e-05, + "loss": 0.5847, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.977348611916668e-05, + "loss": 1.0685, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9772702334111895e-05, + "loss": 0.5119, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9771918549057106e-05, + "loss": 0.2529, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.9771134764002323e-05, + "loss": 0.2534, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9770350978947534e-05, + "loss": 0.4104, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9769567193892748e-05, + "loss": 0.4233, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.976878340883796e-05, + "loss": 0.3437, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9767999623783175e-05, + "loss": 0.5363, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.976721583872839e-05, + "loss": 0.6265, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.9766432053673603e-05, + "loss": 0.6902, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.9765648268618817e-05, + "loss": 1.1218, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.9764864483564028e-05, + "loss": 0.3805, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.976408069850924e-05, + "loss": 0.2308, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9763296913454455e-05, + "loss": 0.2854, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.976251312839967e-05, + "loss": 0.3471, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9761729343344883e-05, + "loss": 0.418, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760945558290097e-05, + "loss": 0.3995, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9760161773235308e-05, + "loss": 0.5414, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9759377988180525e-05, + "loss": 0.4674, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9758594203125736e-05, + "loss": 0.548, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.975781041807095e-05, + "loss": 1.1554, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9757026633016163e-05, + "loss": 0.4927, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9756242847961377e-05, + "loss": 0.2384, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.975545906290659e-05, + "loss": 0.2467, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.97546752778518e-05, + "loss": 0.2739, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.9753891492797016e-05, + "loss": 0.4415, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.975310770774223e-05, + "loss": 0.3919, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.9752323922687443e-05, + "loss": 0.4875, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.9751540137632657e-05, + "loss": 0.5478, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.975075635257787e-05, + "loss": 0.6074, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9749972567523085e-05, + "loss": 0.7679, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.97491887824683e-05, + "loss": 0.5413, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.974840499741351e-05, + "loss": 0.2023, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9747621212358727e-05, + "loss": 0.2183, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9746837427303937e-05, + "loss": 0.327, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.974605364224915e-05, + "loss": 0.3329, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9745269857194365e-05, + "loss": 0.3636, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9744486072139576e-05, + "loss": 0.4454, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9743702287084793e-05, + "loss": 0.5881, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742918502030003e-05, + "loss": 0.6546, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.9742134716975217e-05, + "loss": 0.9867, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.974135093192043e-05, + "loss": 0.4603, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.9740567146865645e-05, + "loss": 0.2764, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.973978336181086e-05, + "loss": 0.2423, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738999576756073e-05, + "loss": 0.2468, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.9738215791701284e-05, + "loss": 0.365, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.97374320066465e-05, + "loss": 0.4589, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.973664822159171e-05, + "loss": 0.4907, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.9735864436536925e-05, + "loss": 0.5466, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.973508065148214e-05, + "loss": 0.4969, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9734296866427353e-05, + "loss": 1.0733, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9733513081372567e-05, + "loss": 0.4684, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9732729296317777e-05, + "loss": 0.2367, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731945511262995e-05, + "loss": 0.2683, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9731161726208205e-05, + "loss": 0.2503, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.973037794115342e-05, + "loss": 0.3385, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9729594156098633e-05, + "loss": 0.3533, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9728810371043847e-05, + "loss": 0.434, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.972802658598906e-05, + "loss": 0.4045, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.9727242800934275e-05, + "loss": 0.5531, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726459015879485e-05, + "loss": 0.8087, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.9725675230824703e-05, + "loss": 0.4245, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724891445769913e-05, + "loss": 0.2777, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9724107660715127e-05, + "loss": 0.3036, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.972332387566034e-05, + "loss": 0.4066, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.972254009060555e-05, + "loss": 0.3239, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.972175630555077e-05, + "loss": 0.3395, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.972097252049598e-05, + "loss": 0.4637, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.9720188735441193e-05, + "loss": 0.486, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9719404950386407e-05, + "loss": 0.6314, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.971862116533162e-05, + "loss": 0.8526, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9717837380276835e-05, + "loss": 0.4288, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.971705359522205e-05, + "loss": 0.227, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9716269810167263e-05, + "loss": 0.2939, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9715486025112477e-05, + "loss": 0.2735, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9714702240057687e-05, + "loss": 0.3667, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.97139184550029e-05, + "loss": 0.385, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9713134669948115e-05, + "loss": 0.3763, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.971235088489333e-05, + "loss": 0.4141, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9711567099838543e-05, + "loss": 0.6103, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9710783314783753e-05, + "loss": 0.7695, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.970999952972897e-05, + "loss": 0.4827, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.970921574467418e-05, + "loss": 0.2578, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9708431959619395e-05, + "loss": 0.2754, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.970764817456461e-05, + "loss": 0.2874, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706864389509823e-05, + "loss": 0.2498, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9706080604455037e-05, + "loss": 0.5192, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.970529681940025e-05, + "loss": 0.3749, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.970451303434546e-05, + "loss": 0.5461, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9703729249290675e-05, + "loss": 0.6363, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.970294546423589e-05, + "loss": 1.0204, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9702161679181103e-05, + "loss": 0.4849, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9701377894126317e-05, + "loss": 0.1746, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.970059410907153e-05, + "loss": 0.259, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699810324016745e-05, + "loss": 0.3586, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9699026538961955e-05, + "loss": 0.3875, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9698242753907172e-05, + "loss": 0.3089, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9697458968852383e-05, + "loss": 0.4891, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9696675183797597e-05, + "loss": 0.4279, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.969589139874281e-05, + "loss": 0.6177, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9695107613688025e-05, + "loss": 0.8893, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.969432382863324e-05, + "loss": 0.3806, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.969354004357845e-05, + "loss": 0.2398, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692756258523663e-05, + "loss": 0.2188, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9691972473468877e-05, + "loss": 0.3182, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.969118868841409e-05, + "loss": 0.3459, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9690404903359305e-05, + "loss": 0.2882, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.968962111830452e-05, + "loss": 0.3212, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.968899409026069e-05, + "loss": 0.4588, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9688210305205903e-05, + "loss": 0.6138, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9687426520151117e-05, + "loss": 0.837, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9686642735096327e-05, + "loss": 0.3779, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685858950041545e-05, + "loss": 0.2237, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9685075164986755e-05, + "loss": 0.3279, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.968429137993197e-05, + "loss": 0.2848, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.9683507594877183e-05, + "loss": 0.3314, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.9682723809822394e-05, + "loss": 0.3744, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.968194002476761e-05, + "loss": 0.4589, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.968115623971282e-05, + "loss": 0.6012, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.9680372454658035e-05, + "loss": 0.625, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.967958866960325e-05, + "loss": 0.9265, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678804884548463e-05, + "loss": 0.4029, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9678021099493677e-05, + "loss": 0.2582, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.967723731443889e-05, + "loss": 0.3223, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.96764535293841e-05, + "loss": 0.2428, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.967566974432932e-05, + "loss": 0.3014, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.967488595927453e-05, + "loss": 0.452, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9674102174219743e-05, + "loss": 0.3358, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9673318389164957e-05, + "loss": 0.4613, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.967253460411017e-05, + "loss": 0.5618, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9671750819055385e-05, + "loss": 0.7322, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670967034000595e-05, + "loss": 0.3473, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9670183248945813e-05, + "loss": 0.2555, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9669399463891023e-05, + "loss": 0.2629, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9668615678836237e-05, + "loss": 0.3049, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.966783189378145e-05, + "loss": 0.3242, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9667048108726665e-05, + "loss": 0.3861, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.966626432367188e-05, + "loss": 0.4608, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9665480538617093e-05, + "loss": 0.4212, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.9664696753562303e-05, + "loss": 0.6555, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.966391296850752e-05, + "loss": 0.7641, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.966312918345273e-05, + "loss": 0.4017, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9662345398397945e-05, + "loss": 0.1657, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.966156161334316e-05, + "loss": 0.2806, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.966077782828837e-05, + "loss": 0.2702, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659994043233587e-05, + "loss": 0.3138, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9659210258178797e-05, + "loss": 0.4255, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.965842647312401e-05, + "loss": 0.3488, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9657642688069225e-05, + "loss": 0.5055, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.965685890301444e-05, + "loss": 0.5626, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9656075117959653e-05, + "loss": 0.8002, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9655291332904867e-05, + "loss": 0.4219, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.965450754785008e-05, + "loss": 0.2463, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9653723762795294e-05, + "loss": 0.2057, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9652939977740505e-05, + "loss": 0.326, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.965215619268572e-05, + "loss": 0.3563, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.9651372407630933e-05, + "loss": 0.4195, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.9650588622576147e-05, + "loss": 0.3367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.964980483752136e-05, + "loss": 0.545, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.964902105246657e-05, + "loss": 0.6454, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.964823726741179e-05, + "loss": 0.7225, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9647453482357e-05, + "loss": 0.4606, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9646669697302213e-05, + "loss": 0.2679, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9645885912247427e-05, + "loss": 0.2538, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.964510212719264e-05, + "loss": 0.2758, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9644318342137855e-05, + "loss": 0.3146, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.964353455708307e-05, + "loss": 0.2646, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.964275077202828e-05, + "loss": 0.3698, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641966986973493e-05, + "loss": 0.505, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9641183201918707e-05, + "loss": 0.5847, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.964039941686392e-05, + "loss": 0.7694, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.9639615631809135e-05, + "loss": 0.4151, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.963883184675435e-05, + "loss": 0.2161, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.9638048061699562e-05, + "loss": 0.3054, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.9637264276644773e-05, + "loss": 0.2491, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.963648049158999e-05, + "loss": 0.2946, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.96356967065352e-05, + "loss": 0.3118, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.9634912921480415e-05, + "loss": 0.463, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.963412913642563e-05, + "loss": 0.3721, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9633345351370842e-05, + "loss": 0.6009, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.9632561566316056e-05, + "loss": 0.8445, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.9631777781261267e-05, + "loss": 0.4554, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.963099399620648e-05, + "loss": 0.2121, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9630210211151695e-05, + "loss": 0.2767, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.962942642609691e-05, + "loss": 0.2726, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9628642641042123e-05, + "loss": 0.4073, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627858855987336e-05, + "loss": 0.4101, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9627075070932547e-05, + "loss": 0.4055, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9626291285877764e-05, + "loss": 0.4796, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9625507500822975e-05, + "loss": 0.6178, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.962472371576819e-05, + "loss": 0.8758, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623939930713403e-05, + "loss": 0.4028, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9623156145658616e-05, + "loss": 0.2257, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.962237236060383e-05, + "loss": 0.2765, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.962158857554904e-05, + "loss": 0.2894, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9620804790494258e-05, + "loss": 0.2995, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.962002100543947e-05, + "loss": 0.3938, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.9619237220384683e-05, + "loss": 0.4108, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.9618453435329897e-05, + "loss": 0.4647, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.961766965027511e-05, + "loss": 0.5126, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616885865220324e-05, + "loss": 0.8012, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9616102080165538e-05, + "loss": 0.4131, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.961531829511075e-05, + "loss": 0.2214, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9614534510055966e-05, + "loss": 0.2513, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9613750725001177e-05, + "loss": 0.3865, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.961296693994639e-05, + "loss": 0.3377, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9612183154891604e-05, + "loss": 0.3707, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9611399369836818e-05, + "loss": 0.4052, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9610615584782032e-05, + "loss": 0.4237, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609831799727243e-05, + "loss": 0.466, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9609048014672457e-05, + "loss": 0.791, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.960826422961767e-05, + "loss": 0.3511, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.9607480444562884e-05, + "loss": 0.1677, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.96066966595081e-05, + "loss": 0.2672, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605912874453312e-05, + "loss": 0.27, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9605129089398526e-05, + "loss": 0.3319, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.960434530434374e-05, + "loss": 0.3659, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.960356151928895e-05, + "loss": 0.3154, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9602777734234168e-05, + "loss": 0.4368, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.960199394917938e-05, + "loss": 0.6238, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9601210164124592e-05, + "loss": 0.868, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9600426379069806e-05, + "loss": 0.439, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9599642594015017e-05, + "loss": 0.1922, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598858808960234e-05, + "loss": 0.2233, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9598075023905445e-05, + "loss": 0.3184, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.959729123885066e-05, + "loss": 0.3508, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9596507453795872e-05, + "loss": 0.2921, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9595723668741086e-05, + "loss": 0.3405, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.95949398836863e-05, + "loss": 0.5112, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9594156098631514e-05, + "loss": 0.4812, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.9593372313576725e-05, + "loss": 0.8224, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9592588528521942e-05, + "loss": 0.4547, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591804743467152e-05, + "loss": 0.1878, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.9591020958412366e-05, + "loss": 0.2636, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.959023717335758e-05, + "loss": 0.2779, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9589453388302794e-05, + "loss": 0.2307, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9588669603248008e-05, + "loss": 0.3005, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.958788581819322e-05, + "loss": 0.3427, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9587102033138436e-05, + "loss": 0.4767, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.9586318248083646e-05, + "loss": 0.4229, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.958553446302886e-05, + "loss": 0.8545, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9584750677974074e-05, + "loss": 0.4065, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583966892919288e-05, + "loss": 0.1509, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9583183107864502e-05, + "loss": 0.2052, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9582399322809716e-05, + "loss": 0.2437, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9581615537754926e-05, + "loss": 0.3728, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.958083175270014e-05, + "loss": 0.3682, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9580047967645354e-05, + "loss": 0.4861, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9579264182590568e-05, + "loss": 0.4066, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9578480397535782e-05, + "loss": 0.45, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9577696612480993e-05, + "loss": 0.7415, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.957691282742621e-05, + "loss": 0.4888, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.957612904237142e-05, + "loss": 0.2174, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.9575345257316634e-05, + "loss": 0.2577, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.9574561472261848e-05, + "loss": 0.2525, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.9573777687207062e-05, + "loss": 0.3246, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.9572993902152276e-05, + "loss": 0.3091, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.957221011709749e-05, + "loss": 0.3636, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9571426332042704e-05, + "loss": 0.4908, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9570642546987914e-05, + "loss": 0.444, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9569858761933128e-05, + "loss": 0.6554, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9569074976878342e-05, + "loss": 0.3102, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9568291191823556e-05, + "loss": 0.2166, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.956750740676877e-05, + "loss": 0.2067, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9566723621713984e-05, + "loss": 0.2251, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565939836659194e-05, + "loss": 0.3112, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.956515605160441e-05, + "loss": 0.3403, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9564372266549622e-05, + "loss": 0.3457, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9563588481494836e-05, + "loss": 0.5348, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.956280469644005e-05, + "loss": 0.5904, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.9562020911385264e-05, + "loss": 0.8875, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.9561237126330478e-05, + "loss": 0.3938, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.9560453341275688e-05, + "loss": 0.2407, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9559669556220902e-05, + "loss": 0.2626, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9558885771166116e-05, + "loss": 0.2197, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.955810198611133e-05, + "loss": 0.3859, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9557318201056544e-05, + "loss": 0.3984, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9556534416001758e-05, + "loss": 0.2618, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9555750630946972e-05, + "loss": 0.607, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554966845892186e-05, + "loss": 0.6173, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9554183060837396e-05, + "loss": 0.8174, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9553399275782613e-05, + "loss": 0.3763, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552615490727824e-05, + "loss": 0.232, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551831705673038e-05, + "loss": 0.1597, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9551047920618252e-05, + "loss": 0.2861, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9550264135563462e-05, + "loss": 0.2547, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.954948035050868e-05, + "loss": 0.3221, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.954869656545389e-05, + "loss": 0.3712, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547912780399104e-05, + "loss": 0.5022, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9547128995344318e-05, + "loss": 0.5154, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9546345210289532e-05, + "loss": 0.8972, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9545561425234746e-05, + "loss": 0.3731, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.954477764017996e-05, + "loss": 0.1438, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.954399385512517e-05, + "loss": 0.3258, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9543210070070387e-05, + "loss": 0.2788, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9542426285015598e-05, + "loss": 0.269, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9541642499960812e-05, + "loss": 0.3346, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.9540858714906026e-05, + "loss": 0.401, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.954007492985124e-05, + "loss": 0.3484, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9539291144796454e-05, + "loss": 0.5295, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9538507359741664e-05, + "loss": 0.7834, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.953772357468688e-05, + "loss": 0.3469, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9536939789632092e-05, + "loss": 0.3008, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9536156004577306e-05, + "loss": 0.2192, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.953537221952252e-05, + "loss": 0.2714, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9534588434467734e-05, + "loss": 0.236, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9533804649412948e-05, + "loss": 0.3313, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.953302086435816e-05, + "loss": 0.4521, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9532237079303372e-05, + "loss": 0.6107, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.953145329424859e-05, + "loss": 0.6532, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.95306695091938e-05, + "loss": 0.7622, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529885724139014e-05, + "loss": 0.3901, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.9529101939084228e-05, + "loss": 0.2575, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.9528318154029438e-05, + "loss": 0.1799, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9527534368974655e-05, + "loss": 0.2706, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9526750583919866e-05, + "loss": 0.333, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.952596679886508e-05, + "loss": 0.341, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9525183013810294e-05, + "loss": 0.3399, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9524399228755508e-05, + "loss": 0.4147, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.952361544370072e-05, + "loss": 0.4624, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9522831658645935e-05, + "loss": 0.7946, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.952204787359115e-05, + "loss": 0.3148, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9521264088536363e-05, + "loss": 0.2504, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9520480303481574e-05, + "loss": 0.1754, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9519696518426788e-05, + "loss": 0.2607, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518912733372e-05, + "loss": 0.3272, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9518128948317215e-05, + "loss": 0.426, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.951734516326243e-05, + "loss": 0.3973, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.951656137820764e-05, + "loss": 0.418, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.9515777593152857e-05, + "loss": 0.5203, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9514993808098068e-05, + "loss": 0.9427, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.951421002304328e-05, + "loss": 0.3731, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9513426237988496e-05, + "loss": 0.1844, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.951264245293371e-05, + "loss": 0.2464, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511858667878923e-05, + "loss": 0.1807, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9511074882824137e-05, + "loss": 0.3148, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9510291097769348e-05, + "loss": 0.2822, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.950950731271456e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9508723527659776e-05, + "loss": 0.432, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.950793974260499e-05, + "loss": 0.6259, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9507155957550203e-05, + "loss": 0.7135, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9506372172495417e-05, + "loss": 0.3204, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.950558838744063e-05, + "loss": 0.1675, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.950480460238584e-05, + "loss": 0.1806, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.950402081733106e-05, + "loss": 0.299, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.950323703227627e-05, + "loss": 0.2398, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.9502453247221483e-05, + "loss": 0.3808, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9501669462166697e-05, + "loss": 0.4253, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.950088567711191e-05, + "loss": 0.4762, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9500101892057125e-05, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9499318107002336e-05, + "loss": 0.8221, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.949853432194755e-05, + "loss": 0.416, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9497750536892763e-05, + "loss": 0.6598, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9496966751837977e-05, + "loss": 0.1326, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.949618296678319e-05, + "loss": 0.215, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9495399181728405e-05, + "loss": 0.3046, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9494615396673616e-05, + "loss": 0.3458, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493831611618833e-05, + "loss": 0.3501, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9493047826564044e-05, + "loss": 0.5032, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9492264041509257e-05, + "loss": 0.6051, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.949148025645447e-05, + "loss": 0.8645, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9490696471399685e-05, + "loss": 0.3597, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.94899126863449e-05, + "loss": 0.2018, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.948912890129011e-05, + "loss": 0.2225, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9488345116235327e-05, + "loss": 0.3058, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9487561331180537e-05, + "loss": 0.367, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.948677754612575e-05, + "loss": 0.3147, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9485993761070965e-05, + "loss": 0.3372, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.948520997601618e-05, + "loss": 0.3279, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.9484426190961393e-05, + "loss": 0.4893, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.9483642405906607e-05, + "loss": 0.8027, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482858620851818e-05, + "loss": 0.4597, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9482074835797035e-05, + "loss": 0.1776, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9481291050742245e-05, + "loss": 0.2143, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.948050726568746e-05, + "loss": 0.2351, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9479723480632673e-05, + "loss": 0.3237, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9478939695577884e-05, + "loss": 0.3484, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.94781559105231e-05, + "loss": 0.3139, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.947737212546831e-05, + "loss": 0.4198, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9476588340413525e-05, + "loss": 0.48, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.947580455535874e-05, + "loss": 0.7149, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9475020770303953e-05, + "loss": 0.4256, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9474236985249167e-05, + "loss": 0.2352, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.947345320019438e-05, + "loss": 0.197, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9472669415139595e-05, + "loss": 0.313, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.947188563008481e-05, + "loss": 0.2447, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.947110184503002e-05, + "loss": 0.2627, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9470318059975237e-05, + "loss": 0.2974, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9469534274920447e-05, + "loss": 0.4315, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.946875048986566e-05, + "loss": 0.6057, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467966704810875e-05, + "loss": 0.7785, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9467182919756085e-05, + "loss": 0.426, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9466399134701303e-05, + "loss": 0.3034, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9465615349646513e-05, + "loss": 0.1557, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9464831564591727e-05, + "loss": 0.2953, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.946404777953694e-05, + "loss": 0.4292, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9463263994482155e-05, + "loss": 0.468, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.946248020942737e-05, + "loss": 0.3723, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9461696424372583e-05, + "loss": 0.3748, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9460912639317793e-05, + "loss": 0.3476, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.946012885426301e-05, + "loss": 0.7008, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.945934506920822e-05, + "loss": 0.3942, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.9458561284153435e-05, + "loss": 0.1434, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.945777749909865e-05, + "loss": 0.1975, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456993714043863e-05, + "loss": 0.3374, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9456209928989077e-05, + "loss": 0.2609, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9455426143934287e-05, + "loss": 0.2869, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9454642358879505e-05, + "loss": 0.4011, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9453858573824715e-05, + "loss": 0.4172, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.945307478876993e-05, + "loss": 0.5234, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9452291003715143e-05, + "loss": 0.7829, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9451507218660357e-05, + "loss": 0.3145, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.945072343360557e-05, + "loss": 0.1722, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449939648550785e-05, + "loss": 0.2948, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9449155863495995e-05, + "loss": 0.2227, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.944837207844121e-05, + "loss": 0.2552, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9447588293386423e-05, + "loss": 0.3306, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9446804508331637e-05, + "loss": 0.2957, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.944602072327685e-05, + "loss": 0.5235, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.944523693822206e-05, + "loss": 0.5366, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.944445315316728e-05, + "loss": 0.753, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.944366936811249e-05, + "loss": 0.2989, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442885583057703e-05, + "loss": 0.1639, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.9442101798002917e-05, + "loss": 0.1844, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.944131801294813e-05, + "loss": 0.2972, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.9440534227893345e-05, + "loss": 0.3515, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.943975044283856e-05, + "loss": 0.2764, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438966657783773e-05, + "loss": 0.4514, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9438182872728983e-05, + "loss": 0.3424, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9437399087674197e-05, + "loss": 0.539, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.943661530261941e-05, + "loss": 0.8198, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9435831517564625e-05, + "loss": 0.3306, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.943504773250984e-05, + "loss": 0.1788, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9434263947455053e-05, + "loss": 0.2419, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9433480162400263e-05, + "loss": 0.3073, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.943269637734548e-05, + "loss": 0.1982, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.943191259229069e-05, + "loss": 0.3853, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9431128807235905e-05, + "loss": 0.3433, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.943034502218112e-05, + "loss": 0.373, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.9429561237126333e-05, + "loss": 0.4099, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.9428777452071547e-05, + "loss": 0.7581, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.9427993667016757e-05, + "loss": 0.3001, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.942720988196197e-05, + "loss": 0.1235, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.9426426096907185e-05, + "loss": 0.1975, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.94256423118524e-05, + "loss": 0.2757, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424858526797613e-05, + "loss": 0.2863, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9424074741742827e-05, + "loss": 0.3228, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.942329095668804e-05, + "loss": 0.3734, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9422507171633254e-05, + "loss": 0.4365, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9421723386578465e-05, + "loss": 0.4877, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420939601523682e-05, + "loss": 0.7756, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9420155816468893e-05, + "loss": 0.338, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9419372031414107e-05, + "loss": 0.1238, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.941858824635932e-05, + "loss": 0.2242, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.941780446130453e-05, + "loss": 0.2178, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.941702067624975e-05, + "loss": 0.27, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.941623689119496e-05, + "loss": 0.3249, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.9415453106140173e-05, + "loss": 0.3082, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.9414669321085387e-05, + "loss": 0.3891, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.94138855360306e-05, + "loss": 0.525, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9413101750975814e-05, + "loss": 0.6924, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.941231796592103e-05, + "loss": 0.4089, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.941153418086624e-05, + "loss": 0.1315, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9410750395811456e-05, + "loss": 0.1435, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9409966610756667e-05, + "loss": 0.2198, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.940918282570188e-05, + "loss": 0.2413, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9408399040647095e-05, + "loss": 0.3171, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.940761525559231e-05, + "loss": 0.2903, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406831470537522e-05, + "loss": 0.3473, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9406047685482733e-05, + "loss": 0.5104, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.940526390042795e-05, + "loss": 0.7117, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.940448011537316e-05, + "loss": 0.3581, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9403696330318375e-05, + "loss": 0.2153, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.940291254526359e-05, + "loss": 0.2117, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.9402128760208802e-05, + "loss": 0.2479, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9401344975154016e-05, + "loss": 0.3125, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.940056119009923e-05, + "loss": 0.3053, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.939977740504444e-05, + "loss": 0.3783, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.9398993619989658e-05, + "loss": 0.4291, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.939820983493487e-05, + "loss": 0.3458, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.9397426049880082e-05, + "loss": 0.7172, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.9396642264825296e-05, + "loss": 0.4026, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395858479770507e-05, + "loss": 0.1863, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.9395074694715724e-05, + "loss": 0.208, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.9394290909660935e-05, + "loss": 0.2116, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.939350712460615e-05, + "loss": 0.2817, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9392723339551362e-05, + "loss": 0.3429, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9391939554496576e-05, + "loss": 0.4071, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.939115576944179e-05, + "loss": 0.4191, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9390371984387004e-05, + "loss": 0.5109, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9389588199332218e-05, + "loss": 0.7804, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388804414277432e-05, + "loss": 0.3884, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9388020629222642e-05, + "loss": 0.1351, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9387236844167856e-05, + "loss": 0.2175, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.938645305911307e-05, + "loss": 0.237, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.9385669274058284e-05, + "loss": 0.3365, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.9384885489003498e-05, + "loss": 0.3038, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.938410170394871e-05, + "loss": 0.2486, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.9383317918893926e-05, + "loss": 0.3903, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.9382534133839136e-05, + "loss": 0.5263, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.938175034878435e-05, + "loss": 0.6892, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380966563729564e-05, + "loss": 0.439, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9380182778674778e-05, + "loss": 0.1741, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9379398993619992e-05, + "loss": 0.199, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9378615208565206e-05, + "loss": 0.246, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9377831423510416e-05, + "loss": 0.2149, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.937704763845563e-05, + "loss": 0.3709, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9376263853400844e-05, + "loss": 0.3476, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9375480068346058e-05, + "loss": 0.41, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9374696283291272e-05, + "loss": 0.4483, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9373912498236486e-05, + "loss": 0.6013, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.93731287131817e-05, + "loss": 0.3716, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.937234492812691e-05, + "loss": 0.1351, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9371561143072128e-05, + "loss": 0.1643, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.9370777358017338e-05, + "loss": 0.2067, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369993572962552e-05, + "loss": 0.2506, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.9369209787907766e-05, + "loss": 0.2104, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.936842600285298e-05, + "loss": 0.2822, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9367642217798194e-05, + "loss": 0.3395, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9366858432743404e-05, + "loss": 0.5625, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.936607464768862e-05, + "loss": 0.8041, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9365290862633832e-05, + "loss": 0.2843, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9364507077579046e-05, + "loss": 0.2228, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.936372329252426e-05, + "loss": 0.2473, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362939507469474e-05, + "loss": 0.2517, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9362155722414684e-05, + "loss": 0.6024, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9361371937359902e-05, + "loss": 0.4021, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9360588152305112e-05, + "loss": 0.4412, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9359804367250326e-05, + "loss": 0.4314, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.935902058219554e-05, + "loss": 0.4803, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.9358236797140754e-05, + "loss": 0.4986, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.9357453012085968e-05, + "loss": 0.3644, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.935666922703118e-05, + "loss": 0.2482, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355885441976396e-05, + "loss": 0.1968, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.9355101656921606e-05, + "loss": 0.2267, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.935431787186682e-05, + "loss": 0.2209, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.9353534086812034e-05, + "loss": 0.3899, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.9352750301757248e-05, + "loss": 0.2757, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351966516702462e-05, + "loss": 0.3713, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.9351182731647676e-05, + "loss": 0.4712, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.9350398946592886e-05, + "loss": 0.5116, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.9349615161538104e-05, + "loss": 0.3821, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348831376483314e-05, + "loss": 0.1595, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9348047591428528e-05, + "loss": 0.1296, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9347263806373742e-05, + "loss": 0.2284, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9346480021318952e-05, + "loss": 0.281, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.934569623626417e-05, + "loss": 0.17, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.934491245120938e-05, + "loss": 0.3331, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9344128666154594e-05, + "loss": 0.4022, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9343344881099808e-05, + "loss": 0.495, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9342561096045022e-05, + "loss": 0.8846, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9341777310990236e-05, + "loss": 0.3249, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.934099352593545e-05, + "loss": 0.1457, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9340209740880664e-05, + "loss": 0.205, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.9339425955825878e-05, + "loss": 0.2736, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9338642170771088e-05, + "loss": 0.2574, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337858385716305e-05, + "loss": 0.2396, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.9337074600661516e-05, + "loss": 0.3956, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.933629081560673e-05, + "loss": 0.3899, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9335507030551944e-05, + "loss": 0.4603, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9334723245497154e-05, + "loss": 0.8332, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.933393946044237e-05, + "loss": 0.3782, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9333155675387582e-05, + "loss": 0.1268, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9332371890332796e-05, + "loss": 0.2608, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.933158810527801e-05, + "loss": 0.1392, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330804320223224e-05, + "loss": 0.2242, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9330020535168438e-05, + "loss": 0.1908, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.932923675011365e-05, + "loss": 0.2051, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9328452965058862e-05, + "loss": 0.5424, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.932766918000408e-05, + "loss": 0.5415, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.932688539494929e-05, + "loss": 0.6368, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9326101609894504e-05, + "loss": 0.4718, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.9325317824839718e-05, + "loss": 0.1058, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.932453403978493e-05, + "loss": 0.1814, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.9323750254730146e-05, + "loss": 0.219, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322966469675356e-05, + "loss": 0.2229, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9322182684620573e-05, + "loss": 0.3236, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9321398899565784e-05, + "loss": 0.2804, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9320615114510998e-05, + "loss": 0.3324, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.931983132945621e-05, + "loss": 0.5195, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9319047544401426e-05, + "loss": 0.8297, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.931826375934664e-05, + "loss": 0.3, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9317479974291853e-05, + "loss": 0.2239, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9316696189237064e-05, + "loss": 0.2179, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9315912404182278e-05, + "loss": 0.2458, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.931512861912749e-05, + "loss": 0.2145, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9314344834072706e-05, + "loss": 0.1894, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.931356104901792e-05, + "loss": 0.3247, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.931277726396313e-05, + "loss": 0.4402, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311993478908347e-05, + "loss": 0.5536, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.9311209693853558e-05, + "loss": 0.7553, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9310425908798772e-05, + "loss": 0.345, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.9309642123743986e-05, + "loss": 0.1617, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.93088583386892e-05, + "loss": 0.2743, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.9308074553634413e-05, + "loss": 0.2523, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.9307290768579627e-05, + "loss": 0.2766, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.930650698352484e-05, + "loss": 0.2555, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9305723198470052e-05, + "loss": 0.3894, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9304939413415266e-05, + "loss": 0.3815, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.930415562836048e-05, + "loss": 0.5165, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9303371843305694e-05, + "loss": 0.8409, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9302588058250907e-05, + "loss": 0.3595, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.930180427319612e-05, + "loss": 0.1876, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9301020488141332e-05, + "loss": 0.2553, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.930023670308655e-05, + "loss": 0.3259, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.929945291803176e-05, + "loss": 0.2538, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9298669132976974e-05, + "loss": 0.3327, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.9297885347922187e-05, + "loss": 0.3046, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.92971015628674e-05, + "loss": 0.4529, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.9296317777812615e-05, + "loss": 0.4448, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.9295533992757826e-05, + "loss": 0.6134, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.929475020770304e-05, + "loss": 0.4037, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293966422648254e-05, + "loss": 0.1626, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.9293182637593467e-05, + "loss": 0.1955, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.929239885253868e-05, + "loss": 0.244, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9291615067483895e-05, + "loss": 0.1823, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.929083128242911e-05, + "loss": 0.3409, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9290047497374323e-05, + "loss": 0.3905, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9289263712319534e-05, + "loss": 0.4031, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.928847992726475e-05, + "loss": 0.5521, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.928769614220996e-05, + "loss": 0.6648, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9286912357155175e-05, + "loss": 0.2675, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.928612857210039e-05, + "loss": 0.1622, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.92853447870456e-05, + "loss": 0.2045, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9284561001990817e-05, + "loss": 0.2512, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.9283777216936028e-05, + "loss": 0.2425, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.928299343188124e-05, + "loss": 0.3474, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.9282209646826455e-05, + "loss": 0.4243, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.928142586177167e-05, + "loss": 0.4205, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.9280642076716883e-05, + "loss": 0.4256, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279858291662097e-05, + "loss": 0.74, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.9279074506607308e-05, + "loss": 0.3487, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9278290721552525e-05, + "loss": 0.1986, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9277506936497735e-05, + "loss": 0.279, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.927672315144295e-05, + "loss": 0.2964, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275939366388163e-05, + "loss": 0.255, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9275155581333377e-05, + "loss": 0.3853, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.927437179627859e-05, + "loss": 0.3548, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.92735880112238e-05, + "loss": 0.3402, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.927280422616902e-05, + "loss": 0.375, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.927202044111423e-05, + "loss": 0.8663, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9271236656059443e-05, + "loss": 0.3404, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9270452871004657e-05, + "loss": 0.1193, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.926966908594987e-05, + "loss": 0.2268, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.9268885300895085e-05, + "loss": 0.3271, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.92681015158403e-05, + "loss": 0.2239, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.926731773078551e-05, + "loss": 0.363, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.9266690702741683e-05, + "loss": 0.3885, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265906917686894e-05, + "loss": 0.5457, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9265123132632108e-05, + "loss": 0.532, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.926433934757732e-05, + "loss": 0.8407, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.9263555562522536e-05, + "loss": 0.214, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.926277177746775e-05, + "loss": 0.1542, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261987992412963e-05, + "loss": 0.2797, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9261204207358174e-05, + "loss": 0.1692, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.926042042230339e-05, + "loss": 0.3042, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.92596366372486e-05, + "loss": 0.3473, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9258852852193816e-05, + "loss": 0.3281, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.925806906713903e-05, + "loss": 0.3659, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9257285282084243e-05, + "loss": 0.4402, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9256501497029457e-05, + "loss": 0.7063, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9255717711974668e-05, + "loss": 0.3744, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254933926919882e-05, + "loss": 0.1654, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9254150141865096e-05, + "loss": 0.1909, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.925336635681031e-05, + "loss": 0.2026, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.9252582571755523e-05, + "loss": 0.246, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.9251798786700737e-05, + "loss": 0.2819, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.925101500164595e-05, + "loss": 0.3336, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.9250231216591165e-05, + "loss": 0.4281, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.9249447431536376e-05, + "loss": 0.4699, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.924866364648159e-05, + "loss": 0.7916, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247879861426803e-05, + "loss": 0.3297, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9247096076372017e-05, + "loss": 0.1378, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.924631229131723e-05, + "loss": 0.155, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9245528506262442e-05, + "loss": 0.2305, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.924474472120766e-05, + "loss": 0.247, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.924396093615287e-05, + "loss": 0.2662, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9243177151098084e-05, + "loss": 0.335, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9242393366043297e-05, + "loss": 0.3854, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.924160958098851e-05, + "loss": 0.4929, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9240825795933725e-05, + "loss": 0.6641, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.924004201087894e-05, + "loss": 0.3136, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.923925822582415e-05, + "loss": 0.1783, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.9238474440769367e-05, + "loss": 0.2408, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.9237690655714577e-05, + "loss": 0.2645, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.923690687065979e-05, + "loss": 0.2702, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.9236123085605005e-05, + "loss": 0.2408, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.923533930055022e-05, + "loss": 0.3547, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.9234555515495433e-05, + "loss": 0.3429, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.9233771730440644e-05, + "loss": 0.5108, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.923298794538586e-05, + "loss": 0.9399, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.923220416033107e-05, + "loss": 0.3939, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9231420375276285e-05, + "loss": 0.1552, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.92306365902215e-05, + "loss": 0.2075, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229852805166713e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9229069020111927e-05, + "loss": 0.3028, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.922828523505714e-05, + "loss": 0.24, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.922750145000235e-05, + "loss": 0.2975, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.922671766494757e-05, + "loss": 0.3763, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.922593387989278e-05, + "loss": 0.4842, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.9225150094837993e-05, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.9224366309783207e-05, + "loss": 0.3506, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.9223582524728418e-05, + "loss": 0.1437, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222798739673635e-05, + "loss": 0.1921, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.9222014954618845e-05, + "loss": 0.211, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.922123116956406e-05, + "loss": 0.1829, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9220447384509273e-05, + "loss": 0.3135, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9219663599454487e-05, + "loss": 0.3635, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.92188798143997e-05, + "loss": 0.3972, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9218096029344915e-05, + "loss": 0.4304, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.921731224429013e-05, + "loss": 0.5761, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9216528459235343e-05, + "loss": 0.3202, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9215744674180553e-05, + "loss": 0.132, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9214960889125767e-05, + "loss": 0.248, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.921417710407098e-05, + "loss": 0.2491, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9213393319016195e-05, + "loss": 0.1984, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.921260953396141e-05, + "loss": 0.4062, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.921182574890662e-05, + "loss": 0.381, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9211041963851837e-05, + "loss": 0.3894, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9210258178797047e-05, + "loss": 0.5295, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.920947439374226e-05, + "loss": 0.5582, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9208690608687475e-05, + "loss": 0.3742, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.920790682363269e-05, + "loss": 0.1369, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.9207123038577903e-05, + "loss": 0.191, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.9206339253523117e-05, + "loss": 0.2741, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.9205555468468327e-05, + "loss": 0.2237, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.920477168341354e-05, + "loss": 0.2747, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9203987898358755e-05, + "loss": 0.3203, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.920320411330397e-05, + "loss": 0.3738, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9202420328249183e-05, + "loss": 0.4449, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9201636543194397e-05, + "loss": 0.5899, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.920085275813961e-05, + "loss": 0.2703, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.920006897308482e-05, + "loss": 0.2, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9199285188030035e-05, + "loss": 0.1694, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.919850140297525e-05, + "loss": 0.211, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9197717617920463e-05, + "loss": 0.2194, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9196933832865677e-05, + "loss": 0.2725, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.919615004781089e-05, + "loss": 0.3563, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.9195366262756105e-05, + "loss": 0.3059, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.9194582477701315e-05, + "loss": 0.4064, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.919379869264653e-05, + "loss": 0.6607, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.9193014907591743e-05, + "loss": 0.3117, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.9192231122536957e-05, + "loss": 0.1045, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.919144733748217e-05, + "loss": 0.1874, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.9190663552427385e-05, + "loss": 0.3021, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189879767372595e-05, + "loss": 0.211, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9189095982317813e-05, + "loss": 0.265, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9188312197263023e-05, + "loss": 0.2312, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9187528412208237e-05, + "loss": 0.3051, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.918674462715345e-05, + "loss": 0.5762, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9185960842098665e-05, + "loss": 0.6396, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.918517705704388e-05, + "loss": 0.3087, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.918439327198909e-05, + "loss": 0.1117, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9183609486934306e-05, + "loss": 0.1206, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9182825701879517e-05, + "loss": 0.177, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.918204191682473e-05, + "loss": 0.2447, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.9181258131769945e-05, + "loss": 0.2349, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.918047434671516e-05, + "loss": 0.2984, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.9179690561660373e-05, + "loss": 0.2954, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178906776605587e-05, + "loss": 0.3903, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.9178122991550797e-05, + "loss": 0.6694, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.9177339206496014e-05, + "loss": 0.2947, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.9176555421441225e-05, + "loss": 0.1219, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.917577163638644e-05, + "loss": 0.2013, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174987851331653e-05, + "loss": 0.1924, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9174204066276863e-05, + "loss": 0.29, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.917342028122208e-05, + "loss": 0.3052, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.917263649616729e-05, + "loss": 0.3302, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9171852711112505e-05, + "loss": 0.4615, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.917106892605772e-05, + "loss": 0.4105, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9170285141002933e-05, + "loss": 0.6715, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9169501355948147e-05, + "loss": 0.3003, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.916871757089336e-05, + "loss": 0.2266, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9167933785838574e-05, + "loss": 0.1107, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.916715000078379e-05, + "loss": 0.2179, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9166366215729e-05, + "loss": 0.1912, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9165582430674213e-05, + "loss": 0.241, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9164798645619427e-05, + "loss": 0.336, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.916401486056464e-05, + "loss": 0.3382, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.9163231075509854e-05, + "loss": 0.5897, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.9162447290455065e-05, + "loss": 0.7494, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.9161663505400282e-05, + "loss": 0.3499, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160879720345493e-05, + "loss": 0.1544, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9160095935290707e-05, + "loss": 0.1487, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.915931215023592e-05, + "loss": 0.2265, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9158528365181135e-05, + "loss": 0.2467, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.915774458012635e-05, + "loss": 0.2705, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156960795071562e-05, + "loss": 0.4046, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9156177010016773e-05, + "loss": 0.4616, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.915539322496199e-05, + "loss": 0.5142, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.91546094399072e-05, + "loss": 0.6709, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9153825654852415e-05, + "loss": 0.3144, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.915304186979763e-05, + "loss": 0.1743, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9152258084742842e-05, + "loss": 0.2046, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9151474299688056e-05, + "loss": 0.1748, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9150690514633267e-05, + "loss": 0.347, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149906729578484e-05, + "loss": 0.4057, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.9149122944523695e-05, + "loss": 0.3375, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.914833915946891e-05, + "loss": 0.3648, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.9147555374414122e-05, + "loss": 0.3829, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.9146771589359336e-05, + "loss": 0.5695, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.914598780430455e-05, + "loss": 0.238, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9145204019249764e-05, + "loss": 0.1316, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9144420234194975e-05, + "loss": 0.2054, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.914363644914019e-05, + "loss": 0.2046, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142852664085402e-05, + "loss": 0.2385, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9142068879030616e-05, + "loss": 0.2931, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.914128509397583e-05, + "loss": 0.3218, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.914050130892104e-05, + "loss": 0.3059, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9139717523866258e-05, + "loss": 0.4008, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.913893373881147e-05, + "loss": 0.6874, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9138149953756683e-05, + "loss": 0.3523, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.9137366168701896e-05, + "loss": 0.1623, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.913658238364711e-05, + "loss": 0.1548, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135798598592324e-05, + "loss": 0.2571, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.9135014813537538e-05, + "loss": 0.2677, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.9134231028482752e-05, + "loss": 0.2672, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.9133447243427963e-05, + "loss": 0.1512, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.9132663458373176e-05, + "loss": 0.434, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.913187967331839e-05, + "loss": 0.4751, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9131095888263604e-05, + "loss": 0.9035, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9130312103208818e-05, + "loss": 0.2479, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9129528318154032e-05, + "loss": 0.1588, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9128744533099243e-05, + "loss": 0.1603, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.912796074804446e-05, + "loss": 0.2272, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.912717696298967e-05, + "loss": 0.2211, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9126393177934884e-05, + "loss": 0.2354, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9125609392880098e-05, + "loss": 0.3462, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124825607825312e-05, + "loss": 0.3478, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9124041822770526e-05, + "loss": 0.434, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9123258037715737e-05, + "loss": 0.6833, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.912247425266095e-05, + "loss": 0.3255, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9121690467606164e-05, + "loss": 0.2105, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120906682551378e-05, + "loss": 0.248, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9120122897496592e-05, + "loss": 0.2495, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9119339112441806e-05, + "loss": 0.2071, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.911855532738702e-05, + "loss": 0.204, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.9117771542332234e-05, + "loss": 0.4248, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.9116987757277444e-05, + "loss": 0.3261, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.911620397222266e-05, + "loss": 0.425, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9115420187167872e-05, + "loss": 0.658, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9114636402113086e-05, + "loss": 0.2923, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.91138526170583e-05, + "loss": 0.1128, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.911306883200351e-05, + "loss": 0.1456, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9112285046948728e-05, + "loss": 0.2254, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.911150126189394e-05, + "loss": 0.2541, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9110717476839152e-05, + "loss": 0.3436, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9109933691784366e-05, + "loss": 0.3193, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.910914990672958e-05, + "loss": 0.309, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9108366121674794e-05, + "loss": 0.504, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9107582336620008e-05, + "loss": 0.6894, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.910679855156522e-05, + "loss": 0.3584, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.9106014766510436e-05, + "loss": 0.1103, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.9105230981455646e-05, + "loss": 0.2487, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.910444719640086e-05, + "loss": 0.1941, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.9103663411346074e-05, + "loss": 0.2426, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102879626291288e-05, + "loss": 0.2712, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9102095841236502e-05, + "loss": 0.3572, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9101312056181712e-05, + "loss": 0.4285, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.910052827112693e-05, + "loss": 0.5303, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.909974448607214e-05, + "loss": 0.4896, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098960701017354e-05, + "loss": 0.2817, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9098176915962568e-05, + "loss": 0.1629, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9097393130907782e-05, + "loss": 0.2024, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9096609345852996e-05, + "loss": 0.2163, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.909582556079821e-05, + "loss": 0.2022, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.909504177574342e-05, + "loss": 0.3167, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9094257990688638e-05, + "loss": 0.4345, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.9093474205633848e-05, + "loss": 0.3893, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.9092690420579062e-05, + "loss": 0.3411, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091906635524276e-05, + "loss": 0.6765, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.9091122850469486e-05, + "loss": 0.3742, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.9090339065414704e-05, + "loss": 0.1481, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.9089555280359914e-05, + "loss": 0.1375, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.9088771495305128e-05, + "loss": 0.2551, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087987710250342e-05, + "loss": 0.2335, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9087203925195556e-05, + "loss": 0.2022, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.908642014014077e-05, + "loss": 0.272, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9085636355085984e-05, + "loss": 0.3548, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9084852570031198e-05, + "loss": 0.4034, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.908406878497641e-05, + "loss": 0.7186, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9083284999921622e-05, + "loss": 0.3299, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9082501214866836e-05, + "loss": 0.1502, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.908171742981205e-05, + "loss": 0.1961, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080933644757264e-05, + "loss": 0.1746, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.9080149859702478e-05, + "loss": 0.2177, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.9079366074647688e-05, + "loss": 0.2479, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9078582289592905e-05, + "loss": 0.3497, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9077798504538116e-05, + "loss": 0.2857, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.907701471948333e-05, + "loss": 0.4186, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9076230934428544e-05, + "loss": 0.5697, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9075447149373758e-05, + "loss": 0.2705, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.907466336431897e-05, + "loss": 0.1694, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073879579264186e-05, + "loss": 0.2024, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.9073095794209396e-05, + "loss": 0.1911, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.907231200915461e-05, + "loss": 0.324, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.9071528224099824e-05, + "loss": 0.2166, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9070744439045038e-05, + "loss": 0.2728, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.906996065399025e-05, + "loss": 0.4889, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9069176868935466e-05, + "loss": 0.3559, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.906839308388068e-05, + "loss": 0.6589, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.906760929882589e-05, + "loss": 0.3064, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066825513771104e-05, + "loss": 0.1575, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9066041728716318e-05, + "loss": 0.2603, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9065257943661532e-05, + "loss": 0.2049, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9064474158606746e-05, + "loss": 0.1928, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.906369037355196e-05, + "loss": 0.2743, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062906588497173e-05, + "loss": 0.3078, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9062122803442384e-05, + "loss": 0.3558, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.9061339018387598e-05, + "loss": 0.5479, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.9060555233332812e-05, + "loss": 0.7941, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.9059771448278026e-05, + "loss": 0.2665, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.905898766322324e-05, + "loss": 0.1489, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9058203878168453e-05, + "loss": 0.1762, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9057420093113664e-05, + "loss": 0.2242, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.905663630805888e-05, + "loss": 0.2587, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055852523004092e-05, + "loss": 0.2101, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9055068737949306e-05, + "loss": 0.2533, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.905428495289452e-05, + "loss": 0.4377, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9053501167839734e-05, + "loss": 0.441, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9052717382784947e-05, + "loss": 0.7094, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051933597730158e-05, + "loss": 0.1968, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9051149812675375e-05, + "loss": 0.1635, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9050366027620586e-05, + "loss": 0.1534, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.90495822425658e-05, + "loss": 0.1294, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048798457511014e-05, + "loss": 0.1829, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.9048014672456227e-05, + "loss": 0.172, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.904723088740144e-05, + "loss": 0.2696, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.9046447102346655e-05, + "loss": 0.3744, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.9045663317291866e-05, + "loss": 0.4525, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044879532237083e-05, + "loss": 0.5999, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.9044095747182294e-05, + "loss": 0.2546, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9043311962127508e-05, + "loss": 0.1782, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.904252817707272e-05, + "loss": 0.217, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9041744392017932e-05, + "loss": 0.1918, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.904096060696315e-05, + "loss": 0.2431, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.904017682190836e-05, + "loss": 0.3174, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9039393036853574e-05, + "loss": 0.3594, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9038609251798788e-05, + "loss": 0.3965, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037825466744e-05, + "loss": 0.2913, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9037041681689215e-05, + "loss": 0.7954, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.903625789663443e-05, + "loss": 0.3234, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.9035474111579643e-05, + "loss": 0.1657, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.9034690326524857e-05, + "loss": 0.0944, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.9033906541470068e-05, + "loss": 0.2358, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.903312275641528e-05, + "loss": 0.2233, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9032338971360495e-05, + "loss": 0.2742, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.903155518630571e-05, + "loss": 0.3522, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9030771401250923e-05, + "loss": 0.2871, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9029987616196134e-05, + "loss": 0.5827, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.902920383114135e-05, + "loss": 0.9008, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.902842004608656e-05, + "loss": 0.3112, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.9027636261031775e-05, + "loss": 0.1972, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.902685247597699e-05, + "loss": 0.2397, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9026068690922203e-05, + "loss": 0.1644, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9025284905867417e-05, + "loss": 0.329, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.902450112081263e-05, + "loss": 0.2891, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.902371733575784e-05, + "loss": 0.3564, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.902293355070306e-05, + "loss": 0.3063, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.902214976564827e-05, + "loss": 0.481, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9021365980593483e-05, + "loss": 0.7006, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9020582195538697e-05, + "loss": 0.3454, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.901979841048391e-05, + "loss": 0.0799, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9019014625429125e-05, + "loss": 0.1361, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9018230840374336e-05, + "loss": 0.2282, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.901744705531955e-05, + "loss": 0.2861, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9016663270264763e-05, + "loss": 0.3123, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.9015879485209977e-05, + "loss": 0.2939, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.901509570015519e-05, + "loss": 0.368, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.9014311915100405e-05, + "loss": 0.2656, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.901352813004562e-05, + "loss": 0.7157, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9012744344990833e-05, + "loss": 0.297, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011960559936043e-05, + "loss": 0.1168, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9011176774881257e-05, + "loss": 0.1664, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.901039298982647e-05, + "loss": 0.1445, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9009609204771685e-05, + "loss": 0.2953, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.90088254197169e-05, + "loss": 0.1705, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.900804163466211e-05, + "loss": 0.3413, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9007257849607327e-05, + "loss": 0.3309, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9006474064552537e-05, + "loss": 0.4903, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.900569027949775e-05, + "loss": 0.6247, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9004906494442965e-05, + "loss": 0.299, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.900412270938818e-05, + "loss": 0.1186, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.9003338924333393e-05, + "loss": 0.151, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.9002555139278607e-05, + "loss": 0.1634, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.900177135422382e-05, + "loss": 0.2192, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.900098756916903e-05, + "loss": 0.3089, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9000203784114245e-05, + "loss": 0.2922, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.899941999905946e-05, + "loss": 0.2575, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.8998636214004673e-05, + "loss": 0.5155, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.8997852428949887e-05, + "loss": 0.8029, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.89970686438951e-05, + "loss": 0.3019, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.899628485884031e-05, + "loss": 0.1694, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.899550107378553e-05, + "loss": 0.1644, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.899471728873074e-05, + "loss": 0.2562, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993933503675953e-05, + "loss": 0.2024, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.8993149718621167e-05, + "loss": 0.2765, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.899236593356638e-05, + "loss": 0.3152, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.8991582148511595e-05, + "loss": 0.7121, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.8990798363456805e-05, + "loss": 0.4815, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.899001457840202e-05, + "loss": 0.596, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.8989230793347233e-05, + "loss": 0.2899, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.8988447008292447e-05, + "loss": 0.1684, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.898766322323766e-05, + "loss": 0.1769, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.8986879438182875e-05, + "loss": 0.1686, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.898609565312809e-05, + "loss": 0.1521, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.8985311868073303e-05, + "loss": 0.213, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.8984528083018513e-05, + "loss": 0.3328, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.8983744297963727e-05, + "loss": 0.3086, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.898296051290894e-05, + "loss": 0.3979, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.8982176727854155e-05, + "loss": 0.9146, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.898139294279937e-05, + "loss": 0.2771, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.898060915774458e-05, + "loss": 0.1257, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979825372689797e-05, + "loss": 0.2241, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.8979041587635007e-05, + "loss": 0.2393, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.897825780258022e-05, + "loss": 0.172, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.8977474017525435e-05, + "loss": 0.2781, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.897669023247065e-05, + "loss": 0.3206, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975906447415863e-05, + "loss": 0.3699, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.8975122662361077e-05, + "loss": 0.3469, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.8974338877306287e-05, + "loss": 0.8029, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.8973555092251504e-05, + "loss": 0.312, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.8972771307196715e-05, + "loss": 0.1367, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.897198752214193e-05, + "loss": 0.0884, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.8971203737087143e-05, + "loss": 0.2232, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.8970419952032357e-05, + "loss": 0.1861, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.896963616697757e-05, + "loss": 0.2897, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.896885238192278e-05, + "loss": 0.3206, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.8968068596868e-05, + "loss": 0.3773, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.896728481181321e-05, + "loss": 0.3115, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.8966501026758423e-05, + "loss": 0.9366, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.8965717241703637e-05, + "loss": 0.2707, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.896493345664885e-05, + "loss": 0.1204, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.8964149671594065e-05, + "loss": 0.1409, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.896336588653928e-05, + "loss": 0.1973, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.896258210148449e-05, + "loss": 0.1588, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961798316429706e-05, + "loss": 0.3012, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.8961014531374917e-05, + "loss": 0.3534, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.896023074632013e-05, + "loss": 0.4233, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.8959446961265345e-05, + "loss": 0.3975, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.8958663176210555e-05, + "loss": 0.4849, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957879391155772e-05, + "loss": 0.3376, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.8957095606100983e-05, + "loss": 0.1438, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.8956311821046197e-05, + "loss": 0.1841, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.895552803599141e-05, + "loss": 0.1959, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.8954744250936625e-05, + "loss": 0.2594, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.895396046588184e-05, + "loss": 0.3133, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.8953176680827052e-05, + "loss": 0.3055, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.8952392895772266e-05, + "loss": 0.3877, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.895160911071748e-05, + "loss": 0.3368, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.895082532566269e-05, + "loss": 0.6805, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.8950041540607905e-05, + "loss": 0.3995, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.894925775555312e-05, + "loss": 0.1304, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.8948473970498333e-05, + "loss": 0.1476, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.8947690185443546e-05, + "loss": 0.1871, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946906400388757e-05, + "loss": 0.2658, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.8946122615333974e-05, + "loss": 0.2439, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.8945338830279185e-05, + "loss": 0.3401, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.89445550452244e-05, + "loss": 0.2806, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.8943771260169613e-05, + "loss": 0.4399, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.8942987475114826e-05, + "loss": 0.6512, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.894220369006004e-05, + "loss": 0.2376, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.8941419905005254e-05, + "loss": 0.1043, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.8940636119950465e-05, + "loss": 0.1589, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.893985233489568e-05, + "loss": 0.1373, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.8939068549840893e-05, + "loss": 0.2714, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.8938284764786107e-05, + "loss": 0.2307, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.893750097973132e-05, + "loss": 0.2299, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.8936717194676534e-05, + "loss": 0.4098, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.8935933409621748e-05, + "loss": 0.4609, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.893514962456696e-05, + "loss": 0.7143, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.8934365839512173e-05, + "loss": 0.3278, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.8933582054457387e-05, + "loss": 0.1103, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.89327982694026e-05, + "loss": 0.1488, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.8932014484347814e-05, + "loss": 0.2829, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.8931230699293028e-05, + "loss": 0.2354, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.8930446914238242e-05, + "loss": 0.3971, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.8929663129183453e-05, + "loss": 0.4175, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.8928879344128667e-05, + "loss": 0.3929, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.892809555907388e-05, + "loss": 0.4019, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.8927311774019094e-05, + "loss": 0.6624, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.892652798896431e-05, + "loss": 0.3053, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.8925744203909522e-05, + "loss": 0.2106, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.8924960418854733e-05, + "loss": 0.172, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.892417663379995e-05, + "loss": 0.1984, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.892339284874516e-05, + "loss": 0.1985, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.8922609063690374e-05, + "loss": 0.4085, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.892182527863559e-05, + "loss": 0.3442, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.8921041493580802e-05, + "loss": 0.2404, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.8920257708526016e-05, + "loss": 0.4951, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.891947392347123e-05, + "loss": 0.6079, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.8918690138416444e-05, + "loss": 0.3329, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.8917906353361655e-05, + "loss": 0.1138, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.891712256830687e-05, + "loss": 0.1449, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.8916338783252082e-05, + "loss": 0.1854, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.8915554998197296e-05, + "loss": 0.1878, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.891477121314251e-05, + "loss": 0.2632, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913987428087724e-05, + "loss": 0.1892, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.8913203643032935e-05, + "loss": 0.3837, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.8912419857978152e-05, + "loss": 0.4508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.8911636072923362e-05, + "loss": 0.5534, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.8910852287868576e-05, + "loss": 0.315, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.891006850281379e-05, + "loss": 0.1236, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.8909284717759004e-05, + "loss": 0.0979, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.8908500932704218e-05, + "loss": 0.1878, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.890771714764943e-05, + "loss": 0.2305, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906933362594642e-05, + "loss": 0.2422, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.8906149577539856e-05, + "loss": 0.2615, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.890536579248507e-05, + "loss": 0.3486, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.8904582007430284e-05, + "loss": 0.461, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903798222375498e-05, + "loss": 0.6374, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.8903014437320712e-05, + "loss": 0.3201, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.8902230652265926e-05, + "loss": 0.1109, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.8901446867211136e-05, + "loss": 0.1345, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.890066308215635e-05, + "loss": 0.1565, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899879297101564e-05, + "loss": 0.7264, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.8899095512046778e-05, + "loss": 0.231, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.8898311726991992e-05, + "loss": 0.2366, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.8897527941937203e-05, + "loss": 0.3771, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.889674415688242e-05, + "loss": 0.4329, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.889596037182763e-05, + "loss": 0.6533, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.8895176586772844e-05, + "loss": 0.338, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.8894392801718058e-05, + "loss": 0.1299, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.8893609016663272e-05, + "loss": 0.1517, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.8892825231608486e-05, + "loss": 0.2633, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.88920414465537e-05, + "loss": 0.2221, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.889125766149891e-05, + "loss": 0.2924, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.8890473876444128e-05, + "loss": 0.2867, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.8889690091389338e-05, + "loss": 0.3109, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888906306334552e-05, + "loss": 0.4688, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.8888122521279766e-05, + "loss": 0.7502, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.888733873622498e-05, + "loss": 0.2301, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.8886554951170194e-05, + "loss": 0.1549, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.8885771166115404e-05, + "loss": 0.1893, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884987381060618e-05, + "loss": 0.1992, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.8884203596005832e-05, + "loss": 0.1521, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.8883419810951046e-05, + "loss": 0.2274, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.888263602589626e-05, + "loss": 0.2885, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881852240841474e-05, + "loss": 0.3451, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.8881068455786688e-05, + "loss": 0.3565, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.88802846707319e-05, + "loss": 0.5021, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.8879500885677112e-05, + "loss": 0.344, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.887871710062233e-05, + "loss": 0.2039, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.887793331556754e-05, + "loss": 0.2314, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.8877149530512754e-05, + "loss": 0.1719, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.8876365745457968e-05, + "loss": 0.2557, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.887558196040318e-05, + "loss": 0.2644, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874798175348396e-05, + "loss": 0.2162, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.8874014390293606e-05, + "loss": 0.3655, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.887323060523882e-05, + "loss": 0.4942, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.8872446820184034e-05, + "loss": 0.6199, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.8871663035129248e-05, + "loss": 0.3494, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870879250074462e-05, + "loss": 0.1605, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.8870095465019676e-05, + "loss": 0.1165, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.886931167996489e-05, + "loss": 0.1653, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.8868527894910103e-05, + "loss": 0.1936, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.8867744109855314e-05, + "loss": 0.2854, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866960324800528e-05, + "loss": 0.2725, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.8866176539745742e-05, + "loss": 0.4045, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.8865392754690956e-05, + "loss": 0.3646, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.886460896963617e-05, + "loss": 0.6643, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.886382518458138e-05, + "loss": 0.2439, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.8863041399526597e-05, + "loss": 0.1384, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.8862257614471808e-05, + "loss": 0.1399, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.8861473829417022e-05, + "loss": 0.17, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.8860690044362236e-05, + "loss": 0.2109, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.885990625930745e-05, + "loss": 0.2079, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.8859122474252664e-05, + "loss": 0.2478, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.8858338689197877e-05, + "loss": 0.2618, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.8857554904143088e-05, + "loss": 0.3146, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.8856771119088302e-05, + "loss": 0.5877, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.8855987334033516e-05, + "loss": 0.2784, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.885520354897873e-05, + "loss": 0.1436, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.8854419763923944e-05, + "loss": 0.1598, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.8853635978869158e-05, + "loss": 0.1545, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.885285219381437e-05, + "loss": 0.2853, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.8852068408759582e-05, + "loss": 0.2467, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.8851284623704796e-05, + "loss": 0.271, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.885050083865001e-05, + "loss": 0.3561, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.8849717053595224e-05, + "loss": 0.3717, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.8848933268540438e-05, + "loss": 0.6701, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.884814948348565e-05, + "loss": 0.3146, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.8847365698430865e-05, + "loss": 0.1088, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.8846581913376076e-05, + "loss": 0.1436, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.884579812832129e-05, + "loss": 0.1958, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.8845014343266504e-05, + "loss": 0.2178, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.8844230558211718e-05, + "loss": 0.1422, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.884344677315693e-05, + "loss": 0.2301, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.8842662988102145e-05, + "loss": 0.4035, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841879203047356e-05, + "loss": 0.4027, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.8841095417992573e-05, + "loss": 0.6834, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.8840311632937784e-05, + "loss": 0.2374, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.8839527847882998e-05, + "loss": 0.172, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.883874406282821e-05, + "loss": 0.1755, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.8837960277773425e-05, + "loss": 0.2204, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.883717649271864e-05, + "loss": 0.2173, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.883639270766385e-05, + "loss": 0.3071, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.8835608922609064e-05, + "loss": 0.2889, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834825137554278e-05, + "loss": 0.3744, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.8834198109510448e-05, + "loss": 0.6518, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.8833414324455662e-05, + "loss": 0.7128, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.8832630539400876e-05, + "loss": 0.3003, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.883184675434609e-05, + "loss": 0.0996, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.8831062969291304e-05, + "loss": 0.1282, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.8830279184236518e-05, + "loss": 0.1423, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.8829495399181728e-05, + "loss": 0.2481, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.8828711614126945e-05, + "loss": 0.1612, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.8827927829072156e-05, + "loss": 0.3512, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.882714404401737e-05, + "loss": 0.2906, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.8826360258962584e-05, + "loss": 0.2834, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.8825576473907798e-05, + "loss": 0.6138, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.882479268885301e-05, + "loss": 0.3247, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.8824008903798222e-05, + "loss": 0.0987, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.882322511874344e-05, + "loss": 0.1981, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.882244133368865e-05, + "loss": 0.2302, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.8821657548633864e-05, + "loss": 0.286, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.8820873763579078e-05, + "loss": 0.2, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.882008997852429e-05, + "loss": 0.3144, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.8819306193469506e-05, + "loss": 0.2449, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.881852240841472e-05, + "loss": 0.324, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.881773862335993e-05, + "loss": 0.5676, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816954838305144e-05, + "loss": 0.2509, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.8816171053250358e-05, + "loss": 0.1236, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.8815387268195572e-05, + "loss": 0.1508, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.8814603483140786e-05, + "loss": 0.1854, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813819698085996e-05, + "loss": 0.2049, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.8813035913031213e-05, + "loss": 0.253, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.8812252127976424e-05, + "loss": 0.2392, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.8811468342921638e-05, + "loss": 0.3392, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.8810684557866852e-05, + "loss": 0.3588, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.8809900772812066e-05, + "loss": 0.5704, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.880911698775728e-05, + "loss": 0.2261, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.8808333202702493e-05, + "loss": 0.1138, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.8807549417647707e-05, + "loss": 0.1819, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.8806765632592918e-05, + "loss": 0.2097, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805981847538132e-05, + "loss": 0.33, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.8805198062483346e-05, + "loss": 0.1766, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.880441427742856e-05, + "loss": 0.2288, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.8803630492373774e-05, + "loss": 0.3268, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802846707318987e-05, + "loss": 0.4618, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.8802062922264198e-05, + "loss": 0.487, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.8801279137209415e-05, + "loss": 0.2719, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.8800495352154626e-05, + "loss": 0.0903, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.879971156709984e-05, + "loss": 0.1441, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798927782045054e-05, + "loss": 0.1544, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.8798143996990267e-05, + "loss": 0.2806, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.879736021193548e-05, + "loss": 0.3168, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.8796576426880692e-05, + "loss": 0.2666, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.8795792641825906e-05, + "loss": 0.3645, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.879500885677112e-05, + "loss": 0.5892, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.8794225071716334e-05, + "loss": 0.6008, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.8793441286661548e-05, + "loss": 0.3091, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.879265750160676e-05, + "loss": 0.1157, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.8791873716551975e-05, + "loss": 0.1896, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.879108993149719e-05, + "loss": 0.1758, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.87903061464424e-05, + "loss": 0.2199, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.8789522361387617e-05, + "loss": 0.1525, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.8788738576332828e-05, + "loss": 0.3416, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.878795479127804e-05, + "loss": 0.3551, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.8787171006223255e-05, + "loss": 0.3541, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.8786387221168466e-05, + "loss": 0.6545, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.8785603436113683e-05, + "loss": 0.2391, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784819651058894e-05, + "loss": 0.1449, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.8784035866004108e-05, + "loss": 0.1726, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.878325208094932e-05, + "loss": 0.1607, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.8782468295894535e-05, + "loss": 0.2541, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.878168451083975e-05, + "loss": 0.1685, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780900725784963e-05, + "loss": 0.3067, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.8780116940730174e-05, + "loss": 0.3734, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.877933315567539e-05, + "loss": 0.311, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.87785493706206e-05, + "loss": 0.464, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.8777765585565815e-05, + "loss": 0.2861, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.877698180051103e-05, + "loss": 0.0895, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.8776198015456243e-05, + "loss": 0.1917, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.8775414230401457e-05, + "loss": 0.2786, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.8774630445346668e-05, + "loss": 0.1561, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773846660291885e-05, + "loss": 0.2006, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.8773062875237096e-05, + "loss": 0.2568, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.877227909018231e-05, + "loss": 0.3812, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.8771495305127523e-05, + "loss": 0.3857, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.8770711520072737e-05, + "loss": 0.6512, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.876992773501795e-05, + "loss": 0.2031, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.8769143949963165e-05, + "loss": 0.0943, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.8768360164908376e-05, + "loss": 0.133, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.8767576379853593e-05, + "loss": 0.181, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766792594798803e-05, + "loss": 0.3046, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.8766008809744017e-05, + "loss": 0.3182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.876522502468923e-05, + "loss": 0.2638, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.8764441239634442e-05, + "loss": 0.3133, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.876365745457966e-05, + "loss": 0.3542, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.876287366952487e-05, + "loss": 0.7553, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.8762089884470083e-05, + "loss": 0.2799, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.8761306099415297e-05, + "loss": 0.1592, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.876052231436051e-05, + "loss": 0.1252, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.8759738529305725e-05, + "loss": 0.1886, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.875895474425094e-05, + "loss": 0.2613, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.8758170959196153e-05, + "loss": 0.3106, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.8757387174141367e-05, + "loss": 0.2214, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.8756603389086577e-05, + "loss": 0.3003, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.875581960403179e-05, + "loss": 0.3717, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.8755035818977005e-05, + "loss": 0.5338, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.875425203392222e-05, + "loss": 0.2474, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.8753468248867433e-05, + "loss": 0.1502, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.8752684463812644e-05, + "loss": 0.1918, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.875190067875786e-05, + "loss": 0.1617, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.875111689370307e-05, + "loss": 0.2153, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.8750333108648285e-05, + "loss": 0.2767, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.87495493235935e-05, + "loss": 0.2624, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.8748765538538713e-05, + "loss": 0.3624, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.8747981753483927e-05, + "loss": 0.4102, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.874719796842914e-05, + "loss": 0.6647, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.874641418337435e-05, + "loss": 0.16, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.8745630398319565e-05, + "loss": 0.1188, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.874484661326478e-05, + "loss": 0.1938, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.8744062828209993e-05, + "loss": 0.2017, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.8743279043155207e-05, + "loss": 0.2465, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.874249525810042e-05, + "loss": 0.2671, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.8741711473045635e-05, + "loss": 0.1911, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740927687990845e-05, + "loss": 0.3654, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.8740143902936063e-05, + "loss": 0.3904, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.8739360117881273e-05, + "loss": 0.5258, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.8738576332826487e-05, + "loss": 0.2227, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.87377925477717e-05, + "loss": 0.0826, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.8737008762716915e-05, + "loss": 0.1681, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.873622497766213e-05, + "loss": 0.2063, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.873544119260734e-05, + "loss": 0.2334, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.8734657407552553e-05, + "loss": 0.2478, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.8733873622497767e-05, + "loss": 0.3684, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.873308983744298e-05, + "loss": 0.292, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.8732306052388195e-05, + "loss": 0.2457, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.873152226733341e-05, + "loss": 0.5138, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.873073848227862e-05, + "loss": 0.3402, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729954697223837e-05, + "loss": 0.1397, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.8729170912169047e-05, + "loss": 0.1392, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.872838712711426e-05, + "loss": 0.109, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.8727603342059475e-05, + "loss": 0.2441, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.872681955700469e-05, + "loss": 0.2452, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.8726035771949903e-05, + "loss": 0.2466, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.8725251986895113e-05, + "loss": 0.3464, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.872446820184033e-05, + "loss": 0.4602, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.872368441678554e-05, + "loss": 0.6565, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.8722900631730755e-05, + "loss": 0.3206, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.872211684667597e-05, + "loss": 0.0716, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.8721333061621183e-05, + "loss": 0.1231, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.8720549276566397e-05, + "loss": 0.2065, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.871976549151161e-05, + "loss": 0.1649, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.871898170645682e-05, + "loss": 0.1393, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.871819792140204e-05, + "loss": 0.2792, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.871741413634725e-05, + "loss": 0.3291, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.8716630351292463e-05, + "loss": 0.294, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715846566237677e-05, + "loss": 0.6407, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.8715062781182887e-05, + "loss": 0.2999, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.8714278996128105e-05, + "loss": 0.142, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.8713495211073315e-05, + "loss": 0.2173, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.871271142601853e-05, + "loss": 0.2285, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711927640963743e-05, + "loss": 0.1986, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.8711143855908957e-05, + "loss": 0.2207, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.871036007085417e-05, + "loss": 0.2179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.8709576285799385e-05, + "loss": 0.2123, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.87087925007446e-05, + "loss": 0.2957, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.8708008715689812e-05, + "loss": 0.6045, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.8707224930635023e-05, + "loss": 0.2722, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.870644114558024e-05, + "loss": 0.2209, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.870565736052545e-05, + "loss": 0.1307, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.8704873575470665e-05, + "loss": 0.152, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.870408979041588e-05, + "loss": 0.1983, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.870330600536109e-05, + "loss": 0.2012, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.8702522220306306e-05, + "loss": 0.6512, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.8701738435251517e-05, + "loss": 0.3317, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.870095465019673e-05, + "loss": 0.4515, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.8700170865141945e-05, + "loss": 0.7104, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.869938708008716e-05, + "loss": 0.2689, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.8698603295032373e-05, + "loss": 0.1028, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697819509977586e-05, + "loss": 0.1626, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.8697035724922797e-05, + "loss": 0.2462, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.8696251939868014e-05, + "loss": 0.1763, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.8695468154813225e-05, + "loss": 0.3345, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.869468436975844e-05, + "loss": 0.2613, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693900584703653e-05, + "loss": 0.3072, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.8693116799648866e-05, + "loss": 0.3295, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.869233301459408e-05, + "loss": 0.6022, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.869154922953929e-05, + "loss": 0.3035, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.8690765444484508e-05, + "loss": 0.2063, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.868998165942972e-05, + "loss": 0.2401, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.8689197874374933e-05, + "loss": 0.1846, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.8688414089320147e-05, + "loss": 0.1649, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.868763030426536e-05, + "loss": 0.1734, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686846519210574e-05, + "loss": 0.2245, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.8686062734155788e-05, + "loss": 0.2765, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.8685278949101e-05, + "loss": 0.3261, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.8684495164046213e-05, + "loss": 0.5533, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.8683711378991427e-05, + "loss": 0.2859, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.868292759393664e-05, + "loss": 0.0899, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.8682143808881854e-05, + "loss": 0.1552, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.8681360023827065e-05, + "loss": 0.1668, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.8680576238772282e-05, + "loss": 0.2532, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679792453717493e-05, + "loss": 0.254, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.8679008668662707e-05, + "loss": 0.2807, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.867822488360792e-05, + "loss": 0.506, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.8677441098553134e-05, + "loss": 0.3954, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.867665731349835e-05, + "loss": 0.5384, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675873528443562e-05, + "loss": 0.1873, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.8675089743388776e-05, + "loss": 0.1488, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.8674305958333987e-05, + "loss": 0.1511, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.86735221732792e-05, + "loss": 0.17, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.8672738388224414e-05, + "loss": 0.1869, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.867195460316963e-05, + "loss": 0.2043, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.8671170818114842e-05, + "loss": 0.3333, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.8670387033060056e-05, + "loss": 0.3531, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.8669603248005267e-05, + "loss": 0.5792, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668819462950484e-05, + "loss": 0.6072, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.8668035677895695e-05, + "loss": 0.3354, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.866725189284091e-05, + "loss": 0.081, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.8666468107786122e-05, + "loss": 0.1598, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.8665684322731336e-05, + "loss": 0.1325, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.866490053767655e-05, + "loss": 0.243, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.866411675262176e-05, + "loss": 0.256, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.8663332967566975e-05, + "loss": 0.2464, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.866254918251219e-05, + "loss": 0.3764, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.8661765397457402e-05, + "loss": 0.3959, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.8660981612402616e-05, + "loss": 0.8095, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.866019782734783e-05, + "loss": 0.2742, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.8659414042293044e-05, + "loss": 0.1321, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.8658630257238258e-05, + "loss": 0.1742, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.865784647218347e-05, + "loss": 0.1818, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.8657062687128686e-05, + "loss": 0.1883, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.8656278902073896e-05, + "loss": 0.2709, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.865549511701911e-05, + "loss": 0.2502, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.8654711331964324e-05, + "loss": 0.3082, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653927546909535e-05, + "loss": 0.3196, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.8653143761854752e-05, + "loss": 0.8118, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.8652359976799962e-05, + "loss": 0.2884, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.8651576191745176e-05, + "loss": 0.1232, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.865079240669039e-05, + "loss": 0.1453, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.8650008621635604e-05, + "loss": 0.1877, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.8649224836580818e-05, + "loss": 0.2546, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.8648441051526032e-05, + "loss": 0.1846, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.8647657266471243e-05, + "loss": 0.3649, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.864687348141646e-05, + "loss": 0.3054, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.864608969636167e-05, + "loss": 0.2868, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.8645305911306884e-05, + "loss": 0.6246, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.8644522126252098e-05, + "loss": 0.2241, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.8643738341197312e-05, + "loss": 0.1827, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642954556142526e-05, + "loss": 0.1333, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.8642170771087736e-05, + "loss": 0.2136, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.8641386986032954e-05, + "loss": 0.164, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.8640603200978164e-05, + "loss": 0.2427, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639819415923378e-05, + "loss": 0.2968, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.8639035630868592e-05, + "loss": 0.3661, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.8638251845813806e-05, + "loss": 0.3389, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.863746806075902e-05, + "loss": 0.8643, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.8636684275704234e-05, + "loss": 0.2849, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.8635900490649444e-05, + "loss": 0.1267, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.863511670559466e-05, + "loss": 0.1608, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.8634332920539872e-05, + "loss": 0.1116, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.8633549135485086e-05, + "loss": 0.1512, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.86327653504303e-05, + "loss": 0.2167, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.863198156537551e-05, + "loss": 0.2379, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.8631197780320728e-05, + "loss": 0.3606, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.8630413995265938e-05, + "loss": 0.3912, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.8629630210211152e-05, + "loss": 0.5597, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.8628846425156366e-05, + "loss": 0.3225, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.862806264010158e-05, + "loss": 0.1031, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.8627278855046794e-05, + "loss": 0.2045, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.8626495069992008e-05, + "loss": 0.1208, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.8625711284937222e-05, + "loss": 0.2459, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624927499882436e-05, + "loss": 0.1681, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.8624143714827646e-05, + "loss": 0.4399, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.862335992977286e-05, + "loss": 0.2947, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.8622576144718074e-05, + "loss": 0.4561, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621792359663288e-05, + "loss": 0.6068, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.8621008574608502e-05, + "loss": 0.2161, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.8620224789553712e-05, + "loss": 0.1146, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.861944100449893e-05, + "loss": 0.1992, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.861865721944414e-05, + "loss": 0.2146, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617873434389354e-05, + "loss": 0.1645, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.8617089649334568e-05, + "loss": 0.2525, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.8616305864279782e-05, + "loss": 0.2867, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.8615522079224996e-05, + "loss": 0.3965, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.861473829417021e-05, + "loss": 0.316, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.861395450911542e-05, + "loss": 0.4794, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.8613170724060634e-05, + "loss": 0.2089, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.8612386939005848e-05, + "loss": 0.0973, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.8611603153951062e-05, + "loss": 0.1503, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.8610819368896276e-05, + "loss": 0.1923, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.861003558384149e-05, + "loss": 0.2079, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.8609251798786704e-05, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.8608468013731914e-05, + "loss": 0.2118, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.860768422867713e-05, + "loss": 0.4529, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606900443622342e-05, + "loss": 0.4992, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.8606116658567556e-05, + "loss": 0.7247, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.860533287351277e-05, + "loss": 0.2615, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.8604549088457984e-05, + "loss": 0.1337, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.8603765303403198e-05, + "loss": 0.1473, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602981518348408e-05, + "loss": 0.2391, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.8602197733293622e-05, + "loss": 0.2209, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.8601413948238836e-05, + "loss": 0.2441, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.860063016318405e-05, + "loss": 0.2697, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599846378129264e-05, + "loss": 0.3542, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.8599062593074478e-05, + "loss": 0.3546, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.8598278808019688e-05, + "loss": 0.8018, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.8597495022964905e-05, + "loss": 0.2301, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.8596711237910116e-05, + "loss": 0.0913, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.859592745285533e-05, + "loss": 0.1875, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.8595143667800544e-05, + "loss": 0.1521, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.8594359882745758e-05, + "loss": 0.1545, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.859357609769097e-05, + "loss": 0.2105, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.8592792312636182e-05, + "loss": 0.2772, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.85920085275814e-05, + "loss": 0.384, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.859122474252661e-05, + "loss": 0.294, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.8590440957471824e-05, + "loss": 0.5701, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.8589657172417038e-05, + "loss": 0.3018, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.858887338736225e-05, + "loss": 0.146, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.8588089602307465e-05, + "loss": 0.2157, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.858730581725268e-05, + "loss": 0.1809, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.858652203219789e-05, + "loss": 0.1605, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.8585738247143107e-05, + "loss": 0.1612, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.8584954462088318e-05, + "loss": 0.1955, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.858417067703353e-05, + "loss": 0.2136, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.8583386891978746e-05, + "loss": 0.4425, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.8582603106923956e-05, + "loss": 0.5771, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581819321869173e-05, + "loss": 0.3124, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.8581035536814384e-05, + "loss": 0.1658, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.8580251751759598e-05, + "loss": 0.2146, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.857946796670481e-05, + "loss": 0.1755, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.8578684181650026e-05, + "loss": 0.2154, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.857790039659524e-05, + "loss": 0.1933, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.8577116611540453e-05, + "loss": 0.1655, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.8576332826485667e-05, + "loss": 0.1967, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.857554904143088e-05, + "loss": 0.4955, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.8574765256376092e-05, + "loss": 0.5188, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.857398147132131e-05, + "loss": 0.2023, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.857319768626652e-05, + "loss": 0.1892, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.8572413901211733e-05, + "loss": 0.1397, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.8571630116156947e-05, + "loss": 0.1333, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570846331102158e-05, + "loss": 0.2091, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.8570062546047375e-05, + "loss": 0.1695, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.8569278760992586e-05, + "loss": 0.3251, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.85684949759378e-05, + "loss": 0.3039, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.8567711190883013e-05, + "loss": 0.4027, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.8566927405828227e-05, + "loss": 0.461, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.856614362077344e-05, + "loss": 0.2918, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.8565359835718655e-05, + "loss": 0.1856, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.8564576050663866e-05, + "loss": 0.1539, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563792265609083e-05, + "loss": 0.1487, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.8563008480554294e-05, + "loss": 0.1158, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.8562224695499507e-05, + "loss": 0.216, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.856144091044472e-05, + "loss": 0.3013, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.8560657125389935e-05, + "loss": 0.4077, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.855987334033515e-05, + "loss": 0.3365, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.855908955528036e-05, + "loss": 0.4686, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.8558305770225577e-05, + "loss": 0.2812, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.8557521985170787e-05, + "loss": 0.133, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.8556738200116e-05, + "loss": 0.1349, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.8555954415061215e-05, + "loss": 0.1394, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.855517063000643e-05, + "loss": 0.2261, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.8554386844951643e-05, + "loss": 0.1947, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.8553603059896857e-05, + "loss": 0.2436, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.8552819274842068e-05, + "loss": 0.2637, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.855203548978728e-05, + "loss": 0.4449, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.8551251704732495e-05, + "loss": 0.7073, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.855046791967771e-05, + "loss": 0.2425, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.8549684134622923e-05, + "loss": 0.1223, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.8548900349568134e-05, + "loss": 0.1035, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.854811656451335e-05, + "loss": 0.1336, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.854733277945856e-05, + "loss": 0.224, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.8546548994403775e-05, + "loss": 0.2017, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.854576520934899e-05, + "loss": 0.3207, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544981424294203e-05, + "loss": 0.3581, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.8544197639239417e-05, + "loss": 0.299, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.854341385418463e-05, + "loss": 0.4774, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.8542630069129845e-05, + "loss": 0.2978, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.8541846284075055e-05, + "loss": 0.1423, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.854106249902027e-05, + "loss": 0.1697, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.8540278713965483e-05, + "loss": 0.1412, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.8539494928910697e-05, + "loss": 0.1674, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.853871114385591e-05, + "loss": 0.2561, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537927358801125e-05, + "loss": 0.3232, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.8537143573746335e-05, + "loss": 0.2814, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.8536359788691553e-05, + "loss": 0.4515, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.8535576003636763e-05, + "loss": 0.6846, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.8534792218581977e-05, + "loss": 0.3577, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.853400843352719e-05, + "loss": 0.076, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.8533224648472405e-05, + "loss": 0.2025, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.853244086341762e-05, + "loss": 0.1471, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.853165707836283e-05, + "loss": 0.1286, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530873293308043e-05, + "loss": 0.2737, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.8530089508253257e-05, + "loss": 0.2873, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.852930572319847e-05, + "loss": 0.3913, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.8528521938143685e-05, + "loss": 0.4033, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.85277381530889e-05, + "loss": 0.5046, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526954368034113e-05, + "loss": 0.2842, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.8526170582979327e-05, + "loss": 0.1708, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8525386797924537e-05, + "loss": 0.1666, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.8524603012869755e-05, + "loss": 0.1689, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.8523819227814965e-05, + "loss": 0.2137, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.852303544276018e-05, + "loss": 0.2885, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.8522251657705393e-05, + "loss": 0.2818, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.8521467872650603e-05, + "loss": 0.2431, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.852068408759582e-05, + "loss": 0.3849, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.851990030254103e-05, + "loss": 0.6032, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.8519116517486245e-05, + "loss": 0.2557, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.851833273243146e-05, + "loss": 0.1205, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.8517548947376673e-05, + "loss": 0.1141, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.8516765162321887e-05, + "loss": 0.1535, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.85159813772671e-05, + "loss": 0.1693, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.851519759221231e-05, + "loss": 0.2214, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.851441380715753e-05, + "loss": 0.3034, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.851363002210274e-05, + "loss": 0.2867, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512846237047953e-05, + "loss": 0.4438, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.8512062451993167e-05, + "loss": 0.614, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.851127866693838e-05, + "loss": 0.2172, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.8510494881883595e-05, + "loss": 0.0805, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.8509711096828805e-05, + "loss": 0.1772, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508927311774023e-05, + "loss": 0.1784, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.8508143526719233e-05, + "loss": 0.2444, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.8507359741664447e-05, + "loss": 0.1369, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.850657595660966e-05, + "loss": 0.1954, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.8505792171554875e-05, + "loss": 0.3048, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.850500838650009e-05, + "loss": 0.3967, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.8504224601445303e-05, + "loss": 0.476, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.8503440816390513e-05, + "loss": 0.2726, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.850265703133573e-05, + "loss": 0.0955, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.850187324628094e-05, + "loss": 0.1345, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.8501089461226155e-05, + "loss": 0.2196, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.850030567617137e-05, + "loss": 0.1968, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.849952189111658e-05, + "loss": 0.2663, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.8498738106061797e-05, + "loss": 0.2857, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.8497954321007007e-05, + "loss": 0.3226, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.849717053595222e-05, + "loss": 0.3366, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.8496386750897435e-05, + "loss": 0.4914, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.849560296584265e-05, + "loss": 0.3555, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494819180787863e-05, + "loss": 0.1054, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.8494035395733077e-05, + "loss": 0.2007, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.849325161067829e-05, + "loss": 0.2069, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.8492467825623504e-05, + "loss": 0.2225, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.8491684040568715e-05, + "loss": 0.2789, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.849090025551393e-05, + "loss": 0.3114, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.8490116470459143e-05, + "loss": 0.3273, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.8489332685404357e-05, + "loss": 0.2557, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.848854890034957e-05, + "loss": 0.5148, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.848776511529478e-05, + "loss": 0.3063, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.848698133024e-05, + "loss": 0.1092, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.848619754518521e-05, + "loss": 0.1135, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.8485413760130423e-05, + "loss": 0.1765, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.8484629975075637e-05, + "loss": 0.1905, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.848384619002085e-05, + "loss": 0.1588, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.8483062404966064e-05, + "loss": 0.2983, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.848227861991128e-05, + "loss": 0.2593, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.848149483485649e-05, + "loss": 0.3945, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.8480711049801703e-05, + "loss": 0.5359, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.8479927264746917e-05, + "loss": 0.473, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.847914347969213e-05, + "loss": 0.1142, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.8478359694637345e-05, + "loss": 0.1146, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.847757590958256e-05, + "loss": 0.2126, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476792124527772e-05, + "loss": 0.1638, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.8476008339472983e-05, + "loss": 0.2529, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.84752245544182e-05, + "loss": 0.2843, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.847444076936341e-05, + "loss": 0.3466, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.8473656984308625e-05, + "loss": 0.3651, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.847287319925384e-05, + "loss": 0.5283, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.8472089414199052e-05, + "loss": 0.2919, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.8471305629144266e-05, + "loss": 0.1133, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.8470521844089477e-05, + "loss": 0.1283, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.846973805903469e-05, + "loss": 0.1331, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.8468954273979905e-05, + "loss": 0.2511, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.846817048892512e-05, + "loss": 0.2337, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.8467386703870332e-05, + "loss": 0.2217, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.8466602918815546e-05, + "loss": 0.4054, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465819133760757e-05, + "loss": 0.42, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.8465035348705974e-05, + "loss": 0.4874, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.8464251563651185e-05, + "loss": 0.2858, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.84634677785964e-05, + "loss": 0.1269, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.8462683993541612e-05, + "loss": 0.1334, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.8461900208486826e-05, + "loss": 0.1715, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.846111642343204e-05, + "loss": 0.1632, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.846033263837725e-05, + "loss": 0.1676, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.8459548853322468e-05, + "loss": 0.1878, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.845876506826768e-05, + "loss": 0.2542, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457981283212893e-05, + "loss": 0.3347, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.8457197498158106e-05, + "loss": 0.5031, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.845641371310332e-05, + "loss": 0.3013, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.8455629928048534e-05, + "loss": 0.0807, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.8454846142993748e-05, + "loss": 0.1866, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.845406235793896e-05, + "loss": 0.128, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.8453278572884176e-05, + "loss": 0.2177, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.8452494787829386e-05, + "loss": 0.2811, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.84517110027746e-05, + "loss": 0.3075, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450927217719814e-05, + "loss": 0.3236, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.8450143432665025e-05, + "loss": 0.2975, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.8449359647610242e-05, + "loss": 0.7018, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.8448575862555453e-05, + "loss": 0.3354, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.8447792077500667e-05, + "loss": 0.1611, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.844700829244588e-05, + "loss": 0.1056, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.8446224507391094e-05, + "loss": 0.1905, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.8445440722336308e-05, + "loss": 0.1444, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.8444656937281522e-05, + "loss": 0.2417, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.8443873152226736e-05, + "loss": 0.2322, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.844308936717195e-05, + "loss": 0.2791, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.844230558211716e-05, + "loss": 0.3903, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.8441521797062378e-05, + "loss": 0.6466, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.8440738012007588e-05, + "loss": 0.2265, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439954226952802e-05, + "loss": 0.1035, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.8439170441898016e-05, + "loss": 0.127, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.8438386656843227e-05, + "loss": 0.1504, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.8437602871788444e-05, + "loss": 0.3139, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.8436819086733654e-05, + "loss": 0.2015, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.843603530167887e-05, + "loss": 0.2831, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8435251516624082e-05, + "loss": 0.4307, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8434467731569296e-05, + "loss": 0.4474, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.843368394651451e-05, + "loss": 0.4485, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432900161459724e-05, + "loss": 0.3155, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8432116376404934e-05, + "loss": 0.1338, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.8431332591350152e-05, + "loss": 0.1072, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8430548806295362e-05, + "loss": 0.1568, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.8429765021240576e-05, + "loss": 0.209, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.842898123618579e-05, + "loss": 0.2181, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.8428197451131004e-05, + "loss": 0.1599, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8427413666076218e-05, + "loss": 0.2345, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.842662988102143e-05, + "loss": 0.5213, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425846095966646e-05, + "loss": 0.6337, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8425062310911856e-05, + "loss": 0.2929, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.842427852585707e-05, + "loss": 0.0707, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8423494740802284e-05, + "loss": 0.1464, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.8422710955747498e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421927170692712e-05, + "loss": 0.2093, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8421143385637926e-05, + "loss": 0.1709, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.8420359600583136e-05, + "loss": 0.2491, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.841957581552835e-05, + "loss": 0.3152, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418792030473564e-05, + "loss": 0.3752, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.8418008245418778e-05, + "loss": 0.7978, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.8417224460363992e-05, + "loss": 0.2618, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.8416440675309202e-05, + "loss": 0.121, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.841565689025442e-05, + "loss": 0.3329, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.841487310519963e-05, + "loss": 0.1459, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.8414089320144844e-05, + "loss": 0.1842, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.8413305535090058e-05, + "loss": 0.2167, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.8412521750035272e-05, + "loss": 0.2646, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.8411737964980486e-05, + "loss": 0.3365, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.84109541799257e-05, + "loss": 0.356, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.8410170394870914e-05, + "loss": 0.7355, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.8409386609816124e-05, + "loss": 0.2418, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.8408602824761338e-05, + "loss": 0.091, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407819039706552e-05, + "loss": 0.1088, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.8407035254651766e-05, + "loss": 0.2117, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.840625146959698e-05, + "loss": 0.1758, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.8405467684542194e-05, + "loss": 0.2617, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.8404683899487404e-05, + "loss": 0.1872, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.840390011443262e-05, + "loss": 0.2953, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.8403116329377832e-05, + "loss": 0.2921, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.8402332544323046e-05, + "loss": 0.6133, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.840154875926826e-05, + "loss": 0.2085, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.8400764974213474e-05, + "loss": 0.1601, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399981189158688e-05, + "loss": 0.1244, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.8399197404103898e-05, + "loss": 0.1695, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.8398413619049112e-05, + "loss": 0.1907, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.8397629833994326e-05, + "loss": 0.2046, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.839684604893954e-05, + "loss": 0.2313, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.8396062263884754e-05, + "loss": 0.4132, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.8395278478829968e-05, + "loss": 0.3662, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.839449469377518e-05, + "loss": 0.6663, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.8393710908720396e-05, + "loss": 0.2829, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392927123665606e-05, + "loss": 0.0905, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.8392143338610823e-05, + "loss": 0.1383, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.8391359553556034e-05, + "loss": 0.1256, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.8390575768501248e-05, + "loss": 0.1687, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.838979198344646e-05, + "loss": 0.2381, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.8389008198391672e-05, + "loss": 0.2776, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.838822441333689e-05, + "loss": 0.314, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.83874406282821e-05, + "loss": 0.3639, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.8386656843227314e-05, + "loss": 0.5223, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385873058172528e-05, + "loss": 0.1783, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.8385089273117742e-05, + "loss": 0.0991, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.8384305488062956e-05, + "loss": 0.2287, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.838352170300817e-05, + "loss": 0.2312, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.838273791795338e-05, + "loss": 0.2481, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381954132898597e-05, + "loss": 0.1717, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.8381170347843808e-05, + "loss": 0.2379, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.8380386562789022e-05, + "loss": 0.3034, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.8379602777734236e-05, + "loss": 0.3773, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.837881899267945e-05, + "loss": 0.6601, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.8378035207624663e-05, + "loss": 0.2288, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.8377251422569874e-05, + "loss": 0.1123, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.837646763751509e-05, + "loss": 0.158, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.8375683852460302e-05, + "loss": 0.1854, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.8374900067405516e-05, + "loss": 0.1726, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.837411628235073e-05, + "loss": 0.2843, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.8373332497295944e-05, + "loss": 0.2421, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.8372548712241157e-05, + "loss": 0.3232, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.837176492718637e-05, + "loss": 0.4045, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.8370981142131582e-05, + "loss": 0.5567, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.83701973570768e-05, + "loss": 0.2517, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.836941357202201e-05, + "loss": 0.0982, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.8368629786967224e-05, + "loss": 0.1019, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367846001912437e-05, + "loss": 0.1961, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.8367062216857648e-05, + "loss": 0.1333, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.8366278431802865e-05, + "loss": 0.1751, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.8365494646748076e-05, + "loss": 0.2277, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.836471086169329e-05, + "loss": 0.2504, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363927076638504e-05, + "loss": 0.4636, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.8363143291583718e-05, + "loss": 0.5295, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.836235950652893e-05, + "loss": 0.2709, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.8361575721474145e-05, + "loss": 0.0808, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.836079193641936e-05, + "loss": 0.1669, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.8360008151364573e-05, + "loss": 0.1414, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.8359224366309784e-05, + "loss": 0.2378, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.8358440581254998e-05, + "loss": 0.2094, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.835765679620021e-05, + "loss": 0.193, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.8356873011145425e-05, + "loss": 0.3476, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.835608922609064e-05, + "loss": 0.416, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.835530544103585e-05, + "loss": 0.5145, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.8354521655981067e-05, + "loss": 0.2958, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.8353737870926278e-05, + "loss": 0.0655, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.835295408587149e-05, + "loss": 0.1104, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.8352170300816705e-05, + "loss": 0.1485, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.835138651576192e-05, + "loss": 0.1616, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.8350602730707133e-05, + "loss": 0.183, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349818945652347e-05, + "loss": 0.2659, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.8349035160597558e-05, + "loss": 0.3224, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.834825137554277e-05, + "loss": 0.4451, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8347467590487985e-05, + "loss": 0.6188, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.83466838054332e-05, + "loss": 0.2427, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345900020378413e-05, + "loss": 0.0932, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8345116235323627e-05, + "loss": 0.201, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.834433245026884e-05, + "loss": 0.1564, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.834354866521405e-05, + "loss": 0.1475, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.834276488015927e-05, + "loss": 0.1801, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.834198109510448e-05, + "loss": 0.2695, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8341197310049693e-05, + "loss": 0.2264, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8340413524994907e-05, + "loss": 0.3982, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.833962973994012e-05, + "loss": 0.6555, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338845954885335e-05, + "loss": 0.3081, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.8338062169830546e-05, + "loss": 0.1064, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.833727838477576e-05, + "loss": 0.1906, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.8336494599720973e-05, + "loss": 0.1571, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.8335710814666187e-05, + "loss": 0.1314, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.83349270296114e-05, + "loss": 0.233, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.8334143244556615e-05, + "loss": 0.216, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.8333359459501826e-05, + "loss": 0.2412, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332575674447043e-05, + "loss": 0.3539, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331791889392253e-05, + "loss": 0.3351, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.8331008104337467e-05, + "loss": 0.2767, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.833022431928268e-05, + "loss": 0.1602, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.8329440534227895e-05, + "loss": 0.64, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8328813506184066e-05, + "loss": 0.128, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.832802972112928e-05, + "loss": 0.1629, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.832724593607449e-05, + "loss": 0.2992, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8326462151019707e-05, + "loss": 0.2846, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8325678365964918e-05, + "loss": 0.283, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324894580910132e-05, + "loss": 0.5153, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8324110795855346e-05, + "loss": 0.5733, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.832332701080056e-05, + "loss": 0.2492, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8322543225745773e-05, + "loss": 0.0945, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8321759440690987e-05, + "loss": 0.0858, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320975655636198e-05, + "loss": 0.1475, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8320191870581415e-05, + "loss": 0.1625, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8319408085526626e-05, + "loss": 0.2204, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.831862430047184e-05, + "loss": 0.236, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317840515417053e-05, + "loss": 0.32, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8317056730362267e-05, + "loss": 0.4229, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.831627294530748e-05, + "loss": 0.5182, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8315489160252692e-05, + "loss": 0.2982, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.831470537519791e-05, + "loss": 0.0763, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.831392159014312e-05, + "loss": 0.0989, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8313137805088334e-05, + "loss": 0.1841, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8312354020033547e-05, + "loss": 0.1474, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.831157023497876e-05, + "loss": 0.186, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8310786449923975e-05, + "loss": 0.2091, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.831000266486919e-05, + "loss": 0.2279, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.83092188798144e-05, + "loss": 0.3051, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.8308435094759617e-05, + "loss": 0.7879, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8307651309704827e-05, + "loss": 0.2961, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.830686752465004e-05, + "loss": 0.1053, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8306083739595255e-05, + "loss": 0.1465, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8305299954540466e-05, + "loss": 0.1971, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8304516169485683e-05, + "loss": 0.1379, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8303732384430894e-05, + "loss": 0.1948, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8302948599376108e-05, + "loss": 0.3947, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.830216481432132e-05, + "loss": 0.3241, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8301381029266535e-05, + "loss": 0.4234, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.830059724421175e-05, + "loss": 0.7616, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299813459156963e-05, + "loss": 0.3111, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8299029674102177e-05, + "loss": 0.0971, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.829824588904739e-05, + "loss": 0.0742, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.82974621039926e-05, + "loss": 0.1005, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.8296678318937815e-05, + "loss": 0.1713, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.829589453388303e-05, + "loss": 0.2496, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.82952675058392e-05, + "loss": 0.2245, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.8294483720784414e-05, + "loss": 0.3005, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.8293699935729628e-05, + "loss": 0.4579, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.829291615067484e-05, + "loss": 0.7279, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.8292132365620055e-05, + "loss": 0.177, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.8291348580565266e-05, + "loss": 0.1003, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.829056479551048e-05, + "loss": 0.1564, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.8289781010455694e-05, + "loss": 0.233, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.8288997225400908e-05, + "loss": 0.1809, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.828821344034612e-05, + "loss": 0.2469, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8287429655291332e-05, + "loss": 0.2773, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.828664587023655e-05, + "loss": 0.3309, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.828586208518176e-05, + "loss": 0.25, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8285078300126974e-05, + "loss": 0.4452, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8284294515072188e-05, + "loss": 0.2698, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.82835107300174e-05, + "loss": 0.0781, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8282726944962615e-05, + "loss": 0.1029, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.828194315990783e-05, + "loss": 0.1722, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.828115937485304e-05, + "loss": 0.1502, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8280375589798257e-05, + "loss": 0.2189, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.8279591804743468e-05, + "loss": 0.1885, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.827880801968868e-05, + "loss": 0.1883, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.8278024234633896e-05, + "loss": 0.3488, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.827724044957911e-05, + "loss": 0.6505, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.8276456664524323e-05, + "loss": 0.2543, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8275672879469534e-05, + "loss": 0.1051, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.827488909441475e-05, + "loss": 0.106, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.827410530935996e-05, + "loss": 0.1483, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8273321524305176e-05, + "loss": 0.2022, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.827253773925039e-05, + "loss": 0.2002, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8271753954195603e-05, + "loss": 0.2563, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8270970169140817e-05, + "loss": 0.2597, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.827018638408603e-05, + "loss": 0.429, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8269402599031242e-05, + "loss": 0.5843, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.826861881397646e-05, + "loss": 0.2978, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.826783502892167e-05, + "loss": 0.1374, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8267051243866883e-05, + "loss": 0.1817, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8266267458812097e-05, + "loss": 0.0982, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.8265483673757308e-05, + "loss": 0.2185, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.8264699888702525e-05, + "loss": 0.1185, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.8263916103647736e-05, + "loss": 0.2142, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.826313231859295e-05, + "loss": 0.3277, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8262348533538163e-05, + "loss": 0.4278, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8261564748483377e-05, + "loss": 0.5751, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.826078096342859e-05, + "loss": 0.3273, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8259997178373805e-05, + "loss": 0.0808, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.825921339331902e-05, + "loss": 0.1315, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8258429608264233e-05, + "loss": 0.155, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8257645823209444e-05, + "loss": 0.1783, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8256862038154657e-05, + "loss": 0.2643, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.825607825309987e-05, + "loss": 0.2056, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8255294468045085e-05, + "loss": 0.3616, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.82545106829903e-05, + "loss": 0.4022, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.825372689793551e-05, + "loss": 0.6273, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252943112880727e-05, + "loss": 0.2924, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.8252159327825937e-05, + "loss": 0.1846, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.825137554277115e-05, + "loss": 0.1085, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8250591757716365e-05, + "loss": 0.1888, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.824980797266158e-05, + "loss": 0.1602, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.8249024187606793e-05, + "loss": 0.3114, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.8248240402552007e-05, + "loss": 0.2241, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.8247456617497218e-05, + "loss": 0.3146, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.824667283244243e-05, + "loss": 0.3433, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.8245889047387645e-05, + "loss": 0.3809, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.824510526233286e-05, + "loss": 0.2425, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.8244321477278073e-05, + "loss": 0.0895, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8243537692223287e-05, + "loss": 0.1157, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.82427539071685e-05, + "loss": 0.1821, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.824197012211371e-05, + "loss": 0.2085, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8241186337058925e-05, + "loss": 0.2786, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.824040255200414e-05, + "loss": 0.2232, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8239618766949353e-05, + "loss": 0.1914, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8238834981894567e-05, + "loss": 0.4057, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.823805119683978e-05, + "loss": 0.5458, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8237267411784995e-05, + "loss": 0.2761, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.8236483626730205e-05, + "loss": 0.1128, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.823569984167542e-05, + "loss": 0.1394, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234916056620633e-05, + "loss": 0.1571, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.8234132271565847e-05, + "loss": 0.2344, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.823334848651106e-05, + "loss": 0.2405, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.8232564701456275e-05, + "loss": 0.2111, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.8231780916401485e-05, + "loss": 0.2919, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230997131346703e-05, + "loss": 0.461, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8230213346291913e-05, + "loss": 0.4065, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8229429561237127e-05, + "loss": 0.2381, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.822864577618234e-05, + "loss": 0.0971, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8227861991127555e-05, + "loss": 0.1587, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.822707820607277e-05, + "loss": 0.221, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.822629442101798e-05, + "loss": 0.1664, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8225510635963197e-05, + "loss": 0.2185, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8224726850908407e-05, + "loss": 0.2366, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.822394306585362e-05, + "loss": 0.3796, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8223159280798835e-05, + "loss": 0.3175, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.822237549574405e-05, + "loss": 0.6086, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8221591710689263e-05, + "loss": 0.2234, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220807925634477e-05, + "loss": 0.1107, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8220024140579687e-05, + "loss": 0.1629, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.8219240355524905e-05, + "loss": 0.1813, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8218456570470115e-05, + "loss": 0.1278, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.821767278541533e-05, + "loss": 0.2862, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216889000360543e-05, + "loss": 0.1883, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8216105215305753e-05, + "loss": 0.3023, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.821532143025097e-05, + "loss": 0.3543, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.821453764519618e-05, + "loss": 0.6779, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8213753860141395e-05, + "loss": 0.2855, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.821297007508661e-05, + "loss": 0.1078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8212186290031823e-05, + "loss": 0.0658, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8211402504977037e-05, + "loss": 0.1846, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.821061871992225e-05, + "loss": 0.2117, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8209834934867465e-05, + "loss": 0.2137, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.820905114981268e-05, + "loss": 0.2952, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.820826736475789e-05, + "loss": 0.3457, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.8207483579703103e-05, + "loss": 0.3901, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.8206699794648317e-05, + "loss": 0.4563, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.820591600959353e-05, + "loss": 0.3036, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8205132224538745e-05, + "loss": 0.1384, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8204348439483955e-05, + "loss": 0.1121, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.8203564654429173e-05, + "loss": 0.1626, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8202780869374383e-05, + "loss": 0.1478, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.8201997084319597e-05, + "loss": 0.2527, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.820121329926481e-05, + "loss": 0.2802, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.8200429514210025e-05, + "loss": 0.3921, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.819964572915524e-05, + "loss": 0.3346, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198861944100453e-05, + "loss": 0.5112, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8198078159045663e-05, + "loss": 0.2538, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.819729437399088e-05, + "loss": 0.0939, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.819651058893609e-05, + "loss": 0.1042, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8195726803881305e-05, + "loss": 0.1335, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.819494301882652e-05, + "loss": 0.1319, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8194159233771733e-05, + "loss": 0.1862, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8193375448716947e-05, + "loss": 0.2162, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.8192591663662157e-05, + "loss": 0.3461, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.819180787860737e-05, + "loss": 0.2864, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.8191024093552585e-05, + "loss": 0.5461, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.81902403084978e-05, + "loss": 0.2668, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.8189456523443013e-05, + "loss": 0.0315, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.8188672738388227e-05, + "loss": 0.0844, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.818788895333344e-05, + "loss": 0.193, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.8187105168278654e-05, + "loss": 0.1365, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186321383223865e-05, + "loss": 0.1781, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8185537598169082e-05, + "loss": 0.331, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8184753813114293e-05, + "loss": 0.2479, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8183970028059507e-05, + "loss": 0.3348, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.818318624300472e-05, + "loss": 0.5393, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.818240245794993e-05, + "loss": 0.2076, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.818161867289515e-05, + "loss": 0.0844, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.818083488784036e-05, + "loss": 0.1753, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8180051102785573e-05, + "loss": 0.1373, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8179267317730787e-05, + "loss": 0.2813, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8178483532676e-05, + "loss": 0.2083, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.8177699747621214e-05, + "loss": 0.3167, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.817691596256643e-05, + "loss": 0.2749, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.8176132177511642e-05, + "loss": 0.421, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.8175348392456856e-05, + "loss": 0.6261, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174564607402067e-05, + "loss": 0.2091, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.817378082234728e-05, + "loss": 0.1761, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8172997037292495e-05, + "loss": 0.1845, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.817221325223771e-05, + "loss": 0.1495, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8171429467182922e-05, + "loss": 0.183, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8170645682128133e-05, + "loss": 0.1456, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.816986189707335e-05, + "loss": 0.3501, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.816907811201856e-05, + "loss": 0.2666, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8168294326963775e-05, + "loss": 0.2329, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.816751054190899e-05, + "loss": 0.5528, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8166726756854202e-05, + "loss": 0.2247, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8165942971799416e-05, + "loss": 0.1046, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.816515918674463e-05, + "loss": 0.1533, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.816437540168984e-05, + "loss": 0.1581, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.8163591616635055e-05, + "loss": 0.1386, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.816280783158027e-05, + "loss": 0.1616, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162024046525482e-05, + "loss": 0.1669, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.8161240261470696e-05, + "loss": 0.2804, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.816045647641591e-05, + "loss": 0.4752, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8159672691361124e-05, + "loss": 0.6178, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8158888906306335e-05, + "loss": 0.214, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.815810512125155e-05, + "loss": 0.1284, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.8157321336196762e-05, + "loss": 0.0707, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.8156537551141976e-05, + "loss": 0.0857, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.815575376608719e-05, + "loss": 0.162, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154969981032404e-05, + "loss": 0.1753, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8154186195977618e-05, + "loss": 0.3083, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.815340241092283e-05, + "loss": 0.359, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8152618625868043e-05, + "loss": 0.3942, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8151834840813256e-05, + "loss": 0.5581, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.815105105575847e-05, + "loss": 0.275, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8150267270703684e-05, + "loss": 0.0976, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8149483485648898e-05, + "loss": 0.1869, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.814869970059411e-05, + "loss": 0.1546, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147915915539326e-05, + "loss": 0.1551, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.8147132130484536e-05, + "loss": 0.1323, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.814634834542975e-05, + "loss": 0.199, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8145564560374964e-05, + "loss": 0.2718, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.8144780775320178e-05, + "loss": 0.358, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143996990265392e-05, + "loss": 0.7596, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8143213205210603e-05, + "loss": 0.1977, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.814242942015582e-05, + "loss": 0.1171, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.814164563510103e-05, + "loss": 0.117, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140861850046244e-05, + "loss": 0.1249, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8140078064991458e-05, + "loss": 0.2719, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8139294279936672e-05, + "loss": 0.1671, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8138510494881886e-05, + "loss": 0.2753, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.81377267098271e-05, + "loss": 0.2222, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.813694292477231e-05, + "loss": 0.3789, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8136159139717528e-05, + "loss": 0.5476, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8135375354662738e-05, + "loss": 0.2506, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.8134591569607952e-05, + "loss": 0.1047, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133807784553166e-05, + "loss": 0.1184, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.8133023999498377e-05, + "loss": 0.1366, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.8132240214443594e-05, + "loss": 0.1905, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.8131456429388804e-05, + "loss": 0.1637, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.813067264433402e-05, + "loss": 0.2118, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129888859279232e-05, + "loss": 0.4517, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.8129105074224446e-05, + "loss": 0.3792, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.812832128916966e-05, + "loss": 0.6719, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8127537504114874e-05, + "loss": 0.1911, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8126753719060088e-05, + "loss": 0.0936, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8125969934005302e-05, + "loss": 0.0923, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8125186148950512e-05, + "loss": 0.184, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8124402363895726e-05, + "loss": 0.1625, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.812361857884094e-05, + "loss": 0.1753, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122834793786154e-05, + "loss": 0.2652, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8122051008731368e-05, + "loss": 0.2203, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.812126722367658e-05, + "loss": 0.3814, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8120483438621796e-05, + "loss": 0.613, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.8119699653567006e-05, + "loss": 0.2086, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.811891586851222e-05, + "loss": 0.1071, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.8118132083457434e-05, + "loss": 0.1252, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.8117348298402648e-05, + "loss": 0.1397, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.8116564513347862e-05, + "loss": 0.1292, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8115780728293076e-05, + "loss": 0.2849, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114996943238286e-05, + "loss": 0.1862, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8114213158183504e-05, + "loss": 0.2616, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8113429373128714e-05, + "loss": 0.2944, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8112645588073928e-05, + "loss": 0.4961, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111861803019142e-05, + "loss": 0.2485, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8111078017964356e-05, + "loss": 0.0993, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.811029423290957e-05, + "loss": 0.1454, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.810951044785478e-05, + "loss": 0.1368, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8108726662799994e-05, + "loss": 0.178, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107942877745208e-05, + "loss": 0.2138, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8107159092690422e-05, + "loss": 0.2884, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8106375307635636e-05, + "loss": 0.2957, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.810559152258085e-05, + "loss": 0.3439, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104807737526064e-05, + "loss": 0.3735, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8104023952471278e-05, + "loss": 0.2341, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.8103240167416488e-05, + "loss": 0.1029, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.8102456382361702e-05, + "loss": 0.1167, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.8101672597306916e-05, + "loss": 0.1298, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.810088881225213e-05, + "loss": 0.1732, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.8100105027197344e-05, + "loss": 0.2762, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.8099321242142554e-05, + "loss": 0.26, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.809853745708777e-05, + "loss": 0.3426, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8097753672032982e-05, + "loss": 0.4019, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8096969886978196e-05, + "loss": 0.7487, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.809618610192341e-05, + "loss": 0.3123, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8095402316868624e-05, + "loss": 0.0959, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8094618531813838e-05, + "loss": 0.0824, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.809383474675905e-05, + "loss": 0.2266, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8093050961704265e-05, + "loss": 0.1764, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8092267176649476e-05, + "loss": 0.2117, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.809148339159469e-05, + "loss": 0.2613, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.8090699606539904e-05, + "loss": 0.2495, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.8089915821485118e-05, + "loss": 0.3384, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.808913203643033e-05, + "loss": 0.6137, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.8088348251375546e-05, + "loss": 0.2418, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.8087564466320756e-05, + "loss": 0.0482, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.8086780681265973e-05, + "loss": 0.1059, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085996896211184e-05, + "loss": 0.1762, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8085213111156398e-05, + "loss": 0.2271, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.808442932610161e-05, + "loss": 0.2679, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8083645541046826e-05, + "loss": 0.3361, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.808286175599204e-05, + "loss": 0.3421, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.808207797093725e-05, + "loss": 0.3615, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8081294185882464e-05, + "loss": 0.6389, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8080510400827678e-05, + "loss": 0.2581, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8079726615772892e-05, + "loss": 0.1071, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8078942830718106e-05, + "loss": 0.1831, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.807815904566332e-05, + "loss": 0.2567, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8077375260608533e-05, + "loss": 0.1976, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8076591475553747e-05, + "loss": 0.177, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075807690498958e-05, + "loss": 0.3031, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.8075023905444172e-05, + "loss": 0.2594, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.8074240120389386e-05, + "loss": 0.3334, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.80734563353346e-05, + "loss": 0.7191, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.8072672550279813e-05, + "loss": 0.2046, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.8071888765225024e-05, + "loss": 0.0685, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.807110498017024e-05, + "loss": 0.1259, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8070321195115452e-05, + "loss": 0.2039, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8069537410060666e-05, + "loss": 0.2184, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.806875362500588e-05, + "loss": 0.1575, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067969839951094e-05, + "loss": 0.3521, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8067186054896307e-05, + "loss": 0.3732, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.806640226984152e-05, + "loss": 0.3423, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8065618484786732e-05, + "loss": 0.657, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.806483469973195e-05, + "loss": 0.3007, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.806405091467716e-05, + "loss": 0.0585, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8063267129622374e-05, + "loss": 0.1533, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8062483344567587e-05, + "loss": 0.2043, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.80616995595128e-05, + "loss": 0.197, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060915774458015e-05, + "loss": 0.1869, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8060131989403226e-05, + "loss": 0.1669, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.805934820434844e-05, + "loss": 0.3543, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.8058564419293654e-05, + "loss": 0.291, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.8057780634238868e-05, + "loss": 0.4537, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.805699684918408e-05, + "loss": 0.2838, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.8056213064129295e-05, + "loss": 0.101, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.805542927907451e-05, + "loss": 0.109, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.8054645494019723e-05, + "loss": 0.13, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8053861708964934e-05, + "loss": 0.1621, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.805307792391015e-05, + "loss": 0.2074, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.805229413885536e-05, + "loss": 0.2683, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8051510353800575e-05, + "loss": 0.265, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.805072656874579e-05, + "loss": 0.4351, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049942783691e-05, + "loss": 0.7304, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8049158998636217e-05, + "loss": 0.1502, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8048375213581428e-05, + "loss": 0.0326, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.804759142852664e-05, + "loss": 0.0823, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8046807643471855e-05, + "loss": 0.1804, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.804602385841707e-05, + "loss": 0.1101, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.8045240073362283e-05, + "loss": 0.2353, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.8044456288307497e-05, + "loss": 0.2658, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.804367250325271e-05, + "loss": 0.2739, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042888718197925e-05, + "loss": 0.3481, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.8042104933143135e-05, + "loss": 0.4943, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.804132114808835e-05, + "loss": 0.2629, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.8040537363033563e-05, + "loss": 0.0884, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8039753577978777e-05, + "loss": 0.0984, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.803896979292399e-05, + "loss": 0.0861, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.80381860078692e-05, + "loss": 0.1505, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.803740222281442e-05, + "loss": 0.2409, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.803661843775963e-05, + "loss": 0.2881, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035834652704843e-05, + "loss": 0.2466, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8035050867650057e-05, + "loss": 0.3239, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.803426708259527e-05, + "loss": 0.4168, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8033483297540485e-05, + "loss": 0.2401, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.80326995124857e-05, + "loss": 0.1225, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.803191572743091e-05, + "loss": 0.0936, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.8031131942376123e-05, + "loss": 0.1213, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.8030348157321337e-05, + "loss": 0.2741, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.802956437226655e-05, + "loss": 0.2251, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.8028780587211765e-05, + "loss": 0.2146, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.802799680215698e-05, + "loss": 0.323, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.8027213017102193e-05, + "loss": 0.2774, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.8026429232047403e-05, + "loss": 0.4286, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8025645446992617e-05, + "loss": 0.2137, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.802486166193783e-05, + "loss": 0.1228, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8024077876883045e-05, + "loss": 0.1267, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.802329409182826e-05, + "loss": 0.1645, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8022510306773473e-05, + "loss": 0.2046, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8021726521718687e-05, + "loss": 0.2009, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8020942736663897e-05, + "loss": 0.2249, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.802015895160911e-05, + "loss": 0.2867, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8019375166554325e-05, + "loss": 0.3413, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.801859138149954e-05, + "loss": 0.7257, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017807596444753e-05, + "loss": 0.1693, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8017023811389967e-05, + "loss": 0.0749, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8016240026335177e-05, + "loss": 0.1104, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8015456241280395e-05, + "loss": 0.1834, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8014672456225605e-05, + "loss": 0.1609, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.801388867117082e-05, + "loss": 0.2899, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.8013104886116033e-05, + "loss": 0.1833, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.8012321101061247e-05, + "loss": 0.3229, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.801153731600646e-05, + "loss": 0.34, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.801075353095167e-05, + "loss": 0.7479, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8009969745896885e-05, + "loss": 0.2743, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.80091859608421e-05, + "loss": 0.0687, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8008402175787313e-05, + "loss": 0.1244, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8007618390732527e-05, + "loss": 0.0964, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.800683460567774e-05, + "loss": 0.2067, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8006050820622955e-05, + "loss": 0.2851, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.800526703556817e-05, + "loss": 0.145, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.800448325051338e-05, + "loss": 0.314, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8003699465458597e-05, + "loss": 0.3162, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8002915680403807e-05, + "loss": 0.6037, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.800213189534902e-05, + "loss": 0.2893, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8001348110294235e-05, + "loss": 0.1066, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.8000564325239445e-05, + "loss": 0.1369, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.7999780540184663e-05, + "loss": 0.119, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998996755129873e-05, + "loss": 0.1651, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.7998212970075087e-05, + "loss": 0.1298, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.79974291850203e-05, + "loss": 0.3191, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.7996645399965515e-05, + "loss": 0.2348, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.799586161491073e-05, + "loss": 0.2823, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.7995077829855943e-05, + "loss": 0.7438, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.7994294044801157e-05, + "loss": 0.1944, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.799351025974637e-05, + "loss": 0.0748, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.799272647469158e-05, + "loss": 0.1483, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.7991942689636795e-05, + "loss": 0.1346, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.799115890458201e-05, + "loss": 0.1945, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.7990375119527223e-05, + "loss": 0.2159, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.7989591334472437e-05, + "loss": 0.2135, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988807549417647e-05, + "loss": 0.3681, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.7988023764362864e-05, + "loss": 0.3707, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.7987239979308075e-05, + "loss": 0.7777, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.798645619425329e-05, + "loss": 0.322, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.7985672409198503e-05, + "loss": 0.0827, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7984888624143717e-05, + "loss": 0.1055, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.798410483908893e-05, + "loss": 0.1698, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.7983321054034145e-05, + "loss": 0.2299, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.7982537268979355e-05, + "loss": 0.1941, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.7981753483924572e-05, + "loss": 0.2909, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980969698869783e-05, + "loss": 0.3056, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7980185913814997e-05, + "loss": 0.3902, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.797940212876021e-05, + "loss": 0.6766, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7978618343705425e-05, + "loss": 0.2698, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.797783455865064e-05, + "loss": 0.1085, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.797705077359585e-05, + "loss": 0.1044, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.7976266988541063e-05, + "loss": 0.1361, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7975483203486277e-05, + "loss": 0.1876, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.797469941843149e-05, + "loss": 0.1581, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.7973915633376705e-05, + "loss": 0.2839, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.797313184832192e-05, + "loss": 0.1772, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.7972348063267132e-05, + "loss": 0.3157, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.7971564278212346e-05, + "loss": 0.5271, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.7970780493157557e-05, + "loss": 0.2653, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.796999670810277e-05, + "loss": 0.0371, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7969212923047985e-05, + "loss": 0.077, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.79684291379932e-05, + "loss": 0.1503, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7967645352938412e-05, + "loss": 0.1859, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.7966861567883623e-05, + "loss": 0.1487, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.796607778282884e-05, + "loss": 0.2365, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.796529399777405e-05, + "loss": 0.2422, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7964510212719265e-05, + "loss": 0.3292, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.796372642766448e-05, + "loss": 0.6147, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962942642609693e-05, + "loss": 0.222, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.7962158857554906e-05, + "loss": 0.1412, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.796137507250012e-05, + "loss": 0.1588, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.7960591287445334e-05, + "loss": 0.1609, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7959807502390545e-05, + "loss": 0.1297, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.795902371733576e-05, + "loss": 0.1735, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7958239932280973e-05, + "loss": 0.1816, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.7957456147226186e-05, + "loss": 0.2098, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.79566723621714e-05, + "loss": 0.3348, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955888577116614e-05, + "loss": 0.534, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.7955104792061825e-05, + "loss": 0.2183, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.7954321007007042e-05, + "loss": 0.0817, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.7953537221952253e-05, + "loss": 0.0821, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.7952753436897467e-05, + "loss": 0.1166, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.795196965184268e-05, + "loss": 0.2538, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.7951185866787894e-05, + "loss": 0.1809, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.7950402081733108e-05, + "loss": 0.2724, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.794961829667832e-05, + "loss": 0.2887, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948834511623533e-05, + "loss": 0.4142, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.7948050726568747e-05, + "loss": 0.6192, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.794726694151396e-05, + "loss": 0.251, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.7946483156459174e-05, + "loss": 0.0909, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7945699371404388e-05, + "loss": 0.1046, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944915586349602e-05, + "loss": 0.0959, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.7944131801294816e-05, + "loss": 0.1656, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.7943348016240027e-05, + "loss": 0.2122, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.794256423118524e-05, + "loss": 0.2179, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.7941780446130454e-05, + "loss": 0.2127, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.794099666107567e-05, + "loss": 0.3221, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.7940212876020882e-05, + "loss": 0.6101, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.7939429090966093e-05, + "loss": 0.294, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.793864530591131e-05, + "loss": 0.1212, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.793786152085652e-05, + "loss": 0.1184, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.7937077735801734e-05, + "loss": 0.1062, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.793629395074695e-05, + "loss": 0.1962, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.7935510165692162e-05, + "loss": 0.1837, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7934726380637376e-05, + "loss": 0.2212, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.793394259558259e-05, + "loss": 0.3377, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.79331588105278e-05, + "loss": 0.2935, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.7932375025473018e-05, + "loss": 0.4795, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.793159124041823e-05, + "loss": 0.2201, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930807455363442e-05, + "loss": 0.134, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.7930023670308656e-05, + "loss": 0.1086, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.792923988525387e-05, + "loss": 0.1584, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7928456100199084e-05, + "loss": 0.1764, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.7927672315144295e-05, + "loss": 0.1776, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.792688853008951e-05, + "loss": 0.1828, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.7926104745034722e-05, + "loss": 0.2791, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7925320959979936e-05, + "loss": 0.3181, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.792453717492515e-05, + "loss": 0.8255, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.7923753389870364e-05, + "loss": 0.321, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922969604815578e-05, + "loss": 0.0898, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.7922185819760792e-05, + "loss": 0.1708, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.7921402034706002e-05, + "loss": 0.1316, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.792061824965122e-05, + "loss": 0.1778, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.791983446459643e-05, + "loss": 0.168, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.7919050679541644e-05, + "loss": 0.291, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.7918266894486858e-05, + "loss": 0.2596, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.791748310943207e-05, + "loss": 0.2771, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.7916699324377286e-05, + "loss": 0.6026, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.7915915539322496e-05, + "loss": 0.2228, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.791513175426771e-05, + "loss": 0.0702, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.7914347969212924e-05, + "loss": 0.164, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.7913564184158138e-05, + "loss": 0.1234, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.7912780399103352e-05, + "loss": 0.1424, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.7911996614048566e-05, + "loss": 0.1952, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.791121282899378e-05, + "loss": 0.233, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.7910429043938994e-05, + "loss": 0.2717, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.7909645258884204e-05, + "loss": 0.2716, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908861473829418e-05, + "loss": 0.5792, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.7908077688774632e-05, + "loss": 0.1712, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.7907293903719846e-05, + "loss": 0.0798, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.790651011866506e-05, + "loss": 0.137, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.790572633361027e-05, + "loss": 0.1375, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904942548555488e-05, + "loss": 0.2504, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.7904158763500698e-05, + "loss": 0.2304, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.7903374978445912e-05, + "loss": 0.2548, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.7902591193391126e-05, + "loss": 0.3049, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.790180740833634e-05, + "loss": 0.4073, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.7901023623281554e-05, + "loss": 0.5195, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.7900239838226768e-05, + "loss": 0.267, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.7899456053171978e-05, + "loss": 0.0918, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.7898672268117192e-05, + "loss": 0.1025, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.7897888483062406e-05, + "loss": 0.1415, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.789710469800762e-05, + "loss": 0.1369, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.7896320912952834e-05, + "loss": 0.2144, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.7895537127898048e-05, + "loss": 0.1942, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.789475334284326e-05, + "loss": 0.2313, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893969557788472e-05, + "loss": 0.4069, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.7893185772733686e-05, + "loss": 0.6075, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.78924019876789e-05, + "loss": 0.2016, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.7891618202624114e-05, + "loss": 0.0962, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.7890834417569328e-05, + "loss": 0.1401, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890050632514542e-05, + "loss": 0.1746, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.7889266847459756e-05, + "loss": 0.2198, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.7888483062404966e-05, + "loss": 0.1788, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.788769927735018e-05, + "loss": 0.3015, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886915492295394e-05, + "loss": 0.336, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.7886131707240608e-05, + "loss": 0.399, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.7885347922185822e-05, + "loss": 0.5129, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.7884564137131036e-05, + "loss": 0.2472, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.7883780352076246e-05, + "loss": 0.0997, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882996567021463e-05, + "loss": 0.151, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.7882212781966674e-05, + "loss": 0.1149, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.7881428996911888e-05, + "loss": 0.1079, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.7880645211857102e-05, + "loss": 0.1688, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.7879861426802316e-05, + "loss": 0.2422, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.787907764174753e-05, + "loss": 0.256, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.787829385669274e-05, + "loss": 0.2604, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.7877510071637954e-05, + "loss": 0.4361, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.7876726286583168e-05, + "loss": 0.3397, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875942501528382e-05, + "loss": 0.0841, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.7875158716473596e-05, + "loss": 0.0836, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.787437493141881e-05, + "loss": 0.1452, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7873591146364024e-05, + "loss": 0.1594, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872807361309237e-05, + "loss": 0.2242, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.7872023576254448e-05, + "loss": 0.2338, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.7871239791199665e-05, + "loss": 0.218, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.7870456006144876e-05, + "loss": 0.4391, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.786967222109009e-05, + "loss": 0.5228, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868888436035304e-05, + "loss": 0.2472, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.7868104650980514e-05, + "loss": 0.0867, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.786732086592573e-05, + "loss": 0.1296, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.7866537080870942e-05, + "loss": 0.1691, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.7865753295816156e-05, + "loss": 0.2125, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.786496951076137e-05, + "loss": 0.2022, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.7864185725706584e-05, + "loss": 0.1817, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.7863401940651798e-05, + "loss": 0.3351, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.786261815559701e-05, + "loss": 0.3566, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.7861834370542225e-05, + "loss": 0.6346, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.786105058548744e-05, + "loss": 0.2488, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.786026680043265e-05, + "loss": 0.0688, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.7859483015377864e-05, + "loss": 0.1261, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.7858699230323078e-05, + "loss": 0.1552, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.785791544526829e-05, + "loss": 0.1928, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.7857131660213505e-05, + "loss": 0.1883, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.7856347875158716e-05, + "loss": 0.2545, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.7855564090103933e-05, + "loss": 0.3732, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.7854780305049144e-05, + "loss": 0.3061, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.7853996519994358e-05, + "loss": 0.5844, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.785321273493957e-05, + "loss": 0.2783, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.7852428949884785e-05, + "loss": 0.1046, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.785164516483e-05, + "loss": 0.1335, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850861379775213e-05, + "loss": 0.1018, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.7850077594720424e-05, + "loss": 0.1993, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.784929380966564e-05, + "loss": 0.1792, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.784851002461085e-05, + "loss": 0.2488, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.7847726239556066e-05, + "loss": 0.247, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.784694245450128e-05, + "loss": 0.4897, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.7846158669446493e-05, + "loss": 0.4311, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.7845374884391707e-05, + "loss": 0.2626, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.7844591099336918e-05, + "loss": 0.069, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.784380731428213e-05, + "loss": 0.1219, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843023529227346e-05, + "loss": 0.1279, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.784223974417256e-05, + "loss": 0.1779, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.7841455959117773e-05, + "loss": 0.2421, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.7840672174062987e-05, + "loss": 0.2912, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.78398883890082e-05, + "loss": 0.2462, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.7839104603953415e-05, + "loss": 0.3095, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.7838320818898626e-05, + "loss": 0.5914, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.783753703384384e-05, + "loss": 0.2444, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.7836753248789053e-05, + "loss": 0.0887, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.7835969463734267e-05, + "loss": 0.1362, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.783518567867948e-05, + "loss": 0.0878, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.7834401893624692e-05, + "loss": 0.1273, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.783361810856991e-05, + "loss": 0.2425, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.783283432351512e-05, + "loss": 0.275, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.7832050538460333e-05, + "loss": 0.2317, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.7831266753405547e-05, + "loss": 0.2663, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.783048296835076e-05, + "loss": 0.2922, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.7829699183295975e-05, + "loss": 0.2981, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.782891539824119e-05, + "loss": 0.0931, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.78281316131864e-05, + "loss": 0.0793, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827347828131614e-05, + "loss": 0.1572, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.7826564043076827e-05, + "loss": 0.2228, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.782578025802204e-05, + "loss": 0.1472, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.7824996472967255e-05, + "loss": 0.2983, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.782421268791247e-05, + "loss": 0.3364, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.7823428902857683e-05, + "loss": 0.4755, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.7822645117802894e-05, + "loss": 0.5627, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.782186133274811e-05, + "loss": 0.2746, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.782107754769332e-05, + "loss": 0.0909, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.7820293762638535e-05, + "loss": 0.1021, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.781950997758375e-05, + "loss": 0.1913, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.7818726192528963e-05, + "loss": 0.1024, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817942407474177e-05, + "loss": 0.1359, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.7817158622419388e-05, + "loss": 0.2782, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.78163748373646e-05, + "loss": 0.3662, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.7815591052309815e-05, + "loss": 0.3173, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.781480726725503e-05, + "loss": 0.5629, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.7814023482200243e-05, + "loss": 0.3126, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.7813239697145457e-05, + "loss": 0.1257, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.781245591209067e-05, + "loss": 0.102, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.7811672127035885e-05, + "loss": 0.0812, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.7810888341981095e-05, + "loss": 0.1951, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.781010455692631e-05, + "loss": 0.2276, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.7809320771871523e-05, + "loss": 0.2704, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.7808536986816737e-05, + "loss": 0.2971, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.780775320176195e-05, + "loss": 0.2102, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.780696941670716e-05, + "loss": 0.4946, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.780618563165238e-05, + "loss": 0.2092, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.780540184659759e-05, + "loss": 0.0679, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.7804618061542803e-05, + "loss": 0.1363, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.7803834276488017e-05, + "loss": 0.1115, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.780305049143323e-05, + "loss": 0.1363, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.7802266706378445e-05, + "loss": 0.213, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.780148292132366e-05, + "loss": 0.2516, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.780069913626887e-05, + "loss": 0.3133, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799915351214087e-05, + "loss": 0.3108, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.7799131566159297e-05, + "loss": 0.4203, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.779834778110451e-05, + "loss": 0.2288, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.7797563996049725e-05, + "loss": 0.0967, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.779678021099494e-05, + "loss": 0.1433, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795996425940153e-05, + "loss": 0.1605, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.7795212640885363e-05, + "loss": 0.1502, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7794428855830577e-05, + "loss": 0.2371, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.779364507077579e-05, + "loss": 0.2546, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7792861285721005e-05, + "loss": 0.2453, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.779207750066622e-05, + "loss": 0.3223, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7791293715611433e-05, + "loss": 0.5728, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.7790509930556647e-05, + "loss": 0.1818, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.778972614550186e-05, + "loss": 0.0977, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.778894236044707e-05, + "loss": 0.0862, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.778815857539229e-05, + "loss": 0.1162, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.77873747903375e-05, + "loss": 0.1285, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.7786591005282713e-05, + "loss": 0.1789, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785807220227927e-05, + "loss": 0.1586, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7785023435173137e-05, + "loss": 0.2264, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7784239650118355e-05, + "loss": 0.3286, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7783455865063565e-05, + "loss": 0.5867, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.778267208000878e-05, + "loss": 0.2071, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781888294953993e-05, + "loss": 0.1373, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7781104509899207e-05, + "loss": 0.1818, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.778032072484442e-05, + "loss": 0.1301, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.7779536939789635e-05, + "loss": 0.161, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.777875315473485e-05, + "loss": 0.3043, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777969369680062e-05, + "loss": 0.3192, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.7777185584625273e-05, + "loss": 0.263, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7776401799570487e-05, + "loss": 0.2928, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.77756180145157e-05, + "loss": 0.866, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.7774834229460915e-05, + "loss": 0.1849, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.777405044440613e-05, + "loss": 0.0998, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.777326665935134e-05, + "loss": 0.1643, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7772482874296556e-05, + "loss": 0.2084, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7771699089241767e-05, + "loss": 0.1575, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.777091530418698e-05, + "loss": 0.132, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.7770131519132195e-05, + "loss": 0.2206, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.776934773407741e-05, + "loss": 0.2715, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.7768563949022623e-05, + "loss": 0.3409, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.7767780163967836e-05, + "loss": 0.5405, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.7766996378913047e-05, + "loss": 0.3159, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.776621259385826e-05, + "loss": 0.1621, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.7765428808803475e-05, + "loss": 0.058, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.776464502374869e-05, + "loss": 0.1118, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763861238693903e-05, + "loss": 0.0801, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.7763077453639117e-05, + "loss": 0.1691, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.776229366858433e-05, + "loss": 0.1647, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.776150988352954e-05, + "loss": 0.3036, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.7760726098474755e-05, + "loss": 0.3638, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.775994231341997e-05, + "loss": 0.6663, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.7759158528365183e-05, + "loss": 0.2445, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.7758374743310397e-05, + "loss": 0.0941, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.775759095825561e-05, + "loss": 0.1141, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.7756807173200824e-05, + "loss": 0.1723, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.7756023388146035e-05, + "loss": 0.1063, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.775523960309125e-05, + "loss": 0.2201, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754455818036463e-05, + "loss": 0.2288, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753672032981677e-05, + "loss": 0.2141, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.775288824792689e-05, + "loss": 0.2616, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752104462872104e-05, + "loss": 0.5344, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751320677817315e-05, + "loss": 0.2139, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750536892762532e-05, + "loss": 0.066, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.7749753107707743e-05, + "loss": 0.1592, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748969322652957e-05, + "loss": 0.1448, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.774818553759817e-05, + "loss": 0.171, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747401752543384e-05, + "loss": 0.181, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.77466179674886e-05, + "loss": 0.2425, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.774583418243381e-05, + "loss": 0.2223, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.7745050397379023e-05, + "loss": 0.3635, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744266612324237e-05, + "loss": 0.542, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.774348282726945e-05, + "loss": 0.2035, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.7742699042214665e-05, + "loss": 0.096, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.774191525715988e-05, + "loss": 0.1612, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741131472105092e-05, + "loss": 0.114, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740347687050306e-05, + "loss": 0.0943, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.7739563901995517e-05, + "loss": 0.1448, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.7738780116940734e-05, + "loss": 0.2311, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.7737996331885945e-05, + "loss": 0.1838, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.773721254683116e-05, + "loss": 0.3578, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.7736428761776372e-05, + "loss": 0.5445, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.7735644976721583e-05, + "loss": 0.2114, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.77348611916668e-05, + "loss": 0.0826, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.773407740661201e-05, + "loss": 0.0514, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.7733293621557225e-05, + "loss": 0.1493, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.773250983650244e-05, + "loss": 0.1625, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.7731726051447652e-05, + "loss": 0.3053, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.7730942266392866e-05, + "loss": 0.1644, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.773015848133808e-05, + "loss": 0.397, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.7729374696283294e-05, + "loss": 0.5178, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.7728590911228508e-05, + "loss": 0.588, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.772780712617372e-05, + "loss": 0.219, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.7727023341118932e-05, + "loss": 0.1002, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.7726239556064146e-05, + "loss": 0.0972, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.772545577100936e-05, + "loss": 0.1926, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.7724671985954574e-05, + "loss": 0.1175, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723888200899785e-05, + "loss": 0.2136, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.7723104415845002e-05, + "loss": 0.3205, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722320630790213e-05, + "loss": 0.2608, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.7721536845735426e-05, + "loss": 0.2418, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.772075306068064e-05, + "loss": 0.5699, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719969275625854e-05, + "loss": 0.2461, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719185490571068e-05, + "loss": 0.0584, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718401705516282e-05, + "loss": 0.0991, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717617920461493e-05, + "loss": 0.1333, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.771683413540671e-05, + "loss": 0.1246, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.771605035035192e-05, + "loss": 0.1544, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715266565297134e-05, + "loss": 0.2113, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.7714482780242348e-05, + "loss": 0.2301, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713698995187562e-05, + "loss": 0.2443, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712915210132776e-05, + "loss": 0.5969, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712131425077986e-05, + "loss": 0.2402, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.77113476400232e-05, + "loss": 0.1322, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710563854968414e-05, + "loss": 0.1391, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709780069913628e-05, + "loss": 0.1207, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708996284858842e-05, + "loss": 0.2047, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.7708212499804056e-05, + "loss": 0.2101, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.770742871474927e-05, + "loss": 0.2683, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.7706644929694484e-05, + "loss": 0.2237, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705861144639694e-05, + "loss": 0.3521, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705077359584908e-05, + "loss": 0.5843, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704293574530122e-05, + "loss": 0.1719, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703509789475336e-05, + "loss": 0.1001, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.770272600442055e-05, + "loss": 0.0878, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.770194221936576e-05, + "loss": 0.1989, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.7701158434310978e-05, + "loss": 0.1543, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.770037464925619e-05, + "loss": 0.1588, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699590864201402e-05, + "loss": 0.3029, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698807079146616e-05, + "loss": 0.2396, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.769802329409183e-05, + "loss": 0.357, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697239509037044e-05, + "loss": 0.6507, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696455723982258e-05, + "loss": 0.2289, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.769567193892747e-05, + "loss": 0.0816, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694888153872682e-05, + "loss": 0.0891, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.7694104368817896e-05, + "loss": 0.1736, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.769332058376311e-05, + "loss": 0.2008, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692536798708324e-05, + "loss": 0.199, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.7691753013653538e-05, + "loss": 0.255, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690969228598752e-05, + "loss": 0.2359, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.7690185443543962e-05, + "loss": 0.3103, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.768940165848918e-05, + "loss": 0.5922, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.768861787343439e-05, + "loss": 0.2138, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687834088379604e-05, + "loss": 0.1087, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.7687050303324818e-05, + "loss": 0.0963, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686266518270032e-05, + "loss": 0.0777, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.7685482733215246e-05, + "loss": 0.1607, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.7684698948160456e-05, + "loss": 0.1585, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.768391516310567e-05, + "loss": 0.2127, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.7683131378050884e-05, + "loss": 0.2459, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.7682347592996098e-05, + "loss": 0.3235, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.7681563807941312e-05, + "loss": 0.5031, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.7680780022886526e-05, + "loss": 0.2883, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.767999623783174e-05, + "loss": 0.1161, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.7679212452776954e-05, + "loss": 0.117, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.7678428667722164e-05, + "loss": 0.1198, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.7677644882667378e-05, + "loss": 0.1703, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676861097612592e-05, + "loss": 0.204, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.7676077312557806e-05, + "loss": 0.1478, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.767529352750302e-05, + "loss": 0.2569, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.767450974244823e-05, + "loss": 0.2254, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.7673725957393448e-05, + "loss": 0.5472, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672942172338658e-05, + "loss": 0.2518, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.7672158387283872e-05, + "loss": 0.0926, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671374602229086e-05, + "loss": 0.1318, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.76705908171743e-05, + "loss": 0.1401, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669807032119514e-05, + "loss": 0.1579, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669023247064728e-05, + "loss": 0.139, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668239462009938e-05, + "loss": 0.17, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667455676955155e-05, + "loss": 0.2695, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666671891900366e-05, + "loss": 0.2543, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.766588810684558e-05, + "loss": 0.4279, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665104321790794e-05, + "loss": 0.2844, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664320536736008e-05, + "loss": 0.1015, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.766353675168122e-05, + "loss": 0.1068, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662752966626432e-05, + "loss": 0.1115, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.7661969181571646e-05, + "loss": 0.1674, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.766118539651686e-05, + "loss": 0.1759, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660401611462074e-05, + "loss": 0.2252, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659617826407288e-05, + "loss": 0.2375, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.76588340413525e-05, + "loss": 0.3769, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658050256297716e-05, + "loss": 0.4495, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.765726647124293e-05, + "loss": 0.1948, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.765648268618814e-05, + "loss": 0.0774, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655698901133357e-05, + "loss": 0.0989, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.7654915116078568e-05, + "loss": 0.0939, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.765413133102378e-05, + "loss": 0.2203, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.7653347545968996e-05, + "loss": 0.2015, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.7652563760914206e-05, + "loss": 0.193, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.7651779975859423e-05, + "loss": 0.2686, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650996190804634e-05, + "loss": 0.213, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.7650212405749848e-05, + "loss": 0.476, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.764942862069506e-05, + "loss": 0.2164, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.7648644835640276e-05, + "loss": 0.0558, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.764786105058549e-05, + "loss": 0.1303, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.7647077265530703e-05, + "loss": 0.1231, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.7646293480475914e-05, + "loss": 0.1846, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.764550969542113e-05, + "loss": 0.1873, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7644725910366342e-05, + "loss": 0.1345, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7643942125311556e-05, + "loss": 0.2916, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.764315834025677e-05, + "loss": 0.3561, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.7642374555201983e-05, + "loss": 0.5833, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.7641590770147197e-05, + "loss": 0.2464, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640806985092408e-05, + "loss": 0.0829, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.7640023200037625e-05, + "loss": 0.1179, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.7639239414982836e-05, + "loss": 0.1629, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.763845562992805e-05, + "loss": 0.1615, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.7637671844873264e-05, + "loss": 0.2043, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.7636888059818477e-05, + "loss": 0.2283, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.763610427476369e-05, + "loss": 0.306, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.7635320489708905e-05, + "loss": 0.4119, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.7634536704654116e-05, + "loss": 0.4373, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.763375291959933e-05, + "loss": 0.1972, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632969134544544e-05, + "loss": 0.0756, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.7632185349489757e-05, + "loss": 0.1467, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.763140156443497e-05, + "loss": 0.1195, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.7630617779380185e-05, + "loss": 0.1707, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.76298339943254e-05, + "loss": 0.2611, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.762905020927061e-05, + "loss": 0.2035, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.7628266424215824e-05, + "loss": 0.2931, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.7627482639161038e-05, + "loss": 0.3166, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.762669885410625e-05, + "loss": 0.4894, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.7625915069051465e-05, + "loss": 0.2469, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.762513128399668e-05, + "loss": 0.091, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.7624347498941893e-05, + "loss": 0.1252, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.7623563713887104e-05, + "loss": 0.1507, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.7622779928832318e-05, + "loss": 0.0806, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.762199614377753e-05, + "loss": 0.1723, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.7621212358722745e-05, + "loss": 0.1738, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.762042857366796e-05, + "loss": 0.2276, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.7619644788613173e-05, + "loss": 0.3772, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.7618861003558384e-05, + "loss": 0.5853, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.76180772185036e-05, + "loss": 0.2295, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.761729343344881e-05, + "loss": 0.0793, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.7616509648394025e-05, + "loss": 0.1744, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.761572586333924e-05, + "loss": 0.1258, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614942078284453e-05, + "loss": 0.1278, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.7614158293229667e-05, + "loss": 0.1408, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.7613374508174878e-05, + "loss": 0.176, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.761259072312009e-05, + "loss": 0.2184, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.7611806938065305e-05, + "loss": 0.4551, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.761102315301052e-05, + "loss": 0.4999, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.7610239367955733e-05, + "loss": 0.1799, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.7609455582900947e-05, + "loss": 0.0511, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.760867179784616e-05, + "loss": 0.0712, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607888012791375e-05, + "loss": 0.1195, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.7607104227736585e-05, + "loss": 0.1133, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.7606320442681803e-05, + "loss": 0.18, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.7605536657627013e-05, + "loss": 0.2426, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.7604752872572227e-05, + "loss": 0.252, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.760396908751744e-05, + "loss": 0.4202, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.760318530246265e-05, + "loss": 0.645, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.760240151740787e-05, + "loss": 0.2253, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.760161773235308e-05, + "loss": 0.0957, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600833947298293e-05, + "loss": 0.0851, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.7600050162243507e-05, + "loss": 0.1494, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.759926637718872e-05, + "loss": 0.2095, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.7598482592133935e-05, + "loss": 0.1371, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.759769880707915e-05, + "loss": 0.2431, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596915022024363e-05, + "loss": 0.2534, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.7596131236969577e-05, + "loss": 0.2616, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.7595347451914787e-05, + "loss": 0.5365, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.759456366686e-05, + "loss": 0.1913, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.7593779881805215e-05, + "loss": 0.0728, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.759299609675043e-05, + "loss": 0.1707, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.7592212311695643e-05, + "loss": 0.2359, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.7591428526640853e-05, + "loss": 0.1137, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.759064474158607e-05, + "loss": 0.1787, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.758986095653128e-05, + "loss": 0.1674, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.7589077171476495e-05, + "loss": 0.1887, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.758829338642171e-05, + "loss": 0.4233, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7587509601366923e-05, + "loss": 0.5711, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7586725816312137e-05, + "loss": 0.1733, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.758594203125735e-05, + "loss": 0.0874, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.758515824620256e-05, + "loss": 0.15, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.758437446114778e-05, + "loss": 0.1558, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.758359067609299e-05, + "loss": 0.1326, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582806891038203e-05, + "loss": 0.196, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.7582023105983417e-05, + "loss": 0.1857, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.758123932092863e-05, + "loss": 0.2995, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.7580455535873845e-05, + "loss": 0.4498, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.7579671750819055e-05, + "loss": 0.6452, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.757888796576427e-05, + "loss": 0.2235, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.7578104180709483e-05, + "loss": 0.0881, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.7577320395654697e-05, + "loss": 0.063, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.757653661059991e-05, + "loss": 0.1451, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.7575752825545125e-05, + "loss": 0.1652, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.757496904049034e-05, + "loss": 0.1904, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.7574185255435553e-05, + "loss": 0.253, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.7573401470380763e-05, + "loss": 0.1778, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7572617685325977e-05, + "loss": 0.4833, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.757183390027119e-05, + "loss": 0.4754, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7571050115216405e-05, + "loss": 0.1547, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.757026633016162e-05, + "loss": 0.0813, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.756948254510683e-05, + "loss": 0.1433, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7568698760052047e-05, + "loss": 0.1179, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7567914974997257e-05, + "loss": 0.1643, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.756713118994247e-05, + "loss": 0.1876, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7566347404887685e-05, + "loss": 0.1914, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.75655636198329e-05, + "loss": 0.2733, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7564779834778113e-05, + "loss": 0.3474, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563996049723327e-05, + "loss": 0.5545, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.7563212264668537e-05, + "loss": 0.251, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.756242847961375e-05, + "loss": 0.0742, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.7561644694558965e-05, + "loss": 0.0762, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.756086090950418e-05, + "loss": 0.1709, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.7560077124449393e-05, + "loss": 0.1702, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.7559293339394607e-05, + "loss": 0.2099, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.755850955433982e-05, + "loss": 0.311, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.755772576928503e-05, + "loss": 0.2829, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.755694198423025e-05, + "loss": 0.3343, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.755615819917546e-05, + "loss": 0.3147, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.7555374414120673e-05, + "loss": 0.242, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.7554590629065887e-05, + "loss": 0.0859, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.75538068440111e-05, + "loss": 0.1072, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.7553023058956315e-05, + "loss": 0.1143, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.7552239273901525e-05, + "loss": 0.1676, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.755145548884674e-05, + "loss": 0.1877, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.7550671703791953e-05, + "loss": 0.2769, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.7549887918737167e-05, + "loss": 0.2055, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.754910413368238e-05, + "loss": 0.1681, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.7548320348627595e-05, + "loss": 0.5035, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.754753656357281e-05, + "loss": 0.251, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.7546752778518022e-05, + "loss": 0.0733, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545968993463233e-05, + "loss": 0.0622, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.7545185208408447e-05, + "loss": 0.186, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.754440142335366e-05, + "loss": 0.1875, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.7543617638298875e-05, + "loss": 0.1456, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.754283385324409e-05, + "loss": 0.1846, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.75420500681893e-05, + "loss": 0.2411, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.7541266283134516e-05, + "loss": 0.2714, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.7540482498079727e-05, + "loss": 0.5341, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.753969871302494e-05, + "loss": 0.2582, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.7538914927970155e-05, + "loss": 0.0639, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.753813114291537e-05, + "loss": 0.091, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.7537347357860582e-05, + "loss": 0.1561, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.7536563572805796e-05, + "loss": 0.167, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.7535779787751007e-05, + "loss": 0.1176, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534996002696224e-05, + "loss": 0.1937, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.7534212217641435e-05, + "loss": 0.1984, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.753342843258665e-05, + "loss": 0.2591, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.7532644647531863e-05, + "loss": 0.4628, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.7531860862477076e-05, + "loss": 0.1609, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.753107707742229e-05, + "loss": 0.0979, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.75302932923675e-05, + "loss": 0.1462, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.7529509507312715e-05, + "loss": 0.163, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.752872572225793e-05, + "loss": 0.1616, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527941937203143e-05, + "loss": 0.1537, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.7527158152148356e-05, + "loss": 0.1465, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.752637436709357e-05, + "loss": 0.2385, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.7525590582038784e-05, + "loss": 0.3273, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.7524806796983998e-05, + "loss": 0.5533, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.752402301192921e-05, + "loss": 0.1391, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.7523239226874426e-05, + "loss": 0.0745, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.7522455441819636e-05, + "loss": 0.1043, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.752167165676485e-05, + "loss": 0.1905, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520887871710064e-05, + "loss": 0.1423, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.7520104086655275e-05, + "loss": 0.0993, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.7519320301600492e-05, + "loss": 0.2252, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.7518536516545703e-05, + "loss": 0.2168, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.7517752731490917e-05, + "loss": 0.3685, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.751696894643613e-05, + "loss": 0.428, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.7516185161381344e-05, + "loss": 0.2561, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.7515401376326558e-05, + "loss": 0.0574, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.7514617591271772e-05, + "loss": 0.1081, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.7513833806216983e-05, + "loss": 0.1428, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.75130500211622e-05, + "loss": 0.1128, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.751226623610741e-05, + "loss": 0.2477, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.7511482451052624e-05, + "loss": 0.2166, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.751069866599784e-05, + "loss": 0.2552, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509914880943052e-05, + "loss": 0.2411, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.7509131095888266e-05, + "loss": 0.4488, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.7508347310833477e-05, + "loss": 0.2285, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.7507563525778694e-05, + "loss": 0.0925, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.7506779740723904e-05, + "loss": 0.1007, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.750599595566912e-05, + "loss": 0.1318, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.7505212170614332e-05, + "loss": 0.1029, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.7504428385559546e-05, + "loss": 0.1626, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.750364460050476e-05, + "loss": 0.2252, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502860815449974e-05, + "loss": 0.2685, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.7502077030395184e-05, + "loss": 0.2682, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.75012932453404e-05, + "loss": 0.4503, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.7500509460285612e-05, + "loss": 0.1966, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.7499725675230826e-05, + "loss": 0.0716, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.749894189017604e-05, + "loss": 0.0855, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.7498158105121254e-05, + "loss": 0.1765, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.7497374320066468e-05, + "loss": 0.1045, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.749659053501168e-05, + "loss": 0.1965, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495806749956892e-05, + "loss": 0.2889, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.7495022964902106e-05, + "loss": 0.2706, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.749423917984732e-05, + "loss": 0.219, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.7493455394792534e-05, + "loss": 0.7537, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.7492671609737748e-05, + "loss": 0.2183, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491887824682962e-05, + "loss": 0.095, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.7491104039628172e-05, + "loss": 0.0708, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.7490320254573386e-05, + "loss": 0.1224, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.74895364695186e-05, + "loss": 0.149, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.7488752684463814e-05, + "loss": 0.2394, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.7487968899409028e-05, + "loss": 0.2442, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.7487185114354242e-05, + "loss": 0.2727, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.7486401329299452e-05, + "loss": 0.3609, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.748561754424467e-05, + "loss": 0.5439, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.748483375918988e-05, + "loss": 0.2195, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.7484049974135094e-05, + "loss": 0.0805, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.7483266189080308e-05, + "loss": 0.1076, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.7482482404025522e-05, + "loss": 0.2019, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.7481698618970736e-05, + "loss": 0.1393, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.7480914833915946e-05, + "loss": 0.1601, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.748013104886116e-05, + "loss": 0.2208, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.7479347263806374e-05, + "loss": 0.2692, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.7478563478751588e-05, + "loss": 0.2507, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.7477779693696802e-05, + "loss": 0.6805, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7476995908642016e-05, + "loss": 0.2378, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.747621212358723e-05, + "loss": 0.0834, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.7475428338532444e-05, + "loss": 0.0724, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.7474644553477654e-05, + "loss": 0.1847, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.747386076842287e-05, + "loss": 0.1556, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.7473076983368082e-05, + "loss": 0.2036, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.7472293198313296e-05, + "loss": 0.2969, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.747150941325851e-05, + "loss": 0.3382, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.747072562820372e-05, + "loss": 0.3471, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469941843148938e-05, + "loss": 0.4779, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.7469158058094148e-05, + "loss": 0.2133, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.7468374273039362e-05, + "loss": 0.0909, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.7467590487984576e-05, + "loss": 0.1217, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.746680670292979e-05, + "loss": 0.1222, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.7466022917875004e-05, + "loss": 0.1105, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.7465239132820218e-05, + "loss": 0.1904, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.7464455347765428e-05, + "loss": 0.2327, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.7463671562710646e-05, + "loss": 0.2373, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.7462887777655856e-05, + "loss": 0.2425, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.746210399260107e-05, + "loss": 0.4149, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.7461320207546284e-05, + "loss": 0.3749, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.7460536422491498e-05, + "loss": 0.076, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.745975263743671e-05, + "loss": 0.0653, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.7458968852381922e-05, + "loss": 0.1085, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.745818506732714e-05, + "loss": 0.1582, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.745740128227235e-05, + "loss": 0.2298, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.7456617497217564e-05, + "loss": 0.1976, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455833712162778e-05, + "loss": 0.3482, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.7455049927107992e-05, + "loss": 0.2661, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.7454266142053206e-05, + "loss": 0.6355, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.745348235699842e-05, + "loss": 0.2025, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.745269857194363e-05, + "loss": 0.0766, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451914786888847e-05, + "loss": 0.1712, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.7451131001834058e-05, + "loss": 0.1655, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.7450347216779272e-05, + "loss": 0.1001, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.7449563431724486e-05, + "loss": 0.1872, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.74487796466697e-05, + "loss": 0.2953, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447995861614914e-05, + "loss": 0.2079, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447212076560124e-05, + "loss": 0.3059, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.7446428291505338e-05, + "loss": 0.5211, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.7445644506450552e-05, + "loss": 0.2568, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.7444860721395766e-05, + "loss": 0.0556, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.744407693634098e-05, + "loss": 0.1149, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.7443293151286194e-05, + "loss": 0.1159, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.7442509366231407e-05, + "loss": 0.1228, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.744172558117662e-05, + "loss": 0.1988, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440941796121832e-05, + "loss": 0.129, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.7440158011067046e-05, + "loss": 0.3034, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.743937422601226e-05, + "loss": 0.3292, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.7438590440957474e-05, + "loss": 0.5812, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437806655902688e-05, + "loss": 0.2535, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.7437022870847898e-05, + "loss": 0.0812, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.7436239085793115e-05, + "loss": 0.0573, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.7435455300738326e-05, + "loss": 0.1486, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.743467151568354e-05, + "loss": 0.1502, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433887730628754e-05, + "loss": 0.1391, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.7433103945573968e-05, + "loss": 0.2151, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.743232016051918e-05, + "loss": 0.2215, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.7431536375464395e-05, + "loss": 0.3069, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.7430752590409606e-05, + "loss": 0.5807, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.742996880535482e-05, + "loss": 0.2226, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7429185020300034e-05, + "loss": 0.0589, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7428401235245248e-05, + "loss": 0.0894, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.742761745019046e-05, + "loss": 0.0559, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.7426833665135675e-05, + "loss": 0.1474, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.742604988008089e-05, + "loss": 0.1283, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.74252660950261e-05, + "loss": 0.2015, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.7424482309971317e-05, + "loss": 0.2343, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.7423698524916528e-05, + "loss": 0.2946, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.742291473986174e-05, + "loss": 0.4734, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.7422130954806955e-05, + "loss": 0.1872, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.742134716975217e-05, + "loss": 0.0883, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.7420563384697383e-05, + "loss": 0.1069, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.7419779599642594e-05, + "loss": 0.1773, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.7418995814587808e-05, + "loss": 0.1791, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.741821202953302e-05, + "loss": 0.1533, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.7417428244478235e-05, + "loss": 0.176, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.741664445942345e-05, + "loss": 0.3268, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415860674368663e-05, + "loss": 0.2135, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.7415076889313877e-05, + "loss": 0.5947, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.741429310425909e-05, + "loss": 0.2006, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.74135093192043e-05, + "loss": 0.1167, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7412725534149516e-05, + "loss": 0.1088, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.741194174909473e-05, + "loss": 0.0985, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7411157964039943e-05, + "loss": 0.1412, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7410374178985157e-05, + "loss": 0.2124, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7409590393930368e-05, + "loss": 0.1429, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408806608875585e-05, + "loss": 0.2762, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7408022823820796e-05, + "loss": 0.336, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.740723903876601e-05, + "loss": 0.6298, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.7406455253711223e-05, + "loss": 0.2686, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.7405671468656437e-05, + "loss": 0.1043, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.740488768360165e-05, + "loss": 0.0676, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.7404103898546865e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.7403320113492076e-05, + "loss": 0.1228, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.7402536328437293e-05, + "loss": 0.1101, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.7401752543382503e-05, + "loss": 0.1469, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400968758327717e-05, + "loss": 0.1787, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.740018497327293e-05, + "loss": 0.2206, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.7399401188218145e-05, + "loss": 0.4916, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.739861740316336e-05, + "loss": 0.208, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.739783361810857e-05, + "loss": 0.0906, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.7397049833053783e-05, + "loss": 0.1041, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396266047998997e-05, + "loss": 0.1332, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.739548226294421e-05, + "loss": 0.1337, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.7394698477889425e-05, + "loss": 0.1325, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.739391469283464e-05, + "loss": 0.147, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.7393130907779853e-05, + "loss": 0.2622, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.7392347122725067e-05, + "loss": 0.4564, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.7391563337670277e-05, + "loss": 0.5175, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.7390779552615495e-05, + "loss": 0.1756, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.7389995767560705e-05, + "loss": 0.1188, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.738921198250592e-05, + "loss": 0.0799, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.7388428197451133e-05, + "loss": 0.1077, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.7387644412396344e-05, + "loss": 0.1528, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.738686062734156e-05, + "loss": 0.1655, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.738607684228677e-05, + "loss": 0.1754, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.7385293057231985e-05, + "loss": 0.2688, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.73845092721772e-05, + "loss": 0.2753, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.7383725487122413e-05, + "loss": 0.3675, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.7382941702067627e-05, + "loss": 0.245, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.738215791701284e-05, + "loss": 0.1142, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.738137413195805e-05, + "loss": 0.0702, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.738059034690327e-05, + "loss": 0.1529, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.737980656184848e-05, + "loss": 0.1147, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.7379022776793693e-05, + "loss": 0.1938, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.7378238991738907e-05, + "loss": 0.2224, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.737745520668412e-05, + "loss": 0.1982, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.7376671421629335e-05, + "loss": 0.3572, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375887636574545e-05, + "loss": 0.6613, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.7375103851519763e-05, + "loss": 0.2306, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.7374320066464973e-05, + "loss": 0.0619, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.7373536281410187e-05, + "loss": 0.1426, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.73727524963554e-05, + "loss": 0.1614, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.7371968711300615e-05, + "loss": 0.137, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.737118492624583e-05, + "loss": 0.2, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.7370401141191043e-05, + "loss": 0.2528, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.7369617356136253e-05, + "loss": 0.3046, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.7368833571081467e-05, + "loss": 0.3107, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.736804978602668e-05, + "loss": 0.4765, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.7367266000971895e-05, + "loss": 0.2553, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.736648221591711e-05, + "loss": 0.1027, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.7365698430862323e-05, + "loss": 0.1448, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364914645807537e-05, + "loss": 0.1179, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.7364130860752747e-05, + "loss": 0.206, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.736334707569796e-05, + "loss": 0.1499, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.7362563290643175e-05, + "loss": 0.2357, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.736177950558839e-05, + "loss": 0.1637, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360995720533603e-05, + "loss": 0.2992, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.7360211935478817e-05, + "loss": 0.3762, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.735942815042403e-05, + "loss": 0.225, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.735864436536924e-05, + "loss": 0.0935, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.7357860580314455e-05, + "loss": 0.0893, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.735707679525967e-05, + "loss": 0.1329, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.7356293010204883e-05, + "loss": 0.1481, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.7355509225150097e-05, + "loss": 0.1654, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.735472544009531e-05, + "loss": 0.1354, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.735394165504052e-05, + "loss": 0.3464, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.735315786998574e-05, + "loss": 0.2993, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.735253084194191e-05, + "loss": 0.6743, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.735174705688712e-05, + "loss": 0.1873, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350963271832337e-05, + "loss": 0.144, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.7350179486777547e-05, + "loss": 0.1459, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.734939570172276e-05, + "loss": 0.0939, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.7348611916667975e-05, + "loss": 0.1254, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347828131613186e-05, + "loss": 0.1482, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.7347044346558403e-05, + "loss": 0.1526, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.7346260561503613e-05, + "loss": 0.2159, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.7345476776448827e-05, + "loss": 0.2293, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.734469299139404e-05, + "loss": 0.3899, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.7343909206339255e-05, + "loss": 0.2237, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.734312542128447e-05, + "loss": 0.0663, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.7342341636229683e-05, + "loss": 0.0931, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.7341557851174893e-05, + "loss": 0.0785, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.734077406612011e-05, + "loss": 0.1971, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.733999028106532e-05, + "loss": 0.2693, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.7339206496010535e-05, + "loss": 0.2542, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.733842271095575e-05, + "loss": 0.1786, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.7337638925900963e-05, + "loss": 0.3282, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336855140846177e-05, + "loss": 0.4567, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.7336071355791387e-05, + "loss": 0.3093, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.7335287570736605e-05, + "loss": 0.0859, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.7334503785681815e-05, + "loss": 0.1042, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.733372000062703e-05, + "loss": 0.077, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332936215572243e-05, + "loss": 0.148, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.7332152430517457e-05, + "loss": 0.1841, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.733136864546267e-05, + "loss": 0.0989, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.7330584860407885e-05, + "loss": 0.1978, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.7329801075353095e-05, + "loss": 0.3673, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.732901729029831e-05, + "loss": 0.4043, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.7328233505243523e-05, + "loss": 0.2048, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.7327449720188737e-05, + "loss": 0.0706, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.732666593513395e-05, + "loss": 0.1344, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.732588215007916e-05, + "loss": 0.1345, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.732509836502438e-05, + "loss": 0.1103, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.732431457996959e-05, + "loss": 0.1883, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.7323530794914803e-05, + "loss": 0.1763, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.7322747009860017e-05, + "loss": 0.2073, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.732196322480523e-05, + "loss": 0.3624, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.7321179439750445e-05, + "loss": 0.4715, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.732039565469566e-05, + "loss": 0.1813, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.7319611869640873e-05, + "loss": 0.0933, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318828084586083e-05, + "loss": 0.1297, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.7318044299531297e-05, + "loss": 0.1176, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.731726051447651e-05, + "loss": 0.1331, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.7316476729421725e-05, + "loss": 0.166, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.731569294436694e-05, + "loss": 0.2521, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314909159312153e-05, + "loss": 0.2726, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.7314125374257363e-05, + "loss": 0.3365, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.731334158920258e-05, + "loss": 0.6564, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.731255780414779e-05, + "loss": 0.2698, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.7311774019093005e-05, + "loss": 0.0988, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.731099023403822e-05, + "loss": 0.0748, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.7310206448983433e-05, + "loss": 0.1137, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.7309422663928647e-05, + "loss": 0.1392, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7308638878873857e-05, + "loss": 0.1052, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.730785509381907e-05, + "loss": 0.1821, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7307071308764285e-05, + "loss": 0.1918, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.73062875237095e-05, + "loss": 0.3034, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7305503738654713e-05, + "loss": 0.5586, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.7304719953599927e-05, + "loss": 0.3347, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.730393616854514e-05, + "loss": 0.1197, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7303152383490355e-05, + "loss": 0.0808, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.7302525355446525e-05, + "loss": 0.1548, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.7301741570391735e-05, + "loss": 0.1336, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300957785336953e-05, + "loss": 0.1881, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.7300174000282163e-05, + "loss": 0.1734, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.7299390215227377e-05, + "loss": 0.2273, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.729860643017259e-05, + "loss": 0.3434, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.72978226451178e-05, + "loss": 0.4018, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.729703886006302e-05, + "loss": 0.2318, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.729625507500823e-05, + "loss": 0.0581, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.7295471289953443e-05, + "loss": 0.1156, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.7294687504898657e-05, + "loss": 0.1539, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.729390371984387e-05, + "loss": 0.1465, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.7293119934789085e-05, + "loss": 0.2405, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.72923361497343e-05, + "loss": 0.2068, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.7291552364679513e-05, + "loss": 0.3418, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.7290768579624727e-05, + "loss": 0.3344, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289984794569937e-05, + "loss": 0.324, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.7289201009515155e-05, + "loss": 0.2146, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.7288417224460365e-05, + "loss": 0.12, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.728763343940558e-05, + "loss": 0.1088, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.7286849654350793e-05, + "loss": 0.1019, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.7286065869296003e-05, + "loss": 0.1868, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.728528208424122e-05, + "loss": 0.2649, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.728449829918643e-05, + "loss": 0.2723, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.7283714514131645e-05, + "loss": 0.3037, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.728293072907686e-05, + "loss": 0.32, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.7282146944022073e-05, + "loss": 0.6633, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.7281363158967287e-05, + "loss": 0.3423, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.72805793739125e-05, + "loss": 0.0901, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.727979558885771e-05, + "loss": 0.0976, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.727901180380293e-05, + "loss": 0.219, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.727822801874814e-05, + "loss": 0.1546, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.7277444233693353e-05, + "loss": 0.183, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.7276660448638567e-05, + "loss": 0.1376, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.727587666358378e-05, + "loss": 0.2397, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.7275092878528995e-05, + "loss": 0.3504, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.7274309093474205e-05, + "loss": 0.4606, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.7273525308419423e-05, + "loss": 0.1816, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.7272741523364633e-05, + "loss": 0.0837, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.7271957738309847e-05, + "loss": 0.1249, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.727117395325506e-05, + "loss": 0.1805, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.7270390168200275e-05, + "loss": 0.246, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.726960638314549e-05, + "loss": 0.1257, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268822598090703e-05, + "loss": 0.1773, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.7268038813035913e-05, + "loss": 0.1636, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.7267255027981127e-05, + "loss": 0.2327, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.726647124292634e-05, + "loss": 0.8099, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.7265687457871555e-05, + "loss": 0.13, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.726490367281677e-05, + "loss": 0.1281, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.726411988776198e-05, + "loss": 0.1082, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.7263336102707197e-05, + "loss": 0.0937, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.7262552317652407e-05, + "loss": 0.1067, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.726176853259762e-05, + "loss": 0.1886, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.7260984747542835e-05, + "loss": 0.1727, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.726020096248805e-05, + "loss": 0.2541, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.7259417177433263e-05, + "loss": 0.3449, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.7258633392378477e-05, + "loss": 0.4496, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.725784960732369e-05, + "loss": 0.2353, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.72570658222689e-05, + "loss": 0.0484, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.7256282037214115e-05, + "loss": 0.0597, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.725549825215933e-05, + "loss": 0.1713, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.7254714467104543e-05, + "loss": 0.1206, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.7253930682049757e-05, + "loss": 0.2763, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.725314689699497e-05, + "loss": 0.1911, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.725236311194018e-05, + "loss": 0.2108, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.72515793268854e-05, + "loss": 0.3533, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.725079554183061e-05, + "loss": 0.3996, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.7250011756775823e-05, + "loss": 0.1907, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.7249227971721037e-05, + "loss": 0.076, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.724844418666625e-05, + "loss": 0.165, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.7247660401611464e-05, + "loss": 0.1086, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.7246876616556675e-05, + "loss": 0.1199, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.724609283150189e-05, + "loss": 0.245, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.7245309046447103e-05, + "loss": 0.1674, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.7244525261392317e-05, + "loss": 0.2862, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.724374147633753e-05, + "loss": 0.4564, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.7242957691282745e-05, + "loss": 0.4542, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.724217390622796e-05, + "loss": 0.2245, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.7241390121173172e-05, + "loss": 0.1992, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.7240606336118383e-05, + "loss": 0.0679, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.72398225510636e-05, + "loss": 0.0884, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.723903876600881e-05, + "loss": 0.1305, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.7238254980954025e-05, + "loss": 0.2016, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.723747119589924e-05, + "loss": 0.196, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.723668741084445e-05, + "loss": 0.1693, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235903625789666e-05, + "loss": 0.3803, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.7235119840734877e-05, + "loss": 0.4806, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.723433605568009e-05, + "loss": 0.2328, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.7233552270625305e-05, + "loss": 0.068, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.723276848557052e-05, + "loss": 0.0851, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231984700515732e-05, + "loss": 0.1106, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.7231200915460946e-05, + "loss": 0.1496, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.7230417130406157e-05, + "loss": 0.1601, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.7229633345351374e-05, + "loss": 0.2402, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.7228849560296585e-05, + "loss": 0.255, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.72280657752418e-05, + "loss": 0.2822, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.7227281990187012e-05, + "loss": 0.5758, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.7226498205132226e-05, + "loss": 0.1843, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.722571442007744e-05, + "loss": 0.0946, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.722493063502265e-05, + "loss": 0.0383, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.7224146849967868e-05, + "loss": 0.1229, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.722336306491308e-05, + "loss": 0.2175, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.7222579279858293e-05, + "loss": 0.1291, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.7221795494803506e-05, + "loss": 0.1822, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.722101170974872e-05, + "loss": 0.2656, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.7220227924693934e-05, + "loss": 0.2236, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.7219444139639148e-05, + "loss": 0.4038, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.721866035458436e-05, + "loss": 0.1978, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217876569529576e-05, + "loss": 0.0921, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7217092784474786e-05, + "loss": 0.151, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.721630899942e-05, + "loss": 0.1317, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7215525214365214e-05, + "loss": 0.1417, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.7214741429310425e-05, + "loss": 0.1506, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213957644255642e-05, + "loss": 0.1963, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.7213173859200853e-05, + "loss": 0.2705, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.7212390074146067e-05, + "loss": 0.2663, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.721160628909128e-05, + "loss": 0.5767, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210822504036494e-05, + "loss": 0.1702, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.7210038718981708e-05, + "loss": 0.0625, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7209254933926922e-05, + "loss": 0.1013, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.7208471148872136e-05, + "loss": 0.132, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.720768736381735e-05, + "loss": 0.1107, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.720690357876256e-05, + "loss": 0.1782, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7206119793707774e-05, + "loss": 0.213, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205336008652988e-05, + "loss": 0.3126, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.7204552223598202e-05, + "loss": 0.3847, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.7203768438543416e-05, + "loss": 0.5436, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202984653488627e-05, + "loss": 0.1875, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.7202200868433844e-05, + "loss": 0.0937, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.7201417083379054e-05, + "loss": 0.1355, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.720063329832427e-05, + "loss": 0.1417, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199849513269482e-05, + "loss": 0.1382, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.7199065728214696e-05, + "loss": 0.2395, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.719828194315991e-05, + "loss": 0.2275, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.7197498158105124e-05, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.7196714373050334e-05, + "loss": 0.3154, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.719593058799555e-05, + "loss": 0.5231, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.7195146802940762e-05, + "loss": 0.1573, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7194363017885976e-05, + "loss": 0.0808, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.719357923283119e-05, + "loss": 0.0893, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192795447776404e-05, + "loss": 0.1562, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.7192011662721618e-05, + "loss": 0.1937, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.719122787766683e-05, + "loss": 0.1925, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.7190444092612046e-05, + "loss": 0.2791, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.7189660307557256e-05, + "loss": 0.3337, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.718887652250247e-05, + "loss": 0.3198, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.7188092737447684e-05, + "loss": 0.5088, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.7187308952392898e-05, + "loss": 0.1709, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.7186525167338112e-05, + "loss": 0.1017, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.7185741382283322e-05, + "loss": 0.1423, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.7184957597228536e-05, + "loss": 0.1257, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.718417381217375e-05, + "loss": 0.1645, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.7183390027118964e-05, + "loss": 0.1752, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.7182606242064178e-05, + "loss": 0.1956, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181822457009392e-05, + "loss": 0.2104, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.7181038671954602e-05, + "loss": 0.2864, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.718025488689982e-05, + "loss": 0.5584, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.717947110184503e-05, + "loss": 0.2235, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.7178687316790244e-05, + "loss": 0.0822, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177903531735458e-05, + "loss": 0.1069, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.7177119746680672e-05, + "loss": 0.1213, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.7176335961625886e-05, + "loss": 0.2554, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.7175552176571096e-05, + "loss": 0.1368, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.7174768391516314e-05, + "loss": 0.1717, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173984606461524e-05, + "loss": 0.2277, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.7173200821406738e-05, + "loss": 0.2489, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.7172417036351952e-05, + "loss": 0.5192, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.7171633251297166e-05, + "loss": 0.1059, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.717084946624238e-05, + "loss": 0.1134, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.7170065681187594e-05, + "loss": 0.1005, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.7169281896132804e-05, + "loss": 0.079, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.716849811107802e-05, + "loss": 0.1957, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.7167714326023232e-05, + "loss": 0.1822, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.7166930540968446e-05, + "loss": 0.1535, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.716614675591366e-05, + "loss": 0.2625, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.716536297085887e-05, + "loss": 0.3252, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.7164579185804088e-05, + "loss": 0.486, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163795400749298e-05, + "loss": 0.1903, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.7163011615694512e-05, + "loss": 0.0703, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.7162227830639726e-05, + "loss": 0.1731, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.716144404558494e-05, + "loss": 0.0951, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.7160660260530154e-05, + "loss": 0.1422, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.7159876475475368e-05, + "loss": 0.1888, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.715909269042058e-05, + "loss": 0.1529, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.7158308905365796e-05, + "loss": 0.1551, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.7157525120311006e-05, + "loss": 0.2719, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.7156741335256223e-05, + "loss": 0.5324, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155957550201434e-05, + "loss": 0.2271, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.7155173765146648e-05, + "loss": 0.1467, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.715438998009186e-05, + "loss": 0.0991, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.7153606195037072e-05, + "loss": 0.1359, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.715282240998229e-05, + "loss": 0.1051, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.71520386249275e-05, + "loss": 0.2102, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.7151254839872714e-05, + "loss": 0.2189, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.7150471054817928e-05, + "loss": 0.2397, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.7149687269763142e-05, + "loss": 0.2792, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.7148903484708356e-05, + "loss": 0.4688, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.714811969965357e-05, + "loss": 0.2494, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.714733591459878e-05, + "loss": 0.1013, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.7146552129543997e-05, + "loss": 0.0738, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.7145768344489208e-05, + "loss": 0.1392, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144984559434422e-05, + "loss": 0.132, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.7144200774379636e-05, + "loss": 0.1433, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.714341698932485e-05, + "loss": 0.2131, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142633204270063e-05, + "loss": 0.237, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.7142006176226234e-05, + "loss": 0.3774, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.7141222391171444e-05, + "loss": 0.4308, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.7140438606116662e-05, + "loss": 0.2426, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.7139654821061872e-05, + "loss": 0.1149, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.7138871036007086e-05, + "loss": 0.0679, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.71380872509523e-05, + "loss": 0.0922, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.7137303465897514e-05, + "loss": 0.1522, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.7136519680842728e-05, + "loss": 0.1422, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.7135735895787942e-05, + "loss": 0.1224, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134952110733156e-05, + "loss": 0.27, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.7134168325678366e-05, + "loss": 0.3725, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.713338454062358e-05, + "loss": 0.3632, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.7132600755568794e-05, + "loss": 0.1988, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131816970514008e-05, + "loss": 0.0415, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.7131033185459222e-05, + "loss": 0.0715, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.7130249400404436e-05, + "loss": 0.1044, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.7129465615349646e-05, + "loss": 0.2214, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.7128681830294864e-05, + "loss": 0.4414, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127898045240074e-05, + "loss": 0.2215, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.7127114260185288e-05, + "loss": 0.2416, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.7126330475130502e-05, + "loss": 0.2983, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.7125546690075716e-05, + "loss": 0.6556, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.712476290502093e-05, + "loss": 0.1771, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.712397911996614e-05, + "loss": 0.1033, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.7123195334911354e-05, + "loss": 0.0999, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.7122411549856568e-05, + "loss": 0.1314, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.7121627764801782e-05, + "loss": 0.1199, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120843979746996e-05, + "loss": 0.237, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.712006019469221e-05, + "loss": 0.2092, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7119276409637424e-05, + "loss": 0.2627, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7118492624582638e-05, + "loss": 0.3178, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.7117708839527848e-05, + "loss": 0.5211, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116925054473062e-05, + "loss": 0.2629, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.7116141269418276e-05, + "loss": 0.0863, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.711535748436349e-05, + "loss": 0.1249, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.7114573699308704e-05, + "loss": 0.1279, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.7113789914253914e-05, + "loss": 0.0951, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.711300612919913e-05, + "loss": 0.1773, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.7112222344144342e-05, + "loss": 0.169, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.7111438559089556e-05, + "loss": 0.1928, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.711065477403477e-05, + "loss": 0.3014, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109870988979984e-05, + "loss": 0.5065, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.7109087203925198e-05, + "loss": 0.127, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.710830341887041e-05, + "loss": 0.0899, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.7107519633815622e-05, + "loss": 0.0991, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.710673584876084e-05, + "loss": 0.0852, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.710595206370605e-05, + "loss": 0.1273, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7105168278651264e-05, + "loss": 0.2085, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7104384493596478e-05, + "loss": 0.2701, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.710360070854169e-05, + "loss": 0.214, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102816923486906e-05, + "loss": 0.3061, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.7102033138432116e-05, + "loss": 0.4987, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.710124935337733e-05, + "loss": 0.2229, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7100465568322544e-05, + "loss": 0.073, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.7099681783267758e-05, + "loss": 0.0574, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.709889799821297e-05, + "loss": 0.0993, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.7098114213158186e-05, + "loss": 0.1771, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.70973304281034e-05, + "loss": 0.0893, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.7096546643048613e-05, + "loss": 0.1401, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.7095762857993824e-05, + "loss": 0.2401, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.709497907293904e-05, + "loss": 0.2492, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.709419528788425e-05, + "loss": 0.4506, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.7093411502829466e-05, + "loss": 0.1444, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.709262771777468e-05, + "loss": 0.0706, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.709184393271989e-05, + "loss": 0.0977, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.7091060147665107e-05, + "loss": 0.1293, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.7090276362610318e-05, + "loss": 0.1469, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.7089492577555532e-05, + "loss": 0.1896, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.7088708792500746e-05, + "loss": 0.1649, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.708792500744596e-05, + "loss": 0.2219, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.7087141222391173e-05, + "loss": 0.2975, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.7086357437336387e-05, + "loss": 0.7063, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.70855736522816e-05, + "loss": 0.1183, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084789867226815e-05, + "loss": 0.0876, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.7084006082172026e-05, + "loss": 0.3809, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.708322229711724e-05, + "loss": 0.1294, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.7082438512062454e-05, + "loss": 0.1697, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.7081654727007667e-05, + "loss": 0.224, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.708087094195288e-05, + "loss": 0.1926, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.7080087156898092e-05, + "loss": 0.295, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.707930337184331e-05, + "loss": 0.3733, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.707851958678852e-05, + "loss": 0.4389, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.7077735801733734e-05, + "loss": 0.1884, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.7076952016678947e-05, + "loss": 0.0577, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.707616823162416e-05, + "loss": 0.0889, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.7075384446569375e-05, + "loss": 0.0989, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.707460066151459e-05, + "loss": 0.1449, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.70738168764598e-05, + "loss": 0.1931, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7073033091405014e-05, + "loss": 0.1717, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7072249306350228e-05, + "loss": 0.2924, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.707146552129544e-05, + "loss": 0.2265, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.7070681736240655e-05, + "loss": 0.3034, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.706989795118587e-05, + "loss": 0.2481, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.7069114166131083e-05, + "loss": 0.0638, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.7068330381076294e-05, + "loss": 0.0919, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.7067546596021508e-05, + "loss": 0.095, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.706676281096672e-05, + "loss": 0.1459, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.7065979025911935e-05, + "loss": 0.1144, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.706519524085715e-05, + "loss": 0.2965, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.7064411455802363e-05, + "loss": 0.18, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.7063627670747577e-05, + "loss": 0.4294, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062843885692788e-05, + "loss": 0.7633, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.7062060100638e-05, + "loss": 0.1876, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.7061276315583215e-05, + "loss": 0.1402, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.706049253052843e-05, + "loss": 0.0744, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.7059708745473643e-05, + "loss": 0.1534, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058924960418857e-05, + "loss": 0.105, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.7058141175364068e-05, + "loss": 0.2081, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.7057357390309285e-05, + "loss": 0.1488, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.7056573605254495e-05, + "loss": 0.2616, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.705578982019971e-05, + "loss": 0.2662, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.7055006035144923e-05, + "loss": 0.3883, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.7054222250090137e-05, + "loss": 0.1609, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.705343846503535e-05, + "loss": 0.0799, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.705265467998056e-05, + "loss": 0.087, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.7051870894925775e-05, + "loss": 0.194, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.705108710987099e-05, + "loss": 0.1103, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.7050303324816203e-05, + "loss": 0.1893, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.7049519539761417e-05, + "loss": 0.2444, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.704873575470663e-05, + "loss": 0.2429, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.7047951969651845e-05, + "loss": 0.3144, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.704716818459706e-05, + "loss": 0.4834, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.704638439954227e-05, + "loss": 0.1951, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.7045600614487487e-05, + "loss": 0.0849, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.7044816829432697e-05, + "loss": 0.1384, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.704403304437791e-05, + "loss": 0.1505, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.7043249259323125e-05, + "loss": 0.154, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.7042465474268336e-05, + "loss": 0.2354, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.7041681689213553e-05, + "loss": 0.2192, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040897904158763e-05, + "loss": 0.2718, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.7040114119103977e-05, + "loss": 0.3056, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.703933033404919e-05, + "loss": 0.4475, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.7038546548994405e-05, + "loss": 0.2562, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.703776276393962e-05, + "loss": 0.0562, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036978978884833e-05, + "loss": 0.1212, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.7036195193830047e-05, + "loss": 0.0697, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.703541140877526e-05, + "loss": 0.1794, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.703462762372047e-05, + "loss": 0.1976, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.7033843838665685e-05, + "loss": 0.185, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.70330600536109e-05, + "loss": 0.2454, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.7032276268556113e-05, + "loss": 0.1997, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.7031492483501327e-05, + "loss": 0.6297, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.7030708698446537e-05, + "loss": 0.1574, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029924913391755e-05, + "loss": 0.066, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.7029141128336965e-05, + "loss": 0.1136, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.702835734328218e-05, + "loss": 0.1132, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.7027573558227393e-05, + "loss": 0.1921, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.7026789773172607e-05, + "loss": 0.1347, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.702600598811782e-05, + "loss": 0.2405, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.7025222203063035e-05, + "loss": 0.2259, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.7024438418008245e-05, + "loss": 0.3136, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.7023654632953463e-05, + "loss": 0.3853, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022870847898673e-05, + "loss": 0.2158, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.7022087062843887e-05, + "loss": 0.0802, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.70213032777891e-05, + "loss": 0.0745, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.7020519492734315e-05, + "loss": 0.1353, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.701973570767953e-05, + "loss": 0.1455, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.701895192262474e-05, + "loss": 0.1117, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.7018168137569953e-05, + "loss": 0.1912, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.7017384352515167e-05, + "loss": 0.211, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.701660056746038e-05, + "loss": 0.2398, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.7015816782405595e-05, + "loss": 0.6622, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.701503299735081e-05, + "loss": 0.1905, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.7014249212296023e-05, + "loss": 0.0718, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.7013465427241237e-05, + "loss": 0.0488, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.7012681642186447e-05, + "loss": 0.1622, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.701189785713166e-05, + "loss": 0.1579, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.7011114072076875e-05, + "loss": 0.1167, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.701033028702209e-05, + "loss": 0.1729, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.7009546501967303e-05, + "loss": 0.1643, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008762716912513e-05, + "loss": 0.3448, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.700797893185773e-05, + "loss": 0.4274, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.700719514680294e-05, + "loss": 0.1908, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.7006411361748155e-05, + "loss": 0.0368, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.700562757669337e-05, + "loss": 0.0898, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004843791638583e-05, + "loss": 0.1293, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.7004060006583797e-05, + "loss": 0.1416, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.700327622152901e-05, + "loss": 0.1573, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.700249243647422e-05, + "loss": 0.1809, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.7001708651419435e-05, + "loss": 0.3192, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.700092486636465e-05, + "loss": 0.2653, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.7000141081309863e-05, + "loss": 0.5466, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.6999357296255077e-05, + "loss": 0.1971, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.699857351120029e-05, + "loss": 0.1222, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997789726145505e-05, + "loss": 0.1325, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.6997005941090715e-05, + "loss": 0.0593, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.6996222156035932e-05, + "loss": 0.1517, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.6995438370981143e-05, + "loss": 0.1367, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.6994654585926357e-05, + "loss": 0.2145, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.699387080087157e-05, + "loss": 0.2504, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993087015816785e-05, + "loss": 0.3664, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.6992303230762e-05, + "loss": 0.4137, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.699151944570721e-05, + "loss": 0.2094, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.6990735660652423e-05, + "loss": 0.0852, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.6989951875597637e-05, + "loss": 0.0603, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.698916809054285e-05, + "loss": 0.161, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.6988384305488065e-05, + "loss": 0.106, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.698760052043328e-05, + "loss": 0.1538, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986816735378492e-05, + "loss": 0.1602, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.6986032950323706e-05, + "loss": 0.2877, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.6985249165268917e-05, + "loss": 0.4337, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.698446538021413e-05, + "loss": 0.6768, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.6983681595159345e-05, + "loss": 0.2147, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.698289781010456e-05, + "loss": 0.1292, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.6982114025049772e-05, + "loss": 0.1835, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.6981330239994983e-05, + "loss": 0.1112, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.69805464549402e-05, + "loss": 0.1764, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.697976266988541e-05, + "loss": 0.1477, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.6978978884830625e-05, + "loss": 0.257, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.697819509977584e-05, + "loss": 0.2419, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.6977411314721053e-05, + "loss": 0.2276, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.6976627529666266e-05, + "loss": 0.3961, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.697584374461148e-05, + "loss": 0.3068, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.697505995955669e-05, + "loss": 0.0723, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.6974276174501908e-05, + "loss": 0.0967, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.697349238944712e-05, + "loss": 0.1151, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.6972708604392333e-05, + "loss": 0.1615, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.6971924819337546e-05, + "loss": 0.219, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.697114103428276e-05, + "loss": 0.2574, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.6970357249227974e-05, + "loss": 0.2705, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.6969573464173185e-05, + "loss": 0.2993, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.69687896791184e-05, + "loss": 0.6279, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.6968005894063613e-05, + "loss": 0.1919, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.6967222109008826e-05, + "loss": 0.0439, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.696643832395404e-05, + "loss": 0.0789, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.6965654538899254e-05, + "loss": 0.1537, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964870753844468e-05, + "loss": 0.0927, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.6964086968789682e-05, + "loss": 0.1653, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.6963303183734893e-05, + "loss": 0.2245, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.696251939868011e-05, + "loss": 0.2526, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.696173561362532e-05, + "loss": 0.2687, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960951828570534e-05, + "loss": 0.6334, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.6960168043515748e-05, + "loss": 0.1858, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.695938425846096e-05, + "loss": 0.0953, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.6958600473406176e-05, + "loss": 0.1384, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.6957816688351387e-05, + "loss": 0.1128, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.69570329032966e-05, + "loss": 0.1004, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.6956249118241814e-05, + "loss": 0.1117, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.695546533318703e-05, + "loss": 0.2534, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.6954681548132242e-05, + "loss": 0.23, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.6953897763077456e-05, + "loss": 0.2366, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.695311397802267e-05, + "loss": 0.4956, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.6952330192967884e-05, + "loss": 0.2167, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.6951546407913094e-05, + "loss": 0.0958, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.695076262285831e-05, + "loss": 0.0566, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949978837803522e-05, + "loss": 0.1221, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.6949195052748736e-05, + "loss": 0.0884, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.694841126769395e-05, + "loss": 0.2541, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.694762748263916e-05, + "loss": 0.2203, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.6946843697584378e-05, + "loss": 0.1863, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.694605991252959e-05, + "loss": 0.2929, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.6945276127474802e-05, + "loss": 0.271, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.6944492342420016e-05, + "loss": 0.1831, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.694370855736523e-05, + "loss": 0.0501, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942924772310444e-05, + "loss": 0.1323, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.6942140987255658e-05, + "loss": 0.2169, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.694135720220087e-05, + "loss": 0.1172, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6940573417146082e-05, + "loss": 0.2164, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6939789632091296e-05, + "loss": 0.1629, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.693900584703651e-05, + "loss": 0.2959, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.6938222061981724e-05, + "loss": 0.3087, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.6937438276926938e-05, + "loss": 0.7987, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.6936654491872152e-05, + "loss": 0.2158, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935870706817362e-05, + "loss": 0.0551, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.6935086921762576e-05, + "loss": 0.0877, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.693430313670779e-05, + "loss": 0.1039, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.6933519351653004e-05, + "loss": 0.1547, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.6932735566598218e-05, + "loss": 0.167, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931951781543432e-05, + "loss": 0.1186, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.6931167996488646e-05, + "loss": 0.1959, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.6930384211433856e-05, + "loss": 0.1781, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.692960042637907e-05, + "loss": 0.4715, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928816641324284e-05, + "loss": 0.2431, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.6928032856269498e-05, + "loss": 0.0756, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.6927249071214712e-05, + "loss": 0.0938, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.6926465286159926e-05, + "loss": 0.1711, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.6925681501105136e-05, + "loss": 0.1379, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924897716050354e-05, + "loss": 0.1878, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.6924113930995564e-05, + "loss": 0.2055, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.6923330145940778e-05, + "loss": 0.1536, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.6922546360885992e-05, + "loss": 0.2507, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.6921762575831206e-05, + "loss": 0.4966, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.692097879077642e-05, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.692019500572163e-05, + "loss": 0.0673, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.6919411220666844e-05, + "loss": 0.0905, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.6918627435612058e-05, + "loss": 0.1219, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917843650557272e-05, + "loss": 0.0941, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.6917059865502486e-05, + "loss": 0.1362, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.69162760804477e-05, + "loss": 0.1788, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.6915492295392914e-05, + "loss": 0.2372, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.6914708510338128e-05, + "loss": 0.2672, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913924725283338e-05, + "loss": 0.409, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.6913140940228556e-05, + "loss": 0.1966, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.6912357155173766e-05, + "loss": 0.0573, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.691157337011898e-05, + "loss": 0.0683, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910789585064194e-05, + "loss": 0.0474, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.6910005800009404e-05, + "loss": 0.0682, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.690922201495462e-05, + "loss": 0.1241, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.6908438229899832e-05, + "loss": 0.1928, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.6907654444845046e-05, + "loss": 0.23, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.690687065979026e-05, + "loss": 0.3035, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6906086874735474e-05, + "loss": 0.443, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.6905303089680688e-05, + "loss": 0.2238, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.69045193046259e-05, + "loss": 0.0642, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.6903735519571116e-05, + "loss": 0.0975, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.690295173451633e-05, + "loss": 0.1374, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.690216794946154e-05, + "loss": 0.1851, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6901384164406754e-05, + "loss": 0.1809, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.6900600379351968e-05, + "loss": 0.1576, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899816594297182e-05, + "loss": 0.2686, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.6899032809242396e-05, + "loss": 0.241, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.6898249024187606e-05, + "loss": 0.6042, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.6897465239132823e-05, + "loss": 0.1927, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.6896681454078034e-05, + "loss": 0.1114, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895897669023248e-05, + "loss": 0.0992, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.6895113883968462e-05, + "loss": 0.1313, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.6894330098913676e-05, + "loss": 0.1349, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.689354631385889e-05, + "loss": 0.0776, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.6892762528804104e-05, + "loss": 0.2206, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.6891978743749314e-05, + "loss": 0.2008, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.689119495869453e-05, + "loss": 0.1772, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.6890411173639742e-05, + "loss": 0.4666, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.6889627388584956e-05, + "loss": 0.1922, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.688884360353017e-05, + "loss": 0.0489, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.6888059818475384e-05, + "loss": 0.1144, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.6887276033420597e-05, + "loss": 0.1017, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.6886492248365808e-05, + "loss": 0.1164, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.6885708463311022e-05, + "loss": 0.1358, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.6884924678256236e-05, + "loss": 0.1768, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.688414089320145e-05, + "loss": 0.2551, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.6883357108146664e-05, + "loss": 0.3179, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.6882573323091878e-05, + "loss": 0.5792, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.688178953803709e-05, + "loss": 0.197, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.6881005752982305e-05, + "loss": 0.1008, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.6880221967927516e-05, + "loss": 0.1025, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.687943818287273e-05, + "loss": 0.1174, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.6878654397817944e-05, + "loss": 0.1236, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.6877870612763158e-05, + "loss": 0.2077, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.687708682770837e-05, + "loss": 0.2177, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.6876303042653582e-05, + "loss": 0.2288, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.68755192575988e-05, + "loss": 0.3481, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.687473547254401e-05, + "loss": 0.4198, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873951687489224e-05, + "loss": 0.2261, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.6873167902434438e-05, + "loss": 0.0703, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.687238411737965e-05, + "loss": 0.0709, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.6871600332324865e-05, + "loss": 0.1438, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.687081654727008e-05, + "loss": 0.1581, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.687003276221529e-05, + "loss": 0.1882, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.6869248977160504e-05, + "loss": 0.1306, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.6868465192105718e-05, + "loss": 0.3843, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.686768140705093e-05, + "loss": 0.2341, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.6866897621996145e-05, + "loss": 0.5793, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.686611383694136e-05, + "loss": 0.1926, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.6865330051886573e-05, + "loss": 0.0443, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6864546266831784e-05, + "loss": 0.0993, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.6863762481777e-05, + "loss": 0.1558, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.686297869672221e-05, + "loss": 0.1095, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.6862194911667425e-05, + "loss": 0.1696, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.686141112661264e-05, + "loss": 0.1484, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.6860627341557853e-05, + "loss": 0.3471, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859843556503067e-05, + "loss": 0.2733, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.6859059771448278e-05, + "loss": 0.4663, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.685827598639349e-05, + "loss": 0.2136, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.6857492201338706e-05, + "loss": 0.0273, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.685670841628392e-05, + "loss": 0.1179, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855924631229133e-05, + "loss": 0.1099, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.6855140846174347e-05, + "loss": 0.1182, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.685435706111956e-05, + "loss": 0.1853, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.6853573276064775e-05, + "loss": 0.0739, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.6852789491009986e-05, + "loss": 0.2085, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.68520057059552e-05, + "loss": 0.2557, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.6851221920900413e-05, + "loss": 0.5123, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.6850438135845627e-05, + "loss": 0.2266, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.684965435079084e-05, + "loss": 0.0615, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.6848870565736052e-05, + "loss": 0.0517, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.684808678068127e-05, + "loss": 0.0945, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.684730299562648e-05, + "loss": 0.1399, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.6846519210571693e-05, + "loss": 0.1547, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.6845735425516907e-05, + "loss": 0.2386, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.684495164046212e-05, + "loss": 0.2613, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.6844167855407335e-05, + "loss": 0.3378, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.684338407035255e-05, + "loss": 0.5164, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.684260028529776e-05, + "loss": 0.1966, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841816500242977e-05, + "loss": 0.0675, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.6841032715188187e-05, + "loss": 0.1247, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.68402489301334e-05, + "loss": 0.1262, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.6839465145078615e-05, + "loss": 0.1284, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.683868136002383e-05, + "loss": 0.2704, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837897574969043e-05, + "loss": 0.1451, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.6837113789914254e-05, + "loss": 0.3121, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836330004859467e-05, + "loss": 0.3131, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.683554621980468e-05, + "loss": 0.6027, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.6834762434749895e-05, + "loss": 0.2483, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.683397864969511e-05, + "loss": 0.0526, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.6833194864640323e-05, + "loss": 0.1629, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.6832411079585537e-05, + "loss": 0.1384, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.683162729453075e-05, + "loss": 0.1629, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.683084350947596e-05, + "loss": 0.1871, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.683005972442118e-05, + "loss": 0.2585, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.682927593936639e-05, + "loss": 0.3094, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.6828492154311603e-05, + "loss": 0.275, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.6827708369256817e-05, + "loss": 0.4019, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826924584202028e-05, + "loss": 0.2128, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.6826140799147245e-05, + "loss": 0.0263, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.6825357014092455e-05, + "loss": 0.1624, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.682457322903767e-05, + "loss": 0.0927, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823789443982883e-05, + "loss": 0.1146, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.6823005658928097e-05, + "loss": 0.1037, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.682222187387331e-05, + "loss": 0.1416, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.6821438088818525e-05, + "loss": 0.2463, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.6820654303763735e-05, + "loss": 0.3664, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819870518708953e-05, + "loss": 0.5594, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.6819086733654163e-05, + "loss": 0.1797, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.6818302948599377e-05, + "loss": 0.1078, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.681751916354459e-05, + "loss": 0.0942, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.6816735378489805e-05, + "loss": 0.0715, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.681595159343502e-05, + "loss": 0.0956, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.681516780838023e-05, + "loss": 0.1574, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.6814384023325447e-05, + "loss": 0.1546, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.6813600238270657e-05, + "loss": 0.3196, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.681281645321587e-05, + "loss": 0.3463, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.6812032668161085e-05, + "loss": 0.515, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.68112488831063e-05, + "loss": 0.1254, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.6810465098051513e-05, + "loss": 0.1172, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.6809681312996727e-05, + "loss": 0.081, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.6808897527941937e-05, + "loss": 0.1016, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.680811374288715e-05, + "loss": 0.133, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.6807329957832365e-05, + "loss": 0.1478, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.680654617277758e-05, + "loss": 0.2154, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.6805762387722793e-05, + "loss": 0.2109, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.6804978602668007e-05, + "loss": 0.5048, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.680419481761322e-05, + "loss": 0.5279, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.680341103255843e-05, + "loss": 0.1696, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.6802627247503645e-05, + "loss": 0.0934, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.680184346244886e-05, + "loss": 0.1172, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.6801059677394073e-05, + "loss": 0.1691, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.6800275892339287e-05, + "loss": 0.2184, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.67994921072845e-05, + "loss": 0.3285, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.019780983363940314, + "eval_loss": 0.6674277186393738, + "eval_runtime": 457.5932, + "eval_samples_per_second": 41.631, + "eval_steps_per_second": 5.205, + "eval_wer": 0.16735751295336787, + "step": 21431 + }, + { + "epoch": 1.0, + "learning_rate": 1.6798708322229715e-05, + "loss": 0.2708, + "step": 21435 + }, + { + "epoch": 1.0, + "learning_rate": 1.6797924537174925e-05, + "loss": 0.0632, + "step": 21440 + }, + { + "epoch": 1.0, + "learning_rate": 1.679714075212014e-05, + "loss": 0.0625, + "step": 21445 + }, + { + "epoch": 1.0, + "learning_rate": 1.6796356967065353e-05, + "loss": 0.1169, + "step": 21450 + }, + { + "epoch": 1.0, + "learning_rate": 1.6795573182010567e-05, + "loss": 0.1674, + "step": 21455 + }, + { + "epoch": 1.0, + "learning_rate": 1.679478939695578e-05, + "loss": 0.1875, + "step": 21460 + }, + { + "epoch": 1.0, + "learning_rate": 1.6794005611900995e-05, + "loss": 0.2132, + "step": 21465 + }, + { + "epoch": 1.0, + "learning_rate": 1.6793221826846205e-05, + "loss": 0.2001, + "step": 21470 + }, + { + "epoch": 1.0, + "learning_rate": 1.6792438041791422e-05, + "loss": 0.2851, + "step": 21475 + }, + { + "epoch": 1.0, + "learning_rate": 1.6791654256736633e-05, + "loss": 0.2083, + "step": 21480 + }, + { + "epoch": 1.0, + "learning_rate": 1.6790870471681847e-05, + "loss": 0.4056, + "step": 21485 + }, + { + "epoch": 1.0, + "learning_rate": 1.679008668662706e-05, + "loss": 0.0784, + "step": 21490 + }, + { + "epoch": 1.0, + "learning_rate": 1.6789302901572275e-05, + "loss": 0.1198, + "step": 21495 + }, + { + "epoch": 1.0, + "learning_rate": 1.678851911651749e-05, + "loss": 0.0637, + "step": 21500 + }, + { + "epoch": 1.0, + "learning_rate": 1.67877353314627e-05, + "loss": 0.0893, + "step": 21505 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786951546407913e-05, + "loss": 0.1339, + "step": 21510 + }, + { + "epoch": 1.0, + "learning_rate": 1.6786167761353127e-05, + "loss": 0.2976, + "step": 21515 + }, + { + "epoch": 1.0, + "learning_rate": 1.678538397629834e-05, + "loss": 0.2174, + "step": 21520 + }, + { + "epoch": 1.0, + "learning_rate": 1.6784600191243555e-05, + "loss": 0.2785, + "step": 21525 + }, + { + "epoch": 1.0, + "learning_rate": 1.678381640618877e-05, + "loss": 0.5756, + "step": 21530 + }, + { + "epoch": 1.0, + "learning_rate": 1.6783032621133983e-05, + "loss": 0.3013, + "step": 21535 + }, + { + "epoch": 1.01, + "learning_rate": 1.6782248836079196e-05, + "loss": 0.0721, + "step": 21540 + }, + { + "epoch": 1.01, + "learning_rate": 1.6781465051024407e-05, + "loss": 0.0953, + "step": 21545 + }, + { + "epoch": 1.01, + "learning_rate": 1.6780681265969624e-05, + "loss": 0.0683, + "step": 21550 + }, + { + "epoch": 1.01, + "learning_rate": 1.6779897480914835e-05, + "loss": 0.155, + "step": 21555 + }, + { + "epoch": 1.01, + "learning_rate": 1.677911369586005e-05, + "loss": 0.2342, + "step": 21560 + }, + { + "epoch": 1.01, + "learning_rate": 1.6778329910805263e-05, + "loss": 0.1579, + "step": 21565 + }, + { + "epoch": 1.01, + "learning_rate": 1.6777546125750473e-05, + "loss": 0.2137, + "step": 21570 + }, + { + "epoch": 1.01, + "learning_rate": 1.677676234069569e-05, + "loss": 0.3298, + "step": 21575 + }, + { + "epoch": 1.01, + "learning_rate": 1.67759785556409e-05, + "loss": 0.3294, + "step": 21580 + }, + { + "epoch": 1.01, + "learning_rate": 1.6775194770586115e-05, + "loss": 0.347, + "step": 21585 + }, + { + "epoch": 1.01, + "learning_rate": 1.677441098553133e-05, + "loss": 0.0707, + "step": 21590 + }, + { + "epoch": 1.01, + "learning_rate": 1.6773627200476543e-05, + "loss": 0.0494, + "step": 21595 + }, + { + "epoch": 1.01, + "learning_rate": 1.6772843415421757e-05, + "loss": 0.0614, + "step": 21600 + }, + { + "epoch": 1.01, + "learning_rate": 1.677205963036697e-05, + "loss": 0.1719, + "step": 21605 + }, + { + "epoch": 1.01, + "learning_rate": 1.6771275845312184e-05, + "loss": 0.1739, + "step": 21610 + }, + { + "epoch": 1.01, + "learning_rate": 1.6770492060257398e-05, + "loss": 0.2605, + "step": 21615 + }, + { + "epoch": 1.01, + "learning_rate": 1.676970827520261e-05, + "loss": 0.2799, + "step": 21620 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768924490147823e-05, + "loss": 0.3259, + "step": 21625 + }, + { + "epoch": 1.01, + "learning_rate": 1.6768140705093037e-05, + "loss": 0.3949, + "step": 21630 + }, + { + "epoch": 1.01, + "learning_rate": 1.676735692003825e-05, + "loss": 0.4475, + "step": 21635 + }, + { + "epoch": 1.01, + "learning_rate": 1.6766573134983464e-05, + "loss": 0.067, + "step": 21640 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765789349928675e-05, + "loss": 0.0889, + "step": 21645 + }, + { + "epoch": 1.01, + "learning_rate": 1.6765005564873892e-05, + "loss": 0.0926, + "step": 21650 + }, + { + "epoch": 1.01, + "learning_rate": 1.6764221779819103e-05, + "loss": 0.169, + "step": 21655 + }, + { + "epoch": 1.01, + "learning_rate": 1.6763437994764317e-05, + "loss": 0.1749, + "step": 21660 + }, + { + "epoch": 1.01, + "learning_rate": 1.676265420970953e-05, + "loss": 0.2628, + "step": 21665 + }, + { + "epoch": 1.01, + "learning_rate": 1.6761870424654744e-05, + "loss": 0.2144, + "step": 21670 + }, + { + "epoch": 1.01, + "learning_rate": 1.676108663959996e-05, + "loss": 0.274, + "step": 21675 + }, + { + "epoch": 1.01, + "learning_rate": 1.6760302854545172e-05, + "loss": 0.3579, + "step": 21680 + }, + { + "epoch": 1.01, + "learning_rate": 1.6759519069490383e-05, + "loss": 0.3807, + "step": 21685 + }, + { + "epoch": 1.01, + "learning_rate": 1.67587352844356e-05, + "loss": 0.1214, + "step": 21690 + }, + { + "epoch": 1.01, + "learning_rate": 1.675795149938081e-05, + "loss": 0.1011, + "step": 21695 + }, + { + "epoch": 1.01, + "learning_rate": 1.6757167714326024e-05, + "loss": 0.0696, + "step": 21700 + }, + { + "epoch": 1.01, + "learning_rate": 1.675638392927124e-05, + "loss": 0.1463, + "step": 21705 + }, + { + "epoch": 1.01, + "learning_rate": 1.6755600144216452e-05, + "loss": 0.1416, + "step": 21710 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754816359161666e-05, + "loss": 0.128, + "step": 21715 + }, + { + "epoch": 1.01, + "learning_rate": 1.6754032574106877e-05, + "loss": 0.1893, + "step": 21720 + }, + { + "epoch": 1.01, + "learning_rate": 1.675324878905209e-05, + "loss": 0.3064, + "step": 21725 + }, + { + "epoch": 1.01, + "learning_rate": 1.6752465003997305e-05, + "loss": 0.3098, + "step": 21730 + }, + { + "epoch": 1.01, + "learning_rate": 1.675168121894252e-05, + "loss": 0.3172, + "step": 21735 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750897433887732e-05, + "loss": 0.0526, + "step": 21740 + }, + { + "epoch": 1.01, + "learning_rate": 1.6750113648832946e-05, + "loss": 0.0767, + "step": 21745 + }, + { + "epoch": 1.01, + "learning_rate": 1.674932986377816e-05, + "loss": 0.105, + "step": 21750 + }, + { + "epoch": 1.02, + "learning_rate": 1.6748546078723374e-05, + "loss": 0.128, + "step": 21755 + }, + { + "epoch": 1.02, + "learning_rate": 1.6747762293668585e-05, + "loss": 0.1071, + "step": 21760 + }, + { + "epoch": 1.02, + "learning_rate": 1.67469785086138e-05, + "loss": 0.1624, + "step": 21765 + }, + { + "epoch": 1.02, + "learning_rate": 1.6746194723559012e-05, + "loss": 0.3446, + "step": 21770 + }, + { + "epoch": 1.02, + "learning_rate": 1.6745410938504226e-05, + "loss": 0.3188, + "step": 21775 + }, + { + "epoch": 1.02, + "learning_rate": 1.674462715344944e-05, + "loss": 0.5742, + "step": 21780 + }, + { + "epoch": 1.02, + "learning_rate": 1.674384336839465e-05, + "loss": 0.2419, + "step": 21785 + }, + { + "epoch": 1.02, + "learning_rate": 1.6743059583339868e-05, + "loss": 0.0936, + "step": 21790 + }, + { + "epoch": 1.02, + "learning_rate": 1.674227579828508e-05, + "loss": 0.0601, + "step": 21795 + }, + { + "epoch": 1.02, + "learning_rate": 1.6741492013230292e-05, + "loss": 0.0906, + "step": 21800 + }, + { + "epoch": 1.02, + "learning_rate": 1.6740708228175506e-05, + "loss": 0.104, + "step": 21805 + }, + { + "epoch": 1.02, + "learning_rate": 1.673992444312072e-05, + "loss": 0.1736, + "step": 21810 + }, + { + "epoch": 1.02, + "learning_rate": 1.6739140658065934e-05, + "loss": 0.1642, + "step": 21815 + }, + { + "epoch": 1.02, + "learning_rate": 1.6738356873011148e-05, + "loss": 0.194, + "step": 21820 + }, + { + "epoch": 1.02, + "learning_rate": 1.673757308795636e-05, + "loss": 0.1698, + "step": 21825 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736789302901572e-05, + "loss": 0.5151, + "step": 21830 + }, + { + "epoch": 1.02, + "learning_rate": 1.6736005517846786e-05, + "loss": 0.4388, + "step": 21835 + }, + { + "epoch": 1.02, + "learning_rate": 1.6735221732792e-05, + "loss": 0.0625, + "step": 21840 + }, + { + "epoch": 1.02, + "learning_rate": 1.6734437947737214e-05, + "loss": 0.0551, + "step": 21845 + }, + { + "epoch": 1.02, + "learning_rate": 1.6733654162682428e-05, + "loss": 0.086, + "step": 21850 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732870377627642e-05, + "loss": 0.107, + "step": 21855 + }, + { + "epoch": 1.02, + "learning_rate": 1.6732086592572853e-05, + "loss": 0.1138, + "step": 21860 + }, + { + "epoch": 1.02, + "learning_rate": 1.673130280751807e-05, + "loss": 0.1885, + "step": 21865 + }, + { + "epoch": 1.02, + "learning_rate": 1.673051902246328e-05, + "loss": 0.2082, + "step": 21870 + }, + { + "epoch": 1.02, + "learning_rate": 1.6729735237408494e-05, + "loss": 0.2015, + "step": 21875 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728951452353708e-05, + "loss": 0.4405, + "step": 21880 + }, + { + "epoch": 1.02, + "learning_rate": 1.6728167667298922e-05, + "loss": 0.2858, + "step": 21885 + }, + { + "epoch": 1.02, + "learning_rate": 1.6727383882244136e-05, + "loss": 0.0538, + "step": 21890 + }, + { + "epoch": 1.02, + "learning_rate": 1.6726600097189346e-05, + "loss": 0.0779, + "step": 21895 + }, + { + "epoch": 1.02, + "learning_rate": 1.672581631213456e-05, + "loss": 0.142, + "step": 21900 + }, + { + "epoch": 1.02, + "learning_rate": 1.6725032527079774e-05, + "loss": 0.1888, + "step": 21905 + }, + { + "epoch": 1.02, + "learning_rate": 1.6724248742024988e-05, + "loss": 0.1195, + "step": 21910 + }, + { + "epoch": 1.02, + "learning_rate": 1.6723464956970202e-05, + "loss": 0.2157, + "step": 21915 + }, + { + "epoch": 1.02, + "learning_rate": 1.6722681171915416e-05, + "loss": 0.2258, + "step": 21920 + }, + { + "epoch": 1.02, + "learning_rate": 1.672189738686063e-05, + "loss": 0.3311, + "step": 21925 + }, + { + "epoch": 1.02, + "learning_rate": 1.6721113601805844e-05, + "loss": 0.3652, + "step": 21930 + }, + { + "epoch": 1.02, + "learning_rate": 1.6720329816751054e-05, + "loss": 0.3514, + "step": 21935 + }, + { + "epoch": 1.02, + "learning_rate": 1.6719546031696268e-05, + "loss": 0.0737, + "step": 21940 + }, + { + "epoch": 1.02, + "learning_rate": 1.6718762246641482e-05, + "loss": 0.0702, + "step": 21945 + }, + { + "epoch": 1.02, + "learning_rate": 1.6717978461586696e-05, + "loss": 0.1228, + "step": 21950 + }, + { + "epoch": 1.02, + "learning_rate": 1.671719467653191e-05, + "loss": 0.1717, + "step": 21955 + }, + { + "epoch": 1.02, + "learning_rate": 1.671641089147712e-05, + "loss": 0.1327, + "step": 21960 + }, + { + "epoch": 1.02, + "learning_rate": 1.6715627106422338e-05, + "loss": 0.18, + "step": 21965 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714843321367548e-05, + "loss": 0.1339, + "step": 21970 + }, + { + "epoch": 1.03, + "learning_rate": 1.6714059536312762e-05, + "loss": 0.4464, + "step": 21975 + }, + { + "epoch": 1.03, + "learning_rate": 1.6713275751257976e-05, + "loss": 0.4614, + "step": 21980 + }, + { + "epoch": 1.03, + "learning_rate": 1.671249196620319e-05, + "loss": 0.4731, + "step": 21985 + }, + { + "epoch": 1.03, + "learning_rate": 1.6711708181148404e-05, + "loss": 0.0743, + "step": 21990 + }, + { + "epoch": 1.03, + "learning_rate": 1.6710924396093618e-05, + "loss": 0.0772, + "step": 21995 + }, + { + "epoch": 1.03, + "learning_rate": 1.671014061103883e-05, + "loss": 0.1295, + "step": 22000 + }, + { + "epoch": 1.03, + "learning_rate": 1.6709356825984046e-05, + "loss": 0.1665, + "step": 22005 + }, + { + "epoch": 1.03, + "learning_rate": 1.6708573040929256e-05, + "loss": 0.1362, + "step": 22010 + }, + { + "epoch": 1.03, + "learning_rate": 1.670778925587447e-05, + "loss": 0.1636, + "step": 22015 + }, + { + "epoch": 1.03, + "learning_rate": 1.6707005470819684e-05, + "loss": 0.2899, + "step": 22020 + }, + { + "epoch": 1.03, + "learning_rate": 1.6706221685764898e-05, + "loss": 0.2897, + "step": 22025 + }, + { + "epoch": 1.03, + "learning_rate": 1.6705437900710112e-05, + "loss": 0.3886, + "step": 22030 + }, + { + "epoch": 1.03, + "learning_rate": 1.6704654115655322e-05, + "loss": 0.2882, + "step": 22035 + }, + { + "epoch": 1.03, + "learning_rate": 1.6703870330600536e-05, + "loss": 0.0652, + "step": 22040 + }, + { + "epoch": 1.03, + "learning_rate": 1.670308654554575e-05, + "loss": 0.0749, + "step": 22045 + }, + { + "epoch": 1.03, + "learning_rate": 1.6702302760490964e-05, + "loss": 0.0939, + "step": 22050 + }, + { + "epoch": 1.03, + "learning_rate": 1.6701518975436178e-05, + "loss": 0.1791, + "step": 22055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6700735190381392e-05, + "loss": 0.1139, + "step": 22060 + }, + { + "epoch": 1.03, + "learning_rate": 1.6699951405326606e-05, + "loss": 0.2075, + "step": 22065 + }, + { + "epoch": 1.03, + "learning_rate": 1.669916762027182e-05, + "loss": 0.2175, + "step": 22070 + }, + { + "epoch": 1.03, + "learning_rate": 1.669838383521703e-05, + "loss": 0.2589, + "step": 22075 + }, + { + "epoch": 1.03, + "learning_rate": 1.6697600050162247e-05, + "loss": 0.42, + "step": 22080 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696816265107458e-05, + "loss": 0.3279, + "step": 22085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6696032480052672e-05, + "loss": 0.0295, + "step": 22090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6695248694997886e-05, + "loss": 0.0318, + "step": 22095 + }, + { + "epoch": 1.03, + "learning_rate": 1.6694464909943096e-05, + "loss": 0.1887, + "step": 22100 + }, + { + "epoch": 1.03, + "learning_rate": 1.6693681124888314e-05, + "loss": 0.1017, + "step": 22105 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692897339833524e-05, + "loss": 0.145, + "step": 22110 + }, + { + "epoch": 1.03, + "learning_rate": 1.6692113554778738e-05, + "loss": 0.195, + "step": 22115 + }, + { + "epoch": 1.03, + "learning_rate": 1.6691329769723952e-05, + "loss": 0.2584, + "step": 22120 + }, + { + "epoch": 1.03, + "learning_rate": 1.6690545984669166e-05, + "loss": 0.2965, + "step": 22125 + }, + { + "epoch": 1.03, + "learning_rate": 1.668976219961438e-05, + "loss": 0.3912, + "step": 22130 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688978414559594e-05, + "loss": 0.3964, + "step": 22135 + }, + { + "epoch": 1.03, + "learning_rate": 1.6688194629504804e-05, + "loss": 0.0447, + "step": 22140 + }, + { + "epoch": 1.03, + "learning_rate": 1.668741084445002e-05, + "loss": 0.1708, + "step": 22145 + }, + { + "epoch": 1.03, + "learning_rate": 1.6686627059395232e-05, + "loss": 0.1072, + "step": 22150 + }, + { + "epoch": 1.03, + "learning_rate": 1.6685843274340446e-05, + "loss": 0.1223, + "step": 22155 + }, + { + "epoch": 1.03, + "learning_rate": 1.668505948928566e-05, + "loss": 0.1006, + "step": 22160 + }, + { + "epoch": 1.03, + "learning_rate": 1.6684275704230874e-05, + "loss": 0.1096, + "step": 22165 + }, + { + "epoch": 1.03, + "learning_rate": 1.6683491919176088e-05, + "loss": 0.2708, + "step": 22170 + }, + { + "epoch": 1.03, + "learning_rate": 1.6682708134121298e-05, + "loss": 0.3093, + "step": 22175 + }, + { + "epoch": 1.03, + "learning_rate": 1.6681924349066515e-05, + "loss": 0.3701, + "step": 22180 + }, + { + "epoch": 1.04, + "learning_rate": 1.6681140564011726e-05, + "loss": 0.251, + "step": 22185 + }, + { + "epoch": 1.04, + "learning_rate": 1.668035677895694e-05, + "loss": 0.0674, + "step": 22190 + }, + { + "epoch": 1.04, + "learning_rate": 1.6679572993902154e-05, + "loss": 0.1336, + "step": 22195 + }, + { + "epoch": 1.04, + "learning_rate": 1.6678789208847368e-05, + "loss": 0.136, + "step": 22200 + }, + { + "epoch": 1.04, + "learning_rate": 1.667800542379258e-05, + "loss": 0.0998, + "step": 22205 + }, + { + "epoch": 1.04, + "learning_rate": 1.6677221638737795e-05, + "loss": 0.13, + "step": 22210 + }, + { + "epoch": 1.04, + "learning_rate": 1.6676437853683006e-05, + "loss": 0.1497, + "step": 22215 + }, + { + "epoch": 1.04, + "learning_rate": 1.667565406862822e-05, + "loss": 0.1846, + "step": 22220 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674870283573434e-05, + "loss": 0.2499, + "step": 22225 + }, + { + "epoch": 1.04, + "learning_rate": 1.6674086498518648e-05, + "loss": 0.4191, + "step": 22230 + }, + { + "epoch": 1.04, + "learning_rate": 1.667330271346386e-05, + "loss": 0.3157, + "step": 22235 + }, + { + "epoch": 1.04, + "learning_rate": 1.6672518928409075e-05, + "loss": 0.0555, + "step": 22240 + }, + { + "epoch": 1.04, + "learning_rate": 1.667173514335429e-05, + "loss": 0.1057, + "step": 22245 + }, + { + "epoch": 1.04, + "learning_rate": 1.66709513582995e-05, + "loss": 0.0907, + "step": 22250 + }, + { + "epoch": 1.04, + "learning_rate": 1.6670167573244714e-05, + "loss": 0.1564, + "step": 22255 + }, + { + "epoch": 1.04, + "learning_rate": 1.6669383788189928e-05, + "loss": 0.1607, + "step": 22260 + }, + { + "epoch": 1.04, + "learning_rate": 1.666860000313514e-05, + "loss": 0.1959, + "step": 22265 + }, + { + "epoch": 1.04, + "learning_rate": 1.6667816218080356e-05, + "loss": 0.2283, + "step": 22270 + }, + { + "epoch": 1.04, + "learning_rate": 1.666703243302557e-05, + "loss": 0.3232, + "step": 22275 + }, + { + "epoch": 1.04, + "learning_rate": 1.6666248647970783e-05, + "loss": 0.344, + "step": 22280 + }, + { + "epoch": 1.04, + "learning_rate": 1.6665464862915994e-05, + "loss": 0.2879, + "step": 22285 + }, + { + "epoch": 1.04, + "learning_rate": 1.6664681077861208e-05, + "loss": 0.077, + "step": 22290 + }, + { + "epoch": 1.04, + "learning_rate": 1.666389729280642e-05, + "loss": 0.0681, + "step": 22295 + }, + { + "epoch": 1.04, + "learning_rate": 1.6663113507751636e-05, + "loss": 0.0788, + "step": 22300 + }, + { + "epoch": 1.04, + "learning_rate": 1.666232972269685e-05, + "loss": 0.1024, + "step": 22305 + }, + { + "epoch": 1.04, + "learning_rate": 1.6661545937642063e-05, + "loss": 0.1455, + "step": 22310 + }, + { + "epoch": 1.04, + "learning_rate": 1.6660762152587274e-05, + "loss": 0.2212, + "step": 22315 + }, + { + "epoch": 1.04, + "learning_rate": 1.665997836753249e-05, + "loss": 0.1622, + "step": 22320 + }, + { + "epoch": 1.04, + "learning_rate": 1.6659194582477702e-05, + "loss": 0.257, + "step": 22325 + }, + { + "epoch": 1.04, + "learning_rate": 1.6658410797422916e-05, + "loss": 0.386, + "step": 22330 + }, + { + "epoch": 1.04, + "learning_rate": 1.665762701236813e-05, + "loss": 0.3866, + "step": 22335 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656843227313343e-05, + "loss": 0.068, + "step": 22340 + }, + { + "epoch": 1.04, + "learning_rate": 1.6656059442258557e-05, + "loss": 0.1259, + "step": 22345 + }, + { + "epoch": 1.04, + "learning_rate": 1.6655275657203768e-05, + "loss": 0.1199, + "step": 22350 + }, + { + "epoch": 1.04, + "learning_rate": 1.6654491872148982e-05, + "loss": 0.1675, + "step": 22355 + }, + { + "epoch": 1.04, + "learning_rate": 1.6653708087094196e-05, + "loss": 0.1525, + "step": 22360 + }, + { + "epoch": 1.04, + "learning_rate": 1.665292430203941e-05, + "loss": 0.1875, + "step": 22365 + }, + { + "epoch": 1.04, + "learning_rate": 1.6652140516984623e-05, + "loss": 0.2257, + "step": 22370 + }, + { + "epoch": 1.04, + "learning_rate": 1.6651356731929837e-05, + "loss": 0.195, + "step": 22375 + }, + { + "epoch": 1.04, + "learning_rate": 1.665057294687505e-05, + "loss": 0.3703, + "step": 22380 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649789161820265e-05, + "loss": 0.3111, + "step": 22385 + }, + { + "epoch": 1.04, + "learning_rate": 1.6649005376765476e-05, + "loss": 0.0501, + "step": 22390 + }, + { + "epoch": 1.04, + "learning_rate": 1.6648221591710693e-05, + "loss": 0.1009, + "step": 22395 + }, + { + "epoch": 1.05, + "learning_rate": 1.6647437806655904e-05, + "loss": 0.1188, + "step": 22400 + }, + { + "epoch": 1.05, + "learning_rate": 1.6646654021601117e-05, + "loss": 0.1223, + "step": 22405 + }, + { + "epoch": 1.05, + "learning_rate": 1.664587023654633e-05, + "loss": 0.2532, + "step": 22410 + }, + { + "epoch": 1.05, + "learning_rate": 1.6645086451491542e-05, + "loss": 0.2103, + "step": 22415 + }, + { + "epoch": 1.05, + "learning_rate": 1.664430266643676e-05, + "loss": 0.2827, + "step": 22420 + }, + { + "epoch": 1.05, + "learning_rate": 1.664351888138197e-05, + "loss": 0.3525, + "step": 22425 + }, + { + "epoch": 1.05, + "learning_rate": 1.6642735096327184e-05, + "loss": 0.4255, + "step": 22430 + }, + { + "epoch": 1.05, + "learning_rate": 1.6641951311272397e-05, + "loss": 0.3211, + "step": 22435 + }, + { + "epoch": 1.05, + "learning_rate": 1.664116752621761e-05, + "loss": 0.0945, + "step": 22440 + }, + { + "epoch": 1.05, + "learning_rate": 1.6640383741162825e-05, + "loss": 0.0941, + "step": 22445 + }, + { + "epoch": 1.05, + "learning_rate": 1.663959995610804e-05, + "loss": 0.1018, + "step": 22450 + }, + { + "epoch": 1.05, + "learning_rate": 1.663881617105325e-05, + "loss": 0.1377, + "step": 22455 + }, + { + "epoch": 1.05, + "learning_rate": 1.6638032385998467e-05, + "loss": 0.1353, + "step": 22460 + }, + { + "epoch": 1.05, + "learning_rate": 1.6637248600943678e-05, + "loss": 0.1506, + "step": 22465 + }, + { + "epoch": 1.05, + "learning_rate": 1.663646481588889e-05, + "loss": 0.2194, + "step": 22470 + }, + { + "epoch": 1.05, + "learning_rate": 1.6635681030834105e-05, + "loss": 0.2673, + "step": 22475 + }, + { + "epoch": 1.05, + "learning_rate": 1.663489724577932e-05, + "loss": 0.4444, + "step": 22480 + }, + { + "epoch": 1.05, + "learning_rate": 1.6634113460724533e-05, + "loss": 0.4203, + "step": 22485 + }, + { + "epoch": 1.05, + "learning_rate": 1.6633329675669744e-05, + "loss": 0.0998, + "step": 22490 + }, + { + "epoch": 1.05, + "learning_rate": 1.663254589061496e-05, + "loss": 0.0645, + "step": 22495 + }, + { + "epoch": 1.05, + "learning_rate": 1.663176210556017e-05, + "loss": 0.0815, + "step": 22500 + }, + { + "epoch": 1.05, + "learning_rate": 1.6630978320505385e-05, + "loss": 0.1239, + "step": 22505 + }, + { + "epoch": 1.05, + "learning_rate": 1.66301945354506e-05, + "loss": 0.1158, + "step": 22510 + }, + { + "epoch": 1.05, + "learning_rate": 1.6629410750395813e-05, + "loss": 0.1965, + "step": 22515 + }, + { + "epoch": 1.05, + "learning_rate": 1.6628626965341027e-05, + "loss": 0.2295, + "step": 22520 + }, + { + "epoch": 1.05, + "learning_rate": 1.662784318028624e-05, + "loss": 0.3215, + "step": 22525 + }, + { + "epoch": 1.05, + "learning_rate": 1.662705939523145e-05, + "loss": 0.3832, + "step": 22530 + }, + { + "epoch": 1.05, + "learning_rate": 1.662627561017667e-05, + "loss": 0.3457, + "step": 22535 + }, + { + "epoch": 1.05, + "learning_rate": 1.662549182512188e-05, + "loss": 0.1041, + "step": 22540 + }, + { + "epoch": 1.05, + "learning_rate": 1.6624708040067093e-05, + "loss": 0.0946, + "step": 22545 + }, + { + "epoch": 1.05, + "learning_rate": 1.6623924255012307e-05, + "loss": 0.0797, + "step": 22550 + }, + { + "epoch": 1.05, + "learning_rate": 1.662314046995752e-05, + "loss": 0.1227, + "step": 22555 + }, + { + "epoch": 1.05, + "learning_rate": 1.6622356684902735e-05, + "loss": 0.1388, + "step": 22560 + }, + { + "epoch": 1.05, + "learning_rate": 1.6621572899847945e-05, + "loss": 0.1356, + "step": 22565 + }, + { + "epoch": 1.05, + "learning_rate": 1.662078911479316e-05, + "loss": 0.1936, + "step": 22570 + }, + { + "epoch": 1.05, + "learning_rate": 1.6620005329738373e-05, + "loss": 0.3707, + "step": 22575 + }, + { + "epoch": 1.05, + "learning_rate": 1.6619221544683587e-05, + "loss": 0.4417, + "step": 22580 + }, + { + "epoch": 1.05, + "learning_rate": 1.66184377596288e-05, + "loss": 0.4056, + "step": 22585 + }, + { + "epoch": 1.05, + "learning_rate": 1.6617653974574015e-05, + "loss": 0.1009, + "step": 22590 + }, + { + "epoch": 1.05, + "learning_rate": 1.661687018951923e-05, + "loss": 0.1221, + "step": 22595 + }, + { + "epoch": 1.05, + "learning_rate": 1.6616086404464443e-05, + "loss": 0.1144, + "step": 22600 + }, + { + "epoch": 1.05, + "learning_rate": 1.6615302619409653e-05, + "loss": 0.1499, + "step": 22605 + }, + { + "epoch": 1.06, + "learning_rate": 1.6614518834354867e-05, + "loss": 0.1002, + "step": 22610 + }, + { + "epoch": 1.06, + "learning_rate": 1.661373504930008e-05, + "loss": 0.1595, + "step": 22615 + }, + { + "epoch": 1.06, + "learning_rate": 1.6612951264245295e-05, + "loss": 0.2121, + "step": 22620 + }, + { + "epoch": 1.06, + "learning_rate": 1.661216747919051e-05, + "loss": 0.2338, + "step": 22625 + }, + { + "epoch": 1.06, + "learning_rate": 1.661138369413572e-05, + "loss": 0.375, + "step": 22630 + }, + { + "epoch": 1.06, + "learning_rate": 1.6610599909080937e-05, + "loss": 0.2275, + "step": 22635 + }, + { + "epoch": 1.06, + "learning_rate": 1.6609816124026147e-05, + "loss": 0.0468, + "step": 22640 + }, + { + "epoch": 1.06, + "learning_rate": 1.660903233897136e-05, + "loss": 0.0573, + "step": 22645 + }, + { + "epoch": 1.06, + "learning_rate": 1.6608248553916575e-05, + "loss": 0.0989, + "step": 22650 + }, + { + "epoch": 1.06, + "learning_rate": 1.660746476886179e-05, + "loss": 0.1472, + "step": 22655 + }, + { + "epoch": 1.06, + "learning_rate": 1.6606680983807003e-05, + "loss": 0.1244, + "step": 22660 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605897198752217e-05, + "loss": 0.2461, + "step": 22665 + }, + { + "epoch": 1.06, + "learning_rate": 1.6605113413697427e-05, + "loss": 0.1669, + "step": 22670 + }, + { + "epoch": 1.06, + "learning_rate": 1.660432962864264e-05, + "loss": 0.2483, + "step": 22675 + }, + { + "epoch": 1.06, + "learning_rate": 1.6603545843587855e-05, + "loss": 0.3463, + "step": 22680 + }, + { + "epoch": 1.06, + "learning_rate": 1.660276205853307e-05, + "loss": 0.3456, + "step": 22685 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601978273478283e-05, + "loss": 0.0599, + "step": 22690 + }, + { + "epoch": 1.06, + "learning_rate": 1.6601194488423497e-05, + "loss": 0.0639, + "step": 22695 + }, + { + "epoch": 1.06, + "learning_rate": 1.660041070336871e-05, + "loss": 0.1262, + "step": 22700 + }, + { + "epoch": 1.06, + "learning_rate": 1.659962691831392e-05, + "loss": 0.1424, + "step": 22705 + }, + { + "epoch": 1.06, + "learning_rate": 1.659884313325914e-05, + "loss": 0.1273, + "step": 22710 + }, + { + "epoch": 1.06, + "learning_rate": 1.659805934820435e-05, + "loss": 0.2208, + "step": 22715 + }, + { + "epoch": 1.06, + "learning_rate": 1.6597275563149563e-05, + "loss": 0.1814, + "step": 22720 + }, + { + "epoch": 1.06, + "learning_rate": 1.6596491778094777e-05, + "loss": 0.2325, + "step": 22725 + }, + { + "epoch": 1.06, + "learning_rate": 1.659570799303999e-05, + "loss": 0.4139, + "step": 22730 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594924207985205e-05, + "loss": 0.2675, + "step": 22735 + }, + { + "epoch": 1.06, + "learning_rate": 1.6594140422930415e-05, + "loss": 0.0246, + "step": 22740 + }, + { + "epoch": 1.06, + "learning_rate": 1.659335663787563e-05, + "loss": 0.0829, + "step": 22745 + }, + { + "epoch": 1.06, + "learning_rate": 1.6592572852820843e-05, + "loss": 0.1863, + "step": 22750 + }, + { + "epoch": 1.06, + "learning_rate": 1.6591789067766057e-05, + "loss": 0.1843, + "step": 22755 + }, + { + "epoch": 1.06, + "learning_rate": 1.659100528271127e-05, + "loss": 0.1143, + "step": 22760 + }, + { + "epoch": 1.06, + "learning_rate": 1.6590221497656485e-05, + "loss": 0.1305, + "step": 22765 + }, + { + "epoch": 1.06, + "learning_rate": 1.65894377126017e-05, + "loss": 0.229, + "step": 22770 + }, + { + "epoch": 1.06, + "learning_rate": 1.6588653927546913e-05, + "loss": 0.2498, + "step": 22775 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587870142492123e-05, + "loss": 0.3766, + "step": 22780 + }, + { + "epoch": 1.06, + "learning_rate": 1.6587086357437337e-05, + "loss": 0.2961, + "step": 22785 + }, + { + "epoch": 1.06, + "learning_rate": 1.658630257238255e-05, + "loss": 0.069, + "step": 22790 + }, + { + "epoch": 1.06, + "learning_rate": 1.6585518787327765e-05, + "loss": 0.0735, + "step": 22795 + }, + { + "epoch": 1.06, + "learning_rate": 1.658473500227298e-05, + "loss": 0.1352, + "step": 22800 + }, + { + "epoch": 1.06, + "learning_rate": 1.658395121721819e-05, + "loss": 0.1563, + "step": 22805 + }, + { + "epoch": 1.06, + "learning_rate": 1.6583167432163407e-05, + "loss": 0.155, + "step": 22810 + }, + { + "epoch": 1.06, + "learning_rate": 1.6582383647108617e-05, + "loss": 0.2341, + "step": 22815 + }, + { + "epoch": 1.06, + "learning_rate": 1.658159986205383e-05, + "loss": 0.22, + "step": 22820 + }, + { + "epoch": 1.07, + "learning_rate": 1.6580816076999045e-05, + "loss": 0.2533, + "step": 22825 + }, + { + "epoch": 1.07, + "learning_rate": 1.658003229194426e-05, + "loss": 0.5077, + "step": 22830 + }, + { + "epoch": 1.07, + "learning_rate": 1.6579248506889473e-05, + "loss": 0.49, + "step": 22835 + }, + { + "epoch": 1.07, + "learning_rate": 1.6578464721834687e-05, + "loss": 0.166, + "step": 22840 + }, + { + "epoch": 1.07, + "learning_rate": 1.6577680936779897e-05, + "loss": 0.0973, + "step": 22845 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576897151725114e-05, + "loss": 0.0986, + "step": 22850 + }, + { + "epoch": 1.07, + "learning_rate": 1.6576113366670325e-05, + "loss": 0.1956, + "step": 22855 + }, + { + "epoch": 1.07, + "learning_rate": 1.657532958161554e-05, + "loss": 0.2051, + "step": 22860 + }, + { + "epoch": 1.07, + "learning_rate": 1.6574545796560753e-05, + "loss": 0.1668, + "step": 22865 + }, + { + "epoch": 1.07, + "learning_rate": 1.6573762011505967e-05, + "loss": 0.2645, + "step": 22870 + }, + { + "epoch": 1.07, + "learning_rate": 1.657297822645118e-05, + "loss": 0.2412, + "step": 22875 + }, + { + "epoch": 1.07, + "learning_rate": 1.657219444139639e-05, + "loss": 0.36, + "step": 22880 + }, + { + "epoch": 1.07, + "learning_rate": 1.6571410656341605e-05, + "loss": 0.3128, + "step": 22885 + }, + { + "epoch": 1.07, + "learning_rate": 1.657062687128682e-05, + "loss": 0.0542, + "step": 22890 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569843086232033e-05, + "loss": 0.0474, + "step": 22895 + }, + { + "epoch": 1.07, + "learning_rate": 1.6569059301177247e-05, + "loss": 0.1289, + "step": 22900 + }, + { + "epoch": 1.07, + "learning_rate": 1.656827551612246e-05, + "loss": 0.0668, + "step": 22905 + }, + { + "epoch": 1.07, + "learning_rate": 1.6567491731067674e-05, + "loss": 0.2174, + "step": 22910 + }, + { + "epoch": 1.07, + "learning_rate": 1.656670794601289e-05, + "loss": 0.1433, + "step": 22915 + }, + { + "epoch": 1.07, + "learning_rate": 1.65659241609581e-05, + "loss": 0.1519, + "step": 22920 + }, + { + "epoch": 1.07, + "learning_rate": 1.6565140375903316e-05, + "loss": 0.2903, + "step": 22925 + }, + { + "epoch": 1.07, + "learning_rate": 1.6564356590848527e-05, + "loss": 0.4346, + "step": 22930 + }, + { + "epoch": 1.07, + "learning_rate": 1.656357280579374e-05, + "loss": 0.3024, + "step": 22935 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562789020738955e-05, + "loss": 0.031, + "step": 22940 + }, + { + "epoch": 1.07, + "learning_rate": 1.6562005235684165e-05, + "loss": 0.1083, + "step": 22945 + }, + { + "epoch": 1.07, + "learning_rate": 1.6561221450629382e-05, + "loss": 0.0789, + "step": 22950 + }, + { + "epoch": 1.07, + "learning_rate": 1.6560437665574593e-05, + "loss": 0.193, + "step": 22955 + }, + { + "epoch": 1.07, + "learning_rate": 1.6559653880519807e-05, + "loss": 0.2532, + "step": 22960 + }, + { + "epoch": 1.07, + "learning_rate": 1.655887009546502e-05, + "loss": 0.2134, + "step": 22965 + }, + { + "epoch": 1.07, + "learning_rate": 1.6558086310410235e-05, + "loss": 0.2538, + "step": 22970 + }, + { + "epoch": 1.07, + "learning_rate": 1.655730252535545e-05, + "loss": 0.2492, + "step": 22975 + }, + { + "epoch": 1.07, + "learning_rate": 1.6556518740300662e-05, + "loss": 0.4392, + "step": 22980 + }, + { + "epoch": 1.07, + "learning_rate": 1.6555734955245873e-05, + "loss": 0.2968, + "step": 22985 + }, + { + "epoch": 1.07, + "learning_rate": 1.655495117019109e-05, + "loss": 0.0663, + "step": 22990 + }, + { + "epoch": 1.07, + "learning_rate": 1.65541673851363e-05, + "loss": 0.1486, + "step": 22995 + }, + { + "epoch": 1.07, + "learning_rate": 1.6553383600081515e-05, + "loss": 0.0821, + "step": 23000 + }, + { + "epoch": 1.07, + "learning_rate": 1.655259981502673e-05, + "loss": 0.1134, + "step": 23005 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551816029971942e-05, + "loss": 0.2011, + "step": 23010 + }, + { + "epoch": 1.07, + "learning_rate": 1.6551032244917156e-05, + "loss": 0.2416, + "step": 23015 + }, + { + "epoch": 1.07, + "learning_rate": 1.6550248459862367e-05, + "loss": 0.2838, + "step": 23020 + }, + { + "epoch": 1.07, + "learning_rate": 1.6549464674807584e-05, + "loss": 0.285, + "step": 23025 + }, + { + "epoch": 1.07, + "learning_rate": 1.6548680889752795e-05, + "loss": 0.3597, + "step": 23030 + }, + { + "epoch": 1.07, + "learning_rate": 1.654789710469801e-05, + "loss": 0.4682, + "step": 23035 + }, + { + "epoch": 1.08, + "learning_rate": 1.6547113319643222e-05, + "loss": 0.0424, + "step": 23040 + }, + { + "epoch": 1.08, + "learning_rate": 1.6546329534588436e-05, + "loss": 0.0739, + "step": 23045 + }, + { + "epoch": 1.08, + "learning_rate": 1.654554574953365e-05, + "loss": 0.0871, + "step": 23050 + }, + { + "epoch": 1.08, + "learning_rate": 1.6544761964478864e-05, + "loss": 0.0759, + "step": 23055 + }, + { + "epoch": 1.08, + "learning_rate": 1.6543978179424075e-05, + "loss": 0.1438, + "step": 23060 + }, + { + "epoch": 1.08, + "learning_rate": 1.654319439436929e-05, + "loss": 0.1683, + "step": 23065 + }, + { + "epoch": 1.08, + "learning_rate": 1.6542410609314503e-05, + "loss": 0.2168, + "step": 23070 + }, + { + "epoch": 1.08, + "learning_rate": 1.6541626824259716e-05, + "loss": 0.2413, + "step": 23075 + }, + { + "epoch": 1.08, + "learning_rate": 1.654084303920493e-05, + "loss": 0.3585, + "step": 23080 + }, + { + "epoch": 1.08, + "learning_rate": 1.6540059254150144e-05, + "loss": 0.2754, + "step": 23085 + }, + { + "epoch": 1.08, + "learning_rate": 1.6539275469095358e-05, + "loss": 0.0734, + "step": 23090 + }, + { + "epoch": 1.08, + "learning_rate": 1.653849168404057e-05, + "loss": 0.1821, + "step": 23095 + }, + { + "epoch": 1.08, + "learning_rate": 1.6537707898985783e-05, + "loss": 0.136, + "step": 23100 + }, + { + "epoch": 1.08, + "learning_rate": 1.6536924113930996e-05, + "loss": 0.1626, + "step": 23105 + }, + { + "epoch": 1.08, + "learning_rate": 1.653614032887621e-05, + "loss": 0.1497, + "step": 23110 + }, + { + "epoch": 1.08, + "learning_rate": 1.6535356543821424e-05, + "loss": 0.1505, + "step": 23115 + }, + { + "epoch": 1.08, + "learning_rate": 1.6534572758766638e-05, + "loss": 0.2225, + "step": 23120 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533788973711852e-05, + "loss": 0.2445, + "step": 23125 + }, + { + "epoch": 1.08, + "learning_rate": 1.6533005188657063e-05, + "loss": 0.4143, + "step": 23130 + }, + { + "epoch": 1.08, + "learning_rate": 1.6532221403602277e-05, + "loss": 0.322, + "step": 23135 + }, + { + "epoch": 1.08, + "learning_rate": 1.653143761854749e-05, + "loss": 0.0707, + "step": 23140 + }, + { + "epoch": 1.08, + "learning_rate": 1.6530653833492704e-05, + "loss": 0.0468, + "step": 23145 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529870048437918e-05, + "loss": 0.1199, + "step": 23150 + }, + { + "epoch": 1.08, + "learning_rate": 1.6529086263383132e-05, + "loss": 0.1203, + "step": 23155 + }, + { + "epoch": 1.08, + "learning_rate": 1.6528302478328343e-05, + "loss": 0.1562, + "step": 23160 + }, + { + "epoch": 1.08, + "learning_rate": 1.652751869327356e-05, + "loss": 0.1764, + "step": 23165 + }, + { + "epoch": 1.08, + "learning_rate": 1.652673490821877e-05, + "loss": 0.1951, + "step": 23170 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525951123163984e-05, + "loss": 0.3135, + "step": 23175 + }, + { + "epoch": 1.08, + "learning_rate": 1.6525167338109198e-05, + "loss": 0.2562, + "step": 23180 + }, + { + "epoch": 1.08, + "learning_rate": 1.6524383553054412e-05, + "loss": 0.2707, + "step": 23185 + }, + { + "epoch": 1.08, + "learning_rate": 1.6523599767999626e-05, + "loss": 0.0834, + "step": 23190 + }, + { + "epoch": 1.08, + "learning_rate": 1.6522815982944837e-05, + "loss": 0.1467, + "step": 23195 + }, + { + "epoch": 1.08, + "learning_rate": 1.652203219789005e-05, + "loss": 0.093, + "step": 23200 + }, + { + "epoch": 1.08, + "learning_rate": 1.6521248412835264e-05, + "loss": 0.0902, + "step": 23205 + }, + { + "epoch": 1.08, + "learning_rate": 1.652046462778048e-05, + "loss": 0.2016, + "step": 23210 + }, + { + "epoch": 1.08, + "learning_rate": 1.6519680842725692e-05, + "loss": 0.2145, + "step": 23215 + }, + { + "epoch": 1.08, + "learning_rate": 1.6518897057670906e-05, + "loss": 0.2803, + "step": 23220 + }, + { + "epoch": 1.08, + "learning_rate": 1.651811327261612e-05, + "loss": 0.2825, + "step": 23225 + }, + { + "epoch": 1.08, + "learning_rate": 1.6517329487561334e-05, + "loss": 0.3829, + "step": 23230 + }, + { + "epoch": 1.08, + "learning_rate": 1.6516545702506544e-05, + "loss": 0.2447, + "step": 23235 + }, + { + "epoch": 1.08, + "learning_rate": 1.6515761917451762e-05, + "loss": 0.0754, + "step": 23240 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514978132396972e-05, + "loss": 0.0598, + "step": 23245 + }, + { + "epoch": 1.08, + "learning_rate": 1.6514194347342186e-05, + "loss": 0.1389, + "step": 23250 + }, + { + "epoch": 1.09, + "learning_rate": 1.65134105622874e-05, + "loss": 0.156, + "step": 23255 + }, + { + "epoch": 1.09, + "learning_rate": 1.651262677723261e-05, + "loss": 0.1372, + "step": 23260 + }, + { + "epoch": 1.09, + "learning_rate": 1.6511842992177828e-05, + "loss": 0.2297, + "step": 23265 + }, + { + "epoch": 1.09, + "learning_rate": 1.651105920712304e-05, + "loss": 0.2115, + "step": 23270 + }, + { + "epoch": 1.09, + "learning_rate": 1.6510275422068252e-05, + "loss": 0.2469, + "step": 23275 + }, + { + "epoch": 1.09, + "learning_rate": 1.6509491637013466e-05, + "loss": 0.2814, + "step": 23280 + }, + { + "epoch": 1.09, + "learning_rate": 1.650870785195868e-05, + "loss": 0.3825, + "step": 23285 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507924066903894e-05, + "loss": 0.0548, + "step": 23290 + }, + { + "epoch": 1.09, + "learning_rate": 1.6507140281849108e-05, + "loss": 0.0556, + "step": 23295 + }, + { + "epoch": 1.09, + "learning_rate": 1.650635649679432e-05, + "loss": 0.0974, + "step": 23300 + }, + { + "epoch": 1.09, + "learning_rate": 1.6505572711739536e-05, + "loss": 0.1694, + "step": 23305 + }, + { + "epoch": 1.09, + "learning_rate": 1.6504788926684746e-05, + "loss": 0.2936, + "step": 23310 + }, + { + "epoch": 1.09, + "learning_rate": 1.650400514162996e-05, + "loss": 0.1766, + "step": 23315 + }, + { + "epoch": 1.09, + "learning_rate": 1.6503221356575174e-05, + "loss": 0.1843, + "step": 23320 + }, + { + "epoch": 1.09, + "learning_rate": 1.6502437571520388e-05, + "loss": 0.3095, + "step": 23325 + }, + { + "epoch": 1.09, + "learning_rate": 1.6501653786465602e-05, + "loss": 0.3444, + "step": 23330 + }, + { + "epoch": 1.09, + "learning_rate": 1.6500870001410812e-05, + "loss": 0.2731, + "step": 23335 + }, + { + "epoch": 1.09, + "learning_rate": 1.650008621635603e-05, + "loss": 0.0252, + "step": 23340 + }, + { + "epoch": 1.09, + "learning_rate": 1.649930243130124e-05, + "loss": 0.1032, + "step": 23345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6498518646246454e-05, + "loss": 0.1265, + "step": 23350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6497734861191668e-05, + "loss": 0.1087, + "step": 23355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496951076136882e-05, + "loss": 0.1427, + "step": 23360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6496167291082096e-05, + "loss": 0.2091, + "step": 23365 + }, + { + "epoch": 1.09, + "learning_rate": 1.649538350602731e-05, + "loss": 0.7426, + "step": 23370 + }, + { + "epoch": 1.09, + "learning_rate": 1.649459972097252e-05, + "loss": 0.2259, + "step": 23375 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493815935917738e-05, + "loss": 0.4487, + "step": 23380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6493032150862948e-05, + "loss": 0.2265, + "step": 23385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6492248365808162e-05, + "loss": 0.0922, + "step": 23390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6491464580753376e-05, + "loss": 0.1548, + "step": 23395 + }, + { + "epoch": 1.09, + "learning_rate": 1.649068079569859e-05, + "loss": 0.1354, + "step": 23400 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489897010643804e-05, + "loss": 0.1246, + "step": 23405 + }, + { + "epoch": 1.09, + "learning_rate": 1.6489113225589014e-05, + "loss": 0.1558, + "step": 23410 + }, + { + "epoch": 1.09, + "learning_rate": 1.6488329440534228e-05, + "loss": 0.1481, + "step": 23415 + }, + { + "epoch": 1.09, + "learning_rate": 1.6487545655479442e-05, + "loss": 0.2439, + "step": 23420 + }, + { + "epoch": 1.09, + "learning_rate": 1.6486761870424656e-05, + "loss": 0.3442, + "step": 23425 + }, + { + "epoch": 1.09, + "learning_rate": 1.648597808536987e-05, + "loss": 0.3016, + "step": 23430 + }, + { + "epoch": 1.09, + "learning_rate": 1.6485194300315084e-05, + "loss": 0.2837, + "step": 23435 + }, + { + "epoch": 1.09, + "learning_rate": 1.6484410515260298e-05, + "loss": 0.0948, + "step": 23440 + }, + { + "epoch": 1.09, + "learning_rate": 1.648362673020551e-05, + "loss": 0.0681, + "step": 23445 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482842945150722e-05, + "loss": 0.107, + "step": 23450 + }, + { + "epoch": 1.09, + "learning_rate": 1.6482059160095936e-05, + "loss": 0.1297, + "step": 23455 + }, + { + "epoch": 1.09, + "learning_rate": 1.648127537504115e-05, + "loss": 0.1361, + "step": 23460 + }, + { + "epoch": 1.09, + "learning_rate": 1.6480491589986364e-05, + "loss": 0.142, + "step": 23465 + }, + { + "epoch": 1.1, + "learning_rate": 1.6479707804931578e-05, + "loss": 0.2334, + "step": 23470 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478924019876788e-05, + "loss": 0.277, + "step": 23475 + }, + { + "epoch": 1.1, + "learning_rate": 1.6478140234822006e-05, + "loss": 0.341, + "step": 23480 + }, + { + "epoch": 1.1, + "learning_rate": 1.6477356449767216e-05, + "loss": 0.3929, + "step": 23485 + }, + { + "epoch": 1.1, + "learning_rate": 1.647657266471243e-05, + "loss": 0.1686, + "step": 23490 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475788879657644e-05, + "loss": 0.0867, + "step": 23495 + }, + { + "epoch": 1.1, + "learning_rate": 1.6475005094602858e-05, + "loss": 0.0441, + "step": 23500 + }, + { + "epoch": 1.1, + "learning_rate": 1.647422130954807e-05, + "loss": 0.1218, + "step": 23505 + }, + { + "epoch": 1.1, + "learning_rate": 1.6473437524493286e-05, + "loss": 0.1216, + "step": 23510 + }, + { + "epoch": 1.1, + "learning_rate": 1.6472653739438496e-05, + "loss": 0.2273, + "step": 23515 + }, + { + "epoch": 1.1, + "learning_rate": 1.647186995438371e-05, + "loss": 0.3123, + "step": 23520 + }, + { + "epoch": 1.1, + "learning_rate": 1.6471086169328924e-05, + "loss": 0.2573, + "step": 23525 + }, + { + "epoch": 1.1, + "learning_rate": 1.6470302384274138e-05, + "loss": 0.4518, + "step": 23530 + }, + { + "epoch": 1.1, + "learning_rate": 1.6469518599219352e-05, + "loss": 0.2675, + "step": 23535 + }, + { + "epoch": 1.1, + "learning_rate": 1.6468734814164566e-05, + "loss": 0.0457, + "step": 23540 + }, + { + "epoch": 1.1, + "learning_rate": 1.646795102910978e-05, + "loss": 0.0892, + "step": 23545 + }, + { + "epoch": 1.1, + "learning_rate": 1.646716724405499e-05, + "loss": 0.07, + "step": 23550 + }, + { + "epoch": 1.1, + "learning_rate": 1.6466383459000207e-05, + "loss": 0.1443, + "step": 23555 + }, + { + "epoch": 1.1, + "learning_rate": 1.6465599673945418e-05, + "loss": 0.1848, + "step": 23560 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464815888890632e-05, + "loss": 0.1461, + "step": 23565 + }, + { + "epoch": 1.1, + "learning_rate": 1.6464032103835846e-05, + "loss": 0.1837, + "step": 23570 + }, + { + "epoch": 1.1, + "learning_rate": 1.646324831878106e-05, + "loss": 0.178, + "step": 23575 + }, + { + "epoch": 1.1, + "learning_rate": 1.6462464533726273e-05, + "loss": 0.5349, + "step": 23580 + }, + { + "epoch": 1.1, + "learning_rate": 1.6461680748671484e-05, + "loss": 0.3265, + "step": 23585 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460896963616698e-05, + "loss": 0.0742, + "step": 23590 + }, + { + "epoch": 1.1, + "learning_rate": 1.6460113178561912e-05, + "loss": 0.0978, + "step": 23595 + }, + { + "epoch": 1.1, + "learning_rate": 1.6459329393507126e-05, + "loss": 0.1308, + "step": 23600 + }, + { + "epoch": 1.1, + "learning_rate": 1.645854560845234e-05, + "loss": 0.1915, + "step": 23605 + }, + { + "epoch": 1.1, + "learning_rate": 1.6457761823397554e-05, + "loss": 0.094, + "step": 23610 + }, + { + "epoch": 1.1, + "learning_rate": 1.6456978038342764e-05, + "loss": 0.1869, + "step": 23615 + }, + { + "epoch": 1.1, + "learning_rate": 1.645619425328798e-05, + "loss": 0.1502, + "step": 23620 + }, + { + "epoch": 1.1, + "learning_rate": 1.6455410468233192e-05, + "loss": 0.2174, + "step": 23625 + }, + { + "epoch": 1.1, + "learning_rate": 1.6454626683178406e-05, + "loss": 0.4411, + "step": 23630 + }, + { + "epoch": 1.1, + "learning_rate": 1.645384289812362e-05, + "loss": 0.2565, + "step": 23635 + }, + { + "epoch": 1.1, + "learning_rate": 1.6453059113068834e-05, + "loss": 0.0362, + "step": 23640 + }, + { + "epoch": 1.1, + "learning_rate": 1.6452275328014047e-05, + "loss": 0.0526, + "step": 23645 + }, + { + "epoch": 1.1, + "learning_rate": 1.6451491542959258e-05, + "loss": 0.1324, + "step": 23650 + }, + { + "epoch": 1.1, + "learning_rate": 1.6450707757904475e-05, + "loss": 0.0992, + "step": 23655 + }, + { + "epoch": 1.1, + "learning_rate": 1.6449923972849686e-05, + "loss": 0.1667, + "step": 23660 + }, + { + "epoch": 1.1, + "learning_rate": 1.64491401877949e-05, + "loss": 0.2788, + "step": 23665 + }, + { + "epoch": 1.1, + "learning_rate": 1.6448356402740114e-05, + "loss": 0.2025, + "step": 23670 + }, + { + "epoch": 1.1, + "learning_rate": 1.6447572617685328e-05, + "loss": 0.2577, + "step": 23675 + }, + { + "epoch": 1.1, + "learning_rate": 1.644678883263054e-05, + "loss": 0.2843, + "step": 23680 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446005047575755e-05, + "loss": 0.2967, + "step": 23685 + }, + { + "epoch": 1.11, + "learning_rate": 1.6445221262520966e-05, + "loss": 0.0835, + "step": 23690 + }, + { + "epoch": 1.11, + "learning_rate": 1.6444437477466183e-05, + "loss": 0.0873, + "step": 23695 + }, + { + "epoch": 1.11, + "learning_rate": 1.6443653692411394e-05, + "loss": 0.1089, + "step": 23700 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442869907356608e-05, + "loss": 0.1707, + "step": 23705 + }, + { + "epoch": 1.11, + "learning_rate": 1.644208612230182e-05, + "loss": 0.191, + "step": 23710 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441302337247035e-05, + "loss": 0.124, + "step": 23715 + }, + { + "epoch": 1.11, + "learning_rate": 1.644051855219225e-05, + "loss": 0.2111, + "step": 23720 + }, + { + "epoch": 1.11, + "learning_rate": 1.643973476713746e-05, + "loss": 0.1918, + "step": 23725 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438950982082674e-05, + "loss": 0.4829, + "step": 23730 + }, + { + "epoch": 1.11, + "learning_rate": 1.6438167197027888e-05, + "loss": 0.3964, + "step": 23735 + }, + { + "epoch": 1.11, + "learning_rate": 1.64373834119731e-05, + "loss": 0.0372, + "step": 23740 + }, + { + "epoch": 1.11, + "learning_rate": 1.6436599626918315e-05, + "loss": 0.0559, + "step": 23745 + }, + { + "epoch": 1.11, + "learning_rate": 1.643581584186353e-05, + "loss": 0.1288, + "step": 23750 + }, + { + "epoch": 1.11, + "learning_rate": 1.6435032056808743e-05, + "loss": 0.1367, + "step": 23755 + }, + { + "epoch": 1.11, + "learning_rate": 1.6434248271753957e-05, + "loss": 0.0864, + "step": 23760 + }, + { + "epoch": 1.11, + "learning_rate": 1.6433464486699168e-05, + "loss": 0.1882, + "step": 23765 + }, + { + "epoch": 1.11, + "learning_rate": 1.6432680701644385e-05, + "loss": 0.2035, + "step": 23770 + }, + { + "epoch": 1.11, + "learning_rate": 1.6431896916589595e-05, + "loss": 0.3376, + "step": 23775 + }, + { + "epoch": 1.11, + "learning_rate": 1.643111313153481e-05, + "loss": 0.5252, + "step": 23780 + }, + { + "epoch": 1.11, + "learning_rate": 1.6430329346480023e-05, + "loss": 0.2832, + "step": 23785 + }, + { + "epoch": 1.11, + "learning_rate": 1.6429545561425234e-05, + "loss": 0.0732, + "step": 23790 + }, + { + "epoch": 1.11, + "learning_rate": 1.642876177637045e-05, + "loss": 0.0599, + "step": 23795 + }, + { + "epoch": 1.11, + "learning_rate": 1.642797799131566e-05, + "loss": 0.1289, + "step": 23800 + }, + { + "epoch": 1.11, + "learning_rate": 1.6427194206260876e-05, + "loss": 0.1547, + "step": 23805 + }, + { + "epoch": 1.11, + "learning_rate": 1.642641042120609e-05, + "loss": 0.0893, + "step": 23810 + }, + { + "epoch": 1.11, + "learning_rate": 1.6425626636151303e-05, + "loss": 0.1896, + "step": 23815 + }, + { + "epoch": 1.11, + "learning_rate": 1.6424842851096517e-05, + "loss": 0.2018, + "step": 23820 + }, + { + "epoch": 1.11, + "learning_rate": 1.642405906604173e-05, + "loss": 0.2314, + "step": 23825 + }, + { + "epoch": 1.11, + "learning_rate": 1.642327528098694e-05, + "loss": 0.6314, + "step": 23830 + }, + { + "epoch": 1.11, + "learning_rate": 1.642249149593216e-05, + "loss": 0.2099, + "step": 23835 + }, + { + "epoch": 1.11, + "learning_rate": 1.642170771087737e-05, + "loss": 0.0935, + "step": 23840 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420923925822583e-05, + "loss": 0.0824, + "step": 23845 + }, + { + "epoch": 1.11, + "learning_rate": 1.6420140140767797e-05, + "loss": 0.1094, + "step": 23850 + }, + { + "epoch": 1.11, + "learning_rate": 1.641935635571301e-05, + "loss": 0.1203, + "step": 23855 + }, + { + "epoch": 1.11, + "learning_rate": 1.6418572570658225e-05, + "loss": 0.0716, + "step": 23860 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417788785603436e-05, + "loss": 0.0886, + "step": 23865 + }, + { + "epoch": 1.11, + "learning_rate": 1.6417005000548653e-05, + "loss": 0.2079, + "step": 23870 + }, + { + "epoch": 1.11, + "learning_rate": 1.6416221215493863e-05, + "loss": 0.2192, + "step": 23875 + }, + { + "epoch": 1.11, + "learning_rate": 1.6415437430439077e-05, + "loss": 0.1609, + "step": 23880 + }, + { + "epoch": 1.11, + "learning_rate": 1.641465364538429e-05, + "loss": 0.427, + "step": 23885 + }, + { + "epoch": 1.11, + "learning_rate": 1.6413869860329505e-05, + "loss": 0.0594, + "step": 23890 + }, + { + "epoch": 1.11, + "learning_rate": 1.641308607527472e-05, + "loss": 0.0976, + "step": 23895 + }, + { + "epoch": 1.12, + "learning_rate": 1.6412302290219933e-05, + "loss": 0.1183, + "step": 23900 + }, + { + "epoch": 1.12, + "learning_rate": 1.6411518505165143e-05, + "loss": 0.1117, + "step": 23905 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410734720110357e-05, + "loss": 0.1788, + "step": 23910 + }, + { + "epoch": 1.12, + "learning_rate": 1.640995093505557e-05, + "loss": 0.1834, + "step": 23915 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409167150000785e-05, + "loss": 0.2126, + "step": 23920 + }, + { + "epoch": 1.12, + "learning_rate": 1.6408383364946e-05, + "loss": 0.4336, + "step": 23925 + }, + { + "epoch": 1.12, + "learning_rate": 1.6407599579891213e-05, + "loss": 0.3505, + "step": 23930 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406815794836427e-05, + "loss": 0.3065, + "step": 23935 + }, + { + "epoch": 1.12, + "learning_rate": 1.6406032009781637e-05, + "loss": 0.0399, + "step": 23940 + }, + { + "epoch": 1.12, + "learning_rate": 1.640524822472685e-05, + "loss": 0.0772, + "step": 23945 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404464439672065e-05, + "loss": 0.0448, + "step": 23950 + }, + { + "epoch": 1.12, + "learning_rate": 1.640368065461728e-05, + "loss": 0.0798, + "step": 23955 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402896869562493e-05, + "loss": 0.0925, + "step": 23960 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402113084507707e-05, + "loss": 0.1361, + "step": 23965 + }, + { + "epoch": 1.12, + "learning_rate": 1.640132929945292e-05, + "loss": 0.1452, + "step": 23970 + }, + { + "epoch": 1.12, + "learning_rate": 1.640054551439813e-05, + "loss": 0.2325, + "step": 23975 + }, + { + "epoch": 1.12, + "learning_rate": 1.6399761729343345e-05, + "loss": 0.3527, + "step": 23980 + }, + { + "epoch": 1.12, + "learning_rate": 1.639897794428856e-05, + "loss": 0.2864, + "step": 23985 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398194159233773e-05, + "loss": 0.0622, + "step": 23990 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397410374178987e-05, + "loss": 0.0723, + "step": 23995 + }, + { + "epoch": 1.12, + "learning_rate": 1.63966265891242e-05, + "loss": 0.1408, + "step": 24000 + }, + { + "epoch": 1.12, + "learning_rate": 1.639584280406941e-05, + "loss": 0.1051, + "step": 24005 + }, + { + "epoch": 1.12, + "learning_rate": 1.639505901901463e-05, + "loss": 0.1084, + "step": 24010 + }, + { + "epoch": 1.12, + "learning_rate": 1.639427523395984e-05, + "loss": 0.2103, + "step": 24015 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393491448905053e-05, + "loss": 0.2023, + "step": 24020 + }, + { + "epoch": 1.12, + "learning_rate": 1.6392707663850267e-05, + "loss": 0.2348, + "step": 24025 + }, + { + "epoch": 1.12, + "learning_rate": 1.639192387879548e-05, + "loss": 0.3672, + "step": 24030 + }, + { + "epoch": 1.12, + "learning_rate": 1.6391140093740695e-05, + "loss": 0.3526, + "step": 24035 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390356308685905e-05, + "loss": 0.0778, + "step": 24040 + }, + { + "epoch": 1.12, + "learning_rate": 1.638957252363112e-05, + "loss": 0.1023, + "step": 24045 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388788738576333e-05, + "loss": 0.0873, + "step": 24050 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388004953521547e-05, + "loss": 0.1086, + "step": 24055 + }, + { + "epoch": 1.12, + "learning_rate": 1.638722116846676e-05, + "loss": 0.1551, + "step": 24060 + }, + { + "epoch": 1.12, + "learning_rate": 1.6386437383411975e-05, + "loss": 0.1625, + "step": 24065 + }, + { + "epoch": 1.12, + "learning_rate": 1.638565359835719e-05, + "loss": 0.2734, + "step": 24070 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384869813302403e-05, + "loss": 0.2265, + "step": 24075 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384086028247613e-05, + "loss": 0.2489, + "step": 24080 + }, + { + "epoch": 1.12, + "learning_rate": 1.638330224319283e-05, + "loss": 0.3076, + "step": 24085 + }, + { + "epoch": 1.12, + "learning_rate": 1.638251845813804e-05, + "loss": 0.1064, + "step": 24090 + }, + { + "epoch": 1.12, + "learning_rate": 1.6381734673083255e-05, + "loss": 0.0911, + "step": 24095 + }, + { + "epoch": 1.12, + "learning_rate": 1.638095088802847e-05, + "loss": 0.0852, + "step": 24100 + }, + { + "epoch": 1.12, + "learning_rate": 1.638016710297368e-05, + "loss": 0.1027, + "step": 24105 + }, + { + "epoch": 1.13, + "learning_rate": 1.6379383317918897e-05, + "loss": 0.1949, + "step": 24110 + }, + { + "epoch": 1.13, + "learning_rate": 1.6378599532864107e-05, + "loss": 0.1723, + "step": 24115 + }, + { + "epoch": 1.13, + "learning_rate": 1.637781574780932e-05, + "loss": 0.207, + "step": 24120 + }, + { + "epoch": 1.13, + "learning_rate": 1.6377031962754535e-05, + "loss": 0.2483, + "step": 24125 + }, + { + "epoch": 1.13, + "learning_rate": 1.637624817769975e-05, + "loss": 0.4968, + "step": 24130 + }, + { + "epoch": 1.13, + "learning_rate": 1.6375464392644963e-05, + "loss": 0.3258, + "step": 24135 + }, + { + "epoch": 1.13, + "learning_rate": 1.6374680607590177e-05, + "loss": 0.0432, + "step": 24140 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373896822535387e-05, + "loss": 0.0878, + "step": 24145 + }, + { + "epoch": 1.13, + "learning_rate": 1.6373113037480605e-05, + "loss": 0.163, + "step": 24150 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372329252425815e-05, + "loss": 0.2356, + "step": 24155 + }, + { + "epoch": 1.13, + "learning_rate": 1.637154546737103e-05, + "loss": 0.1573, + "step": 24160 + }, + { + "epoch": 1.13, + "learning_rate": 1.6370761682316243e-05, + "loss": 0.1774, + "step": 24165 + }, + { + "epoch": 1.13, + "learning_rate": 1.6369977897261457e-05, + "loss": 0.1667, + "step": 24170 + }, + { + "epoch": 1.13, + "learning_rate": 1.636919411220667e-05, + "loss": 0.1914, + "step": 24175 + }, + { + "epoch": 1.13, + "learning_rate": 1.636841032715188e-05, + "loss": 0.3366, + "step": 24180 + }, + { + "epoch": 1.13, + "learning_rate": 1.63676265420971e-05, + "loss": 0.389, + "step": 24185 + }, + { + "epoch": 1.13, + "learning_rate": 1.636684275704231e-05, + "loss": 0.0601, + "step": 24190 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366058971987523e-05, + "loss": 0.0784, + "step": 24195 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365275186932737e-05, + "loss": 0.1066, + "step": 24200 + }, + { + "epoch": 1.13, + "learning_rate": 1.636449140187795e-05, + "loss": 0.1525, + "step": 24205 + }, + { + "epoch": 1.13, + "learning_rate": 1.6363707616823165e-05, + "loss": 0.1988, + "step": 24210 + }, + { + "epoch": 1.13, + "learning_rate": 1.636292383176838e-05, + "loss": 0.252, + "step": 24215 + }, + { + "epoch": 1.13, + "learning_rate": 1.636214004671359e-05, + "loss": 0.3322, + "step": 24220 + }, + { + "epoch": 1.13, + "learning_rate": 1.6361356261658806e-05, + "loss": 0.2519, + "step": 24225 + }, + { + "epoch": 1.13, + "learning_rate": 1.6360572476604017e-05, + "loss": 0.2909, + "step": 24230 + }, + { + "epoch": 1.13, + "learning_rate": 1.635978869154923e-05, + "loss": 0.3463, + "step": 24235 + }, + { + "epoch": 1.13, + "learning_rate": 1.6359004906494445e-05, + "loss": 0.0224, + "step": 24240 + }, + { + "epoch": 1.13, + "learning_rate": 1.635822112143966e-05, + "loss": 0.0675, + "step": 24245 + }, + { + "epoch": 1.13, + "learning_rate": 1.6357437336384872e-05, + "loss": 0.1372, + "step": 24250 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356653551330083e-05, + "loss": 0.1832, + "step": 24255 + }, + { + "epoch": 1.13, + "learning_rate": 1.6355869766275297e-05, + "loss": 0.1108, + "step": 24260 + }, + { + "epoch": 1.13, + "learning_rate": 1.635508598122051e-05, + "loss": 0.1944, + "step": 24265 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354302196165725e-05, + "loss": 0.1828, + "step": 24270 + }, + { + "epoch": 1.13, + "learning_rate": 1.635351841111094e-05, + "loss": 0.2393, + "step": 24275 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352734626056153e-05, + "loss": 0.4225, + "step": 24280 + }, + { + "epoch": 1.13, + "learning_rate": 1.6351950841001366e-05, + "loss": 0.3735, + "step": 24285 + }, + { + "epoch": 1.13, + "learning_rate": 1.635116705594658e-05, + "loss": 0.0227, + "step": 24290 + }, + { + "epoch": 1.13, + "learning_rate": 1.635038327089179e-05, + "loss": 0.0606, + "step": 24295 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349599485837005e-05, + "loss": 0.0897, + "step": 24300 + }, + { + "epoch": 1.13, + "learning_rate": 1.634881570078222e-05, + "loss": 0.1893, + "step": 24305 + }, + { + "epoch": 1.13, + "learning_rate": 1.6348031915727433e-05, + "loss": 0.1348, + "step": 24310 + }, + { + "epoch": 1.13, + "learning_rate": 1.6347248130672646e-05, + "loss": 0.1975, + "step": 24315 + }, + { + "epoch": 1.13, + "learning_rate": 1.6346464345617857e-05, + "loss": 0.2427, + "step": 24320 + }, + { + "epoch": 1.14, + "learning_rate": 1.6345680560563074e-05, + "loss": 0.2059, + "step": 24325 + }, + { + "epoch": 1.14, + "learning_rate": 1.6344896775508285e-05, + "loss": 0.4388, + "step": 24330 + }, + { + "epoch": 1.14, + "learning_rate": 1.63441129904535e-05, + "loss": 0.2305, + "step": 24335 + }, + { + "epoch": 1.14, + "learning_rate": 1.6343329205398713e-05, + "loss": 0.0722, + "step": 24340 + }, + { + "epoch": 1.14, + "learning_rate": 1.6342545420343927e-05, + "loss": 0.1261, + "step": 24345 + }, + { + "epoch": 1.14, + "learning_rate": 1.634176163528914e-05, + "loss": 0.0991, + "step": 24350 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340977850234354e-05, + "loss": 0.1313, + "step": 24355 + }, + { + "epoch": 1.14, + "learning_rate": 1.6340194065179565e-05, + "loss": 0.1284, + "step": 24360 + }, + { + "epoch": 1.14, + "learning_rate": 1.633941028012478e-05, + "loss": 0.2371, + "step": 24365 + }, + { + "epoch": 1.14, + "learning_rate": 1.6338626495069993e-05, + "loss": 0.2578, + "step": 24370 + }, + { + "epoch": 1.14, + "learning_rate": 1.6337842710015207e-05, + "loss": 0.2852, + "step": 24375 + }, + { + "epoch": 1.14, + "learning_rate": 1.633705892496042e-05, + "loss": 0.3184, + "step": 24380 + }, + { + "epoch": 1.14, + "learning_rate": 1.6336275139905634e-05, + "loss": 0.2727, + "step": 24385 + }, + { + "epoch": 1.14, + "learning_rate": 1.6335491354850848e-05, + "loss": 0.0995, + "step": 24390 + }, + { + "epoch": 1.14, + "learning_rate": 1.633470756979606e-05, + "loss": 0.075, + "step": 24395 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333923784741276e-05, + "loss": 0.1169, + "step": 24400 + }, + { + "epoch": 1.14, + "learning_rate": 1.6333139999686487e-05, + "loss": 0.1186, + "step": 24405 + }, + { + "epoch": 1.14, + "learning_rate": 1.63323562146317e-05, + "loss": 0.1519, + "step": 24410 + }, + { + "epoch": 1.14, + "learning_rate": 1.6331572429576914e-05, + "loss": 0.1897, + "step": 24415 + }, + { + "epoch": 1.14, + "learning_rate": 1.633078864452213e-05, + "loss": 0.2469, + "step": 24420 + }, + { + "epoch": 1.14, + "learning_rate": 1.6330004859467342e-05, + "loss": 0.2405, + "step": 24425 + }, + { + "epoch": 1.14, + "learning_rate": 1.6329221074412553e-05, + "loss": 0.5484, + "step": 24430 + }, + { + "epoch": 1.14, + "learning_rate": 1.6328437289357767e-05, + "loss": 0.3294, + "step": 24435 + }, + { + "epoch": 1.14, + "learning_rate": 1.632765350430298e-05, + "loss": 0.0427, + "step": 24440 + }, + { + "epoch": 1.14, + "learning_rate": 1.6326869719248194e-05, + "loss": 0.1192, + "step": 24445 + }, + { + "epoch": 1.14, + "learning_rate": 1.632608593419341e-05, + "loss": 0.0817, + "step": 24450 + }, + { + "epoch": 1.14, + "learning_rate": 1.6325302149138622e-05, + "loss": 0.0814, + "step": 24455 + }, + { + "epoch": 1.14, + "learning_rate": 1.6324518364083833e-05, + "loss": 0.1558, + "step": 24460 + }, + { + "epoch": 1.14, + "learning_rate": 1.632373457902905e-05, + "loss": 0.149, + "step": 24465 + }, + { + "epoch": 1.14, + "learning_rate": 1.632295079397426e-05, + "loss": 0.1717, + "step": 24470 + }, + { + "epoch": 1.14, + "learning_rate": 1.6322167008919475e-05, + "loss": 0.2424, + "step": 24475 + }, + { + "epoch": 1.14, + "learning_rate": 1.632138322386469e-05, + "loss": 0.2916, + "step": 24480 + }, + { + "epoch": 1.14, + "learning_rate": 1.6320599438809902e-05, + "loss": 0.3675, + "step": 24485 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319815653755116e-05, + "loss": 0.1092, + "step": 24490 + }, + { + "epoch": 1.14, + "learning_rate": 1.6319031868700327e-05, + "loss": 0.086, + "step": 24495 + }, + { + "epoch": 1.14, + "learning_rate": 1.6318248083645544e-05, + "loss": 0.1483, + "step": 24500 + }, + { + "epoch": 1.14, + "learning_rate": 1.6317464298590755e-05, + "loss": 0.1295, + "step": 24505 + }, + { + "epoch": 1.14, + "learning_rate": 1.631668051353597e-05, + "loss": 0.1388, + "step": 24510 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315896728481182e-05, + "loss": 0.2248, + "step": 24515 + }, + { + "epoch": 1.14, + "learning_rate": 1.6315112943426396e-05, + "loss": 0.229, + "step": 24520 + }, + { + "epoch": 1.14, + "learning_rate": 1.631432915837161e-05, + "loss": 0.2604, + "step": 24525 + }, + { + "epoch": 1.14, + "learning_rate": 1.6313545373316824e-05, + "loss": 0.3488, + "step": 24530 + }, + { + "epoch": 1.14, + "learning_rate": 1.6312761588262035e-05, + "loss": 0.2816, + "step": 24535 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311977803207252e-05, + "loss": 0.0694, + "step": 24540 + }, + { + "epoch": 1.15, + "learning_rate": 1.6311194018152462e-05, + "loss": 0.0728, + "step": 24545 + }, + { + "epoch": 1.15, + "learning_rate": 1.6310410233097676e-05, + "loss": 0.058, + "step": 24550 + }, + { + "epoch": 1.15, + "learning_rate": 1.630962644804289e-05, + "loss": 0.0766, + "step": 24555 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308842662988104e-05, + "loss": 0.1177, + "step": 24560 + }, + { + "epoch": 1.15, + "learning_rate": 1.6308058877933318e-05, + "loss": 0.1314, + "step": 24565 + }, + { + "epoch": 1.15, + "learning_rate": 1.630727509287853e-05, + "loss": 0.1472, + "step": 24570 + }, + { + "epoch": 1.15, + "learning_rate": 1.6306491307823742e-05, + "loss": 0.3199, + "step": 24575 + }, + { + "epoch": 1.15, + "learning_rate": 1.6305707522768956e-05, + "loss": 0.3317, + "step": 24580 + }, + { + "epoch": 1.15, + "learning_rate": 1.630492373771417e-05, + "loss": 0.3662, + "step": 24585 + }, + { + "epoch": 1.15, + "learning_rate": 1.6304139952659384e-05, + "loss": 0.0684, + "step": 24590 + }, + { + "epoch": 1.15, + "learning_rate": 1.6303356167604598e-05, + "loss": 0.0825, + "step": 24595 + }, + { + "epoch": 1.15, + "learning_rate": 1.6302572382549812e-05, + "loss": 0.0647, + "step": 24600 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301788597495026e-05, + "loss": 0.101, + "step": 24605 + }, + { + "epoch": 1.15, + "learning_rate": 1.6301004812440236e-05, + "loss": 0.1126, + "step": 24610 + }, + { + "epoch": 1.15, + "learning_rate": 1.6300221027385454e-05, + "loss": 0.1619, + "step": 24615 + }, + { + "epoch": 1.15, + "learning_rate": 1.6299437242330664e-05, + "loss": 0.248, + "step": 24620 + }, + { + "epoch": 1.15, + "learning_rate": 1.6298653457275878e-05, + "loss": 0.2743, + "step": 24625 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297869672221092e-05, + "loss": 0.4154, + "step": 24630 + }, + { + "epoch": 1.15, + "learning_rate": 1.6297085887166303e-05, + "loss": 0.3563, + "step": 24635 + }, + { + "epoch": 1.15, + "learning_rate": 1.629630210211152e-05, + "loss": 0.0512, + "step": 24640 + }, + { + "epoch": 1.15, + "learning_rate": 1.629551831705673e-05, + "loss": 0.0548, + "step": 24645 + }, + { + "epoch": 1.15, + "learning_rate": 1.6294734532001944e-05, + "loss": 0.0815, + "step": 24650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293950746947158e-05, + "loss": 0.1276, + "step": 24655 + }, + { + "epoch": 1.15, + "learning_rate": 1.6293166961892372e-05, + "loss": 0.141, + "step": 24660 + }, + { + "epoch": 1.15, + "learning_rate": 1.6292383176837586e-05, + "loss": 0.2118, + "step": 24665 + }, + { + "epoch": 1.15, + "learning_rate": 1.62915993917828e-05, + "loss": 0.1749, + "step": 24670 + }, + { + "epoch": 1.15, + "learning_rate": 1.629081560672801e-05, + "loss": 0.2312, + "step": 24675 + }, + { + "epoch": 1.15, + "learning_rate": 1.6290031821673228e-05, + "loss": 0.5181, + "step": 24680 + }, + { + "epoch": 1.15, + "learning_rate": 1.6289248036618438e-05, + "loss": 0.2571, + "step": 24685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6288464251563652e-05, + "loss": 0.0409, + "step": 24690 + }, + { + "epoch": 1.15, + "learning_rate": 1.6287680466508866e-05, + "loss": 0.0747, + "step": 24695 + }, + { + "epoch": 1.15, + "learning_rate": 1.628689668145408e-05, + "loss": 0.1181, + "step": 24700 + }, + { + "epoch": 1.15, + "learning_rate": 1.6286112896399294e-05, + "loss": 0.1395, + "step": 24705 + }, + { + "epoch": 1.15, + "learning_rate": 1.6285329111344504e-05, + "loss": 0.1435, + "step": 24710 + }, + { + "epoch": 1.15, + "learning_rate": 1.628454532628972e-05, + "loss": 0.174, + "step": 24715 + }, + { + "epoch": 1.15, + "learning_rate": 1.6283761541234932e-05, + "loss": 0.1197, + "step": 24720 + }, + { + "epoch": 1.15, + "learning_rate": 1.6282977756180146e-05, + "loss": 0.2038, + "step": 24725 + }, + { + "epoch": 1.15, + "learning_rate": 1.628219397112536e-05, + "loss": 0.3092, + "step": 24730 + }, + { + "epoch": 1.15, + "learning_rate": 1.6281410186070574e-05, + "loss": 0.4175, + "step": 24735 + }, + { + "epoch": 1.15, + "learning_rate": 1.6280626401015788e-05, + "loss": 0.0201, + "step": 24740 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279842615961002e-05, + "loss": 0.1052, + "step": 24745 + }, + { + "epoch": 1.15, + "learning_rate": 1.6279058830906212e-05, + "loss": 0.1185, + "step": 24750 + }, + { + "epoch": 1.16, + "learning_rate": 1.6278275045851426e-05, + "loss": 0.1012, + "step": 24755 + }, + { + "epoch": 1.16, + "learning_rate": 1.627749126079664e-05, + "loss": 0.2098, + "step": 24760 + }, + { + "epoch": 1.16, + "learning_rate": 1.6276707475741854e-05, + "loss": 0.1507, + "step": 24765 + }, + { + "epoch": 1.16, + "learning_rate": 1.6275923690687068e-05, + "loss": 0.1518, + "step": 24770 + }, + { + "epoch": 1.16, + "learning_rate": 1.627513990563228e-05, + "loss": 0.2168, + "step": 24775 + }, + { + "epoch": 1.16, + "learning_rate": 1.6274356120577496e-05, + "loss": 0.3516, + "step": 24780 + }, + { + "epoch": 1.16, + "learning_rate": 1.6273572335522706e-05, + "loss": 0.3077, + "step": 24785 + }, + { + "epoch": 1.16, + "learning_rate": 1.627278855046792e-05, + "loss": 0.0342, + "step": 24790 + }, + { + "epoch": 1.16, + "learning_rate": 1.6272004765413134e-05, + "loss": 0.083, + "step": 24795 + }, + { + "epoch": 1.16, + "learning_rate": 1.6271220980358348e-05, + "loss": 0.0801, + "step": 24800 + }, + { + "epoch": 1.16, + "learning_rate": 1.6270437195303562e-05, + "loss": 0.1694, + "step": 24805 + }, + { + "epoch": 1.16, + "learning_rate": 1.6269653410248776e-05, + "loss": 0.1143, + "step": 24810 + }, + { + "epoch": 1.16, + "learning_rate": 1.626886962519399e-05, + "loss": 0.0924, + "step": 24815 + }, + { + "epoch": 1.16, + "learning_rate": 1.62680858401392e-05, + "loss": 0.237, + "step": 24820 + }, + { + "epoch": 1.16, + "learning_rate": 1.6267302055084414e-05, + "loss": 0.1925, + "step": 24825 + }, + { + "epoch": 1.16, + "learning_rate": 1.6266518270029628e-05, + "loss": 0.3285, + "step": 24830 + }, + { + "epoch": 1.16, + "learning_rate": 1.6265734484974842e-05, + "loss": 0.4066, + "step": 24835 + }, + { + "epoch": 1.16, + "learning_rate": 1.6264950699920056e-05, + "loss": 0.0913, + "step": 24840 + }, + { + "epoch": 1.16, + "learning_rate": 1.626416691486527e-05, + "loss": 0.09, + "step": 24845 + }, + { + "epoch": 1.16, + "learning_rate": 1.626338312981048e-05, + "loss": 0.1174, + "step": 24850 + }, + { + "epoch": 1.16, + "learning_rate": 1.6262599344755697e-05, + "loss": 0.0969, + "step": 24855 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261815559700908e-05, + "loss": 0.1173, + "step": 24860 + }, + { + "epoch": 1.16, + "learning_rate": 1.6261031774646122e-05, + "loss": 0.222, + "step": 24865 + }, + { + "epoch": 1.16, + "learning_rate": 1.6260247989591336e-05, + "loss": 0.2767, + "step": 24870 + }, + { + "epoch": 1.16, + "learning_rate": 1.625946420453655e-05, + "loss": 0.1782, + "step": 24875 + }, + { + "epoch": 1.16, + "learning_rate": 1.6258680419481764e-05, + "loss": 0.2277, + "step": 24880 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257896634426974e-05, + "loss": 0.2343, + "step": 24885 + }, + { + "epoch": 1.16, + "learning_rate": 1.6257112849372188e-05, + "loss": 0.3339, + "step": 24890 + }, + { + "epoch": 1.16, + "learning_rate": 1.6256329064317402e-05, + "loss": 0.0648, + "step": 24895 + }, + { + "epoch": 1.16, + "learning_rate": 1.6255545279262616e-05, + "loss": 0.1105, + "step": 24900 + }, + { + "epoch": 1.16, + "learning_rate": 1.625476149420783e-05, + "loss": 0.1184, + "step": 24905 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253977709153044e-05, + "loss": 0.166, + "step": 24910 + }, + { + "epoch": 1.16, + "learning_rate": 1.6253193924098258e-05, + "loss": 0.098, + "step": 24915 + }, + { + "epoch": 1.16, + "learning_rate": 1.625241013904347e-05, + "loss": 0.2253, + "step": 24920 + }, + { + "epoch": 1.16, + "learning_rate": 1.6251626353988682e-05, + "loss": 0.2517, + "step": 24925 + }, + { + "epoch": 1.16, + "learning_rate": 1.62508425689339e-05, + "loss": 0.287, + "step": 24930 + }, + { + "epoch": 1.16, + "learning_rate": 1.625005878387911e-05, + "loss": 0.3426, + "step": 24935 + }, + { + "epoch": 1.16, + "learning_rate": 1.6249274998824324e-05, + "loss": 0.0747, + "step": 24940 + }, + { + "epoch": 1.16, + "learning_rate": 1.6248491213769538e-05, + "loss": 0.0584, + "step": 24945 + }, + { + "epoch": 1.16, + "learning_rate": 1.6247707428714748e-05, + "loss": 0.1144, + "step": 24950 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246923643659965e-05, + "loss": 0.1062, + "step": 24955 + }, + { + "epoch": 1.16, + "learning_rate": 1.6246139858605176e-05, + "loss": 0.1777, + "step": 24960 + }, + { + "epoch": 1.16, + "learning_rate": 1.624535607355039e-05, + "loss": 0.1773, + "step": 24965 + }, + { + "epoch": 1.17, + "learning_rate": 1.6244572288495604e-05, + "loss": 0.2313, + "step": 24970 + }, + { + "epoch": 1.17, + "learning_rate": 1.6243788503440818e-05, + "loss": 0.2795, + "step": 24975 + }, + { + "epoch": 1.17, + "learning_rate": 1.624300471838603e-05, + "loss": 0.3885, + "step": 24980 + }, + { + "epoch": 1.17, + "learning_rate": 1.6242220933331245e-05, + "loss": 0.3908, + "step": 24985 + }, + { + "epoch": 1.17, + "learning_rate": 1.6241437148276456e-05, + "loss": 0.0605, + "step": 24990 + }, + { + "epoch": 1.17, + "learning_rate": 1.6240653363221673e-05, + "loss": 0.085, + "step": 24995 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239869578166884e-05, + "loss": 0.1085, + "step": 25000 + }, + { + "epoch": 1.17, + "learning_rate": 1.6239085793112098e-05, + "loss": 0.0463, + "step": 25005 + }, + { + "epoch": 1.17, + "learning_rate": 1.623830200805731e-05, + "loss": 0.1531, + "step": 25010 + }, + { + "epoch": 1.17, + "learning_rate": 1.6237518223002526e-05, + "loss": 0.2199, + "step": 25015 + }, + { + "epoch": 1.17, + "learning_rate": 1.623673443794774e-05, + "loss": 0.2393, + "step": 25020 + }, + { + "epoch": 1.17, + "learning_rate": 1.623595065289295e-05, + "loss": 0.3733, + "step": 25025 + }, + { + "epoch": 1.17, + "learning_rate": 1.6235166867838167e-05, + "loss": 0.3225, + "step": 25030 + }, + { + "epoch": 1.17, + "learning_rate": 1.6234383082783378e-05, + "loss": 0.2117, + "step": 25035 + }, + { + "epoch": 1.17, + "learning_rate": 1.623359929772859e-05, + "loss": 0.0419, + "step": 25040 + }, + { + "epoch": 1.17, + "learning_rate": 1.6232815512673806e-05, + "loss": 0.1021, + "step": 25045 + }, + { + "epoch": 1.17, + "learning_rate": 1.623203172761902e-05, + "loss": 0.1235, + "step": 25050 + }, + { + "epoch": 1.17, + "learning_rate": 1.6231247942564233e-05, + "loss": 0.1249, + "step": 25055 + }, + { + "epoch": 1.17, + "learning_rate": 1.6230464157509447e-05, + "loss": 0.192, + "step": 25060 + }, + { + "epoch": 1.17, + "learning_rate": 1.6229680372454658e-05, + "loss": 0.1072, + "step": 25065 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228896587399875e-05, + "loss": 0.1035, + "step": 25070 + }, + { + "epoch": 1.17, + "learning_rate": 1.6228112802345086e-05, + "loss": 0.2934, + "step": 25075 + }, + { + "epoch": 1.17, + "learning_rate": 1.62273290172903e-05, + "loss": 0.4639, + "step": 25080 + }, + { + "epoch": 1.17, + "learning_rate": 1.6226545232235513e-05, + "loss": 0.235, + "step": 25085 + }, + { + "epoch": 1.17, + "learning_rate": 1.6225761447180727e-05, + "loss": 0.0493, + "step": 25090 + }, + { + "epoch": 1.17, + "learning_rate": 1.622497766212594e-05, + "loss": 0.0572, + "step": 25095 + }, + { + "epoch": 1.17, + "learning_rate": 1.6224193877071152e-05, + "loss": 0.0987, + "step": 25100 + }, + { + "epoch": 1.17, + "learning_rate": 1.6223410092016366e-05, + "loss": 0.1056, + "step": 25105 + }, + { + "epoch": 1.17, + "learning_rate": 1.622262630696158e-05, + "loss": 0.1613, + "step": 25110 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221842521906793e-05, + "loss": 0.1629, + "step": 25115 + }, + { + "epoch": 1.17, + "learning_rate": 1.6221058736852007e-05, + "loss": 0.2078, + "step": 25120 + }, + { + "epoch": 1.17, + "learning_rate": 1.622027495179722e-05, + "loss": 0.1947, + "step": 25125 + }, + { + "epoch": 1.17, + "learning_rate": 1.6219491166742435e-05, + "loss": 0.3467, + "step": 25130 + }, + { + "epoch": 1.17, + "learning_rate": 1.621870738168765e-05, + "loss": 0.2284, + "step": 25135 + }, + { + "epoch": 1.17, + "learning_rate": 1.621792359663286e-05, + "loss": 0.0624, + "step": 25140 + }, + { + "epoch": 1.17, + "learning_rate": 1.6217139811578074e-05, + "loss": 0.0692, + "step": 25145 + }, + { + "epoch": 1.17, + "learning_rate": 1.6216356026523287e-05, + "loss": 0.1222, + "step": 25150 + }, + { + "epoch": 1.17, + "learning_rate": 1.62155722414685e-05, + "loss": 0.1692, + "step": 25155 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214788456413715e-05, + "loss": 0.1629, + "step": 25160 + }, + { + "epoch": 1.17, + "learning_rate": 1.6214004671358926e-05, + "loss": 0.1671, + "step": 25165 + }, + { + "epoch": 1.17, + "learning_rate": 1.6213220886304143e-05, + "loss": 0.2507, + "step": 25170 + }, + { + "epoch": 1.17, + "learning_rate": 1.6212437101249354e-05, + "loss": 0.2868, + "step": 25175 + }, + { + "epoch": 1.17, + "learning_rate": 1.6211653316194567e-05, + "loss": 0.3525, + "step": 25180 + }, + { + "epoch": 1.18, + "learning_rate": 1.621086953113978e-05, + "loss": 0.3272, + "step": 25185 + }, + { + "epoch": 1.18, + "learning_rate": 1.6210085746084995e-05, + "loss": 0.0452, + "step": 25190 + }, + { + "epoch": 1.18, + "learning_rate": 1.620930196103021e-05, + "loss": 0.0713, + "step": 25195 + }, + { + "epoch": 1.18, + "learning_rate": 1.6208518175975423e-05, + "loss": 0.0912, + "step": 25200 + }, + { + "epoch": 1.18, + "learning_rate": 1.6207734390920634e-05, + "loss": 0.1543, + "step": 25205 + }, + { + "epoch": 1.18, + "learning_rate": 1.6206950605865848e-05, + "loss": 0.1041, + "step": 25210 + }, + { + "epoch": 1.18, + "learning_rate": 1.620616682081106e-05, + "loss": 0.141, + "step": 25215 + }, + { + "epoch": 1.18, + "learning_rate": 1.6205383035756275e-05, + "loss": 0.1966, + "step": 25220 + }, + { + "epoch": 1.18, + "learning_rate": 1.620459925070149e-05, + "loss": 0.3014, + "step": 25225 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203815465646703e-05, + "loss": 0.4792, + "step": 25230 + }, + { + "epoch": 1.18, + "learning_rate": 1.6203031680591917e-05, + "loss": 0.2327, + "step": 25235 + }, + { + "epoch": 1.18, + "learning_rate": 1.6202247895537128e-05, + "loss": 0.0362, + "step": 25240 + }, + { + "epoch": 1.18, + "learning_rate": 1.6201464110482345e-05, + "loss": 0.1217, + "step": 25245 + }, + { + "epoch": 1.18, + "learning_rate": 1.6200680325427555e-05, + "loss": 0.0556, + "step": 25250 + }, + { + "epoch": 1.18, + "learning_rate": 1.619989654037277e-05, + "loss": 0.1454, + "step": 25255 + }, + { + "epoch": 1.18, + "learning_rate": 1.6199112755317983e-05, + "loss": 0.1041, + "step": 25260 + }, + { + "epoch": 1.18, + "learning_rate": 1.6198328970263197e-05, + "loss": 0.1451, + "step": 25265 + }, + { + "epoch": 1.18, + "learning_rate": 1.619754518520841e-05, + "loss": 0.2237, + "step": 25270 + }, + { + "epoch": 1.18, + "learning_rate": 1.619691815716458e-05, + "loss": 0.2745, + "step": 25275 + }, + { + "epoch": 1.18, + "learning_rate": 1.6196134372109792e-05, + "loss": 0.4215, + "step": 25280 + }, + { + "epoch": 1.18, + "learning_rate": 1.619535058705501e-05, + "loss": 0.481, + "step": 25285 + }, + { + "epoch": 1.18, + "learning_rate": 1.619456680200022e-05, + "loss": 0.0337, + "step": 25290 + }, + { + "epoch": 1.18, + "learning_rate": 1.6193783016945434e-05, + "loss": 0.102, + "step": 25295 + }, + { + "epoch": 1.18, + "learning_rate": 1.6192999231890648e-05, + "loss": 0.0853, + "step": 25300 + }, + { + "epoch": 1.18, + "learning_rate": 1.619221544683586e-05, + "loss": 0.1298, + "step": 25305 + }, + { + "epoch": 1.18, + "learning_rate": 1.6191431661781075e-05, + "loss": 0.151, + "step": 25310 + }, + { + "epoch": 1.18, + "learning_rate": 1.619064787672629e-05, + "loss": 0.1743, + "step": 25315 + }, + { + "epoch": 1.18, + "learning_rate": 1.61898640916715e-05, + "loss": 0.247, + "step": 25320 + }, + { + "epoch": 1.18, + "learning_rate": 1.6189080306616717e-05, + "loss": 0.2403, + "step": 25325 + }, + { + "epoch": 1.18, + "learning_rate": 1.6188296521561928e-05, + "loss": 0.4411, + "step": 25330 + }, + { + "epoch": 1.18, + "learning_rate": 1.618751273650714e-05, + "loss": 0.2575, + "step": 25335 + }, + { + "epoch": 1.18, + "learning_rate": 1.6186728951452355e-05, + "loss": 0.0465, + "step": 25340 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185945166397566e-05, + "loss": 0.0688, + "step": 25345 + }, + { + "epoch": 1.18, + "learning_rate": 1.6185161381342783e-05, + "loss": 0.1129, + "step": 25350 + }, + { + "epoch": 1.18, + "learning_rate": 1.6184377596287994e-05, + "loss": 0.1276, + "step": 25355 + }, + { + "epoch": 1.18, + "learning_rate": 1.6183593811233208e-05, + "loss": 0.0864, + "step": 25360 + }, + { + "epoch": 1.18, + "learning_rate": 1.618281002617842e-05, + "loss": 0.1639, + "step": 25365 + }, + { + "epoch": 1.18, + "learning_rate": 1.6182026241123636e-05, + "loss": 0.1556, + "step": 25370 + }, + { + "epoch": 1.18, + "learning_rate": 1.618124245606885e-05, + "loss": 0.3036, + "step": 25375 + }, + { + "epoch": 1.18, + "learning_rate": 1.6180458671014063e-05, + "loss": 0.3818, + "step": 25380 + }, + { + "epoch": 1.18, + "learning_rate": 1.6179674885959277e-05, + "loss": 0.2293, + "step": 25385 + }, + { + "epoch": 1.18, + "learning_rate": 1.617889110090449e-05, + "loss": 0.0748, + "step": 25390 + }, + { + "epoch": 1.18, + "learning_rate": 1.61781073158497e-05, + "loss": 0.1107, + "step": 25395 + }, + { + "epoch": 1.19, + "learning_rate": 1.6177323530794916e-05, + "loss": 0.166, + "step": 25400 + }, + { + "epoch": 1.19, + "learning_rate": 1.617653974574013e-05, + "loss": 0.0938, + "step": 25405 + }, + { + "epoch": 1.19, + "learning_rate": 1.6175755960685343e-05, + "loss": 0.1765, + "step": 25410 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174972175630557e-05, + "loss": 0.1421, + "step": 25415 + }, + { + "epoch": 1.19, + "learning_rate": 1.6174188390575768e-05, + "loss": 0.169, + "step": 25420 + }, + { + "epoch": 1.19, + "learning_rate": 1.6173404605520985e-05, + "loss": 0.2901, + "step": 25425 + }, + { + "epoch": 1.19, + "learning_rate": 1.6172620820466196e-05, + "loss": 0.4629, + "step": 25430 + }, + { + "epoch": 1.19, + "learning_rate": 1.617183703541141e-05, + "loss": 0.4004, + "step": 25435 + }, + { + "epoch": 1.19, + "learning_rate": 1.6171053250356623e-05, + "loss": 0.0532, + "step": 25440 + }, + { + "epoch": 1.19, + "learning_rate": 1.6170269465301837e-05, + "loss": 0.0672, + "step": 25445 + }, + { + "epoch": 1.19, + "learning_rate": 1.616948568024705e-05, + "loss": 0.1277, + "step": 25450 + }, + { + "epoch": 1.19, + "learning_rate": 1.6168701895192265e-05, + "loss": 0.0931, + "step": 25455 + }, + { + "epoch": 1.19, + "learning_rate": 1.6167918110137476e-05, + "loss": 0.1628, + "step": 25460 + }, + { + "epoch": 1.19, + "learning_rate": 1.616713432508269e-05, + "loss": 0.1642, + "step": 25465 + }, + { + "epoch": 1.19, + "learning_rate": 1.6166350540027903e-05, + "loss": 0.1649, + "step": 25470 + }, + { + "epoch": 1.19, + "learning_rate": 1.6165566754973117e-05, + "loss": 0.2531, + "step": 25475 + }, + { + "epoch": 1.19, + "learning_rate": 1.616478296991833e-05, + "loss": 0.4141, + "step": 25480 + }, + { + "epoch": 1.19, + "learning_rate": 1.6163999184863545e-05, + "loss": 0.3355, + "step": 25485 + }, + { + "epoch": 1.19, + "learning_rate": 1.616321539980876e-05, + "loss": 0.0631, + "step": 25490 + }, + { + "epoch": 1.19, + "learning_rate": 1.616243161475397e-05, + "loss": 0.0915, + "step": 25495 + }, + { + "epoch": 1.19, + "learning_rate": 1.6161647829699187e-05, + "loss": 0.1369, + "step": 25500 + }, + { + "epoch": 1.19, + "learning_rate": 1.6160864044644397e-05, + "loss": 0.1394, + "step": 25505 + }, + { + "epoch": 1.19, + "learning_rate": 1.616008025958961e-05, + "loss": 0.096, + "step": 25510 + }, + { + "epoch": 1.19, + "learning_rate": 1.6159296474534825e-05, + "loss": 0.1094, + "step": 25515 + }, + { + "epoch": 1.19, + "learning_rate": 1.615851268948004e-05, + "loss": 0.1217, + "step": 25520 + }, + { + "epoch": 1.19, + "learning_rate": 1.6157728904425253e-05, + "loss": 0.2451, + "step": 25525 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156945119370464e-05, + "loss": 0.2088, + "step": 25530 + }, + { + "epoch": 1.19, + "learning_rate": 1.6156161334315677e-05, + "loss": 0.2696, + "step": 25535 + }, + { + "epoch": 1.19, + "learning_rate": 1.615537754926089e-05, + "loss": 0.0968, + "step": 25540 + }, + { + "epoch": 1.19, + "learning_rate": 1.6154593764206105e-05, + "loss": 0.0515, + "step": 25545 + }, + { + "epoch": 1.19, + "learning_rate": 1.615380997915132e-05, + "loss": 0.0912, + "step": 25550 + }, + { + "epoch": 1.19, + "learning_rate": 1.6153026194096533e-05, + "loss": 0.1158, + "step": 25555 + }, + { + "epoch": 1.19, + "learning_rate": 1.6152242409041744e-05, + "loss": 0.1445, + "step": 25560 + }, + { + "epoch": 1.19, + "learning_rate": 1.615145862398696e-05, + "loss": 0.1769, + "step": 25565 + }, + { + "epoch": 1.19, + "learning_rate": 1.615067483893217e-05, + "loss": 0.1704, + "step": 25570 + }, + { + "epoch": 1.19, + "learning_rate": 1.6149891053877385e-05, + "loss": 0.2812, + "step": 25575 + }, + { + "epoch": 1.19, + "learning_rate": 1.61491072688226e-05, + "loss": 0.3944, + "step": 25580 + }, + { + "epoch": 1.19, + "learning_rate": 1.6148323483767813e-05, + "loss": 0.2521, + "step": 25585 + }, + { + "epoch": 1.19, + "learning_rate": 1.6147539698713027e-05, + "loss": 0.0945, + "step": 25590 + }, + { + "epoch": 1.19, + "learning_rate": 1.6146755913658238e-05, + "loss": 0.1083, + "step": 25595 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145972128603455e-05, + "loss": 0.0934, + "step": 25600 + }, + { + "epoch": 1.19, + "learning_rate": 1.6145188343548665e-05, + "loss": 0.1016, + "step": 25605 + }, + { + "epoch": 1.19, + "learning_rate": 1.614440455849388e-05, + "loss": 0.1465, + "step": 25610 + }, + { + "epoch": 1.2, + "learning_rate": 1.6143620773439093e-05, + "loss": 0.1531, + "step": 25615 + }, + { + "epoch": 1.2, + "learning_rate": 1.6142836988384307e-05, + "loss": 0.1764, + "step": 25620 + }, + { + "epoch": 1.2, + "learning_rate": 1.614205320332952e-05, + "loss": 0.2381, + "step": 25625 + }, + { + "epoch": 1.2, + "learning_rate": 1.6141269418274735e-05, + "loss": 0.4556, + "step": 25630 + }, + { + "epoch": 1.2, + "learning_rate": 1.6140485633219945e-05, + "loss": 0.3126, + "step": 25635 + }, + { + "epoch": 1.2, + "learning_rate": 1.6139701848165163e-05, + "loss": 0.0332, + "step": 25640 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138918063110373e-05, + "loss": 0.0406, + "step": 25645 + }, + { + "epoch": 1.2, + "learning_rate": 1.6138134278055587e-05, + "loss": 0.1147, + "step": 25650 + }, + { + "epoch": 1.2, + "learning_rate": 1.61373504930008e-05, + "loss": 0.1279, + "step": 25655 + }, + { + "epoch": 1.2, + "learning_rate": 1.613656670794601e-05, + "loss": 0.1488, + "step": 25660 + }, + { + "epoch": 1.2, + "learning_rate": 1.613578292289123e-05, + "loss": 0.0947, + "step": 25665 + }, + { + "epoch": 1.2, + "learning_rate": 1.613499913783644e-05, + "loss": 0.147, + "step": 25670 + }, + { + "epoch": 1.2, + "learning_rate": 1.6134215352781653e-05, + "loss": 0.2196, + "step": 25675 + }, + { + "epoch": 1.2, + "learning_rate": 1.6133431567726867e-05, + "loss": 0.3325, + "step": 25680 + }, + { + "epoch": 1.2, + "learning_rate": 1.613264778267208e-05, + "loss": 0.3548, + "step": 25685 + }, + { + "epoch": 1.2, + "learning_rate": 1.6131863997617295e-05, + "loss": 0.0497, + "step": 25690 + }, + { + "epoch": 1.2, + "learning_rate": 1.613108021256251e-05, + "loss": 0.0752, + "step": 25695 + }, + { + "epoch": 1.2, + "learning_rate": 1.6130296427507723e-05, + "loss": 0.0716, + "step": 25700 + }, + { + "epoch": 1.2, + "learning_rate": 1.6129512642452937e-05, + "loss": 0.1445, + "step": 25705 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128728857398147e-05, + "loss": 0.1371, + "step": 25710 + }, + { + "epoch": 1.2, + "learning_rate": 1.6128101829354318e-05, + "loss": 0.1388, + "step": 25715 + }, + { + "epoch": 1.2, + "learning_rate": 1.6127318044299535e-05, + "loss": 0.1818, + "step": 25720 + }, + { + "epoch": 1.2, + "learning_rate": 1.6126534259244745e-05, + "loss": 0.1747, + "step": 25725 + }, + { + "epoch": 1.2, + "learning_rate": 1.612575047418996e-05, + "loss": 0.2925, + "step": 25730 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124966689135173e-05, + "loss": 0.212, + "step": 25735 + }, + { + "epoch": 1.2, + "learning_rate": 1.6124182904080384e-05, + "loss": 0.07, + "step": 25740 + }, + { + "epoch": 1.2, + "learning_rate": 1.61233991190256e-05, + "loss": 0.0871, + "step": 25745 + }, + { + "epoch": 1.2, + "learning_rate": 1.612261533397081e-05, + "loss": 0.1572, + "step": 25750 + }, + { + "epoch": 1.2, + "learning_rate": 1.6121831548916026e-05, + "loss": 0.1053, + "step": 25755 + }, + { + "epoch": 1.2, + "learning_rate": 1.612104776386124e-05, + "loss": 0.1303, + "step": 25760 + }, + { + "epoch": 1.2, + "learning_rate": 1.6120263978806453e-05, + "loss": 0.2018, + "step": 25765 + }, + { + "epoch": 1.2, + "learning_rate": 1.6119480193751667e-05, + "loss": 0.2664, + "step": 25770 + }, + { + "epoch": 1.2, + "learning_rate": 1.611869640869688e-05, + "loss": 0.2745, + "step": 25775 + }, + { + "epoch": 1.2, + "learning_rate": 1.6117912623642095e-05, + "loss": 0.4014, + "step": 25780 + }, + { + "epoch": 1.2, + "learning_rate": 1.611712883858731e-05, + "loss": 0.2742, + "step": 25785 + }, + { + "epoch": 1.2, + "learning_rate": 1.611634505353252e-05, + "loss": 0.094, + "step": 25790 + }, + { + "epoch": 1.2, + "learning_rate": 1.6115561268477733e-05, + "loss": 0.1958, + "step": 25795 + }, + { + "epoch": 1.2, + "learning_rate": 1.6114777483422947e-05, + "loss": 0.0788, + "step": 25800 + }, + { + "epoch": 1.2, + "learning_rate": 1.611399369836816e-05, + "loss": 0.106, + "step": 25805 + }, + { + "epoch": 1.2, + "learning_rate": 1.6113209913313375e-05, + "loss": 0.1569, + "step": 25810 + }, + { + "epoch": 1.2, + "learning_rate": 1.6112426128258586e-05, + "loss": 0.126, + "step": 25815 + }, + { + "epoch": 1.2, + "learning_rate": 1.6111642343203803e-05, + "loss": 0.1446, + "step": 25820 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110858558149013e-05, + "loss": 0.3583, + "step": 25825 + }, + { + "epoch": 1.21, + "learning_rate": 1.6110074773094227e-05, + "loss": 0.3009, + "step": 25830 + }, + { + "epoch": 1.21, + "learning_rate": 1.610929098803944e-05, + "loss": 0.3158, + "step": 25835 + }, + { + "epoch": 1.21, + "learning_rate": 1.6108507202984655e-05, + "loss": 0.1105, + "step": 25840 + }, + { + "epoch": 1.21, + "learning_rate": 1.610772341792987e-05, + "loss": 0.0567, + "step": 25845 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106939632875083e-05, + "loss": 0.0254, + "step": 25850 + }, + { + "epoch": 1.21, + "learning_rate": 1.6106155847820293e-05, + "loss": 0.1127, + "step": 25855 + }, + { + "epoch": 1.21, + "learning_rate": 1.6105372062765507e-05, + "loss": 0.165, + "step": 25860 + }, + { + "epoch": 1.21, + "learning_rate": 1.610458827771072e-05, + "loss": 0.1691, + "step": 25865 + }, + { + "epoch": 1.21, + "learning_rate": 1.6103804492655935e-05, + "loss": 0.1748, + "step": 25870 + }, + { + "epoch": 1.21, + "learning_rate": 1.610302070760115e-05, + "loss": 0.3087, + "step": 25875 + }, + { + "epoch": 1.21, + "learning_rate": 1.6102236922546363e-05, + "loss": 0.3935, + "step": 25880 + }, + { + "epoch": 1.21, + "learning_rate": 1.6101453137491577e-05, + "loss": 0.3608, + "step": 25885 + }, + { + "epoch": 1.21, + "learning_rate": 1.6100669352436787e-05, + "loss": 0.0722, + "step": 25890 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099885567382005e-05, + "loss": 0.1239, + "step": 25895 + }, + { + "epoch": 1.21, + "learning_rate": 1.6099101782327215e-05, + "loss": 0.0837, + "step": 25900 + }, + { + "epoch": 1.21, + "learning_rate": 1.609831799727243e-05, + "loss": 0.1534, + "step": 25905 + }, + { + "epoch": 1.21, + "learning_rate": 1.6097534212217643e-05, + "loss": 0.1598, + "step": 25910 + }, + { + "epoch": 1.21, + "learning_rate": 1.6096750427162857e-05, + "loss": 0.1712, + "step": 25915 + }, + { + "epoch": 1.21, + "learning_rate": 1.609596664210807e-05, + "loss": 0.1853, + "step": 25920 + }, + { + "epoch": 1.21, + "learning_rate": 1.609518285705328e-05, + "loss": 0.2454, + "step": 25925 + }, + { + "epoch": 1.21, + "learning_rate": 1.6094399071998495e-05, + "loss": 0.303, + "step": 25930 + }, + { + "epoch": 1.21, + "learning_rate": 1.609361528694371e-05, + "loss": 0.3649, + "step": 25935 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092831501888923e-05, + "loss": 0.0425, + "step": 25940 + }, + { + "epoch": 1.21, + "learning_rate": 1.6092047716834137e-05, + "loss": 0.1058, + "step": 25945 + }, + { + "epoch": 1.21, + "learning_rate": 1.609126393177935e-05, + "loss": 0.0823, + "step": 25950 + }, + { + "epoch": 1.21, + "learning_rate": 1.609048014672456e-05, + "loss": 0.0905, + "step": 25955 + }, + { + "epoch": 1.21, + "learning_rate": 1.608969636166978e-05, + "loss": 0.1479, + "step": 25960 + }, + { + "epoch": 1.21, + "learning_rate": 1.608891257661499e-05, + "loss": 0.1194, + "step": 25965 + }, + { + "epoch": 1.21, + "learning_rate": 1.6088128791560203e-05, + "loss": 0.2551, + "step": 25970 + }, + { + "epoch": 1.21, + "learning_rate": 1.6087345006505417e-05, + "loss": 0.2567, + "step": 25975 + }, + { + "epoch": 1.21, + "learning_rate": 1.608656122145063e-05, + "loss": 0.4998, + "step": 25980 + }, + { + "epoch": 1.21, + "learning_rate": 1.6085777436395845e-05, + "loss": 0.3416, + "step": 25985 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084993651341055e-05, + "loss": 0.0866, + "step": 25990 + }, + { + "epoch": 1.21, + "learning_rate": 1.6084209866286273e-05, + "loss": 0.1128, + "step": 25995 + }, + { + "epoch": 1.21, + "learning_rate": 1.6083426081231483e-05, + "loss": 0.1394, + "step": 26000 + }, + { + "epoch": 1.21, + "learning_rate": 1.6082642296176697e-05, + "loss": 0.163, + "step": 26005 + }, + { + "epoch": 1.21, + "learning_rate": 1.608185851112191e-05, + "loss": 0.1127, + "step": 26010 + }, + { + "epoch": 1.21, + "learning_rate": 1.6081074726067125e-05, + "loss": 0.1923, + "step": 26015 + }, + { + "epoch": 1.21, + "learning_rate": 1.608029094101234e-05, + "loss": 0.1738, + "step": 26020 + }, + { + "epoch": 1.21, + "learning_rate": 1.6079507155957553e-05, + "loss": 0.2733, + "step": 26025 + }, + { + "epoch": 1.21, + "learning_rate": 1.6078723370902763e-05, + "loss": 0.3812, + "step": 26030 + }, + { + "epoch": 1.21, + "learning_rate": 1.607793958584798e-05, + "loss": 0.2911, + "step": 26035 + }, + { + "epoch": 1.22, + "learning_rate": 1.607715580079319e-05, + "loss": 0.0426, + "step": 26040 + }, + { + "epoch": 1.22, + "learning_rate": 1.6076372015738405e-05, + "loss": 0.1156, + "step": 26045 + }, + { + "epoch": 1.22, + "learning_rate": 1.607558823068362e-05, + "loss": 0.1092, + "step": 26050 + }, + { + "epoch": 1.22, + "learning_rate": 1.607480444562883e-05, + "loss": 0.1071, + "step": 26055 + }, + { + "epoch": 1.22, + "learning_rate": 1.6074020660574047e-05, + "loss": 0.1825, + "step": 26060 + }, + { + "epoch": 1.22, + "learning_rate": 1.6073236875519257e-05, + "loss": 0.2019, + "step": 26065 + }, + { + "epoch": 1.22, + "learning_rate": 1.607245309046447e-05, + "loss": 0.2312, + "step": 26070 + }, + { + "epoch": 1.22, + "learning_rate": 1.6071669305409685e-05, + "loss": 0.1766, + "step": 26075 + }, + { + "epoch": 1.22, + "learning_rate": 1.60708855203549e-05, + "loss": 0.3091, + "step": 26080 + }, + { + "epoch": 1.22, + "learning_rate": 1.6070101735300113e-05, + "loss": 0.35, + "step": 26085 + }, + { + "epoch": 1.22, + "learning_rate": 1.6069317950245327e-05, + "loss": 0.0862, + "step": 26090 + }, + { + "epoch": 1.22, + "learning_rate": 1.606853416519054e-05, + "loss": 0.0783, + "step": 26095 + }, + { + "epoch": 1.22, + "learning_rate": 1.6067750380135755e-05, + "loss": 0.0974, + "step": 26100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066966595080965e-05, + "loss": 0.0428, + "step": 26105 + }, + { + "epoch": 1.22, + "learning_rate": 1.6066182810026182e-05, + "loss": 0.1272, + "step": 26110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6065399024971393e-05, + "loss": 0.1728, + "step": 26115 + }, + { + "epoch": 1.22, + "learning_rate": 1.6064615239916607e-05, + "loss": 0.2236, + "step": 26120 + }, + { + "epoch": 1.22, + "learning_rate": 1.606383145486182e-05, + "loss": 0.3677, + "step": 26125 + }, + { + "epoch": 1.22, + "learning_rate": 1.606304766980703e-05, + "loss": 0.4215, + "step": 26130 + }, + { + "epoch": 1.22, + "learning_rate": 1.606226388475225e-05, + "loss": 0.3085, + "step": 26135 + }, + { + "epoch": 1.22, + "learning_rate": 1.606148009969746e-05, + "loss": 0.0464, + "step": 26140 + }, + { + "epoch": 1.22, + "learning_rate": 1.6060696314642673e-05, + "loss": 0.0697, + "step": 26145 + }, + { + "epoch": 1.22, + "learning_rate": 1.6059912529587887e-05, + "loss": 0.1091, + "step": 26150 + }, + { + "epoch": 1.22, + "learning_rate": 1.60591287445331e-05, + "loss": 0.1073, + "step": 26155 + }, + { + "epoch": 1.22, + "learning_rate": 1.6058344959478315e-05, + "loss": 0.1585, + "step": 26160 + }, + { + "epoch": 1.22, + "learning_rate": 1.605756117442353e-05, + "loss": 0.1376, + "step": 26165 + }, + { + "epoch": 1.22, + "learning_rate": 1.605677738936874e-05, + "loss": 0.2462, + "step": 26170 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055993604313956e-05, + "loss": 0.1889, + "step": 26175 + }, + { + "epoch": 1.22, + "learning_rate": 1.6055209819259167e-05, + "loss": 0.3838, + "step": 26180 + }, + { + "epoch": 1.22, + "learning_rate": 1.605442603420438e-05, + "loss": 0.3044, + "step": 26185 + }, + { + "epoch": 1.22, + "learning_rate": 1.6053642249149595e-05, + "loss": 0.0562, + "step": 26190 + }, + { + "epoch": 1.22, + "learning_rate": 1.605285846409481e-05, + "loss": 0.0978, + "step": 26195 + }, + { + "epoch": 1.22, + "learning_rate": 1.6052074679040022e-05, + "loss": 0.1215, + "step": 26200 + }, + { + "epoch": 1.22, + "learning_rate": 1.6051290893985233e-05, + "loss": 0.1945, + "step": 26205 + }, + { + "epoch": 1.22, + "learning_rate": 1.605050710893045e-05, + "loss": 0.1485, + "step": 26210 + }, + { + "epoch": 1.22, + "learning_rate": 1.604972332387566e-05, + "loss": 0.0891, + "step": 26215 + }, + { + "epoch": 1.22, + "learning_rate": 1.6048939538820875e-05, + "loss": 0.1399, + "step": 26220 + }, + { + "epoch": 1.22, + "learning_rate": 1.604815575376609e-05, + "loss": 0.205, + "step": 26225 + }, + { + "epoch": 1.22, + "learning_rate": 1.6047371968711303e-05, + "loss": 0.3367, + "step": 26230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6046588183656516e-05, + "loss": 0.2997, + "step": 26235 + }, + { + "epoch": 1.22, + "learning_rate": 1.604580439860173e-05, + "loss": 0.0592, + "step": 26240 + }, + { + "epoch": 1.22, + "learning_rate": 1.604502061354694e-05, + "loss": 0.0551, + "step": 26245 + }, + { + "epoch": 1.22, + "learning_rate": 1.6044236828492155e-05, + "loss": 0.0715, + "step": 26250 + }, + { + "epoch": 1.23, + "learning_rate": 1.604345304343737e-05, + "loss": 0.1212, + "step": 26255 + }, + { + "epoch": 1.23, + "learning_rate": 1.6042669258382583e-05, + "loss": 0.1241, + "step": 26260 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041885473327796e-05, + "loss": 0.1764, + "step": 26265 + }, + { + "epoch": 1.23, + "learning_rate": 1.6041101688273007e-05, + "loss": 0.1995, + "step": 26270 + }, + { + "epoch": 1.23, + "learning_rate": 1.6040317903218224e-05, + "loss": 0.2772, + "step": 26275 + }, + { + "epoch": 1.23, + "learning_rate": 1.6039534118163435e-05, + "loss": 0.3426, + "step": 26280 + }, + { + "epoch": 1.23, + "learning_rate": 1.603875033310865e-05, + "loss": 0.2971, + "step": 26285 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037966548053863e-05, + "loss": 0.065, + "step": 26290 + }, + { + "epoch": 1.23, + "learning_rate": 1.6037182762999077e-05, + "loss": 0.0287, + "step": 26295 + }, + { + "epoch": 1.23, + "learning_rate": 1.603639897794429e-05, + "loss": 0.1137, + "step": 26300 + }, + { + "epoch": 1.23, + "learning_rate": 1.6035615192889504e-05, + "loss": 0.1516, + "step": 26305 + }, + { + "epoch": 1.23, + "learning_rate": 1.6034831407834718e-05, + "loss": 0.0944, + "step": 26310 + }, + { + "epoch": 1.23, + "learning_rate": 1.603404762277993e-05, + "loss": 0.1105, + "step": 26315 + }, + { + "epoch": 1.23, + "learning_rate": 1.6033263837725143e-05, + "loss": 0.1635, + "step": 26320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6032480052670357e-05, + "loss": 0.2127, + "step": 26325 + }, + { + "epoch": 1.23, + "learning_rate": 1.603169626761557e-05, + "loss": 0.4454, + "step": 26330 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030912482560784e-05, + "loss": 0.3167, + "step": 26335 + }, + { + "epoch": 1.23, + "learning_rate": 1.6030128697505998e-05, + "loss": 0.0731, + "step": 26340 + }, + { + "epoch": 1.23, + "learning_rate": 1.602934491245121e-05, + "loss": 0.0499, + "step": 26345 + }, + { + "epoch": 1.23, + "learning_rate": 1.6028561127396426e-05, + "loss": 0.097, + "step": 26350 + }, + { + "epoch": 1.23, + "learning_rate": 1.6027777342341637e-05, + "loss": 0.0544, + "step": 26355 + }, + { + "epoch": 1.23, + "learning_rate": 1.602699355728685e-05, + "loss": 0.1498, + "step": 26360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6026209772232064e-05, + "loss": 0.1206, + "step": 26365 + }, + { + "epoch": 1.23, + "learning_rate": 1.602542598717728e-05, + "loss": 0.2232, + "step": 26370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6024642202122492e-05, + "loss": 0.3615, + "step": 26375 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023858417067703e-05, + "loss": 0.3271, + "step": 26380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6023074632012917e-05, + "loss": 0.3493, + "step": 26385 + }, + { + "epoch": 1.23, + "learning_rate": 1.602229084695813e-05, + "loss": 0.0708, + "step": 26390 + }, + { + "epoch": 1.23, + "learning_rate": 1.6021507061903344e-05, + "loss": 0.124, + "step": 26395 + }, + { + "epoch": 1.23, + "learning_rate": 1.602072327684856e-05, + "loss": 0.0928, + "step": 26400 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019939491793772e-05, + "loss": 0.0961, + "step": 26405 + }, + { + "epoch": 1.23, + "learning_rate": 1.6019155706738986e-05, + "loss": 0.1516, + "step": 26410 + }, + { + "epoch": 1.23, + "learning_rate": 1.60183719216842e-05, + "loss": 0.1803, + "step": 26415 + }, + { + "epoch": 1.23, + "learning_rate": 1.601758813662941e-05, + "loss": 0.1668, + "step": 26420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6016804351574628e-05, + "loss": 0.2315, + "step": 26425 + }, + { + "epoch": 1.23, + "learning_rate": 1.601602056651984e-05, + "loss": 0.353, + "step": 26430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6015236781465052e-05, + "loss": 0.319, + "step": 26435 + }, + { + "epoch": 1.23, + "learning_rate": 1.6014452996410266e-05, + "loss": 0.0558, + "step": 26440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6013669211355477e-05, + "loss": 0.1412, + "step": 26445 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012885426300694e-05, + "loss": 0.0911, + "step": 26450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6012101641245905e-05, + "loss": 0.0855, + "step": 26455 + }, + { + "epoch": 1.23, + "learning_rate": 1.601131785619112e-05, + "loss": 0.1342, + "step": 26460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010534071136332e-05, + "loss": 0.1502, + "step": 26465 + }, + { + "epoch": 1.24, + "learning_rate": 1.6009750286081546e-05, + "loss": 0.2149, + "step": 26470 + }, + { + "epoch": 1.24, + "learning_rate": 1.600896650102676e-05, + "loss": 0.1451, + "step": 26475 + }, + { + "epoch": 1.24, + "learning_rate": 1.6008182715971974e-05, + "loss": 0.38, + "step": 26480 + }, + { + "epoch": 1.24, + "learning_rate": 1.6007398930917185e-05, + "loss": 0.5451, + "step": 26485 + }, + { + "epoch": 1.24, + "learning_rate": 1.6006615145862402e-05, + "loss": 0.0726, + "step": 26490 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005831360807612e-05, + "loss": 0.0716, + "step": 26495 + }, + { + "epoch": 1.24, + "learning_rate": 1.6005047575752826e-05, + "loss": 0.084, + "step": 26500 + }, + { + "epoch": 1.24, + "learning_rate": 1.600426379069804e-05, + "loss": 0.1345, + "step": 26505 + }, + { + "epoch": 1.24, + "learning_rate": 1.6003480005643254e-05, + "loss": 0.2791, + "step": 26510 + }, + { + "epoch": 1.24, + "learning_rate": 1.6002696220588468e-05, + "loss": 0.1281, + "step": 26515 + }, + { + "epoch": 1.24, + "learning_rate": 1.600191243553368e-05, + "loss": 0.2016, + "step": 26520 + }, + { + "epoch": 1.24, + "learning_rate": 1.6001128650478896e-05, + "loss": 0.3165, + "step": 26525 + }, + { + "epoch": 1.24, + "learning_rate": 1.6000344865424106e-05, + "loss": 0.336, + "step": 26530 + }, + { + "epoch": 1.24, + "learning_rate": 1.599956108036932e-05, + "loss": 0.3234, + "step": 26535 + }, + { + "epoch": 1.24, + "learning_rate": 1.5998777295314534e-05, + "loss": 0.1082, + "step": 26540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997993510259748e-05, + "loss": 0.0455, + "step": 26545 + }, + { + "epoch": 1.24, + "learning_rate": 1.5997209725204962e-05, + "loss": 0.1116, + "step": 26550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5996425940150176e-05, + "loss": 0.0805, + "step": 26555 + }, + { + "epoch": 1.24, + "learning_rate": 1.5995642155095386e-05, + "loss": 0.1438, + "step": 26560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994858370040604e-05, + "loss": 0.1412, + "step": 26565 + }, + { + "epoch": 1.24, + "learning_rate": 1.5994074584985814e-05, + "loss": 0.2316, + "step": 26570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5993290799931028e-05, + "loss": 0.2124, + "step": 26575 + }, + { + "epoch": 1.24, + "learning_rate": 1.5992507014876242e-05, + "loss": 0.3504, + "step": 26580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5991723229821453e-05, + "loss": 0.3047, + "step": 26585 + }, + { + "epoch": 1.24, + "learning_rate": 1.599093944476667e-05, + "loss": 0.0622, + "step": 26590 + }, + { + "epoch": 1.24, + "learning_rate": 1.599015565971188e-05, + "loss": 0.0547, + "step": 26595 + }, + { + "epoch": 1.24, + "learning_rate": 1.5989371874657094e-05, + "loss": 0.0788, + "step": 26600 + }, + { + "epoch": 1.24, + "learning_rate": 1.5988588089602308e-05, + "loss": 0.1388, + "step": 26605 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987804304547522e-05, + "loss": 0.1345, + "step": 26610 + }, + { + "epoch": 1.24, + "learning_rate": 1.5987020519492736e-05, + "loss": 0.1711, + "step": 26615 + }, + { + "epoch": 1.24, + "learning_rate": 1.598623673443795e-05, + "loss": 0.209, + "step": 26620 + }, + { + "epoch": 1.24, + "learning_rate": 1.5985452949383164e-05, + "loss": 0.24, + "step": 26625 + }, + { + "epoch": 1.24, + "learning_rate": 1.5984669164328378e-05, + "loss": 0.2429, + "step": 26630 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983885379273588e-05, + "loss": 0.2346, + "step": 26635 + }, + { + "epoch": 1.24, + "learning_rate": 1.5983101594218802e-05, + "loss": 0.052, + "step": 26640 + }, + { + "epoch": 1.24, + "learning_rate": 1.5982317809164016e-05, + "loss": 0.1405, + "step": 26645 + }, + { + "epoch": 1.24, + "learning_rate": 1.598153402410923e-05, + "loss": 0.0957, + "step": 26650 + }, + { + "epoch": 1.24, + "learning_rate": 1.5980750239054444e-05, + "loss": 0.0875, + "step": 26655 + }, + { + "epoch": 1.24, + "learning_rate": 1.5979966453999654e-05, + "loss": 0.0848, + "step": 26660 + }, + { + "epoch": 1.24, + "learning_rate": 1.597918266894487e-05, + "loss": 0.1606, + "step": 26665 + }, + { + "epoch": 1.24, + "learning_rate": 1.5978398883890082e-05, + "loss": 0.2839, + "step": 26670 + }, + { + "epoch": 1.24, + "learning_rate": 1.5977615098835296e-05, + "loss": 0.2471, + "step": 26675 + }, + { + "epoch": 1.24, + "learning_rate": 1.597683131378051e-05, + "loss": 0.3593, + "step": 26680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5976047528725724e-05, + "loss": 0.3423, + "step": 26685 + }, + { + "epoch": 1.25, + "learning_rate": 1.5975263743670938e-05, + "loss": 0.0586, + "step": 26690 + }, + { + "epoch": 1.25, + "learning_rate": 1.5974479958616152e-05, + "loss": 0.0374, + "step": 26695 + }, + { + "epoch": 1.25, + "learning_rate": 1.5973696173561362e-05, + "loss": 0.1045, + "step": 26700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5972912388506576e-05, + "loss": 0.1342, + "step": 26705 + }, + { + "epoch": 1.25, + "learning_rate": 1.597212860345179e-05, + "loss": 0.2293, + "step": 26710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5971344818397004e-05, + "loss": 0.1674, + "step": 26715 + }, + { + "epoch": 1.25, + "learning_rate": 1.5970561033342218e-05, + "loss": 0.1991, + "step": 26720 + }, + { + "epoch": 1.25, + "learning_rate": 1.5969777248287432e-05, + "loss": 0.2317, + "step": 26725 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968993463232646e-05, + "loss": 0.4123, + "step": 26730 + }, + { + "epoch": 1.25, + "learning_rate": 1.5968209678177856e-05, + "loss": 0.2453, + "step": 26735 + }, + { + "epoch": 1.25, + "learning_rate": 1.5967425893123073e-05, + "loss": 0.083, + "step": 26740 + }, + { + "epoch": 1.25, + "learning_rate": 1.5966642108068284e-05, + "loss": 0.0648, + "step": 26745 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965858323013498e-05, + "loss": 0.1362, + "step": 26750 + }, + { + "epoch": 1.25, + "learning_rate": 1.5965074537958712e-05, + "loss": 0.1406, + "step": 26755 + }, + { + "epoch": 1.25, + "learning_rate": 1.5964290752903926e-05, + "loss": 0.1994, + "step": 26760 + }, + { + "epoch": 1.25, + "learning_rate": 1.596350696784914e-05, + "loss": 0.1565, + "step": 26765 + }, + { + "epoch": 1.25, + "learning_rate": 1.596272318279435e-05, + "loss": 0.2318, + "step": 26770 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961939397739564e-05, + "loss": 0.2744, + "step": 26775 + }, + { + "epoch": 1.25, + "learning_rate": 1.5961155612684778e-05, + "loss": 0.4521, + "step": 26780 + }, + { + "epoch": 1.25, + "learning_rate": 1.5960371827629992e-05, + "loss": 0.2907, + "step": 26785 + }, + { + "epoch": 1.25, + "learning_rate": 1.5959588042575206e-05, + "loss": 0.086, + "step": 26790 + }, + { + "epoch": 1.25, + "learning_rate": 1.595880425752042e-05, + "loss": 0.0616, + "step": 26795 + }, + { + "epoch": 1.25, + "learning_rate": 1.595802047246563e-05, + "loss": 0.0885, + "step": 26800 + }, + { + "epoch": 1.25, + "learning_rate": 1.5957236687410847e-05, + "loss": 0.1038, + "step": 26805 + }, + { + "epoch": 1.25, + "learning_rate": 1.5956452902356058e-05, + "loss": 0.1311, + "step": 26810 + }, + { + "epoch": 1.25, + "learning_rate": 1.5955669117301272e-05, + "loss": 0.2754, + "step": 26815 + }, + { + "epoch": 1.25, + "learning_rate": 1.5954885332246486e-05, + "loss": 0.2622, + "step": 26820 + }, + { + "epoch": 1.25, + "learning_rate": 1.59541015471917e-05, + "loss": 0.3481, + "step": 26825 + }, + { + "epoch": 1.25, + "learning_rate": 1.5953317762136914e-05, + "loss": 0.3888, + "step": 26830 + }, + { + "epoch": 1.25, + "learning_rate": 1.5952533977082124e-05, + "loss": 0.3201, + "step": 26835 + }, + { + "epoch": 1.25, + "learning_rate": 1.595175019202734e-05, + "loss": 0.0431, + "step": 26840 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950966406972552e-05, + "loss": 0.0601, + "step": 26845 + }, + { + "epoch": 1.25, + "learning_rate": 1.5950182621917766e-05, + "loss": 0.0401, + "step": 26850 + }, + { + "epoch": 1.25, + "learning_rate": 1.594939883686298e-05, + "loss": 0.0848, + "step": 26855 + }, + { + "epoch": 1.25, + "learning_rate": 1.5948615051808194e-05, + "loss": 0.1519, + "step": 26860 + }, + { + "epoch": 1.25, + "learning_rate": 1.5947831266753408e-05, + "loss": 0.1502, + "step": 26865 + }, + { + "epoch": 1.25, + "learning_rate": 1.594704748169862e-05, + "loss": 0.1738, + "step": 26870 + }, + { + "epoch": 1.25, + "learning_rate": 1.5946263696643832e-05, + "loss": 0.1806, + "step": 26875 + }, + { + "epoch": 1.25, + "learning_rate": 1.594547991158905e-05, + "loss": 0.359, + "step": 26880 + }, + { + "epoch": 1.25, + "learning_rate": 1.594469612653426e-05, + "loss": 0.3319, + "step": 26885 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943912341479474e-05, + "loss": 0.0602, + "step": 26890 + }, + { + "epoch": 1.25, + "learning_rate": 1.5943128556424688e-05, + "loss": 0.0851, + "step": 26895 + }, + { + "epoch": 1.26, + "learning_rate": 1.5942344771369898e-05, + "loss": 0.1058, + "step": 26900 + }, + { + "epoch": 1.26, + "learning_rate": 1.5941560986315115e-05, + "loss": 0.1035, + "step": 26905 + }, + { + "epoch": 1.26, + "learning_rate": 1.5940777201260326e-05, + "loss": 0.1427, + "step": 26910 + }, + { + "epoch": 1.26, + "learning_rate": 1.593999341620554e-05, + "loss": 0.1269, + "step": 26915 + }, + { + "epoch": 1.26, + "learning_rate": 1.5939209631150754e-05, + "loss": 0.1434, + "step": 26920 + }, + { + "epoch": 1.26, + "learning_rate": 1.5938425846095968e-05, + "loss": 0.2233, + "step": 26925 + }, + { + "epoch": 1.26, + "learning_rate": 1.593764206104118e-05, + "loss": 0.4562, + "step": 26930 + }, + { + "epoch": 1.26, + "learning_rate": 1.5936858275986395e-05, + "loss": 0.3848, + "step": 26935 + }, + { + "epoch": 1.26, + "learning_rate": 1.593607449093161e-05, + "loss": 0.0614, + "step": 26940 + }, + { + "epoch": 1.26, + "learning_rate": 1.5935290705876823e-05, + "loss": 0.0845, + "step": 26945 + }, + { + "epoch": 1.26, + "learning_rate": 1.5934506920822034e-05, + "loss": 0.0953, + "step": 26950 + }, + { + "epoch": 1.26, + "learning_rate": 1.593372313576725e-05, + "loss": 0.1694, + "step": 26955 + }, + { + "epoch": 1.26, + "learning_rate": 1.593293935071246e-05, + "loss": 0.1371, + "step": 26960 + }, + { + "epoch": 1.26, + "learning_rate": 1.5932155565657676e-05, + "loss": 0.2797, + "step": 26965 + }, + { + "epoch": 1.26, + "learning_rate": 1.593137178060289e-05, + "loss": 0.2314, + "step": 26970 + }, + { + "epoch": 1.26, + "learning_rate": 1.59305879955481e-05, + "loss": 0.1904, + "step": 26975 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929804210493317e-05, + "loss": 0.3626, + "step": 26980 + }, + { + "epoch": 1.26, + "learning_rate": 1.5929020425438528e-05, + "loss": 0.3654, + "step": 26985 + }, + { + "epoch": 1.26, + "learning_rate": 1.592823664038374e-05, + "loss": 0.0897, + "step": 26990 + }, + { + "epoch": 1.26, + "learning_rate": 1.5927452855328956e-05, + "loss": 0.1229, + "step": 26995 + }, + { + "epoch": 1.26, + "learning_rate": 1.592666907027417e-05, + "loss": 0.1352, + "step": 27000 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925885285219383e-05, + "loss": 0.1724, + "step": 27005 + }, + { + "epoch": 1.26, + "learning_rate": 1.5925101500164597e-05, + "loss": 0.1872, + "step": 27010 + }, + { + "epoch": 1.26, + "learning_rate": 1.5924317715109808e-05, + "loss": 0.211, + "step": 27015 + }, + { + "epoch": 1.26, + "learning_rate": 1.5923533930055025e-05, + "loss": 0.2077, + "step": 27020 + }, + { + "epoch": 1.26, + "learning_rate": 1.5922750145000236e-05, + "loss": 0.2314, + "step": 27025 + }, + { + "epoch": 1.26, + "learning_rate": 1.592196635994545e-05, + "loss": 0.2645, + "step": 27030 + }, + { + "epoch": 1.26, + "learning_rate": 1.5921182574890663e-05, + "loss": 0.3657, + "step": 27035 + }, + { + "epoch": 1.26, + "learning_rate": 1.5920398789835877e-05, + "loss": 0.0485, + "step": 27040 + }, + { + "epoch": 1.26, + "learning_rate": 1.591961500478109e-05, + "loss": 0.0873, + "step": 27045 + }, + { + "epoch": 1.26, + "learning_rate": 1.5918831219726302e-05, + "loss": 0.0935, + "step": 27050 + }, + { + "epoch": 1.26, + "learning_rate": 1.591804743467152e-05, + "loss": 0.0481, + "step": 27055 + }, + { + "epoch": 1.26, + "learning_rate": 1.591726364961673e-05, + "loss": 0.1495, + "step": 27060 + }, + { + "epoch": 1.26, + "learning_rate": 1.5916479864561943e-05, + "loss": 0.2669, + "step": 27065 + }, + { + "epoch": 1.26, + "learning_rate": 1.5915696079507157e-05, + "loss": 0.2449, + "step": 27070 + }, + { + "epoch": 1.26, + "learning_rate": 1.591491229445237e-05, + "loss": 0.2292, + "step": 27075 + }, + { + "epoch": 1.26, + "learning_rate": 1.5914128509397585e-05, + "loss": 0.3765, + "step": 27080 + }, + { + "epoch": 1.26, + "learning_rate": 1.59133447243428e-05, + "loss": 0.4043, + "step": 27085 + }, + { + "epoch": 1.26, + "learning_rate": 1.591256093928801e-05, + "loss": 0.0565, + "step": 27090 + }, + { + "epoch": 1.26, + "learning_rate": 1.5911777154233224e-05, + "loss": 0.1073, + "step": 27095 + }, + { + "epoch": 1.26, + "learning_rate": 1.5910993369178437e-05, + "loss": 0.1031, + "step": 27100 + }, + { + "epoch": 1.26, + "learning_rate": 1.591020958412365e-05, + "loss": 0.1669, + "step": 27105 + }, + { + "epoch": 1.26, + "learning_rate": 1.5909425799068865e-05, + "loss": 0.1159, + "step": 27110 + }, + { + "epoch": 1.27, + "learning_rate": 1.5908642014014076e-05, + "loss": 0.1482, + "step": 27115 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907858228959293e-05, + "loss": 0.2052, + "step": 27120 + }, + { + "epoch": 1.27, + "learning_rate": 1.5907074443904504e-05, + "loss": 0.1452, + "step": 27125 + }, + { + "epoch": 1.27, + "learning_rate": 1.5906290658849717e-05, + "loss": 0.3528, + "step": 27130 + }, + { + "epoch": 1.27, + "learning_rate": 1.590550687379493e-05, + "loss": 0.2911, + "step": 27135 + }, + { + "epoch": 1.27, + "learning_rate": 1.5904723088740145e-05, + "loss": 0.0498, + "step": 27140 + }, + { + "epoch": 1.27, + "learning_rate": 1.590393930368536e-05, + "loss": 0.0742, + "step": 27145 + }, + { + "epoch": 1.27, + "learning_rate": 1.5903155518630573e-05, + "loss": 0.0673, + "step": 27150 + }, + { + "epoch": 1.27, + "learning_rate": 1.5902371733575787e-05, + "loss": 0.1554, + "step": 27155 + }, + { + "epoch": 1.27, + "learning_rate": 1.5901587948520998e-05, + "loss": 0.1423, + "step": 27160 + }, + { + "epoch": 1.27, + "learning_rate": 1.590080416346621e-05, + "loss": 0.2888, + "step": 27165 + }, + { + "epoch": 1.27, + "learning_rate": 1.5900020378411425e-05, + "loss": 0.206, + "step": 27170 + }, + { + "epoch": 1.27, + "learning_rate": 1.589923659335664e-05, + "loss": 0.2769, + "step": 27175 + }, + { + "epoch": 1.27, + "learning_rate": 1.5898452808301853e-05, + "loss": 0.3929, + "step": 27180 + }, + { + "epoch": 1.27, + "learning_rate": 1.5897669023247067e-05, + "loss": 0.2299, + "step": 27185 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896885238192278e-05, + "loss": 0.0616, + "step": 27190 + }, + { + "epoch": 1.27, + "learning_rate": 1.5896101453137495e-05, + "loss": 0.0768, + "step": 27195 + }, + { + "epoch": 1.27, + "learning_rate": 1.5895317668082705e-05, + "loss": 0.066, + "step": 27200 + }, + { + "epoch": 1.27, + "learning_rate": 1.589453388302792e-05, + "loss": 0.1199, + "step": 27205 + }, + { + "epoch": 1.27, + "learning_rate": 1.5893750097973133e-05, + "loss": 0.1225, + "step": 27210 + }, + { + "epoch": 1.27, + "learning_rate": 1.5892966312918347e-05, + "loss": 0.1322, + "step": 27215 + }, + { + "epoch": 1.27, + "learning_rate": 1.589218252786356e-05, + "loss": 0.1938, + "step": 27220 + }, + { + "epoch": 1.27, + "learning_rate": 1.589139874280877e-05, + "loss": 0.1989, + "step": 27225 + }, + { + "epoch": 1.27, + "learning_rate": 1.5890614957753985e-05, + "loss": 0.3783, + "step": 27230 + }, + { + "epoch": 1.27, + "learning_rate": 1.58898311726992e-05, + "loss": 0.2278, + "step": 27235 + }, + { + "epoch": 1.27, + "learning_rate": 1.5889047387644413e-05, + "loss": 0.0426, + "step": 27240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5888263602589627e-05, + "loss": 0.0671, + "step": 27245 + }, + { + "epoch": 1.27, + "learning_rate": 1.588747981753484e-05, + "loss": 0.1248, + "step": 27250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5886696032480055e-05, + "loss": 0.1308, + "step": 27255 + }, + { + "epoch": 1.27, + "learning_rate": 1.588591224742527e-05, + "loss": 0.1626, + "step": 27260 + }, + { + "epoch": 1.27, + "learning_rate": 1.588512846237048e-05, + "loss": 0.0983, + "step": 27265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5884344677315697e-05, + "loss": 0.1998, + "step": 27270 + }, + { + "epoch": 1.27, + "learning_rate": 1.5883560892260907e-05, + "loss": 0.2857, + "step": 27275 + }, + { + "epoch": 1.27, + "learning_rate": 1.588277710720612e-05, + "loss": 0.4716, + "step": 27280 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881993322151335e-05, + "loss": 0.3464, + "step": 27285 + }, + { + "epoch": 1.27, + "learning_rate": 1.5881209537096546e-05, + "loss": 0.0157, + "step": 27290 + }, + { + "epoch": 1.27, + "learning_rate": 1.5880425752041763e-05, + "loss": 0.0292, + "step": 27295 + }, + { + "epoch": 1.27, + "learning_rate": 1.5879641966986973e-05, + "loss": 0.1048, + "step": 27300 + }, + { + "epoch": 1.27, + "learning_rate": 1.5878858181932187e-05, + "loss": 0.1233, + "step": 27305 + }, + { + "epoch": 1.27, + "learning_rate": 1.58780743968774e-05, + "loss": 0.1633, + "step": 27310 + }, + { + "epoch": 1.27, + "learning_rate": 1.5877290611822615e-05, + "loss": 0.213, + "step": 27315 + }, + { + "epoch": 1.27, + "learning_rate": 1.587650682676783e-05, + "loss": 0.1694, + "step": 27320 + }, + { + "epoch": 1.28, + "learning_rate": 1.5875723041713043e-05, + "loss": 0.2286, + "step": 27325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5874939256658253e-05, + "loss": 0.4041, + "step": 27330 + }, + { + "epoch": 1.28, + "learning_rate": 1.587415547160347e-05, + "loss": 0.3315, + "step": 27335 + }, + { + "epoch": 1.28, + "learning_rate": 1.587337168654868e-05, + "loss": 0.0725, + "step": 27340 + }, + { + "epoch": 1.28, + "learning_rate": 1.5872587901493895e-05, + "loss": 0.092, + "step": 27345 + }, + { + "epoch": 1.28, + "learning_rate": 1.587180411643911e-05, + "loss": 0.0829, + "step": 27350 + }, + { + "epoch": 1.28, + "learning_rate": 1.5871020331384323e-05, + "loss": 0.1871, + "step": 27355 + }, + { + "epoch": 1.28, + "learning_rate": 1.5870236546329537e-05, + "loss": 0.1217, + "step": 27360 + }, + { + "epoch": 1.28, + "learning_rate": 1.5869452761274747e-05, + "loss": 0.1402, + "step": 27365 + }, + { + "epoch": 1.28, + "learning_rate": 1.5868668976219965e-05, + "loss": 0.2723, + "step": 27370 + }, + { + "epoch": 1.28, + "learning_rate": 1.5867885191165175e-05, + "loss": 0.2456, + "step": 27375 + }, + { + "epoch": 1.28, + "learning_rate": 1.586710140611039e-05, + "loss": 0.3611, + "step": 27380 + }, + { + "epoch": 1.28, + "learning_rate": 1.5866317621055603e-05, + "loss": 0.2447, + "step": 27385 + }, + { + "epoch": 1.28, + "learning_rate": 1.5865533836000817e-05, + "loss": 0.0605, + "step": 27390 + }, + { + "epoch": 1.28, + "learning_rate": 1.586475005094603e-05, + "loss": 0.1235, + "step": 27395 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863966265891245e-05, + "loss": 0.1074, + "step": 27400 + }, + { + "epoch": 1.28, + "learning_rate": 1.5863182480836455e-05, + "loss": 0.1034, + "step": 27405 + }, + { + "epoch": 1.28, + "learning_rate": 1.5862398695781672e-05, + "loss": 0.1398, + "step": 27410 + }, + { + "epoch": 1.28, + "learning_rate": 1.5861614910726883e-05, + "loss": 0.1485, + "step": 27415 + }, + { + "epoch": 1.28, + "learning_rate": 1.5860831125672097e-05, + "loss": 0.184, + "step": 27420 + }, + { + "epoch": 1.28, + "learning_rate": 1.586004734061731e-05, + "loss": 0.301, + "step": 27425 + }, + { + "epoch": 1.28, + "learning_rate": 1.585926355556252e-05, + "loss": 0.3592, + "step": 27430 + }, + { + "epoch": 1.28, + "learning_rate": 1.585847977050774e-05, + "loss": 0.4242, + "step": 27435 + }, + { + "epoch": 1.28, + "learning_rate": 1.585769598545295e-05, + "loss": 0.033, + "step": 27440 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856912200398163e-05, + "loss": 0.077, + "step": 27445 + }, + { + "epoch": 1.28, + "learning_rate": 1.5856128415343377e-05, + "loss": 0.0681, + "step": 27450 + }, + { + "epoch": 1.28, + "learning_rate": 1.585534463028859e-05, + "loss": 0.0868, + "step": 27455 + }, + { + "epoch": 1.28, + "learning_rate": 1.5854560845233805e-05, + "loss": 0.1456, + "step": 27460 + }, + { + "epoch": 1.28, + "learning_rate": 1.585377706017902e-05, + "loss": 0.1694, + "step": 27465 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852993275124233e-05, + "loss": 0.2192, + "step": 27470 + }, + { + "epoch": 1.28, + "learning_rate": 1.5852209490069446e-05, + "loss": 0.2615, + "step": 27475 + }, + { + "epoch": 1.28, + "learning_rate": 1.5851425705014657e-05, + "loss": 0.3738, + "step": 27480 + }, + { + "epoch": 1.28, + "learning_rate": 1.585064191995987e-05, + "loss": 0.4018, + "step": 27485 + }, + { + "epoch": 1.28, + "learning_rate": 1.5849858134905085e-05, + "loss": 0.0784, + "step": 27490 + }, + { + "epoch": 1.28, + "learning_rate": 1.58490743498503e-05, + "loss": 0.0999, + "step": 27495 + }, + { + "epoch": 1.28, + "learning_rate": 1.5848290564795513e-05, + "loss": 0.094, + "step": 27500 + }, + { + "epoch": 1.28, + "learning_rate": 1.5847506779740723e-05, + "loss": 0.1918, + "step": 27505 + }, + { + "epoch": 1.28, + "learning_rate": 1.584672299468594e-05, + "loss": 0.0998, + "step": 27510 + }, + { + "epoch": 1.28, + "learning_rate": 1.584593920963115e-05, + "loss": 0.1329, + "step": 27515 + }, + { + "epoch": 1.28, + "learning_rate": 1.5845155424576365e-05, + "loss": 0.2364, + "step": 27520 + }, + { + "epoch": 1.28, + "learning_rate": 1.584437163952158e-05, + "loss": 0.2244, + "step": 27525 + }, + { + "epoch": 1.28, + "learning_rate": 1.5843587854466793e-05, + "loss": 0.2506, + "step": 27530 + }, + { + "epoch": 1.28, + "learning_rate": 1.5842804069412007e-05, + "loss": 0.2574, + "step": 27535 + }, + { + "epoch": 1.29, + "learning_rate": 1.584202028435722e-05, + "loss": 0.0459, + "step": 27540 + }, + { + "epoch": 1.29, + "learning_rate": 1.584123649930243e-05, + "loss": 0.169, + "step": 27545 + }, + { + "epoch": 1.29, + "learning_rate": 1.5840452714247645e-05, + "loss": 0.0555, + "step": 27550 + }, + { + "epoch": 1.29, + "learning_rate": 1.583966892919286e-05, + "loss": 0.1168, + "step": 27555 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838885144138073e-05, + "loss": 0.1272, + "step": 27560 + }, + { + "epoch": 1.29, + "learning_rate": 1.5838101359083287e-05, + "loss": 0.1271, + "step": 27565 + }, + { + "epoch": 1.29, + "learning_rate": 1.58373175740285e-05, + "loss": 0.2171, + "step": 27570 + }, + { + "epoch": 1.29, + "learning_rate": 1.5836533788973714e-05, + "loss": 0.2344, + "step": 27575 + }, + { + "epoch": 1.29, + "learning_rate": 1.5835750003918925e-05, + "loss": 0.441, + "step": 27580 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834966218864142e-05, + "loss": 0.344, + "step": 27585 + }, + { + "epoch": 1.29, + "learning_rate": 1.5834182433809353e-05, + "loss": 0.0275, + "step": 27590 + }, + { + "epoch": 1.29, + "learning_rate": 1.5833398648754567e-05, + "loss": 0.0879, + "step": 27595 + }, + { + "epoch": 1.29, + "learning_rate": 1.583261486369978e-05, + "loss": 0.0835, + "step": 27600 + }, + { + "epoch": 1.29, + "learning_rate": 1.5831831078644994e-05, + "loss": 0.0898, + "step": 27605 + }, + { + "epoch": 1.29, + "learning_rate": 1.583104729359021e-05, + "loss": 0.1473, + "step": 27610 + }, + { + "epoch": 1.29, + "learning_rate": 1.583026350853542e-05, + "loss": 0.1409, + "step": 27615 + }, + { + "epoch": 1.29, + "learning_rate": 1.5829479723480633e-05, + "loss": 0.1624, + "step": 27620 + }, + { + "epoch": 1.29, + "learning_rate": 1.5828695938425847e-05, + "loss": 0.2715, + "step": 27625 + }, + { + "epoch": 1.29, + "learning_rate": 1.582791215337106e-05, + "loss": 0.24, + "step": 27630 + }, + { + "epoch": 1.29, + "learning_rate": 1.5827128368316275e-05, + "loss": 0.3, + "step": 27635 + }, + { + "epoch": 1.29, + "learning_rate": 1.582634458326149e-05, + "loss": 0.1383, + "step": 27640 + }, + { + "epoch": 1.29, + "learning_rate": 1.58255607982067e-05, + "loss": 0.0718, + "step": 27645 + }, + { + "epoch": 1.29, + "learning_rate": 1.5824777013151916e-05, + "loss": 0.1329, + "step": 27650 + }, + { + "epoch": 1.29, + "learning_rate": 1.5823993228097127e-05, + "loss": 0.0816, + "step": 27655 + }, + { + "epoch": 1.29, + "learning_rate": 1.582320944304234e-05, + "loss": 0.1626, + "step": 27660 + }, + { + "epoch": 1.29, + "learning_rate": 1.5822425657987555e-05, + "loss": 0.1631, + "step": 27665 + }, + { + "epoch": 1.29, + "learning_rate": 1.582164187293277e-05, + "loss": 0.176, + "step": 27670 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820858087877982e-05, + "loss": 0.242, + "step": 27675 + }, + { + "epoch": 1.29, + "learning_rate": 1.5820074302823193e-05, + "loss": 0.3799, + "step": 27680 + }, + { + "epoch": 1.29, + "learning_rate": 1.581929051776841e-05, + "loss": 0.3226, + "step": 27685 + }, + { + "epoch": 1.29, + "learning_rate": 1.581850673271362e-05, + "loss": 0.0577, + "step": 27690 + }, + { + "epoch": 1.29, + "learning_rate": 1.5817722947658835e-05, + "loss": 0.0607, + "step": 27695 + }, + { + "epoch": 1.29, + "learning_rate": 1.581693916260405e-05, + "loss": 0.0799, + "step": 27700 + }, + { + "epoch": 1.29, + "learning_rate": 1.5816155377549262e-05, + "loss": 0.1294, + "step": 27705 + }, + { + "epoch": 1.29, + "learning_rate": 1.5815371592494476e-05, + "loss": 0.1614, + "step": 27710 + }, + { + "epoch": 1.29, + "learning_rate": 1.581458780743969e-05, + "loss": 0.2011, + "step": 27715 + }, + { + "epoch": 1.29, + "learning_rate": 1.58138040223849e-05, + "loss": 0.1317, + "step": 27720 + }, + { + "epoch": 1.29, + "learning_rate": 1.5813020237330118e-05, + "loss": 0.138, + "step": 27725 + }, + { + "epoch": 1.29, + "learning_rate": 1.581239320928629e-05, + "loss": 0.3351, + "step": 27730 + }, + { + "epoch": 1.29, + "learning_rate": 1.58116094242315e-05, + "loss": 0.2869, + "step": 27735 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810825639176713e-05, + "loss": 0.0514, + "step": 27740 + }, + { + "epoch": 1.29, + "learning_rate": 1.5810041854121927e-05, + "loss": 0.0941, + "step": 27745 + }, + { + "epoch": 1.29, + "learning_rate": 1.580925806906714e-05, + "loss": 0.0642, + "step": 27750 + }, + { + "epoch": 1.3, + "learning_rate": 1.5808474284012355e-05, + "loss": 0.1529, + "step": 27755 + }, + { + "epoch": 1.3, + "learning_rate": 1.5807690498957565e-05, + "loss": 0.1682, + "step": 27760 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806906713902782e-05, + "loss": 0.1137, + "step": 27765 + }, + { + "epoch": 1.3, + "learning_rate": 1.5806279685858953e-05, + "loss": 0.2363, + "step": 27770 + }, + { + "epoch": 1.3, + "learning_rate": 1.5805495900804163e-05, + "loss": 0.2647, + "step": 27775 + }, + { + "epoch": 1.3, + "learning_rate": 1.5804712115749377e-05, + "loss": 0.432, + "step": 27780 + }, + { + "epoch": 1.3, + "learning_rate": 1.580392833069459e-05, + "loss": 0.2727, + "step": 27785 + }, + { + "epoch": 1.3, + "learning_rate": 1.5803144545639805e-05, + "loss": 0.0257, + "step": 27790 + }, + { + "epoch": 1.3, + "learning_rate": 1.580236076058502e-05, + "loss": 0.056, + "step": 27795 + }, + { + "epoch": 1.3, + "learning_rate": 1.5801576975530233e-05, + "loss": 0.1739, + "step": 27800 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800793190475447e-05, + "loss": 0.1359, + "step": 27805 + }, + { + "epoch": 1.3, + "learning_rate": 1.5800009405420657e-05, + "loss": 0.1377, + "step": 27810 + }, + { + "epoch": 1.3, + "learning_rate": 1.579922562036587e-05, + "loss": 0.2141, + "step": 27815 + }, + { + "epoch": 1.3, + "learning_rate": 1.5798441835311085e-05, + "loss": 0.2105, + "step": 27820 + }, + { + "epoch": 1.3, + "learning_rate": 1.57976580502563e-05, + "loss": 0.2329, + "step": 27825 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796874265201513e-05, + "loss": 0.2853, + "step": 27830 + }, + { + "epoch": 1.3, + "learning_rate": 1.5796090480146727e-05, + "loss": 0.2378, + "step": 27835 + }, + { + "epoch": 1.3, + "learning_rate": 1.5795306695091937e-05, + "loss": 0.0456, + "step": 27840 + }, + { + "epoch": 1.3, + "learning_rate": 1.5794522910037155e-05, + "loss": 0.0577, + "step": 27845 + }, + { + "epoch": 1.3, + "learning_rate": 1.5793739124982365e-05, + "loss": 0.0971, + "step": 27850 + }, + { + "epoch": 1.3, + "learning_rate": 1.579295533992758e-05, + "loss": 0.1172, + "step": 27855 + }, + { + "epoch": 1.3, + "learning_rate": 1.5792171554872793e-05, + "loss": 0.0872, + "step": 27860 + }, + { + "epoch": 1.3, + "learning_rate": 1.5791387769818007e-05, + "loss": 0.1082, + "step": 27865 + }, + { + "epoch": 1.3, + "learning_rate": 1.579060398476322e-05, + "loss": 0.1545, + "step": 27870 + }, + { + "epoch": 1.3, + "learning_rate": 1.578982019970843e-05, + "loss": 0.2331, + "step": 27875 + }, + { + "epoch": 1.3, + "learning_rate": 1.5789036414653645e-05, + "loss": 0.3144, + "step": 27880 + }, + { + "epoch": 1.3, + "learning_rate": 1.578825262959886e-05, + "loss": 0.2813, + "step": 27885 + }, + { + "epoch": 1.3, + "learning_rate": 1.5787468844544073e-05, + "loss": 0.0546, + "step": 27890 + }, + { + "epoch": 1.3, + "learning_rate": 1.5786685059489287e-05, + "loss": 0.0614, + "step": 27895 + }, + { + "epoch": 1.3, + "learning_rate": 1.57859012744345e-05, + "loss": 0.0767, + "step": 27900 + }, + { + "epoch": 1.3, + "learning_rate": 1.5785117489379715e-05, + "loss": 0.1322, + "step": 27905 + }, + { + "epoch": 1.3, + "learning_rate": 1.578433370432493e-05, + "loss": 0.134, + "step": 27910 + }, + { + "epoch": 1.3, + "learning_rate": 1.578354991927014e-05, + "loss": 0.1844, + "step": 27915 + }, + { + "epoch": 1.3, + "learning_rate": 1.5782766134215357e-05, + "loss": 0.1431, + "step": 27920 + }, + { + "epoch": 1.3, + "learning_rate": 1.5781982349160567e-05, + "loss": 0.2983, + "step": 27925 + }, + { + "epoch": 1.3, + "learning_rate": 1.578119856410578e-05, + "loss": 0.381, + "step": 27930 + }, + { + "epoch": 1.3, + "learning_rate": 1.5780414779050995e-05, + "loss": 0.3145, + "step": 27935 + }, + { + "epoch": 1.3, + "learning_rate": 1.5779630993996205e-05, + "loss": 0.0599, + "step": 27940 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778847208941423e-05, + "loss": 0.1142, + "step": 27945 + }, + { + "epoch": 1.3, + "learning_rate": 1.5778063423886633e-05, + "loss": 0.0946, + "step": 27950 + }, + { + "epoch": 1.3, + "learning_rate": 1.5777279638831847e-05, + "loss": 0.1403, + "step": 27955 + }, + { + "epoch": 1.3, + "learning_rate": 1.577649585377706e-05, + "loss": 0.1777, + "step": 27960 + }, + { + "epoch": 1.3, + "learning_rate": 1.5775712068722275e-05, + "loss": 0.2978, + "step": 27965 + }, + { + "epoch": 1.31, + "learning_rate": 1.577492828366749e-05, + "loss": 0.1597, + "step": 27970 + }, + { + "epoch": 1.31, + "learning_rate": 1.5774144498612703e-05, + "loss": 0.195, + "step": 27975 + }, + { + "epoch": 1.31, + "learning_rate": 1.5773360713557913e-05, + "loss": 0.356, + "step": 27980 + }, + { + "epoch": 1.31, + "learning_rate": 1.577257692850313e-05, + "loss": 0.3127, + "step": 27985 + }, + { + "epoch": 1.31, + "learning_rate": 1.577179314344834e-05, + "loss": 0.0726, + "step": 27990 + }, + { + "epoch": 1.31, + "learning_rate": 1.5771009358393555e-05, + "loss": 0.0738, + "step": 27995 + }, + { + "epoch": 1.31, + "learning_rate": 1.577022557333877e-05, + "loss": 0.0923, + "step": 28000 + }, + { + "epoch": 1.31, + "learning_rate": 1.5769441788283983e-05, + "loss": 0.0928, + "step": 28005 + }, + { + "epoch": 1.31, + "learning_rate": 1.5768658003229197e-05, + "loss": 0.1215, + "step": 28010 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767874218174407e-05, + "loss": 0.161, + "step": 28015 + }, + { + "epoch": 1.31, + "learning_rate": 1.5767090433119624e-05, + "loss": 0.2235, + "step": 28020 + }, + { + "epoch": 1.31, + "learning_rate": 1.5766306648064835e-05, + "loss": 0.2186, + "step": 28025 + }, + { + "epoch": 1.31, + "learning_rate": 1.576552286301005e-05, + "loss": 0.3355, + "step": 28030 + }, + { + "epoch": 1.31, + "learning_rate": 1.5764739077955263e-05, + "loss": 0.3447, + "step": 28035 + }, + { + "epoch": 1.31, + "learning_rate": 1.5763955292900477e-05, + "loss": 0.0388, + "step": 28040 + }, + { + "epoch": 1.31, + "learning_rate": 1.576317150784569e-05, + "loss": 0.074, + "step": 28045 + }, + { + "epoch": 1.31, + "learning_rate": 1.5762387722790905e-05, + "loss": 0.058, + "step": 28050 + }, + { + "epoch": 1.31, + "learning_rate": 1.5761603937736115e-05, + "loss": 0.1904, + "step": 28055 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760820152681332e-05, + "loss": 0.0815, + "step": 28060 + }, + { + "epoch": 1.31, + "learning_rate": 1.5760036367626543e-05, + "loss": 0.1802, + "step": 28065 + }, + { + "epoch": 1.31, + "learning_rate": 1.5759252582571757e-05, + "loss": 0.1224, + "step": 28070 + }, + { + "epoch": 1.31, + "learning_rate": 1.575846879751697e-05, + "loss": 0.2543, + "step": 28075 + }, + { + "epoch": 1.31, + "learning_rate": 1.575768501246218e-05, + "loss": 0.248, + "step": 28080 + }, + { + "epoch": 1.31, + "learning_rate": 1.57569012274074e-05, + "loss": 0.3462, + "step": 28085 + }, + { + "epoch": 1.31, + "learning_rate": 1.575611744235261e-05, + "loss": 0.0377, + "step": 28090 + }, + { + "epoch": 1.31, + "learning_rate": 1.5755333657297823e-05, + "loss": 0.0877, + "step": 28095 + }, + { + "epoch": 1.31, + "learning_rate": 1.5754549872243037e-05, + "loss": 0.1138, + "step": 28100 + }, + { + "epoch": 1.31, + "learning_rate": 1.575376608718825e-05, + "loss": 0.1132, + "step": 28105 + }, + { + "epoch": 1.31, + "learning_rate": 1.5752982302133465e-05, + "loss": 0.1068, + "step": 28110 + }, + { + "epoch": 1.31, + "learning_rate": 1.575219851707868e-05, + "loss": 0.1146, + "step": 28115 + }, + { + "epoch": 1.31, + "learning_rate": 1.5751414732023892e-05, + "loss": 0.1925, + "step": 28120 + }, + { + "epoch": 1.31, + "learning_rate": 1.5750630946969106e-05, + "loss": 0.2804, + "step": 28125 + }, + { + "epoch": 1.31, + "learning_rate": 1.5749847161914317e-05, + "loss": 0.4683, + "step": 28130 + }, + { + "epoch": 1.31, + "learning_rate": 1.574906337685953e-05, + "loss": 0.3226, + "step": 28135 + }, + { + "epoch": 1.31, + "learning_rate": 1.5748279591804745e-05, + "loss": 0.063, + "step": 28140 + }, + { + "epoch": 1.31, + "learning_rate": 1.574749580674996e-05, + "loss": 0.0538, + "step": 28145 + }, + { + "epoch": 1.31, + "learning_rate": 1.5746712021695172e-05, + "loss": 0.0772, + "step": 28150 + }, + { + "epoch": 1.31, + "learning_rate": 1.5745928236640383e-05, + "loss": 0.1166, + "step": 28155 + }, + { + "epoch": 1.31, + "learning_rate": 1.57451444515856e-05, + "loss": 0.1212, + "step": 28160 + }, + { + "epoch": 1.31, + "learning_rate": 1.574436066653081e-05, + "loss": 0.1134, + "step": 28165 + }, + { + "epoch": 1.31, + "learning_rate": 1.5743576881476025e-05, + "loss": 0.2492, + "step": 28170 + }, + { + "epoch": 1.31, + "learning_rate": 1.574279309642124e-05, + "loss": 0.3189, + "step": 28175 + }, + { + "epoch": 1.31, + "learning_rate": 1.5742009311366453e-05, + "loss": 0.2254, + "step": 28180 + }, + { + "epoch": 1.32, + "learning_rate": 1.5741225526311666e-05, + "loss": 0.2818, + "step": 28185 + }, + { + "epoch": 1.32, + "learning_rate": 1.574044174125688e-05, + "loss": 0.0439, + "step": 28190 + }, + { + "epoch": 1.32, + "learning_rate": 1.573965795620209e-05, + "loss": 0.1292, + "step": 28195 + }, + { + "epoch": 1.32, + "learning_rate": 1.5738874171147305e-05, + "loss": 0.1012, + "step": 28200 + }, + { + "epoch": 1.32, + "learning_rate": 1.573809038609252e-05, + "loss": 0.08, + "step": 28205 + }, + { + "epoch": 1.32, + "learning_rate": 1.5737306601037733e-05, + "loss": 0.1199, + "step": 28210 + }, + { + "epoch": 1.32, + "learning_rate": 1.5736522815982946e-05, + "loss": 0.2438, + "step": 28215 + }, + { + "epoch": 1.32, + "learning_rate": 1.573573903092816e-05, + "loss": 0.0924, + "step": 28220 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734955245873374e-05, + "loss": 0.2114, + "step": 28225 + }, + { + "epoch": 1.32, + "learning_rate": 1.5734171460818585e-05, + "loss": 0.323, + "step": 28230 + }, + { + "epoch": 1.32, + "learning_rate": 1.5733387675763802e-05, + "loss": 0.2703, + "step": 28235 + }, + { + "epoch": 1.32, + "learning_rate": 1.5732603890709013e-05, + "loss": 0.0825, + "step": 28240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5731820105654227e-05, + "loss": 0.0734, + "step": 28245 + }, + { + "epoch": 1.32, + "learning_rate": 1.573103632059944e-05, + "loss": 0.0782, + "step": 28250 + }, + { + "epoch": 1.32, + "learning_rate": 1.5730252535544654e-05, + "loss": 0.1712, + "step": 28255 + }, + { + "epoch": 1.32, + "learning_rate": 1.5729468750489868e-05, + "loss": 0.1776, + "step": 28260 + }, + { + "epoch": 1.32, + "learning_rate": 1.572868496543508e-05, + "loss": 0.1425, + "step": 28265 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727901180380293e-05, + "loss": 0.2537, + "step": 28270 + }, + { + "epoch": 1.32, + "learning_rate": 1.5727117395325507e-05, + "loss": 0.2227, + "step": 28275 + }, + { + "epoch": 1.32, + "learning_rate": 1.572633361027072e-05, + "loss": 0.4145, + "step": 28280 + }, + { + "epoch": 1.32, + "learning_rate": 1.5725549825215934e-05, + "loss": 0.4865, + "step": 28285 + }, + { + "epoch": 1.32, + "learning_rate": 1.5724766040161148e-05, + "loss": 0.0633, + "step": 28290 + }, + { + "epoch": 1.32, + "learning_rate": 1.572398225510636e-05, + "loss": 0.0838, + "step": 28295 + }, + { + "epoch": 1.32, + "learning_rate": 1.5723198470051576e-05, + "loss": 0.1071, + "step": 28300 + }, + { + "epoch": 1.32, + "learning_rate": 1.5722414684996787e-05, + "loss": 0.1015, + "step": 28305 + }, + { + "epoch": 1.32, + "learning_rate": 1.5721630899942e-05, + "loss": 0.0966, + "step": 28310 + }, + { + "epoch": 1.32, + "learning_rate": 1.5720847114887214e-05, + "loss": 0.2073, + "step": 28315 + }, + { + "epoch": 1.32, + "learning_rate": 1.572006332983243e-05, + "loss": 0.1782, + "step": 28320 + }, + { + "epoch": 1.32, + "learning_rate": 1.5719279544777642e-05, + "loss": 0.2457, + "step": 28325 + }, + { + "epoch": 1.32, + "learning_rate": 1.5718495759722853e-05, + "loss": 0.3423, + "step": 28330 + }, + { + "epoch": 1.32, + "learning_rate": 1.571771197466807e-05, + "loss": 0.3075, + "step": 28335 + }, + { + "epoch": 1.32, + "learning_rate": 1.571692818961328e-05, + "loss": 0.0612, + "step": 28340 + }, + { + "epoch": 1.32, + "learning_rate": 1.5716144404558494e-05, + "loss": 0.087, + "step": 28345 + }, + { + "epoch": 1.32, + "learning_rate": 1.571536061950371e-05, + "loss": 0.0356, + "step": 28350 + }, + { + "epoch": 1.32, + "learning_rate": 1.5714576834448922e-05, + "loss": 0.1484, + "step": 28355 + }, + { + "epoch": 1.32, + "learning_rate": 1.5713793049394136e-05, + "loss": 0.1245, + "step": 28360 + }, + { + "epoch": 1.32, + "learning_rate": 1.571300926433935e-05, + "loss": 0.117, + "step": 28365 + }, + { + "epoch": 1.32, + "learning_rate": 1.571222547928456e-05, + "loss": 0.187, + "step": 28370 + }, + { + "epoch": 1.32, + "learning_rate": 1.5711441694229778e-05, + "loss": 0.3141, + "step": 28375 + }, + { + "epoch": 1.32, + "learning_rate": 1.571065790917499e-05, + "loss": 0.3338, + "step": 28380 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709874124120202e-05, + "loss": 0.3198, + "step": 28385 + }, + { + "epoch": 1.32, + "learning_rate": 1.5709090339065416e-05, + "loss": 0.0383, + "step": 28390 + }, + { + "epoch": 1.32, + "learning_rate": 1.5708306554010627e-05, + "loss": 0.1249, + "step": 28395 + }, + { + "epoch": 1.33, + "learning_rate": 1.5707522768955844e-05, + "loss": 0.0838, + "step": 28400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5706738983901055e-05, + "loss": 0.1157, + "step": 28405 + }, + { + "epoch": 1.33, + "learning_rate": 1.570595519884627e-05, + "loss": 0.1878, + "step": 28410 + }, + { + "epoch": 1.33, + "learning_rate": 1.5705171413791482e-05, + "loss": 0.1487, + "step": 28415 + }, + { + "epoch": 1.33, + "learning_rate": 1.5704387628736696e-05, + "loss": 0.1422, + "step": 28420 + }, + { + "epoch": 1.33, + "learning_rate": 1.570360384368191e-05, + "loss": 0.1978, + "step": 28425 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702820058627124e-05, + "loss": 0.3651, + "step": 28430 + }, + { + "epoch": 1.33, + "learning_rate": 1.5702036273572338e-05, + "loss": 0.2579, + "step": 28435 + }, + { + "epoch": 1.33, + "learning_rate": 1.5701252488517552e-05, + "loss": 0.0636, + "step": 28440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5700468703462762e-05, + "loss": 0.069, + "step": 28445 + }, + { + "epoch": 1.33, + "learning_rate": 1.569968491840798e-05, + "loss": 0.0931, + "step": 28450 + }, + { + "epoch": 1.33, + "learning_rate": 1.569890113335319e-05, + "loss": 0.1109, + "step": 28455 + }, + { + "epoch": 1.33, + "learning_rate": 1.5698117348298404e-05, + "loss": 0.0914, + "step": 28460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5697333563243618e-05, + "loss": 0.1079, + "step": 28465 + }, + { + "epoch": 1.33, + "learning_rate": 1.569654977818883e-05, + "loss": 0.2067, + "step": 28470 + }, + { + "epoch": 1.33, + "learning_rate": 1.5695765993134046e-05, + "loss": 0.2517, + "step": 28475 + }, + { + "epoch": 1.33, + "learning_rate": 1.5694982208079256e-05, + "loss": 0.3391, + "step": 28480 + }, + { + "epoch": 1.33, + "learning_rate": 1.569419842302447e-05, + "loss": 0.1997, + "step": 28485 + }, + { + "epoch": 1.33, + "learning_rate": 1.5693414637969684e-05, + "loss": 0.0238, + "step": 28490 + }, + { + "epoch": 1.33, + "learning_rate": 1.5692630852914898e-05, + "loss": 0.0778, + "step": 28495 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691847067860112e-05, + "loss": 0.0956, + "step": 28500 + }, + { + "epoch": 1.33, + "learning_rate": 1.5691063282805326e-05, + "loss": 0.1695, + "step": 28505 + }, + { + "epoch": 1.33, + "learning_rate": 1.5690279497750536e-05, + "loss": 0.0927, + "step": 28510 + }, + { + "epoch": 1.33, + "learning_rate": 1.5689495712695754e-05, + "loss": 0.109, + "step": 28515 + }, + { + "epoch": 1.33, + "learning_rate": 1.5688711927640964e-05, + "loss": 0.2123, + "step": 28520 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687928142586178e-05, + "loss": 0.1514, + "step": 28525 + }, + { + "epoch": 1.33, + "learning_rate": 1.5687144357531392e-05, + "loss": 0.4057, + "step": 28530 + }, + { + "epoch": 1.33, + "learning_rate": 1.5686360572476606e-05, + "loss": 0.3171, + "step": 28535 + }, + { + "epoch": 1.33, + "learning_rate": 1.568557678742182e-05, + "loss": 0.0878, + "step": 28540 + }, + { + "epoch": 1.33, + "learning_rate": 1.568479300236703e-05, + "loss": 0.0565, + "step": 28545 + }, + { + "epoch": 1.33, + "learning_rate": 1.5684009217312248e-05, + "loss": 0.1183, + "step": 28550 + }, + { + "epoch": 1.33, + "learning_rate": 1.5683225432257458e-05, + "loss": 0.1481, + "step": 28555 + }, + { + "epoch": 1.33, + "learning_rate": 1.5682441647202672e-05, + "loss": 0.1402, + "step": 28560 + }, + { + "epoch": 1.33, + "learning_rate": 1.5681657862147886e-05, + "loss": 0.1809, + "step": 28565 + }, + { + "epoch": 1.33, + "learning_rate": 1.56808740770931e-05, + "loss": 0.2217, + "step": 28570 + }, + { + "epoch": 1.33, + "learning_rate": 1.5680090292038314e-05, + "loss": 0.3258, + "step": 28575 + }, + { + "epoch": 1.33, + "learning_rate": 1.5679306506983528e-05, + "loss": 0.2286, + "step": 28580 + }, + { + "epoch": 1.33, + "learning_rate": 1.5678522721928738e-05, + "loss": 0.2848, + "step": 28585 + }, + { + "epoch": 1.33, + "learning_rate": 1.5677738936873952e-05, + "loss": 0.0547, + "step": 28590 + }, + { + "epoch": 1.33, + "learning_rate": 1.5676955151819166e-05, + "loss": 0.0618, + "step": 28595 + }, + { + "epoch": 1.33, + "learning_rate": 1.567617136676438e-05, + "loss": 0.0941, + "step": 28600 + }, + { + "epoch": 1.33, + "learning_rate": 1.5675387581709594e-05, + "loss": 0.1233, + "step": 28605 + }, + { + "epoch": 1.33, + "learning_rate": 1.5674603796654804e-05, + "loss": 0.1848, + "step": 28610 + }, + { + "epoch": 1.34, + "learning_rate": 1.567382001160002e-05, + "loss": 0.0776, + "step": 28615 + }, + { + "epoch": 1.34, + "learning_rate": 1.5673036226545232e-05, + "loss": 0.248, + "step": 28620 + }, + { + "epoch": 1.34, + "learning_rate": 1.5672252441490446e-05, + "loss": 0.232, + "step": 28625 + }, + { + "epoch": 1.34, + "learning_rate": 1.567146865643566e-05, + "loss": 0.4364, + "step": 28630 + }, + { + "epoch": 1.34, + "learning_rate": 1.5670684871380874e-05, + "loss": 0.2168, + "step": 28635 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669901086326088e-05, + "loss": 0.049, + "step": 28640 + }, + { + "epoch": 1.34, + "learning_rate": 1.5669117301271302e-05, + "loss": 0.0206, + "step": 28645 + }, + { + "epoch": 1.34, + "learning_rate": 1.5668333516216516e-05, + "loss": 0.1098, + "step": 28650 + }, + { + "epoch": 1.34, + "learning_rate": 1.5667549731161726e-05, + "loss": 0.1082, + "step": 28655 + }, + { + "epoch": 1.34, + "learning_rate": 1.566676594610694e-05, + "loss": 0.1286, + "step": 28660 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665982161052154e-05, + "loss": 0.2045, + "step": 28665 + }, + { + "epoch": 1.34, + "learning_rate": 1.5665198375997368e-05, + "loss": 0.1718, + "step": 28670 + }, + { + "epoch": 1.34, + "learning_rate": 1.5664414590942582e-05, + "loss": 0.3584, + "step": 28675 + }, + { + "epoch": 1.34, + "learning_rate": 1.5663630805887796e-05, + "loss": 0.4569, + "step": 28680 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662847020833006e-05, + "loss": 0.3119, + "step": 28685 + }, + { + "epoch": 1.34, + "learning_rate": 1.5662063235778223e-05, + "loss": 0.0559, + "step": 28690 + }, + { + "epoch": 1.34, + "learning_rate": 1.5661279450723434e-05, + "loss": 0.0382, + "step": 28695 + }, + { + "epoch": 1.34, + "learning_rate": 1.5660495665668648e-05, + "loss": 0.1046, + "step": 28700 + }, + { + "epoch": 1.34, + "learning_rate": 1.5659711880613862e-05, + "loss": 0.1162, + "step": 28705 + }, + { + "epoch": 1.34, + "learning_rate": 1.5658928095559076e-05, + "loss": 0.1435, + "step": 28710 + }, + { + "epoch": 1.34, + "learning_rate": 1.565814431050429e-05, + "loss": 0.203, + "step": 28715 + }, + { + "epoch": 1.34, + "learning_rate": 1.56573605254495e-05, + "loss": 0.2438, + "step": 28720 + }, + { + "epoch": 1.34, + "learning_rate": 1.5656576740394714e-05, + "loss": 0.214, + "step": 28725 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655792955339928e-05, + "loss": 0.369, + "step": 28730 + }, + { + "epoch": 1.34, + "learning_rate": 1.5655009170285142e-05, + "loss": 0.2569, + "step": 28735 + }, + { + "epoch": 1.34, + "learning_rate": 1.5654225385230356e-05, + "loss": 0.0718, + "step": 28740 + }, + { + "epoch": 1.34, + "learning_rate": 1.565344160017557e-05, + "loss": 0.1807, + "step": 28745 + }, + { + "epoch": 1.34, + "learning_rate": 1.5652657815120784e-05, + "loss": 0.0907, + "step": 28750 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651874030065997e-05, + "loss": 0.1447, + "step": 28755 + }, + { + "epoch": 1.34, + "learning_rate": 1.5651090245011208e-05, + "loss": 0.1166, + "step": 28760 + }, + { + "epoch": 1.34, + "learning_rate": 1.5650306459956425e-05, + "loss": 0.1564, + "step": 28765 + }, + { + "epoch": 1.34, + "learning_rate": 1.5649522674901636e-05, + "loss": 0.2352, + "step": 28770 + }, + { + "epoch": 1.34, + "learning_rate": 1.564873888984685e-05, + "loss": 0.2784, + "step": 28775 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647955104792064e-05, + "loss": 0.3881, + "step": 28780 + }, + { + "epoch": 1.34, + "learning_rate": 1.5647171319737274e-05, + "loss": 0.2386, + "step": 28785 + }, + { + "epoch": 1.34, + "learning_rate": 1.564638753468249e-05, + "loss": 0.0621, + "step": 28790 + }, + { + "epoch": 1.34, + "learning_rate": 1.5645603749627702e-05, + "loss": 0.0444, + "step": 28795 + }, + { + "epoch": 1.34, + "learning_rate": 1.5644819964572916e-05, + "loss": 0.0656, + "step": 28800 + }, + { + "epoch": 1.34, + "learning_rate": 1.564403617951813e-05, + "loss": 0.0765, + "step": 28805 + }, + { + "epoch": 1.34, + "learning_rate": 1.5643252394463344e-05, + "loss": 0.2032, + "step": 28810 + }, + { + "epoch": 1.34, + "learning_rate": 1.5642468609408558e-05, + "loss": 0.1976, + "step": 28815 + }, + { + "epoch": 1.34, + "learning_rate": 1.564168482435377e-05, + "loss": 0.1735, + "step": 28820 + }, + { + "epoch": 1.35, + "learning_rate": 1.5640901039298982e-05, + "loss": 0.2373, + "step": 28825 + }, + { + "epoch": 1.35, + "learning_rate": 1.56401172542442e-05, + "loss": 0.3012, + "step": 28830 + }, + { + "epoch": 1.35, + "learning_rate": 1.563933346918941e-05, + "loss": 0.3411, + "step": 28835 + }, + { + "epoch": 1.35, + "learning_rate": 1.5638549684134624e-05, + "loss": 0.0834, + "step": 28840 + }, + { + "epoch": 1.35, + "learning_rate": 1.5637765899079838e-05, + "loss": 0.0505, + "step": 28845 + }, + { + "epoch": 1.35, + "learning_rate": 1.563698211402505e-05, + "loss": 0.076, + "step": 28850 + }, + { + "epoch": 1.35, + "learning_rate": 1.5636198328970265e-05, + "loss": 0.0456, + "step": 28855 + }, + { + "epoch": 1.35, + "learning_rate": 1.5635414543915476e-05, + "loss": 0.0746, + "step": 28860 + }, + { + "epoch": 1.35, + "learning_rate": 1.5634630758860693e-05, + "loss": 0.1093, + "step": 28865 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633846973805904e-05, + "loss": 0.1907, + "step": 28870 + }, + { + "epoch": 1.35, + "learning_rate": 1.5633063188751118e-05, + "loss": 0.1332, + "step": 28875 + }, + { + "epoch": 1.35, + "learning_rate": 1.563227940369633e-05, + "loss": 0.3031, + "step": 28880 + }, + { + "epoch": 1.35, + "learning_rate": 1.5631495618641545e-05, + "loss": 0.3297, + "step": 28885 + }, + { + "epoch": 1.35, + "learning_rate": 1.563071183358676e-05, + "loss": 0.0652, + "step": 28890 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629928048531973e-05, + "loss": 0.0844, + "step": 28895 + }, + { + "epoch": 1.35, + "learning_rate": 1.5629144263477184e-05, + "loss": 0.1097, + "step": 28900 + }, + { + "epoch": 1.35, + "learning_rate": 1.56283604784224e-05, + "loss": 0.115, + "step": 28905 + }, + { + "epoch": 1.35, + "learning_rate": 1.562757669336761e-05, + "loss": 0.1077, + "step": 28910 + }, + { + "epoch": 1.35, + "learning_rate": 1.5626792908312826e-05, + "loss": 0.1605, + "step": 28915 + }, + { + "epoch": 1.35, + "learning_rate": 1.562600912325804e-05, + "loss": 0.2131, + "step": 28920 + }, + { + "epoch": 1.35, + "learning_rate": 1.562522533820325e-05, + "loss": 0.1889, + "step": 28925 + }, + { + "epoch": 1.35, + "learning_rate": 1.5624441553148467e-05, + "loss": 0.3687, + "step": 28930 + }, + { + "epoch": 1.35, + "learning_rate": 1.5623657768093678e-05, + "loss": 0.2025, + "step": 28935 + }, + { + "epoch": 1.35, + "learning_rate": 1.562287398303889e-05, + "loss": 0.0673, + "step": 28940 + }, + { + "epoch": 1.35, + "learning_rate": 1.5622090197984106e-05, + "loss": 0.0705, + "step": 28945 + }, + { + "epoch": 1.35, + "learning_rate": 1.562130641292932e-05, + "loss": 0.1058, + "step": 28950 + }, + { + "epoch": 1.35, + "learning_rate": 1.5620522627874533e-05, + "loss": 0.1207, + "step": 28955 + }, + { + "epoch": 1.35, + "learning_rate": 1.5619738842819747e-05, + "loss": 0.1275, + "step": 28960 + }, + { + "epoch": 1.35, + "learning_rate": 1.561895505776496e-05, + "loss": 0.2169, + "step": 28965 + }, + { + "epoch": 1.35, + "learning_rate": 1.5618171272710175e-05, + "loss": 0.1789, + "step": 28970 + }, + { + "epoch": 1.35, + "learning_rate": 1.5617387487655386e-05, + "loss": 0.2378, + "step": 28975 + }, + { + "epoch": 1.35, + "learning_rate": 1.56166037026006e-05, + "loss": 0.4098, + "step": 28980 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615819917545813e-05, + "loss": 0.2499, + "step": 28985 + }, + { + "epoch": 1.35, + "learning_rate": 1.5615036132491027e-05, + "loss": 0.0793, + "step": 28990 + }, + { + "epoch": 1.35, + "learning_rate": 1.561425234743624e-05, + "loss": 0.067, + "step": 28995 + }, + { + "epoch": 1.35, + "learning_rate": 1.5613468562381452e-05, + "loss": 0.0746, + "step": 29000 + }, + { + "epoch": 1.35, + "learning_rate": 1.561268477732667e-05, + "loss": 0.2106, + "step": 29005 + }, + { + "epoch": 1.35, + "learning_rate": 1.561190099227188e-05, + "loss": 0.1199, + "step": 29010 + }, + { + "epoch": 1.35, + "learning_rate": 1.5611117207217093e-05, + "loss": 0.0969, + "step": 29015 + }, + { + "epoch": 1.35, + "learning_rate": 1.5610333422162307e-05, + "loss": 0.3277, + "step": 29020 + }, + { + "epoch": 1.35, + "learning_rate": 1.560954963710752e-05, + "loss": 0.2931, + "step": 29025 + }, + { + "epoch": 1.35, + "learning_rate": 1.5608765852052735e-05, + "loss": 0.4393, + "step": 29030 + }, + { + "epoch": 1.35, + "learning_rate": 1.560798206699795e-05, + "loss": 0.4498, + "step": 29035 + }, + { + "epoch": 1.36, + "learning_rate": 1.560719828194316e-05, + "loss": 0.0525, + "step": 29040 + }, + { + "epoch": 1.36, + "learning_rate": 1.5606414496888374e-05, + "loss": 0.0493, + "step": 29045 + }, + { + "epoch": 1.36, + "learning_rate": 1.5605630711833587e-05, + "loss": 0.0895, + "step": 29050 + }, + { + "epoch": 1.36, + "learning_rate": 1.56048469267788e-05, + "loss": 0.1352, + "step": 29055 + }, + { + "epoch": 1.36, + "learning_rate": 1.5604063141724015e-05, + "loss": 0.1055, + "step": 29060 + }, + { + "epoch": 1.36, + "learning_rate": 1.560327935666923e-05, + "loss": 0.2887, + "step": 29065 + }, + { + "epoch": 1.36, + "learning_rate": 1.5602495571614443e-05, + "loss": 0.1776, + "step": 29070 + }, + { + "epoch": 1.36, + "learning_rate": 1.5601711786559654e-05, + "loss": 0.1566, + "step": 29075 + }, + { + "epoch": 1.36, + "learning_rate": 1.560092800150487e-05, + "loss": 0.2249, + "step": 29080 + }, + { + "epoch": 1.36, + "learning_rate": 1.560014421645008e-05, + "loss": 0.4125, + "step": 29085 + }, + { + "epoch": 1.36, + "learning_rate": 1.5599360431395295e-05, + "loss": 0.1064, + "step": 29090 + }, + { + "epoch": 1.36, + "learning_rate": 1.559857664634051e-05, + "loss": 0.1015, + "step": 29095 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597792861285723e-05, + "loss": 0.0806, + "step": 29100 + }, + { + "epoch": 1.36, + "learning_rate": 1.5597009076230937e-05, + "loss": 0.1206, + "step": 29105 + }, + { + "epoch": 1.36, + "learning_rate": 1.5596225291176148e-05, + "loss": 0.1109, + "step": 29110 + }, + { + "epoch": 1.36, + "learning_rate": 1.559544150612136e-05, + "loss": 0.1305, + "step": 29115 + }, + { + "epoch": 1.36, + "learning_rate": 1.5594657721066575e-05, + "loss": 0.2071, + "step": 29120 + }, + { + "epoch": 1.36, + "learning_rate": 1.559387393601179e-05, + "loss": 0.1554, + "step": 29125 + }, + { + "epoch": 1.36, + "learning_rate": 1.5593090150957003e-05, + "loss": 0.5038, + "step": 29130 + }, + { + "epoch": 1.36, + "learning_rate": 1.5592306365902217e-05, + "loss": 0.3296, + "step": 29135 + }, + { + "epoch": 1.36, + "learning_rate": 1.5591522580847428e-05, + "loss": 0.1192, + "step": 29140 + }, + { + "epoch": 1.36, + "learning_rate": 1.5590738795792645e-05, + "loss": 0.1061, + "step": 29145 + }, + { + "epoch": 1.36, + "learning_rate": 1.5589955010737855e-05, + "loss": 0.0826, + "step": 29150 + }, + { + "epoch": 1.36, + "learning_rate": 1.558917122568307e-05, + "loss": 0.1131, + "step": 29155 + }, + { + "epoch": 1.36, + "learning_rate": 1.5588387440628283e-05, + "loss": 0.1495, + "step": 29160 + }, + { + "epoch": 1.36, + "learning_rate": 1.5587603655573497e-05, + "loss": 0.2157, + "step": 29165 + }, + { + "epoch": 1.36, + "learning_rate": 1.558681987051871e-05, + "loss": 0.2206, + "step": 29170 + }, + { + "epoch": 1.36, + "learning_rate": 1.558603608546392e-05, + "loss": 0.2772, + "step": 29175 + }, + { + "epoch": 1.36, + "learning_rate": 1.558525230040914e-05, + "loss": 0.5025, + "step": 29180 + }, + { + "epoch": 1.36, + "learning_rate": 1.558446851535435e-05, + "loss": 0.3455, + "step": 29185 + }, + { + "epoch": 1.36, + "learning_rate": 1.5583684730299563e-05, + "loss": 0.0408, + "step": 29190 + }, + { + "epoch": 1.36, + "learning_rate": 1.5582900945244777e-05, + "loss": 0.0804, + "step": 29195 + }, + { + "epoch": 1.36, + "learning_rate": 1.558211716018999e-05, + "loss": 0.1028, + "step": 29200 + }, + { + "epoch": 1.36, + "learning_rate": 1.5581333375135205e-05, + "loss": 0.0895, + "step": 29205 + }, + { + "epoch": 1.36, + "learning_rate": 1.558054959008042e-05, + "loss": 0.151, + "step": 29210 + }, + { + "epoch": 1.36, + "learning_rate": 1.557976580502563e-05, + "loss": 0.1078, + "step": 29215 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578982019970847e-05, + "loss": 0.287, + "step": 29220 + }, + { + "epoch": 1.36, + "learning_rate": 1.5578198234916057e-05, + "loss": 0.2512, + "step": 29225 + }, + { + "epoch": 1.36, + "learning_rate": 1.557741444986127e-05, + "loss": 0.3908, + "step": 29230 + }, + { + "epoch": 1.36, + "learning_rate": 1.5576630664806485e-05, + "loss": 0.3252, + "step": 29235 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575846879751695e-05, + "loss": 0.0572, + "step": 29240 + }, + { + "epoch": 1.36, + "learning_rate": 1.5575063094696913e-05, + "loss": 0.0728, + "step": 29245 + }, + { + "epoch": 1.36, + "learning_rate": 1.5574279309642123e-05, + "loss": 0.1044, + "step": 29250 + }, + { + "epoch": 1.37, + "learning_rate": 1.5573495524587337e-05, + "loss": 0.1306, + "step": 29255 + }, + { + "epoch": 1.37, + "learning_rate": 1.557271173953255e-05, + "loss": 0.1469, + "step": 29260 + }, + { + "epoch": 1.37, + "learning_rate": 1.5571927954477765e-05, + "loss": 0.1307, + "step": 29265 + }, + { + "epoch": 1.37, + "learning_rate": 1.557114416942298e-05, + "loss": 0.2949, + "step": 29270 + }, + { + "epoch": 1.37, + "learning_rate": 1.5570360384368193e-05, + "loss": 0.1483, + "step": 29275 + }, + { + "epoch": 1.37, + "learning_rate": 1.5569576599313407e-05, + "loss": 0.3853, + "step": 29280 + }, + { + "epoch": 1.37, + "learning_rate": 1.556879281425862e-05, + "loss": 0.2308, + "step": 29285 + }, + { + "epoch": 1.37, + "learning_rate": 1.556800902920383e-05, + "loss": 0.0336, + "step": 29290 + }, + { + "epoch": 1.37, + "learning_rate": 1.556722524414905e-05, + "loss": 0.0368, + "step": 29295 + }, + { + "epoch": 1.37, + "learning_rate": 1.556644145909426e-05, + "loss": 0.0633, + "step": 29300 + }, + { + "epoch": 1.37, + "learning_rate": 1.5565657674039473e-05, + "loss": 0.1041, + "step": 29305 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564873888984687e-05, + "loss": 0.2455, + "step": 29310 + }, + { + "epoch": 1.37, + "learning_rate": 1.5564090103929897e-05, + "loss": 0.128, + "step": 29315 + }, + { + "epoch": 1.37, + "learning_rate": 1.5563306318875115e-05, + "loss": 0.1876, + "step": 29320 + }, + { + "epoch": 1.37, + "learning_rate": 1.5562522533820325e-05, + "loss": 0.1958, + "step": 29325 + }, + { + "epoch": 1.37, + "learning_rate": 1.556173874876554e-05, + "loss": 0.3136, + "step": 29330 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560954963710753e-05, + "loss": 0.3116, + "step": 29335 + }, + { + "epoch": 1.37, + "learning_rate": 1.5560171178655967e-05, + "loss": 0.0619, + "step": 29340 + }, + { + "epoch": 1.37, + "learning_rate": 1.555938739360118e-05, + "loss": 0.0497, + "step": 29345 + }, + { + "epoch": 1.37, + "learning_rate": 1.5558603608546395e-05, + "loss": 0.1101, + "step": 29350 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557819823491605e-05, + "loss": 0.1347, + "step": 29355 + }, + { + "epoch": 1.37, + "learning_rate": 1.5557036038436822e-05, + "loss": 0.14, + "step": 29360 + }, + { + "epoch": 1.37, + "learning_rate": 1.5556252253382033e-05, + "loss": 0.2094, + "step": 29365 + }, + { + "epoch": 1.37, + "learning_rate": 1.5555468468327247e-05, + "loss": 0.2115, + "step": 29370 + }, + { + "epoch": 1.37, + "learning_rate": 1.555468468327246e-05, + "loss": 0.23, + "step": 29375 + }, + { + "epoch": 1.37, + "learning_rate": 1.5553900898217675e-05, + "loss": 0.409, + "step": 29380 + }, + { + "epoch": 1.37, + "learning_rate": 1.555311711316289e-05, + "loss": 0.2575, + "step": 29385 + }, + { + "epoch": 1.37, + "learning_rate": 1.55523333281081e-05, + "loss": 0.0701, + "step": 29390 + }, + { + "epoch": 1.37, + "learning_rate": 1.5551549543053316e-05, + "loss": 0.0587, + "step": 29395 + }, + { + "epoch": 1.37, + "learning_rate": 1.5550765757998527e-05, + "loss": 0.1614, + "step": 29400 + }, + { + "epoch": 1.37, + "learning_rate": 1.554998197294374e-05, + "loss": 0.1066, + "step": 29405 + }, + { + "epoch": 1.37, + "learning_rate": 1.5549198187888955e-05, + "loss": 0.1333, + "step": 29410 + }, + { + "epoch": 1.37, + "learning_rate": 1.554841440283417e-05, + "loss": 0.181, + "step": 29415 + }, + { + "epoch": 1.37, + "learning_rate": 1.5547630617779383e-05, + "loss": 0.183, + "step": 29420 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546846832724596e-05, + "loss": 0.2203, + "step": 29425 + }, + { + "epoch": 1.37, + "learning_rate": 1.5546063047669807e-05, + "loss": 0.4626, + "step": 29430 + }, + { + "epoch": 1.37, + "learning_rate": 1.554527926261502e-05, + "loss": 0.2009, + "step": 29435 + }, + { + "epoch": 1.37, + "learning_rate": 1.5544495477560235e-05, + "loss": 0.0562, + "step": 29440 + }, + { + "epoch": 1.37, + "learning_rate": 1.554371169250545e-05, + "loss": 0.1155, + "step": 29445 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542927907450663e-05, + "loss": 0.0612, + "step": 29450 + }, + { + "epoch": 1.37, + "learning_rate": 1.5542144122395873e-05, + "loss": 0.1157, + "step": 29455 + }, + { + "epoch": 1.37, + "learning_rate": 1.554136033734109e-05, + "loss": 0.1621, + "step": 29460 + }, + { + "epoch": 1.37, + "learning_rate": 1.55405765522863e-05, + "loss": 0.0789, + "step": 29465 + }, + { + "epoch": 1.38, + "learning_rate": 1.5539792767231515e-05, + "loss": 0.2351, + "step": 29470 + }, + { + "epoch": 1.38, + "learning_rate": 1.553900898217673e-05, + "loss": 0.2513, + "step": 29475 + }, + { + "epoch": 1.38, + "learning_rate": 1.5538225197121943e-05, + "loss": 0.3753, + "step": 29480 + }, + { + "epoch": 1.38, + "learning_rate": 1.5537441412067157e-05, + "loss": 0.2661, + "step": 29485 + }, + { + "epoch": 1.38, + "learning_rate": 1.553665762701237e-05, + "loss": 0.1098, + "step": 29490 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535873841957584e-05, + "loss": 0.0493, + "step": 29495 + }, + { + "epoch": 1.38, + "learning_rate": 1.5535090056902795e-05, + "loss": 0.1339, + "step": 29500 + }, + { + "epoch": 1.38, + "learning_rate": 1.553430627184801e-05, + "loss": 0.1432, + "step": 29505 + }, + { + "epoch": 1.38, + "learning_rate": 1.5533522486793223e-05, + "loss": 0.107, + "step": 29510 + }, + { + "epoch": 1.38, + "learning_rate": 1.5532738701738437e-05, + "loss": 0.1993, + "step": 29515 + }, + { + "epoch": 1.38, + "learning_rate": 1.553195491668365e-05, + "loss": 0.3002, + "step": 29520 + }, + { + "epoch": 1.38, + "learning_rate": 1.5531171131628864e-05, + "loss": 0.2679, + "step": 29525 + }, + { + "epoch": 1.38, + "learning_rate": 1.5530387346574075e-05, + "loss": 0.3519, + "step": 29530 + }, + { + "epoch": 1.38, + "learning_rate": 1.5529603561519292e-05, + "loss": 0.2758, + "step": 29535 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528819776464503e-05, + "loss": 0.1208, + "step": 29540 + }, + { + "epoch": 1.38, + "learning_rate": 1.5528035991409717e-05, + "loss": 0.0767, + "step": 29545 + }, + { + "epoch": 1.38, + "learning_rate": 1.552725220635493e-05, + "loss": 0.0603, + "step": 29550 + }, + { + "epoch": 1.38, + "learning_rate": 1.5526468421300144e-05, + "loss": 0.0691, + "step": 29555 + }, + { + "epoch": 1.38, + "learning_rate": 1.552568463624536e-05, + "loss": 0.1479, + "step": 29560 + }, + { + "epoch": 1.38, + "learning_rate": 1.552490085119057e-05, + "loss": 0.0891, + "step": 29565 + }, + { + "epoch": 1.38, + "learning_rate": 1.5524117066135783e-05, + "loss": 0.2104, + "step": 29570 + }, + { + "epoch": 1.38, + "learning_rate": 1.5523333281080997e-05, + "loss": 0.2748, + "step": 29575 + }, + { + "epoch": 1.38, + "learning_rate": 1.552254949602621e-05, + "loss": 0.3622, + "step": 29580 + }, + { + "epoch": 1.38, + "learning_rate": 1.5521765710971425e-05, + "loss": 0.252, + "step": 29585 + }, + { + "epoch": 1.38, + "learning_rate": 1.552098192591664e-05, + "loss": 0.079, + "step": 29590 + }, + { + "epoch": 1.38, + "learning_rate": 1.5520198140861852e-05, + "loss": 0.0913, + "step": 29595 + }, + { + "epoch": 1.38, + "learning_rate": 1.5519414355807066e-05, + "loss": 0.1742, + "step": 29600 + }, + { + "epoch": 1.38, + "learning_rate": 1.5518630570752277e-05, + "loss": 0.073, + "step": 29605 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517846785697494e-05, + "loss": 0.091, + "step": 29610 + }, + { + "epoch": 1.38, + "learning_rate": 1.5517063000642705e-05, + "loss": 0.1321, + "step": 29615 + }, + { + "epoch": 1.38, + "learning_rate": 1.551627921558792e-05, + "loss": 0.1131, + "step": 29620 + }, + { + "epoch": 1.38, + "learning_rate": 1.5515495430533132e-05, + "loss": 0.3363, + "step": 29625 + }, + { + "epoch": 1.38, + "learning_rate": 1.5514711645478343e-05, + "loss": 0.3416, + "step": 29630 + }, + { + "epoch": 1.38, + "learning_rate": 1.551392786042356e-05, + "loss": 0.3418, + "step": 29635 + }, + { + "epoch": 1.38, + "learning_rate": 1.551314407536877e-05, + "loss": 0.0653, + "step": 29640 + }, + { + "epoch": 1.38, + "learning_rate": 1.5512360290313985e-05, + "loss": 0.0418, + "step": 29645 + }, + { + "epoch": 1.38, + "learning_rate": 1.55115765052592e-05, + "loss": 0.0941, + "step": 29650 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510792720204412e-05, + "loss": 0.1759, + "step": 29655 + }, + { + "epoch": 1.38, + "learning_rate": 1.5510008935149626e-05, + "loss": 0.111, + "step": 29660 + }, + { + "epoch": 1.38, + "learning_rate": 1.550922515009484e-05, + "loss": 0.1351, + "step": 29665 + }, + { + "epoch": 1.38, + "learning_rate": 1.550844136504005e-05, + "loss": 0.1784, + "step": 29670 + }, + { + "epoch": 1.38, + "learning_rate": 1.5507657579985268e-05, + "loss": 0.2896, + "step": 29675 + }, + { + "epoch": 1.38, + "learning_rate": 1.550687379493048e-05, + "loss": 0.3232, + "step": 29680 + }, + { + "epoch": 1.39, + "learning_rate": 1.5506090009875692e-05, + "loss": 0.5113, + "step": 29685 + }, + { + "epoch": 1.39, + "learning_rate": 1.5505306224820906e-05, + "loss": 0.0731, + "step": 29690 + }, + { + "epoch": 1.39, + "learning_rate": 1.550452243976612e-05, + "loss": 0.0491, + "step": 29695 + }, + { + "epoch": 1.39, + "learning_rate": 1.5503738654711334e-05, + "loss": 0.1016, + "step": 29700 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502954869656545e-05, + "loss": 0.0946, + "step": 29705 + }, + { + "epoch": 1.39, + "learning_rate": 1.5502171084601762e-05, + "loss": 0.1709, + "step": 29710 + }, + { + "epoch": 1.39, + "learning_rate": 1.5501387299546973e-05, + "loss": 0.2002, + "step": 29715 + }, + { + "epoch": 1.39, + "learning_rate": 1.5500603514492186e-05, + "loss": 0.2524, + "step": 29720 + }, + { + "epoch": 1.39, + "learning_rate": 1.54998197294374e-05, + "loss": 0.2481, + "step": 29725 + }, + { + "epoch": 1.39, + "learning_rate": 1.5499035944382614e-05, + "loss": 0.4631, + "step": 29730 + }, + { + "epoch": 1.39, + "learning_rate": 1.5498252159327828e-05, + "loss": 0.273, + "step": 29735 + }, + { + "epoch": 1.39, + "learning_rate": 1.5497468374273042e-05, + "loss": 0.0508, + "step": 29740 + }, + { + "epoch": 1.39, + "learning_rate": 1.5496684589218253e-05, + "loss": 0.1125, + "step": 29745 + }, + { + "epoch": 1.39, + "learning_rate": 1.549590080416347e-05, + "loss": 0.0327, + "step": 29750 + }, + { + "epoch": 1.39, + "learning_rate": 1.549511701910868e-05, + "loss": 0.1278, + "step": 29755 + }, + { + "epoch": 1.39, + "learning_rate": 1.5494333234053894e-05, + "loss": 0.0829, + "step": 29760 + }, + { + "epoch": 1.39, + "learning_rate": 1.5493549448999108e-05, + "loss": 0.1396, + "step": 29765 + }, + { + "epoch": 1.39, + "learning_rate": 1.549276566394432e-05, + "loss": 0.1894, + "step": 29770 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491981878889536e-05, + "loss": 0.2046, + "step": 29775 + }, + { + "epoch": 1.39, + "learning_rate": 1.5491198093834747e-05, + "loss": 0.2948, + "step": 29780 + }, + { + "epoch": 1.39, + "learning_rate": 1.549041430877996e-05, + "loss": 0.3256, + "step": 29785 + }, + { + "epoch": 1.39, + "learning_rate": 1.5489630523725174e-05, + "loss": 0.09, + "step": 29790 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488846738670388e-05, + "loss": 0.0646, + "step": 29795 + }, + { + "epoch": 1.39, + "learning_rate": 1.5488062953615602e-05, + "loss": 0.1158, + "step": 29800 + }, + { + "epoch": 1.39, + "learning_rate": 1.5487279168560816e-05, + "loss": 0.1228, + "step": 29805 + }, + { + "epoch": 1.39, + "learning_rate": 1.548649538350603e-05, + "loss": 0.1435, + "step": 29810 + }, + { + "epoch": 1.39, + "learning_rate": 1.5485711598451244e-05, + "loss": 0.1631, + "step": 29815 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484927813396454e-05, + "loss": 0.0708, + "step": 29820 + }, + { + "epoch": 1.39, + "learning_rate": 1.5484144028341668e-05, + "loss": 0.4099, + "step": 29825 + }, + { + "epoch": 1.39, + "learning_rate": 1.5483360243286882e-05, + "loss": 0.4474, + "step": 29830 + }, + { + "epoch": 1.39, + "learning_rate": 1.5482576458232096e-05, + "loss": 0.3448, + "step": 29835 + }, + { + "epoch": 1.39, + "learning_rate": 1.548179267317731e-05, + "loss": 0.0606, + "step": 29840 + }, + { + "epoch": 1.39, + "learning_rate": 1.548100888812252e-05, + "loss": 0.1094, + "step": 29845 + }, + { + "epoch": 1.39, + "learning_rate": 1.5480225103067738e-05, + "loss": 0.1081, + "step": 29850 + }, + { + "epoch": 1.39, + "learning_rate": 1.547944131801295e-05, + "loss": 0.1361, + "step": 29855 + }, + { + "epoch": 1.39, + "learning_rate": 1.5478657532958162e-05, + "loss": 0.1611, + "step": 29860 + }, + { + "epoch": 1.39, + "learning_rate": 1.5477873747903376e-05, + "loss": 0.1986, + "step": 29865 + }, + { + "epoch": 1.39, + "learning_rate": 1.547708996284859e-05, + "loss": 0.1848, + "step": 29870 + }, + { + "epoch": 1.39, + "learning_rate": 1.5476306177793804e-05, + "loss": 0.3482, + "step": 29875 + }, + { + "epoch": 1.39, + "learning_rate": 1.5475522392739018e-05, + "loss": 0.4131, + "step": 29880 + }, + { + "epoch": 1.39, + "learning_rate": 1.547473860768423e-05, + "loss": 0.2946, + "step": 29885 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473954822629442e-05, + "loss": 0.0356, + "step": 29890 + }, + { + "epoch": 1.39, + "learning_rate": 1.5473171037574656e-05, + "loss": 0.0547, + "step": 29895 + }, + { + "epoch": 1.4, + "learning_rate": 1.547238725251987e-05, + "loss": 0.1375, + "step": 29900 + }, + { + "epoch": 1.4, + "learning_rate": 1.5471603467465084e-05, + "loss": 0.119, + "step": 29905 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470819682410298e-05, + "loss": 0.107, + "step": 29910 + }, + { + "epoch": 1.4, + "learning_rate": 1.5470035897355512e-05, + "loss": 0.1367, + "step": 29915 + }, + { + "epoch": 1.4, + "learning_rate": 1.5469252112300722e-05, + "loss": 0.147, + "step": 29920 + }, + { + "epoch": 1.4, + "learning_rate": 1.546846832724594e-05, + "loss": 0.2123, + "step": 29925 + }, + { + "epoch": 1.4, + "learning_rate": 1.546768454219115e-05, + "loss": 0.3934, + "step": 29930 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466900757136364e-05, + "loss": 0.2301, + "step": 29935 + }, + { + "epoch": 1.4, + "learning_rate": 1.5466116972081578e-05, + "loss": 0.045, + "step": 29940 + }, + { + "epoch": 1.4, + "learning_rate": 1.5465333187026792e-05, + "loss": 0.092, + "step": 29945 + }, + { + "epoch": 1.4, + "learning_rate": 1.5464549401972006e-05, + "loss": 0.0908, + "step": 29950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5463765616917216e-05, + "loss": 0.1098, + "step": 29955 + }, + { + "epoch": 1.4, + "learning_rate": 1.546298183186243e-05, + "loss": 0.0832, + "step": 29960 + }, + { + "epoch": 1.4, + "learning_rate": 1.5462198046807644e-05, + "loss": 0.1968, + "step": 29965 + }, + { + "epoch": 1.4, + "learning_rate": 1.5461414261752858e-05, + "loss": 0.2016, + "step": 29970 + }, + { + "epoch": 1.4, + "learning_rate": 1.5460630476698072e-05, + "loss": 0.2575, + "step": 29975 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459846691643286e-05, + "loss": 0.4952, + "step": 29980 + }, + { + "epoch": 1.4, + "learning_rate": 1.5459062906588496e-05, + "loss": 0.364, + "step": 29985 + }, + { + "epoch": 1.4, + "learning_rate": 1.5458279121533714e-05, + "loss": 0.0521, + "step": 29990 + }, + { + "epoch": 1.4, + "learning_rate": 1.5457495336478924e-05, + "loss": 0.0457, + "step": 29995 + }, + { + "epoch": 1.4, + "learning_rate": 1.5456711551424138e-05, + "loss": 0.0946, + "step": 30000 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455927766369352e-05, + "loss": 0.1691, + "step": 30005 + }, + { + "epoch": 1.4, + "learning_rate": 1.5455143981314566e-05, + "loss": 0.1704, + "step": 30010 + }, + { + "epoch": 1.4, + "learning_rate": 1.545436019625978e-05, + "loss": 0.1653, + "step": 30015 + }, + { + "epoch": 1.4, + "learning_rate": 1.545357641120499e-05, + "loss": 0.1592, + "step": 30020 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452792626150208e-05, + "loss": 0.2251, + "step": 30025 + }, + { + "epoch": 1.4, + "learning_rate": 1.5452008841095418e-05, + "loss": 0.2223, + "step": 30030 + }, + { + "epoch": 1.4, + "learning_rate": 1.5451225056040632e-05, + "loss": 0.3359, + "step": 30035 + }, + { + "epoch": 1.4, + "learning_rate": 1.5450441270985846e-05, + "loss": 0.0442, + "step": 30040 + }, + { + "epoch": 1.4, + "learning_rate": 1.544965748593106e-05, + "loss": 0.1268, + "step": 30045 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448873700876274e-05, + "loss": 0.1322, + "step": 30050 + }, + { + "epoch": 1.4, + "learning_rate": 1.5448089915821488e-05, + "loss": 0.0535, + "step": 30055 + }, + { + "epoch": 1.4, + "learning_rate": 1.5447306130766698e-05, + "loss": 0.1431, + "step": 30060 + }, + { + "epoch": 1.4, + "learning_rate": 1.5446522345711915e-05, + "loss": 0.1551, + "step": 30065 + }, + { + "epoch": 1.4, + "learning_rate": 1.5445738560657126e-05, + "loss": 0.1445, + "step": 30070 + }, + { + "epoch": 1.4, + "learning_rate": 1.544495477560234e-05, + "loss": 0.2025, + "step": 30075 + }, + { + "epoch": 1.4, + "learning_rate": 1.5444170990547554e-05, + "loss": 0.3375, + "step": 30080 + }, + { + "epoch": 1.4, + "learning_rate": 1.5443387205492764e-05, + "loss": 0.2115, + "step": 30085 + }, + { + "epoch": 1.4, + "learning_rate": 1.544260342043798e-05, + "loss": 0.0324, + "step": 30090 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441819635383192e-05, + "loss": 0.0582, + "step": 30095 + }, + { + "epoch": 1.4, + "learning_rate": 1.5441035850328406e-05, + "loss": 0.0829, + "step": 30100 + }, + { + "epoch": 1.4, + "learning_rate": 1.544025206527362e-05, + "loss": 0.0776, + "step": 30105 + }, + { + "epoch": 1.4, + "learning_rate": 1.5439468280218834e-05, + "loss": 0.1564, + "step": 30110 + }, + { + "epoch": 1.41, + "learning_rate": 1.5438684495164048e-05, + "loss": 0.1717, + "step": 30115 + }, + { + "epoch": 1.41, + "learning_rate": 1.543790071010926e-05, + "loss": 0.217, + "step": 30120 + }, + { + "epoch": 1.41, + "learning_rate": 1.5437116925054476e-05, + "loss": 0.2191, + "step": 30125 + }, + { + "epoch": 1.41, + "learning_rate": 1.543633313999969e-05, + "loss": 0.3155, + "step": 30130 + }, + { + "epoch": 1.41, + "learning_rate": 1.54355493549449e-05, + "loss": 0.3595, + "step": 30135 + }, + { + "epoch": 1.41, + "learning_rate": 1.5434765569890117e-05, + "loss": 0.0571, + "step": 30140 + }, + { + "epoch": 1.41, + "learning_rate": 1.5433981784835328e-05, + "loss": 0.0568, + "step": 30145 + }, + { + "epoch": 1.41, + "learning_rate": 1.543319799978054e-05, + "loss": 0.077, + "step": 30150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5432414214725756e-05, + "loss": 0.1147, + "step": 30155 + }, + { + "epoch": 1.41, + "learning_rate": 1.5431630429670966e-05, + "loss": 0.0826, + "step": 30160 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430846644616183e-05, + "loss": 0.1165, + "step": 30165 + }, + { + "epoch": 1.41, + "learning_rate": 1.5430062859561394e-05, + "loss": 0.1987, + "step": 30170 + }, + { + "epoch": 1.41, + "learning_rate": 1.5429279074506608e-05, + "loss": 0.2379, + "step": 30175 + }, + { + "epoch": 1.41, + "learning_rate": 1.542849528945182e-05, + "loss": 0.5167, + "step": 30180 + }, + { + "epoch": 1.41, + "learning_rate": 1.5427711504397036e-05, + "loss": 0.2545, + "step": 30185 + }, + { + "epoch": 1.41, + "learning_rate": 1.542692771934225e-05, + "loss": 0.0899, + "step": 30190 + }, + { + "epoch": 1.41, + "learning_rate": 1.5426143934287463e-05, + "loss": 0.0671, + "step": 30195 + }, + { + "epoch": 1.41, + "learning_rate": 1.5425360149232674e-05, + "loss": 0.1145, + "step": 30200 + }, + { + "epoch": 1.41, + "learning_rate": 1.542457636417789e-05, + "loss": 0.0848, + "step": 30205 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423792579123102e-05, + "loss": 0.208, + "step": 30210 + }, + { + "epoch": 1.41, + "learning_rate": 1.5423008794068316e-05, + "loss": 0.1277, + "step": 30215 + }, + { + "epoch": 1.41, + "learning_rate": 1.542222500901353e-05, + "loss": 0.182, + "step": 30220 + }, + { + "epoch": 1.41, + "learning_rate": 1.5421441223958743e-05, + "loss": 0.2234, + "step": 30225 + }, + { + "epoch": 1.41, + "learning_rate": 1.5420657438903957e-05, + "loss": 0.3056, + "step": 30230 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419873653849168e-05, + "loss": 0.4144, + "step": 30235 + }, + { + "epoch": 1.41, + "learning_rate": 1.5419089868794385e-05, + "loss": 0.0343, + "step": 30240 + }, + { + "epoch": 1.41, + "learning_rate": 1.5418306083739596e-05, + "loss": 0.0805, + "step": 30245 + }, + { + "epoch": 1.41, + "learning_rate": 1.541752229868481e-05, + "loss": 0.1667, + "step": 30250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5416738513630024e-05, + "loss": 0.1355, + "step": 30255 + }, + { + "epoch": 1.41, + "learning_rate": 1.5415954728575237e-05, + "loss": 0.1269, + "step": 30260 + }, + { + "epoch": 1.41, + "learning_rate": 1.541517094352045e-05, + "loss": 0.246, + "step": 30265 + }, + { + "epoch": 1.41, + "learning_rate": 1.5414387158465665e-05, + "loss": 0.1213, + "step": 30270 + }, + { + "epoch": 1.41, + "learning_rate": 1.5413603373410876e-05, + "loss": 0.169, + "step": 30275 + }, + { + "epoch": 1.41, + "learning_rate": 1.541281958835609e-05, + "loss": 0.3212, + "step": 30280 + }, + { + "epoch": 1.41, + "learning_rate": 1.5412035803301304e-05, + "loss": 0.1963, + "step": 30285 + }, + { + "epoch": 1.41, + "learning_rate": 1.5411252018246517e-05, + "loss": 0.0683, + "step": 30290 + }, + { + "epoch": 1.41, + "learning_rate": 1.541046823319173e-05, + "loss": 0.0797, + "step": 30295 + }, + { + "epoch": 1.41, + "learning_rate": 1.5409684448136942e-05, + "loss": 0.0972, + "step": 30300 + }, + { + "epoch": 1.41, + "learning_rate": 1.540890066308216e-05, + "loss": 0.1827, + "step": 30305 + }, + { + "epoch": 1.41, + "learning_rate": 1.540811687802737e-05, + "loss": 0.0909, + "step": 30310 + }, + { + "epoch": 1.41, + "learning_rate": 1.5407333092972584e-05, + "loss": 0.1743, + "step": 30315 + }, + { + "epoch": 1.41, + "learning_rate": 1.5406549307917798e-05, + "loss": 0.1385, + "step": 30320 + }, + { + "epoch": 1.42, + "learning_rate": 1.540576552286301e-05, + "loss": 0.2003, + "step": 30325 + }, + { + "epoch": 1.42, + "learning_rate": 1.5404981737808225e-05, + "loss": 0.4004, + "step": 30330 + }, + { + "epoch": 1.42, + "learning_rate": 1.540419795275344e-05, + "loss": 0.2143, + "step": 30335 + }, + { + "epoch": 1.42, + "learning_rate": 1.5403414167698653e-05, + "loss": 0.0175, + "step": 30340 + }, + { + "epoch": 1.42, + "learning_rate": 1.5402630382643864e-05, + "loss": 0.1199, + "step": 30345 + }, + { + "epoch": 1.42, + "learning_rate": 1.5401846597589078e-05, + "loss": 0.0549, + "step": 30350 + }, + { + "epoch": 1.42, + "learning_rate": 1.540106281253429e-05, + "loss": 0.1376, + "step": 30355 + }, + { + "epoch": 1.42, + "learning_rate": 1.5400279027479505e-05, + "loss": 0.1743, + "step": 30360 + }, + { + "epoch": 1.42, + "learning_rate": 1.539949524242472e-05, + "loss": 0.2547, + "step": 30365 + }, + { + "epoch": 1.42, + "learning_rate": 1.5398711457369933e-05, + "loss": 0.1769, + "step": 30370 + }, + { + "epoch": 1.42, + "learning_rate": 1.5397927672315144e-05, + "loss": 0.3749, + "step": 30375 + }, + { + "epoch": 1.42, + "learning_rate": 1.539714388726036e-05, + "loss": 0.5307, + "step": 30380 + }, + { + "epoch": 1.42, + "learning_rate": 1.539636010220557e-05, + "loss": 0.3359, + "step": 30385 + }, + { + "epoch": 1.42, + "learning_rate": 1.5395576317150785e-05, + "loss": 0.1245, + "step": 30390 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394792532096e-05, + "loss": 0.0377, + "step": 30395 + }, + { + "epoch": 1.42, + "learning_rate": 1.5394008747041213e-05, + "loss": 0.1, + "step": 30400 + }, + { + "epoch": 1.42, + "learning_rate": 1.5393224961986427e-05, + "loss": 0.146, + "step": 30405 + }, + { + "epoch": 1.42, + "learning_rate": 1.5392441176931638e-05, + "loss": 0.1585, + "step": 30410 + }, + { + "epoch": 1.42, + "learning_rate": 1.539165739187685e-05, + "loss": 0.2002, + "step": 30415 + }, + { + "epoch": 1.42, + "learning_rate": 1.5390873606822065e-05, + "loss": 0.2221, + "step": 30420 + }, + { + "epoch": 1.42, + "learning_rate": 1.539008982176728e-05, + "loss": 0.2473, + "step": 30425 + }, + { + "epoch": 1.42, + "learning_rate": 1.5389306036712493e-05, + "loss": 0.3531, + "step": 30430 + }, + { + "epoch": 1.42, + "learning_rate": 1.5388522251657707e-05, + "loss": 0.3376, + "step": 30435 + }, + { + "epoch": 1.42, + "learning_rate": 1.538773846660292e-05, + "loss": 0.1212, + "step": 30440 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386954681548135e-05, + "loss": 0.0599, + "step": 30445 + }, + { + "epoch": 1.42, + "learning_rate": 1.5386170896493345e-05, + "loss": 0.0923, + "step": 30450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5385387111438563e-05, + "loss": 0.0752, + "step": 30455 + }, + { + "epoch": 1.42, + "learning_rate": 1.5384603326383773e-05, + "loss": 0.074, + "step": 30460 + }, + { + "epoch": 1.42, + "learning_rate": 1.5383819541328987e-05, + "loss": 0.0945, + "step": 30465 + }, + { + "epoch": 1.42, + "learning_rate": 1.53830357562742e-05, + "loss": 0.2006, + "step": 30470 + }, + { + "epoch": 1.42, + "learning_rate": 1.538225197121941e-05, + "loss": 0.2685, + "step": 30475 + }, + { + "epoch": 1.42, + "learning_rate": 1.538146818616463e-05, + "loss": 0.2831, + "step": 30480 + }, + { + "epoch": 1.42, + "learning_rate": 1.538068440110984e-05, + "loss": 0.1803, + "step": 30485 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379900616055053e-05, + "loss": 0.0256, + "step": 30490 + }, + { + "epoch": 1.42, + "learning_rate": 1.5379116831000267e-05, + "loss": 0.0892, + "step": 30495 + }, + { + "epoch": 1.42, + "learning_rate": 1.537833304594548e-05, + "loss": 0.1554, + "step": 30500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5377549260890695e-05, + "loss": 0.1182, + "step": 30505 + }, + { + "epoch": 1.42, + "learning_rate": 1.537676547583591e-05, + "loss": 0.1168, + "step": 30510 + }, + { + "epoch": 1.42, + "learning_rate": 1.537598169078112e-05, + "loss": 0.1539, + "step": 30515 + }, + { + "epoch": 1.42, + "learning_rate": 1.5375197905726337e-05, + "loss": 0.229, + "step": 30520 + }, + { + "epoch": 1.42, + "learning_rate": 1.5374414120671547e-05, + "loss": 0.2612, + "step": 30525 + }, + { + "epoch": 1.42, + "learning_rate": 1.537363033561676e-05, + "loss": 0.4199, + "step": 30530 + }, + { + "epoch": 1.42, + "learning_rate": 1.5372846550561975e-05, + "loss": 0.3006, + "step": 30535 + }, + { + "epoch": 1.43, + "learning_rate": 1.537206276550719e-05, + "loss": 0.0404, + "step": 30540 + }, + { + "epoch": 1.43, + "learning_rate": 1.5371278980452403e-05, + "loss": 0.0872, + "step": 30545 + }, + { + "epoch": 1.43, + "learning_rate": 1.5370495195397613e-05, + "loss": 0.1091, + "step": 30550 + }, + { + "epoch": 1.43, + "learning_rate": 1.536971141034283e-05, + "loss": 0.0654, + "step": 30555 + }, + { + "epoch": 1.43, + "learning_rate": 1.536892762528804e-05, + "loss": 0.1402, + "step": 30560 + }, + { + "epoch": 1.43, + "learning_rate": 1.5368143840233255e-05, + "loss": 0.1676, + "step": 30565 + }, + { + "epoch": 1.43, + "learning_rate": 1.536736005517847e-05, + "loss": 0.1912, + "step": 30570 + }, + { + "epoch": 1.43, + "learning_rate": 1.5366576270123683e-05, + "loss": 0.3351, + "step": 30575 + }, + { + "epoch": 1.43, + "learning_rate": 1.5365792485068897e-05, + "loss": 0.367, + "step": 30580 + }, + { + "epoch": 1.43, + "learning_rate": 1.536500870001411e-05, + "loss": 0.2385, + "step": 30585 + }, + { + "epoch": 1.43, + "learning_rate": 1.536422491495932e-05, + "loss": 0.0754, + "step": 30590 + }, + { + "epoch": 1.43, + "learning_rate": 1.536344112990454e-05, + "loss": 0.0664, + "step": 30595 + }, + { + "epoch": 1.43, + "learning_rate": 1.536265734484975e-05, + "loss": 0.1467, + "step": 30600 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361873559794963e-05, + "loss": 0.0865, + "step": 30605 + }, + { + "epoch": 1.43, + "learning_rate": 1.5361089774740177e-05, + "loss": 0.1208, + "step": 30610 + }, + { + "epoch": 1.43, + "learning_rate": 1.5360305989685387e-05, + "loss": 0.1351, + "step": 30615 + }, + { + "epoch": 1.43, + "learning_rate": 1.5359522204630605e-05, + "loss": 0.1539, + "step": 30620 + }, + { + "epoch": 1.43, + "learning_rate": 1.5358738419575815e-05, + "loss": 0.2201, + "step": 30625 + }, + { + "epoch": 1.43, + "learning_rate": 1.535795463452103e-05, + "loss": 0.3883, + "step": 30630 + }, + { + "epoch": 1.43, + "learning_rate": 1.5357170849466243e-05, + "loss": 0.2628, + "step": 30635 + }, + { + "epoch": 1.43, + "learning_rate": 1.5356387064411457e-05, + "loss": 0.0461, + "step": 30640 + }, + { + "epoch": 1.43, + "learning_rate": 1.535560327935667e-05, + "loss": 0.0915, + "step": 30645 + }, + { + "epoch": 1.43, + "learning_rate": 1.5354819494301885e-05, + "loss": 0.0698, + "step": 30650 + }, + { + "epoch": 1.43, + "learning_rate": 1.53540357092471e-05, + "loss": 0.1448, + "step": 30655 + }, + { + "epoch": 1.43, + "learning_rate": 1.5353251924192313e-05, + "loss": 0.1386, + "step": 30660 + }, + { + "epoch": 1.43, + "learning_rate": 1.5352468139137523e-05, + "loss": 0.169, + "step": 30665 + }, + { + "epoch": 1.43, + "learning_rate": 1.5351684354082737e-05, + "loss": 0.1716, + "step": 30670 + }, + { + "epoch": 1.43, + "learning_rate": 1.535090056902795e-05, + "loss": 0.1982, + "step": 30675 + }, + { + "epoch": 1.43, + "learning_rate": 1.5350116783973165e-05, + "loss": 0.3828, + "step": 30680 + }, + { + "epoch": 1.43, + "learning_rate": 1.534933299891838e-05, + "loss": 0.2975, + "step": 30685 + }, + { + "epoch": 1.43, + "learning_rate": 1.534854921386359e-05, + "loss": 0.0999, + "step": 30690 + }, + { + "epoch": 1.43, + "learning_rate": 1.5347765428808807e-05, + "loss": 0.0746, + "step": 30695 + }, + { + "epoch": 1.43, + "learning_rate": 1.5346981643754017e-05, + "loss": 0.0579, + "step": 30700 + }, + { + "epoch": 1.43, + "learning_rate": 1.534619785869923e-05, + "loss": 0.1278, + "step": 30705 + }, + { + "epoch": 1.43, + "learning_rate": 1.5345414073644445e-05, + "loss": 0.1335, + "step": 30710 + }, + { + "epoch": 1.43, + "learning_rate": 1.534463028858966e-05, + "loss": 0.1177, + "step": 30715 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343846503534873e-05, + "loss": 0.1861, + "step": 30720 + }, + { + "epoch": 1.43, + "learning_rate": 1.5343062718480087e-05, + "loss": 0.2215, + "step": 30725 + }, + { + "epoch": 1.43, + "learning_rate": 1.5342278933425297e-05, + "loss": 0.3913, + "step": 30730 + }, + { + "epoch": 1.43, + "learning_rate": 1.5341495148370514e-05, + "loss": 0.332, + "step": 30735 + }, + { + "epoch": 1.43, + "learning_rate": 1.5340711363315725e-05, + "loss": 0.0479, + "step": 30740 + }, + { + "epoch": 1.43, + "learning_rate": 1.533992757826094e-05, + "loss": 0.0902, + "step": 30745 + }, + { + "epoch": 1.43, + "learning_rate": 1.5339143793206153e-05, + "loss": 0.0383, + "step": 30750 + }, + { + "epoch": 1.44, + "learning_rate": 1.5338360008151367e-05, + "loss": 0.0873, + "step": 30755 + }, + { + "epoch": 1.44, + "learning_rate": 1.533757622309658e-05, + "loss": 0.058, + "step": 30760 + }, + { + "epoch": 1.44, + "learning_rate": 1.533679243804179e-05, + "loss": 0.1621, + "step": 30765 + }, + { + "epoch": 1.44, + "learning_rate": 1.533600865298701e-05, + "loss": 0.1878, + "step": 30770 + }, + { + "epoch": 1.44, + "learning_rate": 1.533522486793222e-05, + "loss": 0.3271, + "step": 30775 + }, + { + "epoch": 1.44, + "learning_rate": 1.5334441082877433e-05, + "loss": 0.2911, + "step": 30780 + }, + { + "epoch": 1.44, + "learning_rate": 1.5333657297822647e-05, + "loss": 0.2349, + "step": 30785 + }, + { + "epoch": 1.44, + "learning_rate": 1.533287351276786e-05, + "loss": 0.1201, + "step": 30790 + }, + { + "epoch": 1.44, + "learning_rate": 1.5332089727713075e-05, + "loss": 0.0789, + "step": 30795 + }, + { + "epoch": 1.44, + "learning_rate": 1.533130594265829e-05, + "loss": 0.0823, + "step": 30800 + }, + { + "epoch": 1.44, + "learning_rate": 1.53305221576035e-05, + "loss": 0.1153, + "step": 30805 + }, + { + "epoch": 1.44, + "learning_rate": 1.5329738372548713e-05, + "loss": 0.1054, + "step": 30810 + }, + { + "epoch": 1.44, + "learning_rate": 1.5328954587493927e-05, + "loss": 0.1376, + "step": 30815 + }, + { + "epoch": 1.44, + "learning_rate": 1.532817080243914e-05, + "loss": 0.1513, + "step": 30820 + }, + { + "epoch": 1.44, + "learning_rate": 1.5327387017384355e-05, + "loss": 0.2324, + "step": 30825 + }, + { + "epoch": 1.44, + "learning_rate": 1.5326603232329565e-05, + "loss": 0.3203, + "step": 30830 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325819447274782e-05, + "loss": 0.4077, + "step": 30835 + }, + { + "epoch": 1.44, + "learning_rate": 1.5325035662219993e-05, + "loss": 0.0783, + "step": 30840 + }, + { + "epoch": 1.44, + "learning_rate": 1.5324251877165207e-05, + "loss": 0.0441, + "step": 30845 + }, + { + "epoch": 1.44, + "learning_rate": 1.532346809211042e-05, + "loss": 0.0616, + "step": 30850 + }, + { + "epoch": 1.44, + "learning_rate": 1.5322684307055635e-05, + "loss": 0.1068, + "step": 30855 + }, + { + "epoch": 1.44, + "learning_rate": 1.532190052200085e-05, + "loss": 0.1137, + "step": 30860 + }, + { + "epoch": 1.44, + "learning_rate": 1.5321116736946062e-05, + "loss": 0.2083, + "step": 30865 + }, + { + "epoch": 1.44, + "learning_rate": 1.5320332951891276e-05, + "loss": 0.1234, + "step": 30870 + }, + { + "epoch": 1.44, + "learning_rate": 1.5319549166836487e-05, + "loss": 0.2183, + "step": 30875 + }, + { + "epoch": 1.44, + "learning_rate": 1.53187653817817e-05, + "loss": 0.3248, + "step": 30880 + }, + { + "epoch": 1.44, + "learning_rate": 1.5317981596726915e-05, + "loss": 0.2879, + "step": 30885 + }, + { + "epoch": 1.44, + "learning_rate": 1.531719781167213e-05, + "loss": 0.0633, + "step": 30890 + }, + { + "epoch": 1.44, + "learning_rate": 1.5316414026617342e-05, + "loss": 0.0492, + "step": 30895 + }, + { + "epoch": 1.44, + "learning_rate": 1.5315630241562556e-05, + "loss": 0.0931, + "step": 30900 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314846456507767e-05, + "loss": 0.0612, + "step": 30905 + }, + { + "epoch": 1.44, + "learning_rate": 1.5314062671452984e-05, + "loss": 0.0813, + "step": 30910 + }, + { + "epoch": 1.44, + "learning_rate": 1.5313278886398195e-05, + "loss": 0.1622, + "step": 30915 + }, + { + "epoch": 1.44, + "learning_rate": 1.531249510134341e-05, + "loss": 0.1378, + "step": 30920 + }, + { + "epoch": 1.44, + "learning_rate": 1.5311711316288623e-05, + "loss": 0.1609, + "step": 30925 + }, + { + "epoch": 1.44, + "learning_rate": 1.5310927531233836e-05, + "loss": 0.5023, + "step": 30930 + }, + { + "epoch": 1.44, + "learning_rate": 1.531014374617905e-05, + "loss": 0.3593, + "step": 30935 + }, + { + "epoch": 1.44, + "learning_rate": 1.530935996112426e-05, + "loss": 0.1545, + "step": 30940 + }, + { + "epoch": 1.44, + "learning_rate": 1.5308576176069475e-05, + "loss": 0.0696, + "step": 30945 + }, + { + "epoch": 1.44, + "learning_rate": 1.530779239101469e-05, + "loss": 0.0694, + "step": 30950 + }, + { + "epoch": 1.44, + "learning_rate": 1.5307008605959903e-05, + "loss": 0.1546, + "step": 30955 + }, + { + "epoch": 1.44, + "learning_rate": 1.5306224820905116e-05, + "loss": 0.1018, + "step": 30960 + }, + { + "epoch": 1.44, + "learning_rate": 1.530544103585033e-05, + "loss": 0.1363, + "step": 30965 + }, + { + "epoch": 1.45, + "learning_rate": 1.5304657250795544e-05, + "loss": 0.3256, + "step": 30970 + }, + { + "epoch": 1.45, + "learning_rate": 1.5303873465740758e-05, + "loss": 0.3606, + "step": 30975 + }, + { + "epoch": 1.45, + "learning_rate": 1.530308968068597e-05, + "loss": 0.331, + "step": 30980 + }, + { + "epoch": 1.45, + "learning_rate": 1.5302305895631186e-05, + "loss": 0.2471, + "step": 30985 + }, + { + "epoch": 1.45, + "learning_rate": 1.5301522110576397e-05, + "loss": 0.042, + "step": 30990 + }, + { + "epoch": 1.45, + "learning_rate": 1.530073832552161e-05, + "loss": 0.0757, + "step": 30995 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299954540466824e-05, + "loss": 0.0695, + "step": 31000 + }, + { + "epoch": 1.45, + "learning_rate": 1.5299170755412035e-05, + "loss": 0.1101, + "step": 31005 + }, + { + "epoch": 1.45, + "learning_rate": 1.5298386970357252e-05, + "loss": 0.1784, + "step": 31010 + }, + { + "epoch": 1.45, + "learning_rate": 1.5297603185302463e-05, + "loss": 0.1233, + "step": 31015 + }, + { + "epoch": 1.45, + "learning_rate": 1.5296819400247677e-05, + "loss": 0.2507, + "step": 31020 + }, + { + "epoch": 1.45, + "learning_rate": 1.529603561519289e-05, + "loss": 0.248, + "step": 31025 + }, + { + "epoch": 1.45, + "learning_rate": 1.5295251830138104e-05, + "loss": 0.4157, + "step": 31030 + }, + { + "epoch": 1.45, + "learning_rate": 1.5294468045083318e-05, + "loss": 0.2699, + "step": 31035 + }, + { + "epoch": 1.45, + "learning_rate": 1.5293684260028532e-05, + "loss": 0.1, + "step": 31040 + }, + { + "epoch": 1.45, + "learning_rate": 1.5292900474973743e-05, + "loss": 0.0886, + "step": 31045 + }, + { + "epoch": 1.45, + "learning_rate": 1.529211668991896e-05, + "loss": 0.0945, + "step": 31050 + }, + { + "epoch": 1.45, + "learning_rate": 1.529133290486417e-05, + "loss": 0.1045, + "step": 31055 + }, + { + "epoch": 1.45, + "learning_rate": 1.5290549119809384e-05, + "loss": 0.1411, + "step": 31060 + }, + { + "epoch": 1.45, + "learning_rate": 1.52897653347546e-05, + "loss": 0.167, + "step": 31065 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288981549699812e-05, + "loss": 0.2069, + "step": 31070 + }, + { + "epoch": 1.45, + "learning_rate": 1.5288197764645026e-05, + "loss": 0.1847, + "step": 31075 + }, + { + "epoch": 1.45, + "learning_rate": 1.5287413979590237e-05, + "loss": 0.2041, + "step": 31080 + }, + { + "epoch": 1.45, + "learning_rate": 1.5286630194535454e-05, + "loss": 0.2017, + "step": 31085 + }, + { + "epoch": 1.45, + "learning_rate": 1.5285846409480664e-05, + "loss": 0.0352, + "step": 31090 + }, + { + "epoch": 1.45, + "learning_rate": 1.528506262442588e-05, + "loss": 0.0676, + "step": 31095 + }, + { + "epoch": 1.45, + "learning_rate": 1.5284278839371092e-05, + "loss": 0.1651, + "step": 31100 + }, + { + "epoch": 1.45, + "learning_rate": 1.5283495054316306e-05, + "loss": 0.095, + "step": 31105 + }, + { + "epoch": 1.45, + "learning_rate": 1.528271126926152e-05, + "loss": 0.1385, + "step": 31110 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281927484206734e-05, + "loss": 0.16, + "step": 31115 + }, + { + "epoch": 1.45, + "learning_rate": 1.5281143699151944e-05, + "loss": 0.2099, + "step": 31120 + }, + { + "epoch": 1.45, + "learning_rate": 1.5280359914097162e-05, + "loss": 0.3167, + "step": 31125 + }, + { + "epoch": 1.45, + "learning_rate": 1.5279576129042372e-05, + "loss": 0.2805, + "step": 31130 + }, + { + "epoch": 1.45, + "learning_rate": 1.5278792343987586e-05, + "loss": 0.2409, + "step": 31135 + }, + { + "epoch": 1.45, + "learning_rate": 1.52780085589328e-05, + "loss": 0.0747, + "step": 31140 + }, + { + "epoch": 1.45, + "learning_rate": 1.527722477387801e-05, + "loss": 0.0464, + "step": 31145 + }, + { + "epoch": 1.45, + "learning_rate": 1.5276440988823228e-05, + "loss": 0.0867, + "step": 31150 + }, + { + "epoch": 1.45, + "learning_rate": 1.527565720376844e-05, + "loss": 0.1238, + "step": 31155 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274873418713652e-05, + "loss": 0.1296, + "step": 31160 + }, + { + "epoch": 1.45, + "learning_rate": 1.5274089633658866e-05, + "loss": 0.1621, + "step": 31165 + }, + { + "epoch": 1.45, + "learning_rate": 1.527330584860408e-05, + "loss": 0.1919, + "step": 31170 + }, + { + "epoch": 1.45, + "learning_rate": 1.5272522063549294e-05, + "loss": 0.3081, + "step": 31175 + }, + { + "epoch": 1.45, + "learning_rate": 1.5271738278494508e-05, + "loss": 0.3428, + "step": 31180 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270954493439722e-05, + "loss": 0.273, + "step": 31185 + }, + { + "epoch": 1.46, + "learning_rate": 1.5270170708384936e-05, + "loss": 0.1131, + "step": 31190 + }, + { + "epoch": 1.46, + "learning_rate": 1.5269386923330146e-05, + "loss": 0.0283, + "step": 31195 + }, + { + "epoch": 1.46, + "learning_rate": 1.526860313827536e-05, + "loss": 0.0582, + "step": 31200 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267819353220574e-05, + "loss": 0.0585, + "step": 31205 + }, + { + "epoch": 1.46, + "learning_rate": 1.5267035568165788e-05, + "loss": 0.0735, + "step": 31210 + }, + { + "epoch": 1.46, + "learning_rate": 1.5266251783111002e-05, + "loss": 0.272, + "step": 31215 + }, + { + "epoch": 1.46, + "learning_rate": 1.5265467998056212e-05, + "loss": 0.1506, + "step": 31220 + }, + { + "epoch": 1.46, + "learning_rate": 1.526468421300143e-05, + "loss": 0.2312, + "step": 31225 + }, + { + "epoch": 1.46, + "learning_rate": 1.526390042794664e-05, + "loss": 0.2993, + "step": 31230 + }, + { + "epoch": 1.46, + "learning_rate": 1.5263116642891854e-05, + "loss": 0.3461, + "step": 31235 + }, + { + "epoch": 1.46, + "learning_rate": 1.5262332857837068e-05, + "loss": 0.0388, + "step": 31240 + }, + { + "epoch": 1.46, + "learning_rate": 1.5261549072782282e-05, + "loss": 0.0356, + "step": 31245 + }, + { + "epoch": 1.46, + "learning_rate": 1.5260765287727496e-05, + "loss": 0.1398, + "step": 31250 + }, + { + "epoch": 1.46, + "learning_rate": 1.525998150267271e-05, + "loss": 0.0815, + "step": 31255 + }, + { + "epoch": 1.46, + "learning_rate": 1.525919771761792e-05, + "loss": 0.1347, + "step": 31260 + }, + { + "epoch": 1.46, + "learning_rate": 1.5258413932563134e-05, + "loss": 0.1489, + "step": 31265 + }, + { + "epoch": 1.46, + "learning_rate": 1.5257630147508348e-05, + "loss": 0.1687, + "step": 31270 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256846362453562e-05, + "loss": 0.1338, + "step": 31275 + }, + { + "epoch": 1.46, + "learning_rate": 1.5256062577398776e-05, + "loss": 0.3636, + "step": 31280 + }, + { + "epoch": 1.46, + "learning_rate": 1.5255278792343988e-05, + "loss": 0.2142, + "step": 31285 + }, + { + "epoch": 1.46, + "learning_rate": 1.5254495007289202e-05, + "loss": 0.0323, + "step": 31290 + }, + { + "epoch": 1.46, + "learning_rate": 1.5253711222234416e-05, + "loss": 0.0875, + "step": 31295 + }, + { + "epoch": 1.46, + "learning_rate": 1.525292743717963e-05, + "loss": 0.0987, + "step": 31300 + }, + { + "epoch": 1.46, + "learning_rate": 1.5252143652124842e-05, + "loss": 0.1468, + "step": 31305 + }, + { + "epoch": 1.46, + "learning_rate": 1.5251359867070058e-05, + "loss": 0.0654, + "step": 31310 + }, + { + "epoch": 1.46, + "learning_rate": 1.525057608201527e-05, + "loss": 0.1286, + "step": 31315 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249792296960484e-05, + "loss": 0.1423, + "step": 31320 + }, + { + "epoch": 1.46, + "learning_rate": 1.5249008511905696e-05, + "loss": 0.2072, + "step": 31325 + }, + { + "epoch": 1.46, + "learning_rate": 1.5248224726850908e-05, + "loss": 0.3649, + "step": 31330 + }, + { + "epoch": 1.46, + "learning_rate": 1.5247440941796124e-05, + "loss": 0.366, + "step": 31335 + }, + { + "epoch": 1.46, + "learning_rate": 1.5246657156741336e-05, + "loss": 0.0274, + "step": 31340 + }, + { + "epoch": 1.46, + "learning_rate": 1.524587337168655e-05, + "loss": 0.0756, + "step": 31345 + }, + { + "epoch": 1.46, + "learning_rate": 1.5245089586631762e-05, + "loss": 0.1292, + "step": 31350 + }, + { + "epoch": 1.46, + "learning_rate": 1.5244305801576978e-05, + "loss": 0.1784, + "step": 31355 + }, + { + "epoch": 1.46, + "learning_rate": 1.524352201652219e-05, + "loss": 0.1282, + "step": 31360 + }, + { + "epoch": 1.46, + "learning_rate": 1.5242738231467404e-05, + "loss": 0.2259, + "step": 31365 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241954446412616e-05, + "loss": 0.2356, + "step": 31370 + }, + { + "epoch": 1.46, + "learning_rate": 1.5241170661357832e-05, + "loss": 0.2162, + "step": 31375 + }, + { + "epoch": 1.46, + "learning_rate": 1.5240386876303044e-05, + "loss": 0.2779, + "step": 31380 + }, + { + "epoch": 1.46, + "learning_rate": 1.5239603091248258e-05, + "loss": 0.3447, + "step": 31385 + }, + { + "epoch": 1.46, + "learning_rate": 1.523881930619347e-05, + "loss": 0.0535, + "step": 31390 + }, + { + "epoch": 1.46, + "learning_rate": 1.5238035521138684e-05, + "loss": 0.1216, + "step": 31395 + }, + { + "epoch": 1.47, + "learning_rate": 1.5237251736083898e-05, + "loss": 0.0795, + "step": 31400 + }, + { + "epoch": 1.47, + "learning_rate": 1.523646795102911e-05, + "loss": 0.1059, + "step": 31405 + }, + { + "epoch": 1.47, + "learning_rate": 1.5235684165974326e-05, + "loss": 0.079, + "step": 31410 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234900380919538e-05, + "loss": 0.1216, + "step": 31415 + }, + { + "epoch": 1.47, + "learning_rate": 1.5234116595864752e-05, + "loss": 0.1635, + "step": 31420 + }, + { + "epoch": 1.47, + "learning_rate": 1.5233332810809964e-05, + "loss": 0.1558, + "step": 31425 + }, + { + "epoch": 1.47, + "learning_rate": 1.523254902575518e-05, + "loss": 0.405, + "step": 31430 + }, + { + "epoch": 1.47, + "learning_rate": 1.5231765240700392e-05, + "loss": 0.4419, + "step": 31435 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230981455645606e-05, + "loss": 0.0325, + "step": 31440 + }, + { + "epoch": 1.47, + "learning_rate": 1.5230197670590818e-05, + "loss": 0.1278, + "step": 31445 + }, + { + "epoch": 1.47, + "learning_rate": 1.5229413885536033e-05, + "loss": 0.1036, + "step": 31450 + }, + { + "epoch": 1.47, + "learning_rate": 1.5228630100481246e-05, + "loss": 0.1107, + "step": 31455 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227846315426458e-05, + "loss": 0.0893, + "step": 31460 + }, + { + "epoch": 1.47, + "learning_rate": 1.5227062530371672e-05, + "loss": 0.1751, + "step": 31465 + }, + { + "epoch": 1.47, + "learning_rate": 1.5226278745316884e-05, + "loss": 0.1661, + "step": 31470 + }, + { + "epoch": 1.47, + "learning_rate": 1.52254949602621e-05, + "loss": 0.1933, + "step": 31475 + }, + { + "epoch": 1.47, + "learning_rate": 1.5224711175207312e-05, + "loss": 0.2263, + "step": 31480 + }, + { + "epoch": 1.47, + "learning_rate": 1.5223927390152526e-05, + "loss": 0.3486, + "step": 31485 + }, + { + "epoch": 1.47, + "learning_rate": 1.522314360509774e-05, + "loss": 0.0391, + "step": 31490 + }, + { + "epoch": 1.47, + "learning_rate": 1.5222359820042954e-05, + "loss": 0.0702, + "step": 31495 + }, + { + "epoch": 1.47, + "learning_rate": 1.5221576034988166e-05, + "loss": 0.0595, + "step": 31500 + }, + { + "epoch": 1.47, + "learning_rate": 1.522079224993338e-05, + "loss": 0.0184, + "step": 31505 + }, + { + "epoch": 1.47, + "learning_rate": 1.5220008464878594e-05, + "loss": 0.1383, + "step": 31510 + }, + { + "epoch": 1.47, + "learning_rate": 1.5219224679823807e-05, + "loss": 0.1519, + "step": 31515 + }, + { + "epoch": 1.47, + "learning_rate": 1.521844089476902e-05, + "loss": 0.1777, + "step": 31520 + }, + { + "epoch": 1.47, + "learning_rate": 1.5217657109714232e-05, + "loss": 0.2411, + "step": 31525 + }, + { + "epoch": 1.47, + "learning_rate": 1.5216873324659448e-05, + "loss": 0.3028, + "step": 31530 + }, + { + "epoch": 1.47, + "learning_rate": 1.521608953960466e-05, + "loss": 0.379, + "step": 31535 + }, + { + "epoch": 1.47, + "learning_rate": 1.5215305754549874e-05, + "loss": 0.0478, + "step": 31540 + }, + { + "epoch": 1.47, + "learning_rate": 1.5214521969495086e-05, + "loss": 0.0623, + "step": 31545 + }, + { + "epoch": 1.47, + "learning_rate": 1.5213738184440301e-05, + "loss": 0.0523, + "step": 31550 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212954399385514e-05, + "loss": 0.1509, + "step": 31555 + }, + { + "epoch": 1.47, + "learning_rate": 1.5212170614330728e-05, + "loss": 0.1149, + "step": 31560 + }, + { + "epoch": 1.47, + "learning_rate": 1.521138682927594e-05, + "loss": 0.1829, + "step": 31565 + }, + { + "epoch": 1.47, + "learning_rate": 1.5210603044221155e-05, + "loss": 0.1609, + "step": 31570 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209819259166368e-05, + "loss": 0.2295, + "step": 31575 + }, + { + "epoch": 1.47, + "learning_rate": 1.5209035474111581e-05, + "loss": 0.486, + "step": 31580 + }, + { + "epoch": 1.47, + "learning_rate": 1.5208251689056794e-05, + "loss": 0.2702, + "step": 31585 + }, + { + "epoch": 1.47, + "learning_rate": 1.5207467904002008e-05, + "loss": 0.0594, + "step": 31590 + }, + { + "epoch": 1.47, + "learning_rate": 1.5206684118947222e-05, + "loss": 0.1273, + "step": 31595 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205900333892434e-05, + "loss": 0.0883, + "step": 31600 + }, + { + "epoch": 1.47, + "learning_rate": 1.5205116548837648e-05, + "loss": 0.0647, + "step": 31605 + }, + { + "epoch": 1.47, + "learning_rate": 1.5204332763782862e-05, + "loss": 0.0872, + "step": 31610 + }, + { + "epoch": 1.48, + "learning_rate": 1.5203548978728075e-05, + "loss": 0.1148, + "step": 31615 + }, + { + "epoch": 1.48, + "learning_rate": 1.5202765193673288e-05, + "loss": 0.1226, + "step": 31620 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201981408618503e-05, + "loss": 0.2385, + "step": 31625 + }, + { + "epoch": 1.48, + "learning_rate": 1.5201197623563715e-05, + "loss": 0.3862, + "step": 31630 + }, + { + "epoch": 1.48, + "learning_rate": 1.520041383850893e-05, + "loss": 0.3206, + "step": 31635 + }, + { + "epoch": 1.48, + "learning_rate": 1.5199630053454142e-05, + "loss": 0.0743, + "step": 31640 + }, + { + "epoch": 1.48, + "learning_rate": 1.5198846268399357e-05, + "loss": 0.0545, + "step": 31645 + }, + { + "epoch": 1.48, + "learning_rate": 1.519806248334457e-05, + "loss": 0.0978, + "step": 31650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5197278698289782e-05, + "loss": 0.0675, + "step": 31655 + }, + { + "epoch": 1.48, + "learning_rate": 1.5196494913234995e-05, + "loss": 0.1666, + "step": 31660 + }, + { + "epoch": 1.48, + "learning_rate": 1.5195711128180208e-05, + "loss": 0.2834, + "step": 31665 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194927343125423e-05, + "loss": 0.1719, + "step": 31670 + }, + { + "epoch": 1.48, + "learning_rate": 1.5194143558070636e-05, + "loss": 0.2468, + "step": 31675 + }, + { + "epoch": 1.48, + "learning_rate": 1.519335977301585e-05, + "loss": 0.3821, + "step": 31680 + }, + { + "epoch": 1.48, + "learning_rate": 1.5192575987961062e-05, + "loss": 0.3466, + "step": 31685 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191792202906277e-05, + "loss": 0.0438, + "step": 31690 + }, + { + "epoch": 1.48, + "learning_rate": 1.519100841785149e-05, + "loss": 0.066, + "step": 31695 + }, + { + "epoch": 1.48, + "learning_rate": 1.5190224632796703e-05, + "loss": 0.083, + "step": 31700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5189440847741916e-05, + "loss": 0.0671, + "step": 31705 + }, + { + "epoch": 1.48, + "learning_rate": 1.5188657062687131e-05, + "loss": 0.1106, + "step": 31710 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187873277632343e-05, + "loss": 0.1968, + "step": 31715 + }, + { + "epoch": 1.48, + "learning_rate": 1.5187089492577556e-05, + "loss": 0.209, + "step": 31720 + }, + { + "epoch": 1.48, + "learning_rate": 1.5186305707522771e-05, + "loss": 0.1886, + "step": 31725 + }, + { + "epoch": 1.48, + "learning_rate": 1.5185521922467983e-05, + "loss": 0.3375, + "step": 31730 + }, + { + "epoch": 1.48, + "learning_rate": 1.5184738137413197e-05, + "loss": 0.2246, + "step": 31735 + }, + { + "epoch": 1.48, + "learning_rate": 1.518395435235841e-05, + "loss": 0.0597, + "step": 31740 + }, + { + "epoch": 1.48, + "learning_rate": 1.5183170567303625e-05, + "loss": 0.0915, + "step": 31745 + }, + { + "epoch": 1.48, + "learning_rate": 1.5182386782248837e-05, + "loss": 0.1168, + "step": 31750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5181602997194051e-05, + "loss": 0.0906, + "step": 31755 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180819212139263e-05, + "loss": 0.0936, + "step": 31760 + }, + { + "epoch": 1.48, + "learning_rate": 1.5180035427084479e-05, + "loss": 0.1309, + "step": 31765 + }, + { + "epoch": 1.48, + "learning_rate": 1.5179251642029691e-05, + "loss": 0.149, + "step": 31770 + }, + { + "epoch": 1.48, + "learning_rate": 1.5178467856974905e-05, + "loss": 0.2683, + "step": 31775 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177684071920117e-05, + "loss": 0.4699, + "step": 31780 + }, + { + "epoch": 1.48, + "learning_rate": 1.517690028686533e-05, + "loss": 0.2512, + "step": 31785 + }, + { + "epoch": 1.48, + "learning_rate": 1.5176116501810545e-05, + "loss": 0.059, + "step": 31790 + }, + { + "epoch": 1.48, + "learning_rate": 1.5175332716755757e-05, + "loss": 0.0595, + "step": 31795 + }, + { + "epoch": 1.48, + "learning_rate": 1.5174548931700971e-05, + "loss": 0.0786, + "step": 31800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5173765146646185e-05, + "loss": 0.1279, + "step": 31805 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172981361591399e-05, + "loss": 0.2461, + "step": 31810 + }, + { + "epoch": 1.48, + "learning_rate": 1.5172197576536611e-05, + "loss": 0.1358, + "step": 31815 + }, + { + "epoch": 1.48, + "learning_rate": 1.5171413791481825e-05, + "loss": 0.1626, + "step": 31820 + }, + { + "epoch": 1.48, + "learning_rate": 1.5170630006427039e-05, + "loss": 0.1351, + "step": 31825 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169846221372253e-05, + "loss": 0.3434, + "step": 31830 + }, + { + "epoch": 1.49, + "learning_rate": 1.5169062436317465e-05, + "loss": 0.2988, + "step": 31835 + }, + { + "epoch": 1.49, + "learning_rate": 1.5168278651262681e-05, + "loss": 0.0382, + "step": 31840 + }, + { + "epoch": 1.49, + "learning_rate": 1.5167494866207893e-05, + "loss": 0.0455, + "step": 31845 + }, + { + "epoch": 1.49, + "learning_rate": 1.5166711081153105e-05, + "loss": 0.0726, + "step": 31850 + }, + { + "epoch": 1.49, + "learning_rate": 1.516592729609832e-05, + "loss": 0.0954, + "step": 31855 + }, + { + "epoch": 1.49, + "learning_rate": 1.5165143511043531e-05, + "loss": 0.0936, + "step": 31860 + }, + { + "epoch": 1.49, + "learning_rate": 1.5164359725988747e-05, + "loss": 0.1024, + "step": 31865 + }, + { + "epoch": 1.49, + "learning_rate": 1.516357594093396e-05, + "loss": 0.1589, + "step": 31870 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162792155879173e-05, + "loss": 0.2689, + "step": 31875 + }, + { + "epoch": 1.49, + "learning_rate": 1.5162008370824385e-05, + "loss": 0.3105, + "step": 31880 + }, + { + "epoch": 1.49, + "learning_rate": 1.5161224585769601e-05, + "loss": 0.295, + "step": 31885 + }, + { + "epoch": 1.49, + "learning_rate": 1.5160440800714813e-05, + "loss": 0.0471, + "step": 31890 + }, + { + "epoch": 1.49, + "learning_rate": 1.5159657015660027e-05, + "loss": 0.0563, + "step": 31895 + }, + { + "epoch": 1.49, + "learning_rate": 1.515887323060524e-05, + "loss": 0.0521, + "step": 31900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5158089445550455e-05, + "loss": 0.1391, + "step": 31905 + }, + { + "epoch": 1.49, + "learning_rate": 1.5157305660495667e-05, + "loss": 0.0926, + "step": 31910 + }, + { + "epoch": 1.49, + "learning_rate": 1.515652187544088e-05, + "loss": 0.1678, + "step": 31915 + }, + { + "epoch": 1.49, + "learning_rate": 1.5155738090386093e-05, + "loss": 0.1874, + "step": 31920 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154954305331307e-05, + "loss": 0.1741, + "step": 31925 + }, + { + "epoch": 1.49, + "learning_rate": 1.5154170520276521e-05, + "loss": 0.362, + "step": 31930 + }, + { + "epoch": 1.49, + "learning_rate": 1.5153386735221733e-05, + "loss": 0.3195, + "step": 31935 + }, + { + "epoch": 1.49, + "learning_rate": 1.5152602950166949e-05, + "loss": 0.0607, + "step": 31940 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151819165112161e-05, + "loss": 0.0734, + "step": 31945 + }, + { + "epoch": 1.49, + "learning_rate": 1.5151035380057375e-05, + "loss": 0.096, + "step": 31950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5150251595002587e-05, + "loss": 0.1064, + "step": 31955 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149467809947803e-05, + "loss": 0.1528, + "step": 31960 + }, + { + "epoch": 1.49, + "learning_rate": 1.5148684024893015e-05, + "loss": 0.2397, + "step": 31965 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147900239838229e-05, + "loss": 0.2079, + "step": 31970 + }, + { + "epoch": 1.49, + "learning_rate": 1.5147116454783441e-05, + "loss": 0.2747, + "step": 31975 + }, + { + "epoch": 1.49, + "learning_rate": 1.5146332669728653e-05, + "loss": 0.4375, + "step": 31980 + }, + { + "epoch": 1.49, + "learning_rate": 1.5145548884673869e-05, + "loss": 0.348, + "step": 31985 + }, + { + "epoch": 1.49, + "learning_rate": 1.5144765099619081e-05, + "loss": 0.0462, + "step": 31990 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143981314564295e-05, + "loss": 0.078, + "step": 31995 + }, + { + "epoch": 1.49, + "learning_rate": 1.5143197529509507e-05, + "loss": 0.0692, + "step": 32000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5142413744454723e-05, + "loss": 0.0825, + "step": 32005 + }, + { + "epoch": 1.49, + "learning_rate": 1.5141629959399935e-05, + "loss": 0.138, + "step": 32010 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140846174345149e-05, + "loss": 0.1539, + "step": 32015 + }, + { + "epoch": 1.49, + "learning_rate": 1.5140062389290361e-05, + "loss": 0.2308, + "step": 32020 + }, + { + "epoch": 1.49, + "learning_rate": 1.5139278604235577e-05, + "loss": 0.2213, + "step": 32025 + }, + { + "epoch": 1.49, + "learning_rate": 1.5138494819180789e-05, + "loss": 0.2618, + "step": 32030 + }, + { + "epoch": 1.49, + "learning_rate": 1.5137711034126003e-05, + "loss": 0.3362, + "step": 32035 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136927249071217e-05, + "loss": 0.0699, + "step": 32040 + }, + { + "epoch": 1.5, + "learning_rate": 1.5136143464016429e-05, + "loss": 0.0228, + "step": 32045 + }, + { + "epoch": 1.5, + "learning_rate": 1.5135359678961643e-05, + "loss": 0.046, + "step": 32050 + }, + { + "epoch": 1.5, + "learning_rate": 1.5134575893906855e-05, + "loss": 0.0907, + "step": 32055 + }, + { + "epoch": 1.5, + "learning_rate": 1.513379210885207e-05, + "loss": 0.1112, + "step": 32060 + }, + { + "epoch": 1.5, + "learning_rate": 1.5133008323797283e-05, + "loss": 0.1013, + "step": 32065 + }, + { + "epoch": 1.5, + "learning_rate": 1.5132224538742497e-05, + "loss": 0.1556, + "step": 32070 + }, + { + "epoch": 1.5, + "learning_rate": 1.5131440753687709e-05, + "loss": 0.24, + "step": 32075 + }, + { + "epoch": 1.5, + "learning_rate": 1.5130656968632925e-05, + "loss": 0.5122, + "step": 32080 + }, + { + "epoch": 1.5, + "learning_rate": 1.5129873183578137e-05, + "loss": 0.2973, + "step": 32085 + }, + { + "epoch": 1.5, + "learning_rate": 1.512908939852335e-05, + "loss": 0.0492, + "step": 32090 + }, + { + "epoch": 1.5, + "learning_rate": 1.5128305613468563e-05, + "loss": 0.0999, + "step": 32095 + }, + { + "epoch": 1.5, + "learning_rate": 1.5127521828413779e-05, + "loss": 0.0568, + "step": 32100 + }, + { + "epoch": 1.5, + "learning_rate": 1.512673804335899e-05, + "loss": 0.0834, + "step": 32105 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125954258304203e-05, + "loss": 0.0824, + "step": 32110 + }, + { + "epoch": 1.5, + "learning_rate": 1.5125170473249417e-05, + "loss": 0.2455, + "step": 32115 + }, + { + "epoch": 1.5, + "learning_rate": 1.512438668819463e-05, + "loss": 0.1645, + "step": 32120 + }, + { + "epoch": 1.5, + "learning_rate": 1.5123602903139845e-05, + "loss": 0.2583, + "step": 32125 + }, + { + "epoch": 1.5, + "learning_rate": 1.5122819118085057e-05, + "loss": 0.3227, + "step": 32130 + }, + { + "epoch": 1.5, + "learning_rate": 1.512203533303027e-05, + "loss": 0.3561, + "step": 32135 + }, + { + "epoch": 1.5, + "learning_rate": 1.5121251547975485e-05, + "loss": 0.0648, + "step": 32140 + }, + { + "epoch": 1.5, + "learning_rate": 1.5120467762920699e-05, + "loss": 0.016, + "step": 32145 + }, + { + "epoch": 1.5, + "learning_rate": 1.511968397786591e-05, + "loss": 0.1238, + "step": 32150 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118900192811126e-05, + "loss": 0.0738, + "step": 32155 + }, + { + "epoch": 1.5, + "learning_rate": 1.5118116407756339e-05, + "loss": 0.113, + "step": 32160 + }, + { + "epoch": 1.5, + "learning_rate": 1.5117332622701553e-05, + "loss": 0.0999, + "step": 32165 + }, + { + "epoch": 1.5, + "learning_rate": 1.5116548837646765e-05, + "loss": 0.2245, + "step": 32170 + }, + { + "epoch": 1.5, + "learning_rate": 1.5115765052591977e-05, + "loss": 0.3073, + "step": 32175 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114981267537193e-05, + "loss": 0.2557, + "step": 32180 + }, + { + "epoch": 1.5, + "learning_rate": 1.5114197482482405e-05, + "loss": 0.2607, + "step": 32185 + }, + { + "epoch": 1.5, + "learning_rate": 1.5113413697427619e-05, + "loss": 0.064, + "step": 32190 + }, + { + "epoch": 1.5, + "learning_rate": 1.5112629912372831e-05, + "loss": 0.076, + "step": 32195 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111846127318046e-05, + "loss": 0.1157, + "step": 32200 + }, + { + "epoch": 1.5, + "learning_rate": 1.5111062342263259e-05, + "loss": 0.075, + "step": 32205 + }, + { + "epoch": 1.5, + "learning_rate": 1.5110278557208473e-05, + "loss": 0.1556, + "step": 32210 + }, + { + "epoch": 1.5, + "learning_rate": 1.5109494772153685e-05, + "loss": 0.1983, + "step": 32215 + }, + { + "epoch": 1.5, + "learning_rate": 1.51087109870989e-05, + "loss": 0.1951, + "step": 32220 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107927202044113e-05, + "loss": 0.2283, + "step": 32225 + }, + { + "epoch": 1.5, + "learning_rate": 1.5107143416989327e-05, + "loss": 0.3268, + "step": 32230 + }, + { + "epoch": 1.5, + "learning_rate": 1.5106359631934539e-05, + "loss": 0.2575, + "step": 32235 + }, + { + "epoch": 1.5, + "learning_rate": 1.5105575846879753e-05, + "loss": 0.046, + "step": 32240 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104792061824967e-05, + "loss": 0.0554, + "step": 32245 + }, + { + "epoch": 1.5, + "learning_rate": 1.5104008276770179e-05, + "loss": 0.0833, + "step": 32250 + }, + { + "epoch": 1.51, + "learning_rate": 1.5103224491715394e-05, + "loss": 0.0969, + "step": 32255 + }, + { + "epoch": 1.51, + "learning_rate": 1.5102440706660607e-05, + "loss": 0.1418, + "step": 32260 + }, + { + "epoch": 1.51, + "learning_rate": 1.510165692160582e-05, + "loss": 0.1293, + "step": 32265 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100873136551033e-05, + "loss": 0.0968, + "step": 32270 + }, + { + "epoch": 1.51, + "learning_rate": 1.5100089351496248e-05, + "loss": 0.3261, + "step": 32275 + }, + { + "epoch": 1.51, + "learning_rate": 1.509930556644146e-05, + "loss": 0.283, + "step": 32280 + }, + { + "epoch": 1.51, + "learning_rate": 1.5098521781386674e-05, + "loss": 0.3071, + "step": 32285 + }, + { + "epoch": 1.51, + "learning_rate": 1.5097737996331887e-05, + "loss": 0.072, + "step": 32290 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096954211277102e-05, + "loss": 0.1006, + "step": 32295 + }, + { + "epoch": 1.51, + "learning_rate": 1.5096170426222314e-05, + "loss": 0.1003, + "step": 32300 + }, + { + "epoch": 1.51, + "learning_rate": 1.5095386641167527e-05, + "loss": 0.1151, + "step": 32305 + }, + { + "epoch": 1.51, + "learning_rate": 1.509460285611274e-05, + "loss": 0.1184, + "step": 32310 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093819071057953e-05, + "loss": 0.1667, + "step": 32315 + }, + { + "epoch": 1.51, + "learning_rate": 1.5093035286003168e-05, + "loss": 0.1405, + "step": 32320 + }, + { + "epoch": 1.51, + "learning_rate": 1.509225150094838e-05, + "loss": 0.2462, + "step": 32325 + }, + { + "epoch": 1.51, + "learning_rate": 1.5091467715893594e-05, + "loss": 0.2843, + "step": 32330 + }, + { + "epoch": 1.51, + "learning_rate": 1.5090683930838808e-05, + "loss": 0.2082, + "step": 32335 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089900145784022e-05, + "loss": 0.0373, + "step": 32340 + }, + { + "epoch": 1.51, + "learning_rate": 1.5089116360729235e-05, + "loss": 0.0384, + "step": 32345 + }, + { + "epoch": 1.51, + "learning_rate": 1.5088332575674448e-05, + "loss": 0.0444, + "step": 32350 + }, + { + "epoch": 1.51, + "learning_rate": 1.5087548790619662e-05, + "loss": 0.0799, + "step": 32355 + }, + { + "epoch": 1.51, + "learning_rate": 1.5086765005564876e-05, + "loss": 0.1469, + "step": 32360 + }, + { + "epoch": 1.51, + "learning_rate": 1.5085981220510088e-05, + "loss": 0.1759, + "step": 32365 + }, + { + "epoch": 1.51, + "learning_rate": 1.50851974354553e-05, + "loss": 0.1966, + "step": 32370 + }, + { + "epoch": 1.51, + "learning_rate": 1.5084413650400516e-05, + "loss": 0.2456, + "step": 32375 + }, + { + "epoch": 1.51, + "learning_rate": 1.5083629865345728e-05, + "loss": 0.3569, + "step": 32380 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082846080290942e-05, + "loss": 0.2519, + "step": 32385 + }, + { + "epoch": 1.51, + "learning_rate": 1.5082062295236155e-05, + "loss": 0.0463, + "step": 32390 + }, + { + "epoch": 1.51, + "learning_rate": 1.508127851018137e-05, + "loss": 0.0796, + "step": 32395 + }, + { + "epoch": 1.51, + "learning_rate": 1.5080494725126582e-05, + "loss": 0.0918, + "step": 32400 + }, + { + "epoch": 1.51, + "learning_rate": 1.5079710940071796e-05, + "loss": 0.0762, + "step": 32405 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078927155017009e-05, + "loss": 0.0586, + "step": 32410 + }, + { + "epoch": 1.51, + "learning_rate": 1.5078143369962224e-05, + "loss": 0.1551, + "step": 32415 + }, + { + "epoch": 1.51, + "learning_rate": 1.5077359584907436e-05, + "loss": 0.2989, + "step": 32420 + }, + { + "epoch": 1.51, + "learning_rate": 1.507657579985265e-05, + "loss": 0.2065, + "step": 32425 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075792014797862e-05, + "loss": 0.3864, + "step": 32430 + }, + { + "epoch": 1.51, + "learning_rate": 1.5075008229743076e-05, + "loss": 0.2009, + "step": 32435 + }, + { + "epoch": 1.51, + "learning_rate": 1.507422444468829e-05, + "loss": 0.0476, + "step": 32440 + }, + { + "epoch": 1.51, + "learning_rate": 1.5073440659633502e-05, + "loss": 0.1002, + "step": 32445 + }, + { + "epoch": 1.51, + "learning_rate": 1.5072656874578716e-05, + "loss": 0.1062, + "step": 32450 + }, + { + "epoch": 1.51, + "learning_rate": 1.507187308952393e-05, + "loss": 0.0847, + "step": 32455 + }, + { + "epoch": 1.51, + "learning_rate": 1.5071089304469144e-05, + "loss": 0.0809, + "step": 32460 + }, + { + "epoch": 1.51, + "learning_rate": 1.5070305519414356e-05, + "loss": 0.1638, + "step": 32465 + }, + { + "epoch": 1.52, + "learning_rate": 1.5069521734359572e-05, + "loss": 0.2693, + "step": 32470 + }, + { + "epoch": 1.52, + "learning_rate": 1.5068737949304784e-05, + "loss": 0.2582, + "step": 32475 + }, + { + "epoch": 1.52, + "learning_rate": 1.5067954164249998e-05, + "loss": 0.2734, + "step": 32480 + }, + { + "epoch": 1.52, + "learning_rate": 1.506717037919521e-05, + "loss": 0.2822, + "step": 32485 + }, + { + "epoch": 1.52, + "learning_rate": 1.5066386594140426e-05, + "loss": 0.0517, + "step": 32490 + }, + { + "epoch": 1.52, + "learning_rate": 1.5065602809085638e-05, + "loss": 0.0486, + "step": 32495 + }, + { + "epoch": 1.52, + "learning_rate": 1.506481902403085e-05, + "loss": 0.0478, + "step": 32500 + }, + { + "epoch": 1.52, + "learning_rate": 1.5064035238976064e-05, + "loss": 0.1418, + "step": 32505 + }, + { + "epoch": 1.52, + "learning_rate": 1.5063251453921276e-05, + "loss": 0.1221, + "step": 32510 + }, + { + "epoch": 1.52, + "learning_rate": 1.5062467668866492e-05, + "loss": 0.168, + "step": 32515 + }, + { + "epoch": 1.52, + "learning_rate": 1.5061683883811704e-05, + "loss": 0.196, + "step": 32520 + }, + { + "epoch": 1.52, + "learning_rate": 1.5060900098756918e-05, + "loss": 0.2721, + "step": 32525 + }, + { + "epoch": 1.52, + "learning_rate": 1.506011631370213e-05, + "loss": 0.3576, + "step": 32530 + }, + { + "epoch": 1.52, + "learning_rate": 1.5059332528647346e-05, + "loss": 0.3454, + "step": 32535 + }, + { + "epoch": 1.52, + "learning_rate": 1.5058548743592558e-05, + "loss": 0.0558, + "step": 32540 + }, + { + "epoch": 1.52, + "learning_rate": 1.5057764958537772e-05, + "loss": 0.0572, + "step": 32545 + }, + { + "epoch": 1.52, + "learning_rate": 1.5056981173482984e-05, + "loss": 0.0613, + "step": 32550 + }, + { + "epoch": 1.52, + "learning_rate": 1.50561973884282e-05, + "loss": 0.1226, + "step": 32555 + }, + { + "epoch": 1.52, + "learning_rate": 1.5055413603373412e-05, + "loss": 0.1035, + "step": 32560 + }, + { + "epoch": 1.52, + "learning_rate": 1.5054629818318624e-05, + "loss": 0.0922, + "step": 32565 + }, + { + "epoch": 1.52, + "learning_rate": 1.505384603326384e-05, + "loss": 0.2358, + "step": 32570 + }, + { + "epoch": 1.52, + "learning_rate": 1.5053062248209052e-05, + "loss": 0.1497, + "step": 32575 + }, + { + "epoch": 1.52, + "learning_rate": 1.5052278463154266e-05, + "loss": 0.3272, + "step": 32580 + }, + { + "epoch": 1.52, + "learning_rate": 1.5051494678099478e-05, + "loss": 0.3985, + "step": 32585 + }, + { + "epoch": 1.52, + "learning_rate": 1.5050710893044694e-05, + "loss": 0.044, + "step": 32590 + }, + { + "epoch": 1.52, + "learning_rate": 1.5049927107989906e-05, + "loss": 0.1202, + "step": 32595 + }, + { + "epoch": 1.52, + "learning_rate": 1.504914332293512e-05, + "loss": 0.1429, + "step": 32600 + }, + { + "epoch": 1.52, + "learning_rate": 1.5048359537880332e-05, + "loss": 0.0826, + "step": 32605 + }, + { + "epoch": 1.52, + "learning_rate": 1.5047575752825548e-05, + "loss": 0.1068, + "step": 32610 + }, + { + "epoch": 1.52, + "learning_rate": 1.504679196777076e-05, + "loss": 0.2536, + "step": 32615 + }, + { + "epoch": 1.52, + "learning_rate": 1.5046008182715974e-05, + "loss": 0.2464, + "step": 32620 + }, + { + "epoch": 1.52, + "learning_rate": 1.5045224397661186e-05, + "loss": 0.4023, + "step": 32625 + }, + { + "epoch": 1.52, + "learning_rate": 1.5044440612606398e-05, + "loss": 0.4528, + "step": 32630 + }, + { + "epoch": 1.52, + "learning_rate": 1.5043656827551614e-05, + "loss": 0.2417, + "step": 32635 + }, + { + "epoch": 1.52, + "learning_rate": 1.5042873042496826e-05, + "loss": 0.0292, + "step": 32640 + }, + { + "epoch": 1.52, + "learning_rate": 1.504208925744204e-05, + "loss": 0.0988, + "step": 32645 + }, + { + "epoch": 1.52, + "learning_rate": 1.5041305472387254e-05, + "loss": 0.067, + "step": 32650 + }, + { + "epoch": 1.52, + "learning_rate": 1.5040521687332468e-05, + "loss": 0.1081, + "step": 32655 + }, + { + "epoch": 1.52, + "learning_rate": 1.503973790227768e-05, + "loss": 0.1431, + "step": 32660 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038954117222894e-05, + "loss": 0.1415, + "step": 32665 + }, + { + "epoch": 1.52, + "learning_rate": 1.5038170332168108e-05, + "loss": 0.1489, + "step": 32670 + }, + { + "epoch": 1.52, + "learning_rate": 1.5037386547113322e-05, + "loss": 0.187, + "step": 32675 + }, + { + "epoch": 1.52, + "learning_rate": 1.5036602762058534e-05, + "loss": 0.3941, + "step": 32680 + }, + { + "epoch": 1.53, + "learning_rate": 1.503581897700375e-05, + "loss": 0.267, + "step": 32685 + }, + { + "epoch": 1.53, + "learning_rate": 1.5035035191948962e-05, + "loss": 0.0578, + "step": 32690 + }, + { + "epoch": 1.53, + "learning_rate": 1.5034251406894174e-05, + "loss": 0.0391, + "step": 32695 + }, + { + "epoch": 1.53, + "learning_rate": 1.5033467621839388e-05, + "loss": 0.0686, + "step": 32700 + }, + { + "epoch": 1.53, + "learning_rate": 1.50326838367846e-05, + "loss": 0.1086, + "step": 32705 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031900051729816e-05, + "loss": 0.1141, + "step": 32710 + }, + { + "epoch": 1.53, + "learning_rate": 1.5031116266675028e-05, + "loss": 0.1051, + "step": 32715 + }, + { + "epoch": 1.53, + "learning_rate": 1.5030332481620242e-05, + "loss": 0.2131, + "step": 32720 + }, + { + "epoch": 1.53, + "learning_rate": 1.5029548696565454e-05, + "loss": 0.2887, + "step": 32725 + }, + { + "epoch": 1.53, + "learning_rate": 1.502876491151067e-05, + "loss": 0.4061, + "step": 32730 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027981126455882e-05, + "loss": 0.3098, + "step": 32735 + }, + { + "epoch": 1.53, + "learning_rate": 1.5027197341401096e-05, + "loss": 0.0542, + "step": 32740 + }, + { + "epoch": 1.53, + "learning_rate": 1.5026413556346308e-05, + "loss": 0.0951, + "step": 32745 + }, + { + "epoch": 1.53, + "learning_rate": 1.5025629771291524e-05, + "loss": 0.0702, + "step": 32750 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024845986236736e-05, + "loss": 0.0914, + "step": 32755 + }, + { + "epoch": 1.53, + "learning_rate": 1.5024062201181948e-05, + "loss": 0.1323, + "step": 32760 + }, + { + "epoch": 1.53, + "learning_rate": 1.5023278416127162e-05, + "loss": 0.1518, + "step": 32765 + }, + { + "epoch": 1.53, + "learning_rate": 1.5022494631072376e-05, + "loss": 0.2011, + "step": 32770 + }, + { + "epoch": 1.53, + "learning_rate": 1.502171084601759e-05, + "loss": 0.1865, + "step": 32775 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020927060962802e-05, + "loss": 0.328, + "step": 32780 + }, + { + "epoch": 1.53, + "learning_rate": 1.5020143275908018e-05, + "loss": 0.3728, + "step": 32785 + }, + { + "epoch": 1.53, + "learning_rate": 1.501935949085323e-05, + "loss": 0.0544, + "step": 32790 + }, + { + "epoch": 1.53, + "learning_rate": 1.5018575705798444e-05, + "loss": 0.0543, + "step": 32795 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017791920743656e-05, + "loss": 0.0897, + "step": 32800 + }, + { + "epoch": 1.53, + "learning_rate": 1.5017008135688871e-05, + "loss": 0.1329, + "step": 32805 + }, + { + "epoch": 1.53, + "learning_rate": 1.5016224350634084e-05, + "loss": 0.0871, + "step": 32810 + }, + { + "epoch": 1.53, + "learning_rate": 1.5015440565579298e-05, + "loss": 0.1539, + "step": 32815 + }, + { + "epoch": 1.53, + "learning_rate": 1.501465678052451e-05, + "loss": 0.2018, + "step": 32820 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013872995469722e-05, + "loss": 0.1788, + "step": 32825 + }, + { + "epoch": 1.53, + "learning_rate": 1.5013089210414938e-05, + "loss": 0.4134, + "step": 32830 + }, + { + "epoch": 1.53, + "learning_rate": 1.501230542536015e-05, + "loss": 0.3125, + "step": 32835 + }, + { + "epoch": 1.53, + "learning_rate": 1.5011521640305364e-05, + "loss": 0.0423, + "step": 32840 + }, + { + "epoch": 1.53, + "learning_rate": 1.5010737855250576e-05, + "loss": 0.0694, + "step": 32845 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009954070195792e-05, + "loss": 0.0747, + "step": 32850 + }, + { + "epoch": 1.53, + "learning_rate": 1.5009170285141004e-05, + "loss": 0.1646, + "step": 32855 + }, + { + "epoch": 1.53, + "learning_rate": 1.5008386500086218e-05, + "loss": 0.1413, + "step": 32860 + }, + { + "epoch": 1.53, + "learning_rate": 1.500760271503143e-05, + "loss": 0.1363, + "step": 32865 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006818929976645e-05, + "loss": 0.1389, + "step": 32870 + }, + { + "epoch": 1.53, + "learning_rate": 1.5006035144921858e-05, + "loss": 0.313, + "step": 32875 + }, + { + "epoch": 1.53, + "learning_rate": 1.5005251359867072e-05, + "loss": 0.4249, + "step": 32880 + }, + { + "epoch": 1.53, + "learning_rate": 1.5004467574812286e-05, + "loss": 0.2864, + "step": 32885 + }, + { + "epoch": 1.53, + "learning_rate": 1.5003683789757498e-05, + "loss": 0.0551, + "step": 32890 + }, + { + "epoch": 1.53, + "learning_rate": 1.5002900004702712e-05, + "loss": 0.0842, + "step": 32895 + }, + { + "epoch": 1.54, + "learning_rate": 1.5002116219647924e-05, + "loss": 0.0557, + "step": 32900 + }, + { + "epoch": 1.54, + "learning_rate": 1.500133243459314e-05, + "loss": 0.1035, + "step": 32905 + }, + { + "epoch": 1.54, + "learning_rate": 1.5000548649538352e-05, + "loss": 0.0518, + "step": 32910 + }, + { + "epoch": 1.54, + "learning_rate": 1.4999764864483566e-05, + "loss": 0.1531, + "step": 32915 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998981079428778e-05, + "loss": 0.216, + "step": 32920 + }, + { + "epoch": 1.54, + "learning_rate": 1.4998197294373993e-05, + "loss": 0.1716, + "step": 32925 + }, + { + "epoch": 1.54, + "learning_rate": 1.4997413509319206e-05, + "loss": 0.39, + "step": 32930 + }, + { + "epoch": 1.54, + "learning_rate": 1.499662972426442e-05, + "loss": 0.2803, + "step": 32935 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995845939209632e-05, + "loss": 0.1181, + "step": 32940 + }, + { + "epoch": 1.54, + "learning_rate": 1.4995062154154847e-05, + "loss": 0.0665, + "step": 32945 + }, + { + "epoch": 1.54, + "learning_rate": 1.499427836910006e-05, + "loss": 0.0879, + "step": 32950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4993494584045272e-05, + "loss": 0.111, + "step": 32955 + }, + { + "epoch": 1.54, + "learning_rate": 1.4992710798990486e-05, + "loss": 0.1512, + "step": 32960 + }, + { + "epoch": 1.54, + "learning_rate": 1.49919270139357e-05, + "loss": 0.1847, + "step": 32965 + }, + { + "epoch": 1.54, + "learning_rate": 1.4991143228880913e-05, + "loss": 0.1602, + "step": 32970 + }, + { + "epoch": 1.54, + "learning_rate": 1.4990359443826126e-05, + "loss": 0.3279, + "step": 32975 + }, + { + "epoch": 1.54, + "learning_rate": 1.498957565877134e-05, + "loss": 0.2164, + "step": 32980 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988791873716553e-05, + "loss": 0.2688, + "step": 32985 + }, + { + "epoch": 1.54, + "learning_rate": 1.4988008088661767e-05, + "loss": 0.0506, + "step": 32990 + }, + { + "epoch": 1.54, + "learning_rate": 1.498722430360698e-05, + "loss": 0.0692, + "step": 32995 + }, + { + "epoch": 1.54, + "learning_rate": 1.4986440518552195e-05, + "loss": 0.1243, + "step": 33000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4985656733497407e-05, + "loss": 0.1195, + "step": 33005 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984872948442621e-05, + "loss": 0.1462, + "step": 33010 + }, + { + "epoch": 1.54, + "learning_rate": 1.4984089163387834e-05, + "loss": 0.121, + "step": 33015 + }, + { + "epoch": 1.54, + "learning_rate": 1.4983305378333046e-05, + "loss": 0.1771, + "step": 33020 + }, + { + "epoch": 1.54, + "learning_rate": 1.4982521593278261e-05, + "loss": 0.1746, + "step": 33025 + }, + { + "epoch": 1.54, + "learning_rate": 1.4981737808223474e-05, + "loss": 0.3095, + "step": 33030 + }, + { + "epoch": 1.54, + "learning_rate": 1.4980954023168687e-05, + "loss": 0.2838, + "step": 33035 + }, + { + "epoch": 1.54, + "learning_rate": 1.49801702381139e-05, + "loss": 0.0275, + "step": 33040 + }, + { + "epoch": 1.54, + "learning_rate": 1.4979386453059115e-05, + "loss": 0.0868, + "step": 33045 + }, + { + "epoch": 1.54, + "learning_rate": 1.4978602668004327e-05, + "loss": 0.0809, + "step": 33050 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977818882949541e-05, + "loss": 0.1187, + "step": 33055 + }, + { + "epoch": 1.54, + "learning_rate": 1.4977035097894754e-05, + "loss": 0.1628, + "step": 33060 + }, + { + "epoch": 1.54, + "learning_rate": 1.497625131283997e-05, + "loss": 0.1992, + "step": 33065 + }, + { + "epoch": 1.54, + "learning_rate": 1.4975467527785181e-05, + "loss": 0.2054, + "step": 33070 + }, + { + "epoch": 1.54, + "learning_rate": 1.4974683742730395e-05, + "loss": 0.2641, + "step": 33075 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973899957675608e-05, + "loss": 0.4842, + "step": 33080 + }, + { + "epoch": 1.54, + "learning_rate": 1.4973116172620821e-05, + "loss": 0.4444, + "step": 33085 + }, + { + "epoch": 1.54, + "learning_rate": 1.4972332387566035e-05, + "loss": 0.0517, + "step": 33090 + }, + { + "epoch": 1.54, + "learning_rate": 1.4971548602511248e-05, + "loss": 0.0611, + "step": 33095 + }, + { + "epoch": 1.54, + "learning_rate": 1.4970764817456463e-05, + "loss": 0.0702, + "step": 33100 + }, + { + "epoch": 1.54, + "learning_rate": 1.4969981032401675e-05, + "loss": 0.143, + "step": 33105 + }, + { + "epoch": 1.54, + "learning_rate": 1.496919724734689e-05, + "loss": 0.1304, + "step": 33110 + }, + { + "epoch": 1.55, + "learning_rate": 1.4968413462292101e-05, + "loss": 0.1834, + "step": 33115 + }, + { + "epoch": 1.55, + "learning_rate": 1.4967629677237317e-05, + "loss": 0.1337, + "step": 33120 + }, + { + "epoch": 1.55, + "learning_rate": 1.496684589218253e-05, + "loss": 0.213, + "step": 33125 + }, + { + "epoch": 1.55, + "learning_rate": 1.49662188641387e-05, + "loss": 0.3345, + "step": 33130 + }, + { + "epoch": 1.55, + "learning_rate": 1.4965435079083914e-05, + "loss": 0.307, + "step": 33135 + }, + { + "epoch": 1.55, + "learning_rate": 1.4964651294029126e-05, + "loss": 0.063, + "step": 33140 + }, + { + "epoch": 1.55, + "learning_rate": 1.496386750897434e-05, + "loss": 0.0567, + "step": 33145 + }, + { + "epoch": 1.55, + "learning_rate": 1.4963083723919554e-05, + "loss": 0.0812, + "step": 33150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4962299938864766e-05, + "loss": 0.0735, + "step": 33155 + }, + { + "epoch": 1.55, + "learning_rate": 1.4961516153809981e-05, + "loss": 0.1804, + "step": 33160 + }, + { + "epoch": 1.55, + "learning_rate": 1.4960732368755194e-05, + "loss": 0.125, + "step": 33165 + }, + { + "epoch": 1.55, + "learning_rate": 1.4959948583700408e-05, + "loss": 0.2112, + "step": 33170 + }, + { + "epoch": 1.55, + "learning_rate": 1.495916479864562e-05, + "loss": 0.2541, + "step": 33175 + }, + { + "epoch": 1.55, + "learning_rate": 1.4958381013590835e-05, + "loss": 0.2793, + "step": 33180 + }, + { + "epoch": 1.55, + "learning_rate": 1.4957597228536048e-05, + "loss": 0.242, + "step": 33185 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956813443481262e-05, + "loss": 0.0286, + "step": 33190 + }, + { + "epoch": 1.55, + "learning_rate": 1.4956029658426474e-05, + "loss": 0.057, + "step": 33195 + }, + { + "epoch": 1.55, + "learning_rate": 1.495524587337169e-05, + "loss": 0.087, + "step": 33200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4954462088316902e-05, + "loss": 0.0883, + "step": 33205 + }, + { + "epoch": 1.55, + "learning_rate": 1.4953678303262114e-05, + "loss": 0.1055, + "step": 33210 + }, + { + "epoch": 1.55, + "learning_rate": 1.4952894518207328e-05, + "loss": 0.4157, + "step": 33215 + }, + { + "epoch": 1.55, + "learning_rate": 1.495211073315254e-05, + "loss": 0.1814, + "step": 33220 + }, + { + "epoch": 1.55, + "learning_rate": 1.4951326948097755e-05, + "loss": 0.301, + "step": 33225 + }, + { + "epoch": 1.55, + "learning_rate": 1.4950543163042968e-05, + "loss": 0.3661, + "step": 33230 + }, + { + "epoch": 1.55, + "learning_rate": 1.4949759377988182e-05, + "loss": 0.2502, + "step": 33235 + }, + { + "epoch": 1.55, + "learning_rate": 1.4948975592933394e-05, + "loss": 0.0987, + "step": 33240 + }, + { + "epoch": 1.55, + "learning_rate": 1.494819180787861e-05, + "loss": 0.0505, + "step": 33245 + }, + { + "epoch": 1.55, + "learning_rate": 1.4947408022823822e-05, + "loss": 0.1345, + "step": 33250 + }, + { + "epoch": 1.55, + "learning_rate": 1.4946624237769036e-05, + "loss": 0.1235, + "step": 33255 + }, + { + "epoch": 1.55, + "learning_rate": 1.494584045271425e-05, + "loss": 0.1194, + "step": 33260 + }, + { + "epoch": 1.55, + "learning_rate": 1.4945056667659463e-05, + "loss": 0.1738, + "step": 33265 + }, + { + "epoch": 1.55, + "learning_rate": 1.4944272882604676e-05, + "loss": 0.2238, + "step": 33270 + }, + { + "epoch": 1.55, + "learning_rate": 1.4943489097549888e-05, + "loss": 0.295, + "step": 33275 + }, + { + "epoch": 1.55, + "learning_rate": 1.4942705312495103e-05, + "loss": 0.488, + "step": 33280 + }, + { + "epoch": 1.55, + "learning_rate": 1.4941921527440316e-05, + "loss": 0.392, + "step": 33285 + }, + { + "epoch": 1.55, + "learning_rate": 1.494113774238553e-05, + "loss": 0.0366, + "step": 33290 + }, + { + "epoch": 1.55, + "learning_rate": 1.4940353957330742e-05, + "loss": 0.0348, + "step": 33295 + }, + { + "epoch": 1.55, + "learning_rate": 1.4939570172275957e-05, + "loss": 0.0887, + "step": 33300 + }, + { + "epoch": 1.55, + "learning_rate": 1.493878638722117e-05, + "loss": 0.1132, + "step": 33305 + }, + { + "epoch": 1.55, + "learning_rate": 1.4938002602166383e-05, + "loss": 0.1445, + "step": 33310 + }, + { + "epoch": 1.55, + "learning_rate": 1.4937218817111596e-05, + "loss": 0.0744, + "step": 33315 + }, + { + "epoch": 1.55, + "learning_rate": 1.4936435032056811e-05, + "loss": 0.2349, + "step": 33320 + }, + { + "epoch": 1.55, + "learning_rate": 1.4935651247002023e-05, + "loss": 0.3013, + "step": 33325 + }, + { + "epoch": 1.56, + "learning_rate": 1.4934867461947237e-05, + "loss": 0.3091, + "step": 33330 + }, + { + "epoch": 1.56, + "learning_rate": 1.493408367689245e-05, + "loss": 0.3184, + "step": 33335 + }, + { + "epoch": 1.56, + "learning_rate": 1.4933299891837663e-05, + "loss": 0.0297, + "step": 33340 + }, + { + "epoch": 1.56, + "learning_rate": 1.4932516106782877e-05, + "loss": 0.0657, + "step": 33345 + }, + { + "epoch": 1.56, + "learning_rate": 1.493173232172809e-05, + "loss": 0.0838, + "step": 33350 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930948536673303e-05, + "loss": 0.0415, + "step": 33355 + }, + { + "epoch": 1.56, + "learning_rate": 1.4930164751618517e-05, + "loss": 0.1043, + "step": 33360 + }, + { + "epoch": 1.56, + "learning_rate": 1.4929380966563731e-05, + "loss": 0.1528, + "step": 33365 + }, + { + "epoch": 1.56, + "learning_rate": 1.4928597181508943e-05, + "loss": 0.1359, + "step": 33370 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927813396454159e-05, + "loss": 0.2899, + "step": 33375 + }, + { + "epoch": 1.56, + "learning_rate": 1.4927029611399371e-05, + "loss": 0.3585, + "step": 33380 + }, + { + "epoch": 1.56, + "learning_rate": 1.4926245826344585e-05, + "loss": 0.3395, + "step": 33385 + }, + { + "epoch": 1.56, + "learning_rate": 1.4925462041289797e-05, + "loss": 0.0484, + "step": 33390 + }, + { + "epoch": 1.56, + "learning_rate": 1.4924678256235013e-05, + "loss": 0.0426, + "step": 33395 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923894471180225e-05, + "loss": 0.0988, + "step": 33400 + }, + { + "epoch": 1.56, + "learning_rate": 1.4923110686125437e-05, + "loss": 0.1479, + "step": 33405 + }, + { + "epoch": 1.56, + "learning_rate": 1.4922326901070651e-05, + "loss": 0.1827, + "step": 33410 + }, + { + "epoch": 1.56, + "learning_rate": 1.4921543116015864e-05, + "loss": 0.1848, + "step": 33415 + }, + { + "epoch": 1.56, + "learning_rate": 1.492075933096108e-05, + "loss": 0.1389, + "step": 33420 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919975545906291e-05, + "loss": 0.2672, + "step": 33425 + }, + { + "epoch": 1.56, + "learning_rate": 1.4919191760851505e-05, + "loss": 0.3146, + "step": 33430 + }, + { + "epoch": 1.56, + "learning_rate": 1.4918407975796717e-05, + "loss": 0.206, + "step": 33435 + }, + { + "epoch": 1.56, + "learning_rate": 1.4917624190741933e-05, + "loss": 0.0536, + "step": 33440 + }, + { + "epoch": 1.56, + "learning_rate": 1.4916840405687145e-05, + "loss": 0.0521, + "step": 33445 + }, + { + "epoch": 1.56, + "learning_rate": 1.491605662063236e-05, + "loss": 0.053, + "step": 33450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4915272835577571e-05, + "loss": 0.1253, + "step": 33455 + }, + { + "epoch": 1.56, + "learning_rate": 1.4914489050522787e-05, + "loss": 0.1643, + "step": 33460 + }, + { + "epoch": 1.56, + "learning_rate": 1.4913705265468e-05, + "loss": 0.1678, + "step": 33465 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912921480413211e-05, + "loss": 0.1316, + "step": 33470 + }, + { + "epoch": 1.56, + "learning_rate": 1.4912137695358427e-05, + "loss": 0.3028, + "step": 33475 + }, + { + "epoch": 1.56, + "learning_rate": 1.491135391030364e-05, + "loss": 0.4472, + "step": 33480 + }, + { + "epoch": 1.56, + "learning_rate": 1.4910570125248853e-05, + "loss": 0.2954, + "step": 33485 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909786340194065e-05, + "loss": 0.0151, + "step": 33490 + }, + { + "epoch": 1.56, + "learning_rate": 1.4909002555139281e-05, + "loss": 0.0831, + "step": 33495 + }, + { + "epoch": 1.56, + "learning_rate": 1.4908218770084493e-05, + "loss": 0.0586, + "step": 33500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4907434985029707e-05, + "loss": 0.1487, + "step": 33505 + }, + { + "epoch": 1.56, + "learning_rate": 1.490665119997492e-05, + "loss": 0.0689, + "step": 33510 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905867414920135e-05, + "loss": 0.1569, + "step": 33515 + }, + { + "epoch": 1.56, + "learning_rate": 1.4905083629865347e-05, + "loss": 0.1794, + "step": 33520 + }, + { + "epoch": 1.56, + "learning_rate": 1.4904299844810561e-05, + "loss": 0.2747, + "step": 33525 + }, + { + "epoch": 1.56, + "learning_rate": 1.4903516059755773e-05, + "loss": 0.3703, + "step": 33530 + }, + { + "epoch": 1.56, + "learning_rate": 1.4902732274700985e-05, + "loss": 0.336, + "step": 33535 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901948489646201e-05, + "loss": 0.0809, + "step": 33540 + }, + { + "epoch": 1.57, + "learning_rate": 1.4901164704591413e-05, + "loss": 0.0574, + "step": 33545 + }, + { + "epoch": 1.57, + "learning_rate": 1.4900380919536627e-05, + "loss": 0.0474, + "step": 33550 + }, + { + "epoch": 1.57, + "learning_rate": 1.489959713448184e-05, + "loss": 0.1294, + "step": 33555 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898813349427055e-05, + "loss": 0.1134, + "step": 33560 + }, + { + "epoch": 1.57, + "learning_rate": 1.4898029564372267e-05, + "loss": 0.1304, + "step": 33565 + }, + { + "epoch": 1.57, + "learning_rate": 1.4897245779317481e-05, + "loss": 0.1349, + "step": 33570 + }, + { + "epoch": 1.57, + "learning_rate": 1.4896461994262695e-05, + "loss": 0.1339, + "step": 33575 + }, + { + "epoch": 1.57, + "learning_rate": 1.4895678209207909e-05, + "loss": 0.3101, + "step": 33580 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894894424153121e-05, + "loss": 0.2441, + "step": 33585 + }, + { + "epoch": 1.57, + "learning_rate": 1.4894110639098337e-05, + "loss": 0.0574, + "step": 33590 + }, + { + "epoch": 1.57, + "learning_rate": 1.4893326854043549e-05, + "loss": 0.1494, + "step": 33595 + }, + { + "epoch": 1.57, + "learning_rate": 1.4892543068988761e-05, + "loss": 0.0818, + "step": 33600 + }, + { + "epoch": 1.57, + "learning_rate": 1.4891759283933975e-05, + "loss": 0.0343, + "step": 33605 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890975498879187e-05, + "loss": 0.0497, + "step": 33610 + }, + { + "epoch": 1.57, + "learning_rate": 1.4890191713824403e-05, + "loss": 0.1251, + "step": 33615 + }, + { + "epoch": 1.57, + "learning_rate": 1.4889407928769615e-05, + "loss": 0.1593, + "step": 33620 + }, + { + "epoch": 1.57, + "learning_rate": 1.4888624143714829e-05, + "loss": 0.1829, + "step": 33625 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887840358660041e-05, + "loss": 0.4039, + "step": 33630 + }, + { + "epoch": 1.57, + "learning_rate": 1.4887056573605257e-05, + "loss": 0.2516, + "step": 33635 + }, + { + "epoch": 1.57, + "learning_rate": 1.4886272788550469e-05, + "loss": 0.0328, + "step": 33640 + }, + { + "epoch": 1.57, + "learning_rate": 1.4885489003495683e-05, + "loss": 0.0959, + "step": 33645 + }, + { + "epoch": 1.57, + "learning_rate": 1.4884705218440895e-05, + "loss": 0.0733, + "step": 33650 + }, + { + "epoch": 1.57, + "learning_rate": 1.488392143338611e-05, + "loss": 0.0875, + "step": 33655 + }, + { + "epoch": 1.57, + "learning_rate": 1.4883137648331323e-05, + "loss": 0.0994, + "step": 33660 + }, + { + "epoch": 1.57, + "learning_rate": 1.4882353863276535e-05, + "loss": 0.1776, + "step": 33665 + }, + { + "epoch": 1.57, + "learning_rate": 1.4881570078221749e-05, + "loss": 0.1794, + "step": 33670 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880786293166963e-05, + "loss": 0.2112, + "step": 33675 + }, + { + "epoch": 1.57, + "learning_rate": 1.4880002508112177e-05, + "loss": 0.2123, + "step": 33680 + }, + { + "epoch": 1.57, + "learning_rate": 1.4879218723057389e-05, + "loss": 0.4273, + "step": 33685 + }, + { + "epoch": 1.57, + "learning_rate": 1.4878434938002605e-05, + "loss": 0.0487, + "step": 33690 + }, + { + "epoch": 1.57, + "learning_rate": 1.4877651152947817e-05, + "loss": 0.0653, + "step": 33695 + }, + { + "epoch": 1.57, + "learning_rate": 1.487686736789303e-05, + "loss": 0.0647, + "step": 33700 + }, + { + "epoch": 1.57, + "learning_rate": 1.4876083582838243e-05, + "loss": 0.0774, + "step": 33705 + }, + { + "epoch": 1.57, + "learning_rate": 1.4875299797783459e-05, + "loss": 0.1084, + "step": 33710 + }, + { + "epoch": 1.57, + "learning_rate": 1.487451601272867e-05, + "loss": 0.1575, + "step": 33715 + }, + { + "epoch": 1.57, + "learning_rate": 1.4873732227673885e-05, + "loss": 0.1553, + "step": 33720 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872948442619097e-05, + "loss": 0.2492, + "step": 33725 + }, + { + "epoch": 1.57, + "learning_rate": 1.4872164657564309e-05, + "loss": 0.2886, + "step": 33730 + }, + { + "epoch": 1.57, + "learning_rate": 1.4871380872509525e-05, + "loss": 0.2957, + "step": 33735 + }, + { + "epoch": 1.57, + "learning_rate": 1.4870597087454737e-05, + "loss": 0.0997, + "step": 33740 + }, + { + "epoch": 1.57, + "learning_rate": 1.486981330239995e-05, + "loss": 0.0764, + "step": 33745 + }, + { + "epoch": 1.57, + "learning_rate": 1.4869029517345163e-05, + "loss": 0.0875, + "step": 33750 + }, + { + "epoch": 1.58, + "learning_rate": 1.4868245732290379e-05, + "loss": 0.1655, + "step": 33755 + }, + { + "epoch": 1.58, + "learning_rate": 1.4867461947235591e-05, + "loss": 0.0998, + "step": 33760 + }, + { + "epoch": 1.58, + "learning_rate": 1.4866678162180805e-05, + "loss": 0.1561, + "step": 33765 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865894377126017e-05, + "loss": 0.1504, + "step": 33770 + }, + { + "epoch": 1.58, + "learning_rate": 1.4865110592071233e-05, + "loss": 0.2526, + "step": 33775 + }, + { + "epoch": 1.58, + "learning_rate": 1.4864326807016445e-05, + "loss": 0.3547, + "step": 33780 + }, + { + "epoch": 1.58, + "learning_rate": 1.4863543021961659e-05, + "loss": 0.2778, + "step": 33785 + }, + { + "epoch": 1.58, + "learning_rate": 1.4862759236906873e-05, + "loss": 0.0651, + "step": 33790 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861975451852085e-05, + "loss": 0.0651, + "step": 33795 + }, + { + "epoch": 1.58, + "learning_rate": 1.4861191666797299e-05, + "loss": 0.1012, + "step": 33800 + }, + { + "epoch": 1.58, + "learning_rate": 1.4860407881742511e-05, + "loss": 0.1282, + "step": 33805 + }, + { + "epoch": 1.58, + "learning_rate": 1.4859624096687727e-05, + "loss": 0.163, + "step": 33810 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858840311632939e-05, + "loss": 0.221, + "step": 33815 + }, + { + "epoch": 1.58, + "learning_rate": 1.4858056526578153e-05, + "loss": 0.1405, + "step": 33820 + }, + { + "epoch": 1.58, + "learning_rate": 1.4857272741523365e-05, + "loss": 0.3016, + "step": 33825 + }, + { + "epoch": 1.58, + "learning_rate": 1.485648895646858e-05, + "loss": 0.4393, + "step": 33830 + }, + { + "epoch": 1.58, + "learning_rate": 1.4855705171413793e-05, + "loss": 0.2873, + "step": 33835 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854921386359007e-05, + "loss": 0.0466, + "step": 33840 + }, + { + "epoch": 1.58, + "learning_rate": 1.4854137601304219e-05, + "loss": 0.0364, + "step": 33845 + }, + { + "epoch": 1.58, + "learning_rate": 1.4853353816249434e-05, + "loss": 0.1092, + "step": 33850 + }, + { + "epoch": 1.58, + "learning_rate": 1.4852570031194647e-05, + "loss": 0.1333, + "step": 33855 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851786246139859e-05, + "loss": 0.1323, + "step": 33860 + }, + { + "epoch": 1.58, + "learning_rate": 1.4851002461085073e-05, + "loss": 0.1203, + "step": 33865 + }, + { + "epoch": 1.58, + "learning_rate": 1.4850218676030285e-05, + "loss": 0.1091, + "step": 33870 + }, + { + "epoch": 1.58, + "learning_rate": 1.48494348909755e-05, + "loss": 0.2064, + "step": 33875 + }, + { + "epoch": 1.58, + "learning_rate": 1.4848651105920713e-05, + "loss": 0.3023, + "step": 33880 + }, + { + "epoch": 1.58, + "learning_rate": 1.4847867320865927e-05, + "loss": 0.3641, + "step": 33885 + }, + { + "epoch": 1.58, + "learning_rate": 1.484708353581114e-05, + "loss": 0.0508, + "step": 33890 + }, + { + "epoch": 1.58, + "learning_rate": 1.4846299750756354e-05, + "loss": 0.0378, + "step": 33895 + }, + { + "epoch": 1.58, + "learning_rate": 1.4845515965701567e-05, + "loss": 0.1599, + "step": 33900 + }, + { + "epoch": 1.58, + "learning_rate": 1.4844732180646782e-05, + "loss": 0.0987, + "step": 33905 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843948395591994e-05, + "loss": 0.0775, + "step": 33910 + }, + { + "epoch": 1.58, + "learning_rate": 1.4843164610537208e-05, + "loss": 0.1141, + "step": 33915 + }, + { + "epoch": 1.58, + "learning_rate": 1.484238082548242e-05, + "loss": 0.1908, + "step": 33920 + }, + { + "epoch": 1.58, + "learning_rate": 1.4841597040427633e-05, + "loss": 0.2086, + "step": 33925 + }, + { + "epoch": 1.58, + "learning_rate": 1.4840813255372848e-05, + "loss": 0.3953, + "step": 33930 + }, + { + "epoch": 1.58, + "learning_rate": 1.484002947031806e-05, + "loss": 0.3082, + "step": 33935 + }, + { + "epoch": 1.58, + "learning_rate": 1.4839245685263275e-05, + "loss": 0.0498, + "step": 33940 + }, + { + "epoch": 1.58, + "learning_rate": 1.4838461900208487e-05, + "loss": 0.0308, + "step": 33945 + }, + { + "epoch": 1.58, + "learning_rate": 1.4837678115153702e-05, + "loss": 0.0608, + "step": 33950 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836894330098915e-05, + "loss": 0.1048, + "step": 33955 + }, + { + "epoch": 1.58, + "learning_rate": 1.4836110545044128e-05, + "loss": 0.1423, + "step": 33960 + }, + { + "epoch": 1.58, + "learning_rate": 1.483532675998934e-05, + "loss": 0.175, + "step": 33965 + }, + { + "epoch": 1.59, + "learning_rate": 1.4834542974934556e-05, + "loss": 0.1559, + "step": 33970 + }, + { + "epoch": 1.59, + "learning_rate": 1.4833759189879768e-05, + "loss": 0.2204, + "step": 33975 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832975404824982e-05, + "loss": 0.3428, + "step": 33980 + }, + { + "epoch": 1.59, + "learning_rate": 1.4832191619770195e-05, + "loss": 0.2474, + "step": 33985 + }, + { + "epoch": 1.59, + "learning_rate": 1.4831407834715409e-05, + "loss": 0.0378, + "step": 33990 + }, + { + "epoch": 1.59, + "learning_rate": 1.4830624049660622e-05, + "loss": 0.0739, + "step": 33995 + }, + { + "epoch": 1.59, + "learning_rate": 1.4829840264605835e-05, + "loss": 0.0936, + "step": 34000 + }, + { + "epoch": 1.59, + "learning_rate": 1.482905647955105e-05, + "loss": 0.0705, + "step": 34005 + }, + { + "epoch": 1.59, + "learning_rate": 1.4828272694496262e-05, + "loss": 0.0965, + "step": 34010 + }, + { + "epoch": 1.59, + "learning_rate": 1.4827488909441476e-05, + "loss": 0.0939, + "step": 34015 + }, + { + "epoch": 1.59, + "learning_rate": 1.4826705124386689e-05, + "loss": 0.1264, + "step": 34020 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825921339331904e-05, + "loss": 0.1568, + "step": 34025 + }, + { + "epoch": 1.59, + "learning_rate": 1.4825137554277116e-05, + "loss": 0.2927, + "step": 34030 + }, + { + "epoch": 1.59, + "learning_rate": 1.482435376922233e-05, + "loss": 0.3461, + "step": 34035 + }, + { + "epoch": 1.59, + "learning_rate": 1.4823569984167542e-05, + "loss": 0.0535, + "step": 34040 + }, + { + "epoch": 1.59, + "learning_rate": 1.4822786199112758e-05, + "loss": 0.0372, + "step": 34045 + }, + { + "epoch": 1.59, + "learning_rate": 1.482200241405797e-05, + "loss": 0.0823, + "step": 34050 + }, + { + "epoch": 1.59, + "learning_rate": 1.4821218629003183e-05, + "loss": 0.0993, + "step": 34055 + }, + { + "epoch": 1.59, + "learning_rate": 1.4820434843948396e-05, + "loss": 0.1109, + "step": 34060 + }, + { + "epoch": 1.59, + "learning_rate": 1.4819651058893609e-05, + "loss": 0.1505, + "step": 34065 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818867273838824e-05, + "loss": 0.2167, + "step": 34070 + }, + { + "epoch": 1.59, + "learning_rate": 1.4818083488784036e-05, + "loss": 0.1975, + "step": 34075 + }, + { + "epoch": 1.59, + "learning_rate": 1.481729970372925e-05, + "loss": 0.3396, + "step": 34080 + }, + { + "epoch": 1.59, + "learning_rate": 1.4816515918674463e-05, + "loss": 0.2306, + "step": 34085 + }, + { + "epoch": 1.59, + "learning_rate": 1.4815732133619678e-05, + "loss": 0.0387, + "step": 34090 + }, + { + "epoch": 1.59, + "learning_rate": 1.481494834856489e-05, + "loss": 0.0714, + "step": 34095 + }, + { + "epoch": 1.59, + "learning_rate": 1.4814164563510104e-05, + "loss": 0.1295, + "step": 34100 + }, + { + "epoch": 1.59, + "learning_rate": 1.4813380778455318e-05, + "loss": 0.1515, + "step": 34105 + }, + { + "epoch": 1.59, + "learning_rate": 1.4812596993400532e-05, + "loss": 0.1432, + "step": 34110 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811813208345744e-05, + "loss": 0.1063, + "step": 34115 + }, + { + "epoch": 1.59, + "learning_rate": 1.4811029423290957e-05, + "loss": 0.175, + "step": 34120 + }, + { + "epoch": 1.59, + "learning_rate": 1.4810245638236172e-05, + "loss": 0.2832, + "step": 34125 + }, + { + "epoch": 1.59, + "learning_rate": 1.4809461853181384e-05, + "loss": 0.4135, + "step": 34130 + }, + { + "epoch": 1.59, + "learning_rate": 1.4808678068126598e-05, + "loss": 0.2442, + "step": 34135 + }, + { + "epoch": 1.59, + "learning_rate": 1.480789428307181e-05, + "loss": 0.0886, + "step": 34140 + }, + { + "epoch": 1.59, + "learning_rate": 1.4807110498017026e-05, + "loss": 0.0458, + "step": 34145 + }, + { + "epoch": 1.59, + "learning_rate": 1.4806326712962238e-05, + "loss": 0.1056, + "step": 34150 + }, + { + "epoch": 1.59, + "learning_rate": 1.4805542927907452e-05, + "loss": 0.0775, + "step": 34155 + }, + { + "epoch": 1.59, + "learning_rate": 1.4804759142852664e-05, + "loss": 0.1509, + "step": 34160 + }, + { + "epoch": 1.59, + "learning_rate": 1.480397535779788e-05, + "loss": 0.1604, + "step": 34165 + }, + { + "epoch": 1.59, + "learning_rate": 1.4803191572743092e-05, + "loss": 0.1765, + "step": 34170 + }, + { + "epoch": 1.59, + "learning_rate": 1.4802407787688306e-05, + "loss": 0.1314, + "step": 34175 + }, + { + "epoch": 1.59, + "learning_rate": 1.4801624002633518e-05, + "loss": 0.428, + "step": 34180 + }, + { + "epoch": 1.6, + "learning_rate": 1.480084021757873e-05, + "loss": 0.2183, + "step": 34185 + }, + { + "epoch": 1.6, + "learning_rate": 1.4800056432523946e-05, + "loss": 0.0748, + "step": 34190 + }, + { + "epoch": 1.6, + "learning_rate": 1.4799272647469158e-05, + "loss": 0.0489, + "step": 34195 + }, + { + "epoch": 1.6, + "learning_rate": 1.4798488862414372e-05, + "loss": 0.0639, + "step": 34200 + }, + { + "epoch": 1.6, + "learning_rate": 1.4797705077359586e-05, + "loss": 0.0929, + "step": 34205 + }, + { + "epoch": 1.6, + "learning_rate": 1.47969212923048e-05, + "loss": 0.187, + "step": 34210 + }, + { + "epoch": 1.6, + "learning_rate": 1.4796137507250012e-05, + "loss": 0.1703, + "step": 34215 + }, + { + "epoch": 1.6, + "learning_rate": 1.4795353722195228e-05, + "loss": 0.1433, + "step": 34220 + }, + { + "epoch": 1.6, + "learning_rate": 1.479456993714044e-05, + "loss": 0.2344, + "step": 34225 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793786152085654e-05, + "loss": 0.3602, + "step": 34230 + }, + { + "epoch": 1.6, + "learning_rate": 1.4793002367030866e-05, + "loss": 0.3009, + "step": 34235 + }, + { + "epoch": 1.6, + "learning_rate": 1.4792218581976082e-05, + "loss": 0.107, + "step": 34240 + }, + { + "epoch": 1.6, + "learning_rate": 1.4791434796921294e-05, + "loss": 0.0895, + "step": 34245 + }, + { + "epoch": 1.6, + "learning_rate": 1.4790651011866506e-05, + "loss": 0.0729, + "step": 34250 + }, + { + "epoch": 1.6, + "learning_rate": 1.478986722681172e-05, + "loss": 0.0705, + "step": 34255 + }, + { + "epoch": 1.6, + "learning_rate": 1.4789083441756932e-05, + "loss": 0.1037, + "step": 34260 + }, + { + "epoch": 1.6, + "learning_rate": 1.4788299656702148e-05, + "loss": 0.1372, + "step": 34265 + }, + { + "epoch": 1.6, + "learning_rate": 1.478751587164736e-05, + "loss": 0.2172, + "step": 34270 + }, + { + "epoch": 1.6, + "learning_rate": 1.4786732086592574e-05, + "loss": 0.2641, + "step": 34275 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785948301537786e-05, + "loss": 0.3562, + "step": 34280 + }, + { + "epoch": 1.6, + "learning_rate": 1.4785164516483002e-05, + "loss": 0.2552, + "step": 34285 + }, + { + "epoch": 1.6, + "learning_rate": 1.4784380731428214e-05, + "loss": 0.0604, + "step": 34290 + }, + { + "epoch": 1.6, + "learning_rate": 1.4783596946373428e-05, + "loss": 0.0537, + "step": 34295 + }, + { + "epoch": 1.6, + "learning_rate": 1.478281316131864e-05, + "loss": 0.1428, + "step": 34300 + }, + { + "epoch": 1.6, + "learning_rate": 1.4782029376263856e-05, + "loss": 0.1001, + "step": 34305 + }, + { + "epoch": 1.6, + "learning_rate": 1.4781245591209068e-05, + "loss": 0.0837, + "step": 34310 + }, + { + "epoch": 1.6, + "learning_rate": 1.478046180615428e-05, + "loss": 0.125, + "step": 34315 + }, + { + "epoch": 1.6, + "learning_rate": 1.4779678021099496e-05, + "loss": 0.1296, + "step": 34320 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778894236044708e-05, + "loss": 0.1319, + "step": 34325 + }, + { + "epoch": 1.6, + "learning_rate": 1.4778110450989922e-05, + "loss": 0.2868, + "step": 34330 + }, + { + "epoch": 1.6, + "learning_rate": 1.4777326665935134e-05, + "loss": 0.3055, + "step": 34335 + }, + { + "epoch": 1.6, + "learning_rate": 1.477654288088035e-05, + "loss": 0.0613, + "step": 34340 + }, + { + "epoch": 1.6, + "learning_rate": 1.4775759095825562e-05, + "loss": 0.1232, + "step": 34345 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774975310770776e-05, + "loss": 0.0745, + "step": 34350 + }, + { + "epoch": 1.6, + "learning_rate": 1.4774191525715988e-05, + "loss": 0.128, + "step": 34355 + }, + { + "epoch": 1.6, + "learning_rate": 1.4773407740661204e-05, + "loss": 0.0547, + "step": 34360 + }, + { + "epoch": 1.6, + "learning_rate": 1.4772623955606416e-05, + "loss": 0.1655, + "step": 34365 + }, + { + "epoch": 1.6, + "learning_rate": 1.477184017055163e-05, + "loss": 0.47, + "step": 34370 + }, + { + "epoch": 1.6, + "learning_rate": 1.4771056385496842e-05, + "loss": 0.2424, + "step": 34375 + }, + { + "epoch": 1.6, + "learning_rate": 1.4770272600442054e-05, + "loss": 0.3795, + "step": 34380 + }, + { + "epoch": 1.6, + "learning_rate": 1.476948881538727e-05, + "loss": 0.292, + "step": 34385 + }, + { + "epoch": 1.6, + "learning_rate": 1.4768705030332482e-05, + "loss": 0.0394, + "step": 34390 + }, + { + "epoch": 1.6, + "learning_rate": 1.4767921245277696e-05, + "loss": 0.0294, + "step": 34395 + }, + { + "epoch": 1.61, + "learning_rate": 1.4767137460222908e-05, + "loss": 0.0748, + "step": 34400 + }, + { + "epoch": 1.61, + "learning_rate": 1.4766353675168124e-05, + "loss": 0.1135, + "step": 34405 + }, + { + "epoch": 1.61, + "learning_rate": 1.4765569890113336e-05, + "loss": 0.1248, + "step": 34410 + }, + { + "epoch": 1.61, + "learning_rate": 1.476478610505855e-05, + "loss": 0.1699, + "step": 34415 + }, + { + "epoch": 1.61, + "learning_rate": 1.4764002320003764e-05, + "loss": 0.1724, + "step": 34420 + }, + { + "epoch": 1.61, + "learning_rate": 1.4763218534948978e-05, + "loss": 0.2934, + "step": 34425 + }, + { + "epoch": 1.61, + "learning_rate": 1.476243474989419e-05, + "loss": 0.3888, + "step": 34430 + }, + { + "epoch": 1.61, + "learning_rate": 1.4761650964839405e-05, + "loss": 0.3509, + "step": 34435 + }, + { + "epoch": 1.61, + "learning_rate": 1.4760867179784618e-05, + "loss": 0.0337, + "step": 34440 + }, + { + "epoch": 1.61, + "learning_rate": 1.476008339472983e-05, + "loss": 0.0399, + "step": 34445 + }, + { + "epoch": 1.61, + "learning_rate": 1.4759299609675044e-05, + "loss": 0.071, + "step": 34450 + }, + { + "epoch": 1.61, + "learning_rate": 1.4758515824620256e-05, + "loss": 0.1536, + "step": 34455 + }, + { + "epoch": 1.61, + "learning_rate": 1.4757732039565472e-05, + "loss": 0.0879, + "step": 34460 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756948254510684e-05, + "loss": 0.256, + "step": 34465 + }, + { + "epoch": 1.61, + "learning_rate": 1.4756164469455898e-05, + "loss": 0.1642, + "step": 34470 + }, + { + "epoch": 1.61, + "learning_rate": 1.475538068440111e-05, + "loss": 0.3665, + "step": 34475 + }, + { + "epoch": 1.61, + "learning_rate": 1.4754596899346326e-05, + "loss": 0.5219, + "step": 34480 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753813114291538e-05, + "loss": 0.24, + "step": 34485 + }, + { + "epoch": 1.61, + "learning_rate": 1.4753029329236752e-05, + "loss": 0.0458, + "step": 34490 + }, + { + "epoch": 1.61, + "learning_rate": 1.4752245544181964e-05, + "loss": 0.0644, + "step": 34495 + }, + { + "epoch": 1.61, + "learning_rate": 1.475146175912718e-05, + "loss": 0.0412, + "step": 34500 + }, + { + "epoch": 1.61, + "learning_rate": 1.4750677974072392e-05, + "loss": 0.0782, + "step": 34505 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749894189017604e-05, + "loss": 0.102, + "step": 34510 + }, + { + "epoch": 1.61, + "learning_rate": 1.4749110403962818e-05, + "loss": 0.1028, + "step": 34515 + }, + { + "epoch": 1.61, + "learning_rate": 1.4748326618908032e-05, + "loss": 0.1788, + "step": 34520 + }, + { + "epoch": 1.61, + "learning_rate": 1.4747542833853246e-05, + "loss": 0.1417, + "step": 34525 + }, + { + "epoch": 1.61, + "learning_rate": 1.4746759048798458e-05, + "loss": 0.2783, + "step": 34530 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745975263743673e-05, + "loss": 0.2968, + "step": 34535 + }, + { + "epoch": 1.61, + "learning_rate": 1.4745191478688886e-05, + "loss": 0.0946, + "step": 34540 + }, + { + "epoch": 1.61, + "learning_rate": 1.47444076936341e-05, + "loss": 0.0437, + "step": 34545 + }, + { + "epoch": 1.61, + "learning_rate": 1.4743623908579312e-05, + "loss": 0.0868, + "step": 34550 + }, + { + "epoch": 1.61, + "learning_rate": 1.4742840123524527e-05, + "loss": 0.105, + "step": 34555 + }, + { + "epoch": 1.61, + "learning_rate": 1.474205633846974e-05, + "loss": 0.1074, + "step": 34560 + }, + { + "epoch": 1.61, + "learning_rate": 1.4741272553414953e-05, + "loss": 0.1464, + "step": 34565 + }, + { + "epoch": 1.61, + "learning_rate": 1.4740488768360166e-05, + "loss": 0.1298, + "step": 34570 + }, + { + "epoch": 1.61, + "learning_rate": 1.4739704983305378e-05, + "loss": 0.2425, + "step": 34575 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738921198250593e-05, + "loss": 0.3058, + "step": 34580 + }, + { + "epoch": 1.61, + "learning_rate": 1.4738137413195806e-05, + "loss": 0.2462, + "step": 34585 + }, + { + "epoch": 1.61, + "learning_rate": 1.473735362814102e-05, + "loss": 0.0695, + "step": 34590 + }, + { + "epoch": 1.61, + "learning_rate": 1.4736569843086232e-05, + "loss": 0.0971, + "step": 34595 + }, + { + "epoch": 1.61, + "learning_rate": 1.4735786058031447e-05, + "loss": 0.1426, + "step": 34600 + }, + { + "epoch": 1.61, + "learning_rate": 1.473500227297666e-05, + "loss": 0.0843, + "step": 34605 + }, + { + "epoch": 1.61, + "learning_rate": 1.4734218487921874e-05, + "loss": 0.1106, + "step": 34610 + }, + { + "epoch": 1.62, + "learning_rate": 1.4733434702867086e-05, + "loss": 0.0959, + "step": 34615 + }, + { + "epoch": 1.62, + "learning_rate": 1.4732650917812301e-05, + "loss": 0.1614, + "step": 34620 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731867132757514e-05, + "loss": 0.2626, + "step": 34625 + }, + { + "epoch": 1.62, + "learning_rate": 1.4731083347702727e-05, + "loss": 0.2758, + "step": 34630 + }, + { + "epoch": 1.62, + "learning_rate": 1.4730299562647941e-05, + "loss": 0.3072, + "step": 34635 + }, + { + "epoch": 1.62, + "learning_rate": 1.4729515777593154e-05, + "loss": 0.0358, + "step": 34640 + }, + { + "epoch": 1.62, + "learning_rate": 1.4728731992538367e-05, + "loss": 0.0405, + "step": 34645 + }, + { + "epoch": 1.62, + "learning_rate": 1.472794820748358e-05, + "loss": 0.0772, + "step": 34650 + }, + { + "epoch": 1.62, + "learning_rate": 1.4727164422428795e-05, + "loss": 0.1179, + "step": 34655 + }, + { + "epoch": 1.62, + "learning_rate": 1.4726380637374008e-05, + "loss": 0.0595, + "step": 34660 + }, + { + "epoch": 1.62, + "learning_rate": 1.4725596852319221e-05, + "loss": 0.2746, + "step": 34665 + }, + { + "epoch": 1.62, + "learning_rate": 1.4724813067264434e-05, + "loss": 0.1401, + "step": 34670 + }, + { + "epoch": 1.62, + "learning_rate": 1.472402928220965e-05, + "loss": 0.1435, + "step": 34675 + }, + { + "epoch": 1.62, + "learning_rate": 1.4723245497154861e-05, + "loss": 0.2242, + "step": 34680 + }, + { + "epoch": 1.62, + "learning_rate": 1.4722461712100075e-05, + "loss": 0.3268, + "step": 34685 + }, + { + "epoch": 1.62, + "learning_rate": 1.4721677927045288e-05, + "loss": 0.034, + "step": 34690 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720894141990503e-05, + "loss": 0.1351, + "step": 34695 + }, + { + "epoch": 1.62, + "learning_rate": 1.4720110356935715e-05, + "loss": 0.0795, + "step": 34700 + }, + { + "epoch": 1.62, + "learning_rate": 1.4719326571880928e-05, + "loss": 0.0898, + "step": 34705 + }, + { + "epoch": 1.62, + "learning_rate": 1.4718542786826141e-05, + "loss": 0.096, + "step": 34710 + }, + { + "epoch": 1.62, + "learning_rate": 1.4717759001771354e-05, + "loss": 0.1264, + "step": 34715 + }, + { + "epoch": 1.62, + "learning_rate": 1.471697521671657e-05, + "loss": 0.1767, + "step": 34720 + }, + { + "epoch": 1.62, + "learning_rate": 1.4716191431661782e-05, + "loss": 0.1141, + "step": 34725 + }, + { + "epoch": 1.62, + "learning_rate": 1.4715407646606995e-05, + "loss": 0.3326, + "step": 34730 + }, + { + "epoch": 1.62, + "learning_rate": 1.471462386155221e-05, + "loss": 0.3716, + "step": 34735 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713840076497423e-05, + "loss": 0.0574, + "step": 34740 + }, + { + "epoch": 1.62, + "learning_rate": 1.4713056291442635e-05, + "loss": 0.0426, + "step": 34745 + }, + { + "epoch": 1.62, + "learning_rate": 1.4712272506387851e-05, + "loss": 0.0909, + "step": 34750 + }, + { + "epoch": 1.62, + "learning_rate": 1.4711488721333063e-05, + "loss": 0.1078, + "step": 34755 + }, + { + "epoch": 1.62, + "learning_rate": 1.4710704936278277e-05, + "loss": 0.1061, + "step": 34760 + }, + { + "epoch": 1.62, + "learning_rate": 1.470992115122349e-05, + "loss": 0.1885, + "step": 34765 + }, + { + "epoch": 1.62, + "learning_rate": 1.4709137366168702e-05, + "loss": 0.176, + "step": 34770 + }, + { + "epoch": 1.62, + "learning_rate": 1.4708353581113917e-05, + "loss": 0.1519, + "step": 34775 + }, + { + "epoch": 1.62, + "learning_rate": 1.470756979605913e-05, + "loss": 0.2766, + "step": 34780 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706786011004343e-05, + "loss": 0.2345, + "step": 34785 + }, + { + "epoch": 1.62, + "learning_rate": 1.4706002225949556e-05, + "loss": 0.0734, + "step": 34790 + }, + { + "epoch": 1.62, + "learning_rate": 1.4705218440894771e-05, + "loss": 0.0545, + "step": 34795 + }, + { + "epoch": 1.62, + "learning_rate": 1.4704434655839983e-05, + "loss": 0.0359, + "step": 34800 + }, + { + "epoch": 1.62, + "learning_rate": 1.4703650870785197e-05, + "loss": 0.08, + "step": 34805 + }, + { + "epoch": 1.62, + "learning_rate": 1.470286708573041e-05, + "loss": 0.1442, + "step": 34810 + }, + { + "epoch": 1.62, + "learning_rate": 1.4702083300675625e-05, + "loss": 0.1107, + "step": 34815 + }, + { + "epoch": 1.62, + "learning_rate": 1.4701299515620837e-05, + "loss": 0.1809, + "step": 34820 + }, + { + "epoch": 1.62, + "learning_rate": 1.4700515730566051e-05, + "loss": 0.1639, + "step": 34825 + }, + { + "epoch": 1.63, + "learning_rate": 1.4699731945511263e-05, + "loss": 0.2368, + "step": 34830 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698948160456477e-05, + "loss": 0.2754, + "step": 34835 + }, + { + "epoch": 1.63, + "learning_rate": 1.4698164375401691e-05, + "loss": 0.1172, + "step": 34840 + }, + { + "epoch": 1.63, + "learning_rate": 1.4697380590346903e-05, + "loss": 0.0575, + "step": 34845 + }, + { + "epoch": 1.63, + "learning_rate": 1.4696596805292119e-05, + "loss": 0.1116, + "step": 34850 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695813020237331e-05, + "loss": 0.1418, + "step": 34855 + }, + { + "epoch": 1.63, + "learning_rate": 1.4695029235182545e-05, + "loss": 0.1506, + "step": 34860 + }, + { + "epoch": 1.63, + "learning_rate": 1.4694245450127757e-05, + "loss": 0.1702, + "step": 34865 + }, + { + "epoch": 1.63, + "learning_rate": 1.4693461665072973e-05, + "loss": 0.2012, + "step": 34870 + }, + { + "epoch": 1.63, + "learning_rate": 1.4692677880018185e-05, + "loss": 0.28, + "step": 34875 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691894094963399e-05, + "loss": 0.2179, + "step": 34880 + }, + { + "epoch": 1.63, + "learning_rate": 1.4691110309908611e-05, + "loss": 0.2453, + "step": 34885 + }, + { + "epoch": 1.63, + "learning_rate": 1.4690326524853827e-05, + "loss": 0.0277, + "step": 34890 + }, + { + "epoch": 1.63, + "learning_rate": 1.4689542739799039e-05, + "loss": 0.1151, + "step": 34895 + }, + { + "epoch": 1.63, + "learning_rate": 1.4688758954744251e-05, + "loss": 0.091, + "step": 34900 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687975169689465e-05, + "loss": 0.1309, + "step": 34905 + }, + { + "epoch": 1.63, + "learning_rate": 1.4687191384634677e-05, + "loss": 0.0805, + "step": 34910 + }, + { + "epoch": 1.63, + "learning_rate": 1.4686407599579893e-05, + "loss": 0.1674, + "step": 34915 + }, + { + "epoch": 1.63, + "learning_rate": 1.4685623814525105e-05, + "loss": 0.1859, + "step": 34920 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684840029470319e-05, + "loss": 0.234, + "step": 34925 + }, + { + "epoch": 1.63, + "learning_rate": 1.4684056244415531e-05, + "loss": 0.3353, + "step": 34930 + }, + { + "epoch": 1.63, + "learning_rate": 1.4683272459360747e-05, + "loss": 0.3759, + "step": 34935 + }, + { + "epoch": 1.63, + "learning_rate": 1.4682488674305959e-05, + "loss": 0.0299, + "step": 34940 + }, + { + "epoch": 1.63, + "learning_rate": 1.4681704889251173e-05, + "loss": 0.0724, + "step": 34945 + }, + { + "epoch": 1.63, + "learning_rate": 1.4680921104196387e-05, + "loss": 0.1143, + "step": 34950 + }, + { + "epoch": 1.63, + "learning_rate": 1.46801373191416e-05, + "loss": 0.1031, + "step": 34955 + }, + { + "epoch": 1.63, + "learning_rate": 1.4679353534086813e-05, + "loss": 0.1564, + "step": 34960 + }, + { + "epoch": 1.63, + "learning_rate": 1.4678569749032025e-05, + "loss": 0.1843, + "step": 34965 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677785963977241e-05, + "loss": 0.1421, + "step": 34970 + }, + { + "epoch": 1.63, + "learning_rate": 1.4677002178922453e-05, + "loss": 0.1495, + "step": 34975 + }, + { + "epoch": 1.63, + "learning_rate": 1.4676218393867667e-05, + "loss": 0.3336, + "step": 34980 + }, + { + "epoch": 1.63, + "learning_rate": 1.467543460881288e-05, + "loss": 0.3336, + "step": 34985 + }, + { + "epoch": 1.63, + "learning_rate": 1.4674650823758095e-05, + "loss": 0.0764, + "step": 34990 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673867038703307e-05, + "loss": 0.0414, + "step": 34995 + }, + { + "epoch": 1.63, + "learning_rate": 1.4673083253648521e-05, + "loss": 0.0867, + "step": 35000 + }, + { + "epoch": 1.63, + "learning_rate": 1.4672299468593733e-05, + "loss": 0.0791, + "step": 35005 + }, + { + "epoch": 1.63, + "learning_rate": 1.4671515683538949e-05, + "loss": 0.1432, + "step": 35010 + }, + { + "epoch": 1.63, + "learning_rate": 1.4670731898484161e-05, + "loss": 0.1849, + "step": 35015 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669948113429375e-05, + "loss": 0.1386, + "step": 35020 + }, + { + "epoch": 1.63, + "learning_rate": 1.4669164328374587e-05, + "loss": 0.1576, + "step": 35025 + }, + { + "epoch": 1.63, + "learning_rate": 1.46683805433198e-05, + "loss": 0.3647, + "step": 35030 + }, + { + "epoch": 1.63, + "learning_rate": 1.4667596758265015e-05, + "loss": 0.3103, + "step": 35035 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666812973210227e-05, + "loss": 0.0494, + "step": 35040 + }, + { + "epoch": 1.64, + "learning_rate": 1.4666029188155441e-05, + "loss": 0.0672, + "step": 35045 + }, + { + "epoch": 1.64, + "learning_rate": 1.4665245403100655e-05, + "loss": 0.0995, + "step": 35050 + }, + { + "epoch": 1.64, + "learning_rate": 1.4664461618045869e-05, + "loss": 0.0589, + "step": 35055 + }, + { + "epoch": 1.64, + "learning_rate": 1.4663677832991081e-05, + "loss": 0.1083, + "step": 35060 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662894047936297e-05, + "loss": 0.1836, + "step": 35065 + }, + { + "epoch": 1.64, + "learning_rate": 1.4662110262881509e-05, + "loss": 0.216, + "step": 35070 + }, + { + "epoch": 1.64, + "learning_rate": 1.4661326477826723e-05, + "loss": 0.2673, + "step": 35075 + }, + { + "epoch": 1.64, + "learning_rate": 1.4660542692771935e-05, + "loss": 0.3442, + "step": 35080 + }, + { + "epoch": 1.64, + "learning_rate": 1.465975890771715e-05, + "loss": 0.3217, + "step": 35085 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658975122662363e-05, + "loss": 0.0211, + "step": 35090 + }, + { + "epoch": 1.64, + "learning_rate": 1.4658191337607575e-05, + "loss": 0.0388, + "step": 35095 + }, + { + "epoch": 1.64, + "learning_rate": 1.4657407552552789e-05, + "loss": 0.044, + "step": 35100 + }, + { + "epoch": 1.64, + "learning_rate": 1.4656623767498001e-05, + "loss": 0.1086, + "step": 35105 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655839982443217e-05, + "loss": 0.1599, + "step": 35110 + }, + { + "epoch": 1.64, + "learning_rate": 1.4655056197388429e-05, + "loss": 0.129, + "step": 35115 + }, + { + "epoch": 1.64, + "learning_rate": 1.4654272412333643e-05, + "loss": 0.1483, + "step": 35120 + }, + { + "epoch": 1.64, + "learning_rate": 1.4653488627278855e-05, + "loss": 0.2187, + "step": 35125 + }, + { + "epoch": 1.64, + "learning_rate": 1.465270484222407e-05, + "loss": 0.4357, + "step": 35130 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651921057169283e-05, + "loss": 0.2683, + "step": 35135 + }, + { + "epoch": 1.64, + "learning_rate": 1.4651137272114497e-05, + "loss": 0.0366, + "step": 35140 + }, + { + "epoch": 1.64, + "learning_rate": 1.4650353487059709e-05, + "loss": 0.0448, + "step": 35145 + }, + { + "epoch": 1.64, + "learning_rate": 1.4649569702004925e-05, + "loss": 0.0861, + "step": 35150 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648785916950137e-05, + "loss": 0.0554, + "step": 35155 + }, + { + "epoch": 1.64, + "learning_rate": 1.4648002131895349e-05, + "loss": 0.143, + "step": 35160 + }, + { + "epoch": 1.64, + "learning_rate": 1.4647218346840565e-05, + "loss": 0.1561, + "step": 35165 + }, + { + "epoch": 1.64, + "learning_rate": 1.4646434561785777e-05, + "loss": 0.1867, + "step": 35170 + }, + { + "epoch": 1.64, + "learning_rate": 1.464565077673099e-05, + "loss": 0.1855, + "step": 35175 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644866991676203e-05, + "loss": 0.4092, + "step": 35180 + }, + { + "epoch": 1.64, + "learning_rate": 1.4644083206621418e-05, + "loss": 0.3768, + "step": 35185 + }, + { + "epoch": 1.64, + "learning_rate": 1.464329942156663e-05, + "loss": 0.1151, + "step": 35190 + }, + { + "epoch": 1.64, + "learning_rate": 1.4642515636511845e-05, + "loss": 0.0626, + "step": 35195 + }, + { + "epoch": 1.64, + "learning_rate": 1.4641731851457057e-05, + "loss": 0.0333, + "step": 35200 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640948066402272e-05, + "loss": 0.0781, + "step": 35205 + }, + { + "epoch": 1.64, + "learning_rate": 1.4640164281347485e-05, + "loss": 0.1316, + "step": 35210 + }, + { + "epoch": 1.64, + "learning_rate": 1.4639380496292699e-05, + "loss": 0.0988, + "step": 35215 + }, + { + "epoch": 1.64, + "learning_rate": 1.463859671123791e-05, + "loss": 0.1637, + "step": 35220 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637812926183123e-05, + "loss": 0.2726, + "step": 35225 + }, + { + "epoch": 1.64, + "learning_rate": 1.4637029141128339e-05, + "loss": 0.336, + "step": 35230 + }, + { + "epoch": 1.64, + "learning_rate": 1.463624535607355e-05, + "loss": 0.1921, + "step": 35235 + }, + { + "epoch": 1.64, + "learning_rate": 1.4635461571018765e-05, + "loss": 0.026, + "step": 35240 + }, + { + "epoch": 1.64, + "learning_rate": 1.4634677785963977e-05, + "loss": 0.0511, + "step": 35245 + }, + { + "epoch": 1.64, + "learning_rate": 1.4633894000909192e-05, + "loss": 0.0613, + "step": 35250 + }, + { + "epoch": 1.65, + "learning_rate": 1.4633110215854405e-05, + "loss": 0.116, + "step": 35255 + }, + { + "epoch": 1.65, + "learning_rate": 1.4632326430799619e-05, + "loss": 0.1376, + "step": 35260 + }, + { + "epoch": 1.65, + "learning_rate": 1.4631542645744833e-05, + "loss": 0.1423, + "step": 35265 + }, + { + "epoch": 1.65, + "learning_rate": 1.4630758860690046e-05, + "loss": 0.0715, + "step": 35270 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629975075635259e-05, + "loss": 0.2406, + "step": 35275 + }, + { + "epoch": 1.65, + "learning_rate": 1.4629191290580474e-05, + "loss": 0.2576, + "step": 35280 + }, + { + "epoch": 1.65, + "learning_rate": 1.4628407505525686e-05, + "loss": 0.1546, + "step": 35285 + }, + { + "epoch": 1.65, + "learning_rate": 1.4627623720470899e-05, + "loss": 0.0603, + "step": 35290 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626839935416113e-05, + "loss": 0.0637, + "step": 35295 + }, + { + "epoch": 1.65, + "learning_rate": 1.4626056150361325e-05, + "loss": 0.0723, + "step": 35300 + }, + { + "epoch": 1.65, + "learning_rate": 1.462527236530654e-05, + "loss": 0.0931, + "step": 35305 + }, + { + "epoch": 1.65, + "learning_rate": 1.4624488580251753e-05, + "loss": 0.163, + "step": 35310 + }, + { + "epoch": 1.65, + "learning_rate": 1.4623704795196966e-05, + "loss": 0.2184, + "step": 35315 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622921010142179e-05, + "loss": 0.0844, + "step": 35320 + }, + { + "epoch": 1.65, + "learning_rate": 1.4622137225087394e-05, + "loss": 0.2475, + "step": 35325 + }, + { + "epoch": 1.65, + "learning_rate": 1.4621353440032607e-05, + "loss": 0.3015, + "step": 35330 + }, + { + "epoch": 1.65, + "learning_rate": 1.462056965497782e-05, + "loss": 0.26, + "step": 35335 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619785869923033e-05, + "loss": 0.0676, + "step": 35340 + }, + { + "epoch": 1.65, + "learning_rate": 1.4619002084868248e-05, + "loss": 0.0765, + "step": 35345 + }, + { + "epoch": 1.65, + "learning_rate": 1.461821829981346e-05, + "loss": 0.0856, + "step": 35350 + }, + { + "epoch": 1.65, + "learning_rate": 1.4617434514758673e-05, + "loss": 0.0801, + "step": 35355 + }, + { + "epoch": 1.65, + "learning_rate": 1.4616650729703887e-05, + "loss": 0.1053, + "step": 35360 + }, + { + "epoch": 1.65, + "learning_rate": 1.46158669446491e-05, + "loss": 0.1531, + "step": 35365 + }, + { + "epoch": 1.65, + "learning_rate": 1.4615083159594314e-05, + "loss": 0.2514, + "step": 35370 + }, + { + "epoch": 1.65, + "learning_rate": 1.4614299374539527e-05, + "loss": 0.1658, + "step": 35375 + }, + { + "epoch": 1.65, + "learning_rate": 1.4613515589484742e-05, + "loss": 0.3044, + "step": 35380 + }, + { + "epoch": 1.65, + "learning_rate": 1.4612731804429954e-05, + "loss": 0.2306, + "step": 35385 + }, + { + "epoch": 1.65, + "learning_rate": 1.4611948019375168e-05, + "loss": 0.0903, + "step": 35390 + }, + { + "epoch": 1.65, + "learning_rate": 1.461116423432038e-05, + "loss": 0.0519, + "step": 35395 + }, + { + "epoch": 1.65, + "learning_rate": 1.4610380449265596e-05, + "loss": 0.0563, + "step": 35400 + }, + { + "epoch": 1.65, + "learning_rate": 1.4609596664210808e-05, + "loss": 0.0959, + "step": 35405 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608812879156022e-05, + "loss": 0.0844, + "step": 35410 + }, + { + "epoch": 1.65, + "learning_rate": 1.4608029094101234e-05, + "loss": 0.1355, + "step": 35415 + }, + { + "epoch": 1.65, + "learning_rate": 1.4607245309046447e-05, + "loss": 0.1931, + "step": 35420 + }, + { + "epoch": 1.65, + "learning_rate": 1.4606461523991662e-05, + "loss": 0.257, + "step": 35425 + }, + { + "epoch": 1.65, + "learning_rate": 1.4605677738936874e-05, + "loss": 0.5352, + "step": 35430 + }, + { + "epoch": 1.65, + "learning_rate": 1.4604893953882088e-05, + "loss": 0.3642, + "step": 35435 + }, + { + "epoch": 1.65, + "learning_rate": 1.46041101688273e-05, + "loss": 0.0393, + "step": 35440 + }, + { + "epoch": 1.65, + "learning_rate": 1.4603326383772516e-05, + "loss": 0.106, + "step": 35445 + }, + { + "epoch": 1.65, + "learning_rate": 1.4602542598717728e-05, + "loss": 0.1245, + "step": 35450 + }, + { + "epoch": 1.65, + "learning_rate": 1.4601758813662942e-05, + "loss": 0.1576, + "step": 35455 + }, + { + "epoch": 1.65, + "learning_rate": 1.4600975028608155e-05, + "loss": 0.1734, + "step": 35460 + }, + { + "epoch": 1.65, + "learning_rate": 1.460019124355337e-05, + "loss": 0.0837, + "step": 35465 + }, + { + "epoch": 1.66, + "learning_rate": 1.4599407458498582e-05, + "loss": 0.1933, + "step": 35470 + }, + { + "epoch": 1.66, + "learning_rate": 1.4598623673443796e-05, + "loss": 0.1611, + "step": 35475 + }, + { + "epoch": 1.66, + "learning_rate": 1.459783988838901e-05, + "loss": 0.2965, + "step": 35480 + }, + { + "epoch": 1.66, + "learning_rate": 1.4597056103334222e-05, + "loss": 0.314, + "step": 35485 + }, + { + "epoch": 1.66, + "learning_rate": 1.4596272318279436e-05, + "loss": 0.0355, + "step": 35490 + }, + { + "epoch": 1.66, + "learning_rate": 1.4595488533224648e-05, + "loss": 0.0476, + "step": 35495 + }, + { + "epoch": 1.66, + "learning_rate": 1.4594704748169864e-05, + "loss": 0.0686, + "step": 35500 + }, + { + "epoch": 1.66, + "learning_rate": 1.4593920963115076e-05, + "loss": 0.1746, + "step": 35505 + }, + { + "epoch": 1.66, + "learning_rate": 1.459313717806029e-05, + "loss": 0.1403, + "step": 35510 + }, + { + "epoch": 1.66, + "learning_rate": 1.4592353393005502e-05, + "loss": 0.1285, + "step": 35515 + }, + { + "epoch": 1.66, + "learning_rate": 1.4591569607950718e-05, + "loss": 0.252, + "step": 35520 + }, + { + "epoch": 1.66, + "learning_rate": 1.459078582289593e-05, + "loss": 0.2358, + "step": 35525 + }, + { + "epoch": 1.66, + "learning_rate": 1.4590002037841144e-05, + "loss": 0.2364, + "step": 35530 + }, + { + "epoch": 1.66, + "learning_rate": 1.4589218252786356e-05, + "loss": 0.2942, + "step": 35535 + }, + { + "epoch": 1.66, + "learning_rate": 1.4588434467731572e-05, + "loss": 0.0387, + "step": 35540 + }, + { + "epoch": 1.66, + "learning_rate": 1.4587650682676784e-05, + "loss": 0.1152, + "step": 35545 + }, + { + "epoch": 1.66, + "learning_rate": 1.4586866897621996e-05, + "loss": 0.0824, + "step": 35550 + }, + { + "epoch": 1.66, + "learning_rate": 1.458608311256721e-05, + "loss": 0.1106, + "step": 35555 + }, + { + "epoch": 1.66, + "learning_rate": 1.4585299327512422e-05, + "loss": 0.1017, + "step": 35560 + }, + { + "epoch": 1.66, + "learning_rate": 1.4584515542457638e-05, + "loss": 0.2116, + "step": 35565 + }, + { + "epoch": 1.66, + "learning_rate": 1.458373175740285e-05, + "loss": 0.2479, + "step": 35570 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582947972348064e-05, + "loss": 0.2595, + "step": 35575 + }, + { + "epoch": 1.66, + "learning_rate": 1.4582164187293278e-05, + "loss": 0.3131, + "step": 35580 + }, + { + "epoch": 1.66, + "learning_rate": 1.4581380402238492e-05, + "loss": 0.2581, + "step": 35585 + }, + { + "epoch": 1.66, + "learning_rate": 1.4580596617183704e-05, + "loss": 0.0496, + "step": 35590 + }, + { + "epoch": 1.66, + "learning_rate": 1.457981283212892e-05, + "loss": 0.0473, + "step": 35595 + }, + { + "epoch": 1.66, + "learning_rate": 1.4579029047074132e-05, + "loss": 0.1001, + "step": 35600 + }, + { + "epoch": 1.66, + "learning_rate": 1.4578245262019346e-05, + "loss": 0.053, + "step": 35605 + }, + { + "epoch": 1.66, + "learning_rate": 1.4577461476964558e-05, + "loss": 0.1636, + "step": 35610 + }, + { + "epoch": 1.66, + "learning_rate": 1.457667769190977e-05, + "loss": 0.1338, + "step": 35615 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575893906854986e-05, + "loss": 0.2236, + "step": 35620 + }, + { + "epoch": 1.66, + "learning_rate": 1.4575110121800198e-05, + "loss": 0.17, + "step": 35625 + }, + { + "epoch": 1.66, + "learning_rate": 1.4574326336745412e-05, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4573542551690624e-05, + "loss": 0.1904, + "step": 35635 + }, + { + "epoch": 1.66, + "learning_rate": 1.457275876663584e-05, + "loss": 0.0506, + "step": 35640 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571974981581052e-05, + "loss": 0.0974, + "step": 35645 + }, + { + "epoch": 1.66, + "learning_rate": 1.4571191196526266e-05, + "loss": 0.036, + "step": 35650 + }, + { + "epoch": 1.66, + "learning_rate": 1.4570407411471478e-05, + "loss": 0.1061, + "step": 35655 + }, + { + "epoch": 1.66, + "learning_rate": 1.4569623626416694e-05, + "loss": 0.1198, + "step": 35660 + }, + { + "epoch": 1.66, + "learning_rate": 1.4568839841361906e-05, + "loss": 0.244, + "step": 35665 + }, + { + "epoch": 1.66, + "learning_rate": 1.456805605630712e-05, + "loss": 0.1738, + "step": 35670 + }, + { + "epoch": 1.66, + "learning_rate": 1.4567272271252332e-05, + "loss": 0.1297, + "step": 35675 + }, + { + "epoch": 1.66, + "learning_rate": 1.4566488486197546e-05, + "loss": 0.3234, + "step": 35680 + }, + { + "epoch": 1.67, + "learning_rate": 1.456570470114276e-05, + "loss": 0.3038, + "step": 35685 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564920916087972e-05, + "loss": 0.0216, + "step": 35690 + }, + { + "epoch": 1.67, + "learning_rate": 1.4564137131033188e-05, + "loss": 0.0839, + "step": 35695 + }, + { + "epoch": 1.67, + "learning_rate": 1.45633533459784e-05, + "loss": 0.0911, + "step": 35700 + }, + { + "epoch": 1.67, + "learning_rate": 1.4562569560923614e-05, + "loss": 0.1222, + "step": 35705 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561785775868826e-05, + "loss": 0.0914, + "step": 35710 + }, + { + "epoch": 1.67, + "learning_rate": 1.4561001990814042e-05, + "loss": 0.1321, + "step": 35715 + }, + { + "epoch": 1.67, + "learning_rate": 1.4560218205759254e-05, + "loss": 0.1351, + "step": 35720 + }, + { + "epoch": 1.67, + "learning_rate": 1.4559434420704468e-05, + "loss": 0.2113, + "step": 35725 + }, + { + "epoch": 1.67, + "learning_rate": 1.455865063564968e-05, + "loss": 0.3217, + "step": 35730 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557866850594896e-05, + "loss": 0.3432, + "step": 35735 + }, + { + "epoch": 1.67, + "learning_rate": 1.4557083065540108e-05, + "loss": 0.0595, + "step": 35740 + }, + { + "epoch": 1.67, + "learning_rate": 1.455629928048532e-05, + "loss": 0.0583, + "step": 35745 + }, + { + "epoch": 1.67, + "learning_rate": 1.4555515495430534e-05, + "loss": 0.0955, + "step": 35750 + }, + { + "epoch": 1.67, + "learning_rate": 1.4554731710375746e-05, + "loss": 0.0763, + "step": 35755 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553947925320962e-05, + "loss": 0.159, + "step": 35760 + }, + { + "epoch": 1.67, + "learning_rate": 1.4553164140266174e-05, + "loss": 0.1699, + "step": 35765 + }, + { + "epoch": 1.67, + "learning_rate": 1.4552380355211388e-05, + "loss": 0.1226, + "step": 35770 + }, + { + "epoch": 1.67, + "learning_rate": 1.45515965701566e-05, + "loss": 0.233, + "step": 35775 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550812785101816e-05, + "loss": 0.3687, + "step": 35780 + }, + { + "epoch": 1.67, + "learning_rate": 1.4550029000047028e-05, + "loss": 0.2873, + "step": 35785 + }, + { + "epoch": 1.67, + "learning_rate": 1.4549245214992242e-05, + "loss": 0.0251, + "step": 35790 + }, + { + "epoch": 1.67, + "learning_rate": 1.4548461429937456e-05, + "loss": 0.0768, + "step": 35795 + }, + { + "epoch": 1.67, + "learning_rate": 1.454767764488267e-05, + "loss": 0.1486, + "step": 35800 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546893859827882e-05, + "loss": 0.1071, + "step": 35805 + }, + { + "epoch": 1.67, + "learning_rate": 1.4546110074773094e-05, + "loss": 0.115, + "step": 35810 + }, + { + "epoch": 1.67, + "learning_rate": 1.454532628971831e-05, + "loss": 0.1783, + "step": 35815 + }, + { + "epoch": 1.67, + "learning_rate": 1.4544542504663522e-05, + "loss": 0.1416, + "step": 35820 + }, + { + "epoch": 1.67, + "learning_rate": 1.4543758719608736e-05, + "loss": 0.2712, + "step": 35825 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542974934553948e-05, + "loss": 0.3227, + "step": 35830 + }, + { + "epoch": 1.67, + "learning_rate": 1.4542191149499164e-05, + "loss": 0.3159, + "step": 35835 + }, + { + "epoch": 1.67, + "learning_rate": 1.4541407364444376e-05, + "loss": 0.0305, + "step": 35840 + }, + { + "epoch": 1.67, + "learning_rate": 1.454062357938959e-05, + "loss": 0.0921, + "step": 35845 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539839794334802e-05, + "loss": 0.0471, + "step": 35850 + }, + { + "epoch": 1.67, + "learning_rate": 1.4539056009280017e-05, + "loss": 0.1261, + "step": 35855 + }, + { + "epoch": 1.67, + "learning_rate": 1.453827222422523e-05, + "loss": 0.1288, + "step": 35860 + }, + { + "epoch": 1.67, + "learning_rate": 1.4537488439170444e-05, + "loss": 0.1459, + "step": 35865 + }, + { + "epoch": 1.67, + "learning_rate": 1.4536704654115656e-05, + "loss": 0.141, + "step": 35870 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535920869060868e-05, + "loss": 0.2848, + "step": 35875 + }, + { + "epoch": 1.67, + "learning_rate": 1.4535137084006084e-05, + "loss": 0.3458, + "step": 35880 + }, + { + "epoch": 1.67, + "learning_rate": 1.4534353298951296e-05, + "loss": 0.4117, + "step": 35885 + }, + { + "epoch": 1.67, + "learning_rate": 1.453356951389651e-05, + "loss": 0.0384, + "step": 35890 + }, + { + "epoch": 1.67, + "learning_rate": 1.4532785728841724e-05, + "loss": 0.1041, + "step": 35895 + }, + { + "epoch": 1.68, + "learning_rate": 1.4532001943786938e-05, + "loss": 0.1218, + "step": 35900 + }, + { + "epoch": 1.68, + "learning_rate": 1.453121815873215e-05, + "loss": 0.1158, + "step": 35905 + }, + { + "epoch": 1.68, + "learning_rate": 1.4530434373677365e-05, + "loss": 0.0852, + "step": 35910 + }, + { + "epoch": 1.68, + "learning_rate": 1.4529650588622578e-05, + "loss": 0.081, + "step": 35915 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528866803567791e-05, + "loss": 0.198, + "step": 35920 + }, + { + "epoch": 1.68, + "learning_rate": 1.4528083018513004e-05, + "loss": 0.2399, + "step": 35925 + }, + { + "epoch": 1.68, + "learning_rate": 1.452729923345822e-05, + "loss": 0.2144, + "step": 35930 + }, + { + "epoch": 1.68, + "learning_rate": 1.4526515448403432e-05, + "loss": 0.2149, + "step": 35935 + }, + { + "epoch": 1.68, + "learning_rate": 1.4525731663348644e-05, + "loss": 0.0402, + "step": 35940 + }, + { + "epoch": 1.68, + "learning_rate": 1.4524947878293858e-05, + "loss": 0.0718, + "step": 35945 + }, + { + "epoch": 1.68, + "learning_rate": 1.452416409323907e-05, + "loss": 0.0818, + "step": 35950 + }, + { + "epoch": 1.68, + "learning_rate": 1.4523380308184285e-05, + "loss": 0.0695, + "step": 35955 + }, + { + "epoch": 1.68, + "learning_rate": 1.4522596523129498e-05, + "loss": 0.0995, + "step": 35960 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521812738074712e-05, + "loss": 0.1538, + "step": 35965 + }, + { + "epoch": 1.68, + "learning_rate": 1.4521028953019924e-05, + "loss": 0.2184, + "step": 35970 + }, + { + "epoch": 1.68, + "learning_rate": 1.452024516796514e-05, + "loss": 0.1701, + "step": 35975 + }, + { + "epoch": 1.68, + "learning_rate": 1.4519461382910352e-05, + "loss": 0.401, + "step": 35980 + }, + { + "epoch": 1.68, + "learning_rate": 1.4518677597855565e-05, + "loss": 0.3014, + "step": 35985 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517893812800778e-05, + "loss": 0.047, + "step": 35990 + }, + { + "epoch": 1.68, + "learning_rate": 1.4517110027745993e-05, + "loss": 0.0691, + "step": 35995 + }, + { + "epoch": 1.68, + "learning_rate": 1.4516326242691206e-05, + "loss": 0.1155, + "step": 36000 + }, + { + "epoch": 1.68, + "learning_rate": 1.4515542457636418e-05, + "loss": 0.1304, + "step": 36005 + }, + { + "epoch": 1.68, + "learning_rate": 1.4514758672581633e-05, + "loss": 0.1563, + "step": 36010 + }, + { + "epoch": 1.68, + "learning_rate": 1.4513974887526846e-05, + "loss": 0.1016, + "step": 36015 + }, + { + "epoch": 1.68, + "learning_rate": 1.451319110247206e-05, + "loss": 0.186, + "step": 36020 + }, + { + "epoch": 1.68, + "learning_rate": 1.4512407317417272e-05, + "loss": 0.203, + "step": 36025 + }, + { + "epoch": 1.68, + "learning_rate": 1.4511623532362487e-05, + "loss": 0.3696, + "step": 36030 + }, + { + "epoch": 1.68, + "learning_rate": 1.45108397473077e-05, + "loss": 0.2746, + "step": 36035 + }, + { + "epoch": 1.68, + "learning_rate": 1.4510055962252913e-05, + "loss": 0.0431, + "step": 36040 + }, + { + "epoch": 1.68, + "learning_rate": 1.4509272177198126e-05, + "loss": 0.0458, + "step": 36045 + }, + { + "epoch": 1.68, + "learning_rate": 1.4508488392143341e-05, + "loss": 0.068, + "step": 36050 + }, + { + "epoch": 1.68, + "learning_rate": 1.4507704607088553e-05, + "loss": 0.156, + "step": 36055 + }, + { + "epoch": 1.68, + "learning_rate": 1.4506920822033767e-05, + "loss": 0.0859, + "step": 36060 + }, + { + "epoch": 1.68, + "learning_rate": 1.450613703697898e-05, + "loss": 0.1969, + "step": 36065 + }, + { + "epoch": 1.68, + "learning_rate": 1.4505353251924192e-05, + "loss": 0.1335, + "step": 36070 + }, + { + "epoch": 1.68, + "learning_rate": 1.4504569466869407e-05, + "loss": 0.2158, + "step": 36075 + }, + { + "epoch": 1.68, + "learning_rate": 1.450378568181462e-05, + "loss": 0.2714, + "step": 36080 + }, + { + "epoch": 1.68, + "learning_rate": 1.4503001896759833e-05, + "loss": 0.2969, + "step": 36085 + }, + { + "epoch": 1.68, + "learning_rate": 1.4502218111705046e-05, + "loss": 0.0216, + "step": 36090 + }, + { + "epoch": 1.68, + "learning_rate": 1.4501434326650261e-05, + "loss": 0.0438, + "step": 36095 + }, + { + "epoch": 1.68, + "learning_rate": 1.4500650541595473e-05, + "loss": 0.0988, + "step": 36100 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499866756540687e-05, + "loss": 0.0888, + "step": 36105 + }, + { + "epoch": 1.68, + "learning_rate": 1.4499082971485901e-05, + "loss": 0.0665, + "step": 36110 + }, + { + "epoch": 1.69, + "learning_rate": 1.4498299186431115e-05, + "loss": 0.1506, + "step": 36115 + }, + { + "epoch": 1.69, + "learning_rate": 1.4497515401376327e-05, + "loss": 0.1244, + "step": 36120 + }, + { + "epoch": 1.69, + "learning_rate": 1.4496731616321541e-05, + "loss": 0.1407, + "step": 36125 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495947831266755e-05, + "loss": 0.389, + "step": 36130 + }, + { + "epoch": 1.69, + "learning_rate": 1.4495164046211967e-05, + "loss": 0.2841, + "step": 36135 + }, + { + "epoch": 1.69, + "learning_rate": 1.4494380261157181e-05, + "loss": 0.0355, + "step": 36140 + }, + { + "epoch": 1.69, + "learning_rate": 1.4493596476102394e-05, + "loss": 0.1308, + "step": 36145 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492812691047609e-05, + "loss": 0.0888, + "step": 36150 + }, + { + "epoch": 1.69, + "learning_rate": 1.4492028905992821e-05, + "loss": 0.1226, + "step": 36155 + }, + { + "epoch": 1.69, + "learning_rate": 1.4491245120938035e-05, + "loss": 0.1033, + "step": 36160 + }, + { + "epoch": 1.69, + "learning_rate": 1.4490461335883247e-05, + "loss": 0.1986, + "step": 36165 + }, + { + "epoch": 1.69, + "learning_rate": 1.4489677550828463e-05, + "loss": 0.1476, + "step": 36170 + }, + { + "epoch": 1.69, + "learning_rate": 1.4488893765773675e-05, + "loss": 0.3752, + "step": 36175 + }, + { + "epoch": 1.69, + "learning_rate": 1.448810998071889e-05, + "loss": 0.3593, + "step": 36180 + }, + { + "epoch": 1.69, + "learning_rate": 1.4487326195664101e-05, + "loss": 0.3019, + "step": 36185 + }, + { + "epoch": 1.69, + "learning_rate": 1.4486542410609317e-05, + "loss": 0.0502, + "step": 36190 + }, + { + "epoch": 1.69, + "learning_rate": 1.448575862555453e-05, + "loss": 0.0869, + "step": 36195 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484974840499741e-05, + "loss": 0.0769, + "step": 36200 + }, + { + "epoch": 1.69, + "learning_rate": 1.4484191055444955e-05, + "loss": 0.0761, + "step": 36205 + }, + { + "epoch": 1.69, + "learning_rate": 1.448340727039017e-05, + "loss": 0.1277, + "step": 36210 + }, + { + "epoch": 1.69, + "learning_rate": 1.4482623485335383e-05, + "loss": 0.1541, + "step": 36215 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481839700280595e-05, + "loss": 0.1727, + "step": 36220 + }, + { + "epoch": 1.69, + "learning_rate": 1.4481055915225811e-05, + "loss": 0.2071, + "step": 36225 + }, + { + "epoch": 1.69, + "learning_rate": 1.4480272130171023e-05, + "loss": 0.2472, + "step": 36230 + }, + { + "epoch": 1.69, + "learning_rate": 1.4479488345116237e-05, + "loss": 0.1617, + "step": 36235 + }, + { + "epoch": 1.69, + "learning_rate": 1.447870456006145e-05, + "loss": 0.0486, + "step": 36240 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477920775006665e-05, + "loss": 0.029, + "step": 36245 + }, + { + "epoch": 1.69, + "learning_rate": 1.4477136989951877e-05, + "loss": 0.0703, + "step": 36250 + }, + { + "epoch": 1.69, + "learning_rate": 1.4476353204897091e-05, + "loss": 0.0847, + "step": 36255 + }, + { + "epoch": 1.69, + "learning_rate": 1.4475569419842303e-05, + "loss": 0.1484, + "step": 36260 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474785634787515e-05, + "loss": 0.1272, + "step": 36265 + }, + { + "epoch": 1.69, + "learning_rate": 1.4474001849732731e-05, + "loss": 0.1129, + "step": 36270 + }, + { + "epoch": 1.69, + "learning_rate": 1.4473218064677943e-05, + "loss": 0.1698, + "step": 36275 + }, + { + "epoch": 1.69, + "learning_rate": 1.4472434279623157e-05, + "loss": 0.2721, + "step": 36280 + }, + { + "epoch": 1.69, + "learning_rate": 1.447165049456837e-05, + "loss": 0.2507, + "step": 36285 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470866709513585e-05, + "loss": 0.1443, + "step": 36290 + }, + { + "epoch": 1.69, + "learning_rate": 1.4470082924458797e-05, + "loss": 0.0702, + "step": 36295 + }, + { + "epoch": 1.69, + "learning_rate": 1.4469299139404011e-05, + "loss": 0.0486, + "step": 36300 + }, + { + "epoch": 1.69, + "learning_rate": 1.4468515354349223e-05, + "loss": 0.0962, + "step": 36305 + }, + { + "epoch": 1.69, + "learning_rate": 1.4467731569294439e-05, + "loss": 0.1491, + "step": 36310 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466947784239651e-05, + "loss": 0.1473, + "step": 36315 + }, + { + "epoch": 1.69, + "learning_rate": 1.4466163999184865e-05, + "loss": 0.1861, + "step": 36320 + }, + { + "epoch": 1.69, + "learning_rate": 1.4465380214130079e-05, + "loss": 0.2508, + "step": 36325 + }, + { + "epoch": 1.7, + "learning_rate": 1.4464596429075291e-05, + "loss": 0.1919, + "step": 36330 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463812644020505e-05, + "loss": 0.329, + "step": 36335 + }, + { + "epoch": 1.7, + "learning_rate": 1.4463028858965717e-05, + "loss": 0.0241, + "step": 36340 + }, + { + "epoch": 1.7, + "learning_rate": 1.4462245073910933e-05, + "loss": 0.0493, + "step": 36345 + }, + { + "epoch": 1.7, + "learning_rate": 1.4461461288856145e-05, + "loss": 0.0885, + "step": 36350 + }, + { + "epoch": 1.7, + "learning_rate": 1.4460677503801359e-05, + "loss": 0.0676, + "step": 36355 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459893718746571e-05, + "loss": 0.138, + "step": 36360 + }, + { + "epoch": 1.7, + "learning_rate": 1.4459109933691787e-05, + "loss": 0.1602, + "step": 36365 + }, + { + "epoch": 1.7, + "learning_rate": 1.4458326148636999e-05, + "loss": 0.1771, + "step": 36370 + }, + { + "epoch": 1.7, + "learning_rate": 1.4457542363582213e-05, + "loss": 0.207, + "step": 36375 + }, + { + "epoch": 1.7, + "learning_rate": 1.4456758578527425e-05, + "loss": 0.3872, + "step": 36380 + }, + { + "epoch": 1.7, + "learning_rate": 1.445597479347264e-05, + "loss": 0.4241, + "step": 36385 + }, + { + "epoch": 1.7, + "learning_rate": 1.4455191008417853e-05, + "loss": 0.0311, + "step": 36390 + }, + { + "epoch": 1.7, + "learning_rate": 1.4454407223363065e-05, + "loss": 0.0573, + "step": 36395 + }, + { + "epoch": 1.7, + "learning_rate": 1.4453623438308279e-05, + "loss": 0.0677, + "step": 36400 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452839653253491e-05, + "loss": 0.1657, + "step": 36405 + }, + { + "epoch": 1.7, + "learning_rate": 1.4452055868198707e-05, + "loss": 0.0953, + "step": 36410 + }, + { + "epoch": 1.7, + "learning_rate": 1.4451272083143919e-05, + "loss": 0.1371, + "step": 36415 + }, + { + "epoch": 1.7, + "learning_rate": 1.4450488298089133e-05, + "loss": 0.1549, + "step": 36420 + }, + { + "epoch": 1.7, + "learning_rate": 1.4449704513034347e-05, + "loss": 0.1376, + "step": 36425 + }, + { + "epoch": 1.7, + "learning_rate": 1.444892072797956e-05, + "loss": 0.3851, + "step": 36430 + }, + { + "epoch": 1.7, + "learning_rate": 1.4448136942924773e-05, + "loss": 0.2807, + "step": 36435 + }, + { + "epoch": 1.7, + "learning_rate": 1.4447353157869989e-05, + "loss": 0.0163, + "step": 36440 + }, + { + "epoch": 1.7, + "learning_rate": 1.44465693728152e-05, + "loss": 0.0581, + "step": 36445 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445785587760415e-05, + "loss": 0.056, + "step": 36450 + }, + { + "epoch": 1.7, + "learning_rate": 1.4445001802705627e-05, + "loss": 0.1045, + "step": 36455 + }, + { + "epoch": 1.7, + "learning_rate": 1.4444218017650839e-05, + "loss": 0.0543, + "step": 36460 + }, + { + "epoch": 1.7, + "learning_rate": 1.4443434232596055e-05, + "loss": 0.1655, + "step": 36465 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442807204552225e-05, + "loss": 0.1582, + "step": 36470 + }, + { + "epoch": 1.7, + "learning_rate": 1.4442023419497437e-05, + "loss": 0.2369, + "step": 36475 + }, + { + "epoch": 1.7, + "learning_rate": 1.4441239634442651e-05, + "loss": 0.2963, + "step": 36480 + }, + { + "epoch": 1.7, + "learning_rate": 1.4440455849387865e-05, + "loss": 0.2817, + "step": 36485 + }, + { + "epoch": 1.7, + "learning_rate": 1.4439672064333079e-05, + "loss": 0.0662, + "step": 36490 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438888279278291e-05, + "loss": 0.0917, + "step": 36495 + }, + { + "epoch": 1.7, + "learning_rate": 1.4438104494223505e-05, + "loss": 0.0448, + "step": 36500 + }, + { + "epoch": 1.7, + "learning_rate": 1.4437320709168719e-05, + "loss": 0.1574, + "step": 36505 + }, + { + "epoch": 1.7, + "learning_rate": 1.4436536924113933e-05, + "loss": 0.1734, + "step": 36510 + }, + { + "epoch": 1.7, + "learning_rate": 1.4435753139059145e-05, + "loss": 0.1669, + "step": 36515 + }, + { + "epoch": 1.7, + "learning_rate": 1.443496935400436e-05, + "loss": 0.1831, + "step": 36520 + }, + { + "epoch": 1.7, + "learning_rate": 1.4434185568949573e-05, + "loss": 0.1776, + "step": 36525 + }, + { + "epoch": 1.7, + "learning_rate": 1.4433401783894785e-05, + "loss": 0.3592, + "step": 36530 + }, + { + "epoch": 1.7, + "learning_rate": 1.4432617998839999e-05, + "loss": 0.2814, + "step": 36535 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431834213785211e-05, + "loss": 0.0311, + "step": 36540 + }, + { + "epoch": 1.71, + "learning_rate": 1.4431050428730427e-05, + "loss": 0.091, + "step": 36545 + }, + { + "epoch": 1.71, + "learning_rate": 1.443026664367564e-05, + "loss": 0.0954, + "step": 36550 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429482858620853e-05, + "loss": 0.102, + "step": 36555 + }, + { + "epoch": 1.71, + "learning_rate": 1.4428699073566065e-05, + "loss": 0.1249, + "step": 36560 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427915288511281e-05, + "loss": 0.1781, + "step": 36565 + }, + { + "epoch": 1.71, + "learning_rate": 1.4427131503456493e-05, + "loss": 0.122, + "step": 36570 + }, + { + "epoch": 1.71, + "learning_rate": 1.4426347718401707e-05, + "loss": 0.2721, + "step": 36575 + }, + { + "epoch": 1.71, + "learning_rate": 1.442556393334692e-05, + "loss": 0.3878, + "step": 36580 + }, + { + "epoch": 1.71, + "learning_rate": 1.4424780148292135e-05, + "loss": 0.2806, + "step": 36585 + }, + { + "epoch": 1.71, + "learning_rate": 1.4423996363237347e-05, + "loss": 0.0162, + "step": 36590 + }, + { + "epoch": 1.71, + "learning_rate": 1.442321257818256e-05, + "loss": 0.1438, + "step": 36595 + }, + { + "epoch": 1.71, + "learning_rate": 1.4422428793127775e-05, + "loss": 0.0547, + "step": 36600 + }, + { + "epoch": 1.71, + "learning_rate": 1.4421645008072987e-05, + "loss": 0.0748, + "step": 36605 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420861223018201e-05, + "loss": 0.071, + "step": 36610 + }, + { + "epoch": 1.71, + "learning_rate": 1.4420077437963413e-05, + "loss": 0.0877, + "step": 36615 + }, + { + "epoch": 1.71, + "learning_rate": 1.4419293652908629e-05, + "loss": 0.0916, + "step": 36620 + }, + { + "epoch": 1.71, + "learning_rate": 1.4418509867853841e-05, + "loss": 0.29, + "step": 36625 + }, + { + "epoch": 1.71, + "learning_rate": 1.4417726082799055e-05, + "loss": 0.3652, + "step": 36630 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416942297744267e-05, + "loss": 0.3349, + "step": 36635 + }, + { + "epoch": 1.71, + "learning_rate": 1.4416158512689483e-05, + "loss": 0.0536, + "step": 36640 + }, + { + "epoch": 1.71, + "learning_rate": 1.4415374727634695e-05, + "loss": 0.0458, + "step": 36645 + }, + { + "epoch": 1.71, + "learning_rate": 1.4414590942579909e-05, + "loss": 0.085, + "step": 36650 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413807157525121e-05, + "loss": 0.0993, + "step": 36655 + }, + { + "epoch": 1.71, + "learning_rate": 1.4413023372470333e-05, + "loss": 0.1099, + "step": 36660 + }, + { + "epoch": 1.71, + "learning_rate": 1.4412239587415549e-05, + "loss": 0.112, + "step": 36665 + }, + { + "epoch": 1.71, + "learning_rate": 1.4411455802360761e-05, + "loss": 0.2057, + "step": 36670 + }, + { + "epoch": 1.71, + "learning_rate": 1.4410672017305975e-05, + "loss": 0.3171, + "step": 36675 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409888232251187e-05, + "loss": 0.296, + "step": 36680 + }, + { + "epoch": 1.71, + "learning_rate": 1.4409104447196403e-05, + "loss": 0.2658, + "step": 36685 + }, + { + "epoch": 1.71, + "learning_rate": 1.4408320662141615e-05, + "loss": 0.0218, + "step": 36690 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407536877086829e-05, + "loss": 0.0514, + "step": 36695 + }, + { + "epoch": 1.71, + "learning_rate": 1.4406753092032043e-05, + "loss": 0.081, + "step": 36700 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405969306977257e-05, + "loss": 0.0679, + "step": 36705 + }, + { + "epoch": 1.71, + "learning_rate": 1.4405185521922469e-05, + "loss": 0.1526, + "step": 36710 + }, + { + "epoch": 1.71, + "learning_rate": 1.4404401736867683e-05, + "loss": 0.1244, + "step": 36715 + }, + { + "epoch": 1.71, + "learning_rate": 1.4403617951812897e-05, + "loss": 0.1967, + "step": 36720 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402834166758109e-05, + "loss": 0.2807, + "step": 36725 + }, + { + "epoch": 1.71, + "learning_rate": 1.4402050381703323e-05, + "loss": 0.3164, + "step": 36730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4401266596648535e-05, + "loss": 0.2543, + "step": 36735 + }, + { + "epoch": 1.71, + "learning_rate": 1.440048281159375e-05, + "loss": 0.0474, + "step": 36740 + }, + { + "epoch": 1.71, + "learning_rate": 1.4399699026538963e-05, + "loss": 0.0358, + "step": 36745 + }, + { + "epoch": 1.71, + "learning_rate": 1.4398915241484177e-05, + "loss": 0.0577, + "step": 36750 + }, + { + "epoch": 1.72, + "learning_rate": 1.4398131456429389e-05, + "loss": 0.1247, + "step": 36755 + }, + { + "epoch": 1.72, + "learning_rate": 1.4397347671374605e-05, + "loss": 0.0912, + "step": 36760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4396563886319817e-05, + "loss": 0.1512, + "step": 36765 + }, + { + "epoch": 1.72, + "learning_rate": 1.439578010126503e-05, + "loss": 0.187, + "step": 36770 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394996316210243e-05, + "loss": 0.2044, + "step": 36775 + }, + { + "epoch": 1.72, + "learning_rate": 1.4394212531155459e-05, + "loss": 0.2219, + "step": 36780 + }, + { + "epoch": 1.72, + "learning_rate": 1.439342874610067e-05, + "loss": 0.3031, + "step": 36785 + }, + { + "epoch": 1.72, + "learning_rate": 1.4392644961045883e-05, + "loss": 0.0566, + "step": 36790 + }, + { + "epoch": 1.72, + "learning_rate": 1.4391861175991097e-05, + "loss": 0.05, + "step": 36795 + }, + { + "epoch": 1.72, + "learning_rate": 1.439107739093631e-05, + "loss": 0.1119, + "step": 36800 + }, + { + "epoch": 1.72, + "learning_rate": 1.4390293605881525e-05, + "loss": 0.1107, + "step": 36805 + }, + { + "epoch": 1.72, + "learning_rate": 1.4389509820826737e-05, + "loss": 0.198, + "step": 36810 + }, + { + "epoch": 1.72, + "learning_rate": 1.438872603577195e-05, + "loss": 0.1943, + "step": 36815 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387942250717165e-05, + "loss": 0.1967, + "step": 36820 + }, + { + "epoch": 1.72, + "learning_rate": 1.4387158465662379e-05, + "loss": 0.1664, + "step": 36825 + }, + { + "epoch": 1.72, + "learning_rate": 1.438637468060759e-05, + "loss": 0.3422, + "step": 36830 + }, + { + "epoch": 1.72, + "learning_rate": 1.4385590895552806e-05, + "loss": 0.2327, + "step": 36835 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384807110498019e-05, + "loss": 0.0715, + "step": 36840 + }, + { + "epoch": 1.72, + "learning_rate": 1.4384023325443233e-05, + "loss": 0.0976, + "step": 36845 + }, + { + "epoch": 1.72, + "learning_rate": 1.4383239540388445e-05, + "loss": 0.0848, + "step": 36850 + }, + { + "epoch": 1.72, + "learning_rate": 1.4382455755333657e-05, + "loss": 0.0524, + "step": 36855 + }, + { + "epoch": 1.72, + "learning_rate": 1.4381671970278873e-05, + "loss": 0.1459, + "step": 36860 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380888185224085e-05, + "loss": 0.1363, + "step": 36865 + }, + { + "epoch": 1.72, + "learning_rate": 1.4380104400169299e-05, + "loss": 0.1704, + "step": 36870 + }, + { + "epoch": 1.72, + "learning_rate": 1.4379320615114511e-05, + "loss": 0.3359, + "step": 36875 + }, + { + "epoch": 1.72, + "learning_rate": 1.4378536830059726e-05, + "loss": 0.4377, + "step": 36880 + }, + { + "epoch": 1.72, + "learning_rate": 1.4377753045004939e-05, + "loss": 0.1847, + "step": 36885 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376969259950153e-05, + "loss": 0.0775, + "step": 36890 + }, + { + "epoch": 1.72, + "learning_rate": 1.4376185474895365e-05, + "loss": 0.0722, + "step": 36895 + }, + { + "epoch": 1.72, + "learning_rate": 1.437540168984058e-05, + "loss": 0.106, + "step": 36900 + }, + { + "epoch": 1.72, + "learning_rate": 1.4374617904785793e-05, + "loss": 0.1095, + "step": 36905 + }, + { + "epoch": 1.72, + "learning_rate": 1.4373834119731007e-05, + "loss": 0.1045, + "step": 36910 + }, + { + "epoch": 1.72, + "learning_rate": 1.437305033467622e-05, + "loss": 0.1643, + "step": 36915 + }, + { + "epoch": 1.72, + "learning_rate": 1.4372266549621433e-05, + "loss": 0.1888, + "step": 36920 + }, + { + "epoch": 1.72, + "learning_rate": 1.4371482764566647e-05, + "loss": 0.1461, + "step": 36925 + }, + { + "epoch": 1.72, + "learning_rate": 1.4370698979511859e-05, + "loss": 0.36, + "step": 36930 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369915194457074e-05, + "loss": 0.1914, + "step": 36935 + }, + { + "epoch": 1.72, + "learning_rate": 1.4369131409402287e-05, + "loss": 0.0292, + "step": 36940 + }, + { + "epoch": 1.72, + "learning_rate": 1.43683476243475e-05, + "loss": 0.0758, + "step": 36945 + }, + { + "epoch": 1.72, + "learning_rate": 1.4367563839292713e-05, + "loss": 0.074, + "step": 36950 + }, + { + "epoch": 1.72, + "learning_rate": 1.4366780054237928e-05, + "loss": 0.1499, + "step": 36955 + }, + { + "epoch": 1.72, + "learning_rate": 1.436599626918314e-05, + "loss": 0.1167, + "step": 36960 + }, + { + "epoch": 1.72, + "learning_rate": 1.4365212484128354e-05, + "loss": 0.0796, + "step": 36965 + }, + { + "epoch": 1.73, + "learning_rate": 1.4364428699073567e-05, + "loss": 0.0926, + "step": 36970 + }, + { + "epoch": 1.73, + "learning_rate": 1.4363644914018782e-05, + "loss": 0.2108, + "step": 36975 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362861128963994e-05, + "loss": 0.2106, + "step": 36980 + }, + { + "epoch": 1.73, + "learning_rate": 1.4362077343909207e-05, + "loss": 0.2872, + "step": 36985 + }, + { + "epoch": 1.73, + "learning_rate": 1.436129355885442e-05, + "loss": 0.0477, + "step": 36990 + }, + { + "epoch": 1.73, + "learning_rate": 1.4360509773799633e-05, + "loss": 0.0257, + "step": 36995 + }, + { + "epoch": 1.73, + "learning_rate": 1.4359725988744848e-05, + "loss": 0.1252, + "step": 37000 + }, + { + "epoch": 1.73, + "learning_rate": 1.435894220369006e-05, + "loss": 0.1321, + "step": 37005 + }, + { + "epoch": 1.73, + "learning_rate": 1.4358158418635274e-05, + "loss": 0.1789, + "step": 37010 + }, + { + "epoch": 1.73, + "learning_rate": 1.4357374633580488e-05, + "loss": 0.1004, + "step": 37015 + }, + { + "epoch": 1.73, + "learning_rate": 1.4356590848525702e-05, + "loss": 0.1946, + "step": 37020 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355807063470914e-05, + "loss": 0.1881, + "step": 37025 + }, + { + "epoch": 1.73, + "learning_rate": 1.4355023278416128e-05, + "loss": 0.3375, + "step": 37030 + }, + { + "epoch": 1.73, + "learning_rate": 1.4354239493361342e-05, + "loss": 0.2199, + "step": 37035 + }, + { + "epoch": 1.73, + "learning_rate": 1.4353455708306556e-05, + "loss": 0.0575, + "step": 37040 + }, + { + "epoch": 1.73, + "learning_rate": 1.4352671923251768e-05, + "loss": 0.0641, + "step": 37045 + }, + { + "epoch": 1.73, + "learning_rate": 1.435188813819698e-05, + "loss": 0.1354, + "step": 37050 + }, + { + "epoch": 1.73, + "learning_rate": 1.4351104353142196e-05, + "loss": 0.0793, + "step": 37055 + }, + { + "epoch": 1.73, + "learning_rate": 1.4350320568087408e-05, + "loss": 0.0634, + "step": 37060 + }, + { + "epoch": 1.73, + "learning_rate": 1.4349536783032622e-05, + "loss": 0.1773, + "step": 37065 + }, + { + "epoch": 1.73, + "learning_rate": 1.4348752997977835e-05, + "loss": 0.1752, + "step": 37070 + }, + { + "epoch": 1.73, + "learning_rate": 1.434796921292305e-05, + "loss": 0.1668, + "step": 37075 + }, + { + "epoch": 1.73, + "learning_rate": 1.4347185427868262e-05, + "loss": 0.3402, + "step": 37080 + }, + { + "epoch": 1.73, + "learning_rate": 1.4346401642813476e-05, + "loss": 0.2596, + "step": 37085 + }, + { + "epoch": 1.73, + "learning_rate": 1.4345617857758688e-05, + "loss": 0.0693, + "step": 37090 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344834072703904e-05, + "loss": 0.0821, + "step": 37095 + }, + { + "epoch": 1.73, + "learning_rate": 1.4344050287649116e-05, + "loss": 0.0515, + "step": 37100 + }, + { + "epoch": 1.73, + "learning_rate": 1.434326650259433e-05, + "loss": 0.0693, + "step": 37105 + }, + { + "epoch": 1.73, + "learning_rate": 1.4342482717539542e-05, + "loss": 0.0675, + "step": 37110 + }, + { + "epoch": 1.73, + "learning_rate": 1.4341698932484756e-05, + "loss": 0.0614, + "step": 37115 + }, + { + "epoch": 1.73, + "learning_rate": 1.434091514742997e-05, + "loss": 0.1118, + "step": 37120 + }, + { + "epoch": 1.73, + "learning_rate": 1.4340131362375182e-05, + "loss": 0.1841, + "step": 37125 + }, + { + "epoch": 1.73, + "learning_rate": 1.4339347577320396e-05, + "loss": 0.4163, + "step": 37130 + }, + { + "epoch": 1.73, + "learning_rate": 1.433856379226561e-05, + "loss": 0.2525, + "step": 37135 + }, + { + "epoch": 1.73, + "learning_rate": 1.4337780007210824e-05, + "loss": 0.0489, + "step": 37140 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336996222156036e-05, + "loss": 0.0879, + "step": 37145 + }, + { + "epoch": 1.73, + "learning_rate": 1.4336212437101252e-05, + "loss": 0.0851, + "step": 37150 + }, + { + "epoch": 1.73, + "learning_rate": 1.4335428652046464e-05, + "loss": 0.093, + "step": 37155 + }, + { + "epoch": 1.73, + "learning_rate": 1.4334644866991678e-05, + "loss": 0.161, + "step": 37160 + }, + { + "epoch": 1.73, + "learning_rate": 1.433386108193689e-05, + "loss": 0.1694, + "step": 37165 + }, + { + "epoch": 1.73, + "learning_rate": 1.4333077296882106e-05, + "loss": 0.17, + "step": 37170 + }, + { + "epoch": 1.73, + "learning_rate": 1.4332293511827318e-05, + "loss": 0.2063, + "step": 37175 + }, + { + "epoch": 1.73, + "learning_rate": 1.433150972677253e-05, + "loss": 0.2002, + "step": 37180 + }, + { + "epoch": 1.74, + "learning_rate": 1.4330725941717744e-05, + "loss": 0.2128, + "step": 37185 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329942156662956e-05, + "loss": 0.0525, + "step": 37190 + }, + { + "epoch": 1.74, + "learning_rate": 1.4329158371608172e-05, + "loss": 0.1173, + "step": 37195 + }, + { + "epoch": 1.74, + "learning_rate": 1.4328374586553384e-05, + "loss": 0.1373, + "step": 37200 + }, + { + "epoch": 1.74, + "learning_rate": 1.4327590801498598e-05, + "loss": 0.0733, + "step": 37205 + }, + { + "epoch": 1.74, + "learning_rate": 1.432680701644381e-05, + "loss": 0.118, + "step": 37210 + }, + { + "epoch": 1.74, + "learning_rate": 1.4326023231389026e-05, + "loss": 0.1538, + "step": 37215 + }, + { + "epoch": 1.74, + "learning_rate": 1.4325239446334238e-05, + "loss": 0.1563, + "step": 37220 + }, + { + "epoch": 1.74, + "learning_rate": 1.4324455661279452e-05, + "loss": 0.2196, + "step": 37225 + }, + { + "epoch": 1.74, + "learning_rate": 1.4323671876224666e-05, + "loss": 0.3397, + "step": 37230 + }, + { + "epoch": 1.74, + "learning_rate": 1.432288809116988e-05, + "loss": 0.2154, + "step": 37235 + }, + { + "epoch": 1.74, + "learning_rate": 1.4322104306115092e-05, + "loss": 0.0617, + "step": 37240 + }, + { + "epoch": 1.74, + "learning_rate": 1.4321320521060304e-05, + "loss": 0.0488, + "step": 37245 + }, + { + "epoch": 1.74, + "learning_rate": 1.432053673600552e-05, + "loss": 0.1145, + "step": 37250 + }, + { + "epoch": 1.74, + "learning_rate": 1.4319752950950732e-05, + "loss": 0.1049, + "step": 37255 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318969165895946e-05, + "loss": 0.0923, + "step": 37260 + }, + { + "epoch": 1.74, + "learning_rate": 1.4318185380841158e-05, + "loss": 0.1686, + "step": 37265 + }, + { + "epoch": 1.74, + "learning_rate": 1.4317401595786374e-05, + "loss": 0.1347, + "step": 37270 + }, + { + "epoch": 1.74, + "learning_rate": 1.4316617810731586e-05, + "loss": 0.161, + "step": 37275 + }, + { + "epoch": 1.74, + "learning_rate": 1.43158340256768e-05, + "loss": 0.2883, + "step": 37280 + }, + { + "epoch": 1.74, + "learning_rate": 1.4315050240622012e-05, + "loss": 0.2093, + "step": 37285 + }, + { + "epoch": 1.74, + "learning_rate": 1.4314266455567228e-05, + "loss": 0.0615, + "step": 37290 + }, + { + "epoch": 1.74, + "learning_rate": 1.431348267051244e-05, + "loss": 0.058, + "step": 37295 + }, + { + "epoch": 1.74, + "learning_rate": 1.4312698885457654e-05, + "loss": 0.0953, + "step": 37300 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311915100402866e-05, + "loss": 0.0668, + "step": 37305 + }, + { + "epoch": 1.74, + "learning_rate": 1.4311131315348078e-05, + "loss": 0.1407, + "step": 37310 + }, + { + "epoch": 1.74, + "learning_rate": 1.4310347530293294e-05, + "loss": 0.1212, + "step": 37315 + }, + { + "epoch": 1.74, + "learning_rate": 1.4309563745238506e-05, + "loss": 0.1046, + "step": 37320 + }, + { + "epoch": 1.74, + "learning_rate": 1.430877996018372e-05, + "loss": 0.1611, + "step": 37325 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307996175128934e-05, + "loss": 0.3317, + "step": 37330 + }, + { + "epoch": 1.74, + "learning_rate": 1.4307212390074148e-05, + "loss": 0.2498, + "step": 37335 + }, + { + "epoch": 1.74, + "learning_rate": 1.430642860501936e-05, + "loss": 0.0457, + "step": 37340 + }, + { + "epoch": 1.74, + "learning_rate": 1.4305644819964574e-05, + "loss": 0.0306, + "step": 37345 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304861034909788e-05, + "loss": 0.0676, + "step": 37350 + }, + { + "epoch": 1.74, + "learning_rate": 1.4304077249855002e-05, + "loss": 0.0872, + "step": 37355 + }, + { + "epoch": 1.74, + "learning_rate": 1.4303293464800214e-05, + "loss": 0.1829, + "step": 37360 + }, + { + "epoch": 1.74, + "learning_rate": 1.430250967974543e-05, + "loss": 0.1626, + "step": 37365 + }, + { + "epoch": 1.74, + "learning_rate": 1.4301725894690642e-05, + "loss": 0.1964, + "step": 37370 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300942109635854e-05, + "loss": 0.2105, + "step": 37375 + }, + { + "epoch": 1.74, + "learning_rate": 1.4300158324581068e-05, + "loss": 0.3942, + "step": 37380 + }, + { + "epoch": 1.74, + "learning_rate": 1.429937453952628e-05, + "loss": 0.2594, + "step": 37385 + }, + { + "epoch": 1.74, + "learning_rate": 1.4298590754471496e-05, + "loss": 0.0395, + "step": 37390 + }, + { + "epoch": 1.74, + "learning_rate": 1.4297806969416708e-05, + "loss": 0.0307, + "step": 37395 + }, + { + "epoch": 1.75, + "learning_rate": 1.4297023184361922e-05, + "loss": 0.0373, + "step": 37400 + }, + { + "epoch": 1.75, + "learning_rate": 1.4296239399307134e-05, + "loss": 0.06, + "step": 37405 + }, + { + "epoch": 1.75, + "learning_rate": 1.429545561425235e-05, + "loss": 0.1288, + "step": 37410 + }, + { + "epoch": 1.75, + "learning_rate": 1.4294671829197562e-05, + "loss": 0.1277, + "step": 37415 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293888044142776e-05, + "loss": 0.1357, + "step": 37420 + }, + { + "epoch": 1.75, + "learning_rate": 1.4293104259087988e-05, + "loss": 0.1732, + "step": 37425 + }, + { + "epoch": 1.75, + "learning_rate": 1.4292320474033204e-05, + "loss": 0.2856, + "step": 37430 + }, + { + "epoch": 1.75, + "learning_rate": 1.4291536688978416e-05, + "loss": 0.2645, + "step": 37435 + }, + { + "epoch": 1.75, + "learning_rate": 1.4290752903923628e-05, + "loss": 0.0783, + "step": 37440 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289969118868842e-05, + "loss": 0.0332, + "step": 37445 + }, + { + "epoch": 1.75, + "learning_rate": 1.4289185333814056e-05, + "loss": 0.0371, + "step": 37450 + }, + { + "epoch": 1.75, + "learning_rate": 1.428840154875927e-05, + "loss": 0.1575, + "step": 37455 + }, + { + "epoch": 1.75, + "learning_rate": 1.4287617763704482e-05, + "loss": 0.094, + "step": 37460 + }, + { + "epoch": 1.75, + "learning_rate": 1.4286833978649698e-05, + "loss": 0.0949, + "step": 37465 + }, + { + "epoch": 1.75, + "learning_rate": 1.428605019359491e-05, + "loss": 0.1676, + "step": 37470 + }, + { + "epoch": 1.75, + "learning_rate": 1.4285266408540124e-05, + "loss": 0.1947, + "step": 37475 + }, + { + "epoch": 1.75, + "learning_rate": 1.4284482623485336e-05, + "loss": 0.1995, + "step": 37480 + }, + { + "epoch": 1.75, + "learning_rate": 1.4283698838430551e-05, + "loss": 0.2297, + "step": 37485 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282915053375764e-05, + "loss": 0.0635, + "step": 37490 + }, + { + "epoch": 1.75, + "learning_rate": 1.4282131268320978e-05, + "loss": 0.049, + "step": 37495 + }, + { + "epoch": 1.75, + "learning_rate": 1.428134748326619e-05, + "loss": 0.1047, + "step": 37500 + }, + { + "epoch": 1.75, + "learning_rate": 1.4280563698211402e-05, + "loss": 0.125, + "step": 37505 + }, + { + "epoch": 1.75, + "learning_rate": 1.4279779913156618e-05, + "loss": 0.1367, + "step": 37510 + }, + { + "epoch": 1.75, + "learning_rate": 1.427899612810183e-05, + "loss": 0.16, + "step": 37515 + }, + { + "epoch": 1.75, + "learning_rate": 1.4278212343047044e-05, + "loss": 0.2038, + "step": 37520 + }, + { + "epoch": 1.75, + "learning_rate": 1.4277428557992256e-05, + "loss": 0.2235, + "step": 37525 + }, + { + "epoch": 1.75, + "learning_rate": 1.4276644772937472e-05, + "loss": 0.428, + "step": 37530 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275860987882684e-05, + "loss": 0.3868, + "step": 37535 + }, + { + "epoch": 1.75, + "learning_rate": 1.4275077202827898e-05, + "loss": 0.0283, + "step": 37540 + }, + { + "epoch": 1.75, + "learning_rate": 1.4274293417773112e-05, + "loss": 0.0262, + "step": 37545 + }, + { + "epoch": 1.75, + "learning_rate": 1.4273509632718325e-05, + "loss": 0.0667, + "step": 37550 + }, + { + "epoch": 1.75, + "learning_rate": 1.4272725847663538e-05, + "loss": 0.1034, + "step": 37555 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271942062608752e-05, + "loss": 0.1123, + "step": 37560 + }, + { + "epoch": 1.75, + "learning_rate": 1.4271158277553965e-05, + "loss": 0.1533, + "step": 37565 + }, + { + "epoch": 1.75, + "learning_rate": 1.4270374492499178e-05, + "loss": 0.0854, + "step": 37570 + }, + { + "epoch": 1.75, + "learning_rate": 1.4269590707444392e-05, + "loss": 0.2158, + "step": 37575 + }, + { + "epoch": 1.75, + "learning_rate": 1.4268806922389604e-05, + "loss": 0.3784, + "step": 37580 + }, + { + "epoch": 1.75, + "learning_rate": 1.426802313733482e-05, + "loss": 0.2143, + "step": 37585 + }, + { + "epoch": 1.75, + "learning_rate": 1.4267239352280032e-05, + "loss": 0.0696, + "step": 37590 + }, + { + "epoch": 1.75, + "learning_rate": 1.4266455567225246e-05, + "loss": 0.0339, + "step": 37595 + }, + { + "epoch": 1.75, + "learning_rate": 1.4265671782170458e-05, + "loss": 0.0495, + "step": 37600 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264887997115673e-05, + "loss": 0.1684, + "step": 37605 + }, + { + "epoch": 1.75, + "learning_rate": 1.4264104212060886e-05, + "loss": 0.184, + "step": 37610 + }, + { + "epoch": 1.76, + "learning_rate": 1.42633204270061e-05, + "loss": 0.1755, + "step": 37615 + }, + { + "epoch": 1.76, + "learning_rate": 1.4262536641951312e-05, + "loss": 0.1916, + "step": 37620 + }, + { + "epoch": 1.76, + "learning_rate": 1.4261752856896527e-05, + "loss": 0.178, + "step": 37625 + }, + { + "epoch": 1.76, + "learning_rate": 1.426096907184174e-05, + "loss": 0.2962, + "step": 37630 + }, + { + "epoch": 1.76, + "learning_rate": 1.4260185286786952e-05, + "loss": 0.1308, + "step": 37635 + }, + { + "epoch": 1.76, + "learning_rate": 1.4259401501732166e-05, + "loss": 0.0454, + "step": 37640 + }, + { + "epoch": 1.76, + "learning_rate": 1.425861771667738e-05, + "loss": 0.0986, + "step": 37645 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257833931622593e-05, + "loss": 0.0789, + "step": 37650 + }, + { + "epoch": 1.76, + "learning_rate": 1.4257050146567806e-05, + "loss": 0.1297, + "step": 37655 + }, + { + "epoch": 1.76, + "learning_rate": 1.425626636151302e-05, + "loss": 0.0659, + "step": 37660 + }, + { + "epoch": 1.76, + "learning_rate": 1.4255482576458233e-05, + "loss": 0.1486, + "step": 37665 + }, + { + "epoch": 1.76, + "learning_rate": 1.4254698791403447e-05, + "loss": 0.1687, + "step": 37670 + }, + { + "epoch": 1.76, + "learning_rate": 1.425391500634866e-05, + "loss": 0.1958, + "step": 37675 + }, + { + "epoch": 1.76, + "learning_rate": 1.4253131221293875e-05, + "loss": 0.3865, + "step": 37680 + }, + { + "epoch": 1.76, + "learning_rate": 1.4252347436239087e-05, + "loss": 0.3559, + "step": 37685 + }, + { + "epoch": 1.76, + "learning_rate": 1.4251563651184301e-05, + "loss": 0.0624, + "step": 37690 + }, + { + "epoch": 1.76, + "learning_rate": 1.4250779866129513e-05, + "loss": 0.0917, + "step": 37695 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249996081074726e-05, + "loss": 0.0923, + "step": 37700 + }, + { + "epoch": 1.76, + "learning_rate": 1.4249212296019941e-05, + "loss": 0.1211, + "step": 37705 + }, + { + "epoch": 1.76, + "learning_rate": 1.4248428510965153e-05, + "loss": 0.1117, + "step": 37710 + }, + { + "epoch": 1.76, + "learning_rate": 1.4247644725910367e-05, + "loss": 0.2351, + "step": 37715 + }, + { + "epoch": 1.76, + "learning_rate": 1.424686094085558e-05, + "loss": 0.1649, + "step": 37720 + }, + { + "epoch": 1.76, + "learning_rate": 1.4246077155800795e-05, + "loss": 0.1855, + "step": 37725 + }, + { + "epoch": 1.76, + "learning_rate": 1.4245293370746007e-05, + "loss": 0.3319, + "step": 37730 + }, + { + "epoch": 1.76, + "learning_rate": 1.4244509585691221e-05, + "loss": 0.2802, + "step": 37735 + }, + { + "epoch": 1.76, + "learning_rate": 1.4243725800636434e-05, + "loss": 0.0575, + "step": 37740 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242942015581649e-05, + "loss": 0.0562, + "step": 37745 + }, + { + "epoch": 1.76, + "learning_rate": 1.4242158230526861e-05, + "loss": 0.0816, + "step": 37750 + }, + { + "epoch": 1.76, + "learning_rate": 1.4241374445472075e-05, + "loss": 0.0493, + "step": 37755 + }, + { + "epoch": 1.76, + "learning_rate": 1.424059066041729e-05, + "loss": 0.1505, + "step": 37760 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239806875362501e-05, + "loss": 0.1146, + "step": 37765 + }, + { + "epoch": 1.76, + "learning_rate": 1.4239023090307715e-05, + "loss": 0.2125, + "step": 37770 + }, + { + "epoch": 1.76, + "learning_rate": 1.4238239305252927e-05, + "loss": 0.2498, + "step": 37775 + }, + { + "epoch": 1.76, + "learning_rate": 1.4237455520198143e-05, + "loss": 0.3085, + "step": 37780 + }, + { + "epoch": 1.76, + "learning_rate": 1.4236671735143355e-05, + "loss": 0.4248, + "step": 37785 + }, + { + "epoch": 1.76, + "learning_rate": 1.423588795008857e-05, + "loss": 0.0457, + "step": 37790 + }, + { + "epoch": 1.76, + "learning_rate": 1.4235104165033781e-05, + "loss": 0.0464, + "step": 37795 + }, + { + "epoch": 1.76, + "learning_rate": 1.4234320379978997e-05, + "loss": 0.0329, + "step": 37800 + }, + { + "epoch": 1.76, + "learning_rate": 1.423353659492421e-05, + "loss": 0.1443, + "step": 37805 + }, + { + "epoch": 1.76, + "learning_rate": 1.4232752809869423e-05, + "loss": 0.1504, + "step": 37810 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231969024814635e-05, + "loss": 0.1618, + "step": 37815 + }, + { + "epoch": 1.76, + "learning_rate": 1.4231185239759851e-05, + "loss": 0.1992, + "step": 37820 + }, + { + "epoch": 1.76, + "learning_rate": 1.4230401454705063e-05, + "loss": 0.1979, + "step": 37825 + }, + { + "epoch": 1.77, + "learning_rate": 1.4229617669650275e-05, + "loss": 0.3638, + "step": 37830 + }, + { + "epoch": 1.77, + "learning_rate": 1.422883388459549e-05, + "loss": 0.2355, + "step": 37835 + }, + { + "epoch": 1.77, + "learning_rate": 1.4228050099540701e-05, + "loss": 0.0892, + "step": 37840 + }, + { + "epoch": 1.77, + "learning_rate": 1.4227266314485917e-05, + "loss": 0.0729, + "step": 37845 + }, + { + "epoch": 1.77, + "learning_rate": 1.422648252943113e-05, + "loss": 0.1274, + "step": 37850 + }, + { + "epoch": 1.77, + "learning_rate": 1.4225698744376343e-05, + "loss": 0.0896, + "step": 37855 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224914959321557e-05, + "loss": 0.0717, + "step": 37860 + }, + { + "epoch": 1.77, + "learning_rate": 1.4224131174266771e-05, + "loss": 0.1184, + "step": 37865 + }, + { + "epoch": 1.77, + "learning_rate": 1.4223347389211983e-05, + "loss": 0.1207, + "step": 37870 + }, + { + "epoch": 1.77, + "learning_rate": 1.4222563604157197e-05, + "loss": 0.2518, + "step": 37875 + }, + { + "epoch": 1.77, + "learning_rate": 1.4221779819102411e-05, + "loss": 0.3498, + "step": 37880 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220996034047625e-05, + "loss": 0.3115, + "step": 37885 + }, + { + "epoch": 1.77, + "learning_rate": 1.4220212248992837e-05, + "loss": 0.0583, + "step": 37890 + }, + { + "epoch": 1.77, + "learning_rate": 1.421942846393805e-05, + "loss": 0.0557, + "step": 37895 + }, + { + "epoch": 1.77, + "learning_rate": 1.4218644678883265e-05, + "loss": 0.0753, + "step": 37900 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217860893828477e-05, + "loss": 0.0799, + "step": 37905 + }, + { + "epoch": 1.77, + "learning_rate": 1.4217077108773691e-05, + "loss": 0.0979, + "step": 37910 + }, + { + "epoch": 1.77, + "learning_rate": 1.4216293323718903e-05, + "loss": 0.0891, + "step": 37915 + }, + { + "epoch": 1.77, + "learning_rate": 1.4215509538664119e-05, + "loss": 0.1499, + "step": 37920 + }, + { + "epoch": 1.77, + "learning_rate": 1.4214725753609331e-05, + "loss": 0.1411, + "step": 37925 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213941968554545e-05, + "loss": 0.2998, + "step": 37930 + }, + { + "epoch": 1.77, + "learning_rate": 1.4213158183499757e-05, + "loss": 0.2268, + "step": 37935 + }, + { + "epoch": 1.77, + "learning_rate": 1.4212374398444973e-05, + "loss": 0.0357, + "step": 37940 + }, + { + "epoch": 1.77, + "learning_rate": 1.4211590613390185e-05, + "loss": 0.0426, + "step": 37945 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210806828335399e-05, + "loss": 0.0794, + "step": 37950 + }, + { + "epoch": 1.77, + "learning_rate": 1.4210023043280611e-05, + "loss": 0.1651, + "step": 37955 + }, + { + "epoch": 1.77, + "learning_rate": 1.4209239258225825e-05, + "loss": 0.1167, + "step": 37960 + }, + { + "epoch": 1.77, + "learning_rate": 1.4208455473171039e-05, + "loss": 0.1324, + "step": 37965 + }, + { + "epoch": 1.77, + "learning_rate": 1.4207671688116251e-05, + "loss": 0.1587, + "step": 37970 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206887903061465e-05, + "loss": 0.2, + "step": 37975 + }, + { + "epoch": 1.77, + "learning_rate": 1.4206104118006679e-05, + "loss": 0.3415, + "step": 37980 + }, + { + "epoch": 1.77, + "learning_rate": 1.4205320332951893e-05, + "loss": 0.24, + "step": 37985 + }, + { + "epoch": 1.77, + "learning_rate": 1.4204536547897105e-05, + "loss": 0.0173, + "step": 37990 + }, + { + "epoch": 1.77, + "learning_rate": 1.420375276284232e-05, + "loss": 0.0568, + "step": 37995 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202968977787533e-05, + "loss": 0.0666, + "step": 38000 + }, + { + "epoch": 1.77, + "learning_rate": 1.4202185192732747e-05, + "loss": 0.1213, + "step": 38005 + }, + { + "epoch": 1.77, + "learning_rate": 1.4201401407677959e-05, + "loss": 0.1022, + "step": 38010 + }, + { + "epoch": 1.77, + "learning_rate": 1.4200617622623175e-05, + "loss": 0.1468, + "step": 38015 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199833837568387e-05, + "loss": 0.2344, + "step": 38020 + }, + { + "epoch": 1.77, + "learning_rate": 1.4199050052513599e-05, + "loss": 0.2398, + "step": 38025 + }, + { + "epoch": 1.77, + "learning_rate": 1.4198266267458813e-05, + "loss": 0.3248, + "step": 38030 + }, + { + "epoch": 1.77, + "learning_rate": 1.4197482482404025e-05, + "loss": 0.2467, + "step": 38035 + }, + { + "epoch": 1.77, + "learning_rate": 1.419669869734924e-05, + "loss": 0.0681, + "step": 38040 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195914912294453e-05, + "loss": 0.0525, + "step": 38045 + }, + { + "epoch": 1.78, + "learning_rate": 1.4195131127239667e-05, + "loss": 0.0501, + "step": 38050 + }, + { + "epoch": 1.78, + "learning_rate": 1.4194347342184879e-05, + "loss": 0.0514, + "step": 38055 + }, + { + "epoch": 1.78, + "learning_rate": 1.4193563557130095e-05, + "loss": 0.1218, + "step": 38060 + }, + { + "epoch": 1.78, + "learning_rate": 1.4192779772075307e-05, + "loss": 0.1249, + "step": 38065 + }, + { + "epoch": 1.78, + "learning_rate": 1.419199598702052e-05, + "loss": 0.1505, + "step": 38070 + }, + { + "epoch": 1.78, + "learning_rate": 1.4191212201965735e-05, + "loss": 0.2301, + "step": 38075 + }, + { + "epoch": 1.78, + "learning_rate": 1.4190428416910949e-05, + "loss": 0.3129, + "step": 38080 + }, + { + "epoch": 1.78, + "learning_rate": 1.4189644631856161e-05, + "loss": 0.2717, + "step": 38085 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188860846801373e-05, + "loss": 0.0331, + "step": 38090 + }, + { + "epoch": 1.78, + "learning_rate": 1.4188077061746589e-05, + "loss": 0.0624, + "step": 38095 + }, + { + "epoch": 1.78, + "learning_rate": 1.4187293276691801e-05, + "loss": 0.0583, + "step": 38100 + }, + { + "epoch": 1.78, + "learning_rate": 1.4186509491637015e-05, + "loss": 0.0417, + "step": 38105 + }, + { + "epoch": 1.78, + "learning_rate": 1.4185725706582227e-05, + "loss": 0.1927, + "step": 38110 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184941921527443e-05, + "loss": 0.0895, + "step": 38115 + }, + { + "epoch": 1.78, + "learning_rate": 1.4184158136472655e-05, + "loss": 0.1494, + "step": 38120 + }, + { + "epoch": 1.78, + "learning_rate": 1.4183374351417869e-05, + "loss": 0.2075, + "step": 38125 + }, + { + "epoch": 1.78, + "learning_rate": 1.4182590566363081e-05, + "loss": 0.2841, + "step": 38130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181806781308297e-05, + "loss": 0.2523, + "step": 38135 + }, + { + "epoch": 1.78, + "learning_rate": 1.4181022996253509e-05, + "loss": 0.051, + "step": 38140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4180239211198723e-05, + "loss": 0.0424, + "step": 38145 + }, + { + "epoch": 1.78, + "learning_rate": 1.4179455426143935e-05, + "loss": 0.0772, + "step": 38150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4178671641089147e-05, + "loss": 0.0753, + "step": 38155 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177887856034363e-05, + "loss": 0.1035, + "step": 38160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4177104070979575e-05, + "loss": 0.226, + "step": 38165 + }, + { + "epoch": 1.78, + "learning_rate": 1.4176320285924789e-05, + "loss": 0.2246, + "step": 38170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4175536500870003e-05, + "loss": 0.1679, + "step": 38175 + }, + { + "epoch": 1.78, + "learning_rate": 1.4174752715815217e-05, + "loss": 0.3673, + "step": 38180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173968930760429e-05, + "loss": 0.2447, + "step": 38185 + }, + { + "epoch": 1.78, + "learning_rate": 1.4173185145705643e-05, + "loss": 0.1178, + "step": 38190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4172401360650857e-05, + "loss": 0.0845, + "step": 38195 + }, + { + "epoch": 1.78, + "learning_rate": 1.417161757559607e-05, + "loss": 0.0427, + "step": 38200 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170833790541283e-05, + "loss": 0.1022, + "step": 38205 + }, + { + "epoch": 1.78, + "learning_rate": 1.4170050005486498e-05, + "loss": 0.0848, + "step": 38210 + }, + { + "epoch": 1.78, + "learning_rate": 1.416926622043171e-05, + "loss": 0.1151, + "step": 38215 + }, + { + "epoch": 1.78, + "learning_rate": 1.4168482435376923e-05, + "loss": 0.1462, + "step": 38220 + }, + { + "epoch": 1.78, + "learning_rate": 1.4167698650322137e-05, + "loss": 0.2352, + "step": 38225 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166914865267349e-05, + "loss": 0.4185, + "step": 38230 + }, + { + "epoch": 1.78, + "learning_rate": 1.4166131080212564e-05, + "loss": 0.2938, + "step": 38235 + }, + { + "epoch": 1.78, + "learning_rate": 1.4165347295157777e-05, + "loss": 0.0908, + "step": 38240 + }, + { + "epoch": 1.78, + "learning_rate": 1.416456351010299e-05, + "loss": 0.0728, + "step": 38245 + }, + { + "epoch": 1.78, + "learning_rate": 1.4163779725048203e-05, + "loss": 0.0581, + "step": 38250 + }, + { + "epoch": 1.79, + "learning_rate": 1.4162995939993418e-05, + "loss": 0.128, + "step": 38255 + }, + { + "epoch": 1.79, + "learning_rate": 1.416221215493863e-05, + "loss": 0.1307, + "step": 38260 + }, + { + "epoch": 1.79, + "learning_rate": 1.4161428369883845e-05, + "loss": 0.1847, + "step": 38265 + }, + { + "epoch": 1.79, + "learning_rate": 1.4160644584829057e-05, + "loss": 0.1701, + "step": 38270 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159860799774272e-05, + "loss": 0.2065, + "step": 38275 + }, + { + "epoch": 1.79, + "learning_rate": 1.4159077014719485e-05, + "loss": 0.4186, + "step": 38280 + }, + { + "epoch": 1.79, + "learning_rate": 1.4158293229664697e-05, + "loss": 0.2782, + "step": 38285 + }, + { + "epoch": 1.79, + "learning_rate": 1.415750944460991e-05, + "loss": 0.0348, + "step": 38290 + }, + { + "epoch": 1.79, + "learning_rate": 1.4156725659555125e-05, + "loss": 0.023, + "step": 38295 + }, + { + "epoch": 1.79, + "learning_rate": 1.4155941874500338e-05, + "loss": 0.0195, + "step": 38300 + }, + { + "epoch": 1.79, + "learning_rate": 1.415515808944555e-05, + "loss": 0.1097, + "step": 38305 + }, + { + "epoch": 1.79, + "learning_rate": 1.4154374304390766e-05, + "loss": 0.1873, + "step": 38310 + }, + { + "epoch": 1.79, + "learning_rate": 1.4153590519335978e-05, + "loss": 0.1415, + "step": 38315 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152806734281192e-05, + "loss": 0.1651, + "step": 38320 + }, + { + "epoch": 1.79, + "learning_rate": 1.4152022949226405e-05, + "loss": 0.2963, + "step": 38325 + }, + { + "epoch": 1.79, + "learning_rate": 1.415123916417162e-05, + "loss": 0.2719, + "step": 38330 + }, + { + "epoch": 1.79, + "learning_rate": 1.4150455379116832e-05, + "loss": 0.2542, + "step": 38335 + }, + { + "epoch": 1.79, + "learning_rate": 1.4149671594062046e-05, + "loss": 0.0304, + "step": 38340 + }, + { + "epoch": 1.79, + "learning_rate": 1.4148887809007259e-05, + "loss": 0.1187, + "step": 38345 + }, + { + "epoch": 1.79, + "learning_rate": 1.414810402395247e-05, + "loss": 0.1246, + "step": 38350 + }, + { + "epoch": 1.79, + "learning_rate": 1.4147320238897686e-05, + "loss": 0.0625, + "step": 38355 + }, + { + "epoch": 1.79, + "learning_rate": 1.4146536453842899e-05, + "loss": 0.182, + "step": 38360 + }, + { + "epoch": 1.79, + "learning_rate": 1.4145752668788112e-05, + "loss": 0.0816, + "step": 38365 + }, + { + "epoch": 1.79, + "learning_rate": 1.4144968883733325e-05, + "loss": 0.1735, + "step": 38370 + }, + { + "epoch": 1.79, + "learning_rate": 1.414418509867854e-05, + "loss": 0.1373, + "step": 38375 + }, + { + "epoch": 1.79, + "learning_rate": 1.4143401313623752e-05, + "loss": 0.4016, + "step": 38380 + }, + { + "epoch": 1.79, + "learning_rate": 1.4142617528568966e-05, + "loss": 0.2637, + "step": 38385 + }, + { + "epoch": 1.79, + "learning_rate": 1.414183374351418e-05, + "loss": 0.0964, + "step": 38390 + }, + { + "epoch": 1.79, + "learning_rate": 1.4141049958459394e-05, + "loss": 0.0635, + "step": 38395 + }, + { + "epoch": 1.79, + "learning_rate": 1.4140266173404606e-05, + "loss": 0.0482, + "step": 38400 + }, + { + "epoch": 1.79, + "learning_rate": 1.413948238834982e-05, + "loss": 0.0768, + "step": 38405 + }, + { + "epoch": 1.79, + "learning_rate": 1.4138698603295034e-05, + "loss": 0.0976, + "step": 38410 + }, + { + "epoch": 1.79, + "learning_rate": 1.4137914818240246e-05, + "loss": 0.2089, + "step": 38415 + }, + { + "epoch": 1.79, + "learning_rate": 1.413713103318546e-05, + "loss": 0.2171, + "step": 38420 + }, + { + "epoch": 1.79, + "learning_rate": 1.4136347248130673e-05, + "loss": 0.2064, + "step": 38425 + }, + { + "epoch": 1.79, + "learning_rate": 1.4135563463075888e-05, + "loss": 0.4382, + "step": 38430 + }, + { + "epoch": 1.79, + "learning_rate": 1.41347796780211e-05, + "loss": 0.2063, + "step": 38435 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133995892966314e-05, + "loss": 0.0299, + "step": 38440 + }, + { + "epoch": 1.79, + "learning_rate": 1.4133212107911526e-05, + "loss": 0.08, + "step": 38445 + }, + { + "epoch": 1.79, + "learning_rate": 1.4132428322856742e-05, + "loss": 0.1102, + "step": 38450 + }, + { + "epoch": 1.79, + "learning_rate": 1.4131644537801954e-05, + "loss": 0.0615, + "step": 38455 + }, + { + "epoch": 1.79, + "learning_rate": 1.4130860752747168e-05, + "loss": 0.0704, + "step": 38460 + }, + { + "epoch": 1.79, + "learning_rate": 1.413007696769238e-05, + "loss": 0.0914, + "step": 38465 + }, + { + "epoch": 1.8, + "learning_rate": 1.4129293182637596e-05, + "loss": 0.1401, + "step": 38470 + }, + { + "epoch": 1.8, + "learning_rate": 1.4128509397582808e-05, + "loss": 0.2028, + "step": 38475 + }, + { + "epoch": 1.8, + "learning_rate": 1.412772561252802e-05, + "loss": 0.2902, + "step": 38480 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126941827473234e-05, + "loss": 0.2409, + "step": 38485 + }, + { + "epoch": 1.8, + "learning_rate": 1.4126158042418448e-05, + "loss": 0.0215, + "step": 38490 + }, + { + "epoch": 1.8, + "learning_rate": 1.4125374257363662e-05, + "loss": 0.0641, + "step": 38495 + }, + { + "epoch": 1.8, + "learning_rate": 1.4124590472308874e-05, + "loss": 0.1115, + "step": 38500 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123806687254088e-05, + "loss": 0.099, + "step": 38505 + }, + { + "epoch": 1.8, + "learning_rate": 1.4123022902199302e-05, + "loss": 0.0935, + "step": 38510 + }, + { + "epoch": 1.8, + "learning_rate": 1.4122239117144516e-05, + "loss": 0.1694, + "step": 38515 + }, + { + "epoch": 1.8, + "learning_rate": 1.4121455332089728e-05, + "loss": 0.234, + "step": 38520 + }, + { + "epoch": 1.8, + "learning_rate": 1.4120671547034944e-05, + "loss": 0.237, + "step": 38525 + }, + { + "epoch": 1.8, + "learning_rate": 1.4119887761980156e-05, + "loss": 0.3852, + "step": 38530 + }, + { + "epoch": 1.8, + "learning_rate": 1.411910397692537e-05, + "loss": 0.2284, + "step": 38535 + }, + { + "epoch": 1.8, + "learning_rate": 1.4118320191870582e-05, + "loss": 0.0491, + "step": 38540 + }, + { + "epoch": 1.8, + "learning_rate": 1.4117536406815794e-05, + "loss": 0.0762, + "step": 38545 + }, + { + "epoch": 1.8, + "learning_rate": 1.411675262176101e-05, + "loss": 0.1074, + "step": 38550 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115968836706222e-05, + "loss": 0.1303, + "step": 38555 + }, + { + "epoch": 1.8, + "learning_rate": 1.4115185051651436e-05, + "loss": 0.1395, + "step": 38560 + }, + { + "epoch": 1.8, + "learning_rate": 1.4114401266596648e-05, + "loss": 0.156, + "step": 38565 + }, + { + "epoch": 1.8, + "learning_rate": 1.4113617481541864e-05, + "loss": 0.1675, + "step": 38570 + }, + { + "epoch": 1.8, + "learning_rate": 1.4112833696487076e-05, + "loss": 0.1753, + "step": 38575 + }, + { + "epoch": 1.8, + "learning_rate": 1.411204991143229e-05, + "loss": 0.3437, + "step": 38580 + }, + { + "epoch": 1.8, + "learning_rate": 1.4111266126377502e-05, + "loss": 0.3223, + "step": 38585 + }, + { + "epoch": 1.8, + "learning_rate": 1.4110482341322718e-05, + "loss": 0.0702, + "step": 38590 + }, + { + "epoch": 1.8, + "learning_rate": 1.410969855626793e-05, + "loss": 0.0645, + "step": 38595 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108914771213144e-05, + "loss": 0.1382, + "step": 38600 + }, + { + "epoch": 1.8, + "learning_rate": 1.4108130986158356e-05, + "loss": 0.1124, + "step": 38605 + }, + { + "epoch": 1.8, + "learning_rate": 1.410734720110357e-05, + "loss": 0.0886, + "step": 38610 + }, + { + "epoch": 1.8, + "learning_rate": 1.4106563416048784e-05, + "loss": 0.1948, + "step": 38615 + }, + { + "epoch": 1.8, + "learning_rate": 1.4105779630993996e-05, + "loss": 0.2204, + "step": 38620 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104995845939212e-05, + "loss": 0.1517, + "step": 38625 + }, + { + "epoch": 1.8, + "learning_rate": 1.4104212060884424e-05, + "loss": 0.3109, + "step": 38630 + }, + { + "epoch": 1.8, + "learning_rate": 1.4103428275829638e-05, + "loss": 0.1802, + "step": 38635 + }, + { + "epoch": 1.8, + "learning_rate": 1.410264449077485e-05, + "loss": 0.028, + "step": 38640 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101860705720066e-05, + "loss": 0.1063, + "step": 38645 + }, + { + "epoch": 1.8, + "learning_rate": 1.4101076920665278e-05, + "loss": 0.0521, + "step": 38650 + }, + { + "epoch": 1.8, + "learning_rate": 1.4100293135610492e-05, + "loss": 0.091, + "step": 38655 + }, + { + "epoch": 1.8, + "learning_rate": 1.4099509350555704e-05, + "loss": 0.0967, + "step": 38660 + }, + { + "epoch": 1.8, + "learning_rate": 1.409872556550092e-05, + "loss": 0.117, + "step": 38665 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097941780446132e-05, + "loss": 0.1346, + "step": 38670 + }, + { + "epoch": 1.8, + "learning_rate": 1.4097157995391344e-05, + "loss": 0.1494, + "step": 38675 + }, + { + "epoch": 1.8, + "learning_rate": 1.4096374210336558e-05, + "loss": 0.2843, + "step": 38680 + }, + { + "epoch": 1.81, + "learning_rate": 1.409559042528177e-05, + "loss": 0.2728, + "step": 38685 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094806640226986e-05, + "loss": 0.0535, + "step": 38690 + }, + { + "epoch": 1.81, + "learning_rate": 1.4094022855172198e-05, + "loss": 0.0669, + "step": 38695 + }, + { + "epoch": 1.81, + "learning_rate": 1.4093239070117412e-05, + "loss": 0.1214, + "step": 38700 + }, + { + "epoch": 1.81, + "learning_rate": 1.4092455285062626e-05, + "loss": 0.0523, + "step": 38705 + }, + { + "epoch": 1.81, + "learning_rate": 1.409167150000784e-05, + "loss": 0.1108, + "step": 38710 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090887714953052e-05, + "loss": 0.1358, + "step": 38715 + }, + { + "epoch": 1.81, + "learning_rate": 1.4090103929898266e-05, + "loss": 0.1182, + "step": 38720 + }, + { + "epoch": 1.81, + "learning_rate": 1.408932014484348e-05, + "loss": 0.1505, + "step": 38725 + }, + { + "epoch": 1.81, + "learning_rate": 1.4088536359788694e-05, + "loss": 0.3261, + "step": 38730 + }, + { + "epoch": 1.81, + "learning_rate": 1.4087752574733906e-05, + "loss": 0.3337, + "step": 38735 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086968789679118e-05, + "loss": 0.0553, + "step": 38740 + }, + { + "epoch": 1.81, + "learning_rate": 1.4086185004624334e-05, + "loss": 0.0685, + "step": 38745 + }, + { + "epoch": 1.81, + "learning_rate": 1.4085401219569546e-05, + "loss": 0.0481, + "step": 38750 + }, + { + "epoch": 1.81, + "learning_rate": 1.408461743451476e-05, + "loss": 0.095, + "step": 38755 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083833649459972e-05, + "loss": 0.1435, + "step": 38760 + }, + { + "epoch": 1.81, + "learning_rate": 1.4083049864405188e-05, + "loss": 0.0965, + "step": 38765 + }, + { + "epoch": 1.81, + "learning_rate": 1.40822660793504e-05, + "loss": 0.1779, + "step": 38770 + }, + { + "epoch": 1.81, + "learning_rate": 1.4081482294295614e-05, + "loss": 0.2204, + "step": 38775 + }, + { + "epoch": 1.81, + "learning_rate": 1.4080698509240826e-05, + "loss": 0.3111, + "step": 38780 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079914724186042e-05, + "loss": 0.2863, + "step": 38785 + }, + { + "epoch": 1.81, + "learning_rate": 1.4079130939131254e-05, + "loss": 0.0378, + "step": 38790 + }, + { + "epoch": 1.81, + "learning_rate": 1.4078347154076468e-05, + "loss": 0.1025, + "step": 38795 + }, + { + "epoch": 1.81, + "learning_rate": 1.407756336902168e-05, + "loss": 0.0496, + "step": 38800 + }, + { + "epoch": 1.81, + "learning_rate": 1.4076779583966894e-05, + "loss": 0.0819, + "step": 38805 + }, + { + "epoch": 1.81, + "learning_rate": 1.4075995798912108e-05, + "loss": 0.071, + "step": 38810 + }, + { + "epoch": 1.81, + "learning_rate": 1.407521201385732e-05, + "loss": 0.1571, + "step": 38815 + }, + { + "epoch": 1.81, + "learning_rate": 1.4074428228802534e-05, + "loss": 0.262, + "step": 38820 + }, + { + "epoch": 1.81, + "learning_rate": 1.4073644443747748e-05, + "loss": 0.2983, + "step": 38825 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072860658692962e-05, + "loss": 0.228, + "step": 38830 + }, + { + "epoch": 1.81, + "learning_rate": 1.4072076873638174e-05, + "loss": 0.1873, + "step": 38835 + }, + { + "epoch": 1.81, + "learning_rate": 1.407129308858339e-05, + "loss": 0.018, + "step": 38840 + }, + { + "epoch": 1.81, + "learning_rate": 1.4070509303528602e-05, + "loss": 0.0565, + "step": 38845 + }, + { + "epoch": 1.81, + "learning_rate": 1.4069725518473816e-05, + "loss": 0.1004, + "step": 38850 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068941733419028e-05, + "loss": 0.0656, + "step": 38855 + }, + { + "epoch": 1.81, + "learning_rate": 1.4068157948364243e-05, + "loss": 0.2027, + "step": 38860 + }, + { + "epoch": 1.81, + "learning_rate": 1.4067374163309456e-05, + "loss": 0.2106, + "step": 38865 + }, + { + "epoch": 1.81, + "learning_rate": 1.4066590378254668e-05, + "loss": 0.1221, + "step": 38870 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065806593199882e-05, + "loss": 0.2289, + "step": 38875 + }, + { + "epoch": 1.81, + "learning_rate": 1.4065022808145094e-05, + "loss": 0.311, + "step": 38880 + }, + { + "epoch": 1.81, + "learning_rate": 1.406423902309031e-05, + "loss": 0.3, + "step": 38885 + }, + { + "epoch": 1.81, + "learning_rate": 1.4063455238035522e-05, + "loss": 0.0561, + "step": 38890 + }, + { + "epoch": 1.81, + "learning_rate": 1.4062671452980736e-05, + "loss": 0.0676, + "step": 38895 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061887667925948e-05, + "loss": 0.1541, + "step": 38900 + }, + { + "epoch": 1.82, + "learning_rate": 1.4061103882871163e-05, + "loss": 0.0841, + "step": 38905 + }, + { + "epoch": 1.82, + "learning_rate": 1.4060320097816376e-05, + "loss": 0.1486, + "step": 38910 + }, + { + "epoch": 1.82, + "learning_rate": 1.405953631276159e-05, + "loss": 0.1742, + "step": 38915 + }, + { + "epoch": 1.82, + "learning_rate": 1.4058752527706803e-05, + "loss": 0.153, + "step": 38920 + }, + { + "epoch": 1.82, + "learning_rate": 1.4057968742652017e-05, + "loss": 0.201, + "step": 38925 + }, + { + "epoch": 1.82, + "learning_rate": 1.405718495759723e-05, + "loss": 0.3103, + "step": 38930 + }, + { + "epoch": 1.82, + "learning_rate": 1.4056401172542442e-05, + "loss": 0.1888, + "step": 38935 + }, + { + "epoch": 1.82, + "learning_rate": 1.4055617387487657e-05, + "loss": 0.035, + "step": 38940 + }, + { + "epoch": 1.82, + "learning_rate": 1.405483360243287e-05, + "loss": 0.02, + "step": 38945 + }, + { + "epoch": 1.82, + "learning_rate": 1.4054049817378084e-05, + "loss": 0.0971, + "step": 38950 + }, + { + "epoch": 1.82, + "learning_rate": 1.4053266032323296e-05, + "loss": 0.1084, + "step": 38955 + }, + { + "epoch": 1.82, + "learning_rate": 1.4052482247268511e-05, + "loss": 0.0948, + "step": 38960 + }, + { + "epoch": 1.82, + "learning_rate": 1.4051698462213724e-05, + "loss": 0.1653, + "step": 38965 + }, + { + "epoch": 1.82, + "learning_rate": 1.4050914677158937e-05, + "loss": 0.1645, + "step": 38970 + }, + { + "epoch": 1.82, + "learning_rate": 1.405013089210415e-05, + "loss": 0.1154, + "step": 38975 + }, + { + "epoch": 1.82, + "learning_rate": 1.4049347107049365e-05, + "loss": 0.4433, + "step": 38980 + }, + { + "epoch": 1.82, + "learning_rate": 1.4048563321994577e-05, + "loss": 0.2112, + "step": 38985 + }, + { + "epoch": 1.82, + "learning_rate": 1.4047779536939791e-05, + "loss": 0.0622, + "step": 38990 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046995751885004e-05, + "loss": 0.0382, + "step": 38995 + }, + { + "epoch": 1.82, + "learning_rate": 1.4046211966830216e-05, + "loss": 0.1102, + "step": 39000 + }, + { + "epoch": 1.82, + "learning_rate": 1.4045428181775431e-05, + "loss": 0.1516, + "step": 39005 + }, + { + "epoch": 1.82, + "learning_rate": 1.4044644396720644e-05, + "loss": 0.1424, + "step": 39010 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043860611665858e-05, + "loss": 0.1871, + "step": 39015 + }, + { + "epoch": 1.82, + "learning_rate": 1.4043076826611071e-05, + "loss": 0.2085, + "step": 39020 + }, + { + "epoch": 1.82, + "learning_rate": 1.4042293041556285e-05, + "loss": 0.2028, + "step": 39025 + }, + { + "epoch": 1.82, + "learning_rate": 1.4041509256501498e-05, + "loss": 0.3885, + "step": 39030 + }, + { + "epoch": 1.82, + "learning_rate": 1.4040725471446711e-05, + "loss": 0.3049, + "step": 39035 + }, + { + "epoch": 1.82, + "learning_rate": 1.4039941686391925e-05, + "loss": 0.0252, + "step": 39040 + }, + { + "epoch": 1.82, + "learning_rate": 1.403915790133714e-05, + "loss": 0.0376, + "step": 39045 + }, + { + "epoch": 1.82, + "learning_rate": 1.4038374116282351e-05, + "loss": 0.029, + "step": 39050 + }, + { + "epoch": 1.82, + "learning_rate": 1.4037590331227567e-05, + "loss": 0.1269, + "step": 39055 + }, + { + "epoch": 1.82, + "learning_rate": 1.403680654617278e-05, + "loss": 0.1043, + "step": 39060 + }, + { + "epoch": 1.82, + "learning_rate": 1.4036022761117992e-05, + "loss": 0.1261, + "step": 39065 + }, + { + "epoch": 1.82, + "learning_rate": 1.4035238976063205e-05, + "loss": 0.2339, + "step": 39070 + }, + { + "epoch": 1.82, + "learning_rate": 1.4034455191008418e-05, + "loss": 0.1286, + "step": 39075 + }, + { + "epoch": 1.82, + "learning_rate": 1.4033671405953633e-05, + "loss": 0.2755, + "step": 39080 + }, + { + "epoch": 1.82, + "learning_rate": 1.4032887620898845e-05, + "loss": 0.3312, + "step": 39085 + }, + { + "epoch": 1.82, + "learning_rate": 1.403210383584406e-05, + "loss": 0.0087, + "step": 39090 + }, + { + "epoch": 1.82, + "learning_rate": 1.4031320050789272e-05, + "loss": 0.0794, + "step": 39095 + }, + { + "epoch": 1.82, + "learning_rate": 1.4030536265734487e-05, + "loss": 0.0557, + "step": 39100 + }, + { + "epoch": 1.82, + "learning_rate": 1.40297524806797e-05, + "loss": 0.136, + "step": 39105 + }, + { + "epoch": 1.82, + "learning_rate": 1.4028968695624913e-05, + "loss": 0.1002, + "step": 39110 + }, + { + "epoch": 1.83, + "learning_rate": 1.4028184910570125e-05, + "loss": 0.1967, + "step": 39115 + }, + { + "epoch": 1.83, + "learning_rate": 1.4027401125515341e-05, + "loss": 0.1752, + "step": 39120 + }, + { + "epoch": 1.83, + "learning_rate": 1.4026617340460553e-05, + "loss": 0.1226, + "step": 39125 + }, + { + "epoch": 1.83, + "learning_rate": 1.4025833555405766e-05, + "loss": 0.2543, + "step": 39130 + }, + { + "epoch": 1.83, + "learning_rate": 1.402504977035098e-05, + "loss": 0.3523, + "step": 39135 + }, + { + "epoch": 1.83, + "learning_rate": 1.4024265985296193e-05, + "loss": 0.086, + "step": 39140 + }, + { + "epoch": 1.83, + "learning_rate": 1.4023482200241407e-05, + "loss": 0.1465, + "step": 39145 + }, + { + "epoch": 1.83, + "learning_rate": 1.402269841518662e-05, + "loss": 0.1037, + "step": 39150 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021914630131835e-05, + "loss": 0.0747, + "step": 39155 + }, + { + "epoch": 1.83, + "learning_rate": 1.4021130845077047e-05, + "loss": 0.1403, + "step": 39160 + }, + { + "epoch": 1.83, + "learning_rate": 1.4020347060022261e-05, + "loss": 0.1251, + "step": 39165 + }, + { + "epoch": 1.83, + "learning_rate": 1.4019563274967473e-05, + "loss": 0.1805, + "step": 39170 + }, + { + "epoch": 1.83, + "learning_rate": 1.4018779489912689e-05, + "loss": 0.1899, + "step": 39175 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017995704857901e-05, + "loss": 0.3973, + "step": 39180 + }, + { + "epoch": 1.83, + "learning_rate": 1.4017368676814072e-05, + "loss": 0.2354, + "step": 39185 + }, + { + "epoch": 1.83, + "learning_rate": 1.4016584891759286e-05, + "loss": 0.0266, + "step": 39190 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015801106704498e-05, + "loss": 0.0359, + "step": 39195 + }, + { + "epoch": 1.83, + "learning_rate": 1.4015017321649712e-05, + "loss": 0.0703, + "step": 39200 + }, + { + "epoch": 1.83, + "learning_rate": 1.4014233536594926e-05, + "loss": 0.1166, + "step": 39205 + }, + { + "epoch": 1.83, + "learning_rate": 1.4013449751540138e-05, + "loss": 0.1409, + "step": 39210 + }, + { + "epoch": 1.83, + "learning_rate": 1.4012665966485353e-05, + "loss": 0.129, + "step": 39215 + }, + { + "epoch": 1.83, + "learning_rate": 1.4011882181430566e-05, + "loss": 0.1237, + "step": 39220 + }, + { + "epoch": 1.83, + "learning_rate": 1.401109839637578e-05, + "loss": 0.1638, + "step": 39225 + }, + { + "epoch": 1.83, + "learning_rate": 1.4010314611320992e-05, + "loss": 0.2936, + "step": 39230 + }, + { + "epoch": 1.83, + "learning_rate": 1.4009530826266207e-05, + "loss": 0.2029, + "step": 39235 + }, + { + "epoch": 1.83, + "learning_rate": 1.400874704121142e-05, + "loss": 0.0323, + "step": 39240 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007963256156633e-05, + "loss": 0.1204, + "step": 39245 + }, + { + "epoch": 1.83, + "learning_rate": 1.4007179471101846e-05, + "loss": 0.0603, + "step": 39250 + }, + { + "epoch": 1.83, + "learning_rate": 1.4006395686047061e-05, + "loss": 0.1161, + "step": 39255 + }, + { + "epoch": 1.83, + "learning_rate": 1.4005611900992273e-05, + "loss": 0.1051, + "step": 39260 + }, + { + "epoch": 1.83, + "learning_rate": 1.4004828115937486e-05, + "loss": 0.1097, + "step": 39265 + }, + { + "epoch": 1.83, + "learning_rate": 1.40040443308827e-05, + "loss": 0.1569, + "step": 39270 + }, + { + "epoch": 1.83, + "learning_rate": 1.4003260545827912e-05, + "loss": 0.2331, + "step": 39275 + }, + { + "epoch": 1.83, + "learning_rate": 1.4002476760773127e-05, + "loss": 0.4034, + "step": 39280 + }, + { + "epoch": 1.83, + "learning_rate": 1.400169297571834e-05, + "loss": 0.2077, + "step": 39285 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000909190663553e-05, + "loss": 0.0452, + "step": 39290 + }, + { + "epoch": 1.83, + "learning_rate": 1.4000125405608766e-05, + "loss": 0.0357, + "step": 39295 + }, + { + "epoch": 1.83, + "learning_rate": 1.3999341620553981e-05, + "loss": 0.0787, + "step": 39300 + }, + { + "epoch": 1.83, + "learning_rate": 1.3998557835499194e-05, + "loss": 0.1583, + "step": 39305 + }, + { + "epoch": 1.83, + "learning_rate": 1.3997774050444407e-05, + "loss": 0.1058, + "step": 39310 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996990265389621e-05, + "loss": 0.0733, + "step": 39315 + }, + { + "epoch": 1.83, + "learning_rate": 1.3996206480334835e-05, + "loss": 0.1355, + "step": 39320 + }, + { + "epoch": 1.83, + "learning_rate": 1.3995422695280047e-05, + "loss": 0.1527, + "step": 39325 + }, + { + "epoch": 1.84, + "learning_rate": 1.399463891022526e-05, + "loss": 0.2326, + "step": 39330 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993855125170475e-05, + "loss": 0.2507, + "step": 39335 + }, + { + "epoch": 1.84, + "learning_rate": 1.3993071340115687e-05, + "loss": 0.0268, + "step": 39340 + }, + { + "epoch": 1.84, + "learning_rate": 1.3992287555060901e-05, + "loss": 0.0515, + "step": 39345 + }, + { + "epoch": 1.84, + "learning_rate": 1.3991503770006114e-05, + "loss": 0.0863, + "step": 39350 + }, + { + "epoch": 1.84, + "learning_rate": 1.399071998495133e-05, + "loss": 0.1121, + "step": 39355 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989936199896541e-05, + "loss": 0.1218, + "step": 39360 + }, + { + "epoch": 1.84, + "learning_rate": 1.3989152414841755e-05, + "loss": 0.0561, + "step": 39365 + }, + { + "epoch": 1.84, + "learning_rate": 1.3988368629786968e-05, + "loss": 0.1979, + "step": 39370 + }, + { + "epoch": 1.84, + "learning_rate": 1.3987584844732183e-05, + "loss": 0.3349, + "step": 39375 + }, + { + "epoch": 1.84, + "learning_rate": 1.3986801059677395e-05, + "loss": 0.2712, + "step": 39380 + }, + { + "epoch": 1.84, + "learning_rate": 1.398601727462261e-05, + "loss": 0.21, + "step": 39385 + }, + { + "epoch": 1.84, + "learning_rate": 1.3985233489567821e-05, + "loss": 0.0712, + "step": 39390 + }, + { + "epoch": 1.84, + "learning_rate": 1.3984449704513035e-05, + "loss": 0.1168, + "step": 39395 + }, + { + "epoch": 1.84, + "learning_rate": 1.398366591945825e-05, + "loss": 0.0892, + "step": 39400 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982882134403461e-05, + "loss": 0.1336, + "step": 39405 + }, + { + "epoch": 1.84, + "learning_rate": 1.3982098349348675e-05, + "loss": 0.172, + "step": 39410 + }, + { + "epoch": 1.84, + "learning_rate": 1.398131456429389e-05, + "loss": 0.1663, + "step": 39415 + }, + { + "epoch": 1.84, + "learning_rate": 1.3980530779239103e-05, + "loss": 0.1677, + "step": 39420 + }, + { + "epoch": 1.84, + "learning_rate": 1.3979746994184315e-05, + "loss": 0.1407, + "step": 39425 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978963209129531e-05, + "loss": 0.4581, + "step": 39430 + }, + { + "epoch": 1.84, + "learning_rate": 1.3978179424074743e-05, + "loss": 0.4789, + "step": 39435 + }, + { + "epoch": 1.84, + "learning_rate": 1.3977395639019957e-05, + "loss": 0.0543, + "step": 39440 + }, + { + "epoch": 1.84, + "learning_rate": 1.397661185396517e-05, + "loss": 0.0469, + "step": 39445 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975828068910385e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 1.84, + "learning_rate": 1.3975044283855597e-05, + "loss": 0.0445, + "step": 39455 + }, + { + "epoch": 1.84, + "learning_rate": 1.397426049880081e-05, + "loss": 0.1107, + "step": 39460 + }, + { + "epoch": 1.84, + "learning_rate": 1.3973476713746023e-05, + "loss": 0.0865, + "step": 39465 + }, + { + "epoch": 1.84, + "learning_rate": 1.3972692928691235e-05, + "loss": 0.1738, + "step": 39470 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971909143636451e-05, + "loss": 0.3184, + "step": 39475 + }, + { + "epoch": 1.84, + "learning_rate": 1.3971125358581663e-05, + "loss": 0.2718, + "step": 39480 + }, + { + "epoch": 1.84, + "learning_rate": 1.3970341573526877e-05, + "loss": 0.3997, + "step": 39485 + }, + { + "epoch": 1.84, + "learning_rate": 1.396955778847209e-05, + "loss": 0.0962, + "step": 39490 + }, + { + "epoch": 1.84, + "learning_rate": 1.3968774003417305e-05, + "loss": 0.0429, + "step": 39495 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967990218362517e-05, + "loss": 0.119, + "step": 39500 + }, + { + "epoch": 1.84, + "learning_rate": 1.3967206433307731e-05, + "loss": 0.065, + "step": 39505 + }, + { + "epoch": 1.84, + "learning_rate": 1.3966422648252943e-05, + "loss": 0.2045, + "step": 39510 + }, + { + "epoch": 1.84, + "learning_rate": 1.3965638863198159e-05, + "loss": 0.1923, + "step": 39515 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964855078143371e-05, + "loss": 0.1101, + "step": 39520 + }, + { + "epoch": 1.84, + "learning_rate": 1.3964071293088583e-05, + "loss": 0.201, + "step": 39525 + }, + { + "epoch": 1.84, + "learning_rate": 1.3963287508033799e-05, + "loss": 0.2879, + "step": 39530 + }, + { + "epoch": 1.84, + "learning_rate": 1.3962503722979011e-05, + "loss": 0.1518, + "step": 39535 + }, + { + "epoch": 1.84, + "learning_rate": 1.3961719937924225e-05, + "loss": 0.0882, + "step": 39540 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960936152869437e-05, + "loss": 0.0576, + "step": 39545 + }, + { + "epoch": 1.85, + "learning_rate": 1.3960152367814653e-05, + "loss": 0.0711, + "step": 39550 + }, + { + "epoch": 1.85, + "learning_rate": 1.3959368582759865e-05, + "loss": 0.1663, + "step": 39555 + }, + { + "epoch": 1.85, + "learning_rate": 1.3958584797705079e-05, + "loss": 0.1107, + "step": 39560 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957801012650291e-05, + "loss": 0.1376, + "step": 39565 + }, + { + "epoch": 1.85, + "learning_rate": 1.3957017227595507e-05, + "loss": 0.2258, + "step": 39570 + }, + { + "epoch": 1.85, + "learning_rate": 1.3956233442540719e-05, + "loss": 0.1872, + "step": 39575 + }, + { + "epoch": 1.85, + "learning_rate": 1.3955449657485933e-05, + "loss": 0.3751, + "step": 39580 + }, + { + "epoch": 1.85, + "learning_rate": 1.3954665872431145e-05, + "loss": 0.3203, + "step": 39585 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953882087376357e-05, + "loss": 0.0157, + "step": 39590 + }, + { + "epoch": 1.85, + "learning_rate": 1.3953098302321573e-05, + "loss": 0.0469, + "step": 39595 + }, + { + "epoch": 1.85, + "learning_rate": 1.3952314517266785e-05, + "loss": 0.0631, + "step": 39600 + }, + { + "epoch": 1.85, + "learning_rate": 1.3951530732211999e-05, + "loss": 0.0862, + "step": 39605 + }, + { + "epoch": 1.85, + "learning_rate": 1.3950746947157211e-05, + "loss": 0.1194, + "step": 39610 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949963162102427e-05, + "loss": 0.1458, + "step": 39615 + }, + { + "epoch": 1.85, + "learning_rate": 1.3949179377047639e-05, + "loss": 0.2322, + "step": 39620 + }, + { + "epoch": 1.85, + "learning_rate": 1.3948395591992853e-05, + "loss": 0.363, + "step": 39625 + }, + { + "epoch": 1.85, + "learning_rate": 1.3947611806938067e-05, + "loss": 0.2981, + "step": 39630 + }, + { + "epoch": 1.85, + "learning_rate": 1.394682802188328e-05, + "loss": 0.2648, + "step": 39635 + }, + { + "epoch": 1.85, + "learning_rate": 1.3946044236828493e-05, + "loss": 0.0346, + "step": 39640 + }, + { + "epoch": 1.85, + "learning_rate": 1.3945260451773709e-05, + "loss": 0.0576, + "step": 39645 + }, + { + "epoch": 1.85, + "learning_rate": 1.394447666671892e-05, + "loss": 0.1158, + "step": 39650 + }, + { + "epoch": 1.85, + "learning_rate": 1.3943692881664133e-05, + "loss": 0.0324, + "step": 39655 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942909096609347e-05, + "loss": 0.1207, + "step": 39660 + }, + { + "epoch": 1.85, + "learning_rate": 1.3942125311554559e-05, + "loss": 0.1835, + "step": 39665 + }, + { + "epoch": 1.85, + "learning_rate": 1.3941341526499775e-05, + "loss": 0.1337, + "step": 39670 + }, + { + "epoch": 1.85, + "learning_rate": 1.3940557741444987e-05, + "loss": 0.1672, + "step": 39675 + }, + { + "epoch": 1.85, + "learning_rate": 1.3939773956390201e-05, + "loss": 0.2736, + "step": 39680 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938990171335413e-05, + "loss": 0.2847, + "step": 39685 + }, + { + "epoch": 1.85, + "learning_rate": 1.3938206386280629e-05, + "loss": 0.044, + "step": 39690 + }, + { + "epoch": 1.85, + "learning_rate": 1.3937422601225841e-05, + "loss": 0.0497, + "step": 39695 + }, + { + "epoch": 1.85, + "learning_rate": 1.3936638816171055e-05, + "loss": 0.1422, + "step": 39700 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935855031116267e-05, + "loss": 0.0669, + "step": 39705 + }, + { + "epoch": 1.85, + "learning_rate": 1.3935071246061483e-05, + "loss": 0.0804, + "step": 39710 + }, + { + "epoch": 1.85, + "learning_rate": 1.3934287461006695e-05, + "loss": 0.0735, + "step": 39715 + }, + { + "epoch": 1.85, + "learning_rate": 1.3933503675951907e-05, + "loss": 0.1105, + "step": 39720 + }, + { + "epoch": 1.85, + "learning_rate": 1.3932719890897121e-05, + "loss": 0.1518, + "step": 39725 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931936105842335e-05, + "loss": 0.3293, + "step": 39730 + }, + { + "epoch": 1.85, + "learning_rate": 1.3931152320787549e-05, + "loss": 0.2068, + "step": 39735 + }, + { + "epoch": 1.85, + "learning_rate": 1.3930368535732761e-05, + "loss": 0.0597, + "step": 39740 + }, + { + "epoch": 1.85, + "learning_rate": 1.3929584750677977e-05, + "loss": 0.0848, + "step": 39745 + }, + { + "epoch": 1.85, + "learning_rate": 1.3928800965623189e-05, + "loss": 0.0794, + "step": 39750 + }, + { + "epoch": 1.86, + "learning_rate": 1.3928017180568403e-05, + "loss": 0.0735, + "step": 39755 + }, + { + "epoch": 1.86, + "learning_rate": 1.3927233395513615e-05, + "loss": 0.1662, + "step": 39760 + }, + { + "epoch": 1.86, + "learning_rate": 1.392644961045883e-05, + "loss": 0.1268, + "step": 39765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3925665825404043e-05, + "loss": 0.1259, + "step": 39770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924882040349257e-05, + "loss": 0.2228, + "step": 39775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3924098255294469e-05, + "loss": 0.2452, + "step": 39780 + }, + { + "epoch": 1.86, + "learning_rate": 1.3923314470239681e-05, + "loss": 0.3537, + "step": 39785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3922530685184897e-05, + "loss": 0.0579, + "step": 39790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3921746900130109e-05, + "loss": 0.0819, + "step": 39795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920963115075323e-05, + "loss": 0.117, + "step": 39800 + }, + { + "epoch": 1.86, + "learning_rate": 1.3920179330020535e-05, + "loss": 0.0466, + "step": 39805 + }, + { + "epoch": 1.86, + "learning_rate": 1.391939554496575e-05, + "loss": 0.1153, + "step": 39810 + }, + { + "epoch": 1.86, + "learning_rate": 1.3918611759910963e-05, + "loss": 0.1227, + "step": 39815 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917827974856177e-05, + "loss": 0.1416, + "step": 39820 + }, + { + "epoch": 1.86, + "learning_rate": 1.3917044189801389e-05, + "loss": 0.1812, + "step": 39825 + }, + { + "epoch": 1.86, + "learning_rate": 1.3916260404746604e-05, + "loss": 0.3345, + "step": 39830 + }, + { + "epoch": 1.86, + "learning_rate": 1.3915476619691817e-05, + "loss": 0.359, + "step": 39835 + }, + { + "epoch": 1.86, + "learning_rate": 1.391469283463703e-05, + "loss": 0.019, + "step": 39840 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913909049582245e-05, + "loss": 0.037, + "step": 39845 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913125264527457e-05, + "loss": 0.0897, + "step": 39850 + }, + { + "epoch": 1.86, + "learning_rate": 1.391234147947267e-05, + "loss": 0.0943, + "step": 39855 + }, + { + "epoch": 1.86, + "learning_rate": 1.3911557694417883e-05, + "loss": 0.1399, + "step": 39860 + }, + { + "epoch": 1.86, + "learning_rate": 1.3910773909363098e-05, + "loss": 0.1403, + "step": 39865 + }, + { + "epoch": 1.86, + "learning_rate": 1.390999012430831e-05, + "loss": 0.2049, + "step": 39870 + }, + { + "epoch": 1.86, + "learning_rate": 1.3909206339253525e-05, + "loss": 0.2028, + "step": 39875 + }, + { + "epoch": 1.86, + "learning_rate": 1.3908422554198737e-05, + "loss": 0.281, + "step": 39880 + }, + { + "epoch": 1.86, + "learning_rate": 1.3907638769143952e-05, + "loss": 0.2856, + "step": 39885 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906854984089165e-05, + "loss": 0.0383, + "step": 39890 + }, + { + "epoch": 1.86, + "learning_rate": 1.3906071199034378e-05, + "loss": 0.0449, + "step": 39895 + }, + { + "epoch": 1.86, + "learning_rate": 1.390528741397959e-05, + "loss": 0.0715, + "step": 39900 + }, + { + "epoch": 1.86, + "learning_rate": 1.3904503628924806e-05, + "loss": 0.0983, + "step": 39905 + }, + { + "epoch": 1.86, + "learning_rate": 1.3903719843870019e-05, + "loss": 0.1431, + "step": 39910 + }, + { + "epoch": 1.86, + "learning_rate": 1.390293605881523e-05, + "loss": 0.1682, + "step": 39915 + }, + { + "epoch": 1.86, + "learning_rate": 1.3902152273760445e-05, + "loss": 0.1312, + "step": 39920 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901368488705659e-05, + "loss": 0.1199, + "step": 39925 + }, + { + "epoch": 1.86, + "learning_rate": 1.3900584703650872e-05, + "loss": 0.1829, + "step": 39930 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899800918596085e-05, + "loss": 0.2109, + "step": 39935 + }, + { + "epoch": 1.86, + "learning_rate": 1.3899017133541299e-05, + "loss": 0.0469, + "step": 39940 + }, + { + "epoch": 1.86, + "learning_rate": 1.3898233348486512e-05, + "loss": 0.056, + "step": 39945 + }, + { + "epoch": 1.86, + "learning_rate": 1.3897449563431726e-05, + "loss": 0.0796, + "step": 39950 + }, + { + "epoch": 1.86, + "learning_rate": 1.3896665778376939e-05, + "loss": 0.0767, + "step": 39955 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895881993322154e-05, + "loss": 0.0807, + "step": 39960 + }, + { + "epoch": 1.86, + "learning_rate": 1.3895098208267366e-05, + "loss": 0.1694, + "step": 39965 + }, + { + "epoch": 1.87, + "learning_rate": 1.389431442321258e-05, + "loss": 0.1686, + "step": 39970 + }, + { + "epoch": 1.87, + "learning_rate": 1.3893530638157793e-05, + "loss": 0.1735, + "step": 39975 + }, + { + "epoch": 1.87, + "learning_rate": 1.3892746853103005e-05, + "loss": 0.3052, + "step": 39980 + }, + { + "epoch": 1.87, + "learning_rate": 1.389196306804822e-05, + "loss": 0.315, + "step": 39985 + }, + { + "epoch": 1.87, + "learning_rate": 1.3891179282993433e-05, + "loss": 0.0689, + "step": 39990 + }, + { + "epoch": 1.87, + "learning_rate": 1.3890395497938646e-05, + "loss": 0.0481, + "step": 39995 + }, + { + "epoch": 1.87, + "learning_rate": 1.3889611712883859e-05, + "loss": 0.0813, + "step": 40000 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888827927829074e-05, + "loss": 0.0883, + "step": 40005 + }, + { + "epoch": 1.87, + "learning_rate": 1.3888044142774286e-05, + "loss": 0.0955, + "step": 40010 + }, + { + "epoch": 1.87, + "learning_rate": 1.38872603577195e-05, + "loss": 0.1675, + "step": 40015 + }, + { + "epoch": 1.87, + "learning_rate": 1.3886476572664713e-05, + "loss": 0.1361, + "step": 40020 + }, + { + "epoch": 1.87, + "learning_rate": 1.3885692787609928e-05, + "loss": 0.1712, + "step": 40025 + }, + { + "epoch": 1.87, + "learning_rate": 1.388490900255514e-05, + "loss": 0.3516, + "step": 40030 + }, + { + "epoch": 1.87, + "learning_rate": 1.3884125217500354e-05, + "loss": 0.233, + "step": 40035 + }, + { + "epoch": 1.87, + "learning_rate": 1.3883341432445567e-05, + "loss": 0.0227, + "step": 40040 + }, + { + "epoch": 1.87, + "learning_rate": 1.388255764739078e-05, + "loss": 0.0551, + "step": 40045 + }, + { + "epoch": 1.87, + "learning_rate": 1.3881773862335994e-05, + "loss": 0.0955, + "step": 40050 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880990077281207e-05, + "loss": 0.1336, + "step": 40055 + }, + { + "epoch": 1.87, + "learning_rate": 1.3880206292226422e-05, + "loss": 0.0766, + "step": 40060 + }, + { + "epoch": 1.87, + "learning_rate": 1.3879422507171634e-05, + "loss": 0.0847, + "step": 40065 + }, + { + "epoch": 1.87, + "learning_rate": 1.3878638722116848e-05, + "loss": 0.143, + "step": 40070 + }, + { + "epoch": 1.87, + "learning_rate": 1.387785493706206e-05, + "loss": 0.1564, + "step": 40075 + }, + { + "epoch": 1.87, + "learning_rate": 1.3877071152007276e-05, + "loss": 0.3278, + "step": 40080 + }, + { + "epoch": 1.87, + "learning_rate": 1.3876287366952488e-05, + "loss": 0.2336, + "step": 40085 + }, + { + "epoch": 1.87, + "learning_rate": 1.3875503581897702e-05, + "loss": 0.0916, + "step": 40090 + }, + { + "epoch": 1.87, + "learning_rate": 1.3874719796842914e-05, + "loss": 0.11, + "step": 40095 + }, + { + "epoch": 1.87, + "learning_rate": 1.387393601178813e-05, + "loss": 0.0472, + "step": 40100 + }, + { + "epoch": 1.87, + "learning_rate": 1.3873152226733342e-05, + "loss": 0.0719, + "step": 40105 + }, + { + "epoch": 1.87, + "learning_rate": 1.3872368441678554e-05, + "loss": 0.1545, + "step": 40110 + }, + { + "epoch": 1.87, + "learning_rate": 1.3871584656623768e-05, + "loss": 0.1102, + "step": 40115 + }, + { + "epoch": 1.87, + "learning_rate": 1.387080087156898e-05, + "loss": 0.2047, + "step": 40120 + }, + { + "epoch": 1.87, + "learning_rate": 1.3870017086514196e-05, + "loss": 0.241, + "step": 40125 + }, + { + "epoch": 1.87, + "learning_rate": 1.3869233301459408e-05, + "loss": 0.3618, + "step": 40130 + }, + { + "epoch": 1.87, + "learning_rate": 1.3868449516404622e-05, + "loss": 0.3678, + "step": 40135 + }, + { + "epoch": 1.87, + "learning_rate": 1.3867665731349834e-05, + "loss": 0.0394, + "step": 40140 + }, + { + "epoch": 1.87, + "learning_rate": 1.386688194629505e-05, + "loss": 0.0337, + "step": 40145 + }, + { + "epoch": 1.87, + "learning_rate": 1.3866098161240262e-05, + "loss": 0.0898, + "step": 40150 + }, + { + "epoch": 1.87, + "learning_rate": 1.3865314376185476e-05, + "loss": 0.074, + "step": 40155 + }, + { + "epoch": 1.87, + "learning_rate": 1.386453059113069e-05, + "loss": 0.1356, + "step": 40160 + }, + { + "epoch": 1.87, + "learning_rate": 1.3863746806075904e-05, + "loss": 0.1607, + "step": 40165 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862963021021116e-05, + "loss": 0.1426, + "step": 40170 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862179235966328e-05, + "loss": 0.2663, + "step": 40175 + }, + { + "epoch": 1.87, + "learning_rate": 1.3861395450911544e-05, + "loss": 0.3673, + "step": 40180 + }, + { + "epoch": 1.88, + "learning_rate": 1.3860611665856756e-05, + "loss": 0.3103, + "step": 40185 + }, + { + "epoch": 1.88, + "learning_rate": 1.385982788080197e-05, + "loss": 0.0458, + "step": 40190 + }, + { + "epoch": 1.88, + "learning_rate": 1.3859044095747182e-05, + "loss": 0.0491, + "step": 40195 + }, + { + "epoch": 1.88, + "learning_rate": 1.3858260310692398e-05, + "loss": 0.0837, + "step": 40200 + }, + { + "epoch": 1.88, + "learning_rate": 1.385747652563761e-05, + "loss": 0.1077, + "step": 40205 + }, + { + "epoch": 1.88, + "learning_rate": 1.3856692740582824e-05, + "loss": 0.1128, + "step": 40210 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855908955528036e-05, + "loss": 0.2105, + "step": 40215 + }, + { + "epoch": 1.88, + "learning_rate": 1.3855125170473252e-05, + "loss": 0.169, + "step": 40220 + }, + { + "epoch": 1.88, + "learning_rate": 1.3854341385418464e-05, + "loss": 0.1373, + "step": 40225 + }, + { + "epoch": 1.88, + "learning_rate": 1.3853557600363678e-05, + "loss": 0.2803, + "step": 40230 + }, + { + "epoch": 1.88, + "learning_rate": 1.385277381530889e-05, + "loss": 0.2458, + "step": 40235 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851990030254104e-05, + "loss": 0.067, + "step": 40240 + }, + { + "epoch": 1.88, + "learning_rate": 1.3851206245199318e-05, + "loss": 0.0656, + "step": 40245 + }, + { + "epoch": 1.88, + "learning_rate": 1.385042246014453e-05, + "loss": 0.0975, + "step": 40250 + }, + { + "epoch": 1.88, + "learning_rate": 1.3849638675089744e-05, + "loss": 0.0711, + "step": 40255 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848854890034958e-05, + "loss": 0.1479, + "step": 40260 + }, + { + "epoch": 1.88, + "learning_rate": 1.3848071104980172e-05, + "loss": 0.0855, + "step": 40265 + }, + { + "epoch": 1.88, + "learning_rate": 1.3847287319925384e-05, + "loss": 0.2257, + "step": 40270 + }, + { + "epoch": 1.88, + "learning_rate": 1.38465035348706e-05, + "loss": 0.2097, + "step": 40275 + }, + { + "epoch": 1.88, + "learning_rate": 1.3845719749815812e-05, + "loss": 0.4084, + "step": 40280 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844935964761026e-05, + "loss": 0.2848, + "step": 40285 + }, + { + "epoch": 1.88, + "learning_rate": 1.3844152179706238e-05, + "loss": 0.104, + "step": 40290 + }, + { + "epoch": 1.88, + "learning_rate": 1.3843368394651454e-05, + "loss": 0.0429, + "step": 40295 + }, + { + "epoch": 1.88, + "learning_rate": 1.3842584609596666e-05, + "loss": 0.086, + "step": 40300 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841800824541878e-05, + "loss": 0.0811, + "step": 40305 + }, + { + "epoch": 1.88, + "learning_rate": 1.3841017039487092e-05, + "loss": 0.058, + "step": 40310 + }, + { + "epoch": 1.88, + "learning_rate": 1.3840233254432304e-05, + "loss": 0.1156, + "step": 40315 + }, + { + "epoch": 1.88, + "learning_rate": 1.383944946937752e-05, + "loss": 0.2159, + "step": 40320 + }, + { + "epoch": 1.88, + "learning_rate": 1.3838665684322732e-05, + "loss": 0.2478, + "step": 40325 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837881899267946e-05, + "loss": 0.2005, + "step": 40330 + }, + { + "epoch": 1.88, + "learning_rate": 1.3837098114213158e-05, + "loss": 0.3357, + "step": 40335 + }, + { + "epoch": 1.88, + "learning_rate": 1.3836314329158374e-05, + "loss": 0.0394, + "step": 40340 + }, + { + "epoch": 1.88, + "learning_rate": 1.3835530544103586e-05, + "loss": 0.0648, + "step": 40345 + }, + { + "epoch": 1.88, + "learning_rate": 1.38347467590488e-05, + "loss": 0.0823, + "step": 40350 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833962973994012e-05, + "loss": 0.0483, + "step": 40355 + }, + { + "epoch": 1.88, + "learning_rate": 1.3833179188939228e-05, + "loss": 0.193, + "step": 40360 + }, + { + "epoch": 1.88, + "learning_rate": 1.383239540388444e-05, + "loss": 0.1007, + "step": 40365 + }, + { + "epoch": 1.88, + "learning_rate": 1.3831611618829652e-05, + "loss": 0.1092, + "step": 40370 + }, + { + "epoch": 1.88, + "learning_rate": 1.3830827833774868e-05, + "loss": 0.2738, + "step": 40375 + }, + { + "epoch": 1.88, + "learning_rate": 1.383004404872008e-05, + "loss": 0.2871, + "step": 40380 + }, + { + "epoch": 1.88, + "learning_rate": 1.3829260263665294e-05, + "loss": 0.2501, + "step": 40385 + }, + { + "epoch": 1.88, + "learning_rate": 1.3828476478610506e-05, + "loss": 0.0473, + "step": 40390 + }, + { + "epoch": 1.88, + "learning_rate": 1.3827692693555722e-05, + "loss": 0.0597, + "step": 40395 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826908908500934e-05, + "loss": 0.0646, + "step": 40400 + }, + { + "epoch": 1.89, + "learning_rate": 1.3826125123446148e-05, + "loss": 0.1314, + "step": 40405 + }, + { + "epoch": 1.89, + "learning_rate": 1.382534133839136e-05, + "loss": 0.1628, + "step": 40410 + }, + { + "epoch": 1.89, + "learning_rate": 1.3824557553336576e-05, + "loss": 0.1858, + "step": 40415 + }, + { + "epoch": 1.89, + "learning_rate": 1.3823773768281788e-05, + "loss": 0.2056, + "step": 40420 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822989983227002e-05, + "loss": 0.1649, + "step": 40425 + }, + { + "epoch": 1.89, + "learning_rate": 1.3822206198172214e-05, + "loss": 0.1464, + "step": 40430 + }, + { + "epoch": 1.89, + "learning_rate": 1.3821422413117426e-05, + "loss": 0.3024, + "step": 40435 + }, + { + "epoch": 1.89, + "learning_rate": 1.3820638628062642e-05, + "loss": 0.0702, + "step": 40440 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819854843007854e-05, + "loss": 0.0468, + "step": 40445 + }, + { + "epoch": 1.89, + "learning_rate": 1.3819071057953068e-05, + "loss": 0.0472, + "step": 40450 + }, + { + "epoch": 1.89, + "learning_rate": 1.381828727289828e-05, + "loss": 0.0957, + "step": 40455 + }, + { + "epoch": 1.89, + "learning_rate": 1.3817503487843496e-05, + "loss": 0.1398, + "step": 40460 + }, + { + "epoch": 1.89, + "learning_rate": 1.3816719702788708e-05, + "loss": 0.109, + "step": 40465 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815935917733922e-05, + "loss": 0.2193, + "step": 40470 + }, + { + "epoch": 1.89, + "learning_rate": 1.3815152132679136e-05, + "loss": 0.2995, + "step": 40475 + }, + { + "epoch": 1.89, + "learning_rate": 1.381436834762435e-05, + "loss": 0.336, + "step": 40480 + }, + { + "epoch": 1.89, + "learning_rate": 1.3813584562569562e-05, + "loss": 0.2809, + "step": 40485 + }, + { + "epoch": 1.89, + "learning_rate": 1.3812800777514777e-05, + "loss": 0.0376, + "step": 40490 + }, + { + "epoch": 1.89, + "learning_rate": 1.381201699245999e-05, + "loss": 0.0265, + "step": 40495 + }, + { + "epoch": 1.89, + "learning_rate": 1.3811233207405202e-05, + "loss": 0.0517, + "step": 40500 + }, + { + "epoch": 1.89, + "learning_rate": 1.3810449422350416e-05, + "loss": 0.0746, + "step": 40505 + }, + { + "epoch": 1.89, + "learning_rate": 1.3809665637295628e-05, + "loss": 0.0954, + "step": 40510 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808881852240844e-05, + "loss": 0.1229, + "step": 40515 + }, + { + "epoch": 1.89, + "learning_rate": 1.3808098067186056e-05, + "loss": 0.1508, + "step": 40520 + }, + { + "epoch": 1.89, + "learning_rate": 1.380731428213127e-05, + "loss": 0.2266, + "step": 40525 + }, + { + "epoch": 1.89, + "learning_rate": 1.3806530497076482e-05, + "loss": 0.3108, + "step": 40530 + }, + { + "epoch": 1.89, + "learning_rate": 1.3805746712021697e-05, + "loss": 0.2541, + "step": 40535 + }, + { + "epoch": 1.89, + "learning_rate": 1.380496292696691e-05, + "loss": 0.0478, + "step": 40540 + }, + { + "epoch": 1.89, + "learning_rate": 1.3804179141912124e-05, + "loss": 0.0496, + "step": 40545 + }, + { + "epoch": 1.89, + "learning_rate": 1.3803395356857336e-05, + "loss": 0.0953, + "step": 40550 + }, + { + "epoch": 1.89, + "learning_rate": 1.3802611571802551e-05, + "loss": 0.1024, + "step": 40555 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801827786747764e-05, + "loss": 0.0996, + "step": 40560 + }, + { + "epoch": 1.89, + "learning_rate": 1.3801044001692976e-05, + "loss": 0.1564, + "step": 40565 + }, + { + "epoch": 1.89, + "learning_rate": 1.380026021663819e-05, + "loss": 0.2166, + "step": 40570 + }, + { + "epoch": 1.89, + "learning_rate": 1.3799476431583404e-05, + "loss": 0.226, + "step": 40575 + }, + { + "epoch": 1.89, + "learning_rate": 1.3798692646528618e-05, + "loss": 0.219, + "step": 40580 + }, + { + "epoch": 1.89, + "learning_rate": 1.379790886147383e-05, + "loss": 0.1555, + "step": 40585 + }, + { + "epoch": 1.89, + "learning_rate": 1.3797125076419045e-05, + "loss": 0.0567, + "step": 40590 + }, + { + "epoch": 1.89, + "learning_rate": 1.3796341291364258e-05, + "loss": 0.0816, + "step": 40595 + }, + { + "epoch": 1.89, + "learning_rate": 1.3795557506309471e-05, + "loss": 0.0672, + "step": 40600 + }, + { + "epoch": 1.89, + "learning_rate": 1.3794773721254684e-05, + "loss": 0.0293, + "step": 40605 + }, + { + "epoch": 1.89, + "learning_rate": 1.37939899361999e-05, + "loss": 0.1691, + "step": 40610 + }, + { + "epoch": 1.9, + "learning_rate": 1.3793206151145111e-05, + "loss": 0.1475, + "step": 40615 + }, + { + "epoch": 1.9, + "learning_rate": 1.3792422366090325e-05, + "loss": 0.1439, + "step": 40620 + }, + { + "epoch": 1.9, + "learning_rate": 1.3791638581035538e-05, + "loss": 0.2063, + "step": 40625 + }, + { + "epoch": 1.9, + "learning_rate": 1.379085479598075e-05, + "loss": 0.2501, + "step": 40630 + }, + { + "epoch": 1.9, + "learning_rate": 1.3790071010925965e-05, + "loss": 0.24, + "step": 40635 + }, + { + "epoch": 1.9, + "learning_rate": 1.3789287225871178e-05, + "loss": 0.0474, + "step": 40640 + }, + { + "epoch": 1.9, + "learning_rate": 1.3788503440816392e-05, + "loss": 0.0137, + "step": 40645 + }, + { + "epoch": 1.9, + "learning_rate": 1.3787719655761604e-05, + "loss": 0.0644, + "step": 40650 + }, + { + "epoch": 1.9, + "learning_rate": 1.378693587070682e-05, + "loss": 0.0847, + "step": 40655 + }, + { + "epoch": 1.9, + "learning_rate": 1.3786152085652032e-05, + "loss": 0.136, + "step": 40660 + }, + { + "epoch": 1.9, + "learning_rate": 1.3785368300597245e-05, + "loss": 0.1225, + "step": 40665 + }, + { + "epoch": 1.9, + "learning_rate": 1.3784584515542458e-05, + "loss": 0.1019, + "step": 40670 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783800730487673e-05, + "loss": 0.2475, + "step": 40675 + }, + { + "epoch": 1.9, + "learning_rate": 1.3783016945432885e-05, + "loss": 0.2234, + "step": 40680 + }, + { + "epoch": 1.9, + "learning_rate": 1.37822331603781e-05, + "loss": 0.2204, + "step": 40685 + }, + { + "epoch": 1.9, + "learning_rate": 1.3781449375323313e-05, + "loss": 0.0354, + "step": 40690 + }, + { + "epoch": 1.9, + "learning_rate": 1.3780665590268525e-05, + "loss": 0.0904, + "step": 40695 + }, + { + "epoch": 1.9, + "learning_rate": 1.377988180521374e-05, + "loss": 0.0707, + "step": 40700 + }, + { + "epoch": 1.9, + "learning_rate": 1.3779098020158952e-05, + "loss": 0.0884, + "step": 40705 + }, + { + "epoch": 1.9, + "learning_rate": 1.3778314235104167e-05, + "loss": 0.1006, + "step": 40710 + }, + { + "epoch": 1.9, + "learning_rate": 1.377753045004938e-05, + "loss": 0.0914, + "step": 40715 + }, + { + "epoch": 1.9, + "learning_rate": 1.3776746664994593e-05, + "loss": 0.2008, + "step": 40720 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775962879939806e-05, + "loss": 0.1749, + "step": 40725 + }, + { + "epoch": 1.9, + "learning_rate": 1.3775179094885021e-05, + "loss": 0.3868, + "step": 40730 + }, + { + "epoch": 1.9, + "learning_rate": 1.3774395309830233e-05, + "loss": 0.3199, + "step": 40735 + }, + { + "epoch": 1.9, + "learning_rate": 1.3773611524775447e-05, + "loss": 0.065, + "step": 40740 + }, + { + "epoch": 1.9, + "learning_rate": 1.377282773972066e-05, + "loss": 0.071, + "step": 40745 + }, + { + "epoch": 1.9, + "learning_rate": 1.3772043954665875e-05, + "loss": 0.0642, + "step": 40750 + }, + { + "epoch": 1.9, + "learning_rate": 1.3771260169611087e-05, + "loss": 0.1533, + "step": 40755 + }, + { + "epoch": 1.9, + "learning_rate": 1.37704763845563e-05, + "loss": 0.0883, + "step": 40760 + }, + { + "epoch": 1.9, + "learning_rate": 1.3769692599501513e-05, + "loss": 0.2085, + "step": 40765 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768908814446726e-05, + "loss": 0.1509, + "step": 40770 + }, + { + "epoch": 1.9, + "learning_rate": 1.3768125029391941e-05, + "loss": 0.1984, + "step": 40775 + }, + { + "epoch": 1.9, + "learning_rate": 1.3767341244337153e-05, + "loss": 0.4016, + "step": 40780 + }, + { + "epoch": 1.9, + "learning_rate": 1.3766557459282367e-05, + "loss": 0.2388, + "step": 40785 + }, + { + "epoch": 1.9, + "learning_rate": 1.3765773674227581e-05, + "loss": 0.0885, + "step": 40790 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764989889172795e-05, + "loss": 0.0829, + "step": 40795 + }, + { + "epoch": 1.9, + "learning_rate": 1.3764206104118007e-05, + "loss": 0.0609, + "step": 40800 + }, + { + "epoch": 1.9, + "learning_rate": 1.3763422319063223e-05, + "loss": 0.0371, + "step": 40805 + }, + { + "epoch": 1.9, + "learning_rate": 1.3762638534008435e-05, + "loss": 0.0782, + "step": 40810 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761854748953649e-05, + "loss": 0.1198, + "step": 40815 + }, + { + "epoch": 1.9, + "learning_rate": 1.3761070963898861e-05, + "loss": 0.1064, + "step": 40820 + }, + { + "epoch": 1.9, + "learning_rate": 1.3760287178844073e-05, + "loss": 0.2126, + "step": 40825 + }, + { + "epoch": 1.91, + "learning_rate": 1.3759503393789289e-05, + "loss": 0.2644, + "step": 40830 + }, + { + "epoch": 1.91, + "learning_rate": 1.3758719608734501e-05, + "loss": 0.2017, + "step": 40835 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757935823679715e-05, + "loss": 0.0589, + "step": 40840 + }, + { + "epoch": 1.91, + "learning_rate": 1.3757152038624927e-05, + "loss": 0.0665, + "step": 40845 + }, + { + "epoch": 1.91, + "learning_rate": 1.3756368253570143e-05, + "loss": 0.0637, + "step": 40850 + }, + { + "epoch": 1.91, + "learning_rate": 1.3755584468515355e-05, + "loss": 0.1055, + "step": 40855 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754800683460569e-05, + "loss": 0.0967, + "step": 40860 + }, + { + "epoch": 1.91, + "learning_rate": 1.3754016898405781e-05, + "loss": 0.1057, + "step": 40865 + }, + { + "epoch": 1.91, + "learning_rate": 1.3753233113350997e-05, + "loss": 0.1226, + "step": 40870 + }, + { + "epoch": 1.91, + "learning_rate": 1.3752449328296209e-05, + "loss": 0.1455, + "step": 40875 + }, + { + "epoch": 1.91, + "learning_rate": 1.3751665543241423e-05, + "loss": 0.4124, + "step": 40880 + }, + { + "epoch": 1.91, + "learning_rate": 1.3750881758186635e-05, + "loss": 0.2321, + "step": 40885 + }, + { + "epoch": 1.91, + "learning_rate": 1.375009797313185e-05, + "loss": 0.0824, + "step": 40890 + }, + { + "epoch": 1.91, + "learning_rate": 1.3749314188077063e-05, + "loss": 0.0691, + "step": 40895 + }, + { + "epoch": 1.91, + "learning_rate": 1.3748530403022275e-05, + "loss": 0.0824, + "step": 40900 + }, + { + "epoch": 1.91, + "learning_rate": 1.3747746617967491e-05, + "loss": 0.1582, + "step": 40905 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746962832912703e-05, + "loss": 0.104, + "step": 40910 + }, + { + "epoch": 1.91, + "learning_rate": 1.3746179047857917e-05, + "loss": 0.1756, + "step": 40915 + }, + { + "epoch": 1.91, + "learning_rate": 1.374539526280313e-05, + "loss": 0.1663, + "step": 40920 + }, + { + "epoch": 1.91, + "learning_rate": 1.3744611477748345e-05, + "loss": 0.2132, + "step": 40925 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743827692693557e-05, + "loss": 0.2881, + "step": 40930 + }, + { + "epoch": 1.91, + "learning_rate": 1.3743043907638771e-05, + "loss": 0.1533, + "step": 40935 + }, + { + "epoch": 1.91, + "learning_rate": 1.3742260122583983e-05, + "loss": 0.034, + "step": 40940 + }, + { + "epoch": 1.91, + "learning_rate": 1.3741476337529199e-05, + "loss": 0.0692, + "step": 40945 + }, + { + "epoch": 1.91, + "learning_rate": 1.3740692552474411e-05, + "loss": 0.0526, + "step": 40950 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739908767419623e-05, + "loss": 0.1364, + "step": 40955 + }, + { + "epoch": 1.91, + "learning_rate": 1.3739124982364837e-05, + "loss": 0.0757, + "step": 40960 + }, + { + "epoch": 1.91, + "learning_rate": 1.373834119731005e-05, + "loss": 0.1443, + "step": 40965 + }, + { + "epoch": 1.91, + "learning_rate": 1.3737557412255265e-05, + "loss": 0.2183, + "step": 40970 + }, + { + "epoch": 1.91, + "learning_rate": 1.3736773627200477e-05, + "loss": 0.2046, + "step": 40975 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735989842145691e-05, + "loss": 0.3461, + "step": 40980 + }, + { + "epoch": 1.91, + "learning_rate": 1.3735206057090903e-05, + "loss": 0.268, + "step": 40985 + }, + { + "epoch": 1.91, + "learning_rate": 1.3734422272036119e-05, + "loss": 0.0231, + "step": 40990 + }, + { + "epoch": 1.91, + "learning_rate": 1.3733638486981331e-05, + "loss": 0.1039, + "step": 40995 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732854701926545e-05, + "loss": 0.0878, + "step": 41000 + }, + { + "epoch": 1.91, + "learning_rate": 1.3732070916871759e-05, + "loss": 0.0818, + "step": 41005 + }, + { + "epoch": 1.91, + "learning_rate": 1.3731287131816973e-05, + "loss": 0.108, + "step": 41010 + }, + { + "epoch": 1.91, + "learning_rate": 1.3730503346762185e-05, + "loss": 0.1776, + "step": 41015 + }, + { + "epoch": 1.91, + "learning_rate": 1.3729719561707397e-05, + "loss": 0.0533, + "step": 41020 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728935776652613e-05, + "loss": 0.1883, + "step": 41025 + }, + { + "epoch": 1.91, + "learning_rate": 1.3728151991597825e-05, + "loss": 0.3926, + "step": 41030 + }, + { + "epoch": 1.91, + "learning_rate": 1.3727368206543039e-05, + "loss": 0.2241, + "step": 41035 + }, + { + "epoch": 1.91, + "learning_rate": 1.3726584421488251e-05, + "loss": 0.0445, + "step": 41040 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725800636433467e-05, + "loss": 0.087, + "step": 41045 + }, + { + "epoch": 1.92, + "learning_rate": 1.3725016851378679e-05, + "loss": 0.0322, + "step": 41050 + }, + { + "epoch": 1.92, + "learning_rate": 1.3724233066323893e-05, + "loss": 0.0478, + "step": 41055 + }, + { + "epoch": 1.92, + "learning_rate": 1.3723449281269105e-05, + "loss": 0.1294, + "step": 41060 + }, + { + "epoch": 1.92, + "learning_rate": 1.372266549621432e-05, + "loss": 0.1646, + "step": 41065 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721881711159533e-05, + "loss": 0.1664, + "step": 41070 + }, + { + "epoch": 1.92, + "learning_rate": 1.3721097926104747e-05, + "loss": 0.2329, + "step": 41075 + }, + { + "epoch": 1.92, + "learning_rate": 1.3720314141049959e-05, + "loss": 0.2623, + "step": 41080 + }, + { + "epoch": 1.92, + "learning_rate": 1.3719530355995173e-05, + "loss": 0.2138, + "step": 41085 + }, + { + "epoch": 1.92, + "learning_rate": 1.3718746570940387e-05, + "loss": 0.0504, + "step": 41090 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717962785885599e-05, + "loss": 0.0839, + "step": 41095 + }, + { + "epoch": 1.92, + "learning_rate": 1.3717179000830813e-05, + "loss": 0.0589, + "step": 41100 + }, + { + "epoch": 1.92, + "learning_rate": 1.3716395215776027e-05, + "loss": 0.1023, + "step": 41105 + }, + { + "epoch": 1.92, + "learning_rate": 1.371561143072124e-05, + "loss": 0.1489, + "step": 41110 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714827645666453e-05, + "loss": 0.1497, + "step": 41115 + }, + { + "epoch": 1.92, + "learning_rate": 1.3714043860611669e-05, + "loss": 0.1494, + "step": 41120 + }, + { + "epoch": 1.92, + "learning_rate": 1.371326007555688e-05, + "loss": 0.2351, + "step": 41125 + }, + { + "epoch": 1.92, + "learning_rate": 1.3712476290502095e-05, + "loss": 0.3537, + "step": 41130 + }, + { + "epoch": 1.92, + "learning_rate": 1.3711692505447307e-05, + "loss": 0.3639, + "step": 41135 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710908720392522e-05, + "loss": 0.0352, + "step": 41140 + }, + { + "epoch": 1.92, + "learning_rate": 1.3710124935337735e-05, + "loss": 0.0505, + "step": 41145 + }, + { + "epoch": 1.92, + "learning_rate": 1.3709341150282947e-05, + "loss": 0.0559, + "step": 41150 + }, + { + "epoch": 1.92, + "learning_rate": 1.370855736522816e-05, + "loss": 0.0432, + "step": 41155 + }, + { + "epoch": 1.92, + "learning_rate": 1.3707773580173373e-05, + "loss": 0.2254, + "step": 41160 + }, + { + "epoch": 1.92, + "learning_rate": 1.3706989795118589e-05, + "loss": 0.1569, + "step": 41165 + }, + { + "epoch": 1.92, + "learning_rate": 1.37062060100638e-05, + "loss": 0.0941, + "step": 41170 + }, + { + "epoch": 1.92, + "learning_rate": 1.3705422225009015e-05, + "loss": 0.2558, + "step": 41175 + }, + { + "epoch": 1.92, + "learning_rate": 1.3704638439954227e-05, + "loss": 0.2511, + "step": 41180 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703854654899443e-05, + "loss": 0.3308, + "step": 41185 + }, + { + "epoch": 1.92, + "learning_rate": 1.3703070869844655e-05, + "loss": 0.0499, + "step": 41190 + }, + { + "epoch": 1.92, + "learning_rate": 1.3702287084789869e-05, + "loss": 0.0471, + "step": 41195 + }, + { + "epoch": 1.92, + "learning_rate": 1.370150329973508e-05, + "loss": 0.063, + "step": 41200 + }, + { + "epoch": 1.92, + "learning_rate": 1.3700719514680296e-05, + "loss": 0.0849, + "step": 41205 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699935729625509e-05, + "loss": 0.1009, + "step": 41210 + }, + { + "epoch": 1.92, + "learning_rate": 1.3699151944570721e-05, + "loss": 0.1324, + "step": 41215 + }, + { + "epoch": 1.92, + "learning_rate": 1.3698368159515936e-05, + "loss": 0.145, + "step": 41220 + }, + { + "epoch": 1.92, + "learning_rate": 1.3697584374461149e-05, + "loss": 0.206, + "step": 41225 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696800589406363e-05, + "loss": 0.3881, + "step": 41230 + }, + { + "epoch": 1.92, + "learning_rate": 1.3696016804351575e-05, + "loss": 0.3881, + "step": 41235 + }, + { + "epoch": 1.92, + "learning_rate": 1.369523301929679e-05, + "loss": 0.1123, + "step": 41240 + }, + { + "epoch": 1.92, + "learning_rate": 1.3694449234242003e-05, + "loss": 0.0195, + "step": 41245 + }, + { + "epoch": 1.92, + "learning_rate": 1.3693665449187217e-05, + "loss": 0.1247, + "step": 41250 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692881664132429e-05, + "loss": 0.0557, + "step": 41255 + }, + { + "epoch": 1.93, + "learning_rate": 1.3692097879077644e-05, + "loss": 0.0885, + "step": 41260 + }, + { + "epoch": 1.93, + "learning_rate": 1.3691314094022857e-05, + "loss": 0.1122, + "step": 41265 + }, + { + "epoch": 1.93, + "learning_rate": 1.369053030896807e-05, + "loss": 0.2121, + "step": 41270 + }, + { + "epoch": 1.93, + "learning_rate": 1.3689746523913283e-05, + "loss": 0.1881, + "step": 41275 + }, + { + "epoch": 1.93, + "learning_rate": 1.3688962738858495e-05, + "loss": 0.2472, + "step": 41280 + }, + { + "epoch": 1.93, + "learning_rate": 1.368817895380371e-05, + "loss": 0.1782, + "step": 41285 + }, + { + "epoch": 1.93, + "learning_rate": 1.3687395168748923e-05, + "loss": 0.0595, + "step": 41290 + }, + { + "epoch": 1.93, + "learning_rate": 1.3686611383694137e-05, + "loss": 0.0678, + "step": 41295 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685827598639349e-05, + "loss": 0.1161, + "step": 41300 + }, + { + "epoch": 1.93, + "learning_rate": 1.3685043813584564e-05, + "loss": 0.0635, + "step": 41305 + }, + { + "epoch": 1.93, + "learning_rate": 1.3684260028529777e-05, + "loss": 0.1394, + "step": 41310 + }, + { + "epoch": 1.93, + "learning_rate": 1.368347624347499e-05, + "loss": 0.1814, + "step": 41315 + }, + { + "epoch": 1.93, + "learning_rate": 1.3682692458420204e-05, + "loss": 0.3026, + "step": 41320 + }, + { + "epoch": 1.93, + "learning_rate": 1.3681908673365418e-05, + "loss": 0.2154, + "step": 41325 + }, + { + "epoch": 1.93, + "learning_rate": 1.368112488831063e-05, + "loss": 0.2504, + "step": 41330 + }, + { + "epoch": 1.93, + "learning_rate": 1.3680341103255846e-05, + "loss": 0.3073, + "step": 41335 + }, + { + "epoch": 1.93, + "learning_rate": 1.3679557318201058e-05, + "loss": 0.0638, + "step": 41340 + }, + { + "epoch": 1.93, + "learning_rate": 1.367877353314627e-05, + "loss": 0.0279, + "step": 41345 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677989748091484e-05, + "loss": 0.1001, + "step": 41350 + }, + { + "epoch": 1.93, + "learning_rate": 1.3677205963036697e-05, + "loss": 0.0529, + "step": 41355 + }, + { + "epoch": 1.93, + "learning_rate": 1.3676422177981912e-05, + "loss": 0.1054, + "step": 41360 + }, + { + "epoch": 1.93, + "learning_rate": 1.3675638392927124e-05, + "loss": 0.111, + "step": 41365 + }, + { + "epoch": 1.93, + "learning_rate": 1.3674854607872338e-05, + "loss": 0.0676, + "step": 41370 + }, + { + "epoch": 1.93, + "learning_rate": 1.367407082281755e-05, + "loss": 0.243, + "step": 41375 + }, + { + "epoch": 1.93, + "learning_rate": 1.3673287037762766e-05, + "loss": 0.4077, + "step": 41380 + }, + { + "epoch": 1.93, + "learning_rate": 1.3672503252707978e-05, + "loss": 0.3552, + "step": 41385 + }, + { + "epoch": 1.93, + "learning_rate": 1.3671719467653192e-05, + "loss": 0.0411, + "step": 41390 + }, + { + "epoch": 1.93, + "learning_rate": 1.3670935682598405e-05, + "loss": 0.039, + "step": 41395 + }, + { + "epoch": 1.93, + "learning_rate": 1.367015189754362e-05, + "loss": 0.0688, + "step": 41400 + }, + { + "epoch": 1.93, + "learning_rate": 1.3669368112488832e-05, + "loss": 0.1222, + "step": 41405 + }, + { + "epoch": 1.93, + "learning_rate": 1.3668584327434045e-05, + "loss": 0.073, + "step": 41410 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667800542379258e-05, + "loss": 0.0939, + "step": 41415 + }, + { + "epoch": 1.93, + "learning_rate": 1.3667016757324472e-05, + "loss": 0.1917, + "step": 41420 + }, + { + "epoch": 1.93, + "learning_rate": 1.3666232972269686e-05, + "loss": 0.1409, + "step": 41425 + }, + { + "epoch": 1.93, + "learning_rate": 1.3665449187214898e-05, + "loss": 0.3746, + "step": 41430 + }, + { + "epoch": 1.93, + "learning_rate": 1.3664665402160114e-05, + "loss": 0.3048, + "step": 41435 + }, + { + "epoch": 1.93, + "learning_rate": 1.3663881617105326e-05, + "loss": 0.0518, + "step": 41440 + }, + { + "epoch": 1.93, + "learning_rate": 1.366309783205054e-05, + "loss": 0.0805, + "step": 41445 + }, + { + "epoch": 1.93, + "learning_rate": 1.3662314046995752e-05, + "loss": 0.0876, + "step": 41450 + }, + { + "epoch": 1.93, + "learning_rate": 1.3661530261940968e-05, + "loss": 0.0906, + "step": 41455 + }, + { + "epoch": 1.93, + "learning_rate": 1.366074647688618e-05, + "loss": 0.0911, + "step": 41460 + }, + { + "epoch": 1.93, + "learning_rate": 1.3659962691831394e-05, + "loss": 0.1121, + "step": 41465 + }, + { + "epoch": 1.94, + "learning_rate": 1.3659178906776606e-05, + "loss": 0.1549, + "step": 41470 + }, + { + "epoch": 1.94, + "learning_rate": 1.3658395121721819e-05, + "loss": 0.2158, + "step": 41475 + }, + { + "epoch": 1.94, + "learning_rate": 1.3657611336667034e-05, + "loss": 0.3177, + "step": 41480 + }, + { + "epoch": 1.94, + "learning_rate": 1.3656827551612246e-05, + "loss": 0.2616, + "step": 41485 + }, + { + "epoch": 1.94, + "learning_rate": 1.365604376655746e-05, + "loss": 0.0809, + "step": 41490 + }, + { + "epoch": 1.94, + "learning_rate": 1.3655259981502672e-05, + "loss": 0.0507, + "step": 41495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3654476196447888e-05, + "loss": 0.0408, + "step": 41500 + }, + { + "epoch": 1.94, + "learning_rate": 1.36536924113931e-05, + "loss": 0.079, + "step": 41505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652908626338314e-05, + "loss": 0.1017, + "step": 41510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3652124841283526e-05, + "loss": 0.0745, + "step": 41515 + }, + { + "epoch": 1.94, + "learning_rate": 1.3651341056228742e-05, + "loss": 0.1234, + "step": 41520 + }, + { + "epoch": 1.94, + "learning_rate": 1.3650557271173954e-05, + "loss": 0.23, + "step": 41525 + }, + { + "epoch": 1.94, + "learning_rate": 1.3649773486119168e-05, + "loss": 0.2216, + "step": 41530 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648989701064382e-05, + "loss": 0.2246, + "step": 41535 + }, + { + "epoch": 1.94, + "learning_rate": 1.3648205916009594e-05, + "loss": 0.0395, + "step": 41540 + }, + { + "epoch": 1.94, + "learning_rate": 1.3647422130954808e-05, + "loss": 0.0559, + "step": 41545 + }, + { + "epoch": 1.94, + "learning_rate": 1.364663834590002e-05, + "loss": 0.0517, + "step": 41550 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645854560845236e-05, + "loss": 0.1465, + "step": 41555 + }, + { + "epoch": 1.94, + "learning_rate": 1.3645070775790448e-05, + "loss": 0.2362, + "step": 41560 + }, + { + "epoch": 1.94, + "learning_rate": 1.3644286990735662e-05, + "loss": 0.1821, + "step": 41565 + }, + { + "epoch": 1.94, + "learning_rate": 1.3643503205680874e-05, + "loss": 0.1984, + "step": 41570 + }, + { + "epoch": 1.94, + "learning_rate": 1.364271942062609e-05, + "loss": 0.2761, + "step": 41575 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641935635571302e-05, + "loss": 0.4789, + "step": 41580 + }, + { + "epoch": 1.94, + "learning_rate": 1.3641151850516516e-05, + "loss": 0.2259, + "step": 41585 + }, + { + "epoch": 1.94, + "learning_rate": 1.3640368065461728e-05, + "loss": 0.0482, + "step": 41590 + }, + { + "epoch": 1.94, + "learning_rate": 1.3639584280406944e-05, + "loss": 0.0146, + "step": 41595 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638800495352156e-05, + "loss": 0.1254, + "step": 41600 + }, + { + "epoch": 1.94, + "learning_rate": 1.3638016710297368e-05, + "loss": 0.0778, + "step": 41605 + }, + { + "epoch": 1.94, + "learning_rate": 1.3637232925242582e-05, + "loss": 0.0651, + "step": 41610 + }, + { + "epoch": 1.94, + "learning_rate": 1.3636449140187794e-05, + "loss": 0.1359, + "step": 41615 + }, + { + "epoch": 1.94, + "learning_rate": 1.363566535513301e-05, + "loss": 0.2115, + "step": 41620 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634881570078222e-05, + "loss": 0.1439, + "step": 41625 + }, + { + "epoch": 1.94, + "learning_rate": 1.3634097785023436e-05, + "loss": 0.3997, + "step": 41630 + }, + { + "epoch": 1.94, + "learning_rate": 1.363331399996865e-05, + "loss": 0.2421, + "step": 41635 + }, + { + "epoch": 1.94, + "learning_rate": 1.3632530214913864e-05, + "loss": 0.0331, + "step": 41640 + }, + { + "epoch": 1.94, + "learning_rate": 1.3631746429859076e-05, + "loss": 0.1243, + "step": 41645 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630962644804292e-05, + "loss": 0.147, + "step": 41650 + }, + { + "epoch": 1.94, + "learning_rate": 1.3630178859749504e-05, + "loss": 0.0483, + "step": 41655 + }, + { + "epoch": 1.94, + "learning_rate": 1.3629395074694718e-05, + "loss": 0.0896, + "step": 41660 + }, + { + "epoch": 1.94, + "learning_rate": 1.362861128963993e-05, + "loss": 0.1532, + "step": 41665 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627827504585142e-05, + "loss": 0.1593, + "step": 41670 + }, + { + "epoch": 1.94, + "learning_rate": 1.3627043719530358e-05, + "loss": 0.1917, + "step": 41675 + }, + { + "epoch": 1.94, + "learning_rate": 1.362625993447557e-05, + "loss": 0.318, + "step": 41680 + }, + { + "epoch": 1.95, + "learning_rate": 1.3625476149420784e-05, + "loss": 0.3726, + "step": 41685 + }, + { + "epoch": 1.95, + "learning_rate": 1.3624692364365996e-05, + "loss": 0.0432, + "step": 41690 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623908579311212e-05, + "loss": 0.0398, + "step": 41695 + }, + { + "epoch": 1.95, + "learning_rate": 1.3623124794256424e-05, + "loss": 0.0579, + "step": 41700 + }, + { + "epoch": 1.95, + "learning_rate": 1.3622341009201638e-05, + "loss": 0.0813, + "step": 41705 + }, + { + "epoch": 1.95, + "learning_rate": 1.362155722414685e-05, + "loss": 0.0798, + "step": 41710 + }, + { + "epoch": 1.95, + "learning_rate": 1.3620773439092066e-05, + "loss": 0.0768, + "step": 41715 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619989654037278e-05, + "loss": 0.1886, + "step": 41720 + }, + { + "epoch": 1.95, + "learning_rate": 1.3619205868982492e-05, + "loss": 0.1185, + "step": 41725 + }, + { + "epoch": 1.95, + "learning_rate": 1.3618422083927704e-05, + "loss": 0.2939, + "step": 41730 + }, + { + "epoch": 1.95, + "learning_rate": 1.3617638298872918e-05, + "loss": 0.295, + "step": 41735 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616854513818132e-05, + "loss": 0.028, + "step": 41740 + }, + { + "epoch": 1.95, + "learning_rate": 1.3616070728763344e-05, + "loss": 0.0461, + "step": 41745 + }, + { + "epoch": 1.95, + "learning_rate": 1.361528694370856e-05, + "loss": 0.0892, + "step": 41750 + }, + { + "epoch": 1.95, + "learning_rate": 1.3614503158653772e-05, + "loss": 0.0907, + "step": 41755 + }, + { + "epoch": 1.95, + "learning_rate": 1.3613719373598986e-05, + "loss": 0.0892, + "step": 41760 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612935588544198e-05, + "loss": 0.1427, + "step": 41765 + }, + { + "epoch": 1.95, + "learning_rate": 1.3612151803489414e-05, + "loss": 0.108, + "step": 41770 + }, + { + "epoch": 1.95, + "learning_rate": 1.3611368018434626e-05, + "loss": 0.206, + "step": 41775 + }, + { + "epoch": 1.95, + "learning_rate": 1.361058423337984e-05, + "loss": 0.2341, + "step": 41780 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609800448325052e-05, + "loss": 0.2855, + "step": 41785 + }, + { + "epoch": 1.95, + "learning_rate": 1.3609016663270268e-05, + "loss": 0.0509, + "step": 41790 + }, + { + "epoch": 1.95, + "learning_rate": 1.360823287821548e-05, + "loss": 0.0388, + "step": 41795 + }, + { + "epoch": 1.95, + "learning_rate": 1.3607449093160692e-05, + "loss": 0.0668, + "step": 41800 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606665308105906e-05, + "loss": 0.067, + "step": 41805 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605881523051118e-05, + "loss": 0.1153, + "step": 41810 + }, + { + "epoch": 1.95, + "learning_rate": 1.3605097737996334e-05, + "loss": 0.183, + "step": 41815 + }, + { + "epoch": 1.95, + "learning_rate": 1.3604313952941546e-05, + "loss": 0.2191, + "step": 41820 + }, + { + "epoch": 1.95, + "learning_rate": 1.360353016788676e-05, + "loss": 0.1736, + "step": 41825 + }, + { + "epoch": 1.95, + "learning_rate": 1.3602746382831972e-05, + "loss": 0.3504, + "step": 41830 + }, + { + "epoch": 1.95, + "learning_rate": 1.3601962597777188e-05, + "loss": 0.2775, + "step": 41835 + }, + { + "epoch": 1.95, + "learning_rate": 1.36011788127224e-05, + "loss": 0.0406, + "step": 41840 + }, + { + "epoch": 1.95, + "learning_rate": 1.3600395027667614e-05, + "loss": 0.0676, + "step": 41845 + }, + { + "epoch": 1.95, + "learning_rate": 1.3599611242612828e-05, + "loss": 0.0566, + "step": 41850 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598827457558042e-05, + "loss": 0.0765, + "step": 41855 + }, + { + "epoch": 1.95, + "learning_rate": 1.3598043672503254e-05, + "loss": 0.1108, + "step": 41860 + }, + { + "epoch": 1.95, + "learning_rate": 1.3597259887448466e-05, + "loss": 0.1186, + "step": 41865 + }, + { + "epoch": 1.95, + "learning_rate": 1.3596476102393682e-05, + "loss": 0.1556, + "step": 41870 + }, + { + "epoch": 1.95, + "learning_rate": 1.3595692317338894e-05, + "loss": 0.16, + "step": 41875 + }, + { + "epoch": 1.95, + "learning_rate": 1.3594908532284108e-05, + "loss": 0.3715, + "step": 41880 + }, + { + "epoch": 1.95, + "learning_rate": 1.359412474722932e-05, + "loss": 0.272, + "step": 41885 + }, + { + "epoch": 1.95, + "learning_rate": 1.3593340962174535e-05, + "loss": 0.0219, + "step": 41890 + }, + { + "epoch": 1.95, + "learning_rate": 1.3592557177119748e-05, + "loss": 0.0154, + "step": 41895 + }, + { + "epoch": 1.96, + "learning_rate": 1.3591773392064962e-05, + "loss": 0.1363, + "step": 41900 + }, + { + "epoch": 1.96, + "learning_rate": 1.3590989607010174e-05, + "loss": 0.074, + "step": 41905 + }, + { + "epoch": 1.96, + "learning_rate": 1.359020582195539e-05, + "loss": 0.1535, + "step": 41910 + }, + { + "epoch": 1.96, + "learning_rate": 1.3589422036900602e-05, + "loss": 0.0951, + "step": 41915 + }, + { + "epoch": 1.96, + "learning_rate": 1.3588638251845816e-05, + "loss": 0.1372, + "step": 41920 + }, + { + "epoch": 1.96, + "learning_rate": 1.3587854466791028e-05, + "loss": 0.2103, + "step": 41925 + }, + { + "epoch": 1.96, + "learning_rate": 1.358707068173624e-05, + "loss": 0.1851, + "step": 41930 + }, + { + "epoch": 1.96, + "learning_rate": 1.3586286896681456e-05, + "loss": 0.3416, + "step": 41935 + }, + { + "epoch": 1.96, + "learning_rate": 1.3585503111626668e-05, + "loss": 0.0271, + "step": 41940 + }, + { + "epoch": 1.96, + "learning_rate": 1.3584719326571882e-05, + "loss": 0.0605, + "step": 41945 + }, + { + "epoch": 1.96, + "learning_rate": 1.3583935541517096e-05, + "loss": 0.0437, + "step": 41950 + }, + { + "epoch": 1.96, + "learning_rate": 1.358315175646231e-05, + "loss": 0.0778, + "step": 41955 + }, + { + "epoch": 1.96, + "learning_rate": 1.3582367971407522e-05, + "loss": 0.0917, + "step": 41960 + }, + { + "epoch": 1.96, + "learning_rate": 1.3581584186352737e-05, + "loss": 0.1232, + "step": 41965 + }, + { + "epoch": 1.96, + "learning_rate": 1.358080040129795e-05, + "loss": 0.236, + "step": 41970 + }, + { + "epoch": 1.96, + "learning_rate": 1.3580016616243163e-05, + "loss": 0.2185, + "step": 41975 + }, + { + "epoch": 1.96, + "learning_rate": 1.3579232831188376e-05, + "loss": 0.393, + "step": 41980 + }, + { + "epoch": 1.96, + "learning_rate": 1.3578449046133591e-05, + "loss": 0.3043, + "step": 41985 + }, + { + "epoch": 1.96, + "learning_rate": 1.3577665261078803e-05, + "loss": 0.0506, + "step": 41990 + }, + { + "epoch": 1.96, + "learning_rate": 1.3576881476024016e-05, + "loss": 0.0796, + "step": 41995 + }, + { + "epoch": 1.96, + "learning_rate": 1.357609769096923e-05, + "loss": 0.0657, + "step": 42000 + }, + { + "epoch": 1.96, + "learning_rate": 1.3575313905914442e-05, + "loss": 0.0712, + "step": 42005 + }, + { + "epoch": 1.96, + "learning_rate": 1.3574530120859657e-05, + "loss": 0.1189, + "step": 42010 + }, + { + "epoch": 1.96, + "learning_rate": 1.357374633580487e-05, + "loss": 0.157, + "step": 42015 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572962550750083e-05, + "loss": 0.1085, + "step": 42020 + }, + { + "epoch": 1.96, + "learning_rate": 1.3572178765695296e-05, + "loss": 0.2305, + "step": 42025 + }, + { + "epoch": 1.96, + "learning_rate": 1.3571394980640511e-05, + "loss": 0.3907, + "step": 42030 + }, + { + "epoch": 1.96, + "learning_rate": 1.3570611195585723e-05, + "loss": 0.2425, + "step": 42035 + }, + { + "epoch": 1.96, + "learning_rate": 1.3569827410530937e-05, + "loss": 0.039, + "step": 42040 + }, + { + "epoch": 1.96, + "learning_rate": 1.356904362547615e-05, + "loss": 0.0644, + "step": 42045 + }, + { + "epoch": 1.96, + "learning_rate": 1.3568259840421365e-05, + "loss": 0.0367, + "step": 42050 + }, + { + "epoch": 1.96, + "learning_rate": 1.3567476055366577e-05, + "loss": 0.0325, + "step": 42055 + }, + { + "epoch": 1.96, + "learning_rate": 1.356669227031179e-05, + "loss": 0.1135, + "step": 42060 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565908485257005e-05, + "loss": 0.1177, + "step": 42065 + }, + { + "epoch": 1.96, + "learning_rate": 1.3565124700202217e-05, + "loss": 0.1744, + "step": 42070 + }, + { + "epoch": 1.96, + "learning_rate": 1.3564340915147431e-05, + "loss": 0.267, + "step": 42075 + }, + { + "epoch": 1.96, + "learning_rate": 1.3563557130092644e-05, + "loss": 0.2601, + "step": 42080 + }, + { + "epoch": 1.96, + "learning_rate": 1.3562773345037859e-05, + "loss": 0.3815, + "step": 42085 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561989559983071e-05, + "loss": 0.0435, + "step": 42090 + }, + { + "epoch": 1.96, + "learning_rate": 1.3561205774928285e-05, + "loss": 0.0452, + "step": 42095 + }, + { + "epoch": 1.96, + "learning_rate": 1.3560421989873497e-05, + "loss": 0.1247, + "step": 42100 + }, + { + "epoch": 1.96, + "learning_rate": 1.3559638204818713e-05, + "loss": 0.0745, + "step": 42105 + }, + { + "epoch": 1.96, + "learning_rate": 1.3558854419763925e-05, + "loss": 0.1184, + "step": 42110 + }, + { + "epoch": 1.97, + "learning_rate": 1.355807063470914e-05, + "loss": 0.1522, + "step": 42115 + }, + { + "epoch": 1.97, + "learning_rate": 1.3557286849654351e-05, + "loss": 0.1299, + "step": 42120 + }, + { + "epoch": 1.97, + "learning_rate": 1.3556503064599564e-05, + "loss": 0.2057, + "step": 42125 + }, + { + "epoch": 1.97, + "learning_rate": 1.355571927954478e-05, + "loss": 0.2441, + "step": 42130 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554935494489991e-05, + "loss": 0.2413, + "step": 42135 + }, + { + "epoch": 1.97, + "learning_rate": 1.3554151709435205e-05, + "loss": 0.063, + "step": 42140 + }, + { + "epoch": 1.97, + "learning_rate": 1.3553367924380418e-05, + "loss": 0.0286, + "step": 42145 + }, + { + "epoch": 1.97, + "learning_rate": 1.3552584139325633e-05, + "loss": 0.0437, + "step": 42150 + }, + { + "epoch": 1.97, + "learning_rate": 1.3551800354270845e-05, + "loss": 0.0742, + "step": 42155 + }, + { + "epoch": 1.97, + "learning_rate": 1.355101656921606e-05, + "loss": 0.1081, + "step": 42160 + }, + { + "epoch": 1.97, + "learning_rate": 1.3550232784161273e-05, + "loss": 0.1142, + "step": 42165 + }, + { + "epoch": 1.97, + "learning_rate": 1.3549448999106487e-05, + "loss": 0.1755, + "step": 42170 + }, + { + "epoch": 1.97, + "learning_rate": 1.35486652140517e-05, + "loss": 0.2252, + "step": 42175 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547881428996915e-05, + "loss": 0.2728, + "step": 42180 + }, + { + "epoch": 1.97, + "learning_rate": 1.3547097643942127e-05, + "loss": 0.3548, + "step": 42185 + }, + { + "epoch": 1.97, + "learning_rate": 1.354631385888734e-05, + "loss": 0.1084, + "step": 42190 + }, + { + "epoch": 1.97, + "learning_rate": 1.3545530073832553e-05, + "loss": 0.0751, + "step": 42195 + }, + { + "epoch": 1.97, + "learning_rate": 1.3544746288777765e-05, + "loss": 0.1289, + "step": 42200 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543962503722981e-05, + "loss": 0.1643, + "step": 42205 + }, + { + "epoch": 1.97, + "learning_rate": 1.3543178718668193e-05, + "loss": 0.1434, + "step": 42210 + }, + { + "epoch": 1.97, + "learning_rate": 1.3542394933613407e-05, + "loss": 0.1457, + "step": 42215 + }, + { + "epoch": 1.97, + "learning_rate": 1.354161114855862e-05, + "loss": 0.1197, + "step": 42220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540827363503835e-05, + "loss": 0.3008, + "step": 42225 + }, + { + "epoch": 1.97, + "learning_rate": 1.3540043578449047e-05, + "loss": 0.2925, + "step": 42230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3539259793394261e-05, + "loss": 0.3225, + "step": 42235 + }, + { + "epoch": 1.97, + "learning_rate": 1.3538476008339473e-05, + "loss": 0.0174, + "step": 42240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3537692223284689e-05, + "loss": 0.0662, + "step": 42245 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536908438229901e-05, + "loss": 0.0883, + "step": 42250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3536124653175113e-05, + "loss": 0.1134, + "step": 42255 + }, + { + "epoch": 1.97, + "learning_rate": 1.3535340868120327e-05, + "loss": 0.0576, + "step": 42260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3534557083065541e-05, + "loss": 0.1248, + "step": 42265 + }, + { + "epoch": 1.97, + "learning_rate": 1.3533773298010755e-05, + "loss": 0.1739, + "step": 42270 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532989512955967e-05, + "loss": 0.1615, + "step": 42275 + }, + { + "epoch": 1.97, + "learning_rate": 1.3532205727901183e-05, + "loss": 0.2108, + "step": 42280 + }, + { + "epoch": 1.97, + "learning_rate": 1.3531421942846395e-05, + "loss": 0.1906, + "step": 42285 + }, + { + "epoch": 1.97, + "learning_rate": 1.3530638157791609e-05, + "loss": 0.0301, + "step": 42290 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529854372736821e-05, + "loss": 0.0248, + "step": 42295 + }, + { + "epoch": 1.97, + "learning_rate": 1.3529070587682037e-05, + "loss": 0.0653, + "step": 42300 + }, + { + "epoch": 1.97, + "learning_rate": 1.3528286802627249e-05, + "loss": 0.1097, + "step": 42305 + }, + { + "epoch": 1.97, + "learning_rate": 1.3527503017572463e-05, + "loss": 0.1149, + "step": 42310 + }, + { + "epoch": 1.97, + "learning_rate": 1.3526719232517675e-05, + "loss": 0.1353, + "step": 42315 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525935447462887e-05, + "loss": 0.1043, + "step": 42320 + }, + { + "epoch": 1.97, + "learning_rate": 1.3525151662408103e-05, + "loss": 0.3242, + "step": 42325 + }, + { + "epoch": 1.98, + "learning_rate": 1.3524367877353315e-05, + "loss": 0.2849, + "step": 42330 + }, + { + "epoch": 1.98, + "learning_rate": 1.3523584092298529e-05, + "loss": 0.3416, + "step": 42335 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522800307243741e-05, + "loss": 0.0547, + "step": 42340 + }, + { + "epoch": 1.98, + "learning_rate": 1.3522016522188957e-05, + "loss": 0.0568, + "step": 42345 + }, + { + "epoch": 1.98, + "learning_rate": 1.3521232737134169e-05, + "loss": 0.0978, + "step": 42350 + }, + { + "epoch": 1.98, + "learning_rate": 1.3520448952079383e-05, + "loss": 0.1457, + "step": 42355 + }, + { + "epoch": 1.98, + "learning_rate": 1.3519665167024595e-05, + "loss": 0.0819, + "step": 42360 + }, + { + "epoch": 1.98, + "learning_rate": 1.351888138196981e-05, + "loss": 0.1787, + "step": 42365 + }, + { + "epoch": 1.98, + "learning_rate": 1.3518097596915023e-05, + "loss": 0.1941, + "step": 42370 + }, + { + "epoch": 1.98, + "learning_rate": 1.3517313811860237e-05, + "loss": 0.2267, + "step": 42375 + }, + { + "epoch": 1.98, + "learning_rate": 1.351653002680545e-05, + "loss": 0.377, + "step": 42380 + }, + { + "epoch": 1.98, + "learning_rate": 1.3515746241750663e-05, + "loss": 0.2612, + "step": 42385 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514962456695877e-05, + "loss": 0.0406, + "step": 42390 + }, + { + "epoch": 1.98, + "learning_rate": 1.3514178671641089e-05, + "loss": 0.0252, + "step": 42395 + }, + { + "epoch": 1.98, + "learning_rate": 1.3513394886586305e-05, + "loss": 0.0102, + "step": 42400 + }, + { + "epoch": 1.98, + "learning_rate": 1.3512611101531517e-05, + "loss": 0.1246, + "step": 42405 + }, + { + "epoch": 1.98, + "learning_rate": 1.351182731647673e-05, + "loss": 0.1436, + "step": 42410 + }, + { + "epoch": 1.98, + "learning_rate": 1.3511043531421943e-05, + "loss": 0.1284, + "step": 42415 + }, + { + "epoch": 1.98, + "learning_rate": 1.3510259746367159e-05, + "loss": 0.1715, + "step": 42420 + }, + { + "epoch": 1.98, + "learning_rate": 1.3509475961312371e-05, + "loss": 0.2565, + "step": 42425 + }, + { + "epoch": 1.98, + "learning_rate": 1.3508692176257585e-05, + "loss": 0.2379, + "step": 42430 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507908391202797e-05, + "loss": 0.1811, + "step": 42435 + }, + { + "epoch": 1.98, + "learning_rate": 1.3507124606148013e-05, + "loss": 0.0796, + "step": 42440 + }, + { + "epoch": 1.98, + "learning_rate": 1.3506340821093225e-05, + "loss": 0.0504, + "step": 42445 + }, + { + "epoch": 1.98, + "learning_rate": 1.3505557036038437e-05, + "loss": 0.1026, + "step": 42450 + }, + { + "epoch": 1.98, + "learning_rate": 1.3504773250983651e-05, + "loss": 0.0687, + "step": 42455 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503989465928863e-05, + "loss": 0.0637, + "step": 42460 + }, + { + "epoch": 1.98, + "learning_rate": 1.3503205680874079e-05, + "loss": 0.0864, + "step": 42465 + }, + { + "epoch": 1.98, + "learning_rate": 1.3502421895819291e-05, + "loss": 0.1525, + "step": 42470 + }, + { + "epoch": 1.98, + "learning_rate": 1.3501638110764505e-05, + "loss": 0.1946, + "step": 42475 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500854325709719e-05, + "loss": 0.4185, + "step": 42480 + }, + { + "epoch": 1.98, + "learning_rate": 1.3500070540654933e-05, + "loss": 0.2679, + "step": 42485 + }, + { + "epoch": 1.98, + "learning_rate": 1.3499286755600145e-05, + "loss": 0.0422, + "step": 42490 + }, + { + "epoch": 1.98, + "learning_rate": 1.349850297054536e-05, + "loss": 0.1082, + "step": 42495 + }, + { + "epoch": 1.98, + "learning_rate": 1.3497719185490573e-05, + "loss": 0.0823, + "step": 42500 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496935400435787e-05, + "loss": 0.0491, + "step": 42505 + }, + { + "epoch": 1.98, + "learning_rate": 1.3496151615380999e-05, + "loss": 0.1773, + "step": 42510 + }, + { + "epoch": 1.98, + "learning_rate": 1.3495367830326211e-05, + "loss": 0.1344, + "step": 42515 + }, + { + "epoch": 1.98, + "learning_rate": 1.3494584045271427e-05, + "loss": 0.0834, + "step": 42520 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493800260216639e-05, + "loss": 0.2854, + "step": 42525 + }, + { + "epoch": 1.98, + "learning_rate": 1.3493016475161853e-05, + "loss": 0.3165, + "step": 42530 + }, + { + "epoch": 1.98, + "learning_rate": 1.3492232690107065e-05, + "loss": 0.2865, + "step": 42535 + }, + { + "epoch": 1.98, + "learning_rate": 1.349144890505228e-05, + "loss": 0.0551, + "step": 42540 + }, + { + "epoch": 1.99, + "learning_rate": 1.3490665119997493e-05, + "loss": 0.0634, + "step": 42545 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489881334942707e-05, + "loss": 0.0897, + "step": 42550 + }, + { + "epoch": 1.99, + "learning_rate": 1.3489097549887919e-05, + "loss": 0.1488, + "step": 42555 + }, + { + "epoch": 1.99, + "learning_rate": 1.3488313764833134e-05, + "loss": 0.1682, + "step": 42560 + }, + { + "epoch": 1.99, + "learning_rate": 1.3487529979778347e-05, + "loss": 0.1566, + "step": 42565 + }, + { + "epoch": 1.99, + "learning_rate": 1.348674619472356e-05, + "loss": 0.1352, + "step": 42570 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485962409668773e-05, + "loss": 0.2381, + "step": 42575 + }, + { + "epoch": 1.99, + "learning_rate": 1.3485178624613987e-05, + "loss": 0.3136, + "step": 42580 + }, + { + "epoch": 1.99, + "learning_rate": 1.34843948395592e-05, + "loss": 0.2361, + "step": 42585 + }, + { + "epoch": 1.99, + "learning_rate": 1.3483611054504413e-05, + "loss": 0.0656, + "step": 42590 + }, + { + "epoch": 1.99, + "learning_rate": 1.3482827269449628e-05, + "loss": 0.0353, + "step": 42595 + }, + { + "epoch": 1.99, + "learning_rate": 1.348204348439484e-05, + "loss": 0.0433, + "step": 42600 + }, + { + "epoch": 1.99, + "learning_rate": 1.3481259699340055e-05, + "loss": 0.0876, + "step": 42605 + }, + { + "epoch": 1.99, + "learning_rate": 1.3480475914285267e-05, + "loss": 0.1082, + "step": 42610 + }, + { + "epoch": 1.99, + "learning_rate": 1.3479692129230482e-05, + "loss": 0.1904, + "step": 42615 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478908344175695e-05, + "loss": 0.2017, + "step": 42620 + }, + { + "epoch": 1.99, + "learning_rate": 1.3478124559120908e-05, + "loss": 0.1698, + "step": 42625 + }, + { + "epoch": 1.99, + "learning_rate": 1.347734077406612e-05, + "loss": 0.4722, + "step": 42630 + }, + { + "epoch": 1.99, + "learning_rate": 1.3476556989011336e-05, + "loss": 0.2895, + "step": 42635 + }, + { + "epoch": 1.99, + "learning_rate": 1.3475773203956548e-05, + "loss": 0.0633, + "step": 42640 + }, + { + "epoch": 1.99, + "learning_rate": 1.347498941890176e-05, + "loss": 0.0354, + "step": 42645 + }, + { + "epoch": 1.99, + "learning_rate": 1.3474205633846975e-05, + "loss": 0.0921, + "step": 42650 + }, + { + "epoch": 1.99, + "learning_rate": 1.3473421848792187e-05, + "loss": 0.0774, + "step": 42655 + }, + { + "epoch": 1.99, + "learning_rate": 1.3472638063737402e-05, + "loss": 0.1163, + "step": 42660 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471854278682615e-05, + "loss": 0.1109, + "step": 42665 + }, + { + "epoch": 1.99, + "learning_rate": 1.3471070493627829e-05, + "loss": 0.1748, + "step": 42670 + }, + { + "epoch": 1.99, + "learning_rate": 1.347028670857304e-05, + "loss": 0.1627, + "step": 42675 + }, + { + "epoch": 1.99, + "learning_rate": 1.3469502923518256e-05, + "loss": 0.3621, + "step": 42680 + }, + { + "epoch": 1.99, + "learning_rate": 1.3468719138463469e-05, + "loss": 0.193, + "step": 42685 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467935353408682e-05, + "loss": 0.0512, + "step": 42690 + }, + { + "epoch": 1.99, + "learning_rate": 1.3467151568353896e-05, + "loss": 0.0666, + "step": 42695 + }, + { + "epoch": 1.99, + "learning_rate": 1.346636778329911e-05, + "loss": 0.1495, + "step": 42700 + }, + { + "epoch": 1.99, + "learning_rate": 1.3465583998244322e-05, + "loss": 0.13, + "step": 42705 + }, + { + "epoch": 1.99, + "learning_rate": 1.3464800213189535e-05, + "loss": 0.0941, + "step": 42710 + }, + { + "epoch": 1.99, + "learning_rate": 1.346401642813475e-05, + "loss": 0.1118, + "step": 42715 + }, + { + "epoch": 1.99, + "learning_rate": 1.3463232643079963e-05, + "loss": 0.1672, + "step": 42720 + }, + { + "epoch": 1.99, + "learning_rate": 1.3462448858025176e-05, + "loss": 0.2903, + "step": 42725 + }, + { + "epoch": 1.99, + "learning_rate": 1.3461665072970389e-05, + "loss": 0.5016, + "step": 42730 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460881287915604e-05, + "loss": 0.3474, + "step": 42735 + }, + { + "epoch": 1.99, + "learning_rate": 1.3460097502860816e-05, + "loss": 0.0593, + "step": 42740 + }, + { + "epoch": 1.99, + "learning_rate": 1.345931371780603e-05, + "loss": 0.0545, + "step": 42745 + }, + { + "epoch": 1.99, + "learning_rate": 1.3458529932751243e-05, + "loss": 0.0924, + "step": 42750 + }, + { + "epoch": 2.0, + "learning_rate": 1.3457746147696458e-05, + "loss": 0.0801, + "step": 42755 + }, + { + "epoch": 2.0, + "learning_rate": 1.345696236264167e-05, + "loss": 0.0866, + "step": 42760 + }, + { + "epoch": 2.0, + "learning_rate": 1.3456178577586884e-05, + "loss": 0.1346, + "step": 42765 + }, + { + "epoch": 2.0, + "learning_rate": 1.3455394792532096e-05, + "loss": 0.1853, + "step": 42770 + }, + { + "epoch": 2.0, + "learning_rate": 1.3454611007477309e-05, + "loss": 0.1966, + "step": 42775 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453827222422524e-05, + "loss": 0.238, + "step": 42780 + }, + { + "epoch": 2.0, + "learning_rate": 1.3453043437367737e-05, + "loss": 0.2965, + "step": 42785 + }, + { + "epoch": 2.0, + "learning_rate": 1.345225965231295e-05, + "loss": 0.1616, + "step": 42790 + }, + { + "epoch": 2.0, + "learning_rate": 1.3451475867258164e-05, + "loss": 0.0177, + "step": 42795 + }, + { + "epoch": 2.0, + "learning_rate": 1.3450692082203378e-05, + "loss": 0.0763, + "step": 42800 + }, + { + "epoch": 2.0, + "learning_rate": 1.344990829714859e-05, + "loss": 0.108, + "step": 42805 + }, + { + "epoch": 2.0, + "learning_rate": 1.3449124512093806e-05, + "loss": 0.0828, + "step": 42810 + }, + { + "epoch": 2.0, + "learning_rate": 1.3448340727039018e-05, + "loss": 0.0873, + "step": 42815 + }, + { + "epoch": 2.0, + "learning_rate": 1.3447556941984232e-05, + "loss": 0.1054, + "step": 42820 + }, + { + "epoch": 2.0, + "learning_rate": 1.3446773156929444e-05, + "loss": 0.2872, + "step": 42825 + }, + { + "epoch": 2.0, + "learning_rate": 1.344598937187466e-05, + "loss": 0.2569, + "step": 42830 + }, + { + "epoch": 2.0, + "learning_rate": 1.3445205586819872e-05, + "loss": 0.2157, + "step": 42835 + }, + { + "epoch": 2.0, + "learning_rate": 1.3444421801765084e-05, + "loss": 0.074, + "step": 42840 + }, + { + "epoch": 2.0, + "learning_rate": 1.3443638016710298e-05, + "loss": 0.1238, + "step": 42845 + }, + { + "epoch": 2.0, + "learning_rate": 1.344285423165551e-05, + "loss": 0.1821, + "step": 42850 + }, + { + "epoch": 2.0, + "learning_rate": 1.3442070446600726e-05, + "loss": 0.0898, + "step": 42855 + }, + { + "epoch": 2.0, + "learning_rate": 1.3441286661545938e-05, + "loss": 0.269, + "step": 42860 + }, + { + "epoch": 2.0, + "eval_cer": 0.015201216458283512, + "eval_loss": 0.34811559319496155, + "eval_runtime": 472.4357, + "eval_samples_per_second": 40.323, + "eval_steps_per_second": 5.042, + "eval_wer": 0.12934863064396743, + "step": 42862 + }, + { + "epoch": 2.0, + "learning_rate": 1.3440502876491152e-05, + "loss": 0.3587, + "step": 42865 + }, + { + "epoch": 2.0, + "learning_rate": 1.3439719091436364e-05, + "loss": 0.1501, + "step": 42870 + }, + { + "epoch": 2.0, + "learning_rate": 1.343893530638158e-05, + "loss": 0.0647, + "step": 42875 + }, + { + "epoch": 2.0, + "learning_rate": 1.3438151521326792e-05, + "loss": 0.049, + "step": 42880 + }, + { + "epoch": 2.0, + "learning_rate": 1.3437367736272006e-05, + "loss": 0.0998, + "step": 42885 + }, + { + "epoch": 2.0, + "learning_rate": 1.3436583951217218e-05, + "loss": 0.0791, + "step": 42890 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435800166162434e-05, + "loss": 0.2884, + "step": 42895 + }, + { + "epoch": 2.0, + "learning_rate": 1.3435016381107646e-05, + "loss": 0.1765, + "step": 42900 + }, + { + "epoch": 2.0, + "learning_rate": 1.3434232596052858e-05, + "loss": 0.1953, + "step": 42905 + }, + { + "epoch": 2.0, + "learning_rate": 1.3433448810998074e-05, + "loss": 0.2493, + "step": 42910 + }, + { + "epoch": 2.0, + "learning_rate": 1.3432665025943286e-05, + "loss": 0.2425, + "step": 42915 + }, + { + "epoch": 2.0, + "learning_rate": 1.34318812408885e-05, + "loss": 0.0484, + "step": 42920 + }, + { + "epoch": 2.0, + "learning_rate": 1.3431097455833712e-05, + "loss": 0.0687, + "step": 42925 + }, + { + "epoch": 2.0, + "learning_rate": 1.3430313670778928e-05, + "loss": 0.0949, + "step": 42930 + }, + { + "epoch": 2.0, + "learning_rate": 1.342952988572414e-05, + "loss": 0.0677, + "step": 42935 + }, + { + "epoch": 2.0, + "learning_rate": 1.3428746100669354e-05, + "loss": 0.1255, + "step": 42940 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427962315614566e-05, + "loss": 0.1747, + "step": 42945 + }, + { + "epoch": 2.0, + "learning_rate": 1.3427178530559782e-05, + "loss": 0.1615, + "step": 42950 + }, + { + "epoch": 2.0, + "learning_rate": 1.3426394745504994e-05, + "loss": 0.2272, + "step": 42955 + }, + { + "epoch": 2.0, + "learning_rate": 1.3425610960450208e-05, + "loss": 0.4022, + "step": 42960 + }, + { + "epoch": 2.0, + "learning_rate": 1.342482717539542e-05, + "loss": 0.3077, + "step": 42965 + }, + { + "epoch": 2.01, + "learning_rate": 1.3424043390340632e-05, + "loss": 0.0336, + "step": 42970 + }, + { + "epoch": 2.01, + "learning_rate": 1.3423259605285848e-05, + "loss": 0.0245, + "step": 42975 + }, + { + "epoch": 2.01, + "learning_rate": 1.342247582023106e-05, + "loss": 0.0466, + "step": 42980 + }, + { + "epoch": 2.01, + "learning_rate": 1.3421692035176274e-05, + "loss": 0.049, + "step": 42985 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420908250121486e-05, + "loss": 0.081, + "step": 42990 + }, + { + "epoch": 2.01, + "learning_rate": 1.3420124465066702e-05, + "loss": 0.2433, + "step": 42995 + }, + { + "epoch": 2.01, + "learning_rate": 1.3419340680011914e-05, + "loss": 0.0825, + "step": 43000 + }, + { + "epoch": 2.01, + "learning_rate": 1.3418556894957128e-05, + "loss": 0.176, + "step": 43005 + }, + { + "epoch": 2.01, + "learning_rate": 1.3417773109902342e-05, + "loss": 0.3964, + "step": 43010 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416989324847556e-05, + "loss": 0.2797, + "step": 43015 + }, + { + "epoch": 2.01, + "learning_rate": 1.3416205539792768e-05, + "loss": 0.0541, + "step": 43020 + }, + { + "epoch": 2.01, + "learning_rate": 1.3415421754737984e-05, + "loss": 0.1003, + "step": 43025 + }, + { + "epoch": 2.01, + "learning_rate": 1.3414637969683196e-05, + "loss": 0.0925, + "step": 43030 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413854184628408e-05, + "loss": 0.0643, + "step": 43035 + }, + { + "epoch": 2.01, + "learning_rate": 1.3413070399573622e-05, + "loss": 0.0862, + "step": 43040 + }, + { + "epoch": 2.01, + "learning_rate": 1.3412286614518834e-05, + "loss": 0.1016, + "step": 43045 + }, + { + "epoch": 2.01, + "learning_rate": 1.341150282946405e-05, + "loss": 0.1259, + "step": 43050 + }, + { + "epoch": 2.01, + "learning_rate": 1.3410719044409262e-05, + "loss": 0.1814, + "step": 43055 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409935259354476e-05, + "loss": 0.2568, + "step": 43060 + }, + { + "epoch": 2.01, + "learning_rate": 1.3409151474299688e-05, + "loss": 0.3472, + "step": 43065 + }, + { + "epoch": 2.01, + "learning_rate": 1.3408367689244904e-05, + "loss": 0.0966, + "step": 43070 + }, + { + "epoch": 2.01, + "learning_rate": 1.3407583904190116e-05, + "loss": 0.0378, + "step": 43075 + }, + { + "epoch": 2.01, + "learning_rate": 1.340680011913533e-05, + "loss": 0.066, + "step": 43080 + }, + { + "epoch": 2.01, + "learning_rate": 1.3406016334080542e-05, + "loss": 0.1326, + "step": 43085 + }, + { + "epoch": 2.01, + "learning_rate": 1.3405232549025758e-05, + "loss": 0.0976, + "step": 43090 + }, + { + "epoch": 2.01, + "learning_rate": 1.340444876397097e-05, + "loss": 0.1264, + "step": 43095 + }, + { + "epoch": 2.01, + "learning_rate": 1.3403664978916182e-05, + "loss": 0.1619, + "step": 43100 + }, + { + "epoch": 2.01, + "learning_rate": 1.3402881193861396e-05, + "loss": 0.2066, + "step": 43105 + }, + { + "epoch": 2.01, + "learning_rate": 1.340209740880661e-05, + "loss": 0.2903, + "step": 43110 + }, + { + "epoch": 2.01, + "learning_rate": 1.3401313623751824e-05, + "loss": 0.2423, + "step": 43115 + }, + { + "epoch": 2.01, + "learning_rate": 1.3400529838697036e-05, + "loss": 0.0823, + "step": 43120 + }, + { + "epoch": 2.01, + "learning_rate": 1.3399746053642252e-05, + "loss": 0.0537, + "step": 43125 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398962268587464e-05, + "loss": 0.061, + "step": 43130 + }, + { + "epoch": 2.01, + "learning_rate": 1.3398178483532678e-05, + "loss": 0.1014, + "step": 43135 + }, + { + "epoch": 2.01, + "learning_rate": 1.339739469847789e-05, + "loss": 0.1187, + "step": 43140 + }, + { + "epoch": 2.01, + "learning_rate": 1.3396610913423106e-05, + "loss": 0.0802, + "step": 43145 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395827128368318e-05, + "loss": 0.1449, + "step": 43150 + }, + { + "epoch": 2.01, + "learning_rate": 1.3395043343313532e-05, + "loss": 0.2398, + "step": 43155 + }, + { + "epoch": 2.01, + "learning_rate": 1.3394259558258744e-05, + "loss": 0.1875, + "step": 43160 + }, + { + "epoch": 2.01, + "learning_rate": 1.3393475773203956e-05, + "loss": 0.3293, + "step": 43165 + }, + { + "epoch": 2.01, + "learning_rate": 1.3392691988149172e-05, + "loss": 0.0328, + "step": 43170 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391908203094384e-05, + "loss": 0.0352, + "step": 43175 + }, + { + "epoch": 2.01, + "learning_rate": 1.3391124418039598e-05, + "loss": 0.0326, + "step": 43180 + }, + { + "epoch": 2.02, + "learning_rate": 1.339034063298481e-05, + "loss": 0.1078, + "step": 43185 + }, + { + "epoch": 2.02, + "learning_rate": 1.3389556847930026e-05, + "loss": 0.0485, + "step": 43190 + }, + { + "epoch": 2.02, + "learning_rate": 1.3388773062875238e-05, + "loss": 0.1721, + "step": 43195 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387989277820452e-05, + "loss": 0.2037, + "step": 43200 + }, + { + "epoch": 2.02, + "learning_rate": 1.3387205492765664e-05, + "loss": 0.1506, + "step": 43205 + }, + { + "epoch": 2.02, + "learning_rate": 1.338642170771088e-05, + "loss": 0.2595, + "step": 43210 + }, + { + "epoch": 2.02, + "learning_rate": 1.3385637922656092e-05, + "loss": 0.2567, + "step": 43215 + }, + { + "epoch": 2.02, + "learning_rate": 1.3384854137601306e-05, + "loss": 0.0354, + "step": 43220 + }, + { + "epoch": 2.02, + "learning_rate": 1.338407035254652e-05, + "loss": 0.0445, + "step": 43225 + }, + { + "epoch": 2.02, + "learning_rate": 1.3383286567491732e-05, + "loss": 0.1386, + "step": 43230 + }, + { + "epoch": 2.02, + "learning_rate": 1.3382502782436946e-05, + "loss": 0.1286, + "step": 43235 + }, + { + "epoch": 2.02, + "learning_rate": 1.3381718997382158e-05, + "loss": 0.1383, + "step": 43240 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380935212327373e-05, + "loss": 0.164, + "step": 43245 + }, + { + "epoch": 2.02, + "learning_rate": 1.3380151427272586e-05, + "loss": 0.1469, + "step": 43250 + }, + { + "epoch": 2.02, + "learning_rate": 1.33793676422178e-05, + "loss": 0.1615, + "step": 43255 + }, + { + "epoch": 2.02, + "learning_rate": 1.3378583857163012e-05, + "loss": 0.2817, + "step": 43260 + }, + { + "epoch": 2.02, + "learning_rate": 1.3377800072108227e-05, + "loss": 0.3087, + "step": 43265 + }, + { + "epoch": 2.02, + "learning_rate": 1.337701628705344e-05, + "loss": 0.0857, + "step": 43270 + }, + { + "epoch": 2.02, + "learning_rate": 1.3376232501998654e-05, + "loss": 0.0569, + "step": 43275 + }, + { + "epoch": 2.02, + "learning_rate": 1.3375448716943866e-05, + "loss": 0.0329, + "step": 43280 + }, + { + "epoch": 2.02, + "learning_rate": 1.3374664931889081e-05, + "loss": 0.0912, + "step": 43285 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373881146834294e-05, + "loss": 0.0769, + "step": 43290 + }, + { + "epoch": 2.02, + "learning_rate": 1.3373097361779506e-05, + "loss": 0.0714, + "step": 43295 + }, + { + "epoch": 2.02, + "learning_rate": 1.337231357672472e-05, + "loss": 0.1705, + "step": 43300 + }, + { + "epoch": 2.02, + "learning_rate": 1.3371529791669932e-05, + "loss": 0.3426, + "step": 43305 + }, + { + "epoch": 2.02, + "learning_rate": 1.3370746006615147e-05, + "loss": 0.3034, + "step": 43310 + }, + { + "epoch": 2.02, + "learning_rate": 1.336996222156036e-05, + "loss": 0.2828, + "step": 43315 + }, + { + "epoch": 2.02, + "learning_rate": 1.3369178436505574e-05, + "loss": 0.0257, + "step": 43320 + }, + { + "epoch": 2.02, + "learning_rate": 1.3368394651450788e-05, + "loss": 0.0729, + "step": 43325 + }, + { + "epoch": 2.02, + "learning_rate": 1.3367610866396001e-05, + "loss": 0.132, + "step": 43330 + }, + { + "epoch": 2.02, + "learning_rate": 1.3366827081341214e-05, + "loss": 0.1212, + "step": 43335 + }, + { + "epoch": 2.02, + "learning_rate": 1.336604329628643e-05, + "loss": 0.1476, + "step": 43340 + }, + { + "epoch": 2.02, + "learning_rate": 1.3365259511231641e-05, + "loss": 0.1749, + "step": 43345 + }, + { + "epoch": 2.02, + "learning_rate": 1.3364475726176855e-05, + "loss": 0.2094, + "step": 43350 + }, + { + "epoch": 2.02, + "learning_rate": 1.3363691941122068e-05, + "loss": 0.1269, + "step": 43355 + }, + { + "epoch": 2.02, + "learning_rate": 1.336290815606728e-05, + "loss": 0.397, + "step": 43360 + }, + { + "epoch": 2.02, + "learning_rate": 1.3362124371012495e-05, + "loss": 0.3699, + "step": 43365 + }, + { + "epoch": 2.02, + "learning_rate": 1.3361340585957708e-05, + "loss": 0.0477, + "step": 43370 + }, + { + "epoch": 2.02, + "learning_rate": 1.3360556800902921e-05, + "loss": 0.1156, + "step": 43375 + }, + { + "epoch": 2.02, + "learning_rate": 1.3359773015848134e-05, + "loss": 0.0311, + "step": 43380 + }, + { + "epoch": 2.02, + "learning_rate": 1.335898923079335e-05, + "loss": 0.3633, + "step": 43385 + }, + { + "epoch": 2.02, + "learning_rate": 1.3358205445738562e-05, + "loss": 0.1132, + "step": 43390 + }, + { + "epoch": 2.02, + "learning_rate": 1.3357421660683775e-05, + "loss": 0.0839, + "step": 43395 + }, + { + "epoch": 2.03, + "learning_rate": 1.3356637875628988e-05, + "loss": 0.1844, + "step": 43400 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355854090574203e-05, + "loss": 0.2091, + "step": 43405 + }, + { + "epoch": 2.03, + "learning_rate": 1.3355070305519415e-05, + "loss": 0.2798, + "step": 43410 + }, + { + "epoch": 2.03, + "learning_rate": 1.335428652046463e-05, + "loss": 0.2263, + "step": 43415 + }, + { + "epoch": 2.03, + "learning_rate": 1.3353502735409842e-05, + "loss": 0.0277, + "step": 43420 + }, + { + "epoch": 2.03, + "learning_rate": 1.3352718950355055e-05, + "loss": 0.0383, + "step": 43425 + }, + { + "epoch": 2.03, + "learning_rate": 1.335193516530027e-05, + "loss": 0.0466, + "step": 43430 + }, + { + "epoch": 2.03, + "learning_rate": 1.3351151380245482e-05, + "loss": 0.0454, + "step": 43435 + }, + { + "epoch": 2.03, + "learning_rate": 1.3350367595190697e-05, + "loss": 0.1619, + "step": 43440 + }, + { + "epoch": 2.03, + "learning_rate": 1.3349740567146868e-05, + "loss": 0.0808, + "step": 43445 + }, + { + "epoch": 2.03, + "learning_rate": 1.334895678209208e-05, + "loss": 0.1816, + "step": 43450 + }, + { + "epoch": 2.03, + "learning_rate": 1.3348172997037294e-05, + "loss": 0.2489, + "step": 43455 + }, + { + "epoch": 2.03, + "learning_rate": 1.3347389211982506e-05, + "loss": 0.2021, + "step": 43460 + }, + { + "epoch": 2.03, + "learning_rate": 1.3346762183938678e-05, + "loss": 0.3291, + "step": 43465 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345978398883892e-05, + "loss": 0.0624, + "step": 43470 + }, + { + "epoch": 2.03, + "learning_rate": 1.3345194613829104e-05, + "loss": 0.0649, + "step": 43475 + }, + { + "epoch": 2.03, + "learning_rate": 1.334441082877432e-05, + "loss": 0.093, + "step": 43480 + }, + { + "epoch": 2.03, + "learning_rate": 1.3343627043719532e-05, + "loss": 0.124, + "step": 43485 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342843258664744e-05, + "loss": 0.0791, + "step": 43490 + }, + { + "epoch": 2.03, + "learning_rate": 1.3342059473609958e-05, + "loss": 0.1554, + "step": 43495 + }, + { + "epoch": 2.03, + "learning_rate": 1.334127568855517e-05, + "loss": 0.1531, + "step": 43500 + }, + { + "epoch": 2.03, + "learning_rate": 1.3340491903500386e-05, + "loss": 0.1726, + "step": 43505 + }, + { + "epoch": 2.03, + "learning_rate": 1.3339708118445598e-05, + "loss": 0.3568, + "step": 43510 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338924333390812e-05, + "loss": 0.3171, + "step": 43515 + }, + { + "epoch": 2.03, + "learning_rate": 1.3338140548336024e-05, + "loss": 0.0196, + "step": 43520 + }, + { + "epoch": 2.03, + "learning_rate": 1.333735676328124e-05, + "loss": 0.0832, + "step": 43525 + }, + { + "epoch": 2.03, + "learning_rate": 1.3336572978226452e-05, + "loss": 0.0747, + "step": 43530 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335789193171666e-05, + "loss": 0.0519, + "step": 43535 + }, + { + "epoch": 2.03, + "learning_rate": 1.3335005408116878e-05, + "loss": 0.1477, + "step": 43540 + }, + { + "epoch": 2.03, + "learning_rate": 1.3334221623062094e-05, + "loss": 0.231, + "step": 43545 + }, + { + "epoch": 2.03, + "learning_rate": 1.3333437838007306e-05, + "loss": 0.2418, + "step": 43550 + }, + { + "epoch": 2.03, + "learning_rate": 1.3332654052952518e-05, + "loss": 0.2191, + "step": 43555 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331870267897732e-05, + "loss": 0.396, + "step": 43560 + }, + { + "epoch": 2.03, + "learning_rate": 1.3331086482842946e-05, + "loss": 0.3743, + "step": 43565 + }, + { + "epoch": 2.03, + "learning_rate": 1.333030269778816e-05, + "loss": 0.0503, + "step": 43570 + }, + { + "epoch": 2.03, + "learning_rate": 1.3329518912733372e-05, + "loss": 0.0244, + "step": 43575 + }, + { + "epoch": 2.03, + "learning_rate": 1.3328735127678588e-05, + "loss": 0.0543, + "step": 43580 + }, + { + "epoch": 2.03, + "learning_rate": 1.33279513426238e-05, + "loss": 0.0519, + "step": 43585 + }, + { + "epoch": 2.03, + "learning_rate": 1.3327167557569014e-05, + "loss": 0.0916, + "step": 43590 + }, + { + "epoch": 2.03, + "learning_rate": 1.3326383772514226e-05, + "loss": 0.167, + "step": 43595 + }, + { + "epoch": 2.03, + "learning_rate": 1.3325599987459442e-05, + "loss": 0.1703, + "step": 43600 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324816202404654e-05, + "loss": 0.1012, + "step": 43605 + }, + { + "epoch": 2.03, + "learning_rate": 1.3324032417349868e-05, + "loss": 0.1885, + "step": 43610 + }, + { + "epoch": 2.04, + "learning_rate": 1.332324863229508e-05, + "loss": 0.2534, + "step": 43615 + }, + { + "epoch": 2.04, + "learning_rate": 1.3322464847240292e-05, + "loss": 0.0711, + "step": 43620 + }, + { + "epoch": 2.04, + "learning_rate": 1.3321681062185508e-05, + "loss": 0.0885, + "step": 43625 + }, + { + "epoch": 2.04, + "learning_rate": 1.332089727713072e-05, + "loss": 0.0919, + "step": 43630 + }, + { + "epoch": 2.04, + "learning_rate": 1.3320113492075934e-05, + "loss": 0.0972, + "step": 43635 + }, + { + "epoch": 2.04, + "learning_rate": 1.3319329707021146e-05, + "loss": 0.0946, + "step": 43640 + }, + { + "epoch": 2.04, + "learning_rate": 1.3318545921966362e-05, + "loss": 0.0836, + "step": 43645 + }, + { + "epoch": 2.04, + "learning_rate": 1.3317762136911574e-05, + "loss": 0.1282, + "step": 43650 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316978351856788e-05, + "loss": 0.1321, + "step": 43655 + }, + { + "epoch": 2.04, + "learning_rate": 1.3316194566802002e-05, + "loss": 0.266, + "step": 43660 + }, + { + "epoch": 2.04, + "learning_rate": 1.3315410781747216e-05, + "loss": 0.3727, + "step": 43665 + }, + { + "epoch": 2.04, + "learning_rate": 1.3314626996692428e-05, + "loss": 0.0841, + "step": 43670 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313843211637642e-05, + "loss": 0.0567, + "step": 43675 + }, + { + "epoch": 2.04, + "learning_rate": 1.3313059426582856e-05, + "loss": 0.0792, + "step": 43680 + }, + { + "epoch": 2.04, + "learning_rate": 1.3312275641528068e-05, + "loss": 0.0857, + "step": 43685 + }, + { + "epoch": 2.04, + "learning_rate": 1.3311491856473282e-05, + "loss": 0.0897, + "step": 43690 + }, + { + "epoch": 2.04, + "learning_rate": 1.3310708071418494e-05, + "loss": 0.1323, + "step": 43695 + }, + { + "epoch": 2.04, + "learning_rate": 1.330992428636371e-05, + "loss": 0.3455, + "step": 43700 + }, + { + "epoch": 2.04, + "learning_rate": 1.3309140501308922e-05, + "loss": 0.1129, + "step": 43705 + }, + { + "epoch": 2.04, + "learning_rate": 1.3308356716254136e-05, + "loss": 0.2136, + "step": 43710 + }, + { + "epoch": 2.04, + "learning_rate": 1.3307572931199348e-05, + "loss": 0.3236, + "step": 43715 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306789146144564e-05, + "loss": 0.0699, + "step": 43720 + }, + { + "epoch": 2.04, + "learning_rate": 1.3306005361089776e-05, + "loss": 0.063, + "step": 43725 + }, + { + "epoch": 2.04, + "learning_rate": 1.330522157603499e-05, + "loss": 0.0609, + "step": 43730 + }, + { + "epoch": 2.04, + "learning_rate": 1.3304437790980202e-05, + "loss": 0.0902, + "step": 43735 + }, + { + "epoch": 2.04, + "learning_rate": 1.3303654005925417e-05, + "loss": 0.0967, + "step": 43740 + }, + { + "epoch": 2.04, + "learning_rate": 1.330287022087063e-05, + "loss": 0.1716, + "step": 43745 + }, + { + "epoch": 2.04, + "learning_rate": 1.3302086435815842e-05, + "loss": 0.117, + "step": 43750 + }, + { + "epoch": 2.04, + "learning_rate": 1.3301302650761056e-05, + "loss": 0.2584, + "step": 43755 + }, + { + "epoch": 2.04, + "learning_rate": 1.330051886570627e-05, + "loss": 0.3534, + "step": 43760 + }, + { + "epoch": 2.04, + "learning_rate": 1.3299735080651484e-05, + "loss": 0.2967, + "step": 43765 + }, + { + "epoch": 2.04, + "learning_rate": 1.3298951295596696e-05, + "loss": 0.0437, + "step": 43770 + }, + { + "epoch": 2.04, + "learning_rate": 1.329816751054191e-05, + "loss": 0.0362, + "step": 43775 + }, + { + "epoch": 2.04, + "learning_rate": 1.3297383725487124e-05, + "loss": 0.0681, + "step": 43780 + }, + { + "epoch": 2.04, + "learning_rate": 1.3296599940432338e-05, + "loss": 0.0636, + "step": 43785 + }, + { + "epoch": 2.04, + "learning_rate": 1.329581615537755e-05, + "loss": 0.1009, + "step": 43790 + }, + { + "epoch": 2.04, + "learning_rate": 1.3295032370322765e-05, + "loss": 0.1697, + "step": 43795 + }, + { + "epoch": 2.04, + "learning_rate": 1.3294248585267978e-05, + "loss": 0.1545, + "step": 43800 + }, + { + "epoch": 2.04, + "learning_rate": 1.3293464800213191e-05, + "loss": 0.1749, + "step": 43805 + }, + { + "epoch": 2.04, + "learning_rate": 1.3292681015158404e-05, + "loss": 0.2246, + "step": 43810 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291897230103616e-05, + "loss": 0.2266, + "step": 43815 + }, + { + "epoch": 2.04, + "learning_rate": 1.3291113445048832e-05, + "loss": 0.0738, + "step": 43820 + }, + { + "epoch": 2.04, + "learning_rate": 1.3290329659994044e-05, + "loss": 0.0333, + "step": 43825 + }, + { + "epoch": 2.05, + "learning_rate": 1.3289545874939258e-05, + "loss": 0.0439, + "step": 43830 + }, + { + "epoch": 2.05, + "learning_rate": 1.328876208988447e-05, + "loss": 0.0997, + "step": 43835 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287978304829685e-05, + "loss": 0.0818, + "step": 43840 + }, + { + "epoch": 2.05, + "learning_rate": 1.3287194519774898e-05, + "loss": 0.0876, + "step": 43845 + }, + { + "epoch": 2.05, + "learning_rate": 1.3286410734720112e-05, + "loss": 0.1397, + "step": 43850 + }, + { + "epoch": 2.05, + "learning_rate": 1.3285626949665324e-05, + "loss": 0.1035, + "step": 43855 + }, + { + "epoch": 2.05, + "learning_rate": 1.328484316461054e-05, + "loss": 0.3954, + "step": 43860 + }, + { + "epoch": 2.05, + "learning_rate": 1.3284059379555752e-05, + "loss": 0.4025, + "step": 43865 + }, + { + "epoch": 2.05, + "learning_rate": 1.3283275594500965e-05, + "loss": 0.0292, + "step": 43870 + }, + { + "epoch": 2.05, + "learning_rate": 1.328249180944618e-05, + "loss": 0.091, + "step": 43875 + }, + { + "epoch": 2.05, + "learning_rate": 1.3281708024391392e-05, + "loss": 0.0692, + "step": 43880 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280924239336606e-05, + "loss": 0.0826, + "step": 43885 + }, + { + "epoch": 2.05, + "learning_rate": 1.3280140454281818e-05, + "loss": 0.1142, + "step": 43890 + }, + { + "epoch": 2.05, + "learning_rate": 1.3279356669227033e-05, + "loss": 0.1234, + "step": 43895 + }, + { + "epoch": 2.05, + "learning_rate": 1.3278572884172246e-05, + "loss": 0.1834, + "step": 43900 + }, + { + "epoch": 2.05, + "learning_rate": 1.327778909911746e-05, + "loss": 0.227, + "step": 43905 + }, + { + "epoch": 2.05, + "learning_rate": 1.3277005314062672e-05, + "loss": 0.336, + "step": 43910 + }, + { + "epoch": 2.05, + "learning_rate": 1.3276221529007887e-05, + "loss": 0.2137, + "step": 43915 + }, + { + "epoch": 2.05, + "learning_rate": 1.32754377439531e-05, + "loss": 0.08, + "step": 43920 + }, + { + "epoch": 2.05, + "learning_rate": 1.3274653958898313e-05, + "loss": 0.0813, + "step": 43925 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273870173843526e-05, + "loss": 0.0463, + "step": 43930 + }, + { + "epoch": 2.05, + "learning_rate": 1.3273086388788741e-05, + "loss": 0.0811, + "step": 43935 + }, + { + "epoch": 2.05, + "learning_rate": 1.3272302603733953e-05, + "loss": 0.0861, + "step": 43940 + }, + { + "epoch": 2.05, + "learning_rate": 1.3271518818679166e-05, + "loss": 0.1037, + "step": 43945 + }, + { + "epoch": 2.05, + "learning_rate": 1.327073503362438e-05, + "loss": 0.2025, + "step": 43950 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269951248569592e-05, + "loss": 0.2942, + "step": 43955 + }, + { + "epoch": 2.05, + "learning_rate": 1.3269167463514807e-05, + "loss": 0.1693, + "step": 43960 + }, + { + "epoch": 2.05, + "learning_rate": 1.326838367846002e-05, + "loss": 0.269, + "step": 43965 + }, + { + "epoch": 2.05, + "learning_rate": 1.3267599893405233e-05, + "loss": 0.1356, + "step": 43970 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266816108350447e-05, + "loss": 0.016, + "step": 43975 + }, + { + "epoch": 2.05, + "learning_rate": 1.3266032323295661e-05, + "loss": 0.0608, + "step": 43980 + }, + { + "epoch": 2.05, + "learning_rate": 1.3265248538240873e-05, + "loss": 0.0614, + "step": 43985 + }, + { + "epoch": 2.05, + "learning_rate": 1.3264464753186087e-05, + "loss": 0.0645, + "step": 43990 + }, + { + "epoch": 2.05, + "learning_rate": 1.3263680968131301e-05, + "loss": 0.1768, + "step": 43995 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262897183076515e-05, + "loss": 0.0816, + "step": 44000 + }, + { + "epoch": 2.05, + "learning_rate": 1.3262113398021727e-05, + "loss": 0.121, + "step": 44005 + }, + { + "epoch": 2.05, + "learning_rate": 1.326132961296694e-05, + "loss": 0.1856, + "step": 44010 + }, + { + "epoch": 2.05, + "learning_rate": 1.3260545827912155e-05, + "loss": 0.2582, + "step": 44015 + }, + { + "epoch": 2.05, + "learning_rate": 1.3259762042857367e-05, + "loss": 0.0342, + "step": 44020 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258978257802581e-05, + "loss": 0.0482, + "step": 44025 + }, + { + "epoch": 2.05, + "learning_rate": 1.3258194472747794e-05, + "loss": 0.066, + "step": 44030 + }, + { + "epoch": 2.05, + "learning_rate": 1.3257410687693009e-05, + "loss": 0.1002, + "step": 44035 + }, + { + "epoch": 2.05, + "learning_rate": 1.3256626902638221e-05, + "loss": 0.0774, + "step": 44040 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255843117583435e-05, + "loss": 0.2158, + "step": 44045 + }, + { + "epoch": 2.06, + "learning_rate": 1.3255059332528647e-05, + "loss": 0.1574, + "step": 44050 + }, + { + "epoch": 2.06, + "learning_rate": 1.3254275547473863e-05, + "loss": 0.2037, + "step": 44055 + }, + { + "epoch": 2.06, + "learning_rate": 1.3253491762419075e-05, + "loss": 0.3914, + "step": 44060 + }, + { + "epoch": 2.06, + "learning_rate": 1.325270797736429e-05, + "loss": 0.299, + "step": 44065 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251924192309501e-05, + "loss": 0.0783, + "step": 44070 + }, + { + "epoch": 2.06, + "learning_rate": 1.3251140407254715e-05, + "loss": 0.0358, + "step": 44075 + }, + { + "epoch": 2.06, + "learning_rate": 1.325035662219993e-05, + "loss": 0.0511, + "step": 44080 + }, + { + "epoch": 2.06, + "learning_rate": 1.3249572837145141e-05, + "loss": 0.0737, + "step": 44085 + }, + { + "epoch": 2.06, + "learning_rate": 1.3248789052090355e-05, + "loss": 0.0319, + "step": 44090 + }, + { + "epoch": 2.06, + "learning_rate": 1.324800526703557e-05, + "loss": 0.0438, + "step": 44095 + }, + { + "epoch": 2.06, + "learning_rate": 1.3247221481980783e-05, + "loss": 0.1289, + "step": 44100 + }, + { + "epoch": 2.06, + "learning_rate": 1.3246437696925995e-05, + "loss": 0.2054, + "step": 44105 + }, + { + "epoch": 2.06, + "learning_rate": 1.3245653911871211e-05, + "loss": 0.2511, + "step": 44110 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244870126816423e-05, + "loss": 0.3578, + "step": 44115 + }, + { + "epoch": 2.06, + "learning_rate": 1.3244086341761637e-05, + "loss": 0.0513, + "step": 44120 + }, + { + "epoch": 2.06, + "learning_rate": 1.324330255670685e-05, + "loss": 0.0485, + "step": 44125 + }, + { + "epoch": 2.06, + "learning_rate": 1.3242518771652065e-05, + "loss": 0.0457, + "step": 44130 + }, + { + "epoch": 2.06, + "learning_rate": 1.3241734986597277e-05, + "loss": 0.0965, + "step": 44135 + }, + { + "epoch": 2.06, + "learning_rate": 1.324095120154249e-05, + "loss": 0.0919, + "step": 44140 + }, + { + "epoch": 2.06, + "learning_rate": 1.3240167416487703e-05, + "loss": 0.1236, + "step": 44145 + }, + { + "epoch": 2.06, + "learning_rate": 1.3239383631432915e-05, + "loss": 0.1037, + "step": 44150 + }, + { + "epoch": 2.06, + "learning_rate": 1.3238599846378131e-05, + "loss": 0.1805, + "step": 44155 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237816061323343e-05, + "loss": 0.2902, + "step": 44160 + }, + { + "epoch": 2.06, + "learning_rate": 1.3237032276268557e-05, + "loss": 0.2851, + "step": 44165 + }, + { + "epoch": 2.06, + "learning_rate": 1.323624849121377e-05, + "loss": 0.0588, + "step": 44170 + }, + { + "epoch": 2.06, + "learning_rate": 1.3235464706158985e-05, + "loss": 0.1677, + "step": 44175 + }, + { + "epoch": 2.06, + "learning_rate": 1.3234680921104197e-05, + "loss": 0.0701, + "step": 44180 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233897136049411e-05, + "loss": 0.0714, + "step": 44185 + }, + { + "epoch": 2.06, + "learning_rate": 1.3233113350994625e-05, + "loss": 0.0526, + "step": 44190 + }, + { + "epoch": 2.06, + "learning_rate": 1.3232329565939839e-05, + "loss": 0.0875, + "step": 44195 + }, + { + "epoch": 2.06, + "learning_rate": 1.3231545780885051e-05, + "loss": 0.1067, + "step": 44200 + }, + { + "epoch": 2.06, + "learning_rate": 1.3230761995830263e-05, + "loss": 0.1859, + "step": 44205 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229978210775479e-05, + "loss": 0.2862, + "step": 44210 + }, + { + "epoch": 2.06, + "learning_rate": 1.3229194425720691e-05, + "loss": 0.3919, + "step": 44215 + }, + { + "epoch": 2.06, + "learning_rate": 1.3228410640665905e-05, + "loss": 0.0165, + "step": 44220 + }, + { + "epoch": 2.06, + "learning_rate": 1.3227626855611117e-05, + "loss": 0.0282, + "step": 44225 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226843070556333e-05, + "loss": 0.1177, + "step": 44230 + }, + { + "epoch": 2.06, + "learning_rate": 1.3226059285501545e-05, + "loss": 0.0937, + "step": 44235 + }, + { + "epoch": 2.06, + "learning_rate": 1.3225275500446759e-05, + "loss": 0.1239, + "step": 44240 + }, + { + "epoch": 2.06, + "learning_rate": 1.3224491715391971e-05, + "loss": 0.1821, + "step": 44245 + }, + { + "epoch": 2.06, + "learning_rate": 1.3223707930337187e-05, + "loss": 0.205, + "step": 44250 + }, + { + "epoch": 2.06, + "learning_rate": 1.3222924145282399e-05, + "loss": 0.1652, + "step": 44255 + }, + { + "epoch": 2.07, + "learning_rate": 1.3222140360227613e-05, + "loss": 0.1785, + "step": 44260 + }, + { + "epoch": 2.07, + "learning_rate": 1.3221356575172825e-05, + "loss": 0.2491, + "step": 44265 + }, + { + "epoch": 2.07, + "learning_rate": 1.3220572790118037e-05, + "loss": 0.026, + "step": 44270 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219789005063253e-05, + "loss": 0.0235, + "step": 44275 + }, + { + "epoch": 2.07, + "learning_rate": 1.3219005220008465e-05, + "loss": 0.0774, + "step": 44280 + }, + { + "epoch": 2.07, + "learning_rate": 1.3218221434953679e-05, + "loss": 0.08, + "step": 44285 + }, + { + "epoch": 2.07, + "learning_rate": 1.3217437649898893e-05, + "loss": 0.0742, + "step": 44290 + }, + { + "epoch": 2.07, + "learning_rate": 1.3216653864844107e-05, + "loss": 0.1891, + "step": 44295 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215870079789319e-05, + "loss": 0.1566, + "step": 44300 + }, + { + "epoch": 2.07, + "learning_rate": 1.3215086294734533e-05, + "loss": 0.2266, + "step": 44305 + }, + { + "epoch": 2.07, + "learning_rate": 1.3214302509679747e-05, + "loss": 0.2815, + "step": 44310 + }, + { + "epoch": 2.07, + "learning_rate": 1.321351872462496e-05, + "loss": 0.2934, + "step": 44315 + }, + { + "epoch": 2.07, + "learning_rate": 1.3212734939570173e-05, + "loss": 0.0386, + "step": 44320 + }, + { + "epoch": 2.07, + "learning_rate": 1.3211951154515389e-05, + "loss": 0.079, + "step": 44325 + }, + { + "epoch": 2.07, + "learning_rate": 1.32111673694606e-05, + "loss": 0.0483, + "step": 44330 + }, + { + "epoch": 2.07, + "learning_rate": 1.3210383584405813e-05, + "loss": 0.12, + "step": 44335 + }, + { + "epoch": 2.07, + "learning_rate": 1.3209599799351027e-05, + "loss": 0.1331, + "step": 44340 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208816014296239e-05, + "loss": 0.1102, + "step": 44345 + }, + { + "epoch": 2.07, + "learning_rate": 1.3208032229241455e-05, + "loss": 0.116, + "step": 44350 + }, + { + "epoch": 2.07, + "learning_rate": 1.3207248444186667e-05, + "loss": 0.1937, + "step": 44355 + }, + { + "epoch": 2.07, + "learning_rate": 1.320646465913188e-05, + "loss": 0.2692, + "step": 44360 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205837631088051e-05, + "loss": 0.3699, + "step": 44365 + }, + { + "epoch": 2.07, + "learning_rate": 1.3205053846033265e-05, + "loss": 0.0153, + "step": 44370 + }, + { + "epoch": 2.07, + "learning_rate": 1.3204270060978479e-05, + "loss": 0.0688, + "step": 44375 + }, + { + "epoch": 2.07, + "learning_rate": 1.3203486275923691e-05, + "loss": 0.0807, + "step": 44380 + }, + { + "epoch": 2.07, + "learning_rate": 1.3202702490868907e-05, + "loss": 0.0753, + "step": 44385 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201918705814119e-05, + "loss": 0.083, + "step": 44390 + }, + { + "epoch": 2.07, + "learning_rate": 1.3201134920759333e-05, + "loss": 0.165, + "step": 44395 + }, + { + "epoch": 2.07, + "learning_rate": 1.3200351135704545e-05, + "loss": 0.169, + "step": 44400 + }, + { + "epoch": 2.07, + "learning_rate": 1.3199567350649757e-05, + "loss": 0.2277, + "step": 44405 + }, + { + "epoch": 2.07, + "learning_rate": 1.3198783565594973e-05, + "loss": 0.3473, + "step": 44410 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197999780540185e-05, + "loss": 0.2985, + "step": 44415 + }, + { + "epoch": 2.07, + "learning_rate": 1.3197215995485399e-05, + "loss": 0.0606, + "step": 44420 + }, + { + "epoch": 2.07, + "learning_rate": 1.3196432210430611e-05, + "loss": 0.0483, + "step": 44425 + }, + { + "epoch": 2.07, + "learning_rate": 1.3195648425375827e-05, + "loss": 0.0787, + "step": 44430 + }, + { + "epoch": 2.07, + "learning_rate": 1.319486464032104e-05, + "loss": 0.1182, + "step": 44435 + }, + { + "epoch": 2.07, + "learning_rate": 1.3194080855266253e-05, + "loss": 0.152, + "step": 44440 + }, + { + "epoch": 2.07, + "learning_rate": 1.3193297070211465e-05, + "loss": 0.1204, + "step": 44445 + }, + { + "epoch": 2.07, + "learning_rate": 1.3192513285156681e-05, + "loss": 0.0873, + "step": 44450 + }, + { + "epoch": 2.07, + "learning_rate": 1.3191729500101893e-05, + "loss": 0.1337, + "step": 44455 + }, + { + "epoch": 2.07, + "learning_rate": 1.3190945715047107e-05, + "loss": 0.2727, + "step": 44460 + }, + { + "epoch": 2.07, + "learning_rate": 1.319016192999232e-05, + "loss": 0.2811, + "step": 44465 + }, + { + "epoch": 2.08, + "learning_rate": 1.3189378144937533e-05, + "loss": 0.0753, + "step": 44470 + }, + { + "epoch": 2.08, + "learning_rate": 1.3188594359882747e-05, + "loss": 0.0589, + "step": 44475 + }, + { + "epoch": 2.08, + "learning_rate": 1.318781057482796e-05, + "loss": 0.1141, + "step": 44480 + }, + { + "epoch": 2.08, + "learning_rate": 1.3187026789773175e-05, + "loss": 0.0495, + "step": 44485 + }, + { + "epoch": 2.08, + "learning_rate": 1.3186243004718387e-05, + "loss": 0.0716, + "step": 44490 + }, + { + "epoch": 2.08, + "learning_rate": 1.3185459219663601e-05, + "loss": 0.1369, + "step": 44495 + }, + { + "epoch": 2.08, + "learning_rate": 1.3184675434608813e-05, + "loss": 0.1567, + "step": 44500 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183891649554029e-05, + "loss": 0.1988, + "step": 44505 + }, + { + "epoch": 2.08, + "learning_rate": 1.3183107864499241e-05, + "loss": 0.3542, + "step": 44510 + }, + { + "epoch": 2.08, + "learning_rate": 1.3182324079444455e-05, + "loss": 0.3402, + "step": 44515 + }, + { + "epoch": 2.08, + "learning_rate": 1.3181540294389667e-05, + "loss": 0.0784, + "step": 44520 + }, + { + "epoch": 2.08, + "learning_rate": 1.3180756509334883e-05, + "loss": 0.0768, + "step": 44525 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179972724280095e-05, + "loss": 0.0386, + "step": 44530 + }, + { + "epoch": 2.08, + "learning_rate": 1.3179188939225307e-05, + "loss": 0.0284, + "step": 44535 + }, + { + "epoch": 2.08, + "learning_rate": 1.3178405154170521e-05, + "loss": 0.0668, + "step": 44540 + }, + { + "epoch": 2.08, + "learning_rate": 1.3177621369115733e-05, + "loss": 0.1517, + "step": 44545 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176837584060949e-05, + "loss": 0.1934, + "step": 44550 + }, + { + "epoch": 2.08, + "learning_rate": 1.3176053799006161e-05, + "loss": 0.321, + "step": 44555 + }, + { + "epoch": 2.08, + "learning_rate": 1.3175270013951375e-05, + "loss": 0.3407, + "step": 44560 + }, + { + "epoch": 2.08, + "learning_rate": 1.3174486228896587e-05, + "loss": 0.2514, + "step": 44565 + }, + { + "epoch": 2.08, + "learning_rate": 1.3173702443841803e-05, + "loss": 0.041, + "step": 44570 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172918658787015e-05, + "loss": 0.0766, + "step": 44575 + }, + { + "epoch": 2.08, + "learning_rate": 1.3172134873732229e-05, + "loss": 0.0465, + "step": 44580 + }, + { + "epoch": 2.08, + "learning_rate": 1.3171351088677443e-05, + "loss": 0.1087, + "step": 44585 + }, + { + "epoch": 2.08, + "learning_rate": 1.3170567303622657e-05, + "loss": 0.1005, + "step": 44590 + }, + { + "epoch": 2.08, + "learning_rate": 1.3169783518567869e-05, + "loss": 0.1296, + "step": 44595 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168999733513081e-05, + "loss": 0.1509, + "step": 44600 + }, + { + "epoch": 2.08, + "learning_rate": 1.3168215948458297e-05, + "loss": 0.1615, + "step": 44605 + }, + { + "epoch": 2.08, + "learning_rate": 1.3167432163403509e-05, + "loss": 0.1695, + "step": 44610 + }, + { + "epoch": 2.08, + "learning_rate": 1.3166648378348723e-05, + "loss": 0.2675, + "step": 44615 + }, + { + "epoch": 2.08, + "learning_rate": 1.3165864593293935e-05, + "loss": 0.0326, + "step": 44620 + }, + { + "epoch": 2.08, + "learning_rate": 1.316508080823915e-05, + "loss": 0.065, + "step": 44625 + }, + { + "epoch": 2.08, + "learning_rate": 1.3164297023184363e-05, + "loss": 0.0504, + "step": 44630 + }, + { + "epoch": 2.08, + "learning_rate": 1.3163513238129577e-05, + "loss": 0.0747, + "step": 44635 + }, + { + "epoch": 2.08, + "learning_rate": 1.3162729453074789e-05, + "loss": 0.064, + "step": 44640 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161945668020005e-05, + "loss": 0.1414, + "step": 44645 + }, + { + "epoch": 2.08, + "learning_rate": 1.3161161882965217e-05, + "loss": 0.2267, + "step": 44650 + }, + { + "epoch": 2.08, + "learning_rate": 1.316037809791043e-05, + "loss": 0.1493, + "step": 44655 + }, + { + "epoch": 2.08, + "learning_rate": 1.3159594312855643e-05, + "loss": 0.3172, + "step": 44660 + }, + { + "epoch": 2.08, + "learning_rate": 1.3158810527800857e-05, + "loss": 0.2474, + "step": 44665 + }, + { + "epoch": 2.08, + "learning_rate": 1.315802674274607e-05, + "loss": 0.0665, + "step": 44670 + }, + { + "epoch": 2.08, + "learning_rate": 1.3157242957691283e-05, + "loss": 0.0464, + "step": 44675 + }, + { + "epoch": 2.08, + "learning_rate": 1.3156459172636497e-05, + "loss": 0.0733, + "step": 44680 + }, + { + "epoch": 2.09, + "learning_rate": 1.315567538758171e-05, + "loss": 0.1709, + "step": 44685 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154891602526925e-05, + "loss": 0.0666, + "step": 44690 + }, + { + "epoch": 2.09, + "learning_rate": 1.3154107817472137e-05, + "loss": 0.0916, + "step": 44695 + }, + { + "epoch": 2.09, + "learning_rate": 1.3153324032417352e-05, + "loss": 0.0939, + "step": 44700 + }, + { + "epoch": 2.09, + "learning_rate": 1.3152540247362565e-05, + "loss": 0.155, + "step": 44705 + }, + { + "epoch": 2.09, + "learning_rate": 1.3151756462307779e-05, + "loss": 0.2603, + "step": 44710 + }, + { + "epoch": 2.09, + "learning_rate": 1.315097267725299e-05, + "loss": 0.2802, + "step": 44715 + }, + { + "epoch": 2.09, + "learning_rate": 1.3150188892198206e-05, + "loss": 0.0517, + "step": 44720 + }, + { + "epoch": 2.09, + "learning_rate": 1.3149405107143419e-05, + "loss": 0.0623, + "step": 44725 + }, + { + "epoch": 2.09, + "learning_rate": 1.314862132208863e-05, + "loss": 0.0714, + "step": 44730 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147837537033845e-05, + "loss": 0.0585, + "step": 44735 + }, + { + "epoch": 2.09, + "learning_rate": 1.3147053751979057e-05, + "loss": 0.1013, + "step": 44740 + }, + { + "epoch": 2.09, + "learning_rate": 1.3146269966924273e-05, + "loss": 0.0922, + "step": 44745 + }, + { + "epoch": 2.09, + "learning_rate": 1.3145486181869485e-05, + "loss": 0.1085, + "step": 44750 + }, + { + "epoch": 2.09, + "learning_rate": 1.3144702396814699e-05, + "loss": 0.1422, + "step": 44755 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143918611759911e-05, + "loss": 0.2372, + "step": 44760 + }, + { + "epoch": 2.09, + "learning_rate": 1.3143134826705126e-05, + "loss": 0.3727, + "step": 44765 + }, + { + "epoch": 2.09, + "learning_rate": 1.3142351041650339e-05, + "loss": 0.1013, + "step": 44770 + }, + { + "epoch": 2.09, + "learning_rate": 1.3141567256595553e-05, + "loss": 0.0361, + "step": 44775 + }, + { + "epoch": 2.09, + "learning_rate": 1.3140783471540765e-05, + "loss": 0.088, + "step": 44780 + }, + { + "epoch": 2.09, + "learning_rate": 1.313999968648598e-05, + "loss": 0.0731, + "step": 44785 + }, + { + "epoch": 2.09, + "learning_rate": 1.3139215901431193e-05, + "loss": 0.0661, + "step": 44790 + }, + { + "epoch": 2.09, + "learning_rate": 1.3138432116376405e-05, + "loss": 0.1273, + "step": 44795 + }, + { + "epoch": 2.09, + "learning_rate": 1.313764833132162e-05, + "loss": 0.1032, + "step": 44800 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136864546266833e-05, + "loss": 0.2088, + "step": 44805 + }, + { + "epoch": 2.09, + "learning_rate": 1.3136080761212047e-05, + "loss": 0.3149, + "step": 44810 + }, + { + "epoch": 2.09, + "learning_rate": 1.3135296976157259e-05, + "loss": 0.3955, + "step": 44815 + }, + { + "epoch": 2.09, + "learning_rate": 1.3134513191102474e-05, + "loss": 0.0629, + "step": 44820 + }, + { + "epoch": 2.09, + "learning_rate": 1.3133729406047687e-05, + "loss": 0.0434, + "step": 44825 + }, + { + "epoch": 2.09, + "learning_rate": 1.31329456209929e-05, + "loss": 0.1228, + "step": 44830 + }, + { + "epoch": 2.09, + "learning_rate": 1.3132161835938113e-05, + "loss": 0.0403, + "step": 44835 + }, + { + "epoch": 2.09, + "learning_rate": 1.3131378050883328e-05, + "loss": 0.0699, + "step": 44840 + }, + { + "epoch": 2.09, + "learning_rate": 1.313059426582854e-05, + "loss": 0.1441, + "step": 44845 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129810480773754e-05, + "loss": 0.1262, + "step": 44850 + }, + { + "epoch": 2.09, + "learning_rate": 1.3129026695718967e-05, + "loss": 0.1689, + "step": 44855 + }, + { + "epoch": 2.09, + "learning_rate": 1.3128242910664179e-05, + "loss": 0.2219, + "step": 44860 + }, + { + "epoch": 2.09, + "learning_rate": 1.3127459125609394e-05, + "loss": 0.3262, + "step": 44865 + }, + { + "epoch": 2.09, + "learning_rate": 1.3126675340554607e-05, + "loss": 0.0307, + "step": 44870 + }, + { + "epoch": 2.09, + "learning_rate": 1.312589155549982e-05, + "loss": 0.0441, + "step": 44875 + }, + { + "epoch": 2.09, + "learning_rate": 1.3125107770445034e-05, + "loss": 0.0356, + "step": 44880 + }, + { + "epoch": 2.09, + "learning_rate": 1.3124323985390248e-05, + "loss": 0.0829, + "step": 44885 + }, + { + "epoch": 2.09, + "learning_rate": 1.312354020033546e-05, + "loss": 0.0608, + "step": 44890 + }, + { + "epoch": 2.09, + "learning_rate": 1.3122756415280674e-05, + "loss": 0.1429, + "step": 44895 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121972630225888e-05, + "loss": 0.125, + "step": 44900 + }, + { + "epoch": 2.1, + "learning_rate": 1.3121188845171102e-05, + "loss": 0.1752, + "step": 44905 + }, + { + "epoch": 2.1, + "learning_rate": 1.3120405060116314e-05, + "loss": 0.1865, + "step": 44910 + }, + { + "epoch": 2.1, + "learning_rate": 1.311962127506153e-05, + "loss": 0.358, + "step": 44915 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118837490006742e-05, + "loss": 0.0342, + "step": 44920 + }, + { + "epoch": 2.1, + "learning_rate": 1.3118053704951955e-05, + "loss": 0.0759, + "step": 44925 + }, + { + "epoch": 2.1, + "learning_rate": 1.3117269919897168e-05, + "loss": 0.0653, + "step": 44930 + }, + { + "epoch": 2.1, + "learning_rate": 1.311648613484238e-05, + "loss": 0.0713, + "step": 44935 + }, + { + "epoch": 2.1, + "learning_rate": 1.3115702349787596e-05, + "loss": 0.0867, + "step": 44940 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114918564732808e-05, + "loss": 0.0849, + "step": 44945 + }, + { + "epoch": 2.1, + "learning_rate": 1.3114134779678022e-05, + "loss": 0.175, + "step": 44950 + }, + { + "epoch": 2.1, + "learning_rate": 1.3113350994623235e-05, + "loss": 0.1791, + "step": 44955 + }, + { + "epoch": 2.1, + "learning_rate": 1.311256720956845e-05, + "loss": 0.1874, + "step": 44960 + }, + { + "epoch": 2.1, + "learning_rate": 1.3111783424513662e-05, + "loss": 0.2178, + "step": 44965 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110999639458876e-05, + "loss": 0.0445, + "step": 44970 + }, + { + "epoch": 2.1, + "learning_rate": 1.3110215854404088e-05, + "loss": 0.0422, + "step": 44975 + }, + { + "epoch": 2.1, + "learning_rate": 1.3109432069349304e-05, + "loss": 0.0326, + "step": 44980 + }, + { + "epoch": 2.1, + "learning_rate": 1.3108648284294516e-05, + "loss": 0.0743, + "step": 44985 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107864499239729e-05, + "loss": 0.1022, + "step": 44990 + }, + { + "epoch": 2.1, + "learning_rate": 1.3107080714184942e-05, + "loss": 0.1278, + "step": 44995 + }, + { + "epoch": 2.1, + "learning_rate": 1.3106296929130156e-05, + "loss": 0.1631, + "step": 45000 + }, + { + "epoch": 2.1, + "learning_rate": 1.310551314407537e-05, + "loss": 0.1475, + "step": 45005 + }, + { + "epoch": 2.1, + "learning_rate": 1.3104729359020582e-05, + "loss": 0.2213, + "step": 45010 + }, + { + "epoch": 2.1, + "learning_rate": 1.3103945573965798e-05, + "loss": 0.3505, + "step": 45015 + }, + { + "epoch": 2.1, + "learning_rate": 1.310316178891101e-05, + "loss": 0.0454, + "step": 45020 + }, + { + "epoch": 2.1, + "learning_rate": 1.3102378003856224e-05, + "loss": 0.0718, + "step": 45025 + }, + { + "epoch": 2.1, + "learning_rate": 1.3101594218801436e-05, + "loss": 0.0849, + "step": 45030 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100810433746652e-05, + "loss": 0.0966, + "step": 45035 + }, + { + "epoch": 2.1, + "learning_rate": 1.3100026648691864e-05, + "loss": 0.1208, + "step": 45040 + }, + { + "epoch": 2.1, + "learning_rate": 1.3099242863637078e-05, + "loss": 0.1753, + "step": 45045 + }, + { + "epoch": 2.1, + "learning_rate": 1.309845907858229e-05, + "loss": 0.2323, + "step": 45050 + }, + { + "epoch": 2.1, + "learning_rate": 1.3097675293527503e-05, + "loss": 0.2094, + "step": 45055 + }, + { + "epoch": 2.1, + "learning_rate": 1.3096891508472718e-05, + "loss": 0.2613, + "step": 45060 + }, + { + "epoch": 2.1, + "learning_rate": 1.309610772341793e-05, + "loss": 0.3646, + "step": 45065 + }, + { + "epoch": 2.1, + "learning_rate": 1.3095323938363144e-05, + "loss": 0.0468, + "step": 45070 + }, + { + "epoch": 2.1, + "learning_rate": 1.3094540153308356e-05, + "loss": 0.0398, + "step": 45075 + }, + { + "epoch": 2.1, + "learning_rate": 1.3093756368253572e-05, + "loss": 0.0819, + "step": 45080 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092972583198784e-05, + "loss": 0.0739, + "step": 45085 + }, + { + "epoch": 2.1, + "learning_rate": 1.3092188798143998e-05, + "loss": 0.1802, + "step": 45090 + }, + { + "epoch": 2.1, + "learning_rate": 1.309140501308921e-05, + "loss": 0.1384, + "step": 45095 + }, + { + "epoch": 2.1, + "learning_rate": 1.3090621228034426e-05, + "loss": 0.1445, + "step": 45100 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089837442979638e-05, + "loss": 0.1752, + "step": 45105 + }, + { + "epoch": 2.1, + "learning_rate": 1.3089053657924852e-05, + "loss": 0.4362, + "step": 45110 + }, + { + "epoch": 2.11, + "learning_rate": 1.3088269872870066e-05, + "loss": 0.3223, + "step": 45115 + }, + { + "epoch": 2.11, + "learning_rate": 1.3087486087815278e-05, + "loss": 0.0303, + "step": 45120 + }, + { + "epoch": 2.11, + "learning_rate": 1.3086702302760492e-05, + "loss": 0.047, + "step": 45125 + }, + { + "epoch": 2.11, + "learning_rate": 1.3085918517705704e-05, + "loss": 0.0458, + "step": 45130 + }, + { + "epoch": 2.11, + "learning_rate": 1.308513473265092e-05, + "loss": 0.097, + "step": 45135 + }, + { + "epoch": 2.11, + "learning_rate": 1.3084350947596132e-05, + "loss": 0.1045, + "step": 45140 + }, + { + "epoch": 2.11, + "learning_rate": 1.3083567162541346e-05, + "loss": 0.1566, + "step": 45145 + }, + { + "epoch": 2.11, + "learning_rate": 1.3082783377486558e-05, + "loss": 0.098, + "step": 45150 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081999592431774e-05, + "loss": 0.1235, + "step": 45155 + }, + { + "epoch": 2.11, + "learning_rate": 1.3081215807376986e-05, + "loss": 0.2617, + "step": 45160 + }, + { + "epoch": 2.11, + "learning_rate": 1.30804320223222e-05, + "loss": 0.2604, + "step": 45165 + }, + { + "epoch": 2.11, + "learning_rate": 1.3079648237267412e-05, + "loss": 0.0638, + "step": 45170 + }, + { + "epoch": 2.11, + "learning_rate": 1.3078864452212628e-05, + "loss": 0.0437, + "step": 45175 + }, + { + "epoch": 2.11, + "learning_rate": 1.307808066715784e-05, + "loss": 0.125, + "step": 45180 + }, + { + "epoch": 2.11, + "learning_rate": 1.3077296882103052e-05, + "loss": 0.0794, + "step": 45185 + }, + { + "epoch": 2.11, + "learning_rate": 1.3076513097048266e-05, + "loss": 0.0686, + "step": 45190 + }, + { + "epoch": 2.11, + "learning_rate": 1.307572931199348e-05, + "loss": 0.1249, + "step": 45195 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074945526938694e-05, + "loss": 0.1045, + "step": 45200 + }, + { + "epoch": 2.11, + "learning_rate": 1.3074161741883906e-05, + "loss": 0.151, + "step": 45205 + }, + { + "epoch": 2.11, + "learning_rate": 1.307337795682912e-05, + "loss": 0.2116, + "step": 45210 + }, + { + "epoch": 2.11, + "learning_rate": 1.3072594171774334e-05, + "loss": 0.3223, + "step": 45215 + }, + { + "epoch": 2.11, + "learning_rate": 1.3071810386719548e-05, + "loss": 0.1557, + "step": 45220 + }, + { + "epoch": 2.11, + "learning_rate": 1.307102660166476e-05, + "loss": 0.0382, + "step": 45225 + }, + { + "epoch": 2.11, + "learning_rate": 1.3070242816609976e-05, + "loss": 0.0262, + "step": 45230 + }, + { + "epoch": 2.11, + "learning_rate": 1.3069459031555188e-05, + "loss": 0.1138, + "step": 45235 + }, + { + "epoch": 2.11, + "learning_rate": 1.3068675246500402e-05, + "loss": 0.0946, + "step": 45240 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067891461445614e-05, + "loss": 0.0577, + "step": 45245 + }, + { + "epoch": 2.11, + "learning_rate": 1.3067107676390826e-05, + "loss": 0.125, + "step": 45250 + }, + { + "epoch": 2.11, + "learning_rate": 1.3066323891336042e-05, + "loss": 0.0984, + "step": 45255 + }, + { + "epoch": 2.11, + "learning_rate": 1.3065540106281254e-05, + "loss": 0.3112, + "step": 45260 + }, + { + "epoch": 2.11, + "learning_rate": 1.3064756321226468e-05, + "loss": 0.4456, + "step": 45265 + }, + { + "epoch": 2.11, + "learning_rate": 1.306397253617168e-05, + "loss": 0.1024, + "step": 45270 + }, + { + "epoch": 2.11, + "learning_rate": 1.3063188751116896e-05, + "loss": 0.0654, + "step": 45275 + }, + { + "epoch": 2.11, + "learning_rate": 1.3062404966062108e-05, + "loss": 0.063, + "step": 45280 + }, + { + "epoch": 2.11, + "learning_rate": 1.3061621181007322e-05, + "loss": 0.073, + "step": 45285 + }, + { + "epoch": 2.11, + "learning_rate": 1.3060837395952534e-05, + "loss": 0.0433, + "step": 45290 + }, + { + "epoch": 2.11, + "learning_rate": 1.306005361089775e-05, + "loss": 0.0807, + "step": 45295 + }, + { + "epoch": 2.11, + "learning_rate": 1.3059269825842962e-05, + "loss": 0.1388, + "step": 45300 + }, + { + "epoch": 2.11, + "learning_rate": 1.3058486040788176e-05, + "loss": 0.1288, + "step": 45305 + }, + { + "epoch": 2.11, + "learning_rate": 1.3057702255733388e-05, + "loss": 0.1973, + "step": 45310 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056918470678602e-05, + "loss": 0.2249, + "step": 45315 + }, + { + "epoch": 2.11, + "learning_rate": 1.3056134685623816e-05, + "loss": 0.0417, + "step": 45320 + }, + { + "epoch": 2.11, + "learning_rate": 1.3055350900569028e-05, + "loss": 0.0328, + "step": 45325 + }, + { + "epoch": 2.12, + "learning_rate": 1.3054567115514244e-05, + "loss": 0.092, + "step": 45330 + }, + { + "epoch": 2.12, + "learning_rate": 1.3053783330459456e-05, + "loss": 0.1243, + "step": 45335 + }, + { + "epoch": 2.12, + "learning_rate": 1.305299954540467e-05, + "loss": 0.056, + "step": 45340 + }, + { + "epoch": 2.12, + "learning_rate": 1.3052215760349882e-05, + "loss": 0.1449, + "step": 45345 + }, + { + "epoch": 2.12, + "learning_rate": 1.3051431975295098e-05, + "loss": 0.2336, + "step": 45350 + }, + { + "epoch": 2.12, + "learning_rate": 1.305064819024031e-05, + "loss": 0.1807, + "step": 45355 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049864405185524e-05, + "loss": 0.373, + "step": 45360 + }, + { + "epoch": 2.12, + "learning_rate": 1.3049080620130736e-05, + "loss": 0.3324, + "step": 45365 + }, + { + "epoch": 2.12, + "learning_rate": 1.3048296835075951e-05, + "loss": 0.0863, + "step": 45370 + }, + { + "epoch": 2.12, + "learning_rate": 1.3047513050021164e-05, + "loss": 0.0827, + "step": 45375 + }, + { + "epoch": 2.12, + "learning_rate": 1.3046729264966376e-05, + "loss": 0.053, + "step": 45380 + }, + { + "epoch": 2.12, + "learning_rate": 1.304594547991159e-05, + "loss": 0.0841, + "step": 45385 + }, + { + "epoch": 2.12, + "learning_rate": 1.3045161694856802e-05, + "loss": 0.1131, + "step": 45390 + }, + { + "epoch": 2.12, + "learning_rate": 1.3044377909802018e-05, + "loss": 0.2027, + "step": 45395 + }, + { + "epoch": 2.12, + "learning_rate": 1.304359412474723e-05, + "loss": 0.1405, + "step": 45400 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042810339692444e-05, + "loss": 0.1859, + "step": 45405 + }, + { + "epoch": 2.12, + "learning_rate": 1.3042026554637656e-05, + "loss": 0.2386, + "step": 45410 + }, + { + "epoch": 2.12, + "learning_rate": 1.3041242769582872e-05, + "loss": 0.3155, + "step": 45415 + }, + { + "epoch": 2.12, + "learning_rate": 1.3040458984528084e-05, + "loss": 0.0233, + "step": 45420 + }, + { + "epoch": 2.12, + "learning_rate": 1.3039675199473298e-05, + "loss": 0.0834, + "step": 45425 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038891414418512e-05, + "loss": 0.0897, + "step": 45430 + }, + { + "epoch": 2.12, + "learning_rate": 1.3038107629363725e-05, + "loss": 0.0855, + "step": 45435 + }, + { + "epoch": 2.12, + "learning_rate": 1.3037323844308938e-05, + "loss": 0.0508, + "step": 45440 + }, + { + "epoch": 2.12, + "learning_rate": 1.303654005925415e-05, + "loss": 0.0879, + "step": 45445 + }, + { + "epoch": 2.12, + "learning_rate": 1.3035756274199365e-05, + "loss": 0.1952, + "step": 45450 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034972489144578e-05, + "loss": 0.1725, + "step": 45455 + }, + { + "epoch": 2.12, + "learning_rate": 1.3034188704089792e-05, + "loss": 0.3064, + "step": 45460 + }, + { + "epoch": 2.12, + "learning_rate": 1.3033404919035004e-05, + "loss": 0.2897, + "step": 45465 + }, + { + "epoch": 2.12, + "learning_rate": 1.303262113398022e-05, + "loss": 0.0456, + "step": 45470 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031837348925432e-05, + "loss": 0.0907, + "step": 45475 + }, + { + "epoch": 2.12, + "learning_rate": 1.3031053563870646e-05, + "loss": 0.0953, + "step": 45480 + }, + { + "epoch": 2.12, + "learning_rate": 1.3030269778815858e-05, + "loss": 0.0623, + "step": 45485 + }, + { + "epoch": 2.12, + "learning_rate": 1.3029485993761073e-05, + "loss": 0.0422, + "step": 45490 + }, + { + "epoch": 2.12, + "learning_rate": 1.3028702208706286e-05, + "loss": 0.1251, + "step": 45495 + }, + { + "epoch": 2.12, + "learning_rate": 1.30279184236515e-05, + "loss": 0.2241, + "step": 45500 + }, + { + "epoch": 2.12, + "learning_rate": 1.3027134638596712e-05, + "loss": 0.206, + "step": 45505 + }, + { + "epoch": 2.12, + "learning_rate": 1.3026350853541926e-05, + "loss": 0.1835, + "step": 45510 + }, + { + "epoch": 2.12, + "learning_rate": 1.302556706848714e-05, + "loss": 0.2396, + "step": 45515 + }, + { + "epoch": 2.12, + "learning_rate": 1.3024783283432352e-05, + "loss": 0.0602, + "step": 45520 + }, + { + "epoch": 2.12, + "learning_rate": 1.3023999498377566e-05, + "loss": 0.0384, + "step": 45525 + }, + { + "epoch": 2.12, + "learning_rate": 1.302321571332278e-05, + "loss": 0.0227, + "step": 45530 + }, + { + "epoch": 2.12, + "learning_rate": 1.3022431928267993e-05, + "loss": 0.0598, + "step": 45535 + }, + { + "epoch": 2.12, + "learning_rate": 1.3021648143213206e-05, + "loss": 0.1411, + "step": 45540 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020864358158421e-05, + "loss": 0.1612, + "step": 45545 + }, + { + "epoch": 2.13, + "learning_rate": 1.3020080573103633e-05, + "loss": 0.218, + "step": 45550 + }, + { + "epoch": 2.13, + "learning_rate": 1.3019296788048847e-05, + "loss": 0.2523, + "step": 45555 + }, + { + "epoch": 2.13, + "learning_rate": 1.301851300299406e-05, + "loss": 0.1978, + "step": 45560 + }, + { + "epoch": 2.13, + "learning_rate": 1.3017729217939275e-05, + "loss": 0.3117, + "step": 45565 + }, + { + "epoch": 2.13, + "learning_rate": 1.3016945432884487e-05, + "loss": 0.1062, + "step": 45570 + }, + { + "epoch": 2.13, + "learning_rate": 1.30161616478297e-05, + "loss": 0.1145, + "step": 45575 + }, + { + "epoch": 2.13, + "learning_rate": 1.3015377862774913e-05, + "loss": 0.1031, + "step": 45580 + }, + { + "epoch": 2.13, + "learning_rate": 1.3014594077720126e-05, + "loss": 0.0727, + "step": 45585 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013810292665341e-05, + "loss": 0.0766, + "step": 45590 + }, + { + "epoch": 2.13, + "learning_rate": 1.3013026507610554e-05, + "loss": 0.118, + "step": 45595 + }, + { + "epoch": 2.13, + "learning_rate": 1.3012242722555767e-05, + "loss": 0.0794, + "step": 45600 + }, + { + "epoch": 2.13, + "learning_rate": 1.301145893750098e-05, + "loss": 0.2502, + "step": 45605 + }, + { + "epoch": 2.13, + "learning_rate": 1.3010675152446195e-05, + "loss": 0.2556, + "step": 45610 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009891367391407e-05, + "loss": 0.2407, + "step": 45615 + }, + { + "epoch": 2.13, + "learning_rate": 1.3009107582336621e-05, + "loss": 0.0709, + "step": 45620 + }, + { + "epoch": 2.13, + "learning_rate": 1.3008323797281834e-05, + "loss": 0.0743, + "step": 45625 + }, + { + "epoch": 2.13, + "learning_rate": 1.3007540012227049e-05, + "loss": 0.0364, + "step": 45630 + }, + { + "epoch": 2.13, + "learning_rate": 1.3006756227172261e-05, + "loss": 0.0777, + "step": 45635 + }, + { + "epoch": 2.13, + "learning_rate": 1.3005972442117474e-05, + "loss": 0.0727, + "step": 45640 + }, + { + "epoch": 2.13, + "learning_rate": 1.300518865706269e-05, + "loss": 0.1977, + "step": 45645 + }, + { + "epoch": 2.13, + "learning_rate": 1.3004404872007901e-05, + "loss": 0.1681, + "step": 45650 + }, + { + "epoch": 2.13, + "learning_rate": 1.3003621086953115e-05, + "loss": 0.2623, + "step": 45655 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002837301898328e-05, + "loss": 0.3067, + "step": 45660 + }, + { + "epoch": 2.13, + "learning_rate": 1.3002053516843543e-05, + "loss": 0.3166, + "step": 45665 + }, + { + "epoch": 2.13, + "learning_rate": 1.3001269731788755e-05, + "loss": 0.1028, + "step": 45670 + }, + { + "epoch": 2.13, + "learning_rate": 1.300048594673397e-05, + "loss": 0.0572, + "step": 45675 + }, + { + "epoch": 2.13, + "learning_rate": 1.2999702161679181e-05, + "loss": 0.0974, + "step": 45680 + }, + { + "epoch": 2.13, + "learning_rate": 1.2998918376624397e-05, + "loss": 0.1084, + "step": 45685 + }, + { + "epoch": 2.13, + "learning_rate": 1.299813459156961e-05, + "loss": 0.1394, + "step": 45690 + }, + { + "epoch": 2.13, + "learning_rate": 1.2997350806514823e-05, + "loss": 0.1306, + "step": 45695 + }, + { + "epoch": 2.13, + "learning_rate": 1.2996567021460035e-05, + "loss": 0.1705, + "step": 45700 + }, + { + "epoch": 2.13, + "learning_rate": 1.2995783236405248e-05, + "loss": 0.1305, + "step": 45705 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994999451350463e-05, + "loss": 0.3169, + "step": 45710 + }, + { + "epoch": 2.13, + "learning_rate": 1.2994215666295675e-05, + "loss": 0.2909, + "step": 45715 + }, + { + "epoch": 2.13, + "learning_rate": 1.299343188124089e-05, + "loss": 0.0292, + "step": 45720 + }, + { + "epoch": 2.13, + "learning_rate": 1.2992648096186102e-05, + "loss": 0.0243, + "step": 45725 + }, + { + "epoch": 2.13, + "learning_rate": 1.2991864311131317e-05, + "loss": 0.0665, + "step": 45730 + }, + { + "epoch": 2.13, + "learning_rate": 1.299108052607653e-05, + "loss": 0.0384, + "step": 45735 + }, + { + "epoch": 2.13, + "learning_rate": 1.2990296741021743e-05, + "loss": 0.0984, + "step": 45740 + }, + { + "epoch": 2.13, + "learning_rate": 1.2989512955966957e-05, + "loss": 0.0985, + "step": 45745 + }, + { + "epoch": 2.13, + "learning_rate": 1.2988729170912171e-05, + "loss": 0.1137, + "step": 45750 + }, + { + "epoch": 2.13, + "learning_rate": 1.2987945385857383e-05, + "loss": 0.2239, + "step": 45755 + }, + { + "epoch": 2.14, + "learning_rate": 1.2987161600802599e-05, + "loss": 0.1643, + "step": 45760 + }, + { + "epoch": 2.14, + "learning_rate": 1.2986377815747811e-05, + "loss": 0.4238, + "step": 45765 + }, + { + "epoch": 2.14, + "learning_rate": 1.2985594030693023e-05, + "loss": 0.0549, + "step": 45770 + }, + { + "epoch": 2.14, + "learning_rate": 1.2984810245638237e-05, + "loss": 0.0319, + "step": 45775 + }, + { + "epoch": 2.14, + "learning_rate": 1.298402646058345e-05, + "loss": 0.0423, + "step": 45780 + }, + { + "epoch": 2.14, + "learning_rate": 1.2983242675528665e-05, + "loss": 0.0657, + "step": 45785 + }, + { + "epoch": 2.14, + "learning_rate": 1.2982458890473877e-05, + "loss": 0.065, + "step": 45790 + }, + { + "epoch": 2.14, + "learning_rate": 1.2981675105419091e-05, + "loss": 0.1418, + "step": 45795 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980891320364303e-05, + "loss": 0.1648, + "step": 45800 + }, + { + "epoch": 2.14, + "learning_rate": 1.2980107535309519e-05, + "loss": 0.1982, + "step": 45805 + }, + { + "epoch": 2.14, + "learning_rate": 1.2979323750254731e-05, + "loss": 0.196, + "step": 45810 + }, + { + "epoch": 2.14, + "learning_rate": 1.2978539965199945e-05, + "loss": 0.3101, + "step": 45815 + }, + { + "epoch": 2.14, + "learning_rate": 1.2977756180145157e-05, + "loss": 0.0599, + "step": 45820 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976972395090373e-05, + "loss": 0.0809, + "step": 45825 + }, + { + "epoch": 2.14, + "learning_rate": 1.2976188610035585e-05, + "loss": 0.1309, + "step": 45830 + }, + { + "epoch": 2.14, + "learning_rate": 1.2975404824980797e-05, + "loss": 0.0561, + "step": 45835 + }, + { + "epoch": 2.14, + "learning_rate": 1.2974621039926011e-05, + "loss": 0.0666, + "step": 45840 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973837254871225e-05, + "loss": 0.1195, + "step": 45845 + }, + { + "epoch": 2.14, + "learning_rate": 1.2973053469816439e-05, + "loss": 0.1992, + "step": 45850 + }, + { + "epoch": 2.14, + "learning_rate": 1.2972269684761651e-05, + "loss": 0.1405, + "step": 45855 + }, + { + "epoch": 2.14, + "learning_rate": 1.2971485899706867e-05, + "loss": 0.2539, + "step": 45860 + }, + { + "epoch": 2.14, + "learning_rate": 1.2970702114652079e-05, + "loss": 0.3003, + "step": 45865 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969918329597293e-05, + "loss": 0.0268, + "step": 45870 + }, + { + "epoch": 2.14, + "learning_rate": 1.2969134544542505e-05, + "loss": 0.029, + "step": 45875 + }, + { + "epoch": 2.14, + "learning_rate": 1.296835075948772e-05, + "loss": 0.0899, + "step": 45880 + }, + { + "epoch": 2.14, + "learning_rate": 1.2967566974432933e-05, + "loss": 0.0464, + "step": 45885 + }, + { + "epoch": 2.14, + "learning_rate": 1.2966783189378147e-05, + "loss": 0.1088, + "step": 45890 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965999404323359e-05, + "loss": 0.1264, + "step": 45895 + }, + { + "epoch": 2.14, + "learning_rate": 1.2965215619268571e-05, + "loss": 0.1743, + "step": 45900 + }, + { + "epoch": 2.14, + "learning_rate": 1.2964431834213787e-05, + "loss": 0.2256, + "step": 45905 + }, + { + "epoch": 2.14, + "learning_rate": 1.2963648049158999e-05, + "loss": 0.235, + "step": 45910 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962864264104213e-05, + "loss": 0.4299, + "step": 45915 + }, + { + "epoch": 2.14, + "learning_rate": 1.2962080479049425e-05, + "loss": 0.0647, + "step": 45920 + }, + { + "epoch": 2.14, + "learning_rate": 1.296129669399464e-05, + "loss": 0.0445, + "step": 45925 + }, + { + "epoch": 2.14, + "learning_rate": 1.2960512908939853e-05, + "loss": 0.0616, + "step": 45930 + }, + { + "epoch": 2.14, + "learning_rate": 1.2959729123885067e-05, + "loss": 0.0822, + "step": 45935 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958945338830279e-05, + "loss": 0.1084, + "step": 45940 + }, + { + "epoch": 2.14, + "learning_rate": 1.2958161553775495e-05, + "loss": 0.1383, + "step": 45945 + }, + { + "epoch": 2.14, + "learning_rate": 1.2957377768720707e-05, + "loss": 0.309, + "step": 45950 + }, + { + "epoch": 2.14, + "learning_rate": 1.295659398366592e-05, + "loss": 0.1302, + "step": 45955 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955810198611135e-05, + "loss": 0.4292, + "step": 45960 + }, + { + "epoch": 2.14, + "learning_rate": 1.2955026413556347e-05, + "loss": 0.3065, + "step": 45965 + }, + { + "epoch": 2.15, + "learning_rate": 1.2954242628501561e-05, + "loss": 0.0508, + "step": 45970 + }, + { + "epoch": 2.15, + "learning_rate": 1.2953458843446773e-05, + "loss": 0.0602, + "step": 45975 + }, + { + "epoch": 2.15, + "learning_rate": 1.2952675058391989e-05, + "loss": 0.08, + "step": 45980 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951891273337201e-05, + "loss": 0.0903, + "step": 45985 + }, + { + "epoch": 2.15, + "learning_rate": 1.2951107488282415e-05, + "loss": 0.0842, + "step": 45990 + }, + { + "epoch": 2.15, + "learning_rate": 1.2950323703227627e-05, + "loss": 0.115, + "step": 45995 + }, + { + "epoch": 2.15, + "learning_rate": 1.2949539918172843e-05, + "loss": 0.0675, + "step": 46000 + }, + { + "epoch": 2.15, + "learning_rate": 1.2948756133118055e-05, + "loss": 0.1959, + "step": 46005 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947972348063269e-05, + "loss": 0.2618, + "step": 46010 + }, + { + "epoch": 2.15, + "learning_rate": 1.2947188563008481e-05, + "loss": 0.291, + "step": 46015 + }, + { + "epoch": 2.15, + "learning_rate": 1.2946404777953697e-05, + "loss": 0.0569, + "step": 46020 + }, + { + "epoch": 2.15, + "learning_rate": 1.2945620992898909e-05, + "loss": 0.0237, + "step": 46025 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944837207844121e-05, + "loss": 0.0394, + "step": 46030 + }, + { + "epoch": 2.15, + "learning_rate": 1.2944053422789335e-05, + "loss": 0.029, + "step": 46035 + }, + { + "epoch": 2.15, + "learning_rate": 1.2943269637734549e-05, + "loss": 0.069, + "step": 46040 + }, + { + "epoch": 2.15, + "learning_rate": 1.2942485852679763e-05, + "loss": 0.0995, + "step": 46045 + }, + { + "epoch": 2.15, + "learning_rate": 1.2941702067624975e-05, + "loss": 0.0975, + "step": 46050 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940918282570189e-05, + "loss": 0.1855, + "step": 46055 + }, + { + "epoch": 2.15, + "learning_rate": 1.2940134497515403e-05, + "loss": 0.2035, + "step": 46060 + }, + { + "epoch": 2.15, + "learning_rate": 1.2939350712460617e-05, + "loss": 0.3937, + "step": 46065 + }, + { + "epoch": 2.15, + "learning_rate": 1.2938566927405829e-05, + "loss": 0.1026, + "step": 46070 + }, + { + "epoch": 2.15, + "learning_rate": 1.2937783142351044e-05, + "loss": 0.0271, + "step": 46075 + }, + { + "epoch": 2.15, + "learning_rate": 1.2936999357296257e-05, + "loss": 0.0701, + "step": 46080 + }, + { + "epoch": 2.15, + "learning_rate": 1.293621557224147e-05, + "loss": 0.0962, + "step": 46085 + }, + { + "epoch": 2.15, + "learning_rate": 1.2935431787186683e-05, + "loss": 0.0695, + "step": 46090 + }, + { + "epoch": 2.15, + "learning_rate": 1.2934648002131895e-05, + "loss": 0.0941, + "step": 46095 + }, + { + "epoch": 2.15, + "learning_rate": 1.293386421707711e-05, + "loss": 0.1758, + "step": 46100 + }, + { + "epoch": 2.15, + "learning_rate": 1.2933080432022323e-05, + "loss": 0.2329, + "step": 46105 + }, + { + "epoch": 2.15, + "learning_rate": 1.2932296646967537e-05, + "loss": 0.257, + "step": 46110 + }, + { + "epoch": 2.15, + "learning_rate": 1.2931512861912749e-05, + "loss": 0.1914, + "step": 46115 + }, + { + "epoch": 2.15, + "learning_rate": 1.2930729076857964e-05, + "loss": 0.0743, + "step": 46120 + }, + { + "epoch": 2.15, + "learning_rate": 1.2929945291803177e-05, + "loss": 0.0692, + "step": 46125 + }, + { + "epoch": 2.15, + "learning_rate": 1.292916150674839e-05, + "loss": 0.0397, + "step": 46130 + }, + { + "epoch": 2.15, + "learning_rate": 1.2928377721693603e-05, + "loss": 0.0593, + "step": 46135 + }, + { + "epoch": 2.15, + "learning_rate": 1.2927593936638818e-05, + "loss": 0.1479, + "step": 46140 + }, + { + "epoch": 2.15, + "learning_rate": 1.292681015158403e-05, + "loss": 0.0926, + "step": 46145 + }, + { + "epoch": 2.15, + "learning_rate": 1.2926026366529245e-05, + "loss": 0.1446, + "step": 46150 + }, + { + "epoch": 2.15, + "learning_rate": 1.2925242581474457e-05, + "loss": 0.1745, + "step": 46155 + }, + { + "epoch": 2.15, + "learning_rate": 1.292445879641967e-05, + "loss": 0.2465, + "step": 46160 + }, + { + "epoch": 2.15, + "learning_rate": 1.2923675011364885e-05, + "loss": 0.2131, + "step": 46165 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922891226310097e-05, + "loss": 0.0662, + "step": 46170 + }, + { + "epoch": 2.15, + "learning_rate": 1.2922107441255312e-05, + "loss": 0.0392, + "step": 46175 + }, + { + "epoch": 2.15, + "learning_rate": 1.2921323656200525e-05, + "loss": 0.1003, + "step": 46180 + }, + { + "epoch": 2.16, + "learning_rate": 1.2920539871145738e-05, + "loss": 0.1232, + "step": 46185 + }, + { + "epoch": 2.16, + "learning_rate": 1.291975608609095e-05, + "loss": 0.1044, + "step": 46190 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918972301036166e-05, + "loss": 0.0763, + "step": 46195 + }, + { + "epoch": 2.16, + "learning_rate": 1.2918188515981379e-05, + "loss": 0.1681, + "step": 46200 + }, + { + "epoch": 2.16, + "learning_rate": 1.2917404730926592e-05, + "loss": 0.1915, + "step": 46205 + }, + { + "epoch": 2.16, + "learning_rate": 1.2916620945871805e-05, + "loss": 0.2404, + "step": 46210 + }, + { + "epoch": 2.16, + "learning_rate": 1.291583716081702e-05, + "loss": 0.2095, + "step": 46215 + }, + { + "epoch": 2.16, + "learning_rate": 1.2915053375762232e-05, + "loss": 0.0508, + "step": 46220 + }, + { + "epoch": 2.16, + "learning_rate": 1.2914269590707445e-05, + "loss": 0.0292, + "step": 46225 + }, + { + "epoch": 2.16, + "learning_rate": 1.2913485805652659e-05, + "loss": 0.0615, + "step": 46230 + }, + { + "epoch": 2.16, + "learning_rate": 1.291270202059787e-05, + "loss": 0.0614, + "step": 46235 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911918235543086e-05, + "loss": 0.1748, + "step": 46240 + }, + { + "epoch": 2.16, + "learning_rate": 1.2911134450488299e-05, + "loss": 0.0778, + "step": 46245 + }, + { + "epoch": 2.16, + "learning_rate": 1.2910350665433512e-05, + "loss": 0.1938, + "step": 46250 + }, + { + "epoch": 2.16, + "learning_rate": 1.2909566880378725e-05, + "loss": 0.22, + "step": 46255 + }, + { + "epoch": 2.16, + "learning_rate": 1.290878309532394e-05, + "loss": 0.2227, + "step": 46260 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907999310269153e-05, + "loss": 0.2721, + "step": 46265 + }, + { + "epoch": 2.16, + "learning_rate": 1.2907215525214366e-05, + "loss": 0.0752, + "step": 46270 + }, + { + "epoch": 2.16, + "learning_rate": 1.290643174015958e-05, + "loss": 0.0638, + "step": 46275 + }, + { + "epoch": 2.16, + "learning_rate": 1.2905647955104794e-05, + "loss": 0.0867, + "step": 46280 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904864170050006e-05, + "loss": 0.0472, + "step": 46285 + }, + { + "epoch": 2.16, + "learning_rate": 1.2904080384995219e-05, + "loss": 0.0876, + "step": 46290 + }, + { + "epoch": 2.16, + "learning_rate": 1.2903296599940434e-05, + "loss": 0.1803, + "step": 46295 + }, + { + "epoch": 2.16, + "learning_rate": 1.2902512814885646e-05, + "loss": 0.1082, + "step": 46300 + }, + { + "epoch": 2.16, + "learning_rate": 1.290172902983086e-05, + "loss": 0.203, + "step": 46305 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900945244776073e-05, + "loss": 0.3482, + "step": 46310 + }, + { + "epoch": 2.16, + "learning_rate": 1.2900161459721288e-05, + "loss": 0.31, + "step": 46315 + }, + { + "epoch": 2.16, + "learning_rate": 1.28993776746665e-05, + "loss": 0.0936, + "step": 46320 + }, + { + "epoch": 2.16, + "learning_rate": 1.2898593889611714e-05, + "loss": 0.0706, + "step": 46325 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897810104556927e-05, + "loss": 0.0416, + "step": 46330 + }, + { + "epoch": 2.16, + "learning_rate": 1.2897026319502142e-05, + "loss": 0.1093, + "step": 46335 + }, + { + "epoch": 2.16, + "learning_rate": 1.2896242534447354e-05, + "loss": 0.1354, + "step": 46340 + }, + { + "epoch": 2.16, + "learning_rate": 1.2895458749392568e-05, + "loss": 0.0767, + "step": 46345 + }, + { + "epoch": 2.16, + "learning_rate": 1.289467496433778e-05, + "loss": 0.1904, + "step": 46350 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893891179282994e-05, + "loss": 0.1699, + "step": 46355 + }, + { + "epoch": 2.16, + "learning_rate": 1.2893107394228208e-05, + "loss": 0.1509, + "step": 46360 + }, + { + "epoch": 2.16, + "learning_rate": 1.289232360917342e-05, + "loss": 0.3889, + "step": 46365 + }, + { + "epoch": 2.16, + "learning_rate": 1.2891539824118634e-05, + "loss": 0.0727, + "step": 46370 + }, + { + "epoch": 2.16, + "learning_rate": 1.2890756039063848e-05, + "loss": 0.0465, + "step": 46375 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889972254009062e-05, + "loss": 0.0655, + "step": 46380 + }, + { + "epoch": 2.16, + "learning_rate": 1.2889188468954274e-05, + "loss": 0.0616, + "step": 46385 + }, + { + "epoch": 2.16, + "learning_rate": 1.288840468389949e-05, + "loss": 0.1318, + "step": 46390 + }, + { + "epoch": 2.16, + "learning_rate": 1.2887620898844702e-05, + "loss": 0.1106, + "step": 46395 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886837113789916e-05, + "loss": 0.175, + "step": 46400 + }, + { + "epoch": 2.17, + "learning_rate": 1.2886053328735128e-05, + "loss": 0.2635, + "step": 46405 + }, + { + "epoch": 2.17, + "learning_rate": 1.2885269543680344e-05, + "loss": 0.2759, + "step": 46410 + }, + { + "epoch": 2.17, + "learning_rate": 1.2884485758625556e-05, + "loss": 0.2409, + "step": 46415 + }, + { + "epoch": 2.17, + "learning_rate": 1.2883701973570768e-05, + "loss": 0.0379, + "step": 46420 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882918188515982e-05, + "loss": 0.0526, + "step": 46425 + }, + { + "epoch": 2.17, + "learning_rate": 1.2882134403461194e-05, + "loss": 0.0494, + "step": 46430 + }, + { + "epoch": 2.17, + "learning_rate": 1.288135061840641e-05, + "loss": 0.0765, + "step": 46435 + }, + { + "epoch": 2.17, + "learning_rate": 1.2880566833351622e-05, + "loss": 0.1189, + "step": 46440 + }, + { + "epoch": 2.17, + "learning_rate": 1.2879783048296836e-05, + "loss": 0.1203, + "step": 46445 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878999263242048e-05, + "loss": 0.1393, + "step": 46450 + }, + { + "epoch": 2.17, + "learning_rate": 1.2878215478187264e-05, + "loss": 0.1572, + "step": 46455 + }, + { + "epoch": 2.17, + "learning_rate": 1.2877431693132476e-05, + "loss": 0.3012, + "step": 46460 + }, + { + "epoch": 2.17, + "learning_rate": 1.287664790807769e-05, + "loss": 0.2512, + "step": 46465 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875864123022902e-05, + "loss": 0.0924, + "step": 46470 + }, + { + "epoch": 2.17, + "learning_rate": 1.2875080337968118e-05, + "loss": 0.0359, + "step": 46475 + }, + { + "epoch": 2.17, + "learning_rate": 1.287429655291333e-05, + "loss": 0.079, + "step": 46480 + }, + { + "epoch": 2.17, + "learning_rate": 1.2873512767858542e-05, + "loss": 0.0759, + "step": 46485 + }, + { + "epoch": 2.17, + "learning_rate": 1.2872728982803758e-05, + "loss": 0.103, + "step": 46490 + }, + { + "epoch": 2.17, + "learning_rate": 1.287194519774897e-05, + "loss": 0.0971, + "step": 46495 + }, + { + "epoch": 2.17, + "learning_rate": 1.2871161412694184e-05, + "loss": 0.1746, + "step": 46500 + }, + { + "epoch": 2.17, + "learning_rate": 1.2870377627639396e-05, + "loss": 0.1998, + "step": 46505 + }, + { + "epoch": 2.17, + "learning_rate": 1.2869593842584612e-05, + "loss": 0.2048, + "step": 46510 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868810057529824e-05, + "loss": 0.3011, + "step": 46515 + }, + { + "epoch": 2.17, + "learning_rate": 1.2868026272475038e-05, + "loss": 0.0584, + "step": 46520 + }, + { + "epoch": 2.17, + "learning_rate": 1.286724248742025e-05, + "loss": 0.064, + "step": 46525 + }, + { + "epoch": 2.17, + "learning_rate": 1.2866458702365466e-05, + "loss": 0.0913, + "step": 46530 + }, + { + "epoch": 2.17, + "learning_rate": 1.2865674917310678e-05, + "loss": 0.1153, + "step": 46535 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864891132255892e-05, + "loss": 0.069, + "step": 46540 + }, + { + "epoch": 2.17, + "learning_rate": 1.2864107347201104e-05, + "loss": 0.068, + "step": 46545 + }, + { + "epoch": 2.17, + "learning_rate": 1.2863323562146316e-05, + "loss": 0.1295, + "step": 46550 + }, + { + "epoch": 2.17, + "learning_rate": 1.2862539777091532e-05, + "loss": 0.229, + "step": 46555 + }, + { + "epoch": 2.17, + "learning_rate": 1.2861755992036744e-05, + "loss": 0.2234, + "step": 46560 + }, + { + "epoch": 2.17, + "learning_rate": 1.2860972206981958e-05, + "loss": 0.2418, + "step": 46565 + }, + { + "epoch": 2.17, + "learning_rate": 1.286018842192717e-05, + "loss": 0.0448, + "step": 46570 + }, + { + "epoch": 2.17, + "learning_rate": 1.2859404636872386e-05, + "loss": 0.0333, + "step": 46575 + }, + { + "epoch": 2.17, + "learning_rate": 1.2858620851817598e-05, + "loss": 0.0544, + "step": 46580 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857837066762812e-05, + "loss": 0.126, + "step": 46585 + }, + { + "epoch": 2.17, + "learning_rate": 1.2857053281708026e-05, + "loss": 0.0373, + "step": 46590 + }, + { + "epoch": 2.17, + "learning_rate": 1.285626949665324e-05, + "loss": 0.0796, + "step": 46595 + }, + { + "epoch": 2.17, + "learning_rate": 1.2855485711598452e-05, + "loss": 0.1411, + "step": 46600 + }, + { + "epoch": 2.17, + "learning_rate": 1.2854701926543668e-05, + "loss": 0.1499, + "step": 46605 + }, + { + "epoch": 2.17, + "learning_rate": 1.285391814148888e-05, + "loss": 0.259, + "step": 46610 + }, + { + "epoch": 2.18, + "learning_rate": 1.2853134356434092e-05, + "loss": 0.2371, + "step": 46615 + }, + { + "epoch": 2.18, + "learning_rate": 1.2852350571379306e-05, + "loss": 0.0754, + "step": 46620 + }, + { + "epoch": 2.18, + "learning_rate": 1.2851566786324518e-05, + "loss": 0.0368, + "step": 46625 + }, + { + "epoch": 2.18, + "learning_rate": 1.2850783001269734e-05, + "loss": 0.0324, + "step": 46630 + }, + { + "epoch": 2.18, + "learning_rate": 1.2849999216214946e-05, + "loss": 0.0577, + "step": 46635 + }, + { + "epoch": 2.18, + "learning_rate": 1.284921543116016e-05, + "loss": 0.0888, + "step": 46640 + }, + { + "epoch": 2.18, + "learning_rate": 1.2848431646105372e-05, + "loss": 0.1034, + "step": 46645 + }, + { + "epoch": 2.18, + "learning_rate": 1.2847647861050588e-05, + "loss": 0.1378, + "step": 46650 + }, + { + "epoch": 2.18, + "learning_rate": 1.28468640759958e-05, + "loss": 0.2142, + "step": 46655 + }, + { + "epoch": 2.18, + "learning_rate": 1.2846080290941014e-05, + "loss": 0.3083, + "step": 46660 + }, + { + "epoch": 2.18, + "learning_rate": 1.2845296505886226e-05, + "loss": 0.193, + "step": 46665 + }, + { + "epoch": 2.18, + "learning_rate": 1.2844512720831442e-05, + "loss": 0.0664, + "step": 46670 + }, + { + "epoch": 2.18, + "learning_rate": 1.2843728935776654e-05, + "loss": 0.04, + "step": 46675 + }, + { + "epoch": 2.18, + "learning_rate": 1.2842945150721866e-05, + "loss": 0.0385, + "step": 46680 + }, + { + "epoch": 2.18, + "learning_rate": 1.284216136566708e-05, + "loss": 0.0412, + "step": 46685 + }, + { + "epoch": 2.18, + "learning_rate": 1.2841377580612294e-05, + "loss": 0.0962, + "step": 46690 + }, + { + "epoch": 2.18, + "learning_rate": 1.2840593795557508e-05, + "loss": 0.1109, + "step": 46695 + }, + { + "epoch": 2.18, + "learning_rate": 1.283981001050272e-05, + "loss": 0.1748, + "step": 46700 + }, + { + "epoch": 2.18, + "learning_rate": 1.2839026225447936e-05, + "loss": 0.1363, + "step": 46705 + }, + { + "epoch": 2.18, + "learning_rate": 1.2838242440393148e-05, + "loss": 0.3242, + "step": 46710 + }, + { + "epoch": 2.18, + "learning_rate": 1.2837458655338362e-05, + "loss": 0.2394, + "step": 46715 + }, + { + "epoch": 2.18, + "learning_rate": 1.2836674870283574e-05, + "loss": 0.0867, + "step": 46720 + }, + { + "epoch": 2.18, + "learning_rate": 1.283589108522879e-05, + "loss": 0.053, + "step": 46725 + }, + { + "epoch": 2.18, + "learning_rate": 1.2835107300174002e-05, + "loss": 0.0816, + "step": 46730 + }, + { + "epoch": 2.18, + "learning_rate": 1.2834323515119216e-05, + "loss": 0.0848, + "step": 46735 + }, + { + "epoch": 2.18, + "learning_rate": 1.2833539730064428e-05, + "loss": 0.1253, + "step": 46740 + }, + { + "epoch": 2.18, + "learning_rate": 1.283275594500964e-05, + "loss": 0.1279, + "step": 46745 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831972159954856e-05, + "loss": 0.1643, + "step": 46750 + }, + { + "epoch": 2.18, + "learning_rate": 1.2831188374900068e-05, + "loss": 0.1052, + "step": 46755 + }, + { + "epoch": 2.18, + "learning_rate": 1.2830404589845282e-05, + "loss": 0.414, + "step": 46760 + }, + { + "epoch": 2.18, + "learning_rate": 1.2829620804790494e-05, + "loss": 0.3846, + "step": 46765 + }, + { + "epoch": 2.18, + "learning_rate": 1.282883701973571e-05, + "loss": 0.0487, + "step": 46770 + }, + { + "epoch": 2.18, + "learning_rate": 1.2828053234680922e-05, + "loss": 0.049, + "step": 46775 + }, + { + "epoch": 2.18, + "learning_rate": 1.2827269449626136e-05, + "loss": 0.0539, + "step": 46780 + }, + { + "epoch": 2.18, + "learning_rate": 1.2826485664571348e-05, + "loss": 0.0454, + "step": 46785 + }, + { + "epoch": 2.18, + "learning_rate": 1.2825701879516563e-05, + "loss": 0.1597, + "step": 46790 + }, + { + "epoch": 2.18, + "learning_rate": 1.2824918094461776e-05, + "loss": 0.1002, + "step": 46795 + }, + { + "epoch": 2.18, + "learning_rate": 1.282413430940699e-05, + "loss": 0.1373, + "step": 46800 + }, + { + "epoch": 2.18, + "learning_rate": 1.2823350524352204e-05, + "loss": 0.2601, + "step": 46805 + }, + { + "epoch": 2.18, + "learning_rate": 1.2822566739297416e-05, + "loss": 0.2318, + "step": 46810 + }, + { + "epoch": 2.18, + "learning_rate": 1.282178295424263e-05, + "loss": 0.3056, + "step": 46815 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820999169187842e-05, + "loss": 0.026, + "step": 46820 + }, + { + "epoch": 2.18, + "learning_rate": 1.2820215384133057e-05, + "loss": 0.103, + "step": 46825 + }, + { + "epoch": 2.19, + "learning_rate": 1.281943159907827e-05, + "loss": 0.0383, + "step": 46830 + }, + { + "epoch": 2.19, + "learning_rate": 1.2818647814023484e-05, + "loss": 0.0978, + "step": 46835 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817864028968696e-05, + "loss": 0.0945, + "step": 46840 + }, + { + "epoch": 2.19, + "learning_rate": 1.2817080243913911e-05, + "loss": 0.1017, + "step": 46845 + }, + { + "epoch": 2.19, + "learning_rate": 1.2816296458859124e-05, + "loss": 0.0924, + "step": 46850 + }, + { + "epoch": 2.19, + "learning_rate": 1.2815512673804337e-05, + "loss": 0.156, + "step": 46855 + }, + { + "epoch": 2.19, + "learning_rate": 1.281472888874955e-05, + "loss": 0.2734, + "step": 46860 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813945103694765e-05, + "loss": 0.3109, + "step": 46865 + }, + { + "epoch": 2.19, + "learning_rate": 1.2813161318639978e-05, + "loss": 0.0688, + "step": 46870 + }, + { + "epoch": 2.19, + "learning_rate": 1.281237753358519e-05, + "loss": 0.0376, + "step": 46875 + }, + { + "epoch": 2.19, + "learning_rate": 1.2811593748530404e-05, + "loss": 0.0537, + "step": 46880 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810809963475616e-05, + "loss": 0.084, + "step": 46885 + }, + { + "epoch": 2.19, + "learning_rate": 1.2810026178420831e-05, + "loss": 0.1507, + "step": 46890 + }, + { + "epoch": 2.19, + "learning_rate": 1.2809242393366044e-05, + "loss": 0.144, + "step": 46895 + }, + { + "epoch": 2.19, + "learning_rate": 1.2808458608311258e-05, + "loss": 0.1558, + "step": 46900 + }, + { + "epoch": 2.19, + "learning_rate": 1.2807674823256471e-05, + "loss": 0.1787, + "step": 46905 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806891038201685e-05, + "loss": 0.2776, + "step": 46910 + }, + { + "epoch": 2.19, + "learning_rate": 1.2806107253146898e-05, + "loss": 0.3282, + "step": 46915 + }, + { + "epoch": 2.19, + "learning_rate": 1.2805323468092113e-05, + "loss": 0.0975, + "step": 46920 + }, + { + "epoch": 2.19, + "learning_rate": 1.2804539683037325e-05, + "loss": 0.0326, + "step": 46925 + }, + { + "epoch": 2.19, + "learning_rate": 1.280375589798254e-05, + "loss": 0.0807, + "step": 46930 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802972112927752e-05, + "loss": 0.1076, + "step": 46935 + }, + { + "epoch": 2.19, + "learning_rate": 1.2802188327872964e-05, + "loss": 0.123, + "step": 46940 + }, + { + "epoch": 2.19, + "learning_rate": 1.280140454281818e-05, + "loss": 0.1297, + "step": 46945 + }, + { + "epoch": 2.19, + "learning_rate": 1.2800620757763392e-05, + "loss": 0.13, + "step": 46950 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799836972708605e-05, + "loss": 0.1437, + "step": 46955 + }, + { + "epoch": 2.19, + "learning_rate": 1.2799053187653818e-05, + "loss": 0.1865, + "step": 46960 + }, + { + "epoch": 2.19, + "learning_rate": 1.2798269402599033e-05, + "loss": 0.2441, + "step": 46965 + }, + { + "epoch": 2.19, + "learning_rate": 1.2797485617544245e-05, + "loss": 0.0758, + "step": 46970 + }, + { + "epoch": 2.19, + "learning_rate": 1.279670183248946e-05, + "loss": 0.0861, + "step": 46975 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795918047434672e-05, + "loss": 0.0704, + "step": 46980 + }, + { + "epoch": 2.19, + "learning_rate": 1.2795134262379887e-05, + "loss": 0.0482, + "step": 46985 + }, + { + "epoch": 2.19, + "learning_rate": 1.27943504773251e-05, + "loss": 0.0407, + "step": 46990 + }, + { + "epoch": 2.19, + "learning_rate": 1.2793566692270313e-05, + "loss": 0.1309, + "step": 46995 + }, + { + "epoch": 2.19, + "learning_rate": 1.2792782907215526e-05, + "loss": 0.1658, + "step": 47000 + }, + { + "epoch": 2.19, + "learning_rate": 1.279199912216074e-05, + "loss": 0.1571, + "step": 47005 + }, + { + "epoch": 2.19, + "learning_rate": 1.2791215337105953e-05, + "loss": 0.2098, + "step": 47010 + }, + { + "epoch": 2.19, + "learning_rate": 1.2790431552051166e-05, + "loss": 0.2532, + "step": 47015 + }, + { + "epoch": 2.19, + "learning_rate": 1.2789647766996381e-05, + "loss": 0.0152, + "step": 47020 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788863981941593e-05, + "loss": 0.0505, + "step": 47025 + }, + { + "epoch": 2.19, + "learning_rate": 1.2788080196886807e-05, + "loss": 0.0752, + "step": 47030 + }, + { + "epoch": 2.19, + "learning_rate": 1.278729641183202e-05, + "loss": 0.0727, + "step": 47035 + }, + { + "epoch": 2.19, + "learning_rate": 1.2786512626777235e-05, + "loss": 0.1866, + "step": 47040 + }, + { + "epoch": 2.2, + "learning_rate": 1.2785728841722447e-05, + "loss": 0.0927, + "step": 47045 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784945056667661e-05, + "loss": 0.0914, + "step": 47050 + }, + { + "epoch": 2.2, + "learning_rate": 1.2784161271612873e-05, + "loss": 0.2837, + "step": 47055 + }, + { + "epoch": 2.2, + "learning_rate": 1.2783377486558089e-05, + "loss": 0.3236, + "step": 47060 + }, + { + "epoch": 2.2, + "learning_rate": 1.2782593701503301e-05, + "loss": 0.3262, + "step": 47065 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781809916448513e-05, + "loss": 0.0784, + "step": 47070 + }, + { + "epoch": 2.2, + "learning_rate": 1.2781026131393727e-05, + "loss": 0.0751, + "step": 47075 + }, + { + "epoch": 2.2, + "learning_rate": 1.278024234633894e-05, + "loss": 0.0486, + "step": 47080 + }, + { + "epoch": 2.2, + "learning_rate": 1.2779458561284155e-05, + "loss": 0.1281, + "step": 47085 + }, + { + "epoch": 2.2, + "learning_rate": 1.2778674776229367e-05, + "loss": 0.1138, + "step": 47090 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777890991174581e-05, + "loss": 0.1101, + "step": 47095 + }, + { + "epoch": 2.2, + "learning_rate": 1.2777107206119793e-05, + "loss": 0.1547, + "step": 47100 + }, + { + "epoch": 2.2, + "learning_rate": 1.2776323421065009e-05, + "loss": 0.1095, + "step": 47105 + }, + { + "epoch": 2.2, + "learning_rate": 1.2775539636010221e-05, + "loss": 0.3924, + "step": 47110 + }, + { + "epoch": 2.2, + "learning_rate": 1.2774755850955435e-05, + "loss": 0.314, + "step": 47115 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773972065900649e-05, + "loss": 0.0539, + "step": 47120 + }, + { + "epoch": 2.2, + "learning_rate": 1.2773188280845863e-05, + "loss": 0.039, + "step": 47125 + }, + { + "epoch": 2.2, + "learning_rate": 1.2772404495791075e-05, + "loss": 0.0658, + "step": 47130 + }, + { + "epoch": 2.2, + "learning_rate": 1.2771620710736287e-05, + "loss": 0.0625, + "step": 47135 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770836925681503e-05, + "loss": 0.115, + "step": 47140 + }, + { + "epoch": 2.2, + "learning_rate": 1.2770053140626715e-05, + "loss": 0.1128, + "step": 47145 + }, + { + "epoch": 2.2, + "learning_rate": 1.2769269355571929e-05, + "loss": 0.1195, + "step": 47150 + }, + { + "epoch": 2.2, + "learning_rate": 1.2768485570517141e-05, + "loss": 0.1494, + "step": 47155 + }, + { + "epoch": 2.2, + "learning_rate": 1.2767701785462357e-05, + "loss": 0.1993, + "step": 47160 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766918000407569e-05, + "loss": 0.2361, + "step": 47165 + }, + { + "epoch": 2.2, + "learning_rate": 1.2766134215352783e-05, + "loss": 0.0823, + "step": 47170 + }, + { + "epoch": 2.2, + "learning_rate": 1.2765350430297995e-05, + "loss": 0.0557, + "step": 47175 + }, + { + "epoch": 2.2, + "learning_rate": 1.2764566645243211e-05, + "loss": 0.0295, + "step": 47180 + }, + { + "epoch": 2.2, + "learning_rate": 1.2763782860188423e-05, + "loss": 0.0804, + "step": 47185 + }, + { + "epoch": 2.2, + "learning_rate": 1.2762999075133637e-05, + "loss": 0.0986, + "step": 47190 + }, + { + "epoch": 2.2, + "learning_rate": 1.276221529007885e-05, + "loss": 0.1192, + "step": 47195 + }, + { + "epoch": 2.2, + "learning_rate": 1.2761431505024063e-05, + "loss": 0.1397, + "step": 47200 + }, + { + "epoch": 2.2, + "learning_rate": 1.2760647719969277e-05, + "loss": 0.2456, + "step": 47205 + }, + { + "epoch": 2.2, + "learning_rate": 1.275986393491449e-05, + "loss": 0.267, + "step": 47210 + }, + { + "epoch": 2.2, + "learning_rate": 1.2759080149859703e-05, + "loss": 0.3058, + "step": 47215 + }, + { + "epoch": 2.2, + "learning_rate": 1.2758296364804917e-05, + "loss": 0.0419, + "step": 47220 + }, + { + "epoch": 2.2, + "learning_rate": 1.2757512579750131e-05, + "loss": 0.0535, + "step": 47225 + }, + { + "epoch": 2.2, + "learning_rate": 1.2756728794695343e-05, + "loss": 0.0522, + "step": 47230 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755945009640559e-05, + "loss": 0.0544, + "step": 47235 + }, + { + "epoch": 2.2, + "learning_rate": 1.2755161224585771e-05, + "loss": 0.1465, + "step": 47240 + }, + { + "epoch": 2.2, + "learning_rate": 1.2754377439530985e-05, + "loss": 0.1576, + "step": 47245 + }, + { + "epoch": 2.2, + "learning_rate": 1.2753593654476197e-05, + "loss": 0.1764, + "step": 47250 + }, + { + "epoch": 2.2, + "learning_rate": 1.2752809869421413e-05, + "loss": 0.2072, + "step": 47255 + }, + { + "epoch": 2.21, + "learning_rate": 1.2752026084366625e-05, + "loss": 0.2698, + "step": 47260 + }, + { + "epoch": 2.21, + "learning_rate": 1.2751242299311837e-05, + "loss": 0.2404, + "step": 47265 + }, + { + "epoch": 2.21, + "learning_rate": 1.2750458514257051e-05, + "loss": 0.1002, + "step": 47270 + }, + { + "epoch": 2.21, + "learning_rate": 1.2749674729202263e-05, + "loss": 0.0465, + "step": 47275 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748890944147479e-05, + "loss": 0.0432, + "step": 47280 + }, + { + "epoch": 2.21, + "learning_rate": 1.2748107159092691e-05, + "loss": 0.0784, + "step": 47285 + }, + { + "epoch": 2.21, + "learning_rate": 1.2747323374037905e-05, + "loss": 0.0798, + "step": 47290 + }, + { + "epoch": 2.21, + "learning_rate": 1.2746539588983117e-05, + "loss": 0.0903, + "step": 47295 + }, + { + "epoch": 2.21, + "learning_rate": 1.2745755803928333e-05, + "loss": 0.197, + "step": 47300 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744972018873545e-05, + "loss": 0.2127, + "step": 47305 + }, + { + "epoch": 2.21, + "learning_rate": 1.2744188233818759e-05, + "loss": 0.2387, + "step": 47310 + }, + { + "epoch": 2.21, + "learning_rate": 1.2743404448763971e-05, + "loss": 0.2397, + "step": 47315 + }, + { + "epoch": 2.21, + "learning_rate": 1.2742620663709187e-05, + "loss": 0.0321, + "step": 47320 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741836878654399e-05, + "loss": 0.0406, + "step": 47325 + }, + { + "epoch": 2.21, + "learning_rate": 1.2741053093599611e-05, + "loss": 0.0561, + "step": 47330 + }, + { + "epoch": 2.21, + "learning_rate": 1.2740269308544827e-05, + "loss": 0.1, + "step": 47335 + }, + { + "epoch": 2.21, + "learning_rate": 1.2739485523490039e-05, + "loss": 0.0816, + "step": 47340 + }, + { + "epoch": 2.21, + "learning_rate": 1.2738701738435253e-05, + "loss": 0.1594, + "step": 47345 + }, + { + "epoch": 2.21, + "learning_rate": 1.2737917953380465e-05, + "loss": 0.1174, + "step": 47350 + }, + { + "epoch": 2.21, + "learning_rate": 1.273713416832568e-05, + "loss": 0.2226, + "step": 47355 + }, + { + "epoch": 2.21, + "learning_rate": 1.2736350383270893e-05, + "loss": 0.2778, + "step": 47360 + }, + { + "epoch": 2.21, + "learning_rate": 1.2735566598216107e-05, + "loss": 0.3245, + "step": 47365 + }, + { + "epoch": 2.21, + "learning_rate": 1.2734782813161319e-05, + "loss": 0.0526, + "step": 47370 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733999028106535e-05, + "loss": 0.0418, + "step": 47375 + }, + { + "epoch": 2.21, + "learning_rate": 1.2733215243051747e-05, + "loss": 0.1414, + "step": 47380 + }, + { + "epoch": 2.21, + "learning_rate": 1.273243145799696e-05, + "loss": 0.0651, + "step": 47385 + }, + { + "epoch": 2.21, + "learning_rate": 1.2731647672942173e-05, + "loss": 0.1081, + "step": 47390 + }, + { + "epoch": 2.21, + "learning_rate": 1.2730863887887385e-05, + "loss": 0.1513, + "step": 47395 + }, + { + "epoch": 2.21, + "learning_rate": 1.27300801028326e-05, + "loss": 0.1419, + "step": 47400 + }, + { + "epoch": 2.21, + "learning_rate": 1.2729296317777813e-05, + "loss": 0.3379, + "step": 47405 + }, + { + "epoch": 2.21, + "learning_rate": 1.2728512532723027e-05, + "loss": 0.2274, + "step": 47410 + }, + { + "epoch": 2.21, + "learning_rate": 1.2727728747668239e-05, + "loss": 0.3456, + "step": 47415 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726944962613455e-05, + "loss": 0.0683, + "step": 47420 + }, + { + "epoch": 2.21, + "learning_rate": 1.2726161177558667e-05, + "loss": 0.0611, + "step": 47425 + }, + { + "epoch": 2.21, + "learning_rate": 1.272537739250388e-05, + "loss": 0.0673, + "step": 47430 + }, + { + "epoch": 2.21, + "learning_rate": 1.2724593607449095e-05, + "loss": 0.1342, + "step": 47435 + }, + { + "epoch": 2.21, + "learning_rate": 1.2723809822394309e-05, + "loss": 0.0678, + "step": 47440 + }, + { + "epoch": 2.21, + "learning_rate": 1.272302603733952e-05, + "loss": 0.254, + "step": 47445 + }, + { + "epoch": 2.21, + "learning_rate": 1.2722242252284736e-05, + "loss": 0.1418, + "step": 47450 + }, + { + "epoch": 2.21, + "learning_rate": 1.2721458467229949e-05, + "loss": 0.1771, + "step": 47455 + }, + { + "epoch": 2.21, + "learning_rate": 1.272067468217516e-05, + "loss": 0.2067, + "step": 47460 + }, + { + "epoch": 2.21, + "learning_rate": 1.2719890897120375e-05, + "loss": 0.302, + "step": 47465 + }, + { + "epoch": 2.22, + "learning_rate": 1.2719107112065587e-05, + "loss": 0.0339, + "step": 47470 + }, + { + "epoch": 2.22, + "learning_rate": 1.2718323327010803e-05, + "loss": 0.068, + "step": 47475 + }, + { + "epoch": 2.22, + "learning_rate": 1.2717539541956015e-05, + "loss": 0.101, + "step": 47480 + }, + { + "epoch": 2.22, + "learning_rate": 1.2716755756901229e-05, + "loss": 0.0721, + "step": 47485 + }, + { + "epoch": 2.22, + "learning_rate": 1.271597197184644e-05, + "loss": 0.0679, + "step": 47490 + }, + { + "epoch": 2.22, + "learning_rate": 1.2715188186791656e-05, + "loss": 0.155, + "step": 47495 + }, + { + "epoch": 2.22, + "learning_rate": 1.2714404401736869e-05, + "loss": 0.0995, + "step": 47500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2713620616682083e-05, + "loss": 0.2467, + "step": 47505 + }, + { + "epoch": 2.22, + "learning_rate": 1.2712836831627295e-05, + "loss": 0.2413, + "step": 47510 + }, + { + "epoch": 2.22, + "learning_rate": 1.271205304657251e-05, + "loss": 0.2587, + "step": 47515 + }, + { + "epoch": 2.22, + "learning_rate": 1.2711269261517723e-05, + "loss": 0.0649, + "step": 47520 + }, + { + "epoch": 2.22, + "learning_rate": 1.2710485476462935e-05, + "loss": 0.0333, + "step": 47525 + }, + { + "epoch": 2.22, + "learning_rate": 1.2709701691408149e-05, + "loss": 0.0575, + "step": 47530 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708917906353363e-05, + "loss": 0.1551, + "step": 47535 + }, + { + "epoch": 2.22, + "learning_rate": 1.2708134121298577e-05, + "loss": 0.2555, + "step": 47540 + }, + { + "epoch": 2.22, + "learning_rate": 1.2707350336243789e-05, + "loss": 0.0763, + "step": 47545 + }, + { + "epoch": 2.22, + "learning_rate": 1.2706566551189004e-05, + "loss": 0.1238, + "step": 47550 + }, + { + "epoch": 2.22, + "learning_rate": 1.2705782766134217e-05, + "loss": 0.1277, + "step": 47555 + }, + { + "epoch": 2.22, + "learning_rate": 1.270499898107943e-05, + "loss": 0.2487, + "step": 47560 + }, + { + "epoch": 2.22, + "learning_rate": 1.2704215196024643e-05, + "loss": 0.2534, + "step": 47565 + }, + { + "epoch": 2.22, + "learning_rate": 1.2703431410969858e-05, + "loss": 0.0228, + "step": 47570 + }, + { + "epoch": 2.22, + "learning_rate": 1.270264762591507e-05, + "loss": 0.0238, + "step": 47575 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701863840860284e-05, + "loss": 0.0532, + "step": 47580 + }, + { + "epoch": 2.22, + "learning_rate": 1.2701080055805497e-05, + "loss": 0.1343, + "step": 47585 + }, + { + "epoch": 2.22, + "learning_rate": 1.2700296270750709e-05, + "loss": 0.1061, + "step": 47590 + }, + { + "epoch": 2.22, + "learning_rate": 1.2699512485695924e-05, + "loss": 0.1548, + "step": 47595 + }, + { + "epoch": 2.22, + "learning_rate": 1.2698728700641137e-05, + "loss": 0.2443, + "step": 47600 + }, + { + "epoch": 2.22, + "learning_rate": 1.269794491558635e-05, + "loss": 0.1995, + "step": 47605 + }, + { + "epoch": 2.22, + "learning_rate": 1.2697161130531563e-05, + "loss": 0.4085, + "step": 47610 + }, + { + "epoch": 2.22, + "learning_rate": 1.2696377345476778e-05, + "loss": 0.2198, + "step": 47615 + }, + { + "epoch": 2.22, + "learning_rate": 1.269559356042199e-05, + "loss": 0.0253, + "step": 47620 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694809775367204e-05, + "loss": 0.0511, + "step": 47625 + }, + { + "epoch": 2.22, + "learning_rate": 1.2694025990312417e-05, + "loss": 0.0841, + "step": 47630 + }, + { + "epoch": 2.22, + "learning_rate": 1.2693242205257632e-05, + "loss": 0.0448, + "step": 47635 + }, + { + "epoch": 2.22, + "learning_rate": 1.2692458420202844e-05, + "loss": 0.0793, + "step": 47640 + }, + { + "epoch": 2.22, + "learning_rate": 1.2691674635148058e-05, + "loss": 0.0846, + "step": 47645 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690890850093272e-05, + "loss": 0.1674, + "step": 47650 + }, + { + "epoch": 2.22, + "learning_rate": 1.2690107065038484e-05, + "loss": 0.129, + "step": 47655 + }, + { + "epoch": 2.22, + "learning_rate": 1.2689323279983698e-05, + "loss": 0.2383, + "step": 47660 + }, + { + "epoch": 2.22, + "learning_rate": 1.268853949492891e-05, + "loss": 0.3317, + "step": 47665 + }, + { + "epoch": 2.22, + "learning_rate": 1.2687755709874126e-05, + "loss": 0.0892, + "step": 47670 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686971924819338e-05, + "loss": 0.0223, + "step": 47675 + }, + { + "epoch": 2.22, + "learning_rate": 1.2686188139764552e-05, + "loss": 0.077, + "step": 47680 + }, + { + "epoch": 2.23, + "learning_rate": 1.2685404354709765e-05, + "loss": 0.0558, + "step": 47685 + }, + { + "epoch": 2.23, + "learning_rate": 1.268462056965498e-05, + "loss": 0.0593, + "step": 47690 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683836784600192e-05, + "loss": 0.0725, + "step": 47695 + }, + { + "epoch": 2.23, + "learning_rate": 1.2683052999545406e-05, + "loss": 0.2018, + "step": 47700 + }, + { + "epoch": 2.23, + "learning_rate": 1.2682269214490618e-05, + "loss": 0.1803, + "step": 47705 + }, + { + "epoch": 2.23, + "learning_rate": 1.2681485429435834e-05, + "loss": 0.2223, + "step": 47710 + }, + { + "epoch": 2.23, + "learning_rate": 1.2680701644381046e-05, + "loss": 0.4095, + "step": 47715 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679917859326258e-05, + "loss": 0.0528, + "step": 47720 + }, + { + "epoch": 2.23, + "learning_rate": 1.2679134074271472e-05, + "loss": 0.0447, + "step": 47725 + }, + { + "epoch": 2.23, + "learning_rate": 1.2678350289216685e-05, + "loss": 0.0256, + "step": 47730 + }, + { + "epoch": 2.23, + "learning_rate": 1.26775665041619e-05, + "loss": 0.0814, + "step": 47735 + }, + { + "epoch": 2.23, + "learning_rate": 1.2676782719107112e-05, + "loss": 0.1283, + "step": 47740 + }, + { + "epoch": 2.23, + "learning_rate": 1.2675998934052326e-05, + "loss": 0.0823, + "step": 47745 + }, + { + "epoch": 2.23, + "learning_rate": 1.267521514899754e-05, + "loss": 0.0804, + "step": 47750 + }, + { + "epoch": 2.23, + "learning_rate": 1.2674431363942754e-05, + "loss": 0.1365, + "step": 47755 + }, + { + "epoch": 2.23, + "learning_rate": 1.2673647578887966e-05, + "loss": 0.2962, + "step": 47760 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672863793833182e-05, + "loss": 0.3267, + "step": 47765 + }, + { + "epoch": 2.23, + "learning_rate": 1.2672080008778394e-05, + "loss": 0.0293, + "step": 47770 + }, + { + "epoch": 2.23, + "learning_rate": 1.2671296223723608e-05, + "loss": 0.0746, + "step": 47775 + }, + { + "epoch": 2.23, + "learning_rate": 1.267051243866882e-05, + "loss": 0.0665, + "step": 47780 + }, + { + "epoch": 2.23, + "learning_rate": 1.2669728653614032e-05, + "loss": 0.0971, + "step": 47785 + }, + { + "epoch": 2.23, + "learning_rate": 1.2668944868559248e-05, + "loss": 0.0418, + "step": 47790 + }, + { + "epoch": 2.23, + "learning_rate": 1.266816108350446e-05, + "loss": 0.1123, + "step": 47795 + }, + { + "epoch": 2.23, + "learning_rate": 1.2667377298449674e-05, + "loss": 0.0995, + "step": 47800 + }, + { + "epoch": 2.23, + "learning_rate": 1.2666593513394886e-05, + "loss": 0.1129, + "step": 47805 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665809728340102e-05, + "loss": 0.2582, + "step": 47810 + }, + { + "epoch": 2.23, + "learning_rate": 1.2665025943285314e-05, + "loss": 0.3506, + "step": 47815 + }, + { + "epoch": 2.23, + "learning_rate": 1.2664242158230528e-05, + "loss": 0.0258, + "step": 47820 + }, + { + "epoch": 2.23, + "learning_rate": 1.266345837317574e-05, + "loss": 0.0343, + "step": 47825 + }, + { + "epoch": 2.23, + "learning_rate": 1.2662674588120956e-05, + "loss": 0.0366, + "step": 47830 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661890803066168e-05, + "loss": 0.1132, + "step": 47835 + }, + { + "epoch": 2.23, + "learning_rate": 1.2661107018011382e-05, + "loss": 0.0819, + "step": 47840 + }, + { + "epoch": 2.23, + "learning_rate": 1.2660323232956594e-05, + "loss": 0.1235, + "step": 47845 + }, + { + "epoch": 2.23, + "learning_rate": 1.2659539447901808e-05, + "loss": 0.1482, + "step": 47850 + }, + { + "epoch": 2.23, + "learning_rate": 1.2658755662847022e-05, + "loss": 0.1346, + "step": 47855 + }, + { + "epoch": 2.23, + "learning_rate": 1.2657971877792234e-05, + "loss": 0.1981, + "step": 47860 + }, + { + "epoch": 2.23, + "learning_rate": 1.265718809273745e-05, + "loss": 0.2022, + "step": 47865 + }, + { + "epoch": 2.23, + "learning_rate": 1.2656404307682662e-05, + "loss": 0.0667, + "step": 47870 + }, + { + "epoch": 2.23, + "learning_rate": 1.2655620522627876e-05, + "loss": 0.0178, + "step": 47875 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654836737573088e-05, + "loss": 0.1099, + "step": 47880 + }, + { + "epoch": 2.23, + "learning_rate": 1.2654052952518304e-05, + "loss": 0.0823, + "step": 47885 + }, + { + "epoch": 2.23, + "learning_rate": 1.2653269167463516e-05, + "loss": 0.0406, + "step": 47890 + }, + { + "epoch": 2.23, + "learning_rate": 1.265248538240873e-05, + "loss": 0.1551, + "step": 47895 + }, + { + "epoch": 2.24, + "learning_rate": 1.2651701597353942e-05, + "loss": 0.1111, + "step": 47900 + }, + { + "epoch": 2.24, + "learning_rate": 1.2650917812299158e-05, + "loss": 0.1663, + "step": 47905 + }, + { + "epoch": 2.24, + "learning_rate": 1.265013402724437e-05, + "loss": 0.2386, + "step": 47910 + }, + { + "epoch": 2.24, + "learning_rate": 1.2649350242189582e-05, + "loss": 0.2496, + "step": 47915 + }, + { + "epoch": 2.24, + "learning_rate": 1.2648566457134796e-05, + "loss": 0.0195, + "step": 47920 + }, + { + "epoch": 2.24, + "learning_rate": 1.2647782672080008e-05, + "loss": 0.0671, + "step": 47925 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646998887025224e-05, + "loss": 0.0558, + "step": 47930 + }, + { + "epoch": 2.24, + "learning_rate": 1.2646215101970436e-05, + "loss": 0.0917, + "step": 47935 + }, + { + "epoch": 2.24, + "learning_rate": 1.264543131691565e-05, + "loss": 0.1752, + "step": 47940 + }, + { + "epoch": 2.24, + "learning_rate": 1.2644647531860862e-05, + "loss": 0.1103, + "step": 47945 + }, + { + "epoch": 2.24, + "learning_rate": 1.2643863746806078e-05, + "loss": 0.1002, + "step": 47950 + }, + { + "epoch": 2.24, + "learning_rate": 1.264307996175129e-05, + "loss": 0.1335, + "step": 47955 + }, + { + "epoch": 2.24, + "learning_rate": 1.2642296176696504e-05, + "loss": 0.3335, + "step": 47960 + }, + { + "epoch": 2.24, + "learning_rate": 1.2641512391641718e-05, + "loss": 0.3093, + "step": 47965 + }, + { + "epoch": 2.24, + "learning_rate": 1.2640728606586932e-05, + "loss": 0.0686, + "step": 47970 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639944821532144e-05, + "loss": 0.0387, + "step": 47975 + }, + { + "epoch": 2.24, + "learning_rate": 1.2639161036477356e-05, + "loss": 0.0357, + "step": 47980 + }, + { + "epoch": 2.24, + "learning_rate": 1.2638377251422572e-05, + "loss": 0.0849, + "step": 47985 + }, + { + "epoch": 2.24, + "learning_rate": 1.2637593466367784e-05, + "loss": 0.1276, + "step": 47990 + }, + { + "epoch": 2.24, + "learning_rate": 1.2636809681312998e-05, + "loss": 0.1694, + "step": 47995 + }, + { + "epoch": 2.24, + "learning_rate": 1.263602589625821e-05, + "loss": 0.1722, + "step": 48000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2635242111203426e-05, + "loss": 0.1639, + "step": 48005 + }, + { + "epoch": 2.24, + "learning_rate": 1.2634458326148638e-05, + "loss": 0.326, + "step": 48010 + }, + { + "epoch": 2.24, + "learning_rate": 1.2633674541093852e-05, + "loss": 0.3992, + "step": 48015 + }, + { + "epoch": 2.24, + "learning_rate": 1.2632890756039064e-05, + "loss": 0.0436, + "step": 48020 + }, + { + "epoch": 2.24, + "learning_rate": 1.263210697098428e-05, + "loss": 0.0531, + "step": 48025 + }, + { + "epoch": 2.24, + "learning_rate": 1.2631323185929492e-05, + "loss": 0.0581, + "step": 48030 + }, + { + "epoch": 2.24, + "learning_rate": 1.2630539400874706e-05, + "loss": 0.0561, + "step": 48035 + }, + { + "epoch": 2.24, + "learning_rate": 1.2629755615819918e-05, + "loss": 0.1014, + "step": 48040 + }, + { + "epoch": 2.24, + "learning_rate": 1.262897183076513e-05, + "loss": 0.1809, + "step": 48045 + }, + { + "epoch": 2.24, + "learning_rate": 1.2628188045710346e-05, + "loss": 0.1019, + "step": 48050 + }, + { + "epoch": 2.24, + "learning_rate": 1.2627404260655558e-05, + "loss": 0.1508, + "step": 48055 + }, + { + "epoch": 2.24, + "learning_rate": 1.2626620475600772e-05, + "loss": 0.309, + "step": 48060 + }, + { + "epoch": 2.24, + "learning_rate": 1.2625836690545986e-05, + "loss": 0.2341, + "step": 48065 + }, + { + "epoch": 2.24, + "learning_rate": 1.26250529054912e-05, + "loss": 0.0517, + "step": 48070 + }, + { + "epoch": 2.24, + "learning_rate": 1.2624269120436412e-05, + "loss": 0.039, + "step": 48075 + }, + { + "epoch": 2.24, + "learning_rate": 1.2623485335381628e-05, + "loss": 0.0392, + "step": 48080 + }, + { + "epoch": 2.24, + "learning_rate": 1.262270155032684e-05, + "loss": 0.0337, + "step": 48085 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621917765272054e-05, + "loss": 0.0574, + "step": 48090 + }, + { + "epoch": 2.24, + "learning_rate": 1.2621133980217266e-05, + "loss": 0.0952, + "step": 48095 + }, + { + "epoch": 2.24, + "learning_rate": 1.2620350195162481e-05, + "loss": 0.1344, + "step": 48100 + }, + { + "epoch": 2.24, + "learning_rate": 1.2619566410107694e-05, + "loss": 0.235, + "step": 48105 + }, + { + "epoch": 2.24, + "learning_rate": 1.2618782625052906e-05, + "loss": 0.2454, + "step": 48110 + }, + { + "epoch": 2.25, + "learning_rate": 1.261799883999812e-05, + "loss": 0.26, + "step": 48115 + }, + { + "epoch": 2.25, + "learning_rate": 1.2617215054943332e-05, + "loss": 0.0369, + "step": 48120 + }, + { + "epoch": 2.25, + "learning_rate": 1.2616431269888548e-05, + "loss": 0.0334, + "step": 48125 + }, + { + "epoch": 2.25, + "learning_rate": 1.261564748483376e-05, + "loss": 0.0459, + "step": 48130 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614863699778974e-05, + "loss": 0.0905, + "step": 48135 + }, + { + "epoch": 2.25, + "learning_rate": 1.2614079914724186e-05, + "loss": 0.1017, + "step": 48140 + }, + { + "epoch": 2.25, + "learning_rate": 1.2613296129669402e-05, + "loss": 0.1289, + "step": 48145 + }, + { + "epoch": 2.25, + "learning_rate": 1.2612512344614614e-05, + "loss": 0.1654, + "step": 48150 + }, + { + "epoch": 2.25, + "learning_rate": 1.2611728559559828e-05, + "loss": 0.1671, + "step": 48155 + }, + { + "epoch": 2.25, + "learning_rate": 1.261094477450504e-05, + "loss": 0.2027, + "step": 48160 + }, + { + "epoch": 2.25, + "learning_rate": 1.2610160989450255e-05, + "loss": 0.2967, + "step": 48165 + }, + { + "epoch": 2.25, + "learning_rate": 1.2609377204395468e-05, + "loss": 0.0546, + "step": 48170 + }, + { + "epoch": 2.25, + "learning_rate": 1.260859341934068e-05, + "loss": 0.0423, + "step": 48175 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607809634285895e-05, + "loss": 0.0787, + "step": 48180 + }, + { + "epoch": 2.25, + "learning_rate": 1.2607025849231108e-05, + "loss": 0.1554, + "step": 48185 + }, + { + "epoch": 2.25, + "learning_rate": 1.2606242064176322e-05, + "loss": 0.0672, + "step": 48190 + }, + { + "epoch": 2.25, + "learning_rate": 1.2605458279121534e-05, + "loss": 0.1474, + "step": 48195 + }, + { + "epoch": 2.25, + "learning_rate": 1.260467449406675e-05, + "loss": 0.0994, + "step": 48200 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603890709011962e-05, + "loss": 0.1914, + "step": 48205 + }, + { + "epoch": 2.25, + "learning_rate": 1.2603106923957176e-05, + "loss": 0.2273, + "step": 48210 + }, + { + "epoch": 2.25, + "learning_rate": 1.2602323138902388e-05, + "loss": 0.3731, + "step": 48215 + }, + { + "epoch": 2.25, + "learning_rate": 1.2601539353847603e-05, + "loss": 0.0643, + "step": 48220 + }, + { + "epoch": 2.25, + "learning_rate": 1.2600755568792816e-05, + "loss": 0.0238, + "step": 48225 + }, + { + "epoch": 2.25, + "learning_rate": 1.259997178373803e-05, + "loss": 0.0542, + "step": 48230 + }, + { + "epoch": 2.25, + "learning_rate": 1.2599187998683242e-05, + "loss": 0.0509, + "step": 48235 + }, + { + "epoch": 2.25, + "learning_rate": 1.2598404213628454e-05, + "loss": 0.1615, + "step": 48240 + }, + { + "epoch": 2.25, + "learning_rate": 1.259762042857367e-05, + "loss": 0.0994, + "step": 48245 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596836643518882e-05, + "loss": 0.1109, + "step": 48250 + }, + { + "epoch": 2.25, + "learning_rate": 1.2596052858464096e-05, + "loss": 0.1553, + "step": 48255 + }, + { + "epoch": 2.25, + "learning_rate": 1.2595269073409308e-05, + "loss": 0.3124, + "step": 48260 + }, + { + "epoch": 2.25, + "learning_rate": 1.2594485288354523e-05, + "loss": 0.2899, + "step": 48265 + }, + { + "epoch": 2.25, + "learning_rate": 1.2593701503299736e-05, + "loss": 0.0501, + "step": 48270 + }, + { + "epoch": 2.25, + "learning_rate": 1.259291771824495e-05, + "loss": 0.0855, + "step": 48275 + }, + { + "epoch": 2.25, + "learning_rate": 1.2592133933190163e-05, + "loss": 0.0823, + "step": 48280 + }, + { + "epoch": 2.25, + "learning_rate": 1.2591350148135377e-05, + "loss": 0.145, + "step": 48285 + }, + { + "epoch": 2.25, + "learning_rate": 1.259056636308059e-05, + "loss": 0.0703, + "step": 48290 + }, + { + "epoch": 2.25, + "learning_rate": 1.2589782578025805e-05, + "loss": 0.143, + "step": 48295 + }, + { + "epoch": 2.25, + "learning_rate": 1.2588998792971017e-05, + "loss": 0.1223, + "step": 48300 + }, + { + "epoch": 2.25, + "learning_rate": 1.258821500791623e-05, + "loss": 0.1272, + "step": 48305 + }, + { + "epoch": 2.25, + "learning_rate": 1.2587431222861443e-05, + "loss": 0.1794, + "step": 48310 + }, + { + "epoch": 2.25, + "learning_rate": 1.2586647437806656e-05, + "loss": 0.2009, + "step": 48315 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585863652751871e-05, + "loss": 0.0941, + "step": 48320 + }, + { + "epoch": 2.25, + "learning_rate": 1.2585079867697083e-05, + "loss": 0.0456, + "step": 48325 + }, + { + "epoch": 2.26, + "learning_rate": 1.2584296082642297e-05, + "loss": 0.0458, + "step": 48330 + }, + { + "epoch": 2.26, + "learning_rate": 1.258351229758751e-05, + "loss": 0.0858, + "step": 48335 + }, + { + "epoch": 2.26, + "learning_rate": 1.2582728512532725e-05, + "loss": 0.1233, + "step": 48340 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581944727477937e-05, + "loss": 0.0849, + "step": 48345 + }, + { + "epoch": 2.26, + "learning_rate": 1.2581160942423151e-05, + "loss": 0.1166, + "step": 48350 + }, + { + "epoch": 2.26, + "learning_rate": 1.2580377157368364e-05, + "loss": 0.2759, + "step": 48355 + }, + { + "epoch": 2.26, + "learning_rate": 1.2579593372313579e-05, + "loss": 0.2935, + "step": 48360 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578809587258791e-05, + "loss": 0.3687, + "step": 48365 + }, + { + "epoch": 2.26, + "learning_rate": 1.2578025802204004e-05, + "loss": 0.0169, + "step": 48370 + }, + { + "epoch": 2.26, + "learning_rate": 1.2577242017149217e-05, + "loss": 0.0272, + "step": 48375 + }, + { + "epoch": 2.26, + "learning_rate": 1.2576458232094431e-05, + "loss": 0.0452, + "step": 48380 + }, + { + "epoch": 2.26, + "learning_rate": 1.2575674447039645e-05, + "loss": 0.0606, + "step": 48385 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574890661984857e-05, + "loss": 0.1621, + "step": 48390 + }, + { + "epoch": 2.26, + "learning_rate": 1.2574106876930073e-05, + "loss": 0.0805, + "step": 48395 + }, + { + "epoch": 2.26, + "learning_rate": 1.2573323091875285e-05, + "loss": 0.1328, + "step": 48400 + }, + { + "epoch": 2.26, + "learning_rate": 1.25725393068205e-05, + "loss": 0.1362, + "step": 48405 + }, + { + "epoch": 2.26, + "learning_rate": 1.2571755521765711e-05, + "loss": 0.1753, + "step": 48410 + }, + { + "epoch": 2.26, + "learning_rate": 1.2570971736710927e-05, + "loss": 0.2823, + "step": 48415 + }, + { + "epoch": 2.26, + "learning_rate": 1.257018795165614e-05, + "loss": 0.0633, + "step": 48420 + }, + { + "epoch": 2.26, + "learning_rate": 1.2569404166601353e-05, + "loss": 0.0347, + "step": 48425 + }, + { + "epoch": 2.26, + "learning_rate": 1.2568620381546565e-05, + "loss": 0.0578, + "step": 48430 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567836596491778e-05, + "loss": 0.0631, + "step": 48435 + }, + { + "epoch": 2.26, + "learning_rate": 1.2567052811436993e-05, + "loss": 0.0828, + "step": 48440 + }, + { + "epoch": 2.26, + "learning_rate": 1.2566269026382205e-05, + "loss": 0.1166, + "step": 48445 + }, + { + "epoch": 2.26, + "learning_rate": 1.256548524132742e-05, + "loss": 0.1153, + "step": 48450 + }, + { + "epoch": 2.26, + "learning_rate": 1.2564701456272631e-05, + "loss": 0.1634, + "step": 48455 + }, + { + "epoch": 2.26, + "learning_rate": 1.2563917671217847e-05, + "loss": 0.2301, + "step": 48460 + }, + { + "epoch": 2.26, + "learning_rate": 1.256313388616306e-05, + "loss": 0.2951, + "step": 48465 + }, + { + "epoch": 2.26, + "learning_rate": 1.2562350101108273e-05, + "loss": 0.046, + "step": 48470 + }, + { + "epoch": 2.26, + "learning_rate": 1.2561566316053485e-05, + "loss": 0.0414, + "step": 48475 + }, + { + "epoch": 2.26, + "learning_rate": 1.2560782530998701e-05, + "loss": 0.0748, + "step": 48480 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559998745943913e-05, + "loss": 0.0706, + "step": 48485 + }, + { + "epoch": 2.26, + "learning_rate": 1.2559214960889127e-05, + "loss": 0.1279, + "step": 48490 + }, + { + "epoch": 2.26, + "learning_rate": 1.2558431175834341e-05, + "loss": 0.0743, + "step": 48495 + }, + { + "epoch": 2.26, + "learning_rate": 1.2557647390779553e-05, + "loss": 0.1205, + "step": 48500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2556863605724767e-05, + "loss": 0.1911, + "step": 48505 + }, + { + "epoch": 2.26, + "learning_rate": 1.255607982066998e-05, + "loss": 0.1648, + "step": 48510 + }, + { + "epoch": 2.26, + "learning_rate": 1.2555296035615195e-05, + "loss": 0.2687, + "step": 48515 + }, + { + "epoch": 2.26, + "learning_rate": 1.2554512250560407e-05, + "loss": 0.0847, + "step": 48520 + }, + { + "epoch": 2.26, + "learning_rate": 1.2553728465505621e-05, + "loss": 0.1152, + "step": 48525 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552944680450833e-05, + "loss": 0.0977, + "step": 48530 + }, + { + "epoch": 2.26, + "learning_rate": 1.2552160895396049e-05, + "loss": 0.0585, + "step": 48535 + }, + { + "epoch": 2.26, + "learning_rate": 1.2551377110341261e-05, + "loss": 0.1386, + "step": 48540 + }, + { + "epoch": 2.27, + "learning_rate": 1.2550593325286475e-05, + "loss": 0.1517, + "step": 48545 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549809540231687e-05, + "loss": 0.0852, + "step": 48550 + }, + { + "epoch": 2.27, + "learning_rate": 1.2549025755176903e-05, + "loss": 0.1604, + "step": 48555 + }, + { + "epoch": 2.27, + "learning_rate": 1.2548241970122115e-05, + "loss": 0.3075, + "step": 48560 + }, + { + "epoch": 2.27, + "learning_rate": 1.2547458185067327e-05, + "loss": 0.3901, + "step": 48565 + }, + { + "epoch": 2.27, + "learning_rate": 1.2546674400012541e-05, + "loss": 0.0896, + "step": 48570 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545890614957753e-05, + "loss": 0.0526, + "step": 48575 + }, + { + "epoch": 2.27, + "learning_rate": 1.2545106829902969e-05, + "loss": 0.0586, + "step": 48580 + }, + { + "epoch": 2.27, + "learning_rate": 1.2544323044848181e-05, + "loss": 0.0928, + "step": 48585 + }, + { + "epoch": 2.27, + "learning_rate": 1.2543539259793395e-05, + "loss": 0.1047, + "step": 48590 + }, + { + "epoch": 2.27, + "learning_rate": 1.2542755474738609e-05, + "loss": 0.144, + "step": 48595 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541971689683823e-05, + "loss": 0.1413, + "step": 48600 + }, + { + "epoch": 2.27, + "learning_rate": 1.2541187904629035e-05, + "loss": 0.1345, + "step": 48605 + }, + { + "epoch": 2.27, + "learning_rate": 1.254040411957425e-05, + "loss": 0.2603, + "step": 48610 + }, + { + "epoch": 2.27, + "learning_rate": 1.2539620334519463e-05, + "loss": 0.3291, + "step": 48615 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538836549464677e-05, + "loss": 0.0751, + "step": 48620 + }, + { + "epoch": 2.27, + "learning_rate": 1.2538052764409889e-05, + "loss": 0.0066, + "step": 48625 + }, + { + "epoch": 2.27, + "learning_rate": 1.2537268979355101e-05, + "loss": 0.076, + "step": 48630 + }, + { + "epoch": 2.27, + "learning_rate": 1.2536485194300317e-05, + "loss": 0.131, + "step": 48635 + }, + { + "epoch": 2.27, + "learning_rate": 1.2535701409245529e-05, + "loss": 0.0862, + "step": 48640 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534917624190743e-05, + "loss": 0.0589, + "step": 48645 + }, + { + "epoch": 2.27, + "learning_rate": 1.2534133839135955e-05, + "loss": 0.1927, + "step": 48650 + }, + { + "epoch": 2.27, + "learning_rate": 1.253335005408117e-05, + "loss": 0.103, + "step": 48655 + }, + { + "epoch": 2.27, + "learning_rate": 1.2532566269026383e-05, + "loss": 0.305, + "step": 48660 + }, + { + "epoch": 2.27, + "learning_rate": 1.2531782483971597e-05, + "loss": 0.3637, + "step": 48665 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530998698916809e-05, + "loss": 0.0596, + "step": 48670 + }, + { + "epoch": 2.27, + "learning_rate": 1.2530214913862025e-05, + "loss": 0.0414, + "step": 48675 + }, + { + "epoch": 2.27, + "learning_rate": 1.2529431128807237e-05, + "loss": 0.0615, + "step": 48680 + }, + { + "epoch": 2.27, + "learning_rate": 1.252864734375245e-05, + "loss": 0.1236, + "step": 48685 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527863558697663e-05, + "loss": 0.1301, + "step": 48690 + }, + { + "epoch": 2.27, + "learning_rate": 1.2527079773642877e-05, + "loss": 0.0993, + "step": 48695 + }, + { + "epoch": 2.27, + "learning_rate": 1.252629598858809e-05, + "loss": 0.1616, + "step": 48700 + }, + { + "epoch": 2.27, + "learning_rate": 1.2525512203533303e-05, + "loss": 0.1623, + "step": 48705 + }, + { + "epoch": 2.27, + "learning_rate": 1.2524728418478519e-05, + "loss": 0.3241, + "step": 48710 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523944633423731e-05, + "loss": 0.2883, + "step": 48715 + }, + { + "epoch": 2.27, + "learning_rate": 1.2523160848368945e-05, + "loss": 0.1131, + "step": 48720 + }, + { + "epoch": 2.27, + "learning_rate": 1.2522377063314157e-05, + "loss": 0.0587, + "step": 48725 + }, + { + "epoch": 2.27, + "learning_rate": 1.2521593278259373e-05, + "loss": 0.0835, + "step": 48730 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520809493204585e-05, + "loss": 0.0798, + "step": 48735 + }, + { + "epoch": 2.27, + "learning_rate": 1.2520025708149799e-05, + "loss": 0.112, + "step": 48740 + }, + { + "epoch": 2.27, + "learning_rate": 1.2519241923095011e-05, + "loss": 0.0651, + "step": 48745 + }, + { + "epoch": 2.27, + "learning_rate": 1.2518458138040227e-05, + "loss": 0.248, + "step": 48750 + }, + { + "epoch": 2.27, + "learning_rate": 1.2517674352985439e-05, + "loss": 0.2368, + "step": 48755 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516890567930651e-05, + "loss": 0.1879, + "step": 48760 + }, + { + "epoch": 2.28, + "learning_rate": 1.2516106782875865e-05, + "loss": 0.267, + "step": 48765 + }, + { + "epoch": 2.28, + "learning_rate": 1.2515322997821077e-05, + "loss": 0.0323, + "step": 48770 + }, + { + "epoch": 2.28, + "learning_rate": 1.2514539212766293e-05, + "loss": 0.0736, + "step": 48775 + }, + { + "epoch": 2.28, + "learning_rate": 1.2513755427711505e-05, + "loss": 0.0625, + "step": 48780 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512971642656719e-05, + "loss": 0.089, + "step": 48785 + }, + { + "epoch": 2.28, + "learning_rate": 1.2512187857601931e-05, + "loss": 0.0695, + "step": 48790 + }, + { + "epoch": 2.28, + "learning_rate": 1.2511404072547147e-05, + "loss": 0.1172, + "step": 48795 + }, + { + "epoch": 2.28, + "learning_rate": 1.2510620287492359e-05, + "loss": 0.2187, + "step": 48800 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509836502437573e-05, + "loss": 0.1755, + "step": 48805 + }, + { + "epoch": 2.28, + "learning_rate": 1.2509052717382787e-05, + "loss": 0.3494, + "step": 48810 + }, + { + "epoch": 2.28, + "learning_rate": 1.2508268932328e-05, + "loss": 0.3253, + "step": 48815 + }, + { + "epoch": 2.28, + "learning_rate": 1.2507485147273213e-05, + "loss": 0.1197, + "step": 48820 + }, + { + "epoch": 2.28, + "learning_rate": 1.2506701362218425e-05, + "loss": 0.0672, + "step": 48825 + }, + { + "epoch": 2.28, + "learning_rate": 1.250591757716364e-05, + "loss": 0.0997, + "step": 48830 + }, + { + "epoch": 2.28, + "learning_rate": 1.2505133792108853e-05, + "loss": 0.0682, + "step": 48835 + }, + { + "epoch": 2.28, + "learning_rate": 1.2504350007054067e-05, + "loss": 0.0378, + "step": 48840 + }, + { + "epoch": 2.28, + "learning_rate": 1.2503566221999279e-05, + "loss": 0.0569, + "step": 48845 + }, + { + "epoch": 2.28, + "learning_rate": 1.2502782436944494e-05, + "loss": 0.0817, + "step": 48850 + }, + { + "epoch": 2.28, + "learning_rate": 1.2501998651889707e-05, + "loss": 0.1632, + "step": 48855 + }, + { + "epoch": 2.28, + "learning_rate": 1.250121486683492e-05, + "loss": 0.2643, + "step": 48860 + }, + { + "epoch": 2.28, + "learning_rate": 1.2500431081780133e-05, + "loss": 0.419, + "step": 48865 + }, + { + "epoch": 2.28, + "learning_rate": 1.2499647296725348e-05, + "loss": 0.0458, + "step": 48870 + }, + { + "epoch": 2.28, + "learning_rate": 1.249886351167056e-05, + "loss": 0.0664, + "step": 48875 + }, + { + "epoch": 2.28, + "learning_rate": 1.2498079726615774e-05, + "loss": 0.0878, + "step": 48880 + }, + { + "epoch": 2.28, + "learning_rate": 1.2497295941560987e-05, + "loss": 0.069, + "step": 48885 + }, + { + "epoch": 2.28, + "learning_rate": 1.2496512156506199e-05, + "loss": 0.1751, + "step": 48890 + }, + { + "epoch": 2.28, + "learning_rate": 1.2495728371451415e-05, + "loss": 0.1207, + "step": 48895 + }, + { + "epoch": 2.28, + "learning_rate": 1.2494944586396627e-05, + "loss": 0.1929, + "step": 48900 + }, + { + "epoch": 2.28, + "learning_rate": 1.249416080134184e-05, + "loss": 0.2412, + "step": 48905 + }, + { + "epoch": 2.28, + "learning_rate": 1.2493377016287055e-05, + "loss": 0.2499, + "step": 48910 + }, + { + "epoch": 2.28, + "learning_rate": 1.2492593231232268e-05, + "loss": 0.3758, + "step": 48915 + }, + { + "epoch": 2.28, + "learning_rate": 1.249180944617748e-05, + "loss": 0.0861, + "step": 48920 + }, + { + "epoch": 2.28, + "learning_rate": 1.2491025661122696e-05, + "loss": 0.051, + "step": 48925 + }, + { + "epoch": 2.28, + "learning_rate": 1.2490241876067908e-05, + "loss": 0.0319, + "step": 48930 + }, + { + "epoch": 2.28, + "learning_rate": 1.2489458091013122e-05, + "loss": 0.0455, + "step": 48935 + }, + { + "epoch": 2.28, + "learning_rate": 1.2488674305958335e-05, + "loss": 0.0583, + "step": 48940 + }, + { + "epoch": 2.28, + "learning_rate": 1.248789052090355e-05, + "loss": 0.0818, + "step": 48945 + }, + { + "epoch": 2.28, + "learning_rate": 1.2487106735848762e-05, + "loss": 0.1859, + "step": 48950 + }, + { + "epoch": 2.28, + "learning_rate": 1.2486322950793975e-05, + "loss": 0.1899, + "step": 48955 + }, + { + "epoch": 2.28, + "learning_rate": 1.2485539165739189e-05, + "loss": 0.4047, + "step": 48960 + }, + { + "epoch": 2.28, + "learning_rate": 1.24847553806844e-05, + "loss": 0.3151, + "step": 48965 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483971595629616e-05, + "loss": 0.0545, + "step": 48970 + }, + { + "epoch": 2.29, + "learning_rate": 1.2483187810574829e-05, + "loss": 0.0155, + "step": 48975 + }, + { + "epoch": 2.29, + "learning_rate": 1.2482404025520042e-05, + "loss": 0.0754, + "step": 48980 + }, + { + "epoch": 2.29, + "learning_rate": 1.2481620240465255e-05, + "loss": 0.039, + "step": 48985 + }, + { + "epoch": 2.29, + "learning_rate": 1.248083645541047e-05, + "loss": 0.08, + "step": 48990 + }, + { + "epoch": 2.29, + "learning_rate": 1.2480052670355682e-05, + "loss": 0.1233, + "step": 48995 + }, + { + "epoch": 2.29, + "learning_rate": 1.2479268885300896e-05, + "loss": 0.1165, + "step": 49000 + }, + { + "epoch": 2.29, + "learning_rate": 1.2478485100246109e-05, + "loss": 0.1813, + "step": 49005 + }, + { + "epoch": 2.29, + "learning_rate": 1.2477701315191324e-05, + "loss": 0.2639, + "step": 49010 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476917530136536e-05, + "loss": 0.3061, + "step": 49015 + }, + { + "epoch": 2.29, + "learning_rate": 1.2476133745081749e-05, + "loss": 0.0448, + "step": 49020 + }, + { + "epoch": 2.29, + "learning_rate": 1.2475349960026964e-05, + "loss": 0.0651, + "step": 49025 + }, + { + "epoch": 2.29, + "learning_rate": 1.2474566174972176e-05, + "loss": 0.0556, + "step": 49030 + }, + { + "epoch": 2.29, + "learning_rate": 1.247378238991739e-05, + "loss": 0.0711, + "step": 49035 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472998604862603e-05, + "loss": 0.0944, + "step": 49040 + }, + { + "epoch": 2.29, + "learning_rate": 1.2472214819807818e-05, + "loss": 0.1723, + "step": 49045 + }, + { + "epoch": 2.29, + "learning_rate": 1.247143103475303e-05, + "loss": 0.1437, + "step": 49050 + }, + { + "epoch": 2.29, + "learning_rate": 1.2470647249698244e-05, + "loss": 0.1611, + "step": 49055 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469863464643456e-05, + "loss": 0.2474, + "step": 49060 + }, + { + "epoch": 2.29, + "learning_rate": 1.2469079679588672e-05, + "loss": 0.3262, + "step": 49065 + }, + { + "epoch": 2.29, + "learning_rate": 1.2468295894533884e-05, + "loss": 0.0379, + "step": 49070 + }, + { + "epoch": 2.29, + "learning_rate": 1.2467512109479098e-05, + "loss": 0.0395, + "step": 49075 + }, + { + "epoch": 2.29, + "learning_rate": 1.246672832442431e-05, + "loss": 0.0432, + "step": 49080 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465944539369523e-05, + "loss": 0.075, + "step": 49085 + }, + { + "epoch": 2.29, + "learning_rate": 1.2465160754314738e-05, + "loss": 0.051, + "step": 49090 + }, + { + "epoch": 2.29, + "learning_rate": 1.246437696925995e-05, + "loss": 0.1434, + "step": 49095 + }, + { + "epoch": 2.29, + "learning_rate": 1.2463593184205164e-05, + "loss": 0.1986, + "step": 49100 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462809399150377e-05, + "loss": 0.1596, + "step": 49105 + }, + { + "epoch": 2.29, + "learning_rate": 1.2462025614095592e-05, + "loss": 0.3813, + "step": 49110 + }, + { + "epoch": 2.29, + "learning_rate": 1.2461241829040804e-05, + "loss": 0.2625, + "step": 49115 + }, + { + "epoch": 2.29, + "learning_rate": 1.2460458043986018e-05, + "loss": 0.0344, + "step": 49120 + }, + { + "epoch": 2.29, + "learning_rate": 1.2459674258931232e-05, + "loss": 0.034, + "step": 49125 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458890473876446e-05, + "loss": 0.0638, + "step": 49130 + }, + { + "epoch": 2.29, + "learning_rate": 1.2458106688821658e-05, + "loss": 0.045, + "step": 49135 + }, + { + "epoch": 2.29, + "learning_rate": 1.2457322903766872e-05, + "loss": 0.0743, + "step": 49140 + }, + { + "epoch": 2.29, + "learning_rate": 1.2456539118712086e-05, + "loss": 0.1166, + "step": 49145 + }, + { + "epoch": 2.29, + "learning_rate": 1.2455755333657298e-05, + "loss": 0.1078, + "step": 49150 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454971548602512e-05, + "loss": 0.1802, + "step": 49155 + }, + { + "epoch": 2.29, + "learning_rate": 1.2454187763547724e-05, + "loss": 0.3078, + "step": 49160 + }, + { + "epoch": 2.29, + "learning_rate": 1.245340397849294e-05, + "loss": 0.3489, + "step": 49165 + }, + { + "epoch": 2.29, + "learning_rate": 1.2452620193438152e-05, + "loss": 0.0332, + "step": 49170 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451836408383366e-05, + "loss": 0.0592, + "step": 49175 + }, + { + "epoch": 2.29, + "learning_rate": 1.2451052623328578e-05, + "loss": 0.0291, + "step": 49180 + }, + { + "epoch": 2.3, + "learning_rate": 1.2450268838273794e-05, + "loss": 0.0695, + "step": 49185 + }, + { + "epoch": 2.3, + "learning_rate": 1.2449485053219006e-05, + "loss": 0.081, + "step": 49190 + }, + { + "epoch": 2.3, + "learning_rate": 1.244870126816422e-05, + "loss": 0.1311, + "step": 49195 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447917483109432e-05, + "loss": 0.1058, + "step": 49200 + }, + { + "epoch": 2.3, + "learning_rate": 1.2447133698054648e-05, + "loss": 0.1388, + "step": 49205 + }, + { + "epoch": 2.3, + "learning_rate": 1.244634991299986e-05, + "loss": 0.3073, + "step": 49210 + }, + { + "epoch": 2.3, + "learning_rate": 1.2445566127945072e-05, + "loss": 0.2426, + "step": 49215 + }, + { + "epoch": 2.3, + "learning_rate": 1.2444782342890286e-05, + "loss": 0.0957, + "step": 49220 + }, + { + "epoch": 2.3, + "learning_rate": 1.24439985578355e-05, + "loss": 0.055, + "step": 49225 + }, + { + "epoch": 2.3, + "learning_rate": 1.2443214772780714e-05, + "loss": 0.0305, + "step": 49230 + }, + { + "epoch": 2.3, + "learning_rate": 1.2442430987725926e-05, + "loss": 0.1408, + "step": 49235 + }, + { + "epoch": 2.3, + "learning_rate": 1.2441647202671142e-05, + "loss": 0.0908, + "step": 49240 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440863417616354e-05, + "loss": 0.0978, + "step": 49245 + }, + { + "epoch": 2.3, + "learning_rate": 1.2440079632561568e-05, + "loss": 0.1482, + "step": 49250 + }, + { + "epoch": 2.3, + "learning_rate": 1.243929584750678e-05, + "loss": 0.2039, + "step": 49255 + }, + { + "epoch": 2.3, + "learning_rate": 1.2438512062451996e-05, + "loss": 0.3077, + "step": 49260 + }, + { + "epoch": 2.3, + "learning_rate": 1.2437728277397208e-05, + "loss": 0.1668, + "step": 49265 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436944492342422e-05, + "loss": 0.0675, + "step": 49270 + }, + { + "epoch": 2.3, + "learning_rate": 1.2436160707287634e-05, + "loss": 0.0325, + "step": 49275 + }, + { + "epoch": 2.3, + "learning_rate": 1.2435376922232846e-05, + "loss": 0.0352, + "step": 49280 + }, + { + "epoch": 2.3, + "learning_rate": 1.2434593137178062e-05, + "loss": 0.0273, + "step": 49285 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433809352123274e-05, + "loss": 0.1357, + "step": 49290 + }, + { + "epoch": 2.3, + "learning_rate": 1.2433025567068488e-05, + "loss": 0.1806, + "step": 49295 + }, + { + "epoch": 2.3, + "learning_rate": 1.24322417820137e-05, + "loss": 0.1102, + "step": 49300 + }, + { + "epoch": 2.3, + "learning_rate": 1.2431457996958916e-05, + "loss": 0.2882, + "step": 49305 + }, + { + "epoch": 2.3, + "learning_rate": 1.2430674211904128e-05, + "loss": 0.301, + "step": 49310 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429890426849342e-05, + "loss": 0.2907, + "step": 49315 + }, + { + "epoch": 2.3, + "learning_rate": 1.2429106641794554e-05, + "loss": 0.0655, + "step": 49320 + }, + { + "epoch": 2.3, + "learning_rate": 1.242832285673977e-05, + "loss": 0.0572, + "step": 49325 + }, + { + "epoch": 2.3, + "learning_rate": 1.2427539071684982e-05, + "loss": 0.0607, + "step": 49330 + }, + { + "epoch": 2.3, + "learning_rate": 1.2426755286630196e-05, + "loss": 0.0779, + "step": 49335 + }, + { + "epoch": 2.3, + "learning_rate": 1.242597150157541e-05, + "loss": 0.0683, + "step": 49340 + }, + { + "epoch": 2.3, + "learning_rate": 1.2425187716520622e-05, + "loss": 0.1186, + "step": 49345 + }, + { + "epoch": 2.3, + "learning_rate": 1.2424403931465836e-05, + "loss": 0.1542, + "step": 49350 + }, + { + "epoch": 2.3, + "learning_rate": 1.2423620146411048e-05, + "loss": 0.11, + "step": 49355 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422836361356264e-05, + "loss": 0.2608, + "step": 49360 + }, + { + "epoch": 2.3, + "learning_rate": 1.2422052576301476e-05, + "loss": 0.3212, + "step": 49365 + }, + { + "epoch": 2.3, + "learning_rate": 1.242126879124669e-05, + "loss": 0.031, + "step": 49370 + }, + { + "epoch": 2.3, + "learning_rate": 1.2420485006191902e-05, + "loss": 0.0297, + "step": 49375 + }, + { + "epoch": 2.3, + "learning_rate": 1.2419701221137118e-05, + "loss": 0.1169, + "step": 49380 + }, + { + "epoch": 2.3, + "learning_rate": 1.241891743608233e-05, + "loss": 0.0557, + "step": 49385 + }, + { + "epoch": 2.3, + "learning_rate": 1.2418133651027544e-05, + "loss": 0.1283, + "step": 49390 + }, + { + "epoch": 2.3, + "learning_rate": 1.2417349865972756e-05, + "loss": 0.114, + "step": 49395 + }, + { + "epoch": 2.31, + "learning_rate": 1.2416566080917972e-05, + "loss": 0.0548, + "step": 49400 + }, + { + "epoch": 2.31, + "learning_rate": 1.2415782295863184e-05, + "loss": 0.1379, + "step": 49405 + }, + { + "epoch": 2.31, + "learning_rate": 1.2414998510808396e-05, + "loss": 0.2058, + "step": 49410 + }, + { + "epoch": 2.31, + "learning_rate": 1.241421472575361e-05, + "loss": 0.2836, + "step": 49415 + }, + { + "epoch": 2.31, + "learning_rate": 1.2413430940698822e-05, + "loss": 0.0669, + "step": 49420 + }, + { + "epoch": 2.31, + "learning_rate": 1.2412647155644038e-05, + "loss": 0.0242, + "step": 49425 + }, + { + "epoch": 2.31, + "learning_rate": 1.241186337058925e-05, + "loss": 0.0977, + "step": 49430 + }, + { + "epoch": 2.31, + "learning_rate": 1.2411079585534464e-05, + "loss": 0.0507, + "step": 49435 + }, + { + "epoch": 2.31, + "learning_rate": 1.2410295800479678e-05, + "loss": 0.063, + "step": 49440 + }, + { + "epoch": 2.31, + "learning_rate": 1.2409512015424892e-05, + "loss": 0.1527, + "step": 49445 + }, + { + "epoch": 2.31, + "learning_rate": 1.2408728230370104e-05, + "loss": 0.0736, + "step": 49450 + }, + { + "epoch": 2.31, + "learning_rate": 1.240794444531532e-05, + "loss": 0.2219, + "step": 49455 + }, + { + "epoch": 2.31, + "learning_rate": 1.2407160660260532e-05, + "loss": 0.2611, + "step": 49460 + }, + { + "epoch": 2.31, + "learning_rate": 1.2406376875205746e-05, + "loss": 0.3693, + "step": 49465 + }, + { + "epoch": 2.31, + "learning_rate": 1.2405593090150958e-05, + "loss": 0.1247, + "step": 49470 + }, + { + "epoch": 2.31, + "learning_rate": 1.240480930509617e-05, + "loss": 0.0177, + "step": 49475 + }, + { + "epoch": 2.31, + "learning_rate": 1.2404025520041386e-05, + "loss": 0.0873, + "step": 49480 + }, + { + "epoch": 2.31, + "learning_rate": 1.2403241734986598e-05, + "loss": 0.0888, + "step": 49485 + }, + { + "epoch": 2.31, + "learning_rate": 1.2402457949931812e-05, + "loss": 0.0582, + "step": 49490 + }, + { + "epoch": 2.31, + "learning_rate": 1.2401674164877024e-05, + "loss": 0.1075, + "step": 49495 + }, + { + "epoch": 2.31, + "learning_rate": 1.240089037982224e-05, + "loss": 0.1859, + "step": 49500 + }, + { + "epoch": 2.31, + "learning_rate": 1.2400106594767452e-05, + "loss": 0.1537, + "step": 49505 + }, + { + "epoch": 2.31, + "learning_rate": 1.2399322809712666e-05, + "loss": 0.2297, + "step": 49510 + }, + { + "epoch": 2.31, + "learning_rate": 1.2398695781668836e-05, + "loss": 0.2882, + "step": 49515 + }, + { + "epoch": 2.31, + "learning_rate": 1.239791199661405e-05, + "loss": 0.0205, + "step": 49520 + }, + { + "epoch": 2.31, + "learning_rate": 1.2397128211559264e-05, + "loss": 0.0498, + "step": 49525 + }, + { + "epoch": 2.31, + "learning_rate": 1.2396344426504476e-05, + "loss": 0.0858, + "step": 49530 + }, + { + "epoch": 2.31, + "learning_rate": 1.2395560641449692e-05, + "loss": 0.1835, + "step": 49535 + }, + { + "epoch": 2.31, + "learning_rate": 1.2394776856394904e-05, + "loss": 0.1171, + "step": 49540 + }, + { + "epoch": 2.31, + "learning_rate": 1.2393993071340116e-05, + "loss": 0.0876, + "step": 49545 + }, + { + "epoch": 2.31, + "learning_rate": 1.239320928628533e-05, + "loss": 0.0962, + "step": 49550 + }, + { + "epoch": 2.31, + "learning_rate": 1.2392425501230542e-05, + "loss": 0.0861, + "step": 49555 + }, + { + "epoch": 2.31, + "learning_rate": 1.2391641716175758e-05, + "loss": 0.2315, + "step": 49560 + }, + { + "epoch": 2.31, + "learning_rate": 1.239085793112097e-05, + "loss": 0.278, + "step": 49565 + }, + { + "epoch": 2.31, + "learning_rate": 1.2390074146066184e-05, + "loss": 0.0506, + "step": 49570 + }, + { + "epoch": 2.31, + "learning_rate": 1.2389290361011396e-05, + "loss": 0.0443, + "step": 49575 + }, + { + "epoch": 2.31, + "learning_rate": 1.2388506575956612e-05, + "loss": 0.1289, + "step": 49580 + }, + { + "epoch": 2.31, + "learning_rate": 1.2387722790901824e-05, + "loss": 0.0433, + "step": 49585 + }, + { + "epoch": 2.31, + "learning_rate": 1.2386939005847038e-05, + "loss": 0.1379, + "step": 49590 + }, + { + "epoch": 2.31, + "learning_rate": 1.238615522079225e-05, + "loss": 0.108, + "step": 49595 + }, + { + "epoch": 2.31, + "learning_rate": 1.2385371435737466e-05, + "loss": 0.1276, + "step": 49600 + }, + { + "epoch": 2.31, + "learning_rate": 1.2384587650682678e-05, + "loss": 0.1717, + "step": 49605 + }, + { + "epoch": 2.31, + "learning_rate": 1.238380386562789e-05, + "loss": 0.2584, + "step": 49610 + }, + { + "epoch": 2.32, + "learning_rate": 1.2383020080573106e-05, + "loss": 0.2265, + "step": 49615 + }, + { + "epoch": 2.32, + "learning_rate": 1.2382236295518318e-05, + "loss": 0.1069, + "step": 49620 + }, + { + "epoch": 2.32, + "learning_rate": 1.2381452510463532e-05, + "loss": 0.0404, + "step": 49625 + }, + { + "epoch": 2.32, + "learning_rate": 1.2380668725408744e-05, + "loss": 0.0704, + "step": 49630 + }, + { + "epoch": 2.32, + "learning_rate": 1.237988494035396e-05, + "loss": 0.0985, + "step": 49635 + }, + { + "epoch": 2.32, + "learning_rate": 1.2379101155299172e-05, + "loss": 0.084, + "step": 49640 + }, + { + "epoch": 2.32, + "learning_rate": 1.2378317370244386e-05, + "loss": 0.2373, + "step": 49645 + }, + { + "epoch": 2.32, + "learning_rate": 1.2377533585189598e-05, + "loss": 0.0689, + "step": 49650 + }, + { + "epoch": 2.32, + "learning_rate": 1.2376749800134814e-05, + "loss": 0.1609, + "step": 49655 + }, + { + "epoch": 2.32, + "learning_rate": 1.2375966015080026e-05, + "loss": 0.2887, + "step": 49660 + }, + { + "epoch": 2.32, + "learning_rate": 1.237518223002524e-05, + "loss": 0.2715, + "step": 49665 + }, + { + "epoch": 2.32, + "learning_rate": 1.2374398444970452e-05, + "loss": 0.0749, + "step": 49670 + }, + { + "epoch": 2.32, + "learning_rate": 1.2373614659915664e-05, + "loss": 0.0717, + "step": 49675 + }, + { + "epoch": 2.32, + "learning_rate": 1.237283087486088e-05, + "loss": 0.0777, + "step": 49680 + }, + { + "epoch": 2.32, + "learning_rate": 1.2372047089806092e-05, + "loss": 0.0669, + "step": 49685 + }, + { + "epoch": 2.32, + "learning_rate": 1.2371263304751306e-05, + "loss": 0.0836, + "step": 49690 + }, + { + "epoch": 2.32, + "learning_rate": 1.2370479519696518e-05, + "loss": 0.2553, + "step": 49695 + }, + { + "epoch": 2.32, + "learning_rate": 1.2369695734641734e-05, + "loss": 0.1672, + "step": 49700 + }, + { + "epoch": 2.32, + "learning_rate": 1.2368911949586946e-05, + "loss": 0.1128, + "step": 49705 + }, + { + "epoch": 2.32, + "learning_rate": 1.236812816453216e-05, + "loss": 0.2423, + "step": 49710 + }, + { + "epoch": 2.32, + "learning_rate": 1.2367344379477374e-05, + "loss": 0.3124, + "step": 49715 + }, + { + "epoch": 2.32, + "learning_rate": 1.2366560594422588e-05, + "loss": 0.0414, + "step": 49720 + }, + { + "epoch": 2.32, + "learning_rate": 1.23657768093678e-05, + "loss": 0.0981, + "step": 49725 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364993024313014e-05, + "loss": 0.1131, + "step": 49730 + }, + { + "epoch": 2.32, + "learning_rate": 1.2364209239258228e-05, + "loss": 0.0668, + "step": 49735 + }, + { + "epoch": 2.32, + "learning_rate": 1.236342545420344e-05, + "loss": 0.1405, + "step": 49740 + }, + { + "epoch": 2.32, + "learning_rate": 1.2362641669148654e-05, + "loss": 0.0965, + "step": 49745 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361857884093866e-05, + "loss": 0.1463, + "step": 49750 + }, + { + "epoch": 2.32, + "learning_rate": 1.2361074099039082e-05, + "loss": 0.2175, + "step": 49755 + }, + { + "epoch": 2.32, + "learning_rate": 1.2360290313984294e-05, + "loss": 0.1888, + "step": 49760 + }, + { + "epoch": 2.32, + "learning_rate": 1.2359506528929508e-05, + "loss": 0.3132, + "step": 49765 + }, + { + "epoch": 2.32, + "learning_rate": 1.235872274387472e-05, + "loss": 0.1664, + "step": 49770 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357938958819935e-05, + "loss": 0.0261, + "step": 49775 + }, + { + "epoch": 2.32, + "learning_rate": 1.2357155173765148e-05, + "loss": 0.015, + "step": 49780 + }, + { + "epoch": 2.32, + "learning_rate": 1.2356371388710362e-05, + "loss": 0.0923, + "step": 49785 + }, + { + "epoch": 2.32, + "learning_rate": 1.2355587603655574e-05, + "loss": 0.0871, + "step": 49790 + }, + { + "epoch": 2.32, + "learning_rate": 1.235480381860079e-05, + "loss": 0.1367, + "step": 49795 + }, + { + "epoch": 2.32, + "learning_rate": 1.2354020033546002e-05, + "loss": 0.1528, + "step": 49800 + }, + { + "epoch": 2.32, + "learning_rate": 1.2353236248491214e-05, + "loss": 0.1766, + "step": 49805 + }, + { + "epoch": 2.32, + "learning_rate": 1.2352452463436428e-05, + "loss": 0.2554, + "step": 49810 + }, + { + "epoch": 2.32, + "learning_rate": 1.2351668678381642e-05, + "loss": 0.3166, + "step": 49815 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350884893326856e-05, + "loss": 0.0502, + "step": 49820 + }, + { + "epoch": 2.32, + "learning_rate": 1.2350101108272068e-05, + "loss": 0.0437, + "step": 49825 + }, + { + "epoch": 2.33, + "learning_rate": 1.2349317323217282e-05, + "loss": 0.0936, + "step": 49830 + }, + { + "epoch": 2.33, + "learning_rate": 1.2348533538162496e-05, + "loss": 0.0694, + "step": 49835 + }, + { + "epoch": 2.33, + "learning_rate": 1.234774975310771e-05, + "loss": 0.0366, + "step": 49840 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346965968052922e-05, + "loss": 0.0825, + "step": 49845 + }, + { + "epoch": 2.33, + "learning_rate": 1.2346182182998137e-05, + "loss": 0.1329, + "step": 49850 + }, + { + "epoch": 2.33, + "learning_rate": 1.234539839794335e-05, + "loss": 0.0959, + "step": 49855 + }, + { + "epoch": 2.33, + "learning_rate": 1.2344614612888563e-05, + "loss": 0.1717, + "step": 49860 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343830827833776e-05, + "loss": 0.2998, + "step": 49865 + }, + { + "epoch": 2.33, + "learning_rate": 1.2343047042778988e-05, + "loss": 0.123, + "step": 49870 + }, + { + "epoch": 2.33, + "learning_rate": 1.2342263257724203e-05, + "loss": 0.0147, + "step": 49875 + }, + { + "epoch": 2.33, + "learning_rate": 1.2341479472669416e-05, + "loss": 0.0361, + "step": 49880 + }, + { + "epoch": 2.33, + "learning_rate": 1.234069568761463e-05, + "loss": 0.089, + "step": 49885 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339911902559842e-05, + "loss": 0.057, + "step": 49890 + }, + { + "epoch": 2.33, + "learning_rate": 1.2339128117505057e-05, + "loss": 0.066, + "step": 49895 + }, + { + "epoch": 2.33, + "learning_rate": 1.233834433245027e-05, + "loss": 0.2305, + "step": 49900 + }, + { + "epoch": 2.33, + "learning_rate": 1.2337560547395483e-05, + "loss": 0.2467, + "step": 49905 + }, + { + "epoch": 2.33, + "learning_rate": 1.2336776762340696e-05, + "loss": 0.226, + "step": 49910 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335992977285911e-05, + "loss": 0.2258, + "step": 49915 + }, + { + "epoch": 2.33, + "learning_rate": 1.2335209192231123e-05, + "loss": 0.0611, + "step": 49920 + }, + { + "epoch": 2.33, + "learning_rate": 1.2334425407176337e-05, + "loss": 0.0755, + "step": 49925 + }, + { + "epoch": 2.33, + "learning_rate": 1.2333641622121551e-05, + "loss": 0.0437, + "step": 49930 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332857837066764e-05, + "loss": 0.1033, + "step": 49935 + }, + { + "epoch": 2.33, + "learning_rate": 1.2332074052011977e-05, + "loss": 0.0961, + "step": 49940 + }, + { + "epoch": 2.33, + "learning_rate": 1.233129026695719e-05, + "loss": 0.0845, + "step": 49945 + }, + { + "epoch": 2.33, + "learning_rate": 1.2330506481902405e-05, + "loss": 0.1404, + "step": 49950 + }, + { + "epoch": 2.33, + "learning_rate": 1.2329722696847617e-05, + "loss": 0.2093, + "step": 49955 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328938911792831e-05, + "loss": 0.1984, + "step": 49960 + }, + { + "epoch": 2.33, + "learning_rate": 1.2328155126738044e-05, + "loss": 0.2746, + "step": 49965 + }, + { + "epoch": 2.33, + "learning_rate": 1.232737134168326e-05, + "loss": 0.0631, + "step": 49970 + }, + { + "epoch": 2.33, + "learning_rate": 1.2326587556628471e-05, + "loss": 0.0268, + "step": 49975 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325803771573685e-05, + "loss": 0.0344, + "step": 49980 + }, + { + "epoch": 2.33, + "learning_rate": 1.2325019986518897e-05, + "loss": 0.0861, + "step": 49985 + }, + { + "epoch": 2.33, + "learning_rate": 1.2324236201464113e-05, + "loss": 0.1821, + "step": 49990 + }, + { + "epoch": 2.33, + "learning_rate": 1.2323452416409325e-05, + "loss": 0.1076, + "step": 49995 + }, + { + "epoch": 2.33, + "learning_rate": 1.2322668631354538e-05, + "loss": 0.1184, + "step": 50000 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321884846299751e-05, + "loss": 0.0798, + "step": 50005 + }, + { + "epoch": 2.33, + "learning_rate": 1.2321101061244964e-05, + "loss": 0.2843, + "step": 50010 + }, + { + "epoch": 2.33, + "learning_rate": 1.232031727619018e-05, + "loss": 0.1617, + "step": 50015 + }, + { + "epoch": 2.33, + "learning_rate": 1.2319533491135391e-05, + "loss": 0.0382, + "step": 50020 + }, + { + "epoch": 2.33, + "learning_rate": 1.2318749706080605e-05, + "loss": 0.0547, + "step": 50025 + }, + { + "epoch": 2.33, + "learning_rate": 1.231796592102582e-05, + "loss": 0.0349, + "step": 50030 + }, + { + "epoch": 2.33, + "learning_rate": 1.2317182135971033e-05, + "loss": 0.0498, + "step": 50035 + }, + { + "epoch": 2.33, + "learning_rate": 1.2316398350916245e-05, + "loss": 0.0729, + "step": 50040 + }, + { + "epoch": 2.34, + "learning_rate": 1.231561456586146e-05, + "loss": 0.1601, + "step": 50045 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314830780806673e-05, + "loss": 0.1798, + "step": 50050 + }, + { + "epoch": 2.34, + "learning_rate": 1.2314046995751887e-05, + "loss": 0.1981, + "step": 50055 + }, + { + "epoch": 2.34, + "learning_rate": 1.23132632106971e-05, + "loss": 0.3657, + "step": 50060 + }, + { + "epoch": 2.34, + "learning_rate": 1.2312479425642312e-05, + "loss": 0.3816, + "step": 50065 + }, + { + "epoch": 2.34, + "learning_rate": 1.2311695640587527e-05, + "loss": 0.0421, + "step": 50070 + }, + { + "epoch": 2.34, + "learning_rate": 1.231091185553274e-05, + "loss": 0.0836, + "step": 50075 + }, + { + "epoch": 2.34, + "learning_rate": 1.2310128070477953e-05, + "loss": 0.0849, + "step": 50080 + }, + { + "epoch": 2.34, + "learning_rate": 1.2309344285423165e-05, + "loss": 0.1037, + "step": 50085 + }, + { + "epoch": 2.34, + "learning_rate": 1.2308560500368381e-05, + "loss": 0.1266, + "step": 50090 + }, + { + "epoch": 2.34, + "learning_rate": 1.2307776715313593e-05, + "loss": 0.1167, + "step": 50095 + }, + { + "epoch": 2.34, + "learning_rate": 1.2306992930258807e-05, + "loss": 0.1929, + "step": 50100 + }, + { + "epoch": 2.34, + "learning_rate": 1.230620914520402e-05, + "loss": 0.2002, + "step": 50105 + }, + { + "epoch": 2.34, + "learning_rate": 1.2305425360149235e-05, + "loss": 0.3381, + "step": 50110 + }, + { + "epoch": 2.34, + "learning_rate": 1.2304641575094447e-05, + "loss": 0.3687, + "step": 50115 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303857790039661e-05, + "loss": 0.0699, + "step": 50120 + }, + { + "epoch": 2.34, + "learning_rate": 1.2303074004984873e-05, + "loss": 0.0607, + "step": 50125 + }, + { + "epoch": 2.34, + "learning_rate": 1.2302290219930087e-05, + "loss": 0.0523, + "step": 50130 + }, + { + "epoch": 2.34, + "learning_rate": 1.2301506434875301e-05, + "loss": 0.0631, + "step": 50135 + }, + { + "epoch": 2.34, + "learning_rate": 1.2300722649820513e-05, + "loss": 0.1482, + "step": 50140 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299938864765727e-05, + "loss": 0.1021, + "step": 50145 + }, + { + "epoch": 2.34, + "learning_rate": 1.2299155079710941e-05, + "loss": 0.1413, + "step": 50150 + }, + { + "epoch": 2.34, + "learning_rate": 1.2298371294656155e-05, + "loss": 0.1853, + "step": 50155 + }, + { + "epoch": 2.34, + "learning_rate": 1.2297587509601367e-05, + "loss": 0.2019, + "step": 50160 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296803724546583e-05, + "loss": 0.3643, + "step": 50165 + }, + { + "epoch": 2.34, + "learning_rate": 1.2296019939491795e-05, + "loss": 0.0704, + "step": 50170 + }, + { + "epoch": 2.34, + "learning_rate": 1.2295236154437009e-05, + "loss": 0.0761, + "step": 50175 + }, + { + "epoch": 2.34, + "learning_rate": 1.2294452369382221e-05, + "loss": 0.0657, + "step": 50180 + }, + { + "epoch": 2.34, + "learning_rate": 1.2293668584327437e-05, + "loss": 0.0956, + "step": 50185 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292884799272649e-05, + "loss": 0.057, + "step": 50190 + }, + { + "epoch": 2.34, + "learning_rate": 1.2292101014217861e-05, + "loss": 0.0977, + "step": 50195 + }, + { + "epoch": 2.34, + "learning_rate": 1.2291317229163075e-05, + "loss": 0.1418, + "step": 50200 + }, + { + "epoch": 2.34, + "learning_rate": 1.2290533444108287e-05, + "loss": 0.2145, + "step": 50205 + }, + { + "epoch": 2.34, + "learning_rate": 1.2289749659053503e-05, + "loss": 0.1492, + "step": 50210 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288965873998715e-05, + "loss": 0.3733, + "step": 50215 + }, + { + "epoch": 2.34, + "learning_rate": 1.2288182088943929e-05, + "loss": 0.0474, + "step": 50220 + }, + { + "epoch": 2.34, + "learning_rate": 1.2287398303889141e-05, + "loss": 0.0178, + "step": 50225 + }, + { + "epoch": 2.34, + "learning_rate": 1.2286614518834357e-05, + "loss": 0.1194, + "step": 50230 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285830733779569e-05, + "loss": 0.0799, + "step": 50235 + }, + { + "epoch": 2.34, + "learning_rate": 1.2285046948724783e-05, + "loss": 0.1206, + "step": 50240 + }, + { + "epoch": 2.34, + "learning_rate": 1.2284263163669997e-05, + "loss": 0.1841, + "step": 50245 + }, + { + "epoch": 2.34, + "learning_rate": 1.228347937861521e-05, + "loss": 0.1276, + "step": 50250 + }, + { + "epoch": 2.34, + "learning_rate": 1.2282695593560423e-05, + "loss": 0.2027, + "step": 50255 + }, + { + "epoch": 2.35, + "learning_rate": 1.2281911808505635e-05, + "loss": 0.1799, + "step": 50260 + }, + { + "epoch": 2.35, + "learning_rate": 1.228112802345085e-05, + "loss": 0.375, + "step": 50265 + }, + { + "epoch": 2.35, + "learning_rate": 1.2280344238396063e-05, + "loss": 0.0396, + "step": 50270 + }, + { + "epoch": 2.35, + "learning_rate": 1.2279560453341277e-05, + "loss": 0.0545, + "step": 50275 + }, + { + "epoch": 2.35, + "learning_rate": 1.2278776668286489e-05, + "loss": 0.0606, + "step": 50280 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277992883231705e-05, + "loss": 0.0914, + "step": 50285 + }, + { + "epoch": 2.35, + "learning_rate": 1.2277209098176917e-05, + "loss": 0.0412, + "step": 50290 + }, + { + "epoch": 2.35, + "learning_rate": 1.2276425313122131e-05, + "loss": 0.1434, + "step": 50295 + }, + { + "epoch": 2.35, + "learning_rate": 1.2275641528067343e-05, + "loss": 0.2036, + "step": 50300 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274857743012559e-05, + "loss": 0.219, + "step": 50305 + }, + { + "epoch": 2.35, + "learning_rate": 1.2274073957957771e-05, + "loss": 0.263, + "step": 50310 + }, + { + "epoch": 2.35, + "learning_rate": 1.2273290172902985e-05, + "loss": 0.2972, + "step": 50315 + }, + { + "epoch": 2.35, + "learning_rate": 1.2272506387848197e-05, + "loss": 0.0396, + "step": 50320 + }, + { + "epoch": 2.35, + "learning_rate": 1.227172260279341e-05, + "loss": 0.0763, + "step": 50325 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270938817738625e-05, + "loss": 0.0389, + "step": 50330 + }, + { + "epoch": 2.35, + "learning_rate": 1.2270155032683837e-05, + "loss": 0.0813, + "step": 50335 + }, + { + "epoch": 2.35, + "learning_rate": 1.2269371247629051e-05, + "loss": 0.1306, + "step": 50340 + }, + { + "epoch": 2.35, + "learning_rate": 1.2268587462574265e-05, + "loss": 0.0779, + "step": 50345 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267803677519479e-05, + "loss": 0.187, + "step": 50350 + }, + { + "epoch": 2.35, + "learning_rate": 1.2267019892464691e-05, + "loss": 0.1718, + "step": 50355 + }, + { + "epoch": 2.35, + "learning_rate": 1.2266236107409905e-05, + "loss": 0.3207, + "step": 50360 + }, + { + "epoch": 2.35, + "learning_rate": 1.2265452322355119e-05, + "loss": 0.2648, + "step": 50365 + }, + { + "epoch": 2.35, + "learning_rate": 1.2264668537300333e-05, + "loss": 0.0264, + "step": 50370 + }, + { + "epoch": 2.35, + "learning_rate": 1.2263884752245545e-05, + "loss": 0.0418, + "step": 50375 + }, + { + "epoch": 2.35, + "learning_rate": 1.226310096719076e-05, + "loss": 0.0412, + "step": 50380 + }, + { + "epoch": 2.35, + "learning_rate": 1.2262317182135973e-05, + "loss": 0.0641, + "step": 50385 + }, + { + "epoch": 2.35, + "learning_rate": 1.2261533397081185e-05, + "loss": 0.1354, + "step": 50390 + }, + { + "epoch": 2.35, + "learning_rate": 1.2260749612026399e-05, + "loss": 0.0955, + "step": 50395 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259965826971611e-05, + "loss": 0.1032, + "step": 50400 + }, + { + "epoch": 2.35, + "learning_rate": 1.2259182041916827e-05, + "loss": 0.2132, + "step": 50405 + }, + { + "epoch": 2.35, + "learning_rate": 1.2258398256862039e-05, + "loss": 0.2025, + "step": 50410 + }, + { + "epoch": 2.35, + "learning_rate": 1.2257614471807253e-05, + "loss": 0.2278, + "step": 50415 + }, + { + "epoch": 2.35, + "learning_rate": 1.2256830686752465e-05, + "loss": 0.0977, + "step": 50420 + }, + { + "epoch": 2.35, + "learning_rate": 1.225604690169768e-05, + "loss": 0.0473, + "step": 50425 + }, + { + "epoch": 2.35, + "learning_rate": 1.2255263116642893e-05, + "loss": 0.0606, + "step": 50430 + }, + { + "epoch": 2.35, + "learning_rate": 1.2254479331588107e-05, + "loss": 0.0981, + "step": 50435 + }, + { + "epoch": 2.35, + "learning_rate": 1.2253695546533319e-05, + "loss": 0.0818, + "step": 50440 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252911761478534e-05, + "loss": 0.1135, + "step": 50445 + }, + { + "epoch": 2.35, + "learning_rate": 1.2252127976423747e-05, + "loss": 0.1628, + "step": 50450 + }, + { + "epoch": 2.35, + "learning_rate": 1.2251344191368959e-05, + "loss": 0.1521, + "step": 50455 + }, + { + "epoch": 2.35, + "learning_rate": 1.2250560406314174e-05, + "loss": 0.3401, + "step": 50460 + }, + { + "epoch": 2.35, + "learning_rate": 1.2249776621259387e-05, + "loss": 0.2756, + "step": 50465 + }, + { + "epoch": 2.35, + "learning_rate": 1.22489928362046e-05, + "loss": 0.0554, + "step": 50470 + }, + { + "epoch": 2.36, + "learning_rate": 1.2248209051149813e-05, + "loss": 0.0253, + "step": 50475 + }, + { + "epoch": 2.36, + "learning_rate": 1.2247425266095028e-05, + "loss": 0.1059, + "step": 50480 + }, + { + "epoch": 2.36, + "learning_rate": 1.224664148104024e-05, + "loss": 0.1775, + "step": 50485 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245857695985455e-05, + "loss": 0.0703, + "step": 50490 + }, + { + "epoch": 2.36, + "learning_rate": 1.2245073910930667e-05, + "loss": 0.0463, + "step": 50495 + }, + { + "epoch": 2.36, + "learning_rate": 1.2244290125875882e-05, + "loss": 0.1483, + "step": 50500 + }, + { + "epoch": 2.36, + "learning_rate": 1.2243506340821095e-05, + "loss": 0.2113, + "step": 50505 + }, + { + "epoch": 2.36, + "learning_rate": 1.2242722555766308e-05, + "loss": 0.2143, + "step": 50510 + }, + { + "epoch": 2.36, + "learning_rate": 1.224193877071152e-05, + "loss": 0.3792, + "step": 50515 + }, + { + "epoch": 2.36, + "learning_rate": 1.2241154985656733e-05, + "loss": 0.0368, + "step": 50520 + }, + { + "epoch": 2.36, + "learning_rate": 1.2240371200601948e-05, + "loss": 0.0232, + "step": 50525 + }, + { + "epoch": 2.36, + "learning_rate": 1.223958741554716e-05, + "loss": 0.0197, + "step": 50530 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238803630492375e-05, + "loss": 0.1343, + "step": 50535 + }, + { + "epoch": 2.36, + "learning_rate": 1.2238019845437587e-05, + "loss": 0.1147, + "step": 50540 + }, + { + "epoch": 2.36, + "learning_rate": 1.2237236060382802e-05, + "loss": 0.135, + "step": 50545 + }, + { + "epoch": 2.36, + "learning_rate": 1.2236452275328015e-05, + "loss": 0.1731, + "step": 50550 + }, + { + "epoch": 2.36, + "learning_rate": 1.2235668490273229e-05, + "loss": 0.1098, + "step": 50555 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234884705218442e-05, + "loss": 0.2273, + "step": 50560 + }, + { + "epoch": 2.36, + "learning_rate": 1.2234100920163656e-05, + "loss": 0.2623, + "step": 50565 + }, + { + "epoch": 2.36, + "learning_rate": 1.2233317135108869e-05, + "loss": 0.0419, + "step": 50570 + }, + { + "epoch": 2.36, + "learning_rate": 1.2232533350054082e-05, + "loss": 0.0451, + "step": 50575 + }, + { + "epoch": 2.36, + "learning_rate": 1.2231749564999296e-05, + "loss": 0.0982, + "step": 50580 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230965779944509e-05, + "loss": 0.0518, + "step": 50585 + }, + { + "epoch": 2.36, + "learning_rate": 1.2230181994889722e-05, + "loss": 0.0991, + "step": 50590 + }, + { + "epoch": 2.36, + "learning_rate": 1.2229398209834935e-05, + "loss": 0.0705, + "step": 50595 + }, + { + "epoch": 2.36, + "learning_rate": 1.222861442478015e-05, + "loss": 0.1053, + "step": 50600 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227830639725363e-05, + "loss": 0.1624, + "step": 50605 + }, + { + "epoch": 2.36, + "learning_rate": 1.2227046854670576e-05, + "loss": 0.2793, + "step": 50610 + }, + { + "epoch": 2.36, + "learning_rate": 1.2226263069615789e-05, + "loss": 0.2738, + "step": 50615 + }, + { + "epoch": 2.36, + "learning_rate": 1.2225479284561004e-05, + "loss": 0.0397, + "step": 50620 + }, + { + "epoch": 2.36, + "learning_rate": 1.2224695499506216e-05, + "loss": 0.0552, + "step": 50625 + }, + { + "epoch": 2.36, + "learning_rate": 1.222391171445143e-05, + "loss": 0.1016, + "step": 50630 + }, + { + "epoch": 2.36, + "learning_rate": 1.2223127929396643e-05, + "loss": 0.1722, + "step": 50635 + }, + { + "epoch": 2.36, + "learning_rate": 1.2222344144341858e-05, + "loss": 0.1134, + "step": 50640 + }, + { + "epoch": 2.36, + "learning_rate": 1.222156035928707e-05, + "loss": 0.1319, + "step": 50645 + }, + { + "epoch": 2.36, + "learning_rate": 1.2220776574232283e-05, + "loss": 0.1036, + "step": 50650 + }, + { + "epoch": 2.36, + "learning_rate": 1.2219992789177496e-05, + "loss": 0.1984, + "step": 50655 + }, + { + "epoch": 2.36, + "learning_rate": 1.221920900412271e-05, + "loss": 0.2497, + "step": 50660 + }, + { + "epoch": 2.36, + "learning_rate": 1.2218425219067924e-05, + "loss": 0.3259, + "step": 50665 + }, + { + "epoch": 2.36, + "learning_rate": 1.2217641434013137e-05, + "loss": 0.0318, + "step": 50670 + }, + { + "epoch": 2.36, + "learning_rate": 1.221685764895835e-05, + "loss": 0.0643, + "step": 50675 + }, + { + "epoch": 2.36, + "learning_rate": 1.2216073863903564e-05, + "loss": 0.1131, + "step": 50680 + }, + { + "epoch": 2.37, + "learning_rate": 1.2215290078848778e-05, + "loss": 0.073, + "step": 50685 + }, + { + "epoch": 2.37, + "learning_rate": 1.221450629379399e-05, + "loss": 0.1513, + "step": 50690 + }, + { + "epoch": 2.37, + "learning_rate": 1.2213722508739206e-05, + "loss": 0.1274, + "step": 50695 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212938723684418e-05, + "loss": 0.1174, + "step": 50700 + }, + { + "epoch": 2.37, + "learning_rate": 1.2212154938629632e-05, + "loss": 0.196, + "step": 50705 + }, + { + "epoch": 2.37, + "learning_rate": 1.2211371153574844e-05, + "loss": 0.2789, + "step": 50710 + }, + { + "epoch": 2.37, + "learning_rate": 1.2210587368520057e-05, + "loss": 0.2517, + "step": 50715 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209803583465272e-05, + "loss": 0.0358, + "step": 50720 + }, + { + "epoch": 2.37, + "learning_rate": 1.2209019798410484e-05, + "loss": 0.0353, + "step": 50725 + }, + { + "epoch": 2.37, + "learning_rate": 1.2208236013355698e-05, + "loss": 0.116, + "step": 50730 + }, + { + "epoch": 2.37, + "learning_rate": 1.220745222830091e-05, + "loss": 0.0738, + "step": 50735 + }, + { + "epoch": 2.37, + "learning_rate": 1.2206668443246126e-05, + "loss": 0.0956, + "step": 50740 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205884658191338e-05, + "loss": 0.0734, + "step": 50745 + }, + { + "epoch": 2.37, + "learning_rate": 1.2205100873136552e-05, + "loss": 0.1592, + "step": 50750 + }, + { + "epoch": 2.37, + "learning_rate": 1.2204317088081764e-05, + "loss": 0.2333, + "step": 50755 + }, + { + "epoch": 2.37, + "learning_rate": 1.220353330302698e-05, + "loss": 0.1929, + "step": 50760 + }, + { + "epoch": 2.37, + "learning_rate": 1.2202749517972192e-05, + "loss": 0.5826, + "step": 50765 + }, + { + "epoch": 2.37, + "learning_rate": 1.2201965732917406e-05, + "loss": 0.0678, + "step": 50770 + }, + { + "epoch": 2.37, + "learning_rate": 1.220118194786262e-05, + "loss": 0.0588, + "step": 50775 + }, + { + "epoch": 2.37, + "learning_rate": 1.2200398162807832e-05, + "loss": 0.0301, + "step": 50780 + }, + { + "epoch": 2.37, + "learning_rate": 1.2199614377753046e-05, + "loss": 0.0743, + "step": 50785 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198830592698258e-05, + "loss": 0.0594, + "step": 50790 + }, + { + "epoch": 2.37, + "learning_rate": 1.2198046807643474e-05, + "loss": 0.0979, + "step": 50795 + }, + { + "epoch": 2.37, + "learning_rate": 1.2197263022588686e-05, + "loss": 0.0678, + "step": 50800 + }, + { + "epoch": 2.37, + "learning_rate": 1.21964792375339e-05, + "loss": 0.1468, + "step": 50805 + }, + { + "epoch": 2.37, + "learning_rate": 1.2195695452479112e-05, + "loss": 0.238, + "step": 50810 + }, + { + "epoch": 2.37, + "learning_rate": 1.2194911667424328e-05, + "loss": 0.185, + "step": 50815 + }, + { + "epoch": 2.37, + "learning_rate": 1.219412788236954e-05, + "loss": 0.0528, + "step": 50820 + }, + { + "epoch": 2.37, + "learning_rate": 1.2193344097314754e-05, + "loss": 0.0391, + "step": 50825 + }, + { + "epoch": 2.37, + "learning_rate": 1.2192560312259966e-05, + "loss": 0.0602, + "step": 50830 + }, + { + "epoch": 2.37, + "learning_rate": 1.2191776527205182e-05, + "loss": 0.0745, + "step": 50835 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190992742150394e-05, + "loss": 0.0926, + "step": 50840 + }, + { + "epoch": 2.37, + "learning_rate": 1.2190208957095606e-05, + "loss": 0.1335, + "step": 50845 + }, + { + "epoch": 2.37, + "learning_rate": 1.218942517204082e-05, + "loss": 0.1373, + "step": 50850 + }, + { + "epoch": 2.37, + "learning_rate": 1.2188641386986032e-05, + "loss": 0.204, + "step": 50855 + }, + { + "epoch": 2.37, + "learning_rate": 1.2187857601931248e-05, + "loss": 0.2939, + "step": 50860 + }, + { + "epoch": 2.37, + "learning_rate": 1.218707381687646e-05, + "loss": 0.2599, + "step": 50865 + }, + { + "epoch": 2.37, + "learning_rate": 1.2186290031821674e-05, + "loss": 0.1391, + "step": 50870 + }, + { + "epoch": 2.37, + "learning_rate": 1.2185506246766888e-05, + "loss": 0.0675, + "step": 50875 + }, + { + "epoch": 2.37, + "learning_rate": 1.2184722461712102e-05, + "loss": 0.0415, + "step": 50880 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183938676657314e-05, + "loss": 0.0662, + "step": 50885 + }, + { + "epoch": 2.37, + "learning_rate": 1.2183154891602528e-05, + "loss": 0.0667, + "step": 50890 + }, + { + "epoch": 2.37, + "learning_rate": 1.2182371106547742e-05, + "loss": 0.0808, + "step": 50895 + }, + { + "epoch": 2.38, + "learning_rate": 1.2181587321492956e-05, + "loss": 0.1179, + "step": 50900 + }, + { + "epoch": 2.38, + "learning_rate": 1.2180803536438168e-05, + "loss": 0.1444, + "step": 50905 + }, + { + "epoch": 2.38, + "learning_rate": 1.218001975138338e-05, + "loss": 0.3121, + "step": 50910 + }, + { + "epoch": 2.38, + "learning_rate": 1.2179235966328596e-05, + "loss": 0.1573, + "step": 50915 + }, + { + "epoch": 2.38, + "learning_rate": 1.2178452181273808e-05, + "loss": 0.1081, + "step": 50920 + }, + { + "epoch": 2.38, + "learning_rate": 1.2177668396219022e-05, + "loss": 0.0486, + "step": 50925 + }, + { + "epoch": 2.38, + "learning_rate": 1.2176884611164234e-05, + "loss": 0.0354, + "step": 50930 + }, + { + "epoch": 2.38, + "learning_rate": 1.217610082610945e-05, + "loss": 0.1187, + "step": 50935 + }, + { + "epoch": 2.38, + "learning_rate": 1.2175317041054662e-05, + "loss": 0.0868, + "step": 50940 + }, + { + "epoch": 2.38, + "learning_rate": 1.2174533255999876e-05, + "loss": 0.1052, + "step": 50945 + }, + { + "epoch": 2.38, + "learning_rate": 1.2173749470945088e-05, + "loss": 0.1715, + "step": 50950 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172965685890304e-05, + "loss": 0.2318, + "step": 50955 + }, + { + "epoch": 2.38, + "learning_rate": 1.2172181900835516e-05, + "loss": 0.2057, + "step": 50960 + }, + { + "epoch": 2.38, + "learning_rate": 1.217139811578073e-05, + "loss": 0.2589, + "step": 50965 + }, + { + "epoch": 2.38, + "learning_rate": 1.2170614330725942e-05, + "loss": 0.0572, + "step": 50970 + }, + { + "epoch": 2.38, + "learning_rate": 1.2169830545671156e-05, + "loss": 0.0639, + "step": 50975 + }, + { + "epoch": 2.38, + "learning_rate": 1.216904676061637e-05, + "loss": 0.0619, + "step": 50980 + }, + { + "epoch": 2.38, + "learning_rate": 1.2168262975561582e-05, + "loss": 0.0854, + "step": 50985 + }, + { + "epoch": 2.38, + "learning_rate": 1.2167479190506796e-05, + "loss": 0.0625, + "step": 50990 + }, + { + "epoch": 2.38, + "learning_rate": 1.216669540545201e-05, + "loss": 0.0908, + "step": 50995 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165911620397224e-05, + "loss": 0.1432, + "step": 51000 + }, + { + "epoch": 2.38, + "learning_rate": 1.2165127835342436e-05, + "loss": 0.1444, + "step": 51005 + }, + { + "epoch": 2.38, + "learning_rate": 1.2164344050287652e-05, + "loss": 0.1782, + "step": 51010 + }, + { + "epoch": 2.38, + "learning_rate": 1.2163560265232864e-05, + "loss": 0.2457, + "step": 51015 + }, + { + "epoch": 2.38, + "learning_rate": 1.2162776480178078e-05, + "loss": 0.0601, + "step": 51020 + }, + { + "epoch": 2.38, + "learning_rate": 1.216199269512329e-05, + "loss": 0.1218, + "step": 51025 + }, + { + "epoch": 2.38, + "learning_rate": 1.2161208910068506e-05, + "loss": 0.059, + "step": 51030 + }, + { + "epoch": 2.38, + "learning_rate": 1.2160425125013718e-05, + "loss": 0.0545, + "step": 51035 + }, + { + "epoch": 2.38, + "learning_rate": 1.215964133995893e-05, + "loss": 0.1509, + "step": 51040 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158857554904144e-05, + "loss": 0.0776, + "step": 51045 + }, + { + "epoch": 2.38, + "learning_rate": 1.2158073769849356e-05, + "loss": 0.2111, + "step": 51050 + }, + { + "epoch": 2.38, + "learning_rate": 1.2157289984794572e-05, + "loss": 0.1845, + "step": 51055 + }, + { + "epoch": 2.38, + "learning_rate": 1.2156506199739784e-05, + "loss": 0.295, + "step": 51060 + }, + { + "epoch": 2.38, + "learning_rate": 1.2155722414684998e-05, + "loss": 0.3386, + "step": 51065 + }, + { + "epoch": 2.38, + "learning_rate": 1.215493862963021e-05, + "loss": 0.0308, + "step": 51070 + }, + { + "epoch": 2.38, + "learning_rate": 1.2154154844575426e-05, + "loss": 0.0535, + "step": 51075 + }, + { + "epoch": 2.38, + "learning_rate": 1.2153371059520638e-05, + "loss": 0.0747, + "step": 51080 + }, + { + "epoch": 2.38, + "learning_rate": 1.2152587274465852e-05, + "loss": 0.0781, + "step": 51085 + }, + { + "epoch": 2.38, + "learning_rate": 1.2151803489411066e-05, + "loss": 0.074, + "step": 51090 + }, + { + "epoch": 2.38, + "learning_rate": 1.215101970435628e-05, + "loss": 0.0889, + "step": 51095 + }, + { + "epoch": 2.38, + "learning_rate": 1.2150235919301492e-05, + "loss": 0.0867, + "step": 51100 + }, + { + "epoch": 2.38, + "learning_rate": 1.2149452134246704e-05, + "loss": 0.1321, + "step": 51105 + }, + { + "epoch": 2.38, + "learning_rate": 1.214866834919192e-05, + "loss": 0.1562, + "step": 51110 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147884564137132e-05, + "loss": 0.632, + "step": 51115 + }, + { + "epoch": 2.39, + "learning_rate": 1.2147100779082346e-05, + "loss": 0.0545, + "step": 51120 + }, + { + "epoch": 2.39, + "learning_rate": 1.2146316994027558e-05, + "loss": 0.0554, + "step": 51125 + }, + { + "epoch": 2.39, + "learning_rate": 1.2145533208972773e-05, + "loss": 0.0916, + "step": 51130 + }, + { + "epoch": 2.39, + "learning_rate": 1.2144749423917986e-05, + "loss": 0.0778, + "step": 51135 + }, + { + "epoch": 2.39, + "learning_rate": 1.21439656388632e-05, + "loss": 0.1447, + "step": 51140 + }, + { + "epoch": 2.39, + "learning_rate": 1.2143181853808412e-05, + "loss": 0.062, + "step": 51145 + }, + { + "epoch": 2.39, + "learning_rate": 1.2142398068753627e-05, + "loss": 0.1337, + "step": 51150 + }, + { + "epoch": 2.39, + "learning_rate": 1.214161428369884e-05, + "loss": 0.1622, + "step": 51155 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140830498644054e-05, + "loss": 0.2186, + "step": 51160 + }, + { + "epoch": 2.39, + "learning_rate": 1.2140046713589266e-05, + "loss": 0.2537, + "step": 51165 + }, + { + "epoch": 2.39, + "learning_rate": 1.2139262928534478e-05, + "loss": 0.0788, + "step": 51170 + }, + { + "epoch": 2.39, + "learning_rate": 1.2138479143479694e-05, + "loss": 0.0364, + "step": 51175 + }, + { + "epoch": 2.39, + "learning_rate": 1.2137695358424906e-05, + "loss": 0.1155, + "step": 51180 + }, + { + "epoch": 2.39, + "learning_rate": 1.213691157337012e-05, + "loss": 0.0683, + "step": 51185 + }, + { + "epoch": 2.39, + "learning_rate": 1.2136127788315334e-05, + "loss": 0.1416, + "step": 51190 + }, + { + "epoch": 2.39, + "learning_rate": 1.2135344003260547e-05, + "loss": 0.0975, + "step": 51195 + }, + { + "epoch": 2.39, + "learning_rate": 1.213456021820576e-05, + "loss": 0.0998, + "step": 51200 + }, + { + "epoch": 2.39, + "learning_rate": 1.2133776433150974e-05, + "loss": 0.2315, + "step": 51205 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132992648096188e-05, + "loss": 0.2483, + "step": 51210 + }, + { + "epoch": 2.39, + "learning_rate": 1.2132208863041401e-05, + "loss": 0.2255, + "step": 51215 + }, + { + "epoch": 2.39, + "learning_rate": 1.2131425077986614e-05, + "loss": 0.1239, + "step": 51220 + }, + { + "epoch": 2.39, + "learning_rate": 1.213064129293183e-05, + "loss": 0.0421, + "step": 51225 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129857507877041e-05, + "loss": 0.0431, + "step": 51230 + }, + { + "epoch": 2.39, + "learning_rate": 1.2129073722822254e-05, + "loss": 0.0604, + "step": 51235 + }, + { + "epoch": 2.39, + "learning_rate": 1.2128289937767468e-05, + "loss": 0.0692, + "step": 51240 + }, + { + "epoch": 2.39, + "learning_rate": 1.212750615271268e-05, + "loss": 0.0819, + "step": 51245 + }, + { + "epoch": 2.39, + "learning_rate": 1.2126722367657895e-05, + "loss": 0.0985, + "step": 51250 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125938582603108e-05, + "loss": 0.1348, + "step": 51255 + }, + { + "epoch": 2.39, + "learning_rate": 1.2125154797548321e-05, + "loss": 0.184, + "step": 51260 + }, + { + "epoch": 2.39, + "learning_rate": 1.2124371012493534e-05, + "loss": 0.3057, + "step": 51265 + }, + { + "epoch": 2.39, + "learning_rate": 1.212358722743875e-05, + "loss": 0.088, + "step": 51270 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122803442383962e-05, + "loss": 0.0724, + "step": 51275 + }, + { + "epoch": 2.39, + "learning_rate": 1.2122019657329175e-05, + "loss": 0.0378, + "step": 51280 + }, + { + "epoch": 2.39, + "learning_rate": 1.2121235872274388e-05, + "loss": 0.0781, + "step": 51285 + }, + { + "epoch": 2.39, + "learning_rate": 1.2120452087219603e-05, + "loss": 0.1908, + "step": 51290 + }, + { + "epoch": 2.39, + "learning_rate": 1.2119668302164815e-05, + "loss": 0.1843, + "step": 51295 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118884517110028e-05, + "loss": 0.085, + "step": 51300 + }, + { + "epoch": 2.39, + "learning_rate": 1.2118100732055242e-05, + "loss": 0.1861, + "step": 51305 + }, + { + "epoch": 2.39, + "learning_rate": 1.2117316947000455e-05, + "loss": 0.252, + "step": 51310 + }, + { + "epoch": 2.39, + "learning_rate": 1.211653316194567e-05, + "loss": 0.341, + "step": 51315 + }, + { + "epoch": 2.39, + "learning_rate": 1.2115749376890882e-05, + "loss": 0.054, + "step": 51320 + }, + { + "epoch": 2.39, + "learning_rate": 1.2114965591836097e-05, + "loss": 0.0834, + "step": 51325 + }, + { + "epoch": 2.4, + "learning_rate": 1.211418180678131e-05, + "loss": 0.0253, + "step": 51330 + }, + { + "epoch": 2.4, + "learning_rate": 1.2113398021726523e-05, + "loss": 0.0387, + "step": 51335 + }, + { + "epoch": 2.4, + "learning_rate": 1.2112614236671736e-05, + "loss": 0.058, + "step": 51340 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111830451616951e-05, + "loss": 0.0874, + "step": 51345 + }, + { + "epoch": 2.4, + "learning_rate": 1.2111046666562163e-05, + "loss": 0.0858, + "step": 51350 + }, + { + "epoch": 2.4, + "learning_rate": 1.2110262881507377e-05, + "loss": 0.1828, + "step": 51355 + }, + { + "epoch": 2.4, + "learning_rate": 1.210947909645259e-05, + "loss": 0.2306, + "step": 51360 + }, + { + "epoch": 2.4, + "learning_rate": 1.2108695311397802e-05, + "loss": 0.3866, + "step": 51365 + }, + { + "epoch": 2.4, + "learning_rate": 1.2107911526343017e-05, + "loss": 0.0208, + "step": 51370 + }, + { + "epoch": 2.4, + "learning_rate": 1.210712774128823e-05, + "loss": 0.0327, + "step": 51375 + }, + { + "epoch": 2.4, + "learning_rate": 1.2106343956233443e-05, + "loss": 0.0503, + "step": 51380 + }, + { + "epoch": 2.4, + "learning_rate": 1.2105560171178656e-05, + "loss": 0.1202, + "step": 51385 + }, + { + "epoch": 2.4, + "learning_rate": 1.2104776386123871e-05, + "loss": 0.1188, + "step": 51390 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103992601069083e-05, + "loss": 0.0703, + "step": 51395 + }, + { + "epoch": 2.4, + "learning_rate": 1.2103208816014297e-05, + "loss": 0.1258, + "step": 51400 + }, + { + "epoch": 2.4, + "learning_rate": 1.2102425030959511e-05, + "loss": 0.2325, + "step": 51405 + }, + { + "epoch": 2.4, + "learning_rate": 1.2101641245904725e-05, + "loss": 0.1977, + "step": 51410 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100857460849937e-05, + "loss": 0.3474, + "step": 51415 + }, + { + "epoch": 2.4, + "learning_rate": 1.2100073675795151e-05, + "loss": 0.077, + "step": 51420 + }, + { + "epoch": 2.4, + "learning_rate": 1.2099289890740365e-05, + "loss": 0.0395, + "step": 51425 + }, + { + "epoch": 2.4, + "learning_rate": 1.2098506105685577e-05, + "loss": 0.0626, + "step": 51430 + }, + { + "epoch": 2.4, + "learning_rate": 1.2097722320630791e-05, + "loss": 0.0527, + "step": 51435 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096938535576003e-05, + "loss": 0.1469, + "step": 51440 + }, + { + "epoch": 2.4, + "learning_rate": 1.2096154750521219e-05, + "loss": 0.1648, + "step": 51445 + }, + { + "epoch": 2.4, + "learning_rate": 1.2095370965466431e-05, + "loss": 0.1252, + "step": 51450 + }, + { + "epoch": 2.4, + "learning_rate": 1.2094587180411645e-05, + "loss": 0.199, + "step": 51455 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093803395356857e-05, + "loss": 0.252, + "step": 51460 + }, + { + "epoch": 2.4, + "learning_rate": 1.2093019610302073e-05, + "loss": 0.2616, + "step": 51465 + }, + { + "epoch": 2.4, + "learning_rate": 1.2092235825247285e-05, + "loss": 0.0359, + "step": 51470 + }, + { + "epoch": 2.4, + "learning_rate": 1.2091452040192499e-05, + "loss": 0.0569, + "step": 51475 + }, + { + "epoch": 2.4, + "learning_rate": 1.2090668255137711e-05, + "loss": 0.0824, + "step": 51480 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089884470082927e-05, + "loss": 0.065, + "step": 51485 + }, + { + "epoch": 2.4, + "learning_rate": 1.2089100685028139e-05, + "loss": 0.0876, + "step": 51490 + }, + { + "epoch": 2.4, + "learning_rate": 1.2088316899973351e-05, + "loss": 0.1345, + "step": 51495 + }, + { + "epoch": 2.4, + "learning_rate": 1.2087533114918565e-05, + "loss": 0.0907, + "step": 51500 + }, + { + "epoch": 2.4, + "learning_rate": 1.2086749329863779e-05, + "loss": 0.3917, + "step": 51505 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085965544808993e-05, + "loss": 0.3124, + "step": 51510 + }, + { + "epoch": 2.4, + "learning_rate": 1.2085181759754205e-05, + "loss": 0.3424, + "step": 51515 + }, + { + "epoch": 2.4, + "learning_rate": 1.208439797469942e-05, + "loss": 0.0339, + "step": 51520 + }, + { + "epoch": 2.4, + "learning_rate": 1.2083614189644633e-05, + "loss": 0.0568, + "step": 51525 + }, + { + "epoch": 2.4, + "learning_rate": 1.2082830404589847e-05, + "loss": 0.1319, + "step": 51530 + }, + { + "epoch": 2.4, + "learning_rate": 1.208204661953506e-05, + "loss": 0.0855, + "step": 51535 + }, + { + "epoch": 2.4, + "learning_rate": 1.2081262834480275e-05, + "loss": 0.0986, + "step": 51540 + }, + { + "epoch": 2.41, + "learning_rate": 1.2080479049425487e-05, + "loss": 0.1848, + "step": 51545 + }, + { + "epoch": 2.41, + "learning_rate": 1.2079695264370701e-05, + "loss": 0.1471, + "step": 51550 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078911479315913e-05, + "loss": 0.1345, + "step": 51555 + }, + { + "epoch": 2.41, + "learning_rate": 1.2078127694261125e-05, + "loss": 0.1926, + "step": 51560 + }, + { + "epoch": 2.41, + "learning_rate": 1.2077343909206341e-05, + "loss": 0.3924, + "step": 51565 + }, + { + "epoch": 2.41, + "learning_rate": 1.2076560124151553e-05, + "loss": 0.0675, + "step": 51570 + }, + { + "epoch": 2.41, + "learning_rate": 1.2075776339096767e-05, + "loss": 0.0411, + "step": 51575 + }, + { + "epoch": 2.41, + "learning_rate": 1.207499255404198e-05, + "loss": 0.0543, + "step": 51580 + }, + { + "epoch": 2.41, + "learning_rate": 1.2074208768987195e-05, + "loss": 0.148, + "step": 51585 + }, + { + "epoch": 2.41, + "learning_rate": 1.2073424983932407e-05, + "loss": 0.0893, + "step": 51590 + }, + { + "epoch": 2.41, + "learning_rate": 1.2072641198877621e-05, + "loss": 0.1574, + "step": 51595 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071857413822833e-05, + "loss": 0.1502, + "step": 51600 + }, + { + "epoch": 2.41, + "learning_rate": 1.2071073628768049e-05, + "loss": 0.2346, + "step": 51605 + }, + { + "epoch": 2.41, + "learning_rate": 1.2070289843713261e-05, + "loss": 0.2326, + "step": 51610 + }, + { + "epoch": 2.41, + "learning_rate": 1.2069506058658475e-05, + "loss": 0.3411, + "step": 51615 + }, + { + "epoch": 2.41, + "learning_rate": 1.2068722273603689e-05, + "loss": 0.0766, + "step": 51620 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067938488548901e-05, + "loss": 0.022, + "step": 51625 + }, + { + "epoch": 2.41, + "learning_rate": 1.2067154703494115e-05, + "loss": 0.0682, + "step": 51630 + }, + { + "epoch": 2.41, + "learning_rate": 1.2066370918439327e-05, + "loss": 0.0439, + "step": 51635 + }, + { + "epoch": 2.41, + "learning_rate": 1.2065587133384543e-05, + "loss": 0.0751, + "step": 51640 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064803348329755e-05, + "loss": 0.0661, + "step": 51645 + }, + { + "epoch": 2.41, + "learning_rate": 1.2064019563274969e-05, + "loss": 0.215, + "step": 51650 + }, + { + "epoch": 2.41, + "learning_rate": 1.2063235778220181e-05, + "loss": 0.2804, + "step": 51655 + }, + { + "epoch": 2.41, + "learning_rate": 1.2062451993165397e-05, + "loss": 0.3444, + "step": 51660 + }, + { + "epoch": 2.41, + "learning_rate": 1.2061668208110609e-05, + "loss": 0.2795, + "step": 51665 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060884423055823e-05, + "loss": 0.0677, + "step": 51670 + }, + { + "epoch": 2.41, + "learning_rate": 1.2060100638001035e-05, + "loss": 0.0437, + "step": 51675 + }, + { + "epoch": 2.41, + "learning_rate": 1.205931685294625e-05, + "loss": 0.0713, + "step": 51680 + }, + { + "epoch": 2.41, + "learning_rate": 1.2058533067891463e-05, + "loss": 0.032, + "step": 51685 + }, + { + "epoch": 2.41, + "learning_rate": 1.2057749282836675e-05, + "loss": 0.0629, + "step": 51690 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056965497781889e-05, + "loss": 0.1758, + "step": 51695 + }, + { + "epoch": 2.41, + "learning_rate": 1.2056181712727101e-05, + "loss": 0.0785, + "step": 51700 + }, + { + "epoch": 2.41, + "learning_rate": 1.2055397927672317e-05, + "loss": 0.1533, + "step": 51705 + }, + { + "epoch": 2.41, + "learning_rate": 1.2054614142617529e-05, + "loss": 0.249, + "step": 51710 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053830357562743e-05, + "loss": 0.186, + "step": 51715 + }, + { + "epoch": 2.41, + "learning_rate": 1.2053046572507957e-05, + "loss": 0.01, + "step": 51720 + }, + { + "epoch": 2.41, + "learning_rate": 1.205226278745317e-05, + "loss": 0.0311, + "step": 51725 + }, + { + "epoch": 2.41, + "learning_rate": 1.2051479002398383e-05, + "loss": 0.0707, + "step": 51730 + }, + { + "epoch": 2.41, + "learning_rate": 1.2050695217343597e-05, + "loss": 0.0547, + "step": 51735 + }, + { + "epoch": 2.41, + "learning_rate": 1.204991143228881e-05, + "loss": 0.1202, + "step": 51740 + }, + { + "epoch": 2.41, + "learning_rate": 1.2049127647234025e-05, + "loss": 0.0915, + "step": 51745 + }, + { + "epoch": 2.41, + "learning_rate": 1.2048343862179237e-05, + "loss": 0.2305, + "step": 51750 + }, + { + "epoch": 2.41, + "learning_rate": 1.2047560077124449e-05, + "loss": 0.2337, + "step": 51755 + }, + { + "epoch": 2.42, + "learning_rate": 1.2046776292069665e-05, + "loss": 0.2104, + "step": 51760 + }, + { + "epoch": 2.42, + "learning_rate": 1.2045992507014877e-05, + "loss": 0.2833, + "step": 51765 + }, + { + "epoch": 2.42, + "learning_rate": 1.204520872196009e-05, + "loss": 0.022, + "step": 51770 + }, + { + "epoch": 2.42, + "learning_rate": 1.2044424936905303e-05, + "loss": 0.0689, + "step": 51775 + }, + { + "epoch": 2.42, + "learning_rate": 1.2043641151850519e-05, + "loss": 0.142, + "step": 51780 + }, + { + "epoch": 2.42, + "learning_rate": 1.204285736679573e-05, + "loss": 0.1489, + "step": 51785 + }, + { + "epoch": 2.42, + "learning_rate": 1.2042073581740945e-05, + "loss": 0.0738, + "step": 51790 + }, + { + "epoch": 2.42, + "learning_rate": 1.2041289796686157e-05, + "loss": 0.114, + "step": 51795 + }, + { + "epoch": 2.42, + "learning_rate": 1.2040506011631372e-05, + "loss": 0.1686, + "step": 51800 + }, + { + "epoch": 2.42, + "learning_rate": 1.2039722226576585e-05, + "loss": 0.1656, + "step": 51805 + }, + { + "epoch": 2.42, + "learning_rate": 1.2038938441521799e-05, + "loss": 0.1686, + "step": 51810 + }, + { + "epoch": 2.42, + "learning_rate": 1.203815465646701e-05, + "loss": 0.2587, + "step": 51815 + }, + { + "epoch": 2.42, + "learning_rate": 1.2037370871412225e-05, + "loss": 0.0502, + "step": 51820 + }, + { + "epoch": 2.42, + "learning_rate": 1.2036587086357439e-05, + "loss": 0.0239, + "step": 51825 + }, + { + "epoch": 2.42, + "learning_rate": 1.203580330130265e-05, + "loss": 0.0367, + "step": 51830 + }, + { + "epoch": 2.42, + "learning_rate": 1.2035019516247865e-05, + "loss": 0.0598, + "step": 51835 + }, + { + "epoch": 2.42, + "learning_rate": 1.2034235731193079e-05, + "loss": 0.1056, + "step": 51840 + }, + { + "epoch": 2.42, + "learning_rate": 1.2033451946138293e-05, + "loss": 0.1451, + "step": 51845 + }, + { + "epoch": 2.42, + "learning_rate": 1.2032668161083505e-05, + "loss": 0.174, + "step": 51850 + }, + { + "epoch": 2.42, + "learning_rate": 1.203188437602872e-05, + "loss": 0.199, + "step": 51855 + }, + { + "epoch": 2.42, + "learning_rate": 1.2031100590973933e-05, + "loss": 0.3578, + "step": 51860 + }, + { + "epoch": 2.42, + "learning_rate": 1.2030316805919146e-05, + "loss": 0.3888, + "step": 51865 + }, + { + "epoch": 2.42, + "learning_rate": 1.2029533020864359e-05, + "loss": 0.0432, + "step": 51870 + }, + { + "epoch": 2.42, + "learning_rate": 1.2028749235809574e-05, + "loss": 0.0301, + "step": 51875 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027965450754787e-05, + "loss": 0.0577, + "step": 51880 + }, + { + "epoch": 2.42, + "learning_rate": 1.2027181665699999e-05, + "loss": 0.0879, + "step": 51885 + }, + { + "epoch": 2.42, + "learning_rate": 1.2026397880645213e-05, + "loss": 0.1147, + "step": 51890 + }, + { + "epoch": 2.42, + "learning_rate": 1.2025614095590425e-05, + "loss": 0.1604, + "step": 51895 + }, + { + "epoch": 2.42, + "learning_rate": 1.202483031053564e-05, + "loss": 0.0468, + "step": 51900 + }, + { + "epoch": 2.42, + "learning_rate": 1.2024046525480853e-05, + "loss": 0.1913, + "step": 51905 + }, + { + "epoch": 2.42, + "learning_rate": 1.2023262740426067e-05, + "loss": 0.2669, + "step": 51910 + }, + { + "epoch": 2.42, + "learning_rate": 1.2022478955371279e-05, + "loss": 0.17, + "step": 51915 + }, + { + "epoch": 2.42, + "learning_rate": 1.2021695170316494e-05, + "loss": 0.1219, + "step": 51920 + }, + { + "epoch": 2.42, + "learning_rate": 1.2020911385261707e-05, + "loss": 0.0375, + "step": 51925 + }, + { + "epoch": 2.42, + "learning_rate": 1.202012760020692e-05, + "loss": 0.0667, + "step": 51930 + }, + { + "epoch": 2.42, + "learning_rate": 1.2019343815152134e-05, + "loss": 0.0615, + "step": 51935 + }, + { + "epoch": 2.42, + "learning_rate": 1.2018560030097348e-05, + "loss": 0.0947, + "step": 51940 + }, + { + "epoch": 2.42, + "learning_rate": 1.201777624504256e-05, + "loss": 0.1589, + "step": 51945 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016992459987773e-05, + "loss": 0.0904, + "step": 51950 + }, + { + "epoch": 2.42, + "learning_rate": 1.2016208674932988e-05, + "loss": 0.2414, + "step": 51955 + }, + { + "epoch": 2.42, + "learning_rate": 1.20154248898782e-05, + "loss": 0.3085, + "step": 51960 + }, + { + "epoch": 2.42, + "learning_rate": 1.2014641104823414e-05, + "loss": 0.3275, + "step": 51965 + }, + { + "epoch": 2.42, + "learning_rate": 1.2013857319768627e-05, + "loss": 0.0334, + "step": 51970 + }, + { + "epoch": 2.43, + "learning_rate": 1.2013073534713842e-05, + "loss": 0.0268, + "step": 51975 + }, + { + "epoch": 2.43, + "learning_rate": 1.2012289749659054e-05, + "loss": 0.1292, + "step": 51980 + }, + { + "epoch": 2.43, + "learning_rate": 1.2011505964604268e-05, + "loss": 0.0277, + "step": 51985 + }, + { + "epoch": 2.43, + "learning_rate": 1.201072217954948e-05, + "loss": 0.0855, + "step": 51990 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009938394494696e-05, + "loss": 0.0944, + "step": 51995 + }, + { + "epoch": 2.43, + "learning_rate": 1.2009154609439908e-05, + "loss": 0.0794, + "step": 52000 + }, + { + "epoch": 2.43, + "learning_rate": 1.2008370824385122e-05, + "loss": 0.1485, + "step": 52005 + }, + { + "epoch": 2.43, + "learning_rate": 1.2007587039330335e-05, + "loss": 0.2271, + "step": 52010 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006803254275547e-05, + "loss": 0.2456, + "step": 52015 + }, + { + "epoch": 2.43, + "learning_rate": 1.2006019469220762e-05, + "loss": 0.085, + "step": 52020 + }, + { + "epoch": 2.43, + "learning_rate": 1.2005235684165975e-05, + "loss": 0.0676, + "step": 52025 + }, + { + "epoch": 2.43, + "learning_rate": 1.2004451899111188e-05, + "loss": 0.0165, + "step": 52030 + }, + { + "epoch": 2.43, + "learning_rate": 1.2003668114056402e-05, + "loss": 0.0606, + "step": 52035 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002884329001616e-05, + "loss": 0.1106, + "step": 52040 + }, + { + "epoch": 2.43, + "learning_rate": 1.2002100543946828e-05, + "loss": 0.1506, + "step": 52045 + }, + { + "epoch": 2.43, + "learning_rate": 1.2001316758892042e-05, + "loss": 0.0898, + "step": 52050 + }, + { + "epoch": 2.43, + "learning_rate": 1.2000532973837256e-05, + "loss": 0.1746, + "step": 52055 + }, + { + "epoch": 2.43, + "learning_rate": 1.199974918878247e-05, + "loss": 0.251, + "step": 52060 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998965403727682e-05, + "loss": 0.2887, + "step": 52065 + }, + { + "epoch": 2.43, + "learning_rate": 1.1998181618672898e-05, + "loss": 0.0429, + "step": 52070 + }, + { + "epoch": 2.43, + "learning_rate": 1.199739783361811e-05, + "loss": 0.0352, + "step": 52075 + }, + { + "epoch": 2.43, + "learning_rate": 1.1996614048563322e-05, + "loss": 0.1168, + "step": 52080 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995830263508536e-05, + "loss": 0.0959, + "step": 52085 + }, + { + "epoch": 2.43, + "learning_rate": 1.1995046478453749e-05, + "loss": 0.154, + "step": 52090 + }, + { + "epoch": 2.43, + "learning_rate": 1.1994262693398964e-05, + "loss": 0.0716, + "step": 52095 + }, + { + "epoch": 2.43, + "learning_rate": 1.1993478908344176e-05, + "loss": 0.1781, + "step": 52100 + }, + { + "epoch": 2.43, + "learning_rate": 1.199269512328939e-05, + "loss": 0.2264, + "step": 52105 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991911338234602e-05, + "loss": 0.2278, + "step": 52110 + }, + { + "epoch": 2.43, + "learning_rate": 1.1991127553179818e-05, + "loss": 0.3195, + "step": 52115 + }, + { + "epoch": 2.43, + "learning_rate": 1.199034376812503e-05, + "loss": 0.0321, + "step": 52120 + }, + { + "epoch": 2.43, + "learning_rate": 1.1989559983070244e-05, + "loss": 0.0277, + "step": 52125 + }, + { + "epoch": 2.43, + "learning_rate": 1.1988776198015456e-05, + "loss": 0.0288, + "step": 52130 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987992412960672e-05, + "loss": 0.0316, + "step": 52135 + }, + { + "epoch": 2.43, + "learning_rate": 1.1987208627905884e-05, + "loss": 0.0851, + "step": 52140 + }, + { + "epoch": 2.43, + "learning_rate": 1.1986424842851096e-05, + "loss": 0.1352, + "step": 52145 + }, + { + "epoch": 2.43, + "learning_rate": 1.198564105779631e-05, + "loss": 0.1331, + "step": 52150 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984857272741524e-05, + "loss": 0.1579, + "step": 52155 + }, + { + "epoch": 2.43, + "learning_rate": 1.1984073487686738e-05, + "loss": 0.2474, + "step": 52160 + }, + { + "epoch": 2.43, + "learning_rate": 1.198328970263195e-05, + "loss": 0.2848, + "step": 52165 + }, + { + "epoch": 2.43, + "learning_rate": 1.1982505917577166e-05, + "loss": 0.0673, + "step": 52170 + }, + { + "epoch": 2.43, + "learning_rate": 1.1981722132522378e-05, + "loss": 0.0426, + "step": 52175 + }, + { + "epoch": 2.43, + "learning_rate": 1.1980938347467592e-05, + "loss": 0.0501, + "step": 52180 + }, + { + "epoch": 2.44, + "learning_rate": 1.1980154562412804e-05, + "loss": 0.0576, + "step": 52185 + }, + { + "epoch": 2.44, + "learning_rate": 1.197937077735802e-05, + "loss": 0.105, + "step": 52190 + }, + { + "epoch": 2.44, + "learning_rate": 1.1978586992303232e-05, + "loss": 0.1487, + "step": 52195 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977803207248446e-05, + "loss": 0.063, + "step": 52200 + }, + { + "epoch": 2.44, + "learning_rate": 1.1977019422193658e-05, + "loss": 0.1358, + "step": 52205 + }, + { + "epoch": 2.44, + "learning_rate": 1.197623563713887e-05, + "loss": 0.4102, + "step": 52210 + }, + { + "epoch": 2.44, + "learning_rate": 1.1975451852084086e-05, + "loss": 0.2808, + "step": 52215 + }, + { + "epoch": 2.44, + "learning_rate": 1.1974668067029298e-05, + "loss": 0.1002, + "step": 52220 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973884281974512e-05, + "loss": 0.0695, + "step": 52225 + }, + { + "epoch": 2.44, + "learning_rate": 1.1973100496919724e-05, + "loss": 0.0389, + "step": 52230 + }, + { + "epoch": 2.44, + "learning_rate": 1.197231671186494e-05, + "loss": 0.0591, + "step": 52235 + }, + { + "epoch": 2.44, + "learning_rate": 1.1971532926810152e-05, + "loss": 0.1046, + "step": 52240 + }, + { + "epoch": 2.44, + "learning_rate": 1.1970749141755366e-05, + "loss": 0.0821, + "step": 52245 + }, + { + "epoch": 2.44, + "learning_rate": 1.196996535670058e-05, + "loss": 0.1596, + "step": 52250 + }, + { + "epoch": 2.44, + "learning_rate": 1.1969181571645794e-05, + "loss": 0.0936, + "step": 52255 + }, + { + "epoch": 2.44, + "learning_rate": 1.1968397786591006e-05, + "loss": 0.1988, + "step": 52260 + }, + { + "epoch": 2.44, + "learning_rate": 1.196761400153622e-05, + "loss": 0.3196, + "step": 52265 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966830216481434e-05, + "loss": 0.0698, + "step": 52270 + }, + { + "epoch": 2.44, + "learning_rate": 1.1966046431426646e-05, + "loss": 0.0393, + "step": 52275 + }, + { + "epoch": 2.44, + "learning_rate": 1.196526264637186e-05, + "loss": 0.0488, + "step": 52280 + }, + { + "epoch": 2.44, + "learning_rate": 1.1964478861317072e-05, + "loss": 0.096, + "step": 52285 + }, + { + "epoch": 2.44, + "learning_rate": 1.1963695076262288e-05, + "loss": 0.0349, + "step": 52290 + }, + { + "epoch": 2.44, + "learning_rate": 1.19629112912075e-05, + "loss": 0.2553, + "step": 52295 + }, + { + "epoch": 2.44, + "learning_rate": 1.1962127506152714e-05, + "loss": 0.1327, + "step": 52300 + }, + { + "epoch": 2.44, + "learning_rate": 1.1961343721097926e-05, + "loss": 0.1643, + "step": 52305 + }, + { + "epoch": 2.44, + "learning_rate": 1.1960559936043142e-05, + "loss": 0.2194, + "step": 52310 + }, + { + "epoch": 2.44, + "learning_rate": 1.1959776150988354e-05, + "loss": 0.3497, + "step": 52315 + }, + { + "epoch": 2.44, + "learning_rate": 1.1958992365933568e-05, + "loss": 0.0641, + "step": 52320 + }, + { + "epoch": 2.44, + "learning_rate": 1.195820858087878e-05, + "loss": 0.0347, + "step": 52325 + }, + { + "epoch": 2.44, + "learning_rate": 1.1957424795823996e-05, + "loss": 0.0945, + "step": 52330 + }, + { + "epoch": 2.44, + "learning_rate": 1.1956641010769208e-05, + "loss": 0.0739, + "step": 52335 + }, + { + "epoch": 2.44, + "learning_rate": 1.195585722571442e-05, + "loss": 0.1243, + "step": 52340 + }, + { + "epoch": 2.44, + "learning_rate": 1.1955073440659634e-05, + "loss": 0.1303, + "step": 52345 + }, + { + "epoch": 2.44, + "learning_rate": 1.1954289655604848e-05, + "loss": 0.1913, + "step": 52350 + }, + { + "epoch": 2.44, + "learning_rate": 1.1953505870550062e-05, + "loss": 0.1298, + "step": 52355 + }, + { + "epoch": 2.44, + "learning_rate": 1.1952722085495274e-05, + "loss": 0.2293, + "step": 52360 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951938300440488e-05, + "loss": 0.2134, + "step": 52365 + }, + { + "epoch": 2.44, + "learning_rate": 1.1951154515385702e-05, + "loss": 0.0677, + "step": 52370 + }, + { + "epoch": 2.44, + "learning_rate": 1.1950370730330916e-05, + "loss": 0.0779, + "step": 52375 + }, + { + "epoch": 2.44, + "learning_rate": 1.1949586945276128e-05, + "loss": 0.0791, + "step": 52380 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948803160221344e-05, + "loss": 0.086, + "step": 52385 + }, + { + "epoch": 2.44, + "learning_rate": 1.1948019375166556e-05, + "loss": 0.1275, + "step": 52390 + }, + { + "epoch": 2.44, + "learning_rate": 1.194723559011177e-05, + "loss": 0.0879, + "step": 52395 + }, + { + "epoch": 2.45, + "learning_rate": 1.1946451805056982e-05, + "loss": 0.133, + "step": 52400 + }, + { + "epoch": 2.45, + "learning_rate": 1.1945668020002194e-05, + "loss": 0.2451, + "step": 52405 + }, + { + "epoch": 2.45, + "learning_rate": 1.194488423494741e-05, + "loss": 0.2477, + "step": 52410 + }, + { + "epoch": 2.45, + "learning_rate": 1.1944100449892622e-05, + "loss": 0.4061, + "step": 52415 + }, + { + "epoch": 2.45, + "learning_rate": 1.1943316664837836e-05, + "loss": 0.0401, + "step": 52420 + }, + { + "epoch": 2.45, + "learning_rate": 1.1942532879783048e-05, + "loss": 0.0241, + "step": 52425 + }, + { + "epoch": 2.45, + "learning_rate": 1.1941749094728264e-05, + "loss": 0.0777, + "step": 52430 + }, + { + "epoch": 2.45, + "learning_rate": 1.1940965309673476e-05, + "loss": 0.065, + "step": 52435 + }, + { + "epoch": 2.45, + "learning_rate": 1.194018152461869e-05, + "loss": 0.1047, + "step": 52440 + }, + { + "epoch": 2.45, + "learning_rate": 1.1939397739563902e-05, + "loss": 0.0401, + "step": 52445 + }, + { + "epoch": 2.45, + "learning_rate": 1.1938613954509118e-05, + "loss": 0.0681, + "step": 52450 + }, + { + "epoch": 2.45, + "learning_rate": 1.193783016945433e-05, + "loss": 0.1758, + "step": 52455 + }, + { + "epoch": 2.45, + "learning_rate": 1.1937046384399544e-05, + "loss": 0.1733, + "step": 52460 + }, + { + "epoch": 2.45, + "learning_rate": 1.1936262599344756e-05, + "loss": 0.3287, + "step": 52465 + }, + { + "epoch": 2.45, + "learning_rate": 1.193547881428997e-05, + "loss": 0.049, + "step": 52470 + }, + { + "epoch": 2.45, + "learning_rate": 1.1934695029235184e-05, + "loss": 0.0322, + "step": 52475 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933911244180396e-05, + "loss": 0.069, + "step": 52480 + }, + { + "epoch": 2.45, + "learning_rate": 1.1933127459125612e-05, + "loss": 0.0482, + "step": 52485 + }, + { + "epoch": 2.45, + "learning_rate": 1.1932343674070824e-05, + "loss": 0.1658, + "step": 52490 + }, + { + "epoch": 2.45, + "learning_rate": 1.1931559889016038e-05, + "loss": 0.1137, + "step": 52495 + }, + { + "epoch": 2.45, + "learning_rate": 1.193077610396125e-05, + "loss": 0.2187, + "step": 52500 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929992318906465e-05, + "loss": 0.1515, + "step": 52505 + }, + { + "epoch": 2.45, + "learning_rate": 1.1929208533851678e-05, + "loss": 0.307, + "step": 52510 + }, + { + "epoch": 2.45, + "learning_rate": 1.1928424748796892e-05, + "loss": 0.3527, + "step": 52515 + }, + { + "epoch": 2.45, + "learning_rate": 1.1927640963742104e-05, + "loss": 0.0675, + "step": 52520 + }, + { + "epoch": 2.45, + "learning_rate": 1.192685717868732e-05, + "loss": 0.0585, + "step": 52525 + }, + { + "epoch": 2.45, + "learning_rate": 1.1926073393632532e-05, + "loss": 0.0977, + "step": 52530 + }, + { + "epoch": 2.45, + "learning_rate": 1.1925289608577744e-05, + "loss": 0.0608, + "step": 52535 + }, + { + "epoch": 2.45, + "learning_rate": 1.1924505823522958e-05, + "loss": 0.061, + "step": 52540 + }, + { + "epoch": 2.45, + "learning_rate": 1.192372203846817e-05, + "loss": 0.0563, + "step": 52545 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922938253413386e-05, + "loss": 0.1041, + "step": 52550 + }, + { + "epoch": 2.45, + "learning_rate": 1.1922154468358598e-05, + "loss": 0.1776, + "step": 52555 + }, + { + "epoch": 2.45, + "learning_rate": 1.1921370683303812e-05, + "loss": 0.2721, + "step": 52560 + }, + { + "epoch": 2.45, + "learning_rate": 1.1920586898249026e-05, + "loss": 0.2447, + "step": 52565 + }, + { + "epoch": 2.45, + "learning_rate": 1.191980311319424e-05, + "loss": 0.0822, + "step": 52570 + }, + { + "epoch": 2.45, + "learning_rate": 1.1919019328139452e-05, + "loss": 0.0929, + "step": 52575 + }, + { + "epoch": 2.45, + "learning_rate": 1.1918235543084666e-05, + "loss": 0.0734, + "step": 52580 + }, + { + "epoch": 2.45, + "learning_rate": 1.191745175802988e-05, + "loss": 0.0762, + "step": 52585 + }, + { + "epoch": 2.45, + "learning_rate": 1.1916667972975093e-05, + "loss": 0.1376, + "step": 52590 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915884187920306e-05, + "loss": 0.1607, + "step": 52595 + }, + { + "epoch": 2.45, + "learning_rate": 1.1915100402865518e-05, + "loss": 0.2222, + "step": 52600 + }, + { + "epoch": 2.45, + "learning_rate": 1.1914316617810733e-05, + "loss": 0.2216, + "step": 52605 + }, + { + "epoch": 2.45, + "learning_rate": 1.1913532832755946e-05, + "loss": 0.2515, + "step": 52610 + }, + { + "epoch": 2.46, + "learning_rate": 1.191274904770116e-05, + "loss": 0.3711, + "step": 52615 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911965262646372e-05, + "loss": 0.0333, + "step": 52620 + }, + { + "epoch": 2.46, + "learning_rate": 1.1911181477591587e-05, + "loss": 0.0515, + "step": 52625 + }, + { + "epoch": 2.46, + "learning_rate": 1.19103976925368e-05, + "loss": 0.0941, + "step": 52630 + }, + { + "epoch": 2.46, + "learning_rate": 1.1909613907482013e-05, + "loss": 0.1234, + "step": 52635 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908830122427226e-05, + "loss": 0.0329, + "step": 52640 + }, + { + "epoch": 2.46, + "learning_rate": 1.1908046337372441e-05, + "loss": 0.1261, + "step": 52645 + }, + { + "epoch": 2.46, + "learning_rate": 1.1907262552317653e-05, + "loss": 0.1531, + "step": 52650 + }, + { + "epoch": 2.46, + "learning_rate": 1.1906478767262867e-05, + "loss": 0.1486, + "step": 52655 + }, + { + "epoch": 2.46, + "learning_rate": 1.190569498220808e-05, + "loss": 0.2458, + "step": 52660 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904911197153293e-05, + "loss": 0.3166, + "step": 52665 + }, + { + "epoch": 2.46, + "learning_rate": 1.1904127412098507e-05, + "loss": 0.0304, + "step": 52670 + }, + { + "epoch": 2.46, + "learning_rate": 1.190334362704372e-05, + "loss": 0.0736, + "step": 52675 + }, + { + "epoch": 2.46, + "learning_rate": 1.1902559841988934e-05, + "loss": 0.0754, + "step": 52680 + }, + { + "epoch": 2.46, + "learning_rate": 1.1901776056934147e-05, + "loss": 0.0731, + "step": 52685 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900992271879361e-05, + "loss": 0.0482, + "step": 52690 + }, + { + "epoch": 2.46, + "learning_rate": 1.1900208486824574e-05, + "loss": 0.0744, + "step": 52695 + }, + { + "epoch": 2.46, + "learning_rate": 1.1899424701769789e-05, + "loss": 0.1076, + "step": 52700 + }, + { + "epoch": 2.46, + "learning_rate": 1.1898640916715001e-05, + "loss": 0.1539, + "step": 52705 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897857131660215e-05, + "loss": 0.2656, + "step": 52710 + }, + { + "epoch": 2.46, + "learning_rate": 1.1897073346605427e-05, + "loss": 0.2979, + "step": 52715 + }, + { + "epoch": 2.46, + "learning_rate": 1.1896289561550643e-05, + "loss": 0.0254, + "step": 52720 + }, + { + "epoch": 2.46, + "learning_rate": 1.1895505776495855e-05, + "loss": 0.0355, + "step": 52725 + }, + { + "epoch": 2.46, + "learning_rate": 1.1894721991441067e-05, + "loss": 0.0842, + "step": 52730 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893938206386281e-05, + "loss": 0.075, + "step": 52735 + }, + { + "epoch": 2.46, + "learning_rate": 1.1893154421331494e-05, + "loss": 0.122, + "step": 52740 + }, + { + "epoch": 2.46, + "learning_rate": 1.189237063627671e-05, + "loss": 0.1795, + "step": 52745 + }, + { + "epoch": 2.46, + "learning_rate": 1.1891586851221921e-05, + "loss": 0.1504, + "step": 52750 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890803066167135e-05, + "loss": 0.2475, + "step": 52755 + }, + { + "epoch": 2.46, + "learning_rate": 1.1890019281112348e-05, + "loss": 0.1841, + "step": 52760 + }, + { + "epoch": 2.46, + "learning_rate": 1.1889235496057563e-05, + "loss": 0.3067, + "step": 52765 + }, + { + "epoch": 2.46, + "learning_rate": 1.1888451711002775e-05, + "loss": 0.0693, + "step": 52770 + }, + { + "epoch": 2.46, + "learning_rate": 1.188766792594799e-05, + "loss": 0.0174, + "step": 52775 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886884140893203e-05, + "loss": 0.0503, + "step": 52780 + }, + { + "epoch": 2.46, + "learning_rate": 1.1886100355838417e-05, + "loss": 0.1001, + "step": 52785 + }, + { + "epoch": 2.46, + "learning_rate": 1.188531657078363e-05, + "loss": 0.1064, + "step": 52790 + }, + { + "epoch": 2.46, + "learning_rate": 1.1884532785728841e-05, + "loss": 0.0935, + "step": 52795 + }, + { + "epoch": 2.46, + "learning_rate": 1.1883749000674057e-05, + "loss": 0.1237, + "step": 52800 + }, + { + "epoch": 2.46, + "learning_rate": 1.188296521561927e-05, + "loss": 0.2529, + "step": 52805 + }, + { + "epoch": 2.46, + "learning_rate": 1.1882181430564483e-05, + "loss": 0.2904, + "step": 52810 + }, + { + "epoch": 2.46, + "learning_rate": 1.1881397645509695e-05, + "loss": 0.2634, + "step": 52815 + }, + { + "epoch": 2.46, + "learning_rate": 1.1880613860454911e-05, + "loss": 0.0563, + "step": 52820 + }, + { + "epoch": 2.46, + "learning_rate": 1.1879830075400123e-05, + "loss": 0.1206, + "step": 52825 + }, + { + "epoch": 2.47, + "learning_rate": 1.1879046290345337e-05, + "loss": 0.0276, + "step": 52830 + }, + { + "epoch": 2.47, + "learning_rate": 1.187826250529055e-05, + "loss": 0.0621, + "step": 52835 + }, + { + "epoch": 2.47, + "learning_rate": 1.1877478720235765e-05, + "loss": 0.1221, + "step": 52840 + }, + { + "epoch": 2.47, + "learning_rate": 1.1876694935180977e-05, + "loss": 0.1302, + "step": 52845 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875911150126191e-05, + "loss": 0.1104, + "step": 52850 + }, + { + "epoch": 2.47, + "learning_rate": 1.1875127365071403e-05, + "loss": 0.2365, + "step": 52855 + }, + { + "epoch": 2.47, + "learning_rate": 1.1874343580016615e-05, + "loss": 0.2957, + "step": 52860 + }, + { + "epoch": 2.47, + "learning_rate": 1.1873559794961831e-05, + "loss": 0.1747, + "step": 52865 + }, + { + "epoch": 2.47, + "learning_rate": 1.1872776009907043e-05, + "loss": 0.0363, + "step": 52870 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871992224852257e-05, + "loss": 0.0558, + "step": 52875 + }, + { + "epoch": 2.47, + "learning_rate": 1.1871208439797471e-05, + "loss": 0.1079, + "step": 52880 + }, + { + "epoch": 2.47, + "learning_rate": 1.1870424654742685e-05, + "loss": 0.0892, + "step": 52885 + }, + { + "epoch": 2.47, + "learning_rate": 1.1869640869687897e-05, + "loss": 0.0586, + "step": 52890 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868857084633111e-05, + "loss": 0.0733, + "step": 52895 + }, + { + "epoch": 2.47, + "learning_rate": 1.1868073299578325e-05, + "loss": 0.1407, + "step": 52900 + }, + { + "epoch": 2.47, + "learning_rate": 1.1867289514523539e-05, + "loss": 0.1468, + "step": 52905 + }, + { + "epoch": 2.47, + "learning_rate": 1.1866505729468751e-05, + "loss": 0.312, + "step": 52910 + }, + { + "epoch": 2.47, + "learning_rate": 1.1865721944413967e-05, + "loss": 0.1312, + "step": 52915 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864938159359179e-05, + "loss": 0.074, + "step": 52920 + }, + { + "epoch": 2.47, + "learning_rate": 1.1864154374304391e-05, + "loss": 0.0478, + "step": 52925 + }, + { + "epoch": 2.47, + "learning_rate": 1.1863370589249605e-05, + "loss": 0.0973, + "step": 52930 + }, + { + "epoch": 2.47, + "learning_rate": 1.1862586804194817e-05, + "loss": 0.0613, + "step": 52935 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861803019140033e-05, + "loss": 0.0627, + "step": 52940 + }, + { + "epoch": 2.47, + "learning_rate": 1.1861019234085245e-05, + "loss": 0.0968, + "step": 52945 + }, + { + "epoch": 2.47, + "learning_rate": 1.1860235449030459e-05, + "loss": 0.0754, + "step": 52950 + }, + { + "epoch": 2.47, + "learning_rate": 1.1859451663975671e-05, + "loss": 0.1461, + "step": 52955 + }, + { + "epoch": 2.47, + "learning_rate": 1.1858667878920887e-05, + "loss": 0.2842, + "step": 52960 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857884093866099e-05, + "loss": 0.2588, + "step": 52965 + }, + { + "epoch": 2.47, + "learning_rate": 1.1857100308811313e-05, + "loss": 0.044, + "step": 52970 + }, + { + "epoch": 2.47, + "learning_rate": 1.1856473280767485e-05, + "loss": 0.0901, + "step": 52975 + }, + { + "epoch": 2.47, + "learning_rate": 1.1855689495712697e-05, + "loss": 0.098, + "step": 52980 + }, + { + "epoch": 2.47, + "learning_rate": 1.185490571065791e-05, + "loss": 0.1027, + "step": 52985 + }, + { + "epoch": 2.47, + "learning_rate": 1.1854121925603123e-05, + "loss": 0.0727, + "step": 52990 + }, + { + "epoch": 2.47, + "learning_rate": 1.1853338140548336e-05, + "loss": 0.086, + "step": 52995 + }, + { + "epoch": 2.47, + "learning_rate": 1.1852554355493551e-05, + "loss": 0.1336, + "step": 53000 + }, + { + "epoch": 2.47, + "learning_rate": 1.1851770570438763e-05, + "loss": 0.2022, + "step": 53005 + }, + { + "epoch": 2.47, + "learning_rate": 1.1850986785383977e-05, + "loss": 0.2916, + "step": 53010 + }, + { + "epoch": 2.47, + "learning_rate": 1.185020300032919e-05, + "loss": 0.1668, + "step": 53015 + }, + { + "epoch": 2.47, + "learning_rate": 1.1849419215274405e-05, + "loss": 0.0279, + "step": 53020 + }, + { + "epoch": 2.47, + "learning_rate": 1.1848635430219617e-05, + "loss": 0.0569, + "step": 53025 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847851645164831e-05, + "loss": 0.0159, + "step": 53030 + }, + { + "epoch": 2.47, + "learning_rate": 1.1847067860110043e-05, + "loss": 0.0532, + "step": 53035 + }, + { + "epoch": 2.47, + "learning_rate": 1.1846284075055259e-05, + "loss": 0.0931, + "step": 53040 + }, + { + "epoch": 2.48, + "learning_rate": 1.1845500290000471e-05, + "loss": 0.1704, + "step": 53045 + }, + { + "epoch": 2.48, + "learning_rate": 1.1844716504945683e-05, + "loss": 0.1275, + "step": 53050 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843932719890897e-05, + "loss": 0.204, + "step": 53055 + }, + { + "epoch": 2.48, + "learning_rate": 1.1843148934836111e-05, + "loss": 0.2639, + "step": 53060 + }, + { + "epoch": 2.48, + "learning_rate": 1.1842365149781325e-05, + "loss": 0.3573, + "step": 53065 + }, + { + "epoch": 2.48, + "learning_rate": 1.1841581364726537e-05, + "loss": 0.0625, + "step": 53070 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840797579671753e-05, + "loss": 0.0354, + "step": 53075 + }, + { + "epoch": 2.48, + "learning_rate": 1.1840013794616965e-05, + "loss": 0.0453, + "step": 53080 + }, + { + "epoch": 2.48, + "learning_rate": 1.1839230009562179e-05, + "loss": 0.0939, + "step": 53085 + }, + { + "epoch": 2.48, + "learning_rate": 1.1838446224507391e-05, + "loss": 0.0633, + "step": 53090 + }, + { + "epoch": 2.48, + "learning_rate": 1.1837662439452607e-05, + "loss": 0.1074, + "step": 53095 + }, + { + "epoch": 2.48, + "learning_rate": 1.183687865439782e-05, + "loss": 0.0843, + "step": 53100 + }, + { + "epoch": 2.48, + "learning_rate": 1.1836094869343033e-05, + "loss": 0.1816, + "step": 53105 + }, + { + "epoch": 2.48, + "learning_rate": 1.1835311084288245e-05, + "loss": 0.283, + "step": 53110 + }, + { + "epoch": 2.48, + "learning_rate": 1.1834527299233457e-05, + "loss": 0.4184, + "step": 53115 + }, + { + "epoch": 2.48, + "learning_rate": 1.1833743514178673e-05, + "loss": 0.0683, + "step": 53120 + }, + { + "epoch": 2.48, + "learning_rate": 1.1832959729123885e-05, + "loss": 0.0495, + "step": 53125 + }, + { + "epoch": 2.48, + "learning_rate": 1.18321759440691e-05, + "loss": 0.0605, + "step": 53130 + }, + { + "epoch": 2.48, + "learning_rate": 1.1831392159014311e-05, + "loss": 0.1192, + "step": 53135 + }, + { + "epoch": 2.48, + "learning_rate": 1.1830608373959527e-05, + "loss": 0.1119, + "step": 53140 + }, + { + "epoch": 2.48, + "learning_rate": 1.182982458890474e-05, + "loss": 0.1132, + "step": 53145 + }, + { + "epoch": 2.48, + "learning_rate": 1.1829040803849953e-05, + "loss": 0.1361, + "step": 53150 + }, + { + "epoch": 2.48, + "learning_rate": 1.1828257018795165e-05, + "loss": 0.2086, + "step": 53155 + }, + { + "epoch": 2.48, + "learning_rate": 1.1827473233740381e-05, + "loss": 0.192, + "step": 53160 + }, + { + "epoch": 2.48, + "learning_rate": 1.1826689448685593e-05, + "loss": 0.2593, + "step": 53165 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825905663630807e-05, + "loss": 0.0378, + "step": 53170 + }, + { + "epoch": 2.48, + "learning_rate": 1.1825121878576021e-05, + "loss": 0.0689, + "step": 53175 + }, + { + "epoch": 2.48, + "learning_rate": 1.1824338093521233e-05, + "loss": 0.0995, + "step": 53180 + }, + { + "epoch": 2.48, + "learning_rate": 1.1823554308466447e-05, + "loss": 0.0625, + "step": 53185 + }, + { + "epoch": 2.48, + "learning_rate": 1.182277052341166e-05, + "loss": 0.0522, + "step": 53190 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821986738356875e-05, + "loss": 0.1119, + "step": 53195 + }, + { + "epoch": 2.48, + "learning_rate": 1.1821202953302087e-05, + "loss": 0.2608, + "step": 53200 + }, + { + "epoch": 2.48, + "learning_rate": 1.1820419168247301e-05, + "loss": 0.1296, + "step": 53205 + }, + { + "epoch": 2.48, + "learning_rate": 1.1819635383192513e-05, + "loss": 0.2392, + "step": 53210 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818851598137729e-05, + "loss": 0.3229, + "step": 53215 + }, + { + "epoch": 2.48, + "learning_rate": 1.1818067813082941e-05, + "loss": 0.0728, + "step": 53220 + }, + { + "epoch": 2.48, + "learning_rate": 1.1817284028028155e-05, + "loss": 0.0496, + "step": 53225 + }, + { + "epoch": 2.48, + "learning_rate": 1.1816500242973367e-05, + "loss": 0.1219, + "step": 53230 + }, + { + "epoch": 2.48, + "learning_rate": 1.1815716457918583e-05, + "loss": 0.0571, + "step": 53235 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814932672863795e-05, + "loss": 0.0712, + "step": 53240 + }, + { + "epoch": 2.48, + "learning_rate": 1.1814148887809007e-05, + "loss": 0.1081, + "step": 53245 + }, + { + "epoch": 2.48, + "learning_rate": 1.1813365102754221e-05, + "loss": 0.1176, + "step": 53250 + }, + { + "epoch": 2.48, + "learning_rate": 1.1812581317699435e-05, + "loss": 0.1212, + "step": 53255 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811797532644649e-05, + "loss": 0.3415, + "step": 53260 + }, + { + "epoch": 2.49, + "learning_rate": 1.1811013747589861e-05, + "loss": 0.2234, + "step": 53265 + }, + { + "epoch": 2.49, + "learning_rate": 1.1810229962535075e-05, + "loss": 0.0399, + "step": 53270 + }, + { + "epoch": 2.49, + "learning_rate": 1.1809446177480289e-05, + "loss": 0.0437, + "step": 53275 + }, + { + "epoch": 2.49, + "learning_rate": 1.1808662392425503e-05, + "loss": 0.0732, + "step": 53280 + }, + { + "epoch": 2.49, + "learning_rate": 1.1807878607370715e-05, + "loss": 0.068, + "step": 53285 + }, + { + "epoch": 2.49, + "learning_rate": 1.180709482231593e-05, + "loss": 0.1564, + "step": 53290 + }, + { + "epoch": 2.49, + "learning_rate": 1.1806311037261143e-05, + "loss": 0.1242, + "step": 53295 + }, + { + "epoch": 2.49, + "learning_rate": 1.1805527252206357e-05, + "loss": 0.0852, + "step": 53300 + }, + { + "epoch": 2.49, + "learning_rate": 1.1804743467151569e-05, + "loss": 0.1913, + "step": 53305 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803959682096781e-05, + "loss": 0.2867, + "step": 53310 + }, + { + "epoch": 2.49, + "learning_rate": 1.1803175897041997e-05, + "loss": 0.2384, + "step": 53315 + }, + { + "epoch": 2.49, + "learning_rate": 1.1802392111987209e-05, + "loss": 0.0679, + "step": 53320 + }, + { + "epoch": 2.49, + "learning_rate": 1.1801608326932423e-05, + "loss": 0.015, + "step": 53325 + }, + { + "epoch": 2.49, + "learning_rate": 1.1800824541877635e-05, + "loss": 0.0466, + "step": 53330 + }, + { + "epoch": 2.49, + "learning_rate": 1.180004075682285e-05, + "loss": 0.0421, + "step": 53335 + }, + { + "epoch": 2.49, + "learning_rate": 1.1799256971768063e-05, + "loss": 0.0758, + "step": 53340 + }, + { + "epoch": 2.49, + "learning_rate": 1.1798473186713277e-05, + "loss": 0.062, + "step": 53345 + }, + { + "epoch": 2.49, + "learning_rate": 1.1797689401658489e-05, + "loss": 0.1198, + "step": 53350 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796905616603705e-05, + "loss": 0.2479, + "step": 53355 + }, + { + "epoch": 2.49, + "learning_rate": 1.1796121831548917e-05, + "loss": 0.2566, + "step": 53360 + }, + { + "epoch": 2.49, + "learning_rate": 1.179533804649413e-05, + "loss": 0.5469, + "step": 53365 + }, + { + "epoch": 2.49, + "learning_rate": 1.1794554261439343e-05, + "loss": 0.0849, + "step": 53370 + }, + { + "epoch": 2.49, + "learning_rate": 1.1793770476384557e-05, + "loss": 0.0693, + "step": 53375 + }, + { + "epoch": 2.49, + "learning_rate": 1.179298669132977e-05, + "loss": 0.0148, + "step": 53380 + }, + { + "epoch": 2.49, + "learning_rate": 1.1792202906274983e-05, + "loss": 0.0683, + "step": 53385 + }, + { + "epoch": 2.49, + "learning_rate": 1.1791419121220199e-05, + "loss": 0.0983, + "step": 53390 + }, + { + "epoch": 2.49, + "learning_rate": 1.179063533616541e-05, + "loss": 0.1328, + "step": 53395 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789851551110625e-05, + "loss": 0.089, + "step": 53400 + }, + { + "epoch": 2.49, + "learning_rate": 1.1789067766055837e-05, + "loss": 0.2294, + "step": 53405 + }, + { + "epoch": 2.49, + "learning_rate": 1.1788283981001053e-05, + "loss": 0.216, + "step": 53410 + }, + { + "epoch": 2.49, + "learning_rate": 1.1787500195946265e-05, + "loss": 0.3105, + "step": 53415 + }, + { + "epoch": 2.49, + "learning_rate": 1.1786716410891479e-05, + "loss": 0.0386, + "step": 53420 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785932625836691e-05, + "loss": 0.0221, + "step": 53425 + }, + { + "epoch": 2.49, + "learning_rate": 1.1785148840781906e-05, + "loss": 0.0662, + "step": 53430 + }, + { + "epoch": 2.49, + "learning_rate": 1.1784365055727119e-05, + "loss": 0.1186, + "step": 53435 + }, + { + "epoch": 2.49, + "learning_rate": 1.1783581270672331e-05, + "loss": 0.1842, + "step": 53440 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782797485617545e-05, + "loss": 0.0644, + "step": 53445 + }, + { + "epoch": 2.49, + "learning_rate": 1.1782013700562757e-05, + "loss": 0.1558, + "step": 53450 + }, + { + "epoch": 2.49, + "learning_rate": 1.1781229915507973e-05, + "loss": 0.1776, + "step": 53455 + }, + { + "epoch": 2.49, + "learning_rate": 1.1780446130453185e-05, + "loss": 0.1429, + "step": 53460 + }, + { + "epoch": 2.49, + "learning_rate": 1.1779662345398399e-05, + "loss": 0.2067, + "step": 53465 + }, + { + "epoch": 2.49, + "learning_rate": 1.1778878560343611e-05, + "loss": 0.0238, + "step": 53470 + }, + { + "epoch": 2.5, + "learning_rate": 1.1778094775288827e-05, + "loss": 0.0882, + "step": 53475 + }, + { + "epoch": 2.5, + "learning_rate": 1.1777310990234039e-05, + "loss": 0.0437, + "step": 53480 + }, + { + "epoch": 2.5, + "learning_rate": 1.1776527205179253e-05, + "loss": 0.1085, + "step": 53485 + }, + { + "epoch": 2.5, + "learning_rate": 1.1775743420124467e-05, + "loss": 0.082, + "step": 53490 + }, + { + "epoch": 2.5, + "learning_rate": 1.177495963506968e-05, + "loss": 0.0515, + "step": 53495 + }, + { + "epoch": 2.5, + "learning_rate": 1.1774175850014893e-05, + "loss": 0.1574, + "step": 53500 + }, + { + "epoch": 2.5, + "learning_rate": 1.1773392064960105e-05, + "loss": 0.176, + "step": 53505 + }, + { + "epoch": 2.5, + "learning_rate": 1.177260827990532e-05, + "loss": 0.3283, + "step": 53510 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771824494850533e-05, + "loss": 0.2003, + "step": 53515 + }, + { + "epoch": 2.5, + "learning_rate": 1.1771040709795747e-05, + "loss": 0.0773, + "step": 53520 + }, + { + "epoch": 2.5, + "learning_rate": 1.1770256924740959e-05, + "loss": 0.0402, + "step": 53525 + }, + { + "epoch": 2.5, + "learning_rate": 1.1769473139686174e-05, + "loss": 0.0686, + "step": 53530 + }, + { + "epoch": 2.5, + "learning_rate": 1.1768689354631387e-05, + "loss": 0.1132, + "step": 53535 + }, + { + "epoch": 2.5, + "learning_rate": 1.17679055695766e-05, + "loss": 0.109, + "step": 53540 + }, + { + "epoch": 2.5, + "learning_rate": 1.1767121784521813e-05, + "loss": 0.1463, + "step": 53545 + }, + { + "epoch": 2.5, + "learning_rate": 1.1766337999467028e-05, + "loss": 0.1783, + "step": 53550 + }, + { + "epoch": 2.5, + "learning_rate": 1.176555421441224e-05, + "loss": 0.1049, + "step": 53555 + }, + { + "epoch": 2.5, + "learning_rate": 1.1764770429357454e-05, + "loss": 0.4038, + "step": 53560 + }, + { + "epoch": 2.5, + "learning_rate": 1.1763986644302667e-05, + "loss": 0.2925, + "step": 53565 + }, + { + "epoch": 2.5, + "learning_rate": 1.176320285924788e-05, + "loss": 0.0268, + "step": 53570 + }, + { + "epoch": 2.5, + "learning_rate": 1.1762419074193094e-05, + "loss": 0.0987, + "step": 53575 + }, + { + "epoch": 2.5, + "learning_rate": 1.1761635289138307e-05, + "loss": 0.0748, + "step": 53580 + }, + { + "epoch": 2.5, + "learning_rate": 1.176085150408352e-05, + "loss": 0.0521, + "step": 53585 + }, + { + "epoch": 2.5, + "learning_rate": 1.1760067719028734e-05, + "loss": 0.1107, + "step": 53590 + }, + { + "epoch": 2.5, + "learning_rate": 1.1759283933973948e-05, + "loss": 0.1081, + "step": 53595 + }, + { + "epoch": 2.5, + "learning_rate": 1.175850014891916e-05, + "loss": 0.0851, + "step": 53600 + }, + { + "epoch": 2.5, + "learning_rate": 1.1757716363864376e-05, + "loss": 0.1295, + "step": 53605 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756932578809588e-05, + "loss": 0.3218, + "step": 53610 + }, + { + "epoch": 2.5, + "learning_rate": 1.1756148793754802e-05, + "loss": 0.3039, + "step": 53615 + }, + { + "epoch": 2.5, + "learning_rate": 1.1755365008700015e-05, + "loss": 0.0713, + "step": 53620 + }, + { + "epoch": 2.5, + "learning_rate": 1.175458122364523e-05, + "loss": 0.0448, + "step": 53625 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753797438590442e-05, + "loss": 0.0533, + "step": 53630 + }, + { + "epoch": 2.5, + "learning_rate": 1.1753013653535655e-05, + "loss": 0.0662, + "step": 53635 + }, + { + "epoch": 2.5, + "learning_rate": 1.1752229868480868e-05, + "loss": 0.0825, + "step": 53640 + }, + { + "epoch": 2.5, + "learning_rate": 1.175144608342608e-05, + "loss": 0.1356, + "step": 53645 + }, + { + "epoch": 2.5, + "learning_rate": 1.1750662298371296e-05, + "loss": 0.119, + "step": 53650 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749878513316508e-05, + "loss": 0.177, + "step": 53655 + }, + { + "epoch": 2.5, + "learning_rate": 1.1749094728261722e-05, + "loss": 0.2674, + "step": 53660 + }, + { + "epoch": 2.5, + "learning_rate": 1.1748310943206935e-05, + "loss": 0.2447, + "step": 53665 + }, + { + "epoch": 2.5, + "learning_rate": 1.174752715815215e-05, + "loss": 0.0238, + "step": 53670 + }, + { + "epoch": 2.5, + "learning_rate": 1.1746743373097362e-05, + "loss": 0.0325, + "step": 53675 + }, + { + "epoch": 2.5, + "learning_rate": 1.1745959588042576e-05, + "loss": 0.0444, + "step": 53680 + }, + { + "epoch": 2.51, + "learning_rate": 1.1745175802987789e-05, + "loss": 0.0893, + "step": 53685 + }, + { + "epoch": 2.51, + "learning_rate": 1.1744392017933004e-05, + "loss": 0.0845, + "step": 53690 + }, + { + "epoch": 2.51, + "learning_rate": 1.1743608232878216e-05, + "loss": 0.0906, + "step": 53695 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742824447823429e-05, + "loss": 0.1417, + "step": 53700 + }, + { + "epoch": 2.51, + "learning_rate": 1.1742040662768644e-05, + "loss": 0.1703, + "step": 53705 + }, + { + "epoch": 2.51, + "learning_rate": 1.1741256877713856e-05, + "loss": 0.2382, + "step": 53710 + }, + { + "epoch": 2.51, + "learning_rate": 1.174047309265907e-05, + "loss": 0.3102, + "step": 53715 + }, + { + "epoch": 2.51, + "learning_rate": 1.1739689307604282e-05, + "loss": 0.0984, + "step": 53720 + }, + { + "epoch": 2.51, + "learning_rate": 1.1738905522549498e-05, + "loss": 0.0297, + "step": 53725 + }, + { + "epoch": 2.51, + "learning_rate": 1.173812173749471e-05, + "loss": 0.0989, + "step": 53730 + }, + { + "epoch": 2.51, + "learning_rate": 1.1737337952439924e-05, + "loss": 0.076, + "step": 53735 + }, + { + "epoch": 2.51, + "learning_rate": 1.1736554167385136e-05, + "loss": 0.1314, + "step": 53740 + }, + { + "epoch": 2.51, + "learning_rate": 1.1735770382330352e-05, + "loss": 0.1822, + "step": 53745 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734986597275564e-05, + "loss": 0.2006, + "step": 53750 + }, + { + "epoch": 2.51, + "learning_rate": 1.1734202812220778e-05, + "loss": 0.1828, + "step": 53755 + }, + { + "epoch": 2.51, + "learning_rate": 1.173341902716599e-05, + "loss": 0.3078, + "step": 53760 + }, + { + "epoch": 2.51, + "learning_rate": 1.1732635242111203e-05, + "loss": 0.3112, + "step": 53765 + }, + { + "epoch": 2.51, + "learning_rate": 1.1731851457056418e-05, + "loss": 0.0143, + "step": 53770 + }, + { + "epoch": 2.51, + "learning_rate": 1.173106767200163e-05, + "loss": 0.0488, + "step": 53775 + }, + { + "epoch": 2.51, + "learning_rate": 1.1730283886946844e-05, + "loss": 0.0628, + "step": 53780 + }, + { + "epoch": 2.51, + "learning_rate": 1.1729500101892058e-05, + "loss": 0.162, + "step": 53785 + }, + { + "epoch": 2.51, + "learning_rate": 1.1728716316837272e-05, + "loss": 0.1037, + "step": 53790 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727932531782484e-05, + "loss": 0.0923, + "step": 53795 + }, + { + "epoch": 2.51, + "learning_rate": 1.1727148746727698e-05, + "loss": 0.0755, + "step": 53800 + }, + { + "epoch": 2.51, + "learning_rate": 1.1726364961672912e-05, + "loss": 0.1833, + "step": 53805 + }, + { + "epoch": 2.51, + "learning_rate": 1.1725581176618126e-05, + "loss": 0.1893, + "step": 53810 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724797391563338e-05, + "loss": 0.3448, + "step": 53815 + }, + { + "epoch": 2.51, + "learning_rate": 1.1724013606508554e-05, + "loss": 0.0322, + "step": 53820 + }, + { + "epoch": 2.51, + "learning_rate": 1.1723229821453766e-05, + "loss": 0.0322, + "step": 53825 + }, + { + "epoch": 2.51, + "learning_rate": 1.1722446036398978e-05, + "loss": 0.0736, + "step": 53830 + }, + { + "epoch": 2.51, + "learning_rate": 1.1721662251344192e-05, + "loss": 0.0983, + "step": 53835 + }, + { + "epoch": 2.51, + "learning_rate": 1.1720878466289404e-05, + "loss": 0.0753, + "step": 53840 + }, + { + "epoch": 2.51, + "learning_rate": 1.172009468123462e-05, + "loss": 0.0771, + "step": 53845 + }, + { + "epoch": 2.51, + "learning_rate": 1.1719310896179832e-05, + "loss": 0.1376, + "step": 53850 + }, + { + "epoch": 2.51, + "learning_rate": 1.1718527111125046e-05, + "loss": 0.1836, + "step": 53855 + }, + { + "epoch": 2.51, + "learning_rate": 1.1717743326070258e-05, + "loss": 0.2587, + "step": 53860 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716959541015474e-05, + "loss": 0.2983, + "step": 53865 + }, + { + "epoch": 2.51, + "learning_rate": 1.1716175755960686e-05, + "loss": 0.0732, + "step": 53870 + }, + { + "epoch": 2.51, + "learning_rate": 1.17153919709059e-05, + "loss": 0.0571, + "step": 53875 + }, + { + "epoch": 2.51, + "learning_rate": 1.1714608185851112e-05, + "loss": 0.0792, + "step": 53880 + }, + { + "epoch": 2.51, + "learning_rate": 1.1713824400796328e-05, + "loss": 0.0731, + "step": 53885 + }, + { + "epoch": 2.51, + "learning_rate": 1.171304061574154e-05, + "loss": 0.0635, + "step": 53890 + }, + { + "epoch": 2.51, + "learning_rate": 1.1712256830686752e-05, + "loss": 0.1637, + "step": 53895 + }, + { + "epoch": 2.52, + "learning_rate": 1.1711473045631966e-05, + "loss": 0.2193, + "step": 53900 + }, + { + "epoch": 2.52, + "learning_rate": 1.171068926057718e-05, + "loss": 0.2715, + "step": 53905 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709905475522394e-05, + "loss": 0.3451, + "step": 53910 + }, + { + "epoch": 2.52, + "learning_rate": 1.1709121690467606e-05, + "loss": 0.2351, + "step": 53915 + }, + { + "epoch": 2.52, + "learning_rate": 1.1708337905412822e-05, + "loss": 0.0587, + "step": 53920 + }, + { + "epoch": 2.52, + "learning_rate": 1.1707554120358034e-05, + "loss": 0.0571, + "step": 53925 + }, + { + "epoch": 2.52, + "learning_rate": 1.1706770335303248e-05, + "loss": 0.0329, + "step": 53930 + }, + { + "epoch": 2.52, + "learning_rate": 1.170598655024846e-05, + "loss": 0.0982, + "step": 53935 + }, + { + "epoch": 2.52, + "learning_rate": 1.1705202765193676e-05, + "loss": 0.056, + "step": 53940 + }, + { + "epoch": 2.52, + "learning_rate": 1.1704418980138888e-05, + "loss": 0.1438, + "step": 53945 + }, + { + "epoch": 2.52, + "learning_rate": 1.1703635195084102e-05, + "loss": 0.1759, + "step": 53950 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702851410029314e-05, + "loss": 0.2695, + "step": 53955 + }, + { + "epoch": 2.52, + "learning_rate": 1.1702067624974526e-05, + "loss": 0.2936, + "step": 53960 + }, + { + "epoch": 2.52, + "learning_rate": 1.1701283839919742e-05, + "loss": 0.2662, + "step": 53965 + }, + { + "epoch": 2.52, + "learning_rate": 1.1700500054864954e-05, + "loss": 0.0413, + "step": 53970 + }, + { + "epoch": 2.52, + "learning_rate": 1.1699716269810168e-05, + "loss": 0.0348, + "step": 53975 + }, + { + "epoch": 2.52, + "learning_rate": 1.169893248475538e-05, + "loss": 0.0487, + "step": 53980 + }, + { + "epoch": 2.52, + "learning_rate": 1.1698148699700596e-05, + "loss": 0.0561, + "step": 53985 + }, + { + "epoch": 2.52, + "learning_rate": 1.1697364914645808e-05, + "loss": 0.1206, + "step": 53990 + }, + { + "epoch": 2.52, + "learning_rate": 1.1696581129591022e-05, + "loss": 0.1703, + "step": 53995 + }, + { + "epoch": 2.52, + "learning_rate": 1.1695797344536234e-05, + "loss": 0.1549, + "step": 54000 + }, + { + "epoch": 2.52, + "learning_rate": 1.169501355948145e-05, + "loss": 0.209, + "step": 54005 + }, + { + "epoch": 2.52, + "learning_rate": 1.1694229774426662e-05, + "loss": 0.4151, + "step": 54010 + }, + { + "epoch": 2.52, + "learning_rate": 1.1693445989371876e-05, + "loss": 0.1933, + "step": 54015 + }, + { + "epoch": 2.52, + "learning_rate": 1.169266220431709e-05, + "loss": 0.0544, + "step": 54020 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691878419262302e-05, + "loss": 0.0166, + "step": 54025 + }, + { + "epoch": 2.52, + "learning_rate": 1.1691094634207516e-05, + "loss": 0.0557, + "step": 54030 + }, + { + "epoch": 2.52, + "learning_rate": 1.1690310849152728e-05, + "loss": 0.0669, + "step": 54035 + }, + { + "epoch": 2.52, + "learning_rate": 1.1689527064097944e-05, + "loss": 0.0985, + "step": 54040 + }, + { + "epoch": 2.52, + "learning_rate": 1.1688743279043156e-05, + "loss": 0.1685, + "step": 54045 + }, + { + "epoch": 2.52, + "learning_rate": 1.168795949398837e-05, + "loss": 0.1963, + "step": 54050 + }, + { + "epoch": 2.52, + "learning_rate": 1.1687175708933582e-05, + "loss": 0.192, + "step": 54055 + }, + { + "epoch": 2.52, + "learning_rate": 1.1686391923878798e-05, + "loss": 0.3119, + "step": 54060 + }, + { + "epoch": 2.52, + "learning_rate": 1.168560813882401e-05, + "loss": 0.3424, + "step": 54065 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684824353769224e-05, + "loss": 0.0345, + "step": 54070 + }, + { + "epoch": 2.52, + "learning_rate": 1.1684040568714436e-05, + "loss": 0.0528, + "step": 54075 + }, + { + "epoch": 2.52, + "learning_rate": 1.1683256783659652e-05, + "loss": 0.053, + "step": 54080 + }, + { + "epoch": 2.52, + "learning_rate": 1.1682472998604864e-05, + "loss": 0.0567, + "step": 54085 + }, + { + "epoch": 2.52, + "learning_rate": 1.1681689213550076e-05, + "loss": 0.1253, + "step": 54090 + }, + { + "epoch": 2.52, + "learning_rate": 1.168090542849529e-05, + "loss": 0.1384, + "step": 54095 + }, + { + "epoch": 2.52, + "learning_rate": 1.1680121643440504e-05, + "loss": 0.1149, + "step": 54100 + }, + { + "epoch": 2.52, + "learning_rate": 1.1679337858385718e-05, + "loss": 0.1669, + "step": 54105 + }, + { + "epoch": 2.52, + "learning_rate": 1.1678710830341888e-05, + "loss": 0.1833, + "step": 54110 + }, + { + "epoch": 2.53, + "learning_rate": 1.16779270452871e-05, + "loss": 0.2386, + "step": 54115 + }, + { + "epoch": 2.53, + "learning_rate": 1.1677143260232316e-05, + "loss": 0.0729, + "step": 54120 + }, + { + "epoch": 2.53, + "learning_rate": 1.1676359475177528e-05, + "loss": 0.0523, + "step": 54125 + }, + { + "epoch": 2.53, + "learning_rate": 1.1675575690122742e-05, + "loss": 0.0632, + "step": 54130 + }, + { + "epoch": 2.53, + "learning_rate": 1.1674791905067954e-05, + "loss": 0.0702, + "step": 54135 + }, + { + "epoch": 2.53, + "learning_rate": 1.167400812001317e-05, + "loss": 0.0453, + "step": 54140 + }, + { + "epoch": 2.53, + "learning_rate": 1.1673224334958382e-05, + "loss": 0.1348, + "step": 54145 + }, + { + "epoch": 2.53, + "learning_rate": 1.1672440549903596e-05, + "loss": 0.156, + "step": 54150 + }, + { + "epoch": 2.53, + "learning_rate": 1.1671656764848808e-05, + "loss": 0.1055, + "step": 54155 + }, + { + "epoch": 2.53, + "learning_rate": 1.167087297979402e-05, + "loss": 0.1431, + "step": 54160 + }, + { + "epoch": 2.53, + "learning_rate": 1.1670089194739236e-05, + "loss": 0.3631, + "step": 54165 + }, + { + "epoch": 2.53, + "learning_rate": 1.1669305409684448e-05, + "loss": 0.0497, + "step": 54170 + }, + { + "epoch": 2.53, + "learning_rate": 1.1668521624629662e-05, + "loss": 0.0897, + "step": 54175 + }, + { + "epoch": 2.53, + "learning_rate": 1.1667737839574876e-05, + "loss": 0.0819, + "step": 54180 + }, + { + "epoch": 2.53, + "learning_rate": 1.166695405452009e-05, + "loss": 0.0764, + "step": 54185 + }, + { + "epoch": 2.53, + "learning_rate": 1.1666170269465302e-05, + "loss": 0.0894, + "step": 54190 + }, + { + "epoch": 2.53, + "learning_rate": 1.1665386484410518e-05, + "loss": 0.0866, + "step": 54195 + }, + { + "epoch": 2.53, + "learning_rate": 1.166460269935573e-05, + "loss": 0.1523, + "step": 54200 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663818914300944e-05, + "loss": 0.1929, + "step": 54205 + }, + { + "epoch": 2.53, + "learning_rate": 1.1663035129246156e-05, + "loss": 0.2861, + "step": 54210 + }, + { + "epoch": 2.53, + "learning_rate": 1.1662251344191372e-05, + "loss": 0.2864, + "step": 54215 + }, + { + "epoch": 2.53, + "learning_rate": 1.1661467559136584e-05, + "loss": 0.0513, + "step": 54220 + }, + { + "epoch": 2.53, + "learning_rate": 1.1660683774081796e-05, + "loss": 0.0342, + "step": 54225 + }, + { + "epoch": 2.53, + "learning_rate": 1.165989998902701e-05, + "loss": 0.078, + "step": 54230 + }, + { + "epoch": 2.53, + "learning_rate": 1.1659116203972222e-05, + "loss": 0.0786, + "step": 54235 + }, + { + "epoch": 2.53, + "learning_rate": 1.1658332418917438e-05, + "loss": 0.1127, + "step": 54240 + }, + { + "epoch": 2.53, + "learning_rate": 1.165754863386265e-05, + "loss": 0.1296, + "step": 54245 + }, + { + "epoch": 2.53, + "learning_rate": 1.1656764848807864e-05, + "loss": 0.134, + "step": 54250 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655981063753076e-05, + "loss": 0.2205, + "step": 54255 + }, + { + "epoch": 2.53, + "learning_rate": 1.1655197278698292e-05, + "loss": 0.1846, + "step": 54260 + }, + { + "epoch": 2.53, + "learning_rate": 1.1654413493643504e-05, + "loss": 0.3259, + "step": 54265 + }, + { + "epoch": 2.53, + "learning_rate": 1.1653629708588718e-05, + "loss": 0.0222, + "step": 54270 + }, + { + "epoch": 2.53, + "learning_rate": 1.165284592353393e-05, + "loss": 0.0677, + "step": 54275 + }, + { + "epoch": 2.53, + "learning_rate": 1.1652062138479146e-05, + "loss": 0.0542, + "step": 54280 + }, + { + "epoch": 2.53, + "learning_rate": 1.1651278353424358e-05, + "loss": 0.0699, + "step": 54285 + }, + { + "epoch": 2.53, + "learning_rate": 1.165049456836957e-05, + "loss": 0.0722, + "step": 54290 + }, + { + "epoch": 2.53, + "learning_rate": 1.1649710783314786e-05, + "loss": 0.1666, + "step": 54295 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648926998259998e-05, + "loss": 0.2113, + "step": 54300 + }, + { + "epoch": 2.53, + "learning_rate": 1.1648143213205212e-05, + "loss": 0.1151, + "step": 54305 + }, + { + "epoch": 2.53, + "learning_rate": 1.1647359428150424e-05, + "loss": 0.2646, + "step": 54310 + }, + { + "epoch": 2.53, + "learning_rate": 1.164657564309564e-05, + "loss": 0.2441, + "step": 54315 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645791858040852e-05, + "loss": 0.0764, + "step": 54320 + }, + { + "epoch": 2.53, + "learning_rate": 1.1645008072986066e-05, + "loss": 0.0424, + "step": 54325 + }, + { + "epoch": 2.54, + "learning_rate": 1.1644224287931278e-05, + "loss": 0.0538, + "step": 54330 + }, + { + "epoch": 2.54, + "learning_rate": 1.1643440502876494e-05, + "loss": 0.0319, + "step": 54335 + }, + { + "epoch": 2.54, + "learning_rate": 1.1642656717821706e-05, + "loss": 0.1016, + "step": 54340 + }, + { + "epoch": 2.54, + "learning_rate": 1.164187293276692e-05, + "loss": 0.0941, + "step": 54345 + }, + { + "epoch": 2.54, + "learning_rate": 1.1641089147712132e-05, + "loss": 0.0697, + "step": 54350 + }, + { + "epoch": 2.54, + "learning_rate": 1.1640305362657344e-05, + "loss": 0.1258, + "step": 54355 + }, + { + "epoch": 2.54, + "learning_rate": 1.163952157760256e-05, + "loss": 0.2775, + "step": 54360 + }, + { + "epoch": 2.54, + "learning_rate": 1.1638737792547772e-05, + "loss": 0.3991, + "step": 54365 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637954007492986e-05, + "loss": 0.098, + "step": 54370 + }, + { + "epoch": 2.54, + "learning_rate": 1.1637170222438198e-05, + "loss": 0.0404, + "step": 54375 + }, + { + "epoch": 2.54, + "learning_rate": 1.1636386437383414e-05, + "loss": 0.0601, + "step": 54380 + }, + { + "epoch": 2.54, + "learning_rate": 1.1635602652328626e-05, + "loss": 0.0452, + "step": 54385 + }, + { + "epoch": 2.54, + "learning_rate": 1.163481886727384e-05, + "loss": 0.1061, + "step": 54390 + }, + { + "epoch": 2.54, + "learning_rate": 1.1634035082219054e-05, + "loss": 0.0819, + "step": 54395 + }, + { + "epoch": 2.54, + "learning_rate": 1.1633251297164268e-05, + "loss": 0.1277, + "step": 54400 + }, + { + "epoch": 2.54, + "learning_rate": 1.163246751210948e-05, + "loss": 0.2061, + "step": 54405 + }, + { + "epoch": 2.54, + "learning_rate": 1.1631683727054695e-05, + "loss": 0.3015, + "step": 54410 + }, + { + "epoch": 2.54, + "learning_rate": 1.1630899941999908e-05, + "loss": 0.4022, + "step": 54415 + }, + { + "epoch": 2.54, + "learning_rate": 1.163011615694512e-05, + "loss": 0.0536, + "step": 54420 + }, + { + "epoch": 2.54, + "learning_rate": 1.1629332371890334e-05, + "loss": 0.0426, + "step": 54425 + }, + { + "epoch": 2.54, + "learning_rate": 1.1628548586835546e-05, + "loss": 0.046, + "step": 54430 + }, + { + "epoch": 2.54, + "learning_rate": 1.1627764801780761e-05, + "loss": 0.0336, + "step": 54435 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626981016725974e-05, + "loss": 0.0846, + "step": 54440 + }, + { + "epoch": 2.54, + "learning_rate": 1.1626197231671188e-05, + "loss": 0.0895, + "step": 54445 + }, + { + "epoch": 2.54, + "learning_rate": 1.16254134466164e-05, + "loss": 0.0969, + "step": 54450 + }, + { + "epoch": 2.54, + "learning_rate": 1.1624629661561615e-05, + "loss": 0.1187, + "step": 54455 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623845876506828e-05, + "loss": 0.2181, + "step": 54460 + }, + { + "epoch": 2.54, + "learning_rate": 1.1623062091452042e-05, + "loss": 0.2948, + "step": 54465 + }, + { + "epoch": 2.54, + "learning_rate": 1.1622278306397254e-05, + "loss": 0.0419, + "step": 54470 + }, + { + "epoch": 2.54, + "learning_rate": 1.162149452134247e-05, + "loss": 0.0756, + "step": 54475 + }, + { + "epoch": 2.54, + "learning_rate": 1.1620710736287682e-05, + "loss": 0.0746, + "step": 54480 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619926951232894e-05, + "loss": 0.0578, + "step": 54485 + }, + { + "epoch": 2.54, + "learning_rate": 1.1619143166178108e-05, + "loss": 0.047, + "step": 54490 + }, + { + "epoch": 2.54, + "learning_rate": 1.1618359381123322e-05, + "loss": 0.1197, + "step": 54495 + }, + { + "epoch": 2.54, + "learning_rate": 1.1617575596068535e-05, + "loss": 0.0707, + "step": 54500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616791811013748e-05, + "loss": 0.1846, + "step": 54505 + }, + { + "epoch": 2.54, + "learning_rate": 1.1616008025958963e-05, + "loss": 0.3005, + "step": 54510 + }, + { + "epoch": 2.54, + "learning_rate": 1.1615224240904176e-05, + "loss": 0.3327, + "step": 54515 + }, + { + "epoch": 2.54, + "learning_rate": 1.161444045584939e-05, + "loss": 0.0754, + "step": 54520 + }, + { + "epoch": 2.54, + "learning_rate": 1.1613656670794602e-05, + "loss": 0.0423, + "step": 54525 + }, + { + "epoch": 2.54, + "learning_rate": 1.1612872885739817e-05, + "loss": 0.0361, + "step": 54530 + }, + { + "epoch": 2.54, + "learning_rate": 1.161208910068503e-05, + "loss": 0.0925, + "step": 54535 + }, + { + "epoch": 2.54, + "learning_rate": 1.1611305315630243e-05, + "loss": 0.0593, + "step": 54540 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610521530575456e-05, + "loss": 0.1092, + "step": 54545 + }, + { + "epoch": 2.55, + "learning_rate": 1.1609737745520668e-05, + "loss": 0.1291, + "step": 54550 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608953960465883e-05, + "loss": 0.2171, + "step": 54555 + }, + { + "epoch": 2.55, + "learning_rate": 1.1608170175411096e-05, + "loss": 0.1797, + "step": 54560 + }, + { + "epoch": 2.55, + "learning_rate": 1.160738639035631e-05, + "loss": 0.3179, + "step": 54565 + }, + { + "epoch": 2.55, + "learning_rate": 1.1606602605301522e-05, + "loss": 0.0331, + "step": 54570 + }, + { + "epoch": 2.55, + "learning_rate": 1.1605818820246737e-05, + "loss": 0.0527, + "step": 54575 + }, + { + "epoch": 2.55, + "learning_rate": 1.160503503519195e-05, + "loss": 0.0389, + "step": 54580 + }, + { + "epoch": 2.55, + "learning_rate": 1.1604251250137163e-05, + "loss": 0.0399, + "step": 54585 + }, + { + "epoch": 2.55, + "learning_rate": 1.1603467465082376e-05, + "loss": 0.0983, + "step": 54590 + }, + { + "epoch": 2.55, + "learning_rate": 1.1602683680027591e-05, + "loss": 0.0581, + "step": 54595 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601899894972803e-05, + "loss": 0.1263, + "step": 54600 + }, + { + "epoch": 2.55, + "learning_rate": 1.1601116109918017e-05, + "loss": 0.1091, + "step": 54605 + }, + { + "epoch": 2.55, + "learning_rate": 1.1600332324863231e-05, + "loss": 0.3447, + "step": 54610 + }, + { + "epoch": 2.55, + "learning_rate": 1.1599548539808443e-05, + "loss": 0.2702, + "step": 54615 + }, + { + "epoch": 2.55, + "learning_rate": 1.1598764754753657e-05, + "loss": 0.1182, + "step": 54620 + }, + { + "epoch": 2.55, + "learning_rate": 1.159798096969887e-05, + "loss": 0.0375, + "step": 54625 + }, + { + "epoch": 2.55, + "learning_rate": 1.1597197184644085e-05, + "loss": 0.0246, + "step": 54630 + }, + { + "epoch": 2.55, + "learning_rate": 1.1596413399589297e-05, + "loss": 0.0543, + "step": 54635 + }, + { + "epoch": 2.55, + "learning_rate": 1.1595629614534511e-05, + "loss": 0.0953, + "step": 54640 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594845829479724e-05, + "loss": 0.206, + "step": 54645 + }, + { + "epoch": 2.55, + "learning_rate": 1.1594062044424939e-05, + "loss": 0.1144, + "step": 54650 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593278259370151e-05, + "loss": 0.1205, + "step": 54655 + }, + { + "epoch": 2.55, + "learning_rate": 1.1592494474315365e-05, + "loss": 0.2163, + "step": 54660 + }, + { + "epoch": 2.55, + "learning_rate": 1.1591710689260577e-05, + "loss": 0.34, + "step": 54665 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590926904205793e-05, + "loss": 0.0405, + "step": 54670 + }, + { + "epoch": 2.55, + "learning_rate": 1.1590143119151005e-05, + "loss": 0.0285, + "step": 54675 + }, + { + "epoch": 2.55, + "learning_rate": 1.1589359334096217e-05, + "loss": 0.0436, + "step": 54680 + }, + { + "epoch": 2.55, + "learning_rate": 1.1588575549041431e-05, + "loss": 0.036, + "step": 54685 + }, + { + "epoch": 2.55, + "learning_rate": 1.1587791763986644e-05, + "loss": 0.0615, + "step": 54690 + }, + { + "epoch": 2.55, + "learning_rate": 1.158700797893186e-05, + "loss": 0.073, + "step": 54695 + }, + { + "epoch": 2.55, + "learning_rate": 1.1586224193877071e-05, + "loss": 0.0988, + "step": 54700 + }, + { + "epoch": 2.55, + "learning_rate": 1.1585440408822285e-05, + "loss": 0.1362, + "step": 54705 + }, + { + "epoch": 2.55, + "learning_rate": 1.15846566237675e-05, + "loss": 0.2456, + "step": 54710 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583872838712713e-05, + "loss": 0.2314, + "step": 54715 + }, + { + "epoch": 2.55, + "learning_rate": 1.1583089053657925e-05, + "loss": 0.1112, + "step": 54720 + }, + { + "epoch": 2.55, + "learning_rate": 1.1582305268603141e-05, + "loss": 0.125, + "step": 54725 + }, + { + "epoch": 2.55, + "learning_rate": 1.1581521483548353e-05, + "loss": 0.0651, + "step": 54730 + }, + { + "epoch": 2.55, + "learning_rate": 1.1580737698493567e-05, + "loss": 0.1076, + "step": 54735 + }, + { + "epoch": 2.55, + "learning_rate": 1.157995391343878e-05, + "loss": 0.0838, + "step": 54740 + }, + { + "epoch": 2.55, + "learning_rate": 1.1579170128383991e-05, + "loss": 0.0998, + "step": 54745 + }, + { + "epoch": 2.55, + "learning_rate": 1.1578386343329207e-05, + "loss": 0.0908, + "step": 54750 + }, + { + "epoch": 2.55, + "learning_rate": 1.157760255827442e-05, + "loss": 0.1566, + "step": 54755 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576818773219633e-05, + "loss": 0.325, + "step": 54760 + }, + { + "epoch": 2.56, + "learning_rate": 1.1576034988164845e-05, + "loss": 0.3982, + "step": 54765 + }, + { + "epoch": 2.56, + "learning_rate": 1.1575251203110061e-05, + "loss": 0.0304, + "step": 54770 + }, + { + "epoch": 2.56, + "learning_rate": 1.1574467418055273e-05, + "loss": 0.0603, + "step": 54775 + }, + { + "epoch": 2.56, + "learning_rate": 1.1573683633000487e-05, + "loss": 0.0578, + "step": 54780 + }, + { + "epoch": 2.56, + "learning_rate": 1.15728998479457e-05, + "loss": 0.0566, + "step": 54785 + }, + { + "epoch": 2.56, + "learning_rate": 1.1572116062890915e-05, + "loss": 0.0771, + "step": 54790 + }, + { + "epoch": 2.56, + "learning_rate": 1.1571332277836127e-05, + "loss": 0.1294, + "step": 54795 + }, + { + "epoch": 2.56, + "learning_rate": 1.1570548492781341e-05, + "loss": 0.1138, + "step": 54800 + }, + { + "epoch": 2.56, + "learning_rate": 1.1569764707726553e-05, + "loss": 0.1218, + "step": 54805 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568980922671767e-05, + "loss": 0.256, + "step": 54810 + }, + { + "epoch": 2.56, + "learning_rate": 1.1568197137616981e-05, + "loss": 0.3657, + "step": 54815 + }, + { + "epoch": 2.56, + "learning_rate": 1.1567413352562193e-05, + "loss": 0.0775, + "step": 54820 + }, + { + "epoch": 2.56, + "learning_rate": 1.1566629567507409e-05, + "loss": 0.0329, + "step": 54825 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565845782452621e-05, + "loss": 0.02, + "step": 54830 + }, + { + "epoch": 2.56, + "learning_rate": 1.1565061997397835e-05, + "loss": 0.0346, + "step": 54835 + }, + { + "epoch": 2.56, + "learning_rate": 1.1564278212343047e-05, + "loss": 0.0832, + "step": 54840 + }, + { + "epoch": 2.56, + "learning_rate": 1.1563494427288263e-05, + "loss": 0.0888, + "step": 54845 + }, + { + "epoch": 2.56, + "learning_rate": 1.1562710642233475e-05, + "loss": 0.1356, + "step": 54850 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561926857178689e-05, + "loss": 0.1574, + "step": 54855 + }, + { + "epoch": 2.56, + "learning_rate": 1.1561143072123901e-05, + "loss": 0.2213, + "step": 54860 + }, + { + "epoch": 2.56, + "learning_rate": 1.1560359287069117e-05, + "loss": 0.2773, + "step": 54865 + }, + { + "epoch": 2.56, + "learning_rate": 1.1559575502014329e-05, + "loss": 0.073, + "step": 54870 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558791716959541e-05, + "loss": 0.0188, + "step": 54875 + }, + { + "epoch": 2.56, + "learning_rate": 1.1558007931904755e-05, + "loss": 0.0895, + "step": 54880 + }, + { + "epoch": 2.56, + "learning_rate": 1.1557224146849967e-05, + "loss": 0.0839, + "step": 54885 + }, + { + "epoch": 2.56, + "learning_rate": 1.1556440361795183e-05, + "loss": 0.0635, + "step": 54890 + }, + { + "epoch": 2.56, + "learning_rate": 1.1555656576740395e-05, + "loss": 0.0701, + "step": 54895 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554872791685609e-05, + "loss": 0.1132, + "step": 54900 + }, + { + "epoch": 2.56, + "learning_rate": 1.1554089006630821e-05, + "loss": 0.1373, + "step": 54905 + }, + { + "epoch": 2.56, + "learning_rate": 1.1553305221576037e-05, + "loss": 0.1604, + "step": 54910 + }, + { + "epoch": 2.56, + "learning_rate": 1.1552521436521249e-05, + "loss": 0.3499, + "step": 54915 + }, + { + "epoch": 2.56, + "learning_rate": 1.1551737651466463e-05, + "loss": 0.0524, + "step": 54920 + }, + { + "epoch": 2.56, + "learning_rate": 1.1550953866411677e-05, + "loss": 0.026, + "step": 54925 + }, + { + "epoch": 2.56, + "learning_rate": 1.155017008135689e-05, + "loss": 0.0591, + "step": 54930 + }, + { + "epoch": 2.56, + "learning_rate": 1.1549386296302103e-05, + "loss": 0.1202, + "step": 54935 + }, + { + "epoch": 2.56, + "learning_rate": 1.1548602511247315e-05, + "loss": 0.0974, + "step": 54940 + }, + { + "epoch": 2.56, + "learning_rate": 1.154781872619253e-05, + "loss": 0.1075, + "step": 54945 + }, + { + "epoch": 2.56, + "learning_rate": 1.1547034941137743e-05, + "loss": 0.588, + "step": 54950 + }, + { + "epoch": 2.56, + "learning_rate": 1.1546251156082957e-05, + "loss": 0.1737, + "step": 54955 + }, + { + "epoch": 2.56, + "learning_rate": 1.1545467371028169e-05, + "loss": 0.2966, + "step": 54960 + }, + { + "epoch": 2.56, + "learning_rate": 1.1544683585973385e-05, + "loss": 0.1925, + "step": 54965 + }, + { + "epoch": 2.56, + "learning_rate": 1.1543899800918597e-05, + "loss": 0.0461, + "step": 54970 + }, + { + "epoch": 2.57, + "learning_rate": 1.154311601586381e-05, + "loss": 0.0533, + "step": 54975 + }, + { + "epoch": 2.57, + "learning_rate": 1.1542332230809023e-05, + "loss": 0.0946, + "step": 54980 + }, + { + "epoch": 2.57, + "learning_rate": 1.1541548445754239e-05, + "loss": 0.0699, + "step": 54985 + }, + { + "epoch": 2.57, + "learning_rate": 1.154076466069945e-05, + "loss": 0.125, + "step": 54990 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539980875644665e-05, + "loss": 0.0972, + "step": 54995 + }, + { + "epoch": 2.57, + "learning_rate": 1.1539197090589877e-05, + "loss": 0.2018, + "step": 55000 + }, + { + "epoch": 2.57, + "learning_rate": 1.153841330553509e-05, + "loss": 0.2578, + "step": 55005 + }, + { + "epoch": 2.57, + "learning_rate": 1.1537629520480305e-05, + "loss": 0.2174, + "step": 55010 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536845735425517e-05, + "loss": 0.2747, + "step": 55015 + }, + { + "epoch": 2.57, + "learning_rate": 1.1536061950370731e-05, + "loss": 0.1615, + "step": 55020 + }, + { + "epoch": 2.57, + "learning_rate": 1.1535278165315945e-05, + "loss": 0.034, + "step": 55025 + }, + { + "epoch": 2.57, + "learning_rate": 1.1534494380261159e-05, + "loss": 0.101, + "step": 55030 + }, + { + "epoch": 2.57, + "learning_rate": 1.1533710595206371e-05, + "loss": 0.1131, + "step": 55035 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532926810151586e-05, + "loss": 0.142, + "step": 55040 + }, + { + "epoch": 2.57, + "learning_rate": 1.1532143025096799e-05, + "loss": 0.0432, + "step": 55045 + }, + { + "epoch": 2.57, + "learning_rate": 1.1531359240042013e-05, + "loss": 0.1238, + "step": 55050 + }, + { + "epoch": 2.57, + "learning_rate": 1.1530575454987225e-05, + "loss": 0.1882, + "step": 55055 + }, + { + "epoch": 2.57, + "learning_rate": 1.152979166993244e-05, + "loss": 0.1863, + "step": 55060 + }, + { + "epoch": 2.57, + "learning_rate": 1.1529007884877653e-05, + "loss": 0.3653, + "step": 55065 + }, + { + "epoch": 2.57, + "learning_rate": 1.1528224099822865e-05, + "loss": 0.043, + "step": 55070 + }, + { + "epoch": 2.57, + "learning_rate": 1.1527440314768079e-05, + "loss": 0.0351, + "step": 55075 + }, + { + "epoch": 2.57, + "learning_rate": 1.1526656529713291e-05, + "loss": 0.0739, + "step": 55080 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525872744658507e-05, + "loss": 0.0644, + "step": 55085 + }, + { + "epoch": 2.57, + "learning_rate": 1.1525088959603719e-05, + "loss": 0.1164, + "step": 55090 + }, + { + "epoch": 2.57, + "learning_rate": 1.1524305174548933e-05, + "loss": 0.1265, + "step": 55095 + }, + { + "epoch": 2.57, + "learning_rate": 1.1523521389494145e-05, + "loss": 0.0871, + "step": 55100 + }, + { + "epoch": 2.57, + "learning_rate": 1.152273760443936e-05, + "loss": 0.1264, + "step": 55105 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521953819384573e-05, + "loss": 0.3587, + "step": 55110 + }, + { + "epoch": 2.57, + "learning_rate": 1.1521170034329787e-05, + "loss": 0.4438, + "step": 55115 + }, + { + "epoch": 2.57, + "learning_rate": 1.1520386249274999e-05, + "loss": 0.0607, + "step": 55120 + }, + { + "epoch": 2.57, + "learning_rate": 1.1519602464220214e-05, + "loss": 0.0342, + "step": 55125 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518818679165427e-05, + "loss": 0.022, + "step": 55130 + }, + { + "epoch": 2.57, + "learning_rate": 1.1518034894110639e-05, + "loss": 0.0265, + "step": 55135 + }, + { + "epoch": 2.57, + "learning_rate": 1.1517251109055854e-05, + "loss": 0.0946, + "step": 55140 + }, + { + "epoch": 2.57, + "learning_rate": 1.1516467324001067e-05, + "loss": 0.1235, + "step": 55145 + }, + { + "epoch": 2.57, + "learning_rate": 1.151568353894628e-05, + "loss": 0.2051, + "step": 55150 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514899753891493e-05, + "loss": 0.2828, + "step": 55155 + }, + { + "epoch": 2.57, + "learning_rate": 1.1514115968836708e-05, + "loss": 0.2089, + "step": 55160 + }, + { + "epoch": 2.57, + "learning_rate": 1.151333218378192e-05, + "loss": 0.2756, + "step": 55165 + }, + { + "epoch": 2.57, + "learning_rate": 1.1512548398727134e-05, + "loss": 0.0753, + "step": 55170 + }, + { + "epoch": 2.57, + "learning_rate": 1.1511764613672347e-05, + "loss": 0.0189, + "step": 55175 + }, + { + "epoch": 2.57, + "learning_rate": 1.1510980828617562e-05, + "loss": 0.0486, + "step": 55180 + }, + { + "epoch": 2.58, + "learning_rate": 1.1510197043562775e-05, + "loss": 0.0668, + "step": 55185 + }, + { + "epoch": 2.58, + "learning_rate": 1.1509413258507988e-05, + "loss": 0.0484, + "step": 55190 + }, + { + "epoch": 2.58, + "learning_rate": 1.15086294734532e-05, + "loss": 0.2344, + "step": 55195 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507845688398413e-05, + "loss": 0.1022, + "step": 55200 + }, + { + "epoch": 2.58, + "learning_rate": 1.1507061903343628e-05, + "loss": 0.2039, + "step": 55205 + }, + { + "epoch": 2.58, + "learning_rate": 1.150627811828884e-05, + "loss": 0.3759, + "step": 55210 + }, + { + "epoch": 2.58, + "learning_rate": 1.1505494333234055e-05, + "loss": 0.3593, + "step": 55215 + }, + { + "epoch": 2.58, + "learning_rate": 1.1504710548179267e-05, + "loss": 0.0577, + "step": 55220 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503926763124482e-05, + "loss": 0.0329, + "step": 55225 + }, + { + "epoch": 2.58, + "learning_rate": 1.1503142978069695e-05, + "loss": 0.0644, + "step": 55230 + }, + { + "epoch": 2.58, + "learning_rate": 1.1502359193014908e-05, + "loss": 0.0507, + "step": 55235 + }, + { + "epoch": 2.58, + "learning_rate": 1.1501575407960122e-05, + "loss": 0.0605, + "step": 55240 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500791622905336e-05, + "loss": 0.1677, + "step": 55245 + }, + { + "epoch": 2.58, + "learning_rate": 1.1500007837850549e-05, + "loss": 0.1497, + "step": 55250 + }, + { + "epoch": 2.58, + "learning_rate": 1.1499224052795762e-05, + "loss": 0.1236, + "step": 55255 + }, + { + "epoch": 2.58, + "learning_rate": 1.1498440267740976e-05, + "loss": 0.2027, + "step": 55260 + }, + { + "epoch": 2.58, + "learning_rate": 1.1497656482686189e-05, + "loss": 0.3551, + "step": 55265 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496872697631402e-05, + "loss": 0.1194, + "step": 55270 + }, + { + "epoch": 2.58, + "learning_rate": 1.1496088912576615e-05, + "loss": 0.0431, + "step": 55275 + }, + { + "epoch": 2.58, + "learning_rate": 1.149530512752183e-05, + "loss": 0.0716, + "step": 55280 + }, + { + "epoch": 2.58, + "learning_rate": 1.1494521342467042e-05, + "loss": 0.0789, + "step": 55285 + }, + { + "epoch": 2.58, + "learning_rate": 1.1493737557412256e-05, + "loss": 0.0781, + "step": 55290 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492953772357469e-05, + "loss": 0.0786, + "step": 55295 + }, + { + "epoch": 2.58, + "learning_rate": 1.1492169987302684e-05, + "loss": 0.1363, + "step": 55300 + }, + { + "epoch": 2.58, + "learning_rate": 1.1491386202247896e-05, + "loss": 0.2028, + "step": 55305 + }, + { + "epoch": 2.58, + "learning_rate": 1.149060241719311e-05, + "loss": 0.1638, + "step": 55310 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489818632138323e-05, + "loss": 0.2609, + "step": 55315 + }, + { + "epoch": 2.58, + "learning_rate": 1.1489034847083538e-05, + "loss": 0.0372, + "step": 55320 + }, + { + "epoch": 2.58, + "learning_rate": 1.148825106202875e-05, + "loss": 0.0139, + "step": 55325 + }, + { + "epoch": 2.58, + "learning_rate": 1.1487467276973963e-05, + "loss": 0.0547, + "step": 55330 + }, + { + "epoch": 2.58, + "learning_rate": 1.1486683491919176e-05, + "loss": 0.0491, + "step": 55335 + }, + { + "epoch": 2.58, + "learning_rate": 1.148589970686439e-05, + "loss": 0.0403, + "step": 55340 + }, + { + "epoch": 2.58, + "learning_rate": 1.1485115921809604e-05, + "loss": 0.1877, + "step": 55345 + }, + { + "epoch": 2.58, + "learning_rate": 1.1484332136754816e-05, + "loss": 0.1706, + "step": 55350 + }, + { + "epoch": 2.58, + "learning_rate": 1.1483548351700032e-05, + "loss": 0.1843, + "step": 55355 + }, + { + "epoch": 2.58, + "learning_rate": 1.1482764566645244e-05, + "loss": 0.2677, + "step": 55360 + }, + { + "epoch": 2.58, + "learning_rate": 1.1481980781590458e-05, + "loss": 0.1672, + "step": 55365 + }, + { + "epoch": 2.58, + "learning_rate": 1.148119699653567e-05, + "loss": 0.0489, + "step": 55370 + }, + { + "epoch": 2.58, + "learning_rate": 1.1480413211480886e-05, + "loss": 0.0291, + "step": 55375 + }, + { + "epoch": 2.58, + "learning_rate": 1.1479629426426098e-05, + "loss": 0.034, + "step": 55380 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478845641371312e-05, + "loss": 0.1155, + "step": 55385 + }, + { + "epoch": 2.58, + "learning_rate": 1.1478061856316524e-05, + "loss": 0.1148, + "step": 55390 + }, + { + "epoch": 2.58, + "learning_rate": 1.1477278071261737e-05, + "loss": 0.1079, + "step": 55395 + }, + { + "epoch": 2.59, + "learning_rate": 1.1476494286206952e-05, + "loss": 0.1543, + "step": 55400 + }, + { + "epoch": 2.59, + "learning_rate": 1.1475710501152164e-05, + "loss": 0.1308, + "step": 55405 + }, + { + "epoch": 2.59, + "learning_rate": 1.1474926716097378e-05, + "loss": 0.3201, + "step": 55410 + }, + { + "epoch": 2.59, + "learning_rate": 1.147414293104259e-05, + "loss": 0.1922, + "step": 55415 + }, + { + "epoch": 2.59, + "learning_rate": 1.1473359145987806e-05, + "loss": 0.056, + "step": 55420 + }, + { + "epoch": 2.59, + "learning_rate": 1.1472575360933018e-05, + "loss": 0.0477, + "step": 55425 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471791575878232e-05, + "loss": 0.0765, + "step": 55430 + }, + { + "epoch": 2.59, + "learning_rate": 1.1471007790823444e-05, + "loss": 0.0625, + "step": 55435 + }, + { + "epoch": 2.59, + "learning_rate": 1.147022400576866e-05, + "loss": 0.0535, + "step": 55440 + }, + { + "epoch": 2.59, + "learning_rate": 1.1469440220713872e-05, + "loss": 0.1265, + "step": 55445 + }, + { + "epoch": 2.59, + "learning_rate": 1.1468656435659086e-05, + "loss": 0.0814, + "step": 55450 + }, + { + "epoch": 2.59, + "learning_rate": 1.14678726506043e-05, + "loss": 0.1643, + "step": 55455 + }, + { + "epoch": 2.59, + "learning_rate": 1.1467088865549512e-05, + "loss": 0.3128, + "step": 55460 + }, + { + "epoch": 2.59, + "learning_rate": 1.1466305080494726e-05, + "loss": 0.2313, + "step": 55465 + }, + { + "epoch": 2.59, + "learning_rate": 1.1465521295439938e-05, + "loss": 0.0514, + "step": 55470 + }, + { + "epoch": 2.59, + "learning_rate": 1.1464737510385154e-05, + "loss": 0.0917, + "step": 55475 + }, + { + "epoch": 2.59, + "learning_rate": 1.1463953725330366e-05, + "loss": 0.0453, + "step": 55480 + }, + { + "epoch": 2.59, + "learning_rate": 1.146316994027558e-05, + "loss": 0.0833, + "step": 55485 + }, + { + "epoch": 2.59, + "learning_rate": 1.1462386155220792e-05, + "loss": 0.1032, + "step": 55490 + }, + { + "epoch": 2.59, + "learning_rate": 1.1461602370166008e-05, + "loss": 0.1346, + "step": 55495 + }, + { + "epoch": 2.59, + "learning_rate": 1.146081858511122e-05, + "loss": 0.1739, + "step": 55500 + }, + { + "epoch": 2.59, + "learning_rate": 1.1460034800056434e-05, + "loss": 0.2011, + "step": 55505 + }, + { + "epoch": 2.59, + "learning_rate": 1.1459251015001646e-05, + "loss": 0.1148, + "step": 55510 + }, + { + "epoch": 2.59, + "learning_rate": 1.1458467229946862e-05, + "loss": 0.2026, + "step": 55515 + }, + { + "epoch": 2.59, + "learning_rate": 1.1457683444892074e-05, + "loss": 0.0575, + "step": 55520 + }, + { + "epoch": 2.59, + "learning_rate": 1.1456899659837286e-05, + "loss": 0.0359, + "step": 55525 + }, + { + "epoch": 2.59, + "learning_rate": 1.14561158747825e-05, + "loss": 0.0556, + "step": 55530 + }, + { + "epoch": 2.59, + "learning_rate": 1.1455332089727712e-05, + "loss": 0.0612, + "step": 55535 + }, + { + "epoch": 2.59, + "learning_rate": 1.1454548304672928e-05, + "loss": 0.0526, + "step": 55540 + }, + { + "epoch": 2.59, + "learning_rate": 1.145376451961814e-05, + "loss": 0.0315, + "step": 55545 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452980734563354e-05, + "loss": 0.144, + "step": 55550 + }, + { + "epoch": 2.59, + "learning_rate": 1.1452196949508568e-05, + "loss": 0.1281, + "step": 55555 + }, + { + "epoch": 2.59, + "learning_rate": 1.1451413164453782e-05, + "loss": 0.2331, + "step": 55560 + }, + { + "epoch": 2.59, + "learning_rate": 1.1450629379398994e-05, + "loss": 0.2845, + "step": 55565 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449845594344208e-05, + "loss": 0.0317, + "step": 55570 + }, + { + "epoch": 2.59, + "learning_rate": 1.1449061809289422e-05, + "loss": 0.03, + "step": 55575 + }, + { + "epoch": 2.59, + "learning_rate": 1.1448278024234636e-05, + "loss": 0.0739, + "step": 55580 + }, + { + "epoch": 2.59, + "learning_rate": 1.1447494239179848e-05, + "loss": 0.1004, + "step": 55585 + }, + { + "epoch": 2.59, + "learning_rate": 1.144671045412506e-05, + "loss": 0.1453, + "step": 55590 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445926669070276e-05, + "loss": 0.2057, + "step": 55595 + }, + { + "epoch": 2.59, + "learning_rate": 1.1445142884015488e-05, + "loss": 0.2203, + "step": 55600 + }, + { + "epoch": 2.59, + "learning_rate": 1.1444359098960702e-05, + "loss": 0.1848, + "step": 55605 + }, + { + "epoch": 2.59, + "learning_rate": 1.1443575313905914e-05, + "loss": 0.2693, + "step": 55610 + }, + { + "epoch": 2.6, + "learning_rate": 1.144279152885113e-05, + "loss": 0.2025, + "step": 55615 + }, + { + "epoch": 2.6, + "learning_rate": 1.1442007743796342e-05, + "loss": 0.0353, + "step": 55620 + }, + { + "epoch": 2.6, + "learning_rate": 1.1441223958741556e-05, + "loss": 0.0815, + "step": 55625 + }, + { + "epoch": 2.6, + "learning_rate": 1.1440440173686768e-05, + "loss": 0.0724, + "step": 55630 + }, + { + "epoch": 2.6, + "learning_rate": 1.1439656388631984e-05, + "loss": 0.0461, + "step": 55635 + }, + { + "epoch": 2.6, + "learning_rate": 1.1438872603577196e-05, + "loss": 0.1578, + "step": 55640 + }, + { + "epoch": 2.6, + "learning_rate": 1.143808881852241e-05, + "loss": 0.091, + "step": 55645 + }, + { + "epoch": 2.6, + "learning_rate": 1.1437305033467622e-05, + "loss": 0.1293, + "step": 55650 + }, + { + "epoch": 2.6, + "learning_rate": 1.1436521248412836e-05, + "loss": 0.2032, + "step": 55655 + }, + { + "epoch": 2.6, + "learning_rate": 1.143573746335805e-05, + "loss": 0.2329, + "step": 55660 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434953678303262e-05, + "loss": 0.246, + "step": 55665 + }, + { + "epoch": 2.6, + "learning_rate": 1.1434169893248478e-05, + "loss": 0.0633, + "step": 55670 + }, + { + "epoch": 2.6, + "learning_rate": 1.143338610819369e-05, + "loss": 0.0702, + "step": 55675 + }, + { + "epoch": 2.6, + "learning_rate": 1.1432602323138904e-05, + "loss": 0.0746, + "step": 55680 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431818538084116e-05, + "loss": 0.0994, + "step": 55685 + }, + { + "epoch": 2.6, + "learning_rate": 1.1431034753029332e-05, + "loss": 0.0673, + "step": 55690 + }, + { + "epoch": 2.6, + "learning_rate": 1.1430250967974544e-05, + "loss": 0.1014, + "step": 55695 + }, + { + "epoch": 2.6, + "learning_rate": 1.1429467182919758e-05, + "loss": 0.1598, + "step": 55700 + }, + { + "epoch": 2.6, + "learning_rate": 1.142868339786497e-05, + "loss": 0.1542, + "step": 55705 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427899612810185e-05, + "loss": 0.1683, + "step": 55710 + }, + { + "epoch": 2.6, + "learning_rate": 1.1427115827755398e-05, + "loss": 0.3066, + "step": 55715 + }, + { + "epoch": 2.6, + "learning_rate": 1.142633204270061e-05, + "loss": 0.0337, + "step": 55720 + }, + { + "epoch": 2.6, + "learning_rate": 1.1425548257645824e-05, + "loss": 0.0113, + "step": 55725 + }, + { + "epoch": 2.6, + "learning_rate": 1.1424764472591036e-05, + "loss": 0.0901, + "step": 55730 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423980687536252e-05, + "loss": 0.0709, + "step": 55735 + }, + { + "epoch": 2.6, + "learning_rate": 1.1423196902481464e-05, + "loss": 0.0917, + "step": 55740 + }, + { + "epoch": 2.6, + "learning_rate": 1.1422413117426678e-05, + "loss": 0.0923, + "step": 55745 + }, + { + "epoch": 2.6, + "learning_rate": 1.142162933237189e-05, + "loss": 0.1011, + "step": 55750 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420845547317106e-05, + "loss": 0.2065, + "step": 55755 + }, + { + "epoch": 2.6, + "learning_rate": 1.1420061762262318e-05, + "loss": 0.2262, + "step": 55760 + }, + { + "epoch": 2.6, + "learning_rate": 1.1419277977207532e-05, + "loss": 0.3833, + "step": 55765 + }, + { + "epoch": 2.6, + "learning_rate": 1.1418494192152746e-05, + "loss": 0.0298, + "step": 55770 + }, + { + "epoch": 2.6, + "learning_rate": 1.141771040709796e-05, + "loss": 0.0199, + "step": 55775 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416926622043172e-05, + "loss": 0.0608, + "step": 55780 + }, + { + "epoch": 2.6, + "learning_rate": 1.1416142836988384e-05, + "loss": 0.0674, + "step": 55785 + }, + { + "epoch": 2.6, + "learning_rate": 1.14153590519336e-05, + "loss": 0.05, + "step": 55790 + }, + { + "epoch": 2.6, + "learning_rate": 1.1414575266878812e-05, + "loss": 0.4276, + "step": 55795 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413791481824026e-05, + "loss": 0.1672, + "step": 55800 + }, + { + "epoch": 2.6, + "learning_rate": 1.1413007696769238e-05, + "loss": 0.2332, + "step": 55805 + }, + { + "epoch": 2.6, + "learning_rate": 1.1412223911714453e-05, + "loss": 0.2378, + "step": 55810 + }, + { + "epoch": 2.6, + "learning_rate": 1.1411440126659666e-05, + "loss": 0.3594, + "step": 55815 + }, + { + "epoch": 2.6, + "learning_rate": 1.141065634160488e-05, + "loss": 0.0743, + "step": 55820 + }, + { + "epoch": 2.6, + "learning_rate": 1.1409872556550092e-05, + "loss": 0.0567, + "step": 55825 + }, + { + "epoch": 2.61, + "learning_rate": 1.1409088771495307e-05, + "loss": 0.0386, + "step": 55830 + }, + { + "epoch": 2.61, + "learning_rate": 1.140830498644052e-05, + "loss": 0.0708, + "step": 55835 + }, + { + "epoch": 2.61, + "learning_rate": 1.1407521201385733e-05, + "loss": 0.1147, + "step": 55840 + }, + { + "epoch": 2.61, + "learning_rate": 1.1406737416330946e-05, + "loss": 0.107, + "step": 55845 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405953631276158e-05, + "loss": 0.1496, + "step": 55850 + }, + { + "epoch": 2.61, + "learning_rate": 1.1405169846221374e-05, + "loss": 0.1711, + "step": 55855 + }, + { + "epoch": 2.61, + "learning_rate": 1.1404386061166586e-05, + "loss": 0.1991, + "step": 55860 + }, + { + "epoch": 2.61, + "learning_rate": 1.14036022761118e-05, + "loss": 0.2534, + "step": 55865 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402818491057014e-05, + "loss": 0.0475, + "step": 55870 + }, + { + "epoch": 2.61, + "learning_rate": 1.1402034706002227e-05, + "loss": 0.0586, + "step": 55875 + }, + { + "epoch": 2.61, + "learning_rate": 1.140125092094744e-05, + "loss": 0.0615, + "step": 55880 + }, + { + "epoch": 2.61, + "learning_rate": 1.1400467135892655e-05, + "loss": 0.0973, + "step": 55885 + }, + { + "epoch": 2.61, + "learning_rate": 1.1399683350837867e-05, + "loss": 0.0871, + "step": 55890 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398899565783081e-05, + "loss": 0.0739, + "step": 55895 + }, + { + "epoch": 2.61, + "learning_rate": 1.1398115780728294e-05, + "loss": 0.1811, + "step": 55900 + }, + { + "epoch": 2.61, + "learning_rate": 1.139733199567351e-05, + "loss": 0.1679, + "step": 55905 + }, + { + "epoch": 2.61, + "learning_rate": 1.1396548210618721e-05, + "loss": 0.2377, + "step": 55910 + }, + { + "epoch": 2.61, + "learning_rate": 1.1395764425563934e-05, + "loss": 0.29, + "step": 55915 + }, + { + "epoch": 2.61, + "learning_rate": 1.1394980640509148e-05, + "loss": 0.0498, + "step": 55920 + }, + { + "epoch": 2.61, + "learning_rate": 1.139419685545436e-05, + "loss": 0.0492, + "step": 55925 + }, + { + "epoch": 2.61, + "learning_rate": 1.1393413070399575e-05, + "loss": 0.0697, + "step": 55930 + }, + { + "epoch": 2.61, + "learning_rate": 1.1392629285344788e-05, + "loss": 0.0466, + "step": 55935 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391845500290001e-05, + "loss": 0.0723, + "step": 55940 + }, + { + "epoch": 2.61, + "learning_rate": 1.1391061715235214e-05, + "loss": 0.0772, + "step": 55945 + }, + { + "epoch": 2.61, + "learning_rate": 1.139027793018043e-05, + "loss": 0.1073, + "step": 55950 + }, + { + "epoch": 2.61, + "learning_rate": 1.1389494145125641e-05, + "loss": 0.1866, + "step": 55955 + }, + { + "epoch": 2.61, + "learning_rate": 1.1388710360070855e-05, + "loss": 0.3105, + "step": 55960 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387926575016068e-05, + "loss": 0.3204, + "step": 55965 + }, + { + "epoch": 2.61, + "learning_rate": 1.1387142789961283e-05, + "loss": 0.0536, + "step": 55970 + }, + { + "epoch": 2.61, + "learning_rate": 1.1386359004906495e-05, + "loss": 0.0331, + "step": 55975 + }, + { + "epoch": 2.61, + "learning_rate": 1.1385575219851708e-05, + "loss": 0.0336, + "step": 55980 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384791434796923e-05, + "loss": 0.0781, + "step": 55985 + }, + { + "epoch": 2.61, + "learning_rate": 1.1384007649742135e-05, + "loss": 0.0902, + "step": 55990 + }, + { + "epoch": 2.61, + "learning_rate": 1.138322386468735e-05, + "loss": 0.1057, + "step": 55995 + }, + { + "epoch": 2.61, + "learning_rate": 1.1382440079632562e-05, + "loss": 0.1152, + "step": 56000 + }, + { + "epoch": 2.61, + "learning_rate": 1.1381656294577777e-05, + "loss": 0.1095, + "step": 56005 + }, + { + "epoch": 2.61, + "learning_rate": 1.138087250952299e-05, + "loss": 0.2313, + "step": 56010 + }, + { + "epoch": 2.61, + "learning_rate": 1.1380088724468203e-05, + "loss": 0.3165, + "step": 56015 + }, + { + "epoch": 2.61, + "learning_rate": 1.1379304939413415e-05, + "loss": 0.0596, + "step": 56020 + }, + { + "epoch": 2.61, + "learning_rate": 1.1378521154358631e-05, + "loss": 0.0576, + "step": 56025 + }, + { + "epoch": 2.61, + "learning_rate": 1.1377737369303843e-05, + "loss": 0.0679, + "step": 56030 + }, + { + "epoch": 2.61, + "learning_rate": 1.1376953584249057e-05, + "loss": 0.0265, + "step": 56035 + }, + { + "epoch": 2.61, + "learning_rate": 1.137616979919427e-05, + "loss": 0.0645, + "step": 56040 + }, + { + "epoch": 2.62, + "learning_rate": 1.1375386014139482e-05, + "loss": 0.1304, + "step": 56045 + }, + { + "epoch": 2.62, + "learning_rate": 1.1374602229084697e-05, + "loss": 0.2927, + "step": 56050 + }, + { + "epoch": 2.62, + "learning_rate": 1.137381844402991e-05, + "loss": 0.1191, + "step": 56055 + }, + { + "epoch": 2.62, + "learning_rate": 1.1373034658975123e-05, + "loss": 0.2488, + "step": 56060 + }, + { + "epoch": 2.62, + "learning_rate": 1.1372250873920336e-05, + "loss": 0.1988, + "step": 56065 + }, + { + "epoch": 2.62, + "learning_rate": 1.1371467088865551e-05, + "loss": 0.0326, + "step": 56070 + }, + { + "epoch": 2.62, + "learning_rate": 1.1370683303810763e-05, + "loss": 0.0501, + "step": 56075 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369899518755977e-05, + "loss": 0.0934, + "step": 56080 + }, + { + "epoch": 2.62, + "learning_rate": 1.1369115733701191e-05, + "loss": 0.0529, + "step": 56085 + }, + { + "epoch": 2.62, + "learning_rate": 1.1368331948646405e-05, + "loss": 0.1376, + "step": 56090 + }, + { + "epoch": 2.62, + "learning_rate": 1.1367548163591617e-05, + "loss": 0.0725, + "step": 56095 + }, + { + "epoch": 2.62, + "learning_rate": 1.1366764378536831e-05, + "loss": 0.1274, + "step": 56100 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365980593482045e-05, + "loss": 0.1481, + "step": 56105 + }, + { + "epoch": 2.62, + "learning_rate": 1.1365196808427257e-05, + "loss": 0.1959, + "step": 56110 + }, + { + "epoch": 2.62, + "learning_rate": 1.1364413023372471e-05, + "loss": 0.1771, + "step": 56115 + }, + { + "epoch": 2.62, + "learning_rate": 1.1363629238317683e-05, + "loss": 0.0172, + "step": 56120 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362845453262899e-05, + "loss": 0.0531, + "step": 56125 + }, + { + "epoch": 2.62, + "learning_rate": 1.1362061668208111e-05, + "loss": 0.0984, + "step": 56130 + }, + { + "epoch": 2.62, + "learning_rate": 1.1361277883153325e-05, + "loss": 0.074, + "step": 56135 + }, + { + "epoch": 2.62, + "learning_rate": 1.1360494098098537e-05, + "loss": 0.0676, + "step": 56140 + }, + { + "epoch": 2.62, + "learning_rate": 1.1359710313043753e-05, + "loss": 0.1362, + "step": 56145 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358926527988965e-05, + "loss": 0.1122, + "step": 56150 + }, + { + "epoch": 2.62, + "learning_rate": 1.1358142742934179e-05, + "loss": 0.1515, + "step": 56155 + }, + { + "epoch": 2.62, + "learning_rate": 1.1357358957879391e-05, + "loss": 0.261, + "step": 56160 + }, + { + "epoch": 2.62, + "learning_rate": 1.1356575172824607e-05, + "loss": 0.293, + "step": 56165 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355791387769819e-05, + "loss": 0.0781, + "step": 56170 + }, + { + "epoch": 2.62, + "learning_rate": 1.1355007602715031e-05, + "loss": 0.0546, + "step": 56175 + }, + { + "epoch": 2.62, + "learning_rate": 1.1354223817660245e-05, + "loss": 0.0495, + "step": 56180 + }, + { + "epoch": 2.62, + "learning_rate": 1.1353440032605459e-05, + "loss": 0.0399, + "step": 56185 + }, + { + "epoch": 2.62, + "learning_rate": 1.1352656247550673e-05, + "loss": 0.1285, + "step": 56190 + }, + { + "epoch": 2.62, + "learning_rate": 1.1351872462495885e-05, + "loss": 0.1656, + "step": 56195 + }, + { + "epoch": 2.62, + "learning_rate": 1.13510886774411e-05, + "loss": 0.1061, + "step": 56200 + }, + { + "epoch": 2.62, + "learning_rate": 1.1350304892386313e-05, + "loss": 0.1784, + "step": 56205 + }, + { + "epoch": 2.62, + "learning_rate": 1.1349521107331527e-05, + "loss": 0.254, + "step": 56210 + }, + { + "epoch": 2.62, + "learning_rate": 1.134873732227674e-05, + "loss": 0.3493, + "step": 56215 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347953537221955e-05, + "loss": 0.0536, + "step": 56220 + }, + { + "epoch": 2.62, + "learning_rate": 1.1347169752167167e-05, + "loss": 0.0224, + "step": 56225 + }, + { + "epoch": 2.62, + "learning_rate": 1.1346385967112381e-05, + "loss": 0.06, + "step": 56230 + }, + { + "epoch": 2.62, + "learning_rate": 1.1345602182057593e-05, + "loss": 0.1365, + "step": 56235 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344818397002805e-05, + "loss": 0.0783, + "step": 56240 + }, + { + "epoch": 2.62, + "learning_rate": 1.1344034611948021e-05, + "loss": 0.1582, + "step": 56245 + }, + { + "epoch": 2.62, + "learning_rate": 1.1343250826893233e-05, + "loss": 0.0864, + "step": 56250 + }, + { + "epoch": 2.62, + "learning_rate": 1.1342467041838447e-05, + "loss": 0.1631, + "step": 56255 + }, + { + "epoch": 2.63, + "learning_rate": 1.134168325678366e-05, + "loss": 0.2896, + "step": 56260 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340899471728875e-05, + "loss": 0.2259, + "step": 56265 + }, + { + "epoch": 2.63, + "learning_rate": 1.1340115686674087e-05, + "loss": 0.083, + "step": 56270 + }, + { + "epoch": 2.63, + "learning_rate": 1.1339331901619301e-05, + "loss": 0.0676, + "step": 56275 + }, + { + "epoch": 2.63, + "learning_rate": 1.1338548116564513e-05, + "loss": 0.0453, + "step": 56280 + }, + { + "epoch": 2.63, + "learning_rate": 1.1337764331509729e-05, + "loss": 0.0732, + "step": 56285 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336980546454941e-05, + "loss": 0.0857, + "step": 56290 + }, + { + "epoch": 2.63, + "learning_rate": 1.1336196761400155e-05, + "loss": 0.0953, + "step": 56295 + }, + { + "epoch": 2.63, + "learning_rate": 1.1335412976345369e-05, + "loss": 0.1977, + "step": 56300 + }, + { + "epoch": 2.63, + "learning_rate": 1.1334629191290581e-05, + "loss": 0.1663, + "step": 56305 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333845406235795e-05, + "loss": 0.2465, + "step": 56310 + }, + { + "epoch": 2.63, + "learning_rate": 1.1333061621181007e-05, + "loss": 0.2748, + "step": 56315 + }, + { + "epoch": 2.63, + "learning_rate": 1.1332277836126223e-05, + "loss": 0.0505, + "step": 56320 + }, + { + "epoch": 2.63, + "learning_rate": 1.1331494051071435e-05, + "loss": 0.0276, + "step": 56325 + }, + { + "epoch": 2.63, + "learning_rate": 1.1330710266016649e-05, + "loss": 0.0824, + "step": 56330 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329926480961861e-05, + "loss": 0.1261, + "step": 56335 + }, + { + "epoch": 2.63, + "learning_rate": 1.1329142695907077e-05, + "loss": 0.074, + "step": 56340 + }, + { + "epoch": 2.63, + "learning_rate": 1.1328358910852289e-05, + "loss": 0.206, + "step": 56345 + }, + { + "epoch": 2.63, + "learning_rate": 1.1327575125797503e-05, + "loss": 0.1365, + "step": 56350 + }, + { + "epoch": 2.63, + "learning_rate": 1.1326791340742715e-05, + "loss": 0.1942, + "step": 56355 + }, + { + "epoch": 2.63, + "learning_rate": 1.132600755568793e-05, + "loss": 0.2217, + "step": 56360 + }, + { + "epoch": 2.63, + "learning_rate": 1.1325223770633143e-05, + "loss": 0.3364, + "step": 56365 + }, + { + "epoch": 2.63, + "learning_rate": 1.1324439985578355e-05, + "loss": 0.0938, + "step": 56370 + }, + { + "epoch": 2.63, + "learning_rate": 1.1323656200523569e-05, + "loss": 0.0528, + "step": 56375 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322872415468781e-05, + "loss": 0.0529, + "step": 56380 + }, + { + "epoch": 2.63, + "learning_rate": 1.1322088630413997e-05, + "loss": 0.0723, + "step": 56385 + }, + { + "epoch": 2.63, + "learning_rate": 1.1321304845359209e-05, + "loss": 0.0862, + "step": 56390 + }, + { + "epoch": 2.63, + "learning_rate": 1.1320521060304423e-05, + "loss": 0.1117, + "step": 56395 + }, + { + "epoch": 2.63, + "learning_rate": 1.1319737275249637e-05, + "loss": 0.1197, + "step": 56400 + }, + { + "epoch": 2.63, + "learning_rate": 1.131895349019485e-05, + "loss": 0.2187, + "step": 56405 + }, + { + "epoch": 2.63, + "learning_rate": 1.1318169705140063e-05, + "loss": 0.119, + "step": 56410 + }, + { + "epoch": 2.63, + "learning_rate": 1.1317385920085277e-05, + "loss": 0.4055, + "step": 56415 + }, + { + "epoch": 2.63, + "learning_rate": 1.131660213503049e-05, + "loss": 0.0653, + "step": 56420 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315818349975705e-05, + "loss": 0.0624, + "step": 56425 + }, + { + "epoch": 2.63, + "learning_rate": 1.1315034564920917e-05, + "loss": 0.1205, + "step": 56430 + }, + { + "epoch": 2.63, + "learning_rate": 1.1314250779866129e-05, + "loss": 0.0533, + "step": 56435 + }, + { + "epoch": 2.63, + "learning_rate": 1.1313466994811345e-05, + "loss": 0.1112, + "step": 56440 + }, + { + "epoch": 2.63, + "learning_rate": 1.1312683209756557e-05, + "loss": 0.0813, + "step": 56445 + }, + { + "epoch": 2.63, + "learning_rate": 1.131189942470177e-05, + "loss": 0.1179, + "step": 56450 + }, + { + "epoch": 2.63, + "learning_rate": 1.1311115639646983e-05, + "loss": 0.108, + "step": 56455 + }, + { + "epoch": 2.63, + "learning_rate": 1.1310331854592199e-05, + "loss": 0.3539, + "step": 56460 + }, + { + "epoch": 2.63, + "learning_rate": 1.130954806953741e-05, + "loss": 0.3225, + "step": 56465 + }, + { + "epoch": 2.63, + "learning_rate": 1.1308764284482625e-05, + "loss": 0.1074, + "step": 56470 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307980499427837e-05, + "loss": 0.0635, + "step": 56475 + }, + { + "epoch": 2.64, + "learning_rate": 1.1307196714373052e-05, + "loss": 0.0487, + "step": 56480 + }, + { + "epoch": 2.64, + "learning_rate": 1.1306412929318265e-05, + "loss": 0.0477, + "step": 56485 + }, + { + "epoch": 2.64, + "learning_rate": 1.1305629144263479e-05, + "loss": 0.0545, + "step": 56490 + }, + { + "epoch": 2.64, + "learning_rate": 1.130484535920869e-05, + "loss": 0.1771, + "step": 56495 + }, + { + "epoch": 2.64, + "learning_rate": 1.1304061574153905e-05, + "loss": 0.1594, + "step": 56500 + }, + { + "epoch": 2.64, + "learning_rate": 1.1303277789099119e-05, + "loss": 0.1938, + "step": 56505 + }, + { + "epoch": 2.64, + "learning_rate": 1.130249400404433e-05, + "loss": 0.2132, + "step": 56510 + }, + { + "epoch": 2.64, + "learning_rate": 1.1301710218989546e-05, + "loss": 0.2514, + "step": 56515 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300926433934759e-05, + "loss": 0.042, + "step": 56520 + }, + { + "epoch": 2.64, + "learning_rate": 1.1300142648879973e-05, + "loss": 0.0904, + "step": 56525 + }, + { + "epoch": 2.64, + "learning_rate": 1.1299358863825185e-05, + "loss": 0.0356, + "step": 56530 + }, + { + "epoch": 2.64, + "learning_rate": 1.12985750787704e-05, + "loss": 0.1147, + "step": 56535 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297791293715613e-05, + "loss": 0.0365, + "step": 56540 + }, + { + "epoch": 2.64, + "learning_rate": 1.1297007508660826e-05, + "loss": 0.1268, + "step": 56545 + }, + { + "epoch": 2.64, + "learning_rate": 1.1296223723606039e-05, + "loss": 0.1831, + "step": 56550 + }, + { + "epoch": 2.64, + "learning_rate": 1.1295439938551254e-05, + "loss": 0.1668, + "step": 56555 + }, + { + "epoch": 2.64, + "learning_rate": 1.1294656153496466e-05, + "loss": 0.3163, + "step": 56560 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293872368441679e-05, + "loss": 0.2783, + "step": 56565 + }, + { + "epoch": 2.64, + "learning_rate": 1.1293088583386893e-05, + "loss": 0.0947, + "step": 56570 + }, + { + "epoch": 2.64, + "learning_rate": 1.1292304798332105e-05, + "loss": 0.0528, + "step": 56575 + }, + { + "epoch": 2.64, + "learning_rate": 1.129152101327732e-05, + "loss": 0.0567, + "step": 56580 + }, + { + "epoch": 2.64, + "learning_rate": 1.1290737228222533e-05, + "loss": 0.0607, + "step": 56585 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289953443167747e-05, + "loss": 0.0598, + "step": 56590 + }, + { + "epoch": 2.64, + "learning_rate": 1.1289169658112959e-05, + "loss": 0.151, + "step": 56595 + }, + { + "epoch": 2.64, + "learning_rate": 1.1288385873058174e-05, + "loss": 0.2271, + "step": 56600 + }, + { + "epoch": 2.64, + "learning_rate": 1.1287602088003387e-05, + "loss": 0.1683, + "step": 56605 + }, + { + "epoch": 2.64, + "learning_rate": 1.12868183029486e-05, + "loss": 0.2022, + "step": 56610 + }, + { + "epoch": 2.64, + "learning_rate": 1.1286034517893814e-05, + "loss": 0.2549, + "step": 56615 + }, + { + "epoch": 2.64, + "learning_rate": 1.1285250732839028e-05, + "loss": 0.0524, + "step": 56620 + }, + { + "epoch": 2.64, + "learning_rate": 1.128446694778424e-05, + "loss": 0.0105, + "step": 56625 + }, + { + "epoch": 2.64, + "learning_rate": 1.1283683162729453e-05, + "loss": 0.0215, + "step": 56630 + }, + { + "epoch": 2.64, + "learning_rate": 1.1282899377674668e-05, + "loss": 0.0492, + "step": 56635 + }, + { + "epoch": 2.64, + "learning_rate": 1.128211559261988e-05, + "loss": 0.1595, + "step": 56640 + }, + { + "epoch": 2.64, + "learning_rate": 1.1281331807565094e-05, + "loss": 0.0684, + "step": 56645 + }, + { + "epoch": 2.64, + "learning_rate": 1.1280548022510307e-05, + "loss": 0.1465, + "step": 56650 + }, + { + "epoch": 2.64, + "learning_rate": 1.1279764237455522e-05, + "loss": 0.1494, + "step": 56655 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278980452400734e-05, + "loss": 0.2374, + "step": 56660 + }, + { + "epoch": 2.64, + "learning_rate": 1.1278196667345948e-05, + "loss": 0.2635, + "step": 56665 + }, + { + "epoch": 2.64, + "learning_rate": 1.127741288229116e-05, + "loss": 0.0529, + "step": 56670 + }, + { + "epoch": 2.64, + "learning_rate": 1.1276629097236376e-05, + "loss": 0.0205, + "step": 56675 + }, + { + "epoch": 2.64, + "learning_rate": 1.1275845312181588e-05, + "loss": 0.1151, + "step": 56680 + }, + { + "epoch": 2.65, + "learning_rate": 1.1275061527126802e-05, + "loss": 0.0793, + "step": 56685 + }, + { + "epoch": 2.65, + "learning_rate": 1.1274277742072014e-05, + "loss": 0.058, + "step": 56690 + }, + { + "epoch": 2.65, + "learning_rate": 1.1273493957017227e-05, + "loss": 0.0678, + "step": 56695 + }, + { + "epoch": 2.65, + "learning_rate": 1.1272710171962442e-05, + "loss": 0.1245, + "step": 56700 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271926386907654e-05, + "loss": 0.3038, + "step": 56705 + }, + { + "epoch": 2.65, + "learning_rate": 1.1271142601852868e-05, + "loss": 0.2822, + "step": 56710 + }, + { + "epoch": 2.65, + "learning_rate": 1.1270358816798082e-05, + "loss": 0.2808, + "step": 56715 + }, + { + "epoch": 2.65, + "learning_rate": 1.1269575031743296e-05, + "loss": 0.0484, + "step": 56720 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268791246688508e-05, + "loss": 0.0473, + "step": 56725 + }, + { + "epoch": 2.65, + "learning_rate": 1.1268007461633722e-05, + "loss": 0.0651, + "step": 56730 + }, + { + "epoch": 2.65, + "learning_rate": 1.1267223676578936e-05, + "loss": 0.0608, + "step": 56735 + }, + { + "epoch": 2.65, + "learning_rate": 1.126643989152415e-05, + "loss": 0.0815, + "step": 56740 + }, + { + "epoch": 2.65, + "learning_rate": 1.1265656106469362e-05, + "loss": 0.0831, + "step": 56745 + }, + { + "epoch": 2.65, + "learning_rate": 1.1264872321414578e-05, + "loss": 0.1264, + "step": 56750 + }, + { + "epoch": 2.65, + "learning_rate": 1.126408853635979e-05, + "loss": 0.196, + "step": 56755 + }, + { + "epoch": 2.65, + "learning_rate": 1.1263304751305002e-05, + "loss": 0.1935, + "step": 56760 + }, + { + "epoch": 2.65, + "learning_rate": 1.1262520966250216e-05, + "loss": 0.2181, + "step": 56765 + }, + { + "epoch": 2.65, + "learning_rate": 1.1261737181195428e-05, + "loss": 0.0669, + "step": 56770 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260953396140644e-05, + "loss": 0.0351, + "step": 56775 + }, + { + "epoch": 2.65, + "learning_rate": 1.1260169611085856e-05, + "loss": 0.031, + "step": 56780 + }, + { + "epoch": 2.65, + "learning_rate": 1.125938582603107e-05, + "loss": 0.1046, + "step": 56785 + }, + { + "epoch": 2.65, + "learning_rate": 1.1258602040976282e-05, + "loss": 0.0576, + "step": 56790 + }, + { + "epoch": 2.65, + "learning_rate": 1.1257818255921498e-05, + "loss": 0.1757, + "step": 56795 + }, + { + "epoch": 2.65, + "learning_rate": 1.125703447086671e-05, + "loss": 0.0646, + "step": 56800 + }, + { + "epoch": 2.65, + "learning_rate": 1.1256250685811924e-05, + "loss": 0.0942, + "step": 56805 + }, + { + "epoch": 2.65, + "learning_rate": 1.1255466900757136e-05, + "loss": 0.2081, + "step": 56810 + }, + { + "epoch": 2.65, + "learning_rate": 1.1254683115702352e-05, + "loss": 0.2665, + "step": 56815 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253899330647564e-05, + "loss": 0.1061, + "step": 56820 + }, + { + "epoch": 2.65, + "learning_rate": 1.1253115545592776e-05, + "loss": 0.0643, + "step": 56825 + }, + { + "epoch": 2.65, + "learning_rate": 1.1252331760537992e-05, + "loss": 0.0954, + "step": 56830 + }, + { + "epoch": 2.65, + "learning_rate": 1.1251547975483204e-05, + "loss": 0.1031, + "step": 56835 + }, + { + "epoch": 2.65, + "learning_rate": 1.1250764190428418e-05, + "loss": 0.0669, + "step": 56840 + }, + { + "epoch": 2.65, + "learning_rate": 1.124998040537363e-05, + "loss": 0.102, + "step": 56845 + }, + { + "epoch": 2.65, + "learning_rate": 1.1249196620318846e-05, + "loss": 0.0921, + "step": 56850 + }, + { + "epoch": 2.65, + "learning_rate": 1.1248412835264058e-05, + "loss": 0.2177, + "step": 56855 + }, + { + "epoch": 2.65, + "learning_rate": 1.1247629050209272e-05, + "loss": 0.2624, + "step": 56860 + }, + { + "epoch": 2.65, + "learning_rate": 1.1246845265154484e-05, + "loss": 0.2272, + "step": 56865 + }, + { + "epoch": 2.65, + "learning_rate": 1.12460614800997e-05, + "loss": 0.0577, + "step": 56870 + }, + { + "epoch": 2.65, + "learning_rate": 1.1245277695044912e-05, + "loss": 0.0705, + "step": 56875 + }, + { + "epoch": 2.65, + "learning_rate": 1.1244493909990126e-05, + "loss": 0.0784, + "step": 56880 + }, + { + "epoch": 2.65, + "learning_rate": 1.1243710124935338e-05, + "loss": 0.0725, + "step": 56885 + }, + { + "epoch": 2.65, + "learning_rate": 1.124292633988055e-05, + "loss": 0.079, + "step": 56890 + }, + { + "epoch": 2.65, + "learning_rate": 1.1242142554825766e-05, + "loss": 0.0578, + "step": 56895 + }, + { + "epoch": 2.66, + "learning_rate": 1.1241358769770978e-05, + "loss": 0.1903, + "step": 56900 + }, + { + "epoch": 2.66, + "learning_rate": 1.1240574984716192e-05, + "loss": 0.1061, + "step": 56905 + }, + { + "epoch": 2.66, + "learning_rate": 1.1239791199661404e-05, + "loss": 0.3914, + "step": 56910 + }, + { + "epoch": 2.66, + "learning_rate": 1.123900741460662e-05, + "loss": 0.4026, + "step": 56915 + }, + { + "epoch": 2.66, + "learning_rate": 1.1238223629551832e-05, + "loss": 0.0493, + "step": 56920 + }, + { + "epoch": 2.66, + "learning_rate": 1.1237439844497046e-05, + "loss": 0.066, + "step": 56925 + }, + { + "epoch": 2.66, + "learning_rate": 1.123665605944226e-05, + "loss": 0.0681, + "step": 56930 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235872274387474e-05, + "loss": 0.0228, + "step": 56935 + }, + { + "epoch": 2.66, + "learning_rate": 1.1235088489332686e-05, + "loss": 0.0556, + "step": 56940 + }, + { + "epoch": 2.66, + "learning_rate": 1.12343047042779e-05, + "loss": 0.1144, + "step": 56945 + }, + { + "epoch": 2.66, + "learning_rate": 1.1233520919223114e-05, + "loss": 0.0883, + "step": 56950 + }, + { + "epoch": 2.66, + "learning_rate": 1.1232737134168326e-05, + "loss": 0.156, + "step": 56955 + }, + { + "epoch": 2.66, + "learning_rate": 1.123195334911354e-05, + "loss": 0.2826, + "step": 56960 + }, + { + "epoch": 2.66, + "learning_rate": 1.1231169564058752e-05, + "loss": 0.2467, + "step": 56965 + }, + { + "epoch": 2.66, + "learning_rate": 1.1230385779003968e-05, + "loss": 0.0382, + "step": 56970 + }, + { + "epoch": 2.66, + "learning_rate": 1.122960199394918e-05, + "loss": 0.068, + "step": 56975 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228818208894394e-05, + "loss": 0.0677, + "step": 56980 + }, + { + "epoch": 2.66, + "learning_rate": 1.1228034423839606e-05, + "loss": 0.1175, + "step": 56985 + }, + { + "epoch": 2.66, + "learning_rate": 1.1227250638784822e-05, + "loss": 0.0899, + "step": 56990 + }, + { + "epoch": 2.66, + "learning_rate": 1.1226466853730034e-05, + "loss": 0.1353, + "step": 56995 + }, + { + "epoch": 2.66, + "learning_rate": 1.1225683068675248e-05, + "loss": 0.1473, + "step": 57000 + }, + { + "epoch": 2.66, + "learning_rate": 1.122489928362046e-05, + "loss": 0.1321, + "step": 57005 + }, + { + "epoch": 2.66, + "learning_rate": 1.1224115498565676e-05, + "loss": 0.3038, + "step": 57010 + }, + { + "epoch": 2.66, + "learning_rate": 1.1223331713510888e-05, + "loss": 0.4092, + "step": 57015 + }, + { + "epoch": 2.66, + "learning_rate": 1.12225479284561e-05, + "loss": 0.0233, + "step": 57020 + }, + { + "epoch": 2.66, + "learning_rate": 1.1221764143401314e-05, + "loss": 0.0531, + "step": 57025 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220980358346528e-05, + "loss": 0.011, + "step": 57030 + }, + { + "epoch": 2.66, + "learning_rate": 1.1220196573291742e-05, + "loss": 0.0896, + "step": 57035 + }, + { + "epoch": 2.66, + "learning_rate": 1.1219412788236954e-05, + "loss": 0.038, + "step": 57040 + }, + { + "epoch": 2.66, + "learning_rate": 1.121862900318217e-05, + "loss": 0.1052, + "step": 57045 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217845218127382e-05, + "loss": 0.1334, + "step": 57050 + }, + { + "epoch": 2.66, + "learning_rate": 1.1217061433072596e-05, + "loss": 0.1431, + "step": 57055 + }, + { + "epoch": 2.66, + "learning_rate": 1.1216277648017808e-05, + "loss": 0.1915, + "step": 57060 + }, + { + "epoch": 2.66, + "learning_rate": 1.1215493862963024e-05, + "loss": 0.287, + "step": 57065 + }, + { + "epoch": 2.66, + "learning_rate": 1.1214710077908236e-05, + "loss": 0.1064, + "step": 57070 + }, + { + "epoch": 2.66, + "learning_rate": 1.121392629285345e-05, + "loss": 0.0665, + "step": 57075 + }, + { + "epoch": 2.66, + "learning_rate": 1.1213142507798662e-05, + "loss": 0.0537, + "step": 57080 + }, + { + "epoch": 2.66, + "learning_rate": 1.1212358722743874e-05, + "loss": 0.0913, + "step": 57085 + }, + { + "epoch": 2.66, + "learning_rate": 1.121157493768909e-05, + "loss": 0.0652, + "step": 57090 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210791152634302e-05, + "loss": 0.0595, + "step": 57095 + }, + { + "epoch": 2.66, + "learning_rate": 1.1210007367579516e-05, + "loss": 0.0666, + "step": 57100 + }, + { + "epoch": 2.66, + "learning_rate": 1.1209223582524728e-05, + "loss": 0.2029, + "step": 57105 + }, + { + "epoch": 2.66, + "learning_rate": 1.1208439797469944e-05, + "loss": 0.2374, + "step": 57110 + }, + { + "epoch": 2.67, + "learning_rate": 1.1207656012415156e-05, + "loss": 0.2624, + "step": 57115 + }, + { + "epoch": 2.67, + "learning_rate": 1.120687222736037e-05, + "loss": 0.0789, + "step": 57120 + }, + { + "epoch": 2.67, + "learning_rate": 1.1206088442305582e-05, + "loss": 0.0434, + "step": 57125 + }, + { + "epoch": 2.67, + "learning_rate": 1.1205304657250798e-05, + "loss": 0.0571, + "step": 57130 + }, + { + "epoch": 2.67, + "learning_rate": 1.120452087219601e-05, + "loss": 0.0856, + "step": 57135 + }, + { + "epoch": 2.67, + "learning_rate": 1.1203737087141224e-05, + "loss": 0.1147, + "step": 57140 + }, + { + "epoch": 2.67, + "learning_rate": 1.1202953302086438e-05, + "loss": 0.1438, + "step": 57145 + }, + { + "epoch": 2.67, + "learning_rate": 1.120216951703165e-05, + "loss": 0.1312, + "step": 57150 + }, + { + "epoch": 2.67, + "learning_rate": 1.1201385731976864e-05, + "loss": 0.1851, + "step": 57155 + }, + { + "epoch": 2.67, + "learning_rate": 1.1200601946922076e-05, + "loss": 0.1552, + "step": 57160 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199818161867291e-05, + "loss": 0.2446, + "step": 57165 + }, + { + "epoch": 2.67, + "learning_rate": 1.1199034376812504e-05, + "loss": 0.0227, + "step": 57170 + }, + { + "epoch": 2.67, + "learning_rate": 1.1198250591757718e-05, + "loss": 0.071, + "step": 57175 + }, + { + "epoch": 2.67, + "learning_rate": 1.119746680670293e-05, + "loss": 0.0333, + "step": 57180 + }, + { + "epoch": 2.67, + "learning_rate": 1.1196683021648145e-05, + "loss": 0.0713, + "step": 57185 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195899236593358e-05, + "loss": 0.0717, + "step": 57190 + }, + { + "epoch": 2.67, + "learning_rate": 1.1195115451538572e-05, + "loss": 0.0648, + "step": 57195 + }, + { + "epoch": 2.67, + "learning_rate": 1.1194331666483784e-05, + "loss": 0.1173, + "step": 57200 + }, + { + "epoch": 2.67, + "learning_rate": 1.1193547881429e-05, + "loss": 0.09, + "step": 57205 + }, + { + "epoch": 2.67, + "learning_rate": 1.1192764096374212e-05, + "loss": 0.2192, + "step": 57210 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191980311319424e-05, + "loss": 0.3397, + "step": 57215 + }, + { + "epoch": 2.67, + "learning_rate": 1.1191196526264638e-05, + "loss": 0.0862, + "step": 57220 + }, + { + "epoch": 2.67, + "learning_rate": 1.119041274120985e-05, + "loss": 0.0396, + "step": 57225 + }, + { + "epoch": 2.67, + "learning_rate": 1.1189628956155065e-05, + "loss": 0.0291, + "step": 57230 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188845171100278e-05, + "loss": 0.0333, + "step": 57235 + }, + { + "epoch": 2.67, + "learning_rate": 1.1188061386045492e-05, + "loss": 0.0601, + "step": 57240 + }, + { + "epoch": 2.67, + "learning_rate": 1.1187277600990705e-05, + "loss": 0.0811, + "step": 57245 + }, + { + "epoch": 2.67, + "learning_rate": 1.118649381593592e-05, + "loss": 0.0969, + "step": 57250 + }, + { + "epoch": 2.67, + "learning_rate": 1.1185710030881132e-05, + "loss": 0.229, + "step": 57255 + }, + { + "epoch": 2.67, + "learning_rate": 1.1184926245826346e-05, + "loss": 0.294, + "step": 57260 + }, + { + "epoch": 2.67, + "learning_rate": 1.118414246077156e-05, + "loss": 0.3216, + "step": 57265 + }, + { + "epoch": 2.67, + "learning_rate": 1.1183358675716773e-05, + "loss": 0.0457, + "step": 57270 + }, + { + "epoch": 2.67, + "learning_rate": 1.1182574890661986e-05, + "loss": 0.0488, + "step": 57275 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181791105607198e-05, + "loss": 0.0694, + "step": 57280 + }, + { + "epoch": 2.67, + "learning_rate": 1.1181007320552413e-05, + "loss": 0.0691, + "step": 57285 + }, + { + "epoch": 2.67, + "learning_rate": 1.1180223535497626e-05, + "loss": 0.1437, + "step": 57290 + }, + { + "epoch": 2.67, + "learning_rate": 1.117943975044284e-05, + "loss": 0.1237, + "step": 57295 + }, + { + "epoch": 2.67, + "learning_rate": 1.1178655965388052e-05, + "loss": 0.1094, + "step": 57300 + }, + { + "epoch": 2.67, + "learning_rate": 1.1177872180333267e-05, + "loss": 0.3414, + "step": 57305 + }, + { + "epoch": 2.67, + "learning_rate": 1.117708839527848e-05, + "loss": 0.2766, + "step": 57310 + }, + { + "epoch": 2.67, + "learning_rate": 1.1176304610223693e-05, + "loss": 0.2673, + "step": 57315 + }, + { + "epoch": 2.67, + "learning_rate": 1.1175520825168906e-05, + "loss": 0.0421, + "step": 57320 + }, + { + "epoch": 2.67, + "learning_rate": 1.1174737040114121e-05, + "loss": 0.0769, + "step": 57325 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173953255059333e-05, + "loss": 0.042, + "step": 57330 + }, + { + "epoch": 2.68, + "learning_rate": 1.1173169470004547e-05, + "loss": 0.0679, + "step": 57335 + }, + { + "epoch": 2.68, + "learning_rate": 1.117238568494976e-05, + "loss": 0.0895, + "step": 57340 + }, + { + "epoch": 2.68, + "learning_rate": 1.1171601899894973e-05, + "loss": 0.1201, + "step": 57345 + }, + { + "epoch": 2.68, + "learning_rate": 1.1170818114840187e-05, + "loss": 0.1044, + "step": 57350 + }, + { + "epoch": 2.68, + "learning_rate": 1.11700343297854e-05, + "loss": 0.0649, + "step": 57355 + }, + { + "epoch": 2.68, + "learning_rate": 1.1169250544730615e-05, + "loss": 0.224, + "step": 57360 + }, + { + "epoch": 2.68, + "learning_rate": 1.1168466759675827e-05, + "loss": 0.3896, + "step": 57365 + }, + { + "epoch": 2.68, + "learning_rate": 1.1167682974621041e-05, + "loss": 0.0307, + "step": 57370 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166899189566253e-05, + "loss": 0.0542, + "step": 57375 + }, + { + "epoch": 2.68, + "learning_rate": 1.1166115404511469e-05, + "loss": 0.0674, + "step": 57380 + }, + { + "epoch": 2.68, + "learning_rate": 1.1165331619456681e-05, + "loss": 0.0919, + "step": 57385 + }, + { + "epoch": 2.68, + "learning_rate": 1.1164547834401895e-05, + "loss": 0.0887, + "step": 57390 + }, + { + "epoch": 2.68, + "learning_rate": 1.1163764049347107e-05, + "loss": 0.1729, + "step": 57395 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162980264292323e-05, + "loss": 0.1892, + "step": 57400 + }, + { + "epoch": 2.68, + "learning_rate": 1.1162196479237535e-05, + "loss": 0.1451, + "step": 57405 + }, + { + "epoch": 2.68, + "learning_rate": 1.1161412694182747e-05, + "loss": 0.3484, + "step": 57410 + }, + { + "epoch": 2.68, + "learning_rate": 1.1160628909127961e-05, + "loss": 0.3337, + "step": 57415 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159845124073174e-05, + "loss": 0.0925, + "step": 57420 + }, + { + "epoch": 2.68, + "learning_rate": 1.1159061339018389e-05, + "loss": 0.0863, + "step": 57425 + }, + { + "epoch": 2.68, + "learning_rate": 1.1158277553963601e-05, + "loss": 0.0389, + "step": 57430 + }, + { + "epoch": 2.68, + "learning_rate": 1.1157493768908815e-05, + "loss": 0.0606, + "step": 57435 + }, + { + "epoch": 2.68, + "learning_rate": 1.1156709983854027e-05, + "loss": 0.1619, + "step": 57440 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155926198799243e-05, + "loss": 0.177, + "step": 57445 + }, + { + "epoch": 2.68, + "learning_rate": 1.1155142413744455e-05, + "loss": 0.1052, + "step": 57450 + }, + { + "epoch": 2.68, + "learning_rate": 1.115435862868967e-05, + "loss": 0.2065, + "step": 57455 + }, + { + "epoch": 2.68, + "learning_rate": 1.1153574843634883e-05, + "loss": 0.1876, + "step": 57460 + }, + { + "epoch": 2.68, + "learning_rate": 1.1152791058580097e-05, + "loss": 0.1923, + "step": 57465 + }, + { + "epoch": 2.68, + "learning_rate": 1.115200727352531e-05, + "loss": 0.0589, + "step": 57470 + }, + { + "epoch": 2.68, + "learning_rate": 1.1151223488470521e-05, + "loss": 0.0346, + "step": 57475 + }, + { + "epoch": 2.68, + "learning_rate": 1.1150439703415737e-05, + "loss": 0.0486, + "step": 57480 + }, + { + "epoch": 2.68, + "learning_rate": 1.114965591836095e-05, + "loss": 0.0673, + "step": 57485 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148872133306163e-05, + "loss": 0.0864, + "step": 57490 + }, + { + "epoch": 2.68, + "learning_rate": 1.1148088348251375e-05, + "loss": 0.103, + "step": 57495 + }, + { + "epoch": 2.68, + "learning_rate": 1.1147304563196591e-05, + "loss": 0.2384, + "step": 57500 + }, + { + "epoch": 2.68, + "learning_rate": 1.1146520778141803e-05, + "loss": 0.151, + "step": 57505 + }, + { + "epoch": 2.68, + "learning_rate": 1.1145736993087017e-05, + "loss": 0.1685, + "step": 57510 + }, + { + "epoch": 2.68, + "learning_rate": 1.114495320803223e-05, + "loss": 0.2351, + "step": 57515 + }, + { + "epoch": 2.68, + "learning_rate": 1.1144169422977445e-05, + "loss": 0.0842, + "step": 57520 + }, + { + "epoch": 2.68, + "learning_rate": 1.1143385637922657e-05, + "loss": 0.0256, + "step": 57525 + }, + { + "epoch": 2.68, + "learning_rate": 1.1142601852867871e-05, + "loss": 0.0696, + "step": 57530 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141818067813083e-05, + "loss": 0.0371, + "step": 57535 + }, + { + "epoch": 2.68, + "learning_rate": 1.1141034282758295e-05, + "loss": 0.0474, + "step": 57540 + }, + { + "epoch": 2.69, + "learning_rate": 1.1140250497703511e-05, + "loss": 0.0927, + "step": 57545 + }, + { + "epoch": 2.69, + "learning_rate": 1.1139466712648723e-05, + "loss": 0.0978, + "step": 57550 + }, + { + "epoch": 2.69, + "learning_rate": 1.1138682927593937e-05, + "loss": 0.223, + "step": 57555 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137899142539151e-05, + "loss": 0.2131, + "step": 57560 + }, + { + "epoch": 2.69, + "learning_rate": 1.1137115357484365e-05, + "loss": 0.2394, + "step": 57565 + }, + { + "epoch": 2.69, + "learning_rate": 1.1136331572429577e-05, + "loss": 0.0255, + "step": 57570 + }, + { + "epoch": 2.69, + "learning_rate": 1.1135547787374791e-05, + "loss": 0.0629, + "step": 57575 + }, + { + "epoch": 2.69, + "learning_rate": 1.1134764002320005e-05, + "loss": 0.0524, + "step": 57580 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133980217265219e-05, + "loss": 0.0692, + "step": 57585 + }, + { + "epoch": 2.69, + "learning_rate": 1.1133196432210431e-05, + "loss": 0.0791, + "step": 57590 + }, + { + "epoch": 2.69, + "learning_rate": 1.1132412647155647e-05, + "loss": 0.0576, + "step": 57595 + }, + { + "epoch": 2.69, + "learning_rate": 1.1131628862100859e-05, + "loss": 0.1245, + "step": 57600 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130845077046071e-05, + "loss": 0.1853, + "step": 57605 + }, + { + "epoch": 2.69, + "learning_rate": 1.1130061291991285e-05, + "loss": 0.4984, + "step": 57610 + }, + { + "epoch": 2.69, + "learning_rate": 1.1129277506936497e-05, + "loss": 0.306, + "step": 57615 + }, + { + "epoch": 2.69, + "learning_rate": 1.1128493721881713e-05, + "loss": 0.0546, + "step": 57620 + }, + { + "epoch": 2.69, + "learning_rate": 1.1127709936826925e-05, + "loss": 0.0659, + "step": 57625 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126926151772139e-05, + "loss": 0.043, + "step": 57630 + }, + { + "epoch": 2.69, + "learning_rate": 1.1126142366717351e-05, + "loss": 0.0508, + "step": 57635 + }, + { + "epoch": 2.69, + "learning_rate": 1.1125358581662567e-05, + "loss": 0.0316, + "step": 57640 + }, + { + "epoch": 2.69, + "learning_rate": 1.1124574796607779e-05, + "loss": 0.0663, + "step": 57645 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123791011552993e-05, + "loss": 0.098, + "step": 57650 + }, + { + "epoch": 2.69, + "learning_rate": 1.1123007226498205e-05, + "loss": 0.1989, + "step": 57655 + }, + { + "epoch": 2.69, + "learning_rate": 1.112222344144342e-05, + "loss": 0.25, + "step": 57660 + }, + { + "epoch": 2.69, + "learning_rate": 1.1121439656388633e-05, + "loss": 0.3534, + "step": 57665 + }, + { + "epoch": 2.69, + "learning_rate": 1.1120655871333845e-05, + "loss": 0.018, + "step": 57670 + }, + { + "epoch": 2.69, + "learning_rate": 1.111987208627906e-05, + "loss": 0.0352, + "step": 57675 + }, + { + "epoch": 2.69, + "learning_rate": 1.1119088301224273e-05, + "loss": 0.0879, + "step": 57680 + }, + { + "epoch": 2.69, + "learning_rate": 1.1118304516169487e-05, + "loss": 0.1559, + "step": 57685 + }, + { + "epoch": 2.69, + "learning_rate": 1.1117520731114699e-05, + "loss": 0.1489, + "step": 57690 + }, + { + "epoch": 2.69, + "learning_rate": 1.1116736946059915e-05, + "loss": 0.0735, + "step": 57695 + }, + { + "epoch": 2.69, + "learning_rate": 1.1115953161005127e-05, + "loss": 0.1344, + "step": 57700 + }, + { + "epoch": 2.69, + "learning_rate": 1.111516937595034e-05, + "loss": 0.1133, + "step": 57705 + }, + { + "epoch": 2.69, + "learning_rate": 1.1114385590895553e-05, + "loss": 0.1911, + "step": 57710 + }, + { + "epoch": 2.69, + "learning_rate": 1.1113601805840769e-05, + "loss": 0.3633, + "step": 57715 + }, + { + "epoch": 2.69, + "learning_rate": 1.111281802078598e-05, + "loss": 0.036, + "step": 57720 + }, + { + "epoch": 2.69, + "learning_rate": 1.1112034235731195e-05, + "loss": 0.0589, + "step": 57725 + }, + { + "epoch": 2.69, + "learning_rate": 1.1111250450676407e-05, + "loss": 0.0351, + "step": 57730 + }, + { + "epoch": 2.69, + "learning_rate": 1.1110466665621619e-05, + "loss": 0.0914, + "step": 57735 + }, + { + "epoch": 2.69, + "learning_rate": 1.1109682880566835e-05, + "loss": 0.1147, + "step": 57740 + }, + { + "epoch": 2.69, + "learning_rate": 1.1108899095512047e-05, + "loss": 0.0828, + "step": 57745 + }, + { + "epoch": 2.69, + "learning_rate": 1.110811531045726e-05, + "loss": 0.1332, + "step": 57750 + }, + { + "epoch": 2.69, + "learning_rate": 1.1107331525402473e-05, + "loss": 0.2566, + "step": 57755 + }, + { + "epoch": 2.7, + "learning_rate": 1.1106547740347689e-05, + "loss": 0.2272, + "step": 57760 + }, + { + "epoch": 2.7, + "learning_rate": 1.1105763955292901e-05, + "loss": 0.2749, + "step": 57765 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104980170238115e-05, + "loss": 0.0733, + "step": 57770 + }, + { + "epoch": 2.7, + "learning_rate": 1.1104196385183329e-05, + "loss": 0.0124, + "step": 57775 + }, + { + "epoch": 2.7, + "learning_rate": 1.1103412600128543e-05, + "loss": 0.0851, + "step": 57780 + }, + { + "epoch": 2.7, + "learning_rate": 1.1102628815073755e-05, + "loss": 0.0767, + "step": 57785 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101845030018969e-05, + "loss": 0.0738, + "step": 57790 + }, + { + "epoch": 2.7, + "learning_rate": 1.1101061244964183e-05, + "loss": 0.0922, + "step": 57795 + }, + { + "epoch": 2.7, + "learning_rate": 1.1100277459909395e-05, + "loss": 0.121, + "step": 57800 + }, + { + "epoch": 2.7, + "learning_rate": 1.1099493674854609e-05, + "loss": 0.2056, + "step": 57805 + }, + { + "epoch": 2.7, + "learning_rate": 1.1098709889799821e-05, + "loss": 0.1976, + "step": 57810 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097926104745037e-05, + "loss": 0.3544, + "step": 57815 + }, + { + "epoch": 2.7, + "learning_rate": 1.1097142319690249e-05, + "loss": 0.0921, + "step": 57820 + }, + { + "epoch": 2.7, + "learning_rate": 1.1096358534635463e-05, + "loss": 0.0193, + "step": 57825 + }, + { + "epoch": 2.7, + "learning_rate": 1.1095574749580675e-05, + "loss": 0.0806, + "step": 57830 + }, + { + "epoch": 2.7, + "learning_rate": 1.109479096452589e-05, + "loss": 0.0968, + "step": 57835 + }, + { + "epoch": 2.7, + "learning_rate": 1.1094007179471103e-05, + "loss": 0.0627, + "step": 57840 + }, + { + "epoch": 2.7, + "learning_rate": 1.1093223394416317e-05, + "loss": 0.1318, + "step": 57845 + }, + { + "epoch": 2.7, + "learning_rate": 1.1092439609361529e-05, + "loss": 0.1039, + "step": 57850 + }, + { + "epoch": 2.7, + "learning_rate": 1.1091655824306744e-05, + "loss": 0.1956, + "step": 57855 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090872039251957e-05, + "loss": 0.1967, + "step": 57860 + }, + { + "epoch": 2.7, + "learning_rate": 1.1090088254197169e-05, + "loss": 0.4312, + "step": 57865 + }, + { + "epoch": 2.7, + "learning_rate": 1.1089304469142383e-05, + "loss": 0.054, + "step": 57870 + }, + { + "epoch": 2.7, + "learning_rate": 1.1088520684087597e-05, + "loss": 0.0584, + "step": 57875 + }, + { + "epoch": 2.7, + "learning_rate": 1.108773689903281e-05, + "loss": 0.0745, + "step": 57880 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086953113978023e-05, + "loss": 0.0903, + "step": 57885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1086169328923237e-05, + "loss": 0.0683, + "step": 57890 + }, + { + "epoch": 2.7, + "learning_rate": 1.108538554386845e-05, + "loss": 0.1025, + "step": 57895 + }, + { + "epoch": 2.7, + "learning_rate": 1.1084601758813664e-05, + "loss": 0.129, + "step": 57900 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083817973758877e-05, + "loss": 0.0918, + "step": 57905 + }, + { + "epoch": 2.7, + "learning_rate": 1.1083034188704092e-05, + "loss": 0.1841, + "step": 57910 + }, + { + "epoch": 2.7, + "learning_rate": 1.1082250403649304e-05, + "loss": 0.237, + "step": 57915 + }, + { + "epoch": 2.7, + "learning_rate": 1.1081466618594518e-05, + "loss": 0.1046, + "step": 57920 + }, + { + "epoch": 2.7, + "learning_rate": 1.108068283353973e-05, + "loss": 0.0991, + "step": 57925 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079899048484943e-05, + "loss": 0.0322, + "step": 57930 + }, + { + "epoch": 2.7, + "learning_rate": 1.1079115263430158e-05, + "loss": 0.0464, + "step": 57935 + }, + { + "epoch": 2.7, + "learning_rate": 1.107833147837537e-05, + "loss": 0.0611, + "step": 57940 + }, + { + "epoch": 2.7, + "learning_rate": 1.1077547693320585e-05, + "loss": 0.1024, + "step": 57945 + }, + { + "epoch": 2.7, + "learning_rate": 1.1076763908265797e-05, + "loss": 0.2626, + "step": 57950 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075980123211012e-05, + "loss": 0.1249, + "step": 57955 + }, + { + "epoch": 2.7, + "learning_rate": 1.1075196338156225e-05, + "loss": 0.2525, + "step": 57960 + }, + { + "epoch": 2.7, + "learning_rate": 1.1074412553101438e-05, + "loss": 0.2988, + "step": 57965 + }, + { + "epoch": 2.7, + "learning_rate": 1.107362876804665e-05, + "loss": 0.0208, + "step": 57970 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072844982991866e-05, + "loss": 0.0614, + "step": 57975 + }, + { + "epoch": 2.71, + "learning_rate": 1.1072061197937078e-05, + "loss": 0.056, + "step": 57980 + }, + { + "epoch": 2.71, + "learning_rate": 1.1071277412882292e-05, + "loss": 0.0867, + "step": 57985 + }, + { + "epoch": 2.71, + "learning_rate": 1.1070493627827506e-05, + "loss": 0.0771, + "step": 57990 + }, + { + "epoch": 2.71, + "learning_rate": 1.1069709842772719e-05, + "loss": 0.1148, + "step": 57995 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068926057717932e-05, + "loss": 0.1713, + "step": 58000 + }, + { + "epoch": 2.71, + "learning_rate": 1.1068142272663145e-05, + "loss": 0.1785, + "step": 58005 + }, + { + "epoch": 2.71, + "learning_rate": 1.106735848760836e-05, + "loss": 0.1555, + "step": 58010 + }, + { + "epoch": 2.71, + "learning_rate": 1.1066574702553572e-05, + "loss": 0.3208, + "step": 58015 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065790917498786e-05, + "loss": 0.0433, + "step": 58020 + }, + { + "epoch": 2.71, + "learning_rate": 1.1065007132443999e-05, + "loss": 0.0256, + "step": 58025 + }, + { + "epoch": 2.71, + "learning_rate": 1.1064223347389214e-05, + "loss": 0.09, + "step": 58030 + }, + { + "epoch": 2.71, + "learning_rate": 1.1063439562334426e-05, + "loss": 0.0716, + "step": 58035 + }, + { + "epoch": 2.71, + "learning_rate": 1.106265577727964e-05, + "loss": 0.0298, + "step": 58040 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061871992224852e-05, + "loss": 0.0745, + "step": 58045 + }, + { + "epoch": 2.71, + "learning_rate": 1.1061088207170068e-05, + "loss": 0.1625, + "step": 58050 + }, + { + "epoch": 2.71, + "learning_rate": 1.106030442211528e-05, + "loss": 0.1818, + "step": 58055 + }, + { + "epoch": 2.71, + "learning_rate": 1.1059520637060493e-05, + "loss": 0.2606, + "step": 58060 + }, + { + "epoch": 2.71, + "learning_rate": 1.1058736852005706e-05, + "loss": 0.2315, + "step": 58065 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057953066950919e-05, + "loss": 0.0346, + "step": 58070 + }, + { + "epoch": 2.71, + "learning_rate": 1.1057169281896134e-05, + "loss": 0.0731, + "step": 58075 + }, + { + "epoch": 2.71, + "learning_rate": 1.1056385496841346e-05, + "loss": 0.0567, + "step": 58080 + }, + { + "epoch": 2.71, + "learning_rate": 1.105560171178656e-05, + "loss": 0.0793, + "step": 58085 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054817926731774e-05, + "loss": 0.0618, + "step": 58090 + }, + { + "epoch": 2.71, + "learning_rate": 1.1054034141676988e-05, + "loss": 0.1303, + "step": 58095 + }, + { + "epoch": 2.71, + "learning_rate": 1.10532503566222e-05, + "loss": 0.1785, + "step": 58100 + }, + { + "epoch": 2.71, + "learning_rate": 1.1052466571567414e-05, + "loss": 0.197, + "step": 58105 + }, + { + "epoch": 2.71, + "learning_rate": 1.1051682786512628e-05, + "loss": 0.3032, + "step": 58110 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050899001457842e-05, + "loss": 0.3066, + "step": 58115 + }, + { + "epoch": 2.71, + "learning_rate": 1.1050115216403054e-05, + "loss": 0.0911, + "step": 58120 + }, + { + "epoch": 2.71, + "learning_rate": 1.1049331431348267e-05, + "loss": 0.0323, + "step": 58125 + }, + { + "epoch": 2.71, + "learning_rate": 1.1048547646293482e-05, + "loss": 0.0549, + "step": 58130 + }, + { + "epoch": 2.71, + "learning_rate": 1.1047763861238694e-05, + "loss": 0.1821, + "step": 58135 + }, + { + "epoch": 2.71, + "learning_rate": 1.1046980076183908e-05, + "loss": 0.0566, + "step": 58140 + }, + { + "epoch": 2.71, + "learning_rate": 1.104619629112912e-05, + "loss": 0.0821, + "step": 58145 + }, + { + "epoch": 2.71, + "learning_rate": 1.1045412506074336e-05, + "loss": 0.1769, + "step": 58150 + }, + { + "epoch": 2.71, + "learning_rate": 1.1044628721019548e-05, + "loss": 0.1196, + "step": 58155 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043844935964762e-05, + "loss": 0.1464, + "step": 58160 + }, + { + "epoch": 2.71, + "learning_rate": 1.1043061150909974e-05, + "loss": 0.2286, + "step": 58165 + }, + { + "epoch": 2.71, + "learning_rate": 1.104227736585519e-05, + "loss": 0.0928, + "step": 58170 + }, + { + "epoch": 2.71, + "learning_rate": 1.1041493580800402e-05, + "loss": 0.0066, + "step": 58175 + }, + { + "epoch": 2.71, + "learning_rate": 1.1040709795745616e-05, + "loss": 0.0367, + "step": 58180 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039926010690828e-05, + "loss": 0.0453, + "step": 58185 + }, + { + "epoch": 2.72, + "learning_rate": 1.1039142225636042e-05, + "loss": 0.0471, + "step": 58190 + }, + { + "epoch": 2.72, + "learning_rate": 1.1038358440581256e-05, + "loss": 0.1814, + "step": 58195 + }, + { + "epoch": 2.72, + "learning_rate": 1.1037574655526468e-05, + "loss": 0.1526, + "step": 58200 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036790870471684e-05, + "loss": 0.2725, + "step": 58205 + }, + { + "epoch": 2.72, + "learning_rate": 1.1036007085416896e-05, + "loss": 0.1844, + "step": 58210 + }, + { + "epoch": 2.72, + "learning_rate": 1.103522330036211e-05, + "loss": 0.1978, + "step": 58215 + }, + { + "epoch": 2.72, + "learning_rate": 1.1034439515307322e-05, + "loss": 0.1005, + "step": 58220 + }, + { + "epoch": 2.72, + "learning_rate": 1.1033655730252538e-05, + "loss": 0.0466, + "step": 58225 + }, + { + "epoch": 2.72, + "learning_rate": 1.103287194519775e-05, + "loss": 0.0683, + "step": 58230 + }, + { + "epoch": 2.72, + "learning_rate": 1.1032088160142964e-05, + "loss": 0.0518, + "step": 58235 + }, + { + "epoch": 2.72, + "learning_rate": 1.1031304375088176e-05, + "loss": 0.0813, + "step": 58240 + }, + { + "epoch": 2.72, + "learning_rate": 1.1030520590033392e-05, + "loss": 0.0502, + "step": 58245 + }, + { + "epoch": 2.72, + "learning_rate": 1.1029736804978604e-05, + "loss": 0.1395, + "step": 58250 + }, + { + "epoch": 2.72, + "learning_rate": 1.1028953019923816e-05, + "loss": 0.0872, + "step": 58255 + }, + { + "epoch": 2.72, + "learning_rate": 1.102816923486903e-05, + "loss": 0.3588, + "step": 58260 + }, + { + "epoch": 2.72, + "learning_rate": 1.1027385449814242e-05, + "loss": 0.2024, + "step": 58265 + }, + { + "epoch": 2.72, + "learning_rate": 1.1026601664759458e-05, + "loss": 0.1045, + "step": 58270 + }, + { + "epoch": 2.72, + "learning_rate": 1.102581787970467e-05, + "loss": 0.0733, + "step": 58275 + }, + { + "epoch": 2.72, + "learning_rate": 1.1025034094649884e-05, + "loss": 0.0158, + "step": 58280 + }, + { + "epoch": 2.72, + "learning_rate": 1.1024250309595096e-05, + "loss": 0.0791, + "step": 58285 + }, + { + "epoch": 2.72, + "learning_rate": 1.1023466524540312e-05, + "loss": 0.0812, + "step": 58290 + }, + { + "epoch": 2.72, + "learning_rate": 1.1022682739485524e-05, + "loss": 0.0696, + "step": 58295 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021898954430738e-05, + "loss": 0.1509, + "step": 58300 + }, + { + "epoch": 2.72, + "learning_rate": 1.1021115169375952e-05, + "loss": 0.1556, + "step": 58305 + }, + { + "epoch": 2.72, + "learning_rate": 1.1020331384321166e-05, + "loss": 0.2344, + "step": 58310 + }, + { + "epoch": 2.72, + "learning_rate": 1.1019547599266378e-05, + "loss": 0.3865, + "step": 58315 + }, + { + "epoch": 2.72, + "learning_rate": 1.101876381421159e-05, + "loss": 0.0351, + "step": 58320 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017980029156806e-05, + "loss": 0.0247, + "step": 58325 + }, + { + "epoch": 2.72, + "learning_rate": 1.1017196244102018e-05, + "loss": 0.034, + "step": 58330 + }, + { + "epoch": 2.72, + "learning_rate": 1.1016412459047232e-05, + "loss": 0.0443, + "step": 58335 + }, + { + "epoch": 2.72, + "learning_rate": 1.1015628673992444e-05, + "loss": 0.0533, + "step": 58340 + }, + { + "epoch": 2.72, + "learning_rate": 1.101484488893766e-05, + "loss": 0.113, + "step": 58345 + }, + { + "epoch": 2.72, + "learning_rate": 1.1014061103882872e-05, + "loss": 0.1294, + "step": 58350 + }, + { + "epoch": 2.72, + "learning_rate": 1.1013277318828086e-05, + "loss": 0.2372, + "step": 58355 + }, + { + "epoch": 2.72, + "learning_rate": 1.1012493533773298e-05, + "loss": 0.1815, + "step": 58360 + }, + { + "epoch": 2.72, + "learning_rate": 1.1011709748718514e-05, + "loss": 0.4188, + "step": 58365 + }, + { + "epoch": 2.72, + "learning_rate": 1.1010925963663726e-05, + "loss": 0.0725, + "step": 58370 + }, + { + "epoch": 2.72, + "learning_rate": 1.101014217860894e-05, + "loss": 0.0558, + "step": 58375 + }, + { + "epoch": 2.72, + "learning_rate": 1.1009358393554152e-05, + "loss": 0.0533, + "step": 58380 + }, + { + "epoch": 2.72, + "learning_rate": 1.1008574608499364e-05, + "loss": 0.0649, + "step": 58385 + }, + { + "epoch": 2.72, + "learning_rate": 1.100779082344458e-05, + "loss": 0.0457, + "step": 58390 + }, + { + "epoch": 2.72, + "learning_rate": 1.1007007038389792e-05, + "loss": 0.063, + "step": 58395 + }, + { + "epoch": 2.73, + "learning_rate": 1.1006223253335006e-05, + "loss": 0.2506, + "step": 58400 + }, + { + "epoch": 2.73, + "learning_rate": 1.100543946828022e-05, + "loss": 0.1326, + "step": 58405 + }, + { + "epoch": 2.73, + "learning_rate": 1.1004655683225434e-05, + "loss": 0.2099, + "step": 58410 + }, + { + "epoch": 2.73, + "learning_rate": 1.1003871898170646e-05, + "loss": 0.2704, + "step": 58415 + }, + { + "epoch": 2.73, + "learning_rate": 1.100308811311586e-05, + "loss": 0.0142, + "step": 58420 + }, + { + "epoch": 2.73, + "learning_rate": 1.1002304328061074e-05, + "loss": 0.0844, + "step": 58425 + }, + { + "epoch": 2.73, + "learning_rate": 1.1001520543006288e-05, + "loss": 0.0416, + "step": 58430 + }, + { + "epoch": 2.73, + "learning_rate": 1.10007367579515e-05, + "loss": 0.0186, + "step": 58435 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999952972896715e-05, + "loss": 0.0636, + "step": 58440 + }, + { + "epoch": 2.73, + "learning_rate": 1.0999169187841928e-05, + "loss": 0.1073, + "step": 58445 + }, + { + "epoch": 2.73, + "learning_rate": 1.099838540278714e-05, + "loss": 0.1895, + "step": 58450 + }, + { + "epoch": 2.73, + "learning_rate": 1.0997601617732354e-05, + "loss": 0.157, + "step": 58455 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996817832677566e-05, + "loss": 0.2624, + "step": 58460 + }, + { + "epoch": 2.73, + "learning_rate": 1.0996034047622782e-05, + "loss": 0.2066, + "step": 58465 + }, + { + "epoch": 2.73, + "learning_rate": 1.0995250262567994e-05, + "loss": 0.0556, + "step": 58470 + }, + { + "epoch": 2.73, + "learning_rate": 1.0994466477513208e-05, + "loss": 0.0346, + "step": 58475 + }, + { + "epoch": 2.73, + "learning_rate": 1.099368269245842e-05, + "loss": 0.0719, + "step": 58480 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992898907403636e-05, + "loss": 0.064, + "step": 58485 + }, + { + "epoch": 2.73, + "learning_rate": 1.0992115122348848e-05, + "loss": 0.0563, + "step": 58490 + }, + { + "epoch": 2.73, + "learning_rate": 1.0991331337294062e-05, + "loss": 0.1556, + "step": 58495 + }, + { + "epoch": 2.73, + "learning_rate": 1.0990547552239274e-05, + "loss": 0.1548, + "step": 58500 + }, + { + "epoch": 2.73, + "learning_rate": 1.098976376718449e-05, + "loss": 0.1101, + "step": 58505 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988979982129702e-05, + "loss": 0.3026, + "step": 58510 + }, + { + "epoch": 2.73, + "learning_rate": 1.0988196197074914e-05, + "loss": 0.3281, + "step": 58515 + }, + { + "epoch": 2.73, + "learning_rate": 1.098741241202013e-05, + "loss": 0.1004, + "step": 58520 + }, + { + "epoch": 2.73, + "learning_rate": 1.0986628626965342e-05, + "loss": 0.0659, + "step": 58525 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985844841910556e-05, + "loss": 0.0501, + "step": 58530 + }, + { + "epoch": 2.73, + "learning_rate": 1.0985061056855768e-05, + "loss": 0.0952, + "step": 58535 + }, + { + "epoch": 2.73, + "learning_rate": 1.0984277271800983e-05, + "loss": 0.1421, + "step": 58540 + }, + { + "epoch": 2.73, + "learning_rate": 1.0983493486746196e-05, + "loss": 0.0672, + "step": 58545 + }, + { + "epoch": 2.73, + "learning_rate": 1.098270970169141e-05, + "loss": 0.1645, + "step": 58550 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981925916636622e-05, + "loss": 0.2002, + "step": 58555 + }, + { + "epoch": 2.73, + "learning_rate": 1.0981142131581837e-05, + "loss": 0.2722, + "step": 58560 + }, + { + "epoch": 2.73, + "learning_rate": 1.098035834652705e-05, + "loss": 0.3582, + "step": 58565 + }, + { + "epoch": 2.73, + "learning_rate": 1.0979574561472263e-05, + "loss": 0.0799, + "step": 58570 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978790776417476e-05, + "loss": 0.0616, + "step": 58575 + }, + { + "epoch": 2.73, + "learning_rate": 1.0978006991362688e-05, + "loss": 0.0691, + "step": 58580 + }, + { + "epoch": 2.73, + "learning_rate": 1.0977223206307903e-05, + "loss": 0.0437, + "step": 58585 + }, + { + "epoch": 2.73, + "learning_rate": 1.0976439421253116e-05, + "loss": 0.0749, + "step": 58590 + }, + { + "epoch": 2.73, + "learning_rate": 1.097565563619833e-05, + "loss": 0.0765, + "step": 58595 + }, + { + "epoch": 2.73, + "learning_rate": 1.0975028608154502e-05, + "loss": 0.0997, + "step": 58600 + }, + { + "epoch": 2.73, + "learning_rate": 1.0974244823099714e-05, + "loss": 0.1803, + "step": 58605 + }, + { + "epoch": 2.73, + "learning_rate": 1.0973461038044928e-05, + "loss": 0.0981, + "step": 58610 + }, + { + "epoch": 2.74, + "learning_rate": 1.097267725299014e-05, + "loss": 0.3792, + "step": 58615 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971893467935356e-05, + "loss": 0.0301, + "step": 58620 + }, + { + "epoch": 2.74, + "learning_rate": 1.0971109682880568e-05, + "loss": 0.0599, + "step": 58625 + }, + { + "epoch": 2.74, + "learning_rate": 1.0970325897825782e-05, + "loss": 0.0407, + "step": 58630 + }, + { + "epoch": 2.74, + "learning_rate": 1.0969542112770994e-05, + "loss": 0.0282, + "step": 58635 + }, + { + "epoch": 2.74, + "learning_rate": 1.096875832771621e-05, + "loss": 0.1213, + "step": 58640 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967974542661422e-05, + "loss": 0.0466, + "step": 58645 + }, + { + "epoch": 2.74, + "learning_rate": 1.0967190757606634e-05, + "loss": 0.1724, + "step": 58650 + }, + { + "epoch": 2.74, + "learning_rate": 1.0966406972551848e-05, + "loss": 0.127, + "step": 58655 + }, + { + "epoch": 2.74, + "learning_rate": 1.096562318749706e-05, + "loss": 0.1452, + "step": 58660 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964839402442276e-05, + "loss": 0.1929, + "step": 58665 + }, + { + "epoch": 2.74, + "learning_rate": 1.0964055617387488e-05, + "loss": 0.0849, + "step": 58670 + }, + { + "epoch": 2.74, + "learning_rate": 1.0963271832332702e-05, + "loss": 0.0543, + "step": 58675 + }, + { + "epoch": 2.74, + "learning_rate": 1.0962488047277916e-05, + "loss": 0.0736, + "step": 58680 + }, + { + "epoch": 2.74, + "learning_rate": 1.096170426222313e-05, + "loss": 0.0518, + "step": 58685 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960920477168342e-05, + "loss": 0.0852, + "step": 58690 + }, + { + "epoch": 2.74, + "learning_rate": 1.0960136692113556e-05, + "loss": 0.1975, + "step": 58695 + }, + { + "epoch": 2.74, + "learning_rate": 1.095935290705877e-05, + "loss": 0.1414, + "step": 58700 + }, + { + "epoch": 2.74, + "learning_rate": 1.0958569122003984e-05, + "loss": 0.2228, + "step": 58705 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957785336949196e-05, + "loss": 0.1912, + "step": 58710 + }, + { + "epoch": 2.74, + "learning_rate": 1.0957001551894408e-05, + "loss": 0.2865, + "step": 58715 + }, + { + "epoch": 2.74, + "learning_rate": 1.0956217766839624e-05, + "loss": 0.0588, + "step": 58720 + }, + { + "epoch": 2.74, + "learning_rate": 1.0955433981784836e-05, + "loss": 0.0375, + "step": 58725 + }, + { + "epoch": 2.74, + "learning_rate": 1.095465019673005e-05, + "loss": 0.1023, + "step": 58730 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953866411675262e-05, + "loss": 0.0744, + "step": 58735 + }, + { + "epoch": 2.74, + "learning_rate": 1.0953082626620478e-05, + "loss": 0.114, + "step": 58740 + }, + { + "epoch": 2.74, + "learning_rate": 1.095229884156569e-05, + "loss": 0.1044, + "step": 58745 + }, + { + "epoch": 2.74, + "learning_rate": 1.0951515056510904e-05, + "loss": 0.1161, + "step": 58750 + }, + { + "epoch": 2.74, + "learning_rate": 1.0950731271456116e-05, + "loss": 0.1975, + "step": 58755 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949947486401331e-05, + "loss": 0.2124, + "step": 58760 + }, + { + "epoch": 2.74, + "learning_rate": 1.0949163701346544e-05, + "loss": 0.4417, + "step": 58765 + }, + { + "epoch": 2.74, + "learning_rate": 1.0948379916291758e-05, + "loss": 0.0748, + "step": 58770 + }, + { + "epoch": 2.74, + "learning_rate": 1.094759613123697e-05, + "loss": 0.0129, + "step": 58775 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946812346182184e-05, + "loss": 0.0311, + "step": 58780 + }, + { + "epoch": 2.74, + "learning_rate": 1.0946028561127398e-05, + "loss": 0.1198, + "step": 58785 + }, + { + "epoch": 2.74, + "learning_rate": 1.094524477607261e-05, + "loss": 0.0663, + "step": 58790 + }, + { + "epoch": 2.74, + "learning_rate": 1.0944460991017824e-05, + "loss": 0.0381, + "step": 58795 + }, + { + "epoch": 2.74, + "learning_rate": 1.0943677205963038e-05, + "loss": 0.1238, + "step": 58800 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942893420908252e-05, + "loss": 0.213, + "step": 58805 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942109635853464e-05, + "loss": 0.1942, + "step": 58810 + }, + { + "epoch": 2.74, + "learning_rate": 1.094132585079868e-05, + "loss": 0.349, + "step": 58815 + }, + { + "epoch": 2.74, + "learning_rate": 1.0940542065743892e-05, + "loss": 0.0369, + "step": 58820 + }, + { + "epoch": 2.74, + "learning_rate": 1.0939758280689105e-05, + "loss": 0.0544, + "step": 58825 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938974495634318e-05, + "loss": 0.0355, + "step": 58830 + }, + { + "epoch": 2.75, + "learning_rate": 1.0938190710579533e-05, + "loss": 0.0572, + "step": 58835 + }, + { + "epoch": 2.75, + "learning_rate": 1.0937406925524746e-05, + "loss": 0.1182, + "step": 58840 + }, + { + "epoch": 2.75, + "learning_rate": 1.0936623140469958e-05, + "loss": 0.1305, + "step": 58845 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935839355415172e-05, + "loss": 0.1772, + "step": 58850 + }, + { + "epoch": 2.75, + "learning_rate": 1.0935055570360384e-05, + "loss": 0.1283, + "step": 58855 + }, + { + "epoch": 2.75, + "learning_rate": 1.09342717853056e-05, + "loss": 0.1314, + "step": 58860 + }, + { + "epoch": 2.75, + "learning_rate": 1.0933488000250812e-05, + "loss": 0.2568, + "step": 58865 + }, + { + "epoch": 2.75, + "learning_rate": 1.0932704215196026e-05, + "loss": 0.0676, + "step": 58870 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931920430141238e-05, + "loss": 0.0313, + "step": 58875 + }, + { + "epoch": 2.75, + "learning_rate": 1.0931136645086453e-05, + "loss": 0.0603, + "step": 58880 + }, + { + "epoch": 2.75, + "learning_rate": 1.0930352860031666e-05, + "loss": 0.0634, + "step": 58885 + }, + { + "epoch": 2.75, + "learning_rate": 1.092956907497688e-05, + "loss": 0.0677, + "step": 58890 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928785289922092e-05, + "loss": 0.0863, + "step": 58895 + }, + { + "epoch": 2.75, + "learning_rate": 1.0928001504867307e-05, + "loss": 0.063, + "step": 58900 + }, + { + "epoch": 2.75, + "learning_rate": 1.092721771981252e-05, + "loss": 0.17, + "step": 58905 + }, + { + "epoch": 2.75, + "learning_rate": 1.0926433934757732e-05, + "loss": 0.1749, + "step": 58910 + }, + { + "epoch": 2.75, + "learning_rate": 1.0925650149702947e-05, + "loss": 0.2382, + "step": 58915 + }, + { + "epoch": 2.75, + "learning_rate": 1.092486636464816e-05, + "loss": 0.0386, + "step": 58920 + }, + { + "epoch": 2.75, + "learning_rate": 1.0924082579593373e-05, + "loss": 0.0822, + "step": 58925 + }, + { + "epoch": 2.75, + "learning_rate": 1.0923298794538586e-05, + "loss": 0.0393, + "step": 58930 + }, + { + "epoch": 2.75, + "learning_rate": 1.0922515009483801e-05, + "loss": 0.0475, + "step": 58935 + }, + { + "epoch": 2.75, + "learning_rate": 1.0921731224429013e-05, + "loss": 0.0143, + "step": 58940 + }, + { + "epoch": 2.75, + "learning_rate": 1.0920947439374227e-05, + "loss": 0.1933, + "step": 58945 + }, + { + "epoch": 2.75, + "learning_rate": 1.092016365431944e-05, + "loss": 0.1644, + "step": 58950 + }, + { + "epoch": 2.75, + "learning_rate": 1.0919379869264655e-05, + "loss": 0.1502, + "step": 58955 + }, + { + "epoch": 2.75, + "learning_rate": 1.0918596084209867e-05, + "loss": 0.1974, + "step": 58960 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917812299155081e-05, + "loss": 0.2703, + "step": 58965 + }, + { + "epoch": 2.75, + "learning_rate": 1.0917028514100293e-05, + "loss": 0.0824, + "step": 58970 + }, + { + "epoch": 2.75, + "learning_rate": 1.0916244729045506e-05, + "loss": 0.0104, + "step": 58975 + }, + { + "epoch": 2.75, + "learning_rate": 1.0915460943990721e-05, + "loss": 0.0975, + "step": 58980 + }, + { + "epoch": 2.75, + "learning_rate": 1.0914677158935934e-05, + "loss": 0.0575, + "step": 58985 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913893373881147e-05, + "loss": 0.059, + "step": 58990 + }, + { + "epoch": 2.75, + "learning_rate": 1.0913109588826361e-05, + "loss": 0.1536, + "step": 58995 + }, + { + "epoch": 2.75, + "learning_rate": 1.0912325803771575e-05, + "loss": 0.1253, + "step": 59000 + }, + { + "epoch": 2.75, + "learning_rate": 1.0911542018716787e-05, + "loss": 0.198, + "step": 59005 + }, + { + "epoch": 2.75, + "learning_rate": 1.0910758233662001e-05, + "loss": 0.2212, + "step": 59010 + }, + { + "epoch": 2.75, + "learning_rate": 1.0909974448607215e-05, + "loss": 0.3376, + "step": 59015 + }, + { + "epoch": 2.75, + "learning_rate": 1.090919066355243e-05, + "loss": 0.0407, + "step": 59020 + }, + { + "epoch": 2.75, + "learning_rate": 1.0908406878497641e-05, + "loss": 0.0796, + "step": 59025 + }, + { + "epoch": 2.75, + "learning_rate": 1.0907623093442857e-05, + "loss": 0.0662, + "step": 59030 + }, + { + "epoch": 2.75, + "learning_rate": 1.090683930838807e-05, + "loss": 0.0887, + "step": 59035 + }, + { + "epoch": 2.75, + "learning_rate": 1.0906055523333281e-05, + "loss": 0.0557, + "step": 59040 + }, + { + "epoch": 2.76, + "learning_rate": 1.0905271738278495e-05, + "loss": 0.1039, + "step": 59045 + }, + { + "epoch": 2.76, + "learning_rate": 1.0904487953223708e-05, + "loss": 0.1408, + "step": 59050 + }, + { + "epoch": 2.76, + "learning_rate": 1.0903704168168923e-05, + "loss": 0.1745, + "step": 59055 + }, + { + "epoch": 2.76, + "learning_rate": 1.0902920383114135e-05, + "loss": 0.2041, + "step": 59060 + }, + { + "epoch": 2.76, + "learning_rate": 1.090213659805935e-05, + "loss": 0.3081, + "step": 59065 + }, + { + "epoch": 2.76, + "learning_rate": 1.0901352813004561e-05, + "loss": 0.0728, + "step": 59070 + }, + { + "epoch": 2.76, + "learning_rate": 1.0900569027949777e-05, + "loss": 0.0263, + "step": 59075 + }, + { + "epoch": 2.76, + "learning_rate": 1.089978524289499e-05, + "loss": 0.0564, + "step": 59080 + }, + { + "epoch": 2.76, + "learning_rate": 1.0899001457840203e-05, + "loss": 0.0237, + "step": 59085 + }, + { + "epoch": 2.76, + "learning_rate": 1.0898217672785415e-05, + "loss": 0.1018, + "step": 59090 + }, + { + "epoch": 2.76, + "learning_rate": 1.0897433887730631e-05, + "loss": 0.1151, + "step": 59095 + }, + { + "epoch": 2.76, + "learning_rate": 1.0896650102675843e-05, + "loss": 0.147, + "step": 59100 + }, + { + "epoch": 2.76, + "learning_rate": 1.0895866317621055e-05, + "loss": 0.1815, + "step": 59105 + }, + { + "epoch": 2.76, + "learning_rate": 1.089508253256627e-05, + "loss": 0.1446, + "step": 59110 + }, + { + "epoch": 2.76, + "learning_rate": 1.0894298747511483e-05, + "loss": 0.2181, + "step": 59115 + }, + { + "epoch": 2.76, + "learning_rate": 1.0893514962456697e-05, + "loss": 0.0203, + "step": 59120 + }, + { + "epoch": 2.76, + "learning_rate": 1.089273117740191e-05, + "loss": 0.0237, + "step": 59125 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891947392347125e-05, + "loss": 0.0479, + "step": 59130 + }, + { + "epoch": 2.76, + "learning_rate": 1.0891163607292337e-05, + "loss": 0.0397, + "step": 59135 + }, + { + "epoch": 2.76, + "learning_rate": 1.0890379822237551e-05, + "loss": 0.0788, + "step": 59140 + }, + { + "epoch": 2.76, + "learning_rate": 1.0889596037182763e-05, + "loss": 0.1114, + "step": 59145 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888812252127979e-05, + "loss": 0.1694, + "step": 59150 + }, + { + "epoch": 2.76, + "learning_rate": 1.0888028467073191e-05, + "loss": 0.2413, + "step": 59155 + }, + { + "epoch": 2.76, + "learning_rate": 1.0887244682018405e-05, + "loss": 0.2377, + "step": 59160 + }, + { + "epoch": 2.76, + "learning_rate": 1.0886460896963617e-05, + "loss": 0.2335, + "step": 59165 + }, + { + "epoch": 2.76, + "learning_rate": 1.088567711190883e-05, + "loss": 0.0697, + "step": 59170 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884893326854045e-05, + "loss": 0.0551, + "step": 59175 + }, + { + "epoch": 2.76, + "learning_rate": 1.0884109541799257e-05, + "loss": 0.0589, + "step": 59180 + }, + { + "epoch": 2.76, + "learning_rate": 1.0883325756744471e-05, + "loss": 0.0893, + "step": 59185 + }, + { + "epoch": 2.76, + "learning_rate": 1.0882541971689683e-05, + "loss": 0.1181, + "step": 59190 + }, + { + "epoch": 2.76, + "learning_rate": 1.0881758186634899e-05, + "loss": 0.0627, + "step": 59195 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880974401580111e-05, + "loss": 0.0549, + "step": 59200 + }, + { + "epoch": 2.76, + "learning_rate": 1.0880190616525325e-05, + "loss": 0.1895, + "step": 59205 + }, + { + "epoch": 2.76, + "learning_rate": 1.0879406831470539e-05, + "loss": 0.1951, + "step": 59210 + }, + { + "epoch": 2.76, + "learning_rate": 1.0878623046415753e-05, + "loss": 0.2903, + "step": 59215 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877839261360965e-05, + "loss": 0.0543, + "step": 59220 + }, + { + "epoch": 2.76, + "learning_rate": 1.0877055476306179e-05, + "loss": 0.0562, + "step": 59225 + }, + { + "epoch": 2.76, + "learning_rate": 1.0876271691251393e-05, + "loss": 0.0212, + "step": 59230 + }, + { + "epoch": 2.76, + "learning_rate": 1.0875487906196605e-05, + "loss": 0.0372, + "step": 59235 + }, + { + "epoch": 2.76, + "learning_rate": 1.0874704121141819e-05, + "loss": 0.0557, + "step": 59240 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873920336087031e-05, + "loss": 0.0941, + "step": 59245 + }, + { + "epoch": 2.76, + "learning_rate": 1.0873136551032247e-05, + "loss": 0.1335, + "step": 59250 + }, + { + "epoch": 2.76, + "learning_rate": 1.0872352765977459e-05, + "loss": 0.1388, + "step": 59255 + }, + { + "epoch": 2.77, + "learning_rate": 1.0871568980922673e-05, + "loss": 0.3518, + "step": 59260 + }, + { + "epoch": 2.77, + "learning_rate": 1.0870785195867885e-05, + "loss": 0.2424, + "step": 59265 + }, + { + "epoch": 2.77, + "learning_rate": 1.08700014108131e-05, + "loss": 0.0594, + "step": 59270 + }, + { + "epoch": 2.77, + "learning_rate": 1.0869217625758313e-05, + "loss": 0.0566, + "step": 59275 + }, + { + "epoch": 2.77, + "learning_rate": 1.0868433840703527e-05, + "loss": 0.0809, + "step": 59280 + }, + { + "epoch": 2.77, + "learning_rate": 1.0867650055648739e-05, + "loss": 0.0813, + "step": 59285 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866866270593955e-05, + "loss": 0.1093, + "step": 59290 + }, + { + "epoch": 2.77, + "learning_rate": 1.0866082485539167e-05, + "loss": 0.0765, + "step": 59295 + }, + { + "epoch": 2.77, + "learning_rate": 1.0865298700484379e-05, + "loss": 0.1189, + "step": 59300 + }, + { + "epoch": 2.77, + "learning_rate": 1.0864514915429593e-05, + "loss": 0.2332, + "step": 59305 + }, + { + "epoch": 2.77, + "learning_rate": 1.0863731130374807e-05, + "loss": 0.2367, + "step": 59310 + }, + { + "epoch": 2.77, + "learning_rate": 1.086294734532002e-05, + "loss": 0.3552, + "step": 59315 + }, + { + "epoch": 2.77, + "learning_rate": 1.0862163560265233e-05, + "loss": 0.0216, + "step": 59320 + }, + { + "epoch": 2.77, + "learning_rate": 1.0861379775210447e-05, + "loss": 0.0542, + "step": 59325 + }, + { + "epoch": 2.77, + "learning_rate": 1.086059599015566e-05, + "loss": 0.0626, + "step": 59330 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859812205100875e-05, + "loss": 0.0793, + "step": 59335 + }, + { + "epoch": 2.77, + "learning_rate": 1.0859028420046087e-05, + "loss": 0.0943, + "step": 59340 + }, + { + "epoch": 2.77, + "learning_rate": 1.0858244634991303e-05, + "loss": 0.1106, + "step": 59345 + }, + { + "epoch": 2.77, + "learning_rate": 1.0857460849936515e-05, + "loss": 0.1345, + "step": 59350 + }, + { + "epoch": 2.77, + "learning_rate": 1.0856677064881729e-05, + "loss": 0.2081, + "step": 59355 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855893279826941e-05, + "loss": 0.3029, + "step": 59360 + }, + { + "epoch": 2.77, + "learning_rate": 1.0855109494772153e-05, + "loss": 0.2904, + "step": 59365 + }, + { + "epoch": 2.77, + "learning_rate": 1.0854325709717369e-05, + "loss": 0.0358, + "step": 59370 + }, + { + "epoch": 2.77, + "learning_rate": 1.0853541924662581e-05, + "loss": 0.0264, + "step": 59375 + }, + { + "epoch": 2.77, + "learning_rate": 1.0852758139607795e-05, + "loss": 0.0394, + "step": 59380 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851974354553007e-05, + "loss": 0.0479, + "step": 59385 + }, + { + "epoch": 2.77, + "learning_rate": 1.0851190569498223e-05, + "loss": 0.0999, + "step": 59390 + }, + { + "epoch": 2.77, + "learning_rate": 1.0850406784443435e-05, + "loss": 0.0661, + "step": 59395 + }, + { + "epoch": 2.77, + "learning_rate": 1.0849622999388649e-05, + "loss": 0.0937, + "step": 59400 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848839214333861e-05, + "loss": 0.1815, + "step": 59405 + }, + { + "epoch": 2.77, + "learning_rate": 1.0848055429279077e-05, + "loss": 0.3394, + "step": 59410 + }, + { + "epoch": 2.77, + "learning_rate": 1.0847271644224289e-05, + "loss": 0.3475, + "step": 59415 + }, + { + "epoch": 2.77, + "learning_rate": 1.0846487859169503e-05, + "loss": 0.0382, + "step": 59420 + }, + { + "epoch": 2.77, + "learning_rate": 1.0845704074114715e-05, + "loss": 0.0969, + "step": 59425 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844920289059929e-05, + "loss": 0.0548, + "step": 59430 + }, + { + "epoch": 2.77, + "learning_rate": 1.0844136504005143e-05, + "loss": 0.0841, + "step": 59435 + }, + { + "epoch": 2.77, + "learning_rate": 1.0843352718950355e-05, + "loss": 0.0781, + "step": 59440 + }, + { + "epoch": 2.77, + "learning_rate": 1.084256893389557e-05, + "loss": 0.1071, + "step": 59445 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841785148840783e-05, + "loss": 0.1219, + "step": 59450 + }, + { + "epoch": 2.77, + "learning_rate": 1.0841001363785997e-05, + "loss": 0.1605, + "step": 59455 + }, + { + "epoch": 2.77, + "learning_rate": 1.0840217578731209e-05, + "loss": 0.3221, + "step": 59460 + }, + { + "epoch": 2.77, + "learning_rate": 1.0839433793676424e-05, + "loss": 0.2631, + "step": 59465 + }, + { + "epoch": 2.77, + "learning_rate": 1.0838650008621637e-05, + "loss": 0.0423, + "step": 59470 + }, + { + "epoch": 2.78, + "learning_rate": 1.083786622356685e-05, + "loss": 0.0269, + "step": 59475 + }, + { + "epoch": 2.78, + "learning_rate": 1.0837082438512063e-05, + "loss": 0.0213, + "step": 59480 + }, + { + "epoch": 2.78, + "learning_rate": 1.0836298653457278e-05, + "loss": 0.0742, + "step": 59485 + }, + { + "epoch": 2.78, + "learning_rate": 1.083551486840249e-05, + "loss": 0.0317, + "step": 59490 + }, + { + "epoch": 2.78, + "learning_rate": 1.0834731083347703e-05, + "loss": 0.1428, + "step": 59495 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833947298292917e-05, + "loss": 0.1145, + "step": 59500 + }, + { + "epoch": 2.78, + "learning_rate": 1.0833163513238129e-05, + "loss": 0.1002, + "step": 59505 + }, + { + "epoch": 2.78, + "learning_rate": 1.0832379728183344e-05, + "loss": 0.2298, + "step": 59510 + }, + { + "epoch": 2.78, + "learning_rate": 1.0831595943128557e-05, + "loss": 0.3871, + "step": 59515 + }, + { + "epoch": 2.78, + "learning_rate": 1.083081215807377e-05, + "loss": 0.03, + "step": 59520 + }, + { + "epoch": 2.78, + "learning_rate": 1.0830028373018985e-05, + "loss": 0.0571, + "step": 59525 + }, + { + "epoch": 2.78, + "learning_rate": 1.0829244587964198e-05, + "loss": 0.0582, + "step": 59530 + }, + { + "epoch": 2.78, + "learning_rate": 1.082846080290941e-05, + "loss": 0.0255, + "step": 59535 + }, + { + "epoch": 2.78, + "learning_rate": 1.0827677017854625e-05, + "loss": 0.0613, + "step": 59540 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826893232799838e-05, + "loss": 0.0576, + "step": 59545 + }, + { + "epoch": 2.78, + "learning_rate": 1.0826109447745052e-05, + "loss": 0.172, + "step": 59550 + }, + { + "epoch": 2.78, + "learning_rate": 1.0825325662690265e-05, + "loss": 0.1161, + "step": 59555 + }, + { + "epoch": 2.78, + "learning_rate": 1.0824541877635477e-05, + "loss": 0.3, + "step": 59560 + }, + { + "epoch": 2.78, + "learning_rate": 1.0823758092580692e-05, + "loss": 0.3196, + "step": 59565 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822974307525905e-05, + "loss": 0.0401, + "step": 59570 + }, + { + "epoch": 2.78, + "learning_rate": 1.0822190522471118e-05, + "loss": 0.0501, + "step": 59575 + }, + { + "epoch": 2.78, + "learning_rate": 1.082140673741633e-05, + "loss": 0.03, + "step": 59580 + }, + { + "epoch": 2.78, + "learning_rate": 1.0820622952361546e-05, + "loss": 0.1153, + "step": 59585 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819839167306759e-05, + "loss": 0.095, + "step": 59590 + }, + { + "epoch": 2.78, + "learning_rate": 1.0819055382251972e-05, + "loss": 0.0778, + "step": 59595 + }, + { + "epoch": 2.78, + "learning_rate": 1.0818271597197185e-05, + "loss": 0.0841, + "step": 59600 + }, + { + "epoch": 2.78, + "learning_rate": 1.08174878121424e-05, + "loss": 0.1166, + "step": 59605 + }, + { + "epoch": 2.78, + "learning_rate": 1.0816704027087612e-05, + "loss": 0.202, + "step": 59610 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815920242032826e-05, + "loss": 0.2211, + "step": 59615 + }, + { + "epoch": 2.78, + "learning_rate": 1.0815136456978039e-05, + "loss": 0.0165, + "step": 59620 + }, + { + "epoch": 2.78, + "learning_rate": 1.0814352671923252e-05, + "loss": 0.0745, + "step": 59625 + }, + { + "epoch": 2.78, + "learning_rate": 1.0813568886868466e-05, + "loss": 0.0653, + "step": 59630 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812785101813679e-05, + "loss": 0.0922, + "step": 59635 + }, + { + "epoch": 2.78, + "learning_rate": 1.0812001316758892e-05, + "loss": 0.1051, + "step": 59640 + }, + { + "epoch": 2.78, + "learning_rate": 1.0811217531704106e-05, + "loss": 0.1219, + "step": 59645 + }, + { + "epoch": 2.78, + "learning_rate": 1.081043374664932e-05, + "loss": 0.2201, + "step": 59650 + }, + { + "epoch": 2.78, + "learning_rate": 1.0809649961594533e-05, + "loss": 0.1585, + "step": 59655 + }, + { + "epoch": 2.78, + "learning_rate": 1.0808866176539748e-05, + "loss": 0.2065, + "step": 59660 + }, + { + "epoch": 2.78, + "learning_rate": 1.080808239148496e-05, + "loss": 0.3347, + "step": 59665 + }, + { + "epoch": 2.78, + "learning_rate": 1.0807298606430174e-05, + "loss": 0.0825, + "step": 59670 + }, + { + "epoch": 2.78, + "learning_rate": 1.0806514821375386e-05, + "loss": 0.0217, + "step": 59675 + }, + { + "epoch": 2.78, + "learning_rate": 1.0805731036320602e-05, + "loss": 0.0387, + "step": 59680 + }, + { + "epoch": 2.78, + "learning_rate": 1.0804947251265814e-05, + "loss": 0.0708, + "step": 59685 + }, + { + "epoch": 2.79, + "learning_rate": 1.0804163466211026e-05, + "loss": 0.1803, + "step": 59690 + }, + { + "epoch": 2.79, + "learning_rate": 1.080337968115624e-05, + "loss": 0.0929, + "step": 59695 + }, + { + "epoch": 2.79, + "learning_rate": 1.0802595896101453e-05, + "loss": 0.0763, + "step": 59700 + }, + { + "epoch": 2.79, + "learning_rate": 1.0801812111046668e-05, + "loss": 0.1241, + "step": 59705 + }, + { + "epoch": 2.79, + "learning_rate": 1.080102832599188e-05, + "loss": 0.2267, + "step": 59710 + }, + { + "epoch": 2.79, + "learning_rate": 1.0800244540937094e-05, + "loss": 0.1755, + "step": 59715 + }, + { + "epoch": 2.79, + "learning_rate": 1.0799460755882307e-05, + "loss": 0.0372, + "step": 59720 + }, + { + "epoch": 2.79, + "learning_rate": 1.0798676970827522e-05, + "loss": 0.0253, + "step": 59725 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797893185772734e-05, + "loss": 0.1087, + "step": 59730 + }, + { + "epoch": 2.79, + "learning_rate": 1.0797109400717948e-05, + "loss": 0.0811, + "step": 59735 + }, + { + "epoch": 2.79, + "learning_rate": 1.079632561566316e-05, + "loss": 0.0563, + "step": 59740 + }, + { + "epoch": 2.79, + "learning_rate": 1.0795541830608376e-05, + "loss": 0.1129, + "step": 59745 + }, + { + "epoch": 2.79, + "learning_rate": 1.0794758045553588e-05, + "loss": 0.1649, + "step": 59750 + }, + { + "epoch": 2.79, + "learning_rate": 1.07939742604988e-05, + "loss": 0.1641, + "step": 59755 + }, + { + "epoch": 2.79, + "learning_rate": 1.0793190475444016e-05, + "loss": 0.2485, + "step": 59760 + }, + { + "epoch": 2.79, + "learning_rate": 1.0792406690389228e-05, + "loss": 0.2066, + "step": 59765 + }, + { + "epoch": 2.79, + "learning_rate": 1.0791622905334442e-05, + "loss": 0.0445, + "step": 59770 + }, + { + "epoch": 2.79, + "learning_rate": 1.0790839120279654e-05, + "loss": 0.0544, + "step": 59775 + }, + { + "epoch": 2.79, + "learning_rate": 1.079005533522487e-05, + "loss": 0.0232, + "step": 59780 + }, + { + "epoch": 2.79, + "learning_rate": 1.0789271550170082e-05, + "loss": 0.0835, + "step": 59785 + }, + { + "epoch": 2.79, + "learning_rate": 1.0788487765115296e-05, + "loss": 0.1308, + "step": 59790 + }, + { + "epoch": 2.79, + "learning_rate": 1.0787703980060508e-05, + "loss": 0.0412, + "step": 59795 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786920195005724e-05, + "loss": 0.2067, + "step": 59800 + }, + { + "epoch": 2.79, + "learning_rate": 1.0786136409950936e-05, + "loss": 0.1801, + "step": 59805 + }, + { + "epoch": 2.79, + "learning_rate": 1.078535262489615e-05, + "loss": 0.2654, + "step": 59810 + }, + { + "epoch": 2.79, + "learning_rate": 1.0784568839841362e-05, + "loss": 0.2404, + "step": 59815 + }, + { + "epoch": 2.79, + "learning_rate": 1.0783785054786574e-05, + "loss": 0.0479, + "step": 59820 + }, + { + "epoch": 2.79, + "learning_rate": 1.078300126973179e-05, + "loss": 0.0855, + "step": 59825 + }, + { + "epoch": 2.79, + "learning_rate": 1.0782217484677002e-05, + "loss": 0.0729, + "step": 59830 + }, + { + "epoch": 2.79, + "learning_rate": 1.0781433699622216e-05, + "loss": 0.0526, + "step": 59835 + }, + { + "epoch": 2.79, + "learning_rate": 1.078064991456743e-05, + "loss": 0.0608, + "step": 59840 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779866129512644e-05, + "loss": 0.0754, + "step": 59845 + }, + { + "epoch": 2.79, + "learning_rate": 1.0779082344457856e-05, + "loss": 0.1885, + "step": 59850 + }, + { + "epoch": 2.79, + "learning_rate": 1.077829855940307e-05, + "loss": 0.1078, + "step": 59855 + }, + { + "epoch": 2.79, + "learning_rate": 1.0777514774348284e-05, + "loss": 0.2044, + "step": 59860 + }, + { + "epoch": 2.79, + "learning_rate": 1.0776730989293498e-05, + "loss": 0.3295, + "step": 59865 + }, + { + "epoch": 2.79, + "learning_rate": 1.077594720423871e-05, + "loss": 0.0507, + "step": 59870 + }, + { + "epoch": 2.79, + "learning_rate": 1.0775163419183926e-05, + "loss": 0.0369, + "step": 59875 + }, + { + "epoch": 2.79, + "learning_rate": 1.0774379634129138e-05, + "loss": 0.0298, + "step": 59880 + }, + { + "epoch": 2.79, + "learning_rate": 1.077359584907435e-05, + "loss": 0.0492, + "step": 59885 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772812064019564e-05, + "loss": 0.0656, + "step": 59890 + }, + { + "epoch": 2.79, + "learning_rate": 1.0772028278964776e-05, + "loss": 0.0895, + "step": 59895 + }, + { + "epoch": 2.8, + "learning_rate": 1.0771244493909992e-05, + "loss": 0.1175, + "step": 59900 + }, + { + "epoch": 2.8, + "learning_rate": 1.0770460708855204e-05, + "loss": 0.1537, + "step": 59905 + }, + { + "epoch": 2.8, + "learning_rate": 1.0769676923800418e-05, + "loss": 0.3598, + "step": 59910 + }, + { + "epoch": 2.8, + "learning_rate": 1.076889313874563e-05, + "loss": 0.2923, + "step": 59915 + }, + { + "epoch": 2.8, + "learning_rate": 1.0768109353690846e-05, + "loss": 0.0608, + "step": 59920 + }, + { + "epoch": 2.8, + "learning_rate": 1.0767325568636058e-05, + "loss": 0.0519, + "step": 59925 + }, + { + "epoch": 2.8, + "learning_rate": 1.0766541783581272e-05, + "loss": 0.0307, + "step": 59930 + }, + { + "epoch": 2.8, + "learning_rate": 1.0765757998526484e-05, + "loss": 0.1519, + "step": 59935 + }, + { + "epoch": 2.8, + "learning_rate": 1.07649742134717e-05, + "loss": 0.0559, + "step": 59940 + }, + { + "epoch": 2.8, + "learning_rate": 1.0764190428416912e-05, + "loss": 0.1429, + "step": 59945 + }, + { + "epoch": 2.8, + "learning_rate": 1.0763406643362124e-05, + "loss": 0.085, + "step": 59950 + }, + { + "epoch": 2.8, + "learning_rate": 1.0762622858307338e-05, + "loss": 0.2346, + "step": 59955 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761839073252552e-05, + "loss": 0.2619, + "step": 59960 + }, + { + "epoch": 2.8, + "learning_rate": 1.0761055288197766e-05, + "loss": 0.1942, + "step": 59965 + }, + { + "epoch": 2.8, + "learning_rate": 1.0760271503142978e-05, + "loss": 0.0377, + "step": 59970 + }, + { + "epoch": 2.8, + "learning_rate": 1.0759487718088194e-05, + "loss": 0.0325, + "step": 59975 + }, + { + "epoch": 2.8, + "learning_rate": 1.0758703933033406e-05, + "loss": 0.0534, + "step": 59980 + }, + { + "epoch": 2.8, + "learning_rate": 1.075792014797862e-05, + "loss": 0.0849, + "step": 59985 + }, + { + "epoch": 2.8, + "learning_rate": 1.0757136362923832e-05, + "loss": 0.0567, + "step": 59990 + }, + { + "epoch": 2.8, + "learning_rate": 1.0756352577869048e-05, + "loss": 0.0723, + "step": 59995 + }, + { + "epoch": 2.8, + "learning_rate": 1.075556879281426e-05, + "loss": 0.1649, + "step": 60000 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754785007759474e-05, + "loss": 0.1065, + "step": 60005 + }, + { + "epoch": 2.8, + "learning_rate": 1.0754001222704686e-05, + "loss": 0.0981, + "step": 60010 + }, + { + "epoch": 2.8, + "learning_rate": 1.0753217437649898e-05, + "loss": 0.3571, + "step": 60015 + }, + { + "epoch": 2.8, + "learning_rate": 1.0752433652595114e-05, + "loss": 0.0691, + "step": 60020 + }, + { + "epoch": 2.8, + "learning_rate": 1.0751649867540326e-05, + "loss": 0.0567, + "step": 60025 + }, + { + "epoch": 2.8, + "learning_rate": 1.075086608248554e-05, + "loss": 0.0616, + "step": 60030 + }, + { + "epoch": 2.8, + "learning_rate": 1.0750082297430752e-05, + "loss": 0.0686, + "step": 60035 + }, + { + "epoch": 2.8, + "learning_rate": 1.0749298512375968e-05, + "loss": 0.0687, + "step": 60040 + }, + { + "epoch": 2.8, + "learning_rate": 1.074851472732118e-05, + "loss": 0.1136, + "step": 60045 + }, + { + "epoch": 2.8, + "learning_rate": 1.0747730942266394e-05, + "loss": 0.1254, + "step": 60050 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746947157211606e-05, + "loss": 0.2474, + "step": 60055 + }, + { + "epoch": 2.8, + "learning_rate": 1.0746163372156822e-05, + "loss": 0.1883, + "step": 60060 + }, + { + "epoch": 2.8, + "learning_rate": 1.0745379587102034e-05, + "loss": 0.4343, + "step": 60065 + }, + { + "epoch": 2.8, + "learning_rate": 1.0744595802047248e-05, + "loss": 0.0974, + "step": 60070 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743812016992462e-05, + "loss": 0.0394, + "step": 60075 + }, + { + "epoch": 2.8, + "learning_rate": 1.0743028231937674e-05, + "loss": 0.0907, + "step": 60080 + }, + { + "epoch": 2.8, + "learning_rate": 1.0742244446882888e-05, + "loss": 0.1173, + "step": 60085 + }, + { + "epoch": 2.8, + "learning_rate": 1.07414606618281e-05, + "loss": 0.096, + "step": 60090 + }, + { + "epoch": 2.8, + "learning_rate": 1.0740676876773316e-05, + "loss": 0.1801, + "step": 60095 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739893091718528e-05, + "loss": 0.2232, + "step": 60100 + }, + { + "epoch": 2.8, + "learning_rate": 1.0739109306663742e-05, + "loss": 0.1869, + "step": 60105 + }, + { + "epoch": 2.8, + "learning_rate": 1.0738325521608954e-05, + "loss": 0.2526, + "step": 60110 + }, + { + "epoch": 2.81, + "learning_rate": 1.073754173655417e-05, + "loss": 0.1866, + "step": 60115 + }, + { + "epoch": 2.81, + "learning_rate": 1.0736757951499382e-05, + "loss": 0.088, + "step": 60120 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735974166444596e-05, + "loss": 0.0488, + "step": 60125 + }, + { + "epoch": 2.81, + "learning_rate": 1.0735190381389808e-05, + "loss": 0.0262, + "step": 60130 + }, + { + "epoch": 2.81, + "learning_rate": 1.0734406596335023e-05, + "loss": 0.0383, + "step": 60135 + }, + { + "epoch": 2.81, + "learning_rate": 1.0733622811280236e-05, + "loss": 0.1139, + "step": 60140 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732839026225448e-05, + "loss": 0.0991, + "step": 60145 + }, + { + "epoch": 2.81, + "learning_rate": 1.0732055241170662e-05, + "loss": 0.08, + "step": 60150 + }, + { + "epoch": 2.81, + "learning_rate": 1.0731271456115876e-05, + "loss": 0.1972, + "step": 60155 + }, + { + "epoch": 2.81, + "learning_rate": 1.073048767106109e-05, + "loss": 0.157, + "step": 60160 + }, + { + "epoch": 2.81, + "learning_rate": 1.0729703886006302e-05, + "loss": 0.3585, + "step": 60165 + }, + { + "epoch": 2.81, + "learning_rate": 1.0728920100951516e-05, + "loss": 0.0269, + "step": 60170 + }, + { + "epoch": 2.81, + "learning_rate": 1.072813631589673e-05, + "loss": 0.0937, + "step": 60175 + }, + { + "epoch": 2.81, + "learning_rate": 1.0727352530841943e-05, + "loss": 0.0532, + "step": 60180 + }, + { + "epoch": 2.81, + "learning_rate": 1.0726568745787156e-05, + "loss": 0.065, + "step": 60185 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725784960732371e-05, + "loss": 0.1078, + "step": 60190 + }, + { + "epoch": 2.81, + "learning_rate": 1.0725001175677584e-05, + "loss": 0.0757, + "step": 60195 + }, + { + "epoch": 2.81, + "learning_rate": 1.0724217390622797e-05, + "loss": 0.1246, + "step": 60200 + }, + { + "epoch": 2.81, + "learning_rate": 1.072343360556801e-05, + "loss": 0.1834, + "step": 60205 + }, + { + "epoch": 2.81, + "learning_rate": 1.0722649820513222e-05, + "loss": 0.3636, + "step": 60210 + }, + { + "epoch": 2.81, + "learning_rate": 1.0721866035458437e-05, + "loss": 0.2649, + "step": 60215 + }, + { + "epoch": 2.81, + "learning_rate": 1.072108225040365e-05, + "loss": 0.0994, + "step": 60220 + }, + { + "epoch": 2.81, + "learning_rate": 1.0720298465348864e-05, + "loss": 0.0371, + "step": 60225 + }, + { + "epoch": 2.81, + "learning_rate": 1.0719514680294076e-05, + "loss": 0.0265, + "step": 60230 + }, + { + "epoch": 2.81, + "learning_rate": 1.0718730895239291e-05, + "loss": 0.0794, + "step": 60235 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717947110184504e-05, + "loss": 0.0882, + "step": 60240 + }, + { + "epoch": 2.81, + "learning_rate": 1.0717163325129717e-05, + "loss": 0.0821, + "step": 60245 + }, + { + "epoch": 2.81, + "learning_rate": 1.071637954007493e-05, + "loss": 0.0938, + "step": 60250 + }, + { + "epoch": 2.81, + "learning_rate": 1.0715595755020145e-05, + "loss": 0.0977, + "step": 60255 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714811969965358e-05, + "loss": 0.165, + "step": 60260 + }, + { + "epoch": 2.81, + "learning_rate": 1.0714028184910571e-05, + "loss": 0.3129, + "step": 60265 + }, + { + "epoch": 2.81, + "learning_rate": 1.0713244399855784e-05, + "loss": 0.0767, + "step": 60270 + }, + { + "epoch": 2.81, + "learning_rate": 1.0712460614800998e-05, + "loss": 0.0686, + "step": 60275 + }, + { + "epoch": 2.81, + "learning_rate": 1.0711676829746211e-05, + "loss": 0.0351, + "step": 60280 + }, + { + "epoch": 2.81, + "learning_rate": 1.0710893044691424e-05, + "loss": 0.0881, + "step": 60285 + }, + { + "epoch": 2.81, + "learning_rate": 1.071010925963664e-05, + "loss": 0.0737, + "step": 60290 + }, + { + "epoch": 2.81, + "learning_rate": 1.0709325474581851e-05, + "loss": 0.125, + "step": 60295 + }, + { + "epoch": 2.81, + "learning_rate": 1.0708541689527065e-05, + "loss": 0.1237, + "step": 60300 + }, + { + "epoch": 2.81, + "learning_rate": 1.0707757904472278e-05, + "loss": 0.1589, + "step": 60305 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706974119417493e-05, + "loss": 0.1822, + "step": 60310 + }, + { + "epoch": 2.81, + "learning_rate": 1.0706190334362705e-05, + "loss": 0.2297, + "step": 60315 + }, + { + "epoch": 2.81, + "learning_rate": 1.070540654930792e-05, + "loss": 0.0529, + "step": 60320 + }, + { + "epoch": 2.81, + "learning_rate": 1.0704622764253132e-05, + "loss": 0.0159, + "step": 60325 + }, + { + "epoch": 2.82, + "learning_rate": 1.0703838979198347e-05, + "loss": 0.0392, + "step": 60330 + }, + { + "epoch": 2.82, + "learning_rate": 1.070305519414356e-05, + "loss": 0.0952, + "step": 60335 + }, + { + "epoch": 2.82, + "learning_rate": 1.0702271409088772e-05, + "loss": 0.1019, + "step": 60340 + }, + { + "epoch": 2.82, + "learning_rate": 1.0701487624033985e-05, + "loss": 0.1498, + "step": 60345 + }, + { + "epoch": 2.82, + "learning_rate": 1.0700703838979198e-05, + "loss": 0.1059, + "step": 60350 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699920053924413e-05, + "loss": 0.1739, + "step": 60355 + }, + { + "epoch": 2.82, + "learning_rate": 1.0699136268869625e-05, + "loss": 0.2294, + "step": 60360 + }, + { + "epoch": 2.82, + "learning_rate": 1.069835248381484e-05, + "loss": 0.3432, + "step": 60365 + }, + { + "epoch": 2.82, + "learning_rate": 1.0697568698760053e-05, + "loss": 0.0413, + "step": 60370 + }, + { + "epoch": 2.82, + "learning_rate": 1.0696784913705267e-05, + "loss": 0.0329, + "step": 60375 + }, + { + "epoch": 2.82, + "learning_rate": 1.069600112865048e-05, + "loss": 0.0649, + "step": 60380 + }, + { + "epoch": 2.82, + "learning_rate": 1.0695217343595693e-05, + "loss": 0.0388, + "step": 60385 + }, + { + "epoch": 2.82, + "learning_rate": 1.0694433558540907e-05, + "loss": 0.1039, + "step": 60390 + }, + { + "epoch": 2.82, + "learning_rate": 1.0693649773486121e-05, + "loss": 0.0985, + "step": 60395 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692865988431333e-05, + "loss": 0.152, + "step": 60400 + }, + { + "epoch": 2.82, + "learning_rate": 1.0692082203376546e-05, + "loss": 0.1796, + "step": 60405 + }, + { + "epoch": 2.82, + "learning_rate": 1.0691298418321761e-05, + "loss": 0.1797, + "step": 60410 + }, + { + "epoch": 2.82, + "learning_rate": 1.0690514633266973e-05, + "loss": 0.2262, + "step": 60415 + }, + { + "epoch": 2.82, + "learning_rate": 1.0689730848212187e-05, + "loss": 0.0712, + "step": 60420 + }, + { + "epoch": 2.82, + "learning_rate": 1.06889470631574e-05, + "loss": 0.0351, + "step": 60425 + }, + { + "epoch": 2.82, + "learning_rate": 1.0688163278102615e-05, + "loss": 0.0411, + "step": 60430 + }, + { + "epoch": 2.82, + "learning_rate": 1.0687379493047827e-05, + "loss": 0.0386, + "step": 60435 + }, + { + "epoch": 2.82, + "learning_rate": 1.0686595707993041e-05, + "loss": 0.0963, + "step": 60440 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685811922938253e-05, + "loss": 0.1084, + "step": 60445 + }, + { + "epoch": 2.82, + "learning_rate": 1.0685028137883469e-05, + "loss": 0.1291, + "step": 60450 + }, + { + "epoch": 2.82, + "learning_rate": 1.0684244352828681e-05, + "loss": 0.2619, + "step": 60455 + }, + { + "epoch": 2.82, + "learning_rate": 1.0683460567773895e-05, + "loss": 0.2443, + "step": 60460 + }, + { + "epoch": 2.82, + "learning_rate": 1.0682676782719107e-05, + "loss": 0.2697, + "step": 60465 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681892997664321e-05, + "loss": 0.0326, + "step": 60470 + }, + { + "epoch": 2.82, + "learning_rate": 1.0681109212609535e-05, + "loss": 0.0153, + "step": 60475 + }, + { + "epoch": 2.82, + "learning_rate": 1.0680325427554747e-05, + "loss": 0.0179, + "step": 60480 + }, + { + "epoch": 2.82, + "learning_rate": 1.0679541642499961e-05, + "loss": 0.036, + "step": 60485 + }, + { + "epoch": 2.82, + "learning_rate": 1.0678757857445175e-05, + "loss": 0.057, + "step": 60490 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677974072390389e-05, + "loss": 0.0842, + "step": 60495 + }, + { + "epoch": 2.82, + "learning_rate": 1.0677190287335601e-05, + "loss": 0.1821, + "step": 60500 + }, + { + "epoch": 2.82, + "learning_rate": 1.0676406502280817e-05, + "loss": 0.1145, + "step": 60505 + }, + { + "epoch": 2.82, + "learning_rate": 1.0675622717226029e-05, + "loss": 0.1985, + "step": 60510 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674838932171243e-05, + "loss": 0.202, + "step": 60515 + }, + { + "epoch": 2.82, + "learning_rate": 1.0674055147116455e-05, + "loss": 0.0571, + "step": 60520 + }, + { + "epoch": 2.82, + "learning_rate": 1.067327136206167e-05, + "loss": 0.046, + "step": 60525 + }, + { + "epoch": 2.82, + "learning_rate": 1.0672487577006883e-05, + "loss": 0.0769, + "step": 60530 + }, + { + "epoch": 2.82, + "learning_rate": 1.0671703791952095e-05, + "loss": 0.0421, + "step": 60535 + }, + { + "epoch": 2.82, + "learning_rate": 1.0670920006897309e-05, + "loss": 0.121, + "step": 60540 + }, + { + "epoch": 2.83, + "learning_rate": 1.0670136221842521e-05, + "loss": 0.099, + "step": 60545 + }, + { + "epoch": 2.83, + "learning_rate": 1.0669352436787737e-05, + "loss": 0.1578, + "step": 60550 + }, + { + "epoch": 2.83, + "learning_rate": 1.066856865173295e-05, + "loss": 0.1956, + "step": 60555 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667784866678163e-05, + "loss": 0.4068, + "step": 60560 + }, + { + "epoch": 2.83, + "learning_rate": 1.0667001081623375e-05, + "loss": 0.3239, + "step": 60565 + }, + { + "epoch": 2.83, + "learning_rate": 1.0666217296568591e-05, + "loss": 0.0318, + "step": 60570 + }, + { + "epoch": 2.83, + "learning_rate": 1.0665433511513803e-05, + "loss": 0.0266, + "step": 60575 + }, + { + "epoch": 2.83, + "learning_rate": 1.0664649726459017e-05, + "loss": 0.0429, + "step": 60580 + }, + { + "epoch": 2.83, + "learning_rate": 1.066386594140423e-05, + "loss": 0.0474, + "step": 60585 + }, + { + "epoch": 2.83, + "learning_rate": 1.0663082156349445e-05, + "loss": 0.0555, + "step": 60590 + }, + { + "epoch": 2.83, + "learning_rate": 1.0662298371294657e-05, + "loss": 0.0826, + "step": 60595 + }, + { + "epoch": 2.83, + "learning_rate": 1.066151458623987e-05, + "loss": 0.1561, + "step": 60600 + }, + { + "epoch": 2.83, + "learning_rate": 1.0660730801185085e-05, + "loss": 0.1313, + "step": 60605 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659947016130297e-05, + "loss": 0.2536, + "step": 60610 + }, + { + "epoch": 2.83, + "learning_rate": 1.0659163231075511e-05, + "loss": 0.2586, + "step": 60615 + }, + { + "epoch": 2.83, + "learning_rate": 1.0658379446020723e-05, + "loss": 0.0328, + "step": 60620 + }, + { + "epoch": 2.83, + "learning_rate": 1.0657595660965939e-05, + "loss": 0.0261, + "step": 60625 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656811875911151e-05, + "loss": 0.0413, + "step": 60630 + }, + { + "epoch": 2.83, + "learning_rate": 1.0656028090856365e-05, + "loss": 0.0651, + "step": 60635 + }, + { + "epoch": 2.83, + "learning_rate": 1.0655244305801577e-05, + "loss": 0.1421, + "step": 60640 + }, + { + "epoch": 2.83, + "learning_rate": 1.0654460520746793e-05, + "loss": 0.1075, + "step": 60645 + }, + { + "epoch": 2.83, + "learning_rate": 1.0653676735692005e-05, + "loss": 0.1013, + "step": 60650 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652892950637219e-05, + "loss": 0.2221, + "step": 60655 + }, + { + "epoch": 2.83, + "learning_rate": 1.0652109165582431e-05, + "loss": 0.1437, + "step": 60660 + }, + { + "epoch": 2.83, + "learning_rate": 1.0651325380527643e-05, + "loss": 0.2729, + "step": 60665 + }, + { + "epoch": 2.83, + "learning_rate": 1.0650541595472859e-05, + "loss": 0.0612, + "step": 60670 + }, + { + "epoch": 2.83, + "learning_rate": 1.0649757810418071e-05, + "loss": 0.0464, + "step": 60675 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648974025363285e-05, + "loss": 0.03, + "step": 60680 + }, + { + "epoch": 2.83, + "learning_rate": 1.0648190240308499e-05, + "loss": 0.0097, + "step": 60685 + }, + { + "epoch": 2.83, + "learning_rate": 1.0647406455253713e-05, + "loss": 0.1497, + "step": 60690 + }, + { + "epoch": 2.83, + "learning_rate": 1.0646622670198925e-05, + "loss": 0.0659, + "step": 60695 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645838885144139e-05, + "loss": 0.1278, + "step": 60700 + }, + { + "epoch": 2.83, + "learning_rate": 1.0645055100089353e-05, + "loss": 0.1808, + "step": 60705 + }, + { + "epoch": 2.83, + "learning_rate": 1.0644271315034567e-05, + "loss": 0.3674, + "step": 60710 + }, + { + "epoch": 2.83, + "learning_rate": 1.0643487529979779e-05, + "loss": 0.395, + "step": 60715 + }, + { + "epoch": 2.83, + "learning_rate": 1.0642703744924994e-05, + "loss": 0.0519, + "step": 60720 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641919959870207e-05, + "loss": 0.0121, + "step": 60725 + }, + { + "epoch": 2.83, + "learning_rate": 1.0641136174815419e-05, + "loss": 0.0334, + "step": 60730 + }, + { + "epoch": 2.83, + "learning_rate": 1.0640352389760633e-05, + "loss": 0.0302, + "step": 60735 + }, + { + "epoch": 2.83, + "learning_rate": 1.0639568604705845e-05, + "loss": 0.2201, + "step": 60740 + }, + { + "epoch": 2.83, + "learning_rate": 1.063878481965106e-05, + "loss": 0.1408, + "step": 60745 + }, + { + "epoch": 2.83, + "learning_rate": 1.0638001034596273e-05, + "loss": 0.1301, + "step": 60750 + }, + { + "epoch": 2.83, + "learning_rate": 1.0637217249541487e-05, + "loss": 0.1961, + "step": 60755 + }, + { + "epoch": 2.84, + "learning_rate": 1.0636433464486699e-05, + "loss": 0.1496, + "step": 60760 + }, + { + "epoch": 2.84, + "learning_rate": 1.0635649679431915e-05, + "loss": 0.2515, + "step": 60765 + }, + { + "epoch": 2.84, + "learning_rate": 1.0634865894377127e-05, + "loss": 0.0856, + "step": 60770 + }, + { + "epoch": 2.84, + "learning_rate": 1.063408210932234e-05, + "loss": 0.0536, + "step": 60775 + }, + { + "epoch": 2.84, + "learning_rate": 1.0633298324267553e-05, + "loss": 0.0576, + "step": 60780 + }, + { + "epoch": 2.84, + "learning_rate": 1.0632514539212768e-05, + "loss": 0.0704, + "step": 60785 + }, + { + "epoch": 2.84, + "learning_rate": 1.063173075415798e-05, + "loss": 0.068, + "step": 60790 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630946969103193e-05, + "loss": 0.1093, + "step": 60795 + }, + { + "epoch": 2.84, + "learning_rate": 1.0630163184048407e-05, + "loss": 0.1841, + "step": 60800 + }, + { + "epoch": 2.84, + "learning_rate": 1.062937939899362e-05, + "loss": 0.1482, + "step": 60805 + }, + { + "epoch": 2.84, + "learning_rate": 1.0628595613938835e-05, + "loss": 0.1913, + "step": 60810 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627811828884047e-05, + "loss": 0.2599, + "step": 60815 + }, + { + "epoch": 2.84, + "learning_rate": 1.0627028043829262e-05, + "loss": 0.0773, + "step": 60820 + }, + { + "epoch": 2.84, + "learning_rate": 1.0626244258774475e-05, + "loss": 0.0442, + "step": 60825 + }, + { + "epoch": 2.84, + "learning_rate": 1.0625460473719689e-05, + "loss": 0.0988, + "step": 60830 + }, + { + "epoch": 2.84, + "learning_rate": 1.06246766886649e-05, + "loss": 0.1, + "step": 60835 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623892903610116e-05, + "loss": 0.0715, + "step": 60840 + }, + { + "epoch": 2.84, + "learning_rate": 1.0623109118555329e-05, + "loss": 0.0782, + "step": 60845 + }, + { + "epoch": 2.84, + "learning_rate": 1.0622325333500542e-05, + "loss": 0.1205, + "step": 60850 + }, + { + "epoch": 2.84, + "learning_rate": 1.0621541548445755e-05, + "loss": 0.1423, + "step": 60855 + }, + { + "epoch": 2.84, + "learning_rate": 1.0620757763390967e-05, + "loss": 0.2899, + "step": 60860 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619973978336183e-05, + "loss": 0.2523, + "step": 60865 + }, + { + "epoch": 2.84, + "learning_rate": 1.0619190193281395e-05, + "loss": 0.0619, + "step": 60870 + }, + { + "epoch": 2.84, + "learning_rate": 1.0618406408226609e-05, + "loss": 0.044, + "step": 60875 + }, + { + "epoch": 2.84, + "learning_rate": 1.0617622623171821e-05, + "loss": 0.0412, + "step": 60880 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616838838117036e-05, + "loss": 0.0697, + "step": 60885 + }, + { + "epoch": 2.84, + "learning_rate": 1.0616055053062249e-05, + "loss": 0.0875, + "step": 60890 + }, + { + "epoch": 2.84, + "learning_rate": 1.0615271268007463e-05, + "loss": 0.1086, + "step": 60895 + }, + { + "epoch": 2.84, + "learning_rate": 1.0614487482952675e-05, + "loss": 0.1613, + "step": 60900 + }, + { + "epoch": 2.84, + "learning_rate": 1.061370369789789e-05, + "loss": 0.1262, + "step": 60905 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612919912843103e-05, + "loss": 0.181, + "step": 60910 + }, + { + "epoch": 2.84, + "learning_rate": 1.0612136127788316e-05, + "loss": 0.2297, + "step": 60915 + }, + { + "epoch": 2.84, + "learning_rate": 1.061135234273353e-05, + "loss": 0.0381, + "step": 60920 + }, + { + "epoch": 2.84, + "learning_rate": 1.0610568557678743e-05, + "loss": 0.0273, + "step": 60925 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609784772623957e-05, + "loss": 0.0332, + "step": 60930 + }, + { + "epoch": 2.84, + "learning_rate": 1.0609000987569169e-05, + "loss": 0.0649, + "step": 60935 + }, + { + "epoch": 2.84, + "learning_rate": 1.0608217202514384e-05, + "loss": 0.0916, + "step": 60940 + }, + { + "epoch": 2.84, + "learning_rate": 1.0607433417459597e-05, + "loss": 0.0944, + "step": 60945 + }, + { + "epoch": 2.84, + "learning_rate": 1.060664963240481e-05, + "loss": 0.1064, + "step": 60950 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605865847350023e-05, + "loss": 0.1578, + "step": 60955 + }, + { + "epoch": 2.84, + "learning_rate": 1.0605082062295238e-05, + "loss": 0.1276, + "step": 60960 + }, + { + "epoch": 2.84, + "learning_rate": 1.060429827724045e-05, + "loss": 0.3312, + "step": 60965 + }, + { + "epoch": 2.84, + "learning_rate": 1.0603514492185664e-05, + "loss": 0.0179, + "step": 60970 + }, + { + "epoch": 2.85, + "learning_rate": 1.0602730707130877e-05, + "loss": 0.062, + "step": 60975 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601946922076092e-05, + "loss": 0.0659, + "step": 60980 + }, + { + "epoch": 2.85, + "learning_rate": 1.0601163137021304e-05, + "loss": 0.1297, + "step": 60985 + }, + { + "epoch": 2.85, + "learning_rate": 1.0600379351966517e-05, + "loss": 0.0375, + "step": 60990 + }, + { + "epoch": 2.85, + "learning_rate": 1.059959556691173e-05, + "loss": 0.118, + "step": 60995 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598811781856944e-05, + "loss": 0.0791, + "step": 61000 + }, + { + "epoch": 2.85, + "learning_rate": 1.0598027996802158e-05, + "loss": 0.1074, + "step": 61005 + }, + { + "epoch": 2.85, + "learning_rate": 1.059724421174737e-05, + "loss": 0.2143, + "step": 61010 + }, + { + "epoch": 2.85, + "learning_rate": 1.0596460426692584e-05, + "loss": 0.2168, + "step": 61015 + }, + { + "epoch": 2.85, + "learning_rate": 1.0595676641637798e-05, + "loss": 0.07, + "step": 61020 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594892856583012e-05, + "loss": 0.0378, + "step": 61025 + }, + { + "epoch": 2.85, + "learning_rate": 1.0594109071528224e-05, + "loss": 0.0538, + "step": 61030 + }, + { + "epoch": 2.85, + "learning_rate": 1.059332528647344e-05, + "loss": 0.058, + "step": 61035 + }, + { + "epoch": 2.85, + "learning_rate": 1.0592541501418652e-05, + "loss": 0.1057, + "step": 61040 + }, + { + "epoch": 2.85, + "learning_rate": 1.0591757716363866e-05, + "loss": 0.114, + "step": 61045 + }, + { + "epoch": 2.85, + "learning_rate": 1.0590973931309078e-05, + "loss": 0.1695, + "step": 61050 + }, + { + "epoch": 2.85, + "learning_rate": 1.059019014625429e-05, + "loss": 0.171, + "step": 61055 + }, + { + "epoch": 2.85, + "learning_rate": 1.0589406361199506e-05, + "loss": 0.3068, + "step": 61060 + }, + { + "epoch": 2.85, + "learning_rate": 1.0588622576144718e-05, + "loss": 0.2925, + "step": 61065 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587838791089932e-05, + "loss": 0.0337, + "step": 61070 + }, + { + "epoch": 2.85, + "learning_rate": 1.0587055006035145e-05, + "loss": 0.0352, + "step": 61075 + }, + { + "epoch": 2.85, + "learning_rate": 1.058627122098036e-05, + "loss": 0.0502, + "step": 61080 + }, + { + "epoch": 2.85, + "learning_rate": 1.0585487435925572e-05, + "loss": 0.0615, + "step": 61085 + }, + { + "epoch": 2.85, + "learning_rate": 1.0584703650870786e-05, + "loss": 0.1042, + "step": 61090 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583919865815998e-05, + "loss": 0.1052, + "step": 61095 + }, + { + "epoch": 2.85, + "learning_rate": 1.0583136080761214e-05, + "loss": 0.1772, + "step": 61100 + }, + { + "epoch": 2.85, + "learning_rate": 1.0582352295706426e-05, + "loss": 0.208, + "step": 61105 + }, + { + "epoch": 2.85, + "learning_rate": 1.058156851065164e-05, + "loss": 0.2459, + "step": 61110 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580784725596852e-05, + "loss": 0.432, + "step": 61115 + }, + { + "epoch": 2.85, + "learning_rate": 1.0580000940542066e-05, + "loss": 0.064, + "step": 61120 + }, + { + "epoch": 2.85, + "learning_rate": 1.057921715548728e-05, + "loss": 0.0289, + "step": 61125 + }, + { + "epoch": 2.85, + "learning_rate": 1.0578433370432492e-05, + "loss": 0.0404, + "step": 61130 + }, + { + "epoch": 2.85, + "learning_rate": 1.0577649585377708e-05, + "loss": 0.0537, + "step": 61135 + }, + { + "epoch": 2.85, + "learning_rate": 1.057686580032292e-05, + "loss": 0.0917, + "step": 61140 + }, + { + "epoch": 2.85, + "learning_rate": 1.0576082015268134e-05, + "loss": 0.0957, + "step": 61145 + }, + { + "epoch": 2.85, + "learning_rate": 1.0575298230213346e-05, + "loss": 0.0822, + "step": 61150 + }, + { + "epoch": 2.85, + "learning_rate": 1.0574514445158562e-05, + "loss": 0.1236, + "step": 61155 + }, + { + "epoch": 2.85, + "learning_rate": 1.0573730660103774e-05, + "loss": 0.1939, + "step": 61160 + }, + { + "epoch": 2.85, + "learning_rate": 1.0572946875048988e-05, + "loss": 0.1853, + "step": 61165 + }, + { + "epoch": 2.85, + "learning_rate": 1.05721630899942e-05, + "loss": 0.028, + "step": 61170 + }, + { + "epoch": 2.85, + "learning_rate": 1.0571379304939416e-05, + "loss": 0.0427, + "step": 61175 + }, + { + "epoch": 2.85, + "learning_rate": 1.0570595519884628e-05, + "loss": 0.0422, + "step": 61180 + }, + { + "epoch": 2.85, + "learning_rate": 1.056981173482984e-05, + "loss": 0.0487, + "step": 61185 + }, + { + "epoch": 2.86, + "learning_rate": 1.0569027949775054e-05, + "loss": 0.0449, + "step": 61190 + }, + { + "epoch": 2.86, + "learning_rate": 1.0568244164720266e-05, + "loss": 0.1407, + "step": 61195 + }, + { + "epoch": 2.86, + "learning_rate": 1.0567460379665482e-05, + "loss": 0.1246, + "step": 61200 + }, + { + "epoch": 2.86, + "learning_rate": 1.0566676594610694e-05, + "loss": 0.085, + "step": 61205 + }, + { + "epoch": 2.86, + "learning_rate": 1.0565892809555908e-05, + "loss": 0.2521, + "step": 61210 + }, + { + "epoch": 2.86, + "learning_rate": 1.056510902450112e-05, + "loss": 0.1783, + "step": 61215 + }, + { + "epoch": 2.86, + "learning_rate": 1.0564325239446336e-05, + "loss": 0.1145, + "step": 61220 + }, + { + "epoch": 2.86, + "learning_rate": 1.0563541454391548e-05, + "loss": 0.03, + "step": 61225 + }, + { + "epoch": 2.86, + "learning_rate": 1.0562757669336762e-05, + "loss": 0.0302, + "step": 61230 + }, + { + "epoch": 2.86, + "learning_rate": 1.0561973884281976e-05, + "loss": 0.0758, + "step": 61235 + }, + { + "epoch": 2.86, + "learning_rate": 1.056119009922719e-05, + "loss": 0.0925, + "step": 61240 + }, + { + "epoch": 2.86, + "learning_rate": 1.0560406314172402e-05, + "loss": 0.0622, + "step": 61245 + }, + { + "epoch": 2.86, + "learning_rate": 1.0559622529117614e-05, + "loss": 0.0614, + "step": 61250 + }, + { + "epoch": 2.86, + "learning_rate": 1.055883874406283e-05, + "loss": 0.1648, + "step": 61255 + }, + { + "epoch": 2.86, + "learning_rate": 1.0558054959008042e-05, + "loss": 0.2341, + "step": 61260 + }, + { + "epoch": 2.86, + "learning_rate": 1.0557271173953256e-05, + "loss": 0.269, + "step": 61265 + }, + { + "epoch": 2.86, + "learning_rate": 1.0556487388898468e-05, + "loss": 0.0684, + "step": 61270 + }, + { + "epoch": 2.86, + "learning_rate": 1.0555703603843684e-05, + "loss": 0.107, + "step": 61275 + }, + { + "epoch": 2.86, + "learning_rate": 1.0554919818788896e-05, + "loss": 0.0388, + "step": 61280 + }, + { + "epoch": 2.86, + "learning_rate": 1.055413603373411e-05, + "loss": 0.0633, + "step": 61285 + }, + { + "epoch": 2.86, + "learning_rate": 1.0553352248679322e-05, + "loss": 0.1063, + "step": 61290 + }, + { + "epoch": 2.86, + "learning_rate": 1.0552568463624538e-05, + "loss": 0.1456, + "step": 61295 + }, + { + "epoch": 2.86, + "learning_rate": 1.055178467856975e-05, + "loss": 0.1461, + "step": 61300 + }, + { + "epoch": 2.86, + "learning_rate": 1.0551000893514964e-05, + "loss": 0.1618, + "step": 61305 + }, + { + "epoch": 2.86, + "learning_rate": 1.0550217108460176e-05, + "loss": 0.2129, + "step": 61310 + }, + { + "epoch": 2.86, + "learning_rate": 1.054943332340539e-05, + "loss": 0.2407, + "step": 61315 + }, + { + "epoch": 2.86, + "learning_rate": 1.0548649538350604e-05, + "loss": 0.0771, + "step": 61320 + }, + { + "epoch": 2.86, + "learning_rate": 1.0547865753295816e-05, + "loss": 0.0186, + "step": 61325 + }, + { + "epoch": 2.86, + "learning_rate": 1.054708196824103e-05, + "loss": 0.0309, + "step": 61330 + }, + { + "epoch": 2.86, + "learning_rate": 1.0546298183186244e-05, + "loss": 0.1447, + "step": 61335 + }, + { + "epoch": 2.86, + "learning_rate": 1.0545514398131458e-05, + "loss": 0.0258, + "step": 61340 + }, + { + "epoch": 2.86, + "learning_rate": 1.054473061307667e-05, + "loss": 0.1747, + "step": 61345 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543946828021886e-05, + "loss": 0.1391, + "step": 61350 + }, + { + "epoch": 2.86, + "learning_rate": 1.0543163042967098e-05, + "loss": 0.217, + "step": 61355 + }, + { + "epoch": 2.86, + "learning_rate": 1.0542379257912312e-05, + "loss": 0.2874, + "step": 61360 + }, + { + "epoch": 2.86, + "learning_rate": 1.0541595472857524e-05, + "loss": 0.2921, + "step": 61365 + }, + { + "epoch": 2.86, + "learning_rate": 1.054081168780274e-05, + "loss": 0.0604, + "step": 61370 + }, + { + "epoch": 2.86, + "learning_rate": 1.0540027902747952e-05, + "loss": 0.0352, + "step": 61375 + }, + { + "epoch": 2.86, + "learning_rate": 1.0539244117693164e-05, + "loss": 0.0296, + "step": 61380 + }, + { + "epoch": 2.86, + "learning_rate": 1.0538460332638378e-05, + "loss": 0.0616, + "step": 61385 + }, + { + "epoch": 2.86, + "learning_rate": 1.053767654758359e-05, + "loss": 0.094, + "step": 61390 + }, + { + "epoch": 2.86, + "learning_rate": 1.0536892762528806e-05, + "loss": 0.1211, + "step": 61395 + }, + { + "epoch": 2.87, + "learning_rate": 1.0536108977474018e-05, + "loss": 0.104, + "step": 61400 + }, + { + "epoch": 2.87, + "learning_rate": 1.0535325192419232e-05, + "loss": 0.154, + "step": 61405 + }, + { + "epoch": 2.87, + "learning_rate": 1.0534541407364444e-05, + "loss": 0.1931, + "step": 61410 + }, + { + "epoch": 2.87, + "learning_rate": 1.053375762230966e-05, + "loss": 0.1805, + "step": 61415 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532973837254872e-05, + "loss": 0.0324, + "step": 61420 + }, + { + "epoch": 2.87, + "learning_rate": 1.0532190052200086e-05, + "loss": 0.0607, + "step": 61425 + }, + { + "epoch": 2.87, + "learning_rate": 1.0531406267145298e-05, + "loss": 0.0146, + "step": 61430 + }, + { + "epoch": 2.87, + "learning_rate": 1.0530622482090514e-05, + "loss": 0.0607, + "step": 61435 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529838697035726e-05, + "loss": 0.0477, + "step": 61440 + }, + { + "epoch": 2.87, + "learning_rate": 1.0529054911980938e-05, + "loss": 0.0923, + "step": 61445 + }, + { + "epoch": 2.87, + "learning_rate": 1.0528271126926154e-05, + "loss": 0.0744, + "step": 61450 + }, + { + "epoch": 2.87, + "learning_rate": 1.0527487341871366e-05, + "loss": 0.1322, + "step": 61455 + }, + { + "epoch": 2.87, + "learning_rate": 1.052670355681658e-05, + "loss": 0.2138, + "step": 61460 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525919771761792e-05, + "loss": 0.2974, + "step": 61465 + }, + { + "epoch": 2.87, + "learning_rate": 1.0525135986707008e-05, + "loss": 0.0482, + "step": 61470 + }, + { + "epoch": 2.87, + "learning_rate": 1.052435220165222e-05, + "loss": 0.0448, + "step": 61475 + }, + { + "epoch": 2.87, + "learning_rate": 1.0523568416597434e-05, + "loss": 0.05, + "step": 61480 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522784631542646e-05, + "loss": 0.0529, + "step": 61485 + }, + { + "epoch": 2.87, + "learning_rate": 1.0522000846487861e-05, + "loss": 0.1533, + "step": 61490 + }, + { + "epoch": 2.87, + "learning_rate": 1.0521217061433074e-05, + "loss": 0.1484, + "step": 61495 + }, + { + "epoch": 2.87, + "learning_rate": 1.0520433276378288e-05, + "loss": 0.1626, + "step": 61500 + }, + { + "epoch": 2.87, + "learning_rate": 1.05196494913235e-05, + "loss": 0.1731, + "step": 61505 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518865706268712e-05, + "loss": 0.33, + "step": 61510 + }, + { + "epoch": 2.87, + "learning_rate": 1.0518081921213928e-05, + "loss": 0.3039, + "step": 61515 + }, + { + "epoch": 2.87, + "learning_rate": 1.051729813615914e-05, + "loss": 0.01, + "step": 61520 + }, + { + "epoch": 2.87, + "learning_rate": 1.0516514351104354e-05, + "loss": 0.0286, + "step": 61525 + }, + { + "epoch": 2.87, + "learning_rate": 1.0515730566049568e-05, + "loss": 0.0298, + "step": 61530 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514946780994782e-05, + "loss": 0.077, + "step": 61535 + }, + { + "epoch": 2.87, + "learning_rate": 1.0514162995939994e-05, + "loss": 0.1129, + "step": 61540 + }, + { + "epoch": 2.87, + "learning_rate": 1.0513379210885208e-05, + "loss": 0.0934, + "step": 61545 + }, + { + "epoch": 2.87, + "learning_rate": 1.0512595425830422e-05, + "loss": 0.1048, + "step": 61550 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511811640775635e-05, + "loss": 0.1929, + "step": 61555 + }, + { + "epoch": 2.87, + "learning_rate": 1.0511027855720848e-05, + "loss": 0.281, + "step": 61560 + }, + { + "epoch": 2.87, + "learning_rate": 1.0510244070666063e-05, + "loss": 0.345, + "step": 61565 + }, + { + "epoch": 2.87, + "learning_rate": 1.0509460285611275e-05, + "loss": 0.0306, + "step": 61570 + }, + { + "epoch": 2.87, + "learning_rate": 1.0508676500556488e-05, + "loss": 0.0636, + "step": 61575 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507892715501702e-05, + "loss": 0.0506, + "step": 61580 + }, + { + "epoch": 2.87, + "learning_rate": 1.0507108930446914e-05, + "loss": 0.112, + "step": 61585 + }, + { + "epoch": 2.87, + "learning_rate": 1.050632514539213e-05, + "loss": 0.0215, + "step": 61590 + }, + { + "epoch": 2.87, + "learning_rate": 1.0505541360337342e-05, + "loss": 0.0534, + "step": 61595 + }, + { + "epoch": 2.87, + "learning_rate": 1.0504757575282556e-05, + "loss": 0.0811, + "step": 61600 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503973790227768e-05, + "loss": 0.1887, + "step": 61605 + }, + { + "epoch": 2.87, + "learning_rate": 1.0503190005172983e-05, + "loss": 0.2217, + "step": 61610 + }, + { + "epoch": 2.88, + "learning_rate": 1.0502406220118196e-05, + "loss": 0.3228, + "step": 61615 + }, + { + "epoch": 2.88, + "learning_rate": 1.050162243506341e-05, + "loss": 0.052, + "step": 61620 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500838650008622e-05, + "loss": 0.0327, + "step": 61625 + }, + { + "epoch": 2.88, + "learning_rate": 1.0500054864953837e-05, + "loss": 0.0629, + "step": 61630 + }, + { + "epoch": 2.88, + "learning_rate": 1.049927107989905e-05, + "loss": 0.0539, + "step": 61635 + }, + { + "epoch": 2.88, + "learning_rate": 1.0498487294844262e-05, + "loss": 0.0616, + "step": 61640 + }, + { + "epoch": 2.88, + "learning_rate": 1.0497703509789476e-05, + "loss": 0.0863, + "step": 61645 + }, + { + "epoch": 2.88, + "learning_rate": 1.049691972473469e-05, + "loss": 0.1395, + "step": 61650 + }, + { + "epoch": 2.88, + "learning_rate": 1.0496135939679903e-05, + "loss": 0.0981, + "step": 61655 + }, + { + "epoch": 2.88, + "learning_rate": 1.0495352154625116e-05, + "loss": 0.2278, + "step": 61660 + }, + { + "epoch": 2.88, + "learning_rate": 1.0494568369570331e-05, + "loss": 0.2109, + "step": 61665 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493784584515543e-05, + "loss": 0.0107, + "step": 61670 + }, + { + "epoch": 2.88, + "learning_rate": 1.0493000799460757e-05, + "loss": 0.085, + "step": 61675 + }, + { + "epoch": 2.88, + "learning_rate": 1.049221701440597e-05, + "loss": 0.0593, + "step": 61680 + }, + { + "epoch": 2.88, + "learning_rate": 1.0491433229351185e-05, + "loss": 0.1071, + "step": 61685 + }, + { + "epoch": 2.88, + "learning_rate": 1.0490649444296397e-05, + "loss": 0.0502, + "step": 61690 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489865659241611e-05, + "loss": 0.0725, + "step": 61695 + }, + { + "epoch": 2.88, + "learning_rate": 1.0489081874186823e-05, + "loss": 0.1771, + "step": 61700 + }, + { + "epoch": 2.88, + "learning_rate": 1.0488298089132036e-05, + "loss": 0.1795, + "step": 61705 + }, + { + "epoch": 2.88, + "learning_rate": 1.0487514304077251e-05, + "loss": 0.2024, + "step": 61710 + }, + { + "epoch": 2.88, + "learning_rate": 1.0486730519022463e-05, + "loss": 0.3146, + "step": 61715 + }, + { + "epoch": 2.88, + "learning_rate": 1.0485946733967677e-05, + "loss": 0.0512, + "step": 61720 + }, + { + "epoch": 2.88, + "learning_rate": 1.048516294891289e-05, + "loss": 0.0375, + "step": 61725 + }, + { + "epoch": 2.88, + "learning_rate": 1.0484379163858105e-05, + "loss": 0.0448, + "step": 61730 + }, + { + "epoch": 2.88, + "learning_rate": 1.0483595378803317e-05, + "loss": 0.062, + "step": 61735 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482811593748531e-05, + "loss": 0.1169, + "step": 61740 + }, + { + "epoch": 2.88, + "learning_rate": 1.0482027808693744e-05, + "loss": 0.0705, + "step": 61745 + }, + { + "epoch": 2.88, + "learning_rate": 1.0481244023638959e-05, + "loss": 0.0729, + "step": 61750 + }, + { + "epoch": 2.88, + "learning_rate": 1.0480460238584171e-05, + "loss": 0.1679, + "step": 61755 + }, + { + "epoch": 2.88, + "learning_rate": 1.0479676453529385e-05, + "loss": 0.188, + "step": 61760 + }, + { + "epoch": 2.88, + "learning_rate": 1.04788926684746e-05, + "loss": 0.2839, + "step": 61765 + }, + { + "epoch": 2.88, + "learning_rate": 1.0478108883419811e-05, + "loss": 0.0484, + "step": 61770 + }, + { + "epoch": 2.88, + "learning_rate": 1.0477325098365025e-05, + "loss": 0.0209, + "step": 61775 + }, + { + "epoch": 2.88, + "learning_rate": 1.0476541313310237e-05, + "loss": 0.072, + "step": 61780 + }, + { + "epoch": 2.88, + "learning_rate": 1.0475757528255453e-05, + "loss": 0.043, + "step": 61785 + }, + { + "epoch": 2.88, + "learning_rate": 1.0474973743200665e-05, + "loss": 0.1533, + "step": 61790 + }, + { + "epoch": 2.88, + "learning_rate": 1.047418995814588e-05, + "loss": 0.062, + "step": 61795 + }, + { + "epoch": 2.88, + "learning_rate": 1.0473406173091091e-05, + "loss": 0.0718, + "step": 61800 + }, + { + "epoch": 2.88, + "learning_rate": 1.0472622388036307e-05, + "loss": 0.1603, + "step": 61805 + }, + { + "epoch": 2.88, + "learning_rate": 1.047183860298152e-05, + "loss": 0.2909, + "step": 61810 + }, + { + "epoch": 2.88, + "learning_rate": 1.0471054817926733e-05, + "loss": 0.182, + "step": 61815 + }, + { + "epoch": 2.88, + "learning_rate": 1.0470271032871945e-05, + "loss": 0.0543, + "step": 61820 + }, + { + "epoch": 2.88, + "learning_rate": 1.0469487247817161e-05, + "loss": 0.039, + "step": 61825 + }, + { + "epoch": 2.89, + "learning_rate": 1.0468703462762373e-05, + "loss": 0.0647, + "step": 61830 + }, + { + "epoch": 2.89, + "learning_rate": 1.0467919677707585e-05, + "loss": 0.0652, + "step": 61835 + }, + { + "epoch": 2.89, + "learning_rate": 1.04671358926528e-05, + "loss": 0.0565, + "step": 61840 + }, + { + "epoch": 2.89, + "learning_rate": 1.0466352107598013e-05, + "loss": 0.1336, + "step": 61845 + }, + { + "epoch": 2.89, + "learning_rate": 1.0465568322543227e-05, + "loss": 0.1115, + "step": 61850 + }, + { + "epoch": 2.89, + "learning_rate": 1.046478453748844e-05, + "loss": 0.1073, + "step": 61855 + }, + { + "epoch": 2.89, + "learning_rate": 1.046415750944461e-05, + "loss": 0.2446, + "step": 61860 + }, + { + "epoch": 2.89, + "learning_rate": 1.046353048140078e-05, + "loss": 0.3299, + "step": 61865 + }, + { + "epoch": 2.89, + "learning_rate": 1.0462746696345996e-05, + "loss": 0.0518, + "step": 61870 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461962911291208e-05, + "loss": 0.0517, + "step": 61875 + }, + { + "epoch": 2.89, + "learning_rate": 1.0461179126236422e-05, + "loss": 0.0442, + "step": 61880 + }, + { + "epoch": 2.89, + "learning_rate": 1.0460395341181636e-05, + "loss": 0.0964, + "step": 61885 + }, + { + "epoch": 2.89, + "learning_rate": 1.045961155612685e-05, + "loss": 0.0607, + "step": 61890 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458827771072062e-05, + "loss": 0.1274, + "step": 61895 + }, + { + "epoch": 2.89, + "learning_rate": 1.0458043986017274e-05, + "loss": 0.2033, + "step": 61900 + }, + { + "epoch": 2.89, + "learning_rate": 1.045726020096249e-05, + "loss": 0.2348, + "step": 61905 + }, + { + "epoch": 2.89, + "learning_rate": 1.0456476415907702e-05, + "loss": 0.1433, + "step": 61910 + }, + { + "epoch": 2.89, + "learning_rate": 1.0455692630852916e-05, + "loss": 0.2228, + "step": 61915 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454908845798128e-05, + "loss": 0.0411, + "step": 61920 + }, + { + "epoch": 2.89, + "learning_rate": 1.0454125060743344e-05, + "loss": 0.0441, + "step": 61925 + }, + { + "epoch": 2.89, + "learning_rate": 1.0453341275688556e-05, + "loss": 0.0757, + "step": 61930 + }, + { + "epoch": 2.89, + "learning_rate": 1.045255749063377e-05, + "loss": 0.0323, + "step": 61935 + }, + { + "epoch": 2.89, + "learning_rate": 1.0451773705578982e-05, + "loss": 0.0426, + "step": 61940 + }, + { + "epoch": 2.89, + "learning_rate": 1.0450989920524198e-05, + "loss": 0.0863, + "step": 61945 + }, + { + "epoch": 2.89, + "learning_rate": 1.045020613546941e-05, + "loss": 0.184, + "step": 61950 + }, + { + "epoch": 2.89, + "learning_rate": 1.0449422350414624e-05, + "loss": 0.162, + "step": 61955 + }, + { + "epoch": 2.89, + "learning_rate": 1.0448638565359836e-05, + "loss": 0.1879, + "step": 61960 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447854780305048e-05, + "loss": 0.2687, + "step": 61965 + }, + { + "epoch": 2.89, + "learning_rate": 1.0447070995250264e-05, + "loss": 0.0468, + "step": 61970 + }, + { + "epoch": 2.89, + "learning_rate": 1.0446287210195476e-05, + "loss": 0.0404, + "step": 61975 + }, + { + "epoch": 2.89, + "learning_rate": 1.044550342514069e-05, + "loss": 0.0753, + "step": 61980 + }, + { + "epoch": 2.89, + "learning_rate": 1.0444719640085904e-05, + "loss": 0.0453, + "step": 61985 + }, + { + "epoch": 2.89, + "learning_rate": 1.0443935855031118e-05, + "loss": 0.1476, + "step": 61990 + }, + { + "epoch": 2.89, + "learning_rate": 1.044315206997633e-05, + "loss": 0.0593, + "step": 61995 + }, + { + "epoch": 2.89, + "learning_rate": 1.0442368284921545e-05, + "loss": 0.1595, + "step": 62000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0441584499866758e-05, + "loss": 0.1356, + "step": 62005 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440800714811972e-05, + "loss": 0.2253, + "step": 62010 + }, + { + "epoch": 2.89, + "learning_rate": 1.0440016929757184e-05, + "loss": 0.2739, + "step": 62015 + }, + { + "epoch": 2.89, + "learning_rate": 1.04392331447024e-05, + "loss": 0.0676, + "step": 62020 + }, + { + "epoch": 2.89, + "learning_rate": 1.0438449359647612e-05, + "loss": 0.076, + "step": 62025 + }, + { + "epoch": 2.89, + "learning_rate": 1.0437665574592824e-05, + "loss": 0.0512, + "step": 62030 + }, + { + "epoch": 2.89, + "learning_rate": 1.0436881789538038e-05, + "loss": 0.0411, + "step": 62035 + }, + { + "epoch": 2.89, + "learning_rate": 1.043609800448325e-05, + "loss": 0.2422, + "step": 62040 + }, + { + "epoch": 2.9, + "learning_rate": 1.0435314219428466e-05, + "loss": 0.1052, + "step": 62045 + }, + { + "epoch": 2.9, + "learning_rate": 1.0434530434373678e-05, + "loss": 0.1557, + "step": 62050 + }, + { + "epoch": 2.9, + "learning_rate": 1.0433746649318892e-05, + "loss": 0.217, + "step": 62055 + }, + { + "epoch": 2.9, + "learning_rate": 1.0432962864264104e-05, + "loss": 0.1938, + "step": 62060 + }, + { + "epoch": 2.9, + "learning_rate": 1.043217907920932e-05, + "loss": 0.1747, + "step": 62065 + }, + { + "epoch": 2.9, + "learning_rate": 1.0431395294154532e-05, + "loss": 0.0406, + "step": 62070 + }, + { + "epoch": 2.9, + "learning_rate": 1.0430611509099746e-05, + "loss": 0.0092, + "step": 62075 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429827724044958e-05, + "loss": 0.0618, + "step": 62080 + }, + { + "epoch": 2.9, + "learning_rate": 1.0429043938990173e-05, + "loss": 0.0918, + "step": 62085 + }, + { + "epoch": 2.9, + "learning_rate": 1.0428260153935386e-05, + "loss": 0.1263, + "step": 62090 + }, + { + "epoch": 2.9, + "learning_rate": 1.0427476368880598e-05, + "loss": 0.1332, + "step": 62095 + }, + { + "epoch": 2.9, + "learning_rate": 1.0426692583825813e-05, + "loss": 0.1137, + "step": 62100 + }, + { + "epoch": 2.9, + "learning_rate": 1.0425908798771026e-05, + "loss": 0.0945, + "step": 62105 + }, + { + "epoch": 2.9, + "learning_rate": 1.042512501371624e-05, + "loss": 0.1955, + "step": 62110 + }, + { + "epoch": 2.9, + "learning_rate": 1.0424341228661452e-05, + "loss": 0.226, + "step": 62115 + }, + { + "epoch": 2.9, + "learning_rate": 1.0423557443606667e-05, + "loss": 0.0538, + "step": 62120 + }, + { + "epoch": 2.9, + "learning_rate": 1.042277365855188e-05, + "loss": 0.0342, + "step": 62125 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421989873497093e-05, + "loss": 0.0623, + "step": 62130 + }, + { + "epoch": 2.9, + "learning_rate": 1.0421206088442306e-05, + "loss": 0.0489, + "step": 62135 + }, + { + "epoch": 2.9, + "learning_rate": 1.0420422303387521e-05, + "loss": 0.108, + "step": 62140 + }, + { + "epoch": 2.9, + "learning_rate": 1.0419638518332734e-05, + "loss": 0.0872, + "step": 62145 + }, + { + "epoch": 2.9, + "learning_rate": 1.0418854733277947e-05, + "loss": 0.0748, + "step": 62150 + }, + { + "epoch": 2.9, + "learning_rate": 1.041807094822316e-05, + "loss": 0.1508, + "step": 62155 + }, + { + "epoch": 2.9, + "learning_rate": 1.0417287163168372e-05, + "loss": 0.2324, + "step": 62160 + }, + { + "epoch": 2.9, + "learning_rate": 1.0416503378113587e-05, + "loss": 0.3346, + "step": 62165 + }, + { + "epoch": 2.9, + "learning_rate": 1.04157195930588e-05, + "loss": 0.0784, + "step": 62170 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414935808004014e-05, + "loss": 0.0446, + "step": 62175 + }, + { + "epoch": 2.9, + "learning_rate": 1.0414152022949226e-05, + "loss": 0.0434, + "step": 62180 + }, + { + "epoch": 2.9, + "learning_rate": 1.0413368237894441e-05, + "loss": 0.0521, + "step": 62185 + }, + { + "epoch": 2.9, + "learning_rate": 1.0412584452839654e-05, + "loss": 0.0605, + "step": 62190 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411800667784867e-05, + "loss": 0.1037, + "step": 62195 + }, + { + "epoch": 2.9, + "learning_rate": 1.0411016882730081e-05, + "loss": 0.0748, + "step": 62200 + }, + { + "epoch": 2.9, + "learning_rate": 1.0410233097675295e-05, + "loss": 0.2214, + "step": 62205 + }, + { + "epoch": 2.9, + "learning_rate": 1.0409449312620508e-05, + "loss": 0.1377, + "step": 62210 + }, + { + "epoch": 2.9, + "learning_rate": 1.0408665527565721e-05, + "loss": 0.2861, + "step": 62215 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407881742510935e-05, + "loss": 0.0281, + "step": 62220 + }, + { + "epoch": 2.9, + "learning_rate": 1.0407097957456148e-05, + "loss": 0.0211, + "step": 62225 + }, + { + "epoch": 2.9, + "learning_rate": 1.0406314172401361e-05, + "loss": 0.0767, + "step": 62230 + }, + { + "epoch": 2.9, + "learning_rate": 1.0405530387346574e-05, + "loss": 0.0752, + "step": 62235 + }, + { + "epoch": 2.9, + "learning_rate": 1.040474660229179e-05, + "loss": 0.0622, + "step": 62240 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403962817237001e-05, + "loss": 0.0847, + "step": 62245 + }, + { + "epoch": 2.9, + "learning_rate": 1.0403179032182215e-05, + "loss": 0.0918, + "step": 62250 + }, + { + "epoch": 2.9, + "learning_rate": 1.0402395247127428e-05, + "loss": 0.0615, + "step": 62255 + }, + { + "epoch": 2.91, + "learning_rate": 1.0401611462072643e-05, + "loss": 0.1679, + "step": 62260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0400827677017855e-05, + "loss": 0.3511, + "step": 62265 + }, + { + "epoch": 2.91, + "learning_rate": 1.040004389196307e-05, + "loss": 0.0629, + "step": 62270 + }, + { + "epoch": 2.91, + "learning_rate": 1.0399260106908282e-05, + "loss": 0.0183, + "step": 62275 + }, + { + "epoch": 2.91, + "learning_rate": 1.0398476321853497e-05, + "loss": 0.0467, + "step": 62280 + }, + { + "epoch": 2.91, + "learning_rate": 1.039769253679871e-05, + "loss": 0.0157, + "step": 62285 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396908751743922e-05, + "loss": 0.0978, + "step": 62290 + }, + { + "epoch": 2.91, + "learning_rate": 1.0396124966689135e-05, + "loss": 0.0457, + "step": 62295 + }, + { + "epoch": 2.91, + "learning_rate": 1.039534118163435e-05, + "loss": 0.1463, + "step": 62300 + }, + { + "epoch": 2.91, + "learning_rate": 1.0394557396579563e-05, + "loss": 0.1993, + "step": 62305 + }, + { + "epoch": 2.91, + "learning_rate": 1.0393773611524775e-05, + "loss": 0.1728, + "step": 62310 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392989826469991e-05, + "loss": 0.163, + "step": 62315 + }, + { + "epoch": 2.91, + "learning_rate": 1.0392206041415203e-05, + "loss": 0.0391, + "step": 62320 + }, + { + "epoch": 2.91, + "learning_rate": 1.0391422256360417e-05, + "loss": 0.054, + "step": 62325 + }, + { + "epoch": 2.91, + "learning_rate": 1.039063847130563e-05, + "loss": 0.0313, + "step": 62330 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389854686250845e-05, + "loss": 0.0868, + "step": 62335 + }, + { + "epoch": 2.91, + "learning_rate": 1.0389070901196057e-05, + "loss": 0.0782, + "step": 62340 + }, + { + "epoch": 2.91, + "learning_rate": 1.0388287116141271e-05, + "loss": 0.0603, + "step": 62345 + }, + { + "epoch": 2.91, + "learning_rate": 1.0387503331086483e-05, + "loss": 0.1835, + "step": 62350 + }, + { + "epoch": 2.91, + "learning_rate": 1.0386719546031696e-05, + "loss": 0.1227, + "step": 62355 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385935760976911e-05, + "loss": 0.4369, + "step": 62360 + }, + { + "epoch": 2.91, + "learning_rate": 1.0385151975922123e-05, + "loss": 0.3164, + "step": 62365 + }, + { + "epoch": 2.91, + "learning_rate": 1.0384368190867337e-05, + "loss": 0.0913, + "step": 62370 + }, + { + "epoch": 2.91, + "learning_rate": 1.038358440581255e-05, + "loss": 0.0212, + "step": 62375 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382800620757765e-05, + "loss": 0.0354, + "step": 62380 + }, + { + "epoch": 2.91, + "learning_rate": 1.0382016835702977e-05, + "loss": 0.0694, + "step": 62385 + }, + { + "epoch": 2.91, + "learning_rate": 1.0381233050648191e-05, + "loss": 0.1128, + "step": 62390 + }, + { + "epoch": 2.91, + "learning_rate": 1.0380449265593403e-05, + "loss": 0.1718, + "step": 62395 + }, + { + "epoch": 2.91, + "learning_rate": 1.0379665480538619e-05, + "loss": 0.1168, + "step": 62400 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378881695483831e-05, + "loss": 0.2419, + "step": 62405 + }, + { + "epoch": 2.91, + "learning_rate": 1.0378097910429045e-05, + "loss": 0.2319, + "step": 62410 + }, + { + "epoch": 2.91, + "learning_rate": 1.0377314125374259e-05, + "loss": 0.261, + "step": 62415 + }, + { + "epoch": 2.91, + "learning_rate": 1.0376530340319471e-05, + "loss": 0.0797, + "step": 62420 + }, + { + "epoch": 2.91, + "learning_rate": 1.0375746555264685e-05, + "loss": 0.0227, + "step": 62425 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374962770209897e-05, + "loss": 0.0566, + "step": 62430 + }, + { + "epoch": 2.91, + "learning_rate": 1.0374178985155113e-05, + "loss": 0.1145, + "step": 62435 + }, + { + "epoch": 2.91, + "learning_rate": 1.0373395200100325e-05, + "loss": 0.0866, + "step": 62440 + }, + { + "epoch": 2.91, + "learning_rate": 1.0372611415045539e-05, + "loss": 0.0935, + "step": 62445 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371827629990751e-05, + "loss": 0.1222, + "step": 62450 + }, + { + "epoch": 2.91, + "learning_rate": 1.0371043844935967e-05, + "loss": 0.1236, + "step": 62455 + }, + { + "epoch": 2.91, + "learning_rate": 1.0370260059881179e-05, + "loss": 0.2584, + "step": 62460 + }, + { + "epoch": 2.91, + "learning_rate": 1.0369476274826393e-05, + "loss": 0.3033, + "step": 62465 + }, + { + "epoch": 2.91, + "learning_rate": 1.0368692489771605e-05, + "loss": 0.0242, + "step": 62470 + }, + { + "epoch": 2.92, + "learning_rate": 1.036790870471682e-05, + "loss": 0.036, + "step": 62475 + }, + { + "epoch": 2.92, + "learning_rate": 1.0367124919662033e-05, + "loss": 0.0251, + "step": 62480 + }, + { + "epoch": 2.92, + "learning_rate": 1.0366341134607245e-05, + "loss": 0.0294, + "step": 62485 + }, + { + "epoch": 2.92, + "learning_rate": 1.0365557349552459e-05, + "loss": 0.0555, + "step": 62490 + }, + { + "epoch": 2.92, + "learning_rate": 1.0364773564497671e-05, + "loss": 0.0958, + "step": 62495 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363989779442887e-05, + "loss": 0.1204, + "step": 62500 + }, + { + "epoch": 2.92, + "learning_rate": 1.0363205994388099e-05, + "loss": 0.0712, + "step": 62505 + }, + { + "epoch": 2.92, + "learning_rate": 1.0362422209333313e-05, + "loss": 0.1334, + "step": 62510 + }, + { + "epoch": 2.92, + "learning_rate": 1.0361638424278527e-05, + "loss": 0.2207, + "step": 62515 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360854639223741e-05, + "loss": 0.0133, + "step": 62520 + }, + { + "epoch": 2.92, + "learning_rate": 1.0360070854168953e-05, + "loss": 0.0209, + "step": 62525 + }, + { + "epoch": 2.92, + "learning_rate": 1.0359287069114167e-05, + "loss": 0.0494, + "step": 62530 + }, + { + "epoch": 2.92, + "learning_rate": 1.0358503284059381e-05, + "loss": 0.0147, + "step": 62535 + }, + { + "epoch": 2.92, + "learning_rate": 1.0357719499004595e-05, + "loss": 0.1232, + "step": 62540 + }, + { + "epoch": 2.92, + "learning_rate": 1.0356935713949807e-05, + "loss": 0.0512, + "step": 62545 + }, + { + "epoch": 2.92, + "learning_rate": 1.035615192889502e-05, + "loss": 0.2343, + "step": 62550 + }, + { + "epoch": 2.92, + "learning_rate": 1.0355368143840235e-05, + "loss": 0.1384, + "step": 62555 + }, + { + "epoch": 2.92, + "learning_rate": 1.0354584358785447e-05, + "loss": 0.2172, + "step": 62560 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353800573730661e-05, + "loss": 0.2982, + "step": 62565 + }, + { + "epoch": 2.92, + "learning_rate": 1.0353016788675873e-05, + "loss": 0.0251, + "step": 62570 + }, + { + "epoch": 2.92, + "learning_rate": 1.0352233003621089e-05, + "loss": 0.1112, + "step": 62575 + }, + { + "epoch": 2.92, + "learning_rate": 1.0351449218566301e-05, + "loss": 0.0654, + "step": 62580 + }, + { + "epoch": 2.92, + "learning_rate": 1.0350665433511515e-05, + "loss": 0.0838, + "step": 62585 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349881648456727e-05, + "loss": 0.1541, + "step": 62590 + }, + { + "epoch": 2.92, + "learning_rate": 1.0349097863401943e-05, + "loss": 0.0852, + "step": 62595 + }, + { + "epoch": 2.92, + "learning_rate": 1.0348314078347155e-05, + "loss": 0.2073, + "step": 62600 + }, + { + "epoch": 2.92, + "learning_rate": 1.0347530293292369e-05, + "loss": 0.1479, + "step": 62605 + }, + { + "epoch": 2.92, + "learning_rate": 1.0346746508237581e-05, + "loss": 0.2269, + "step": 62610 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345962723182795e-05, + "loss": 0.1799, + "step": 62615 + }, + { + "epoch": 2.92, + "learning_rate": 1.0345178938128009e-05, + "loss": 0.0507, + "step": 62620 + }, + { + "epoch": 2.92, + "learning_rate": 1.0344395153073221e-05, + "loss": 0.0694, + "step": 62625 + }, + { + "epoch": 2.92, + "learning_rate": 1.0343611368018437e-05, + "loss": 0.0565, + "step": 62630 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342827582963649e-05, + "loss": 0.0774, + "step": 62635 + }, + { + "epoch": 2.92, + "learning_rate": 1.0342043797908863e-05, + "loss": 0.1522, + "step": 62640 + }, + { + "epoch": 2.92, + "learning_rate": 1.0341260012854075e-05, + "loss": 0.1715, + "step": 62645 + }, + { + "epoch": 2.92, + "learning_rate": 1.034047622779929e-05, + "loss": 0.1394, + "step": 62650 + }, + { + "epoch": 2.92, + "learning_rate": 1.0339692442744503e-05, + "loss": 0.2182, + "step": 62655 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338908657689717e-05, + "loss": 0.3029, + "step": 62660 + }, + { + "epoch": 2.92, + "learning_rate": 1.0338124872634929e-05, + "loss": 0.2364, + "step": 62665 + }, + { + "epoch": 2.92, + "learning_rate": 1.0337341087580144e-05, + "loss": 0.036, + "step": 62670 + }, + { + "epoch": 2.92, + "learning_rate": 1.0336557302525357e-05, + "loss": 0.0416, + "step": 62675 + }, + { + "epoch": 2.92, + "learning_rate": 1.0335773517470569e-05, + "loss": 0.0485, + "step": 62680 + }, + { + "epoch": 2.92, + "learning_rate": 1.0334989732415783e-05, + "loss": 0.0772, + "step": 62685 + }, + { + "epoch": 2.93, + "learning_rate": 1.0334205947360995e-05, + "loss": 0.0545, + "step": 62690 + }, + { + "epoch": 2.93, + "learning_rate": 1.033342216230621e-05, + "loss": 0.0758, + "step": 62695 + }, + { + "epoch": 2.93, + "learning_rate": 1.0332638377251423e-05, + "loss": 0.1142, + "step": 62700 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331854592196637e-05, + "loss": 0.1031, + "step": 62705 + }, + { + "epoch": 2.93, + "learning_rate": 1.0331070807141849e-05, + "loss": 0.3682, + "step": 62710 + }, + { + "epoch": 2.93, + "learning_rate": 1.0330287022087065e-05, + "loss": 0.3997, + "step": 62715 + }, + { + "epoch": 2.93, + "learning_rate": 1.0329503237032277e-05, + "loss": 0.052, + "step": 62720 + }, + { + "epoch": 2.93, + "learning_rate": 1.032871945197749e-05, + "loss": 0.0213, + "step": 62725 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327935666922705e-05, + "loss": 0.0501, + "step": 62730 + }, + { + "epoch": 2.93, + "learning_rate": 1.0327151881867918e-05, + "loss": 0.0711, + "step": 62735 + }, + { + "epoch": 2.93, + "learning_rate": 1.032636809681313e-05, + "loss": 0.1498, + "step": 62740 + }, + { + "epoch": 2.93, + "learning_rate": 1.0325584311758343e-05, + "loss": 0.1179, + "step": 62745 + }, + { + "epoch": 2.93, + "learning_rate": 1.0324800526703559e-05, + "loss": 0.1803, + "step": 62750 + }, + { + "epoch": 2.93, + "learning_rate": 1.032401674164877e-05, + "loss": 0.1619, + "step": 62755 + }, + { + "epoch": 2.93, + "learning_rate": 1.0323232956593985e-05, + "loss": 0.2089, + "step": 62760 + }, + { + "epoch": 2.93, + "learning_rate": 1.0322449171539197e-05, + "loss": 0.4382, + "step": 62765 + }, + { + "epoch": 2.93, + "learning_rate": 1.0321665386484412e-05, + "loss": 0.0535, + "step": 62770 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320881601429625e-05, + "loss": 0.029, + "step": 62775 + }, + { + "epoch": 2.93, + "learning_rate": 1.0320097816374839e-05, + "loss": 0.0429, + "step": 62780 + }, + { + "epoch": 2.93, + "learning_rate": 1.031931403132005e-05, + "loss": 0.0829, + "step": 62785 + }, + { + "epoch": 2.93, + "learning_rate": 1.0318530246265266e-05, + "loss": 0.0953, + "step": 62790 + }, + { + "epoch": 2.93, + "learning_rate": 1.0317746461210479e-05, + "loss": 0.1233, + "step": 62795 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316962676155692e-05, + "loss": 0.0843, + "step": 62800 + }, + { + "epoch": 2.93, + "learning_rate": 1.0316178891100905e-05, + "loss": 0.1054, + "step": 62805 + }, + { + "epoch": 2.93, + "learning_rate": 1.0315395106046117e-05, + "loss": 0.277, + "step": 62810 + }, + { + "epoch": 2.93, + "learning_rate": 1.0314611320991333e-05, + "loss": 0.4017, + "step": 62815 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313827535936545e-05, + "loss": 0.031, + "step": 62820 + }, + { + "epoch": 2.93, + "learning_rate": 1.0313043750881759e-05, + "loss": 0.0184, + "step": 62825 + }, + { + "epoch": 2.93, + "learning_rate": 1.0312259965826973e-05, + "loss": 0.1057, + "step": 62830 + }, + { + "epoch": 2.93, + "learning_rate": 1.0311476180772186e-05, + "loss": 0.1594, + "step": 62835 + }, + { + "epoch": 2.93, + "learning_rate": 1.0310692395717399e-05, + "loss": 0.1388, + "step": 62840 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309908610662613e-05, + "loss": 0.0698, + "step": 62845 + }, + { + "epoch": 2.93, + "learning_rate": 1.0309124825607826e-05, + "loss": 0.2027, + "step": 62850 + }, + { + "epoch": 2.93, + "learning_rate": 1.030834104055304e-05, + "loss": 0.0898, + "step": 62855 + }, + { + "epoch": 2.93, + "learning_rate": 1.0307557255498253e-05, + "loss": 0.2479, + "step": 62860 + }, + { + "epoch": 2.93, + "learning_rate": 1.0306773470443468e-05, + "loss": 0.2501, + "step": 62865 + }, + { + "epoch": 2.93, + "learning_rate": 1.030598968538868e-05, + "loss": 0.0262, + "step": 62870 + }, + { + "epoch": 2.93, + "learning_rate": 1.0305205900333893e-05, + "loss": 0.0397, + "step": 62875 + }, + { + "epoch": 2.93, + "learning_rate": 1.0304422115279107e-05, + "loss": 0.074, + "step": 62880 + }, + { + "epoch": 2.93, + "learning_rate": 1.0303638330224319e-05, + "loss": 0.0701, + "step": 62885 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302854545169534e-05, + "loss": 0.0451, + "step": 62890 + }, + { + "epoch": 2.93, + "learning_rate": 1.0302070760114747e-05, + "loss": 0.1437, + "step": 62895 + }, + { + "epoch": 2.94, + "learning_rate": 1.030128697505996e-05, + "loss": 0.0555, + "step": 62900 + }, + { + "epoch": 2.94, + "learning_rate": 1.0300503190005173e-05, + "loss": 0.1305, + "step": 62905 + }, + { + "epoch": 2.94, + "learning_rate": 1.0299719404950388e-05, + "loss": 0.2862, + "step": 62910 + }, + { + "epoch": 2.94, + "learning_rate": 1.02989356198956e-05, + "loss": 0.2047, + "step": 62915 + }, + { + "epoch": 2.94, + "learning_rate": 1.0298151834840814e-05, + "loss": 0.0395, + "step": 62920 + }, + { + "epoch": 2.94, + "learning_rate": 1.0297368049786027e-05, + "loss": 0.0727, + "step": 62925 + }, + { + "epoch": 2.94, + "learning_rate": 1.0296584264731242e-05, + "loss": 0.0484, + "step": 62930 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295800479676454e-05, + "loss": 0.0634, + "step": 62935 + }, + { + "epoch": 2.94, + "learning_rate": 1.0295016694621667e-05, + "loss": 0.1216, + "step": 62940 + }, + { + "epoch": 2.94, + "learning_rate": 1.0294232909566882e-05, + "loss": 0.0631, + "step": 62945 + }, + { + "epoch": 2.94, + "learning_rate": 1.0293449124512094e-05, + "loss": 0.0701, + "step": 62950 + }, + { + "epoch": 2.94, + "learning_rate": 1.0292665339457308e-05, + "loss": 0.1508, + "step": 62955 + }, + { + "epoch": 2.94, + "learning_rate": 1.029188155440252e-05, + "loss": 0.1583, + "step": 62960 + }, + { + "epoch": 2.94, + "learning_rate": 1.0291097769347736e-05, + "loss": 0.2747, + "step": 62965 + }, + { + "epoch": 2.94, + "learning_rate": 1.0290313984292948e-05, + "loss": 0.027, + "step": 62970 + }, + { + "epoch": 2.94, + "learning_rate": 1.0289530199238162e-05, + "loss": 0.0262, + "step": 62975 + }, + { + "epoch": 2.94, + "learning_rate": 1.0288746414183374e-05, + "loss": 0.0405, + "step": 62980 + }, + { + "epoch": 2.94, + "learning_rate": 1.028796262912859e-05, + "loss": 0.0914, + "step": 62985 + }, + { + "epoch": 2.94, + "learning_rate": 1.0287178844073802e-05, + "loss": 0.0421, + "step": 62990 + }, + { + "epoch": 2.94, + "learning_rate": 1.0286395059019016e-05, + "loss": 0.1202, + "step": 62995 + }, + { + "epoch": 2.94, + "learning_rate": 1.0285611273964228e-05, + "loss": 0.1285, + "step": 63000 + }, + { + "epoch": 2.94, + "learning_rate": 1.028482748890944e-05, + "loss": 0.169, + "step": 63005 + }, + { + "epoch": 2.94, + "learning_rate": 1.0284043703854656e-05, + "loss": 0.1355, + "step": 63010 + }, + { + "epoch": 2.94, + "learning_rate": 1.0283259918799868e-05, + "loss": 0.2761, + "step": 63015 + }, + { + "epoch": 2.94, + "learning_rate": 1.0282476133745082e-05, + "loss": 0.089, + "step": 63020 + }, + { + "epoch": 2.94, + "learning_rate": 1.0281692348690295e-05, + "loss": 0.0571, + "step": 63025 + }, + { + "epoch": 2.94, + "learning_rate": 1.028090856363551e-05, + "loss": 0.0232, + "step": 63030 + }, + { + "epoch": 2.94, + "learning_rate": 1.0280124778580722e-05, + "loss": 0.0644, + "step": 63035 + }, + { + "epoch": 2.94, + "learning_rate": 1.0279340993525936e-05, + "loss": 0.0646, + "step": 63040 + }, + { + "epoch": 2.94, + "learning_rate": 1.027855720847115e-05, + "loss": 0.1479, + "step": 63045 + }, + { + "epoch": 2.94, + "learning_rate": 1.0277773423416364e-05, + "loss": 0.1911, + "step": 63050 + }, + { + "epoch": 2.94, + "learning_rate": 1.0276989638361576e-05, + "loss": 0.1927, + "step": 63055 + }, + { + "epoch": 2.94, + "learning_rate": 1.027620585330679e-05, + "loss": 0.1748, + "step": 63060 + }, + { + "epoch": 2.94, + "learning_rate": 1.0275422068252004e-05, + "loss": 0.2667, + "step": 63065 + }, + { + "epoch": 2.94, + "learning_rate": 1.0274638283197216e-05, + "loss": 0.0506, + "step": 63070 + }, + { + "epoch": 2.94, + "learning_rate": 1.027385449814243e-05, + "loss": 0.0323, + "step": 63075 + }, + { + "epoch": 2.94, + "learning_rate": 1.0273070713087642e-05, + "loss": 0.0294, + "step": 63080 + }, + { + "epoch": 2.94, + "learning_rate": 1.0272286928032858e-05, + "loss": 0.0357, + "step": 63085 + }, + { + "epoch": 2.94, + "learning_rate": 1.027150314297807e-05, + "loss": 0.0781, + "step": 63090 + }, + { + "epoch": 2.94, + "learning_rate": 1.0270719357923284e-05, + "loss": 0.1175, + "step": 63095 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269935572868496e-05, + "loss": 0.1468, + "step": 63100 + }, + { + "epoch": 2.94, + "learning_rate": 1.0269151787813712e-05, + "loss": 0.1011, + "step": 63105 + }, + { + "epoch": 2.94, + "learning_rate": 1.0268368002758924e-05, + "loss": 0.2504, + "step": 63110 + }, + { + "epoch": 2.95, + "learning_rate": 1.0267584217704138e-05, + "loss": 0.3333, + "step": 63115 + }, + { + "epoch": 2.95, + "learning_rate": 1.026680043264935e-05, + "loss": 0.03, + "step": 63120 + }, + { + "epoch": 2.95, + "learning_rate": 1.0266016647594566e-05, + "loss": 0.0314, + "step": 63125 + }, + { + "epoch": 2.95, + "learning_rate": 1.0265232862539778e-05, + "loss": 0.0549, + "step": 63130 + }, + { + "epoch": 2.95, + "learning_rate": 1.026444907748499e-05, + "loss": 0.0711, + "step": 63135 + }, + { + "epoch": 2.95, + "learning_rate": 1.0263665292430204e-05, + "loss": 0.0903, + "step": 63140 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262881507375418e-05, + "loss": 0.1095, + "step": 63145 + }, + { + "epoch": 2.95, + "learning_rate": 1.0262097722320632e-05, + "loss": 0.1305, + "step": 63150 + }, + { + "epoch": 2.95, + "learning_rate": 1.0261313937265844e-05, + "loss": 0.1326, + "step": 63155 + }, + { + "epoch": 2.95, + "learning_rate": 1.026053015221106e-05, + "loss": 0.2379, + "step": 63160 + }, + { + "epoch": 2.95, + "learning_rate": 1.0259746367156272e-05, + "loss": 0.3484, + "step": 63165 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258962582101486e-05, + "loss": 0.0702, + "step": 63170 + }, + { + "epoch": 2.95, + "learning_rate": 1.0258178797046698e-05, + "loss": 0.0214, + "step": 63175 + }, + { + "epoch": 2.95, + "learning_rate": 1.0257395011991914e-05, + "loss": 0.0748, + "step": 63180 + }, + { + "epoch": 2.95, + "learning_rate": 1.0256611226937126e-05, + "loss": 0.046, + "step": 63185 + }, + { + "epoch": 2.95, + "learning_rate": 1.025582744188234e-05, + "loss": 0.0891, + "step": 63190 + }, + { + "epoch": 2.95, + "learning_rate": 1.0255043656827552e-05, + "loss": 0.0839, + "step": 63195 + }, + { + "epoch": 2.95, + "learning_rate": 1.0254259871772764e-05, + "loss": 0.1694, + "step": 63200 + }, + { + "epoch": 2.95, + "learning_rate": 1.025347608671798e-05, + "loss": 0.1068, + "step": 63205 + }, + { + "epoch": 2.95, + "learning_rate": 1.0252692301663192e-05, + "loss": 0.3257, + "step": 63210 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251908516608406e-05, + "loss": 0.21, + "step": 63215 + }, + { + "epoch": 2.95, + "learning_rate": 1.0251124731553618e-05, + "loss": 0.0523, + "step": 63220 + }, + { + "epoch": 2.95, + "learning_rate": 1.0250340946498834e-05, + "loss": 0.0437, + "step": 63225 + }, + { + "epoch": 2.95, + "learning_rate": 1.0249557161444046e-05, + "loss": 0.0957, + "step": 63230 + }, + { + "epoch": 2.95, + "learning_rate": 1.024877337638926e-05, + "loss": 0.0554, + "step": 63235 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247989591334472e-05, + "loss": 0.0969, + "step": 63240 + }, + { + "epoch": 2.95, + "learning_rate": 1.0247205806279688e-05, + "loss": 0.1589, + "step": 63245 + }, + { + "epoch": 2.95, + "learning_rate": 1.02464220212249e-05, + "loss": 0.1167, + "step": 63250 + }, + { + "epoch": 2.95, + "learning_rate": 1.0245638236170114e-05, + "loss": 0.1879, + "step": 63255 + }, + { + "epoch": 2.95, + "learning_rate": 1.0244854451115328e-05, + "loss": 0.1869, + "step": 63260 + }, + { + "epoch": 2.95, + "learning_rate": 1.024407066606054e-05, + "loss": 0.1766, + "step": 63265 + }, + { + "epoch": 2.95, + "learning_rate": 1.0243286881005754e-05, + "loss": 0.0105, + "step": 63270 + }, + { + "epoch": 2.95, + "learning_rate": 1.0242503095950966e-05, + "loss": 0.0834, + "step": 63275 + }, + { + "epoch": 2.95, + "learning_rate": 1.0241719310896182e-05, + "loss": 0.0791, + "step": 63280 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240935525841394e-05, + "loss": 0.0993, + "step": 63285 + }, + { + "epoch": 2.95, + "learning_rate": 1.0240151740786608e-05, + "loss": 0.0776, + "step": 63290 + }, + { + "epoch": 2.95, + "learning_rate": 1.023936795573182e-05, + "loss": 0.1271, + "step": 63295 + }, + { + "epoch": 2.95, + "learning_rate": 1.0238584170677036e-05, + "loss": 0.0777, + "step": 63300 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237800385622248e-05, + "loss": 0.2114, + "step": 63305 + }, + { + "epoch": 2.95, + "learning_rate": 1.0237016600567462e-05, + "loss": 0.2023, + "step": 63310 + }, + { + "epoch": 2.95, + "learning_rate": 1.0236232815512674e-05, + "loss": 0.2993, + "step": 63315 + }, + { + "epoch": 2.95, + "learning_rate": 1.023544903045789e-05, + "loss": 0.0464, + "step": 63320 + }, + { + "epoch": 2.95, + "learning_rate": 1.0234665245403102e-05, + "loss": 0.0378, + "step": 63325 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233881460348314e-05, + "loss": 0.0215, + "step": 63330 + }, + { + "epoch": 2.96, + "learning_rate": 1.0233097675293528e-05, + "loss": 0.0999, + "step": 63335 + }, + { + "epoch": 2.96, + "learning_rate": 1.023231389023874e-05, + "loss": 0.0813, + "step": 63340 + }, + { + "epoch": 2.96, + "learning_rate": 1.0231530105183956e-05, + "loss": 0.0367, + "step": 63345 + }, + { + "epoch": 2.96, + "learning_rate": 1.0230746320129168e-05, + "loss": 0.1402, + "step": 63350 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229962535074382e-05, + "loss": 0.2003, + "step": 63355 + }, + { + "epoch": 2.96, + "learning_rate": 1.0229178750019596e-05, + "loss": 0.1149, + "step": 63360 + }, + { + "epoch": 2.96, + "learning_rate": 1.022839496496481e-05, + "loss": 0.3584, + "step": 63365 + }, + { + "epoch": 2.96, + "learning_rate": 1.0227611179910022e-05, + "loss": 0.1109, + "step": 63370 + }, + { + "epoch": 2.96, + "learning_rate": 1.0226827394855236e-05, + "loss": 0.0422, + "step": 63375 + }, + { + "epoch": 2.96, + "learning_rate": 1.022604360980045e-05, + "loss": 0.0409, + "step": 63380 + }, + { + "epoch": 2.96, + "learning_rate": 1.0225259824745664e-05, + "loss": 0.0294, + "step": 63385 + }, + { + "epoch": 2.96, + "learning_rate": 1.0224476039690876e-05, + "loss": 0.0393, + "step": 63390 + }, + { + "epoch": 2.96, + "learning_rate": 1.0223692254636088e-05, + "loss": 0.0275, + "step": 63395 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222908469581304e-05, + "loss": 0.1272, + "step": 63400 + }, + { + "epoch": 2.96, + "learning_rate": 1.0222124684526516e-05, + "loss": 0.1611, + "step": 63405 + }, + { + "epoch": 2.96, + "learning_rate": 1.022134089947173e-05, + "loss": 0.3513, + "step": 63410 + }, + { + "epoch": 2.96, + "learning_rate": 1.0220557114416942e-05, + "loss": 0.2599, + "step": 63415 + }, + { + "epoch": 2.96, + "learning_rate": 1.0219773329362158e-05, + "loss": 0.0898, + "step": 63420 + }, + { + "epoch": 2.96, + "learning_rate": 1.021898954430737e-05, + "loss": 0.0381, + "step": 63425 + }, + { + "epoch": 2.96, + "learning_rate": 1.0218205759252584e-05, + "loss": 0.0469, + "step": 63430 + }, + { + "epoch": 2.96, + "learning_rate": 1.0217421974197796e-05, + "loss": 0.0635, + "step": 63435 + }, + { + "epoch": 2.96, + "learning_rate": 1.0216638189143011e-05, + "loss": 0.0756, + "step": 63440 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215854404088224e-05, + "loss": 0.0884, + "step": 63445 + }, + { + "epoch": 2.96, + "learning_rate": 1.0215070619033438e-05, + "loss": 0.1282, + "step": 63450 + }, + { + "epoch": 2.96, + "learning_rate": 1.021428683397865e-05, + "loss": 0.1419, + "step": 63455 + }, + { + "epoch": 2.96, + "learning_rate": 1.0213503048923864e-05, + "loss": 0.2619, + "step": 63460 + }, + { + "epoch": 2.96, + "learning_rate": 1.0212719263869078e-05, + "loss": 0.2593, + "step": 63465 + }, + { + "epoch": 2.96, + "learning_rate": 1.021193547881429e-05, + "loss": 0.0369, + "step": 63470 + }, + { + "epoch": 2.96, + "learning_rate": 1.0211151693759505e-05, + "loss": 0.0356, + "step": 63475 + }, + { + "epoch": 2.96, + "learning_rate": 1.0210367908704718e-05, + "loss": 0.0794, + "step": 63480 + }, + { + "epoch": 2.96, + "learning_rate": 1.0209584123649932e-05, + "loss": 0.0407, + "step": 63485 + }, + { + "epoch": 2.96, + "learning_rate": 1.0208800338595144e-05, + "loss": 0.0817, + "step": 63490 + }, + { + "epoch": 2.96, + "learning_rate": 1.020801655354036e-05, + "loss": 0.0535, + "step": 63495 + }, + { + "epoch": 2.96, + "learning_rate": 1.0207232768485572e-05, + "loss": 0.1159, + "step": 63500 + }, + { + "epoch": 2.96, + "learning_rate": 1.0206448983430785e-05, + "loss": 0.2946, + "step": 63505 + }, + { + "epoch": 2.96, + "learning_rate": 1.0205665198375998e-05, + "loss": 0.2417, + "step": 63510 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204881413321213e-05, + "loss": 0.226, + "step": 63515 + }, + { + "epoch": 2.96, + "learning_rate": 1.0204097628266425e-05, + "loss": 0.0084, + "step": 63520 + }, + { + "epoch": 2.96, + "learning_rate": 1.0203313843211638e-05, + "loss": 0.041, + "step": 63525 + }, + { + "epoch": 2.96, + "learning_rate": 1.0202530058156852e-05, + "loss": 0.0369, + "step": 63530 + }, + { + "epoch": 2.96, + "learning_rate": 1.0201746273102064e-05, + "loss": 0.0958, + "step": 63535 + }, + { + "epoch": 2.96, + "learning_rate": 1.020096248804728e-05, + "loss": 0.1259, + "step": 63540 + }, + { + "epoch": 2.97, + "learning_rate": 1.0200178702992492e-05, + "loss": 0.1039, + "step": 63545 + }, + { + "epoch": 2.97, + "learning_rate": 1.0199394917937706e-05, + "loss": 0.1852, + "step": 63550 + }, + { + "epoch": 2.97, + "learning_rate": 1.0198611132882918e-05, + "loss": 0.1922, + "step": 63555 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197827347828133e-05, + "loss": 0.2911, + "step": 63560 + }, + { + "epoch": 2.97, + "learning_rate": 1.0197043562773346e-05, + "loss": 0.2036, + "step": 63565 + }, + { + "epoch": 2.97, + "learning_rate": 1.019625977771856e-05, + "loss": 0.0676, + "step": 63570 + }, + { + "epoch": 2.97, + "learning_rate": 1.0195475992663773e-05, + "loss": 0.0323, + "step": 63575 + }, + { + "epoch": 2.97, + "learning_rate": 1.0194692207608987e-05, + "loss": 0.0438, + "step": 63580 + }, + { + "epoch": 2.97, + "learning_rate": 1.01939084225542e-05, + "loss": 0.0627, + "step": 63585 + }, + { + "epoch": 2.97, + "learning_rate": 1.0193124637499412e-05, + "loss": 0.1465, + "step": 63590 + }, + { + "epoch": 2.97, + "learning_rate": 1.0192340852444627e-05, + "loss": 0.0959, + "step": 63595 + }, + { + "epoch": 2.97, + "learning_rate": 1.019155706738984e-05, + "loss": 0.0919, + "step": 63600 + }, + { + "epoch": 2.97, + "learning_rate": 1.0190773282335053e-05, + "loss": 0.194, + "step": 63605 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189989497280266e-05, + "loss": 0.1797, + "step": 63610 + }, + { + "epoch": 2.97, + "learning_rate": 1.0189205712225481e-05, + "loss": 0.2537, + "step": 63615 + }, + { + "epoch": 2.97, + "learning_rate": 1.0188421927170693e-05, + "loss": 0.0516, + "step": 63620 + }, + { + "epoch": 2.97, + "learning_rate": 1.0187638142115907e-05, + "loss": 0.0706, + "step": 63625 + }, + { + "epoch": 2.97, + "learning_rate": 1.018685435706112e-05, + "loss": 0.0674, + "step": 63630 + }, + { + "epoch": 2.97, + "learning_rate": 1.0186070572006335e-05, + "loss": 0.0511, + "step": 63635 + }, + { + "epoch": 2.97, + "learning_rate": 1.0185286786951547e-05, + "loss": 0.0479, + "step": 63640 + }, + { + "epoch": 2.97, + "learning_rate": 1.0184503001896761e-05, + "loss": 0.1024, + "step": 63645 + }, + { + "epoch": 2.97, + "learning_rate": 1.0183719216841973e-05, + "loss": 0.0979, + "step": 63650 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182935431787186e-05, + "loss": 0.1912, + "step": 63655 + }, + { + "epoch": 2.97, + "learning_rate": 1.0182151646732401e-05, + "loss": 0.23, + "step": 63660 + }, + { + "epoch": 2.97, + "learning_rate": 1.0181367861677613e-05, + "loss": 0.3174, + "step": 63665 + }, + { + "epoch": 2.97, + "learning_rate": 1.0180584076622827e-05, + "loss": 0.0348, + "step": 63670 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179800291568041e-05, + "loss": 0.0672, + "step": 63675 + }, + { + "epoch": 2.97, + "learning_rate": 1.0179016506513255e-05, + "loss": 0.0229, + "step": 63680 + }, + { + "epoch": 2.97, + "learning_rate": 1.0178232721458467e-05, + "loss": 0.0771, + "step": 63685 + }, + { + "epoch": 2.97, + "learning_rate": 1.0177448936403681e-05, + "loss": 0.114, + "step": 63690 + }, + { + "epoch": 2.97, + "learning_rate": 1.0176665151348895e-05, + "loss": 0.0955, + "step": 63695 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175881366294109e-05, + "loss": 0.0727, + "step": 63700 + }, + { + "epoch": 2.97, + "learning_rate": 1.0175097581239321e-05, + "loss": 0.0753, + "step": 63705 + }, + { + "epoch": 2.97, + "learning_rate": 1.0174313796184537e-05, + "loss": 0.1925, + "step": 63710 + }, + { + "epoch": 2.97, + "learning_rate": 1.0173530011129749e-05, + "loss": 0.2355, + "step": 63715 + }, + { + "epoch": 2.97, + "learning_rate": 1.0172746226074961e-05, + "loss": 0.0492, + "step": 63720 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171962441020175e-05, + "loss": 0.033, + "step": 63725 + }, + { + "epoch": 2.97, + "learning_rate": 1.0171178655965387e-05, + "loss": 0.0612, + "step": 63730 + }, + { + "epoch": 2.97, + "learning_rate": 1.0170394870910603e-05, + "loss": 0.0285, + "step": 63735 + }, + { + "epoch": 2.97, + "learning_rate": 1.0169611085855815e-05, + "loss": 0.0306, + "step": 63740 + }, + { + "epoch": 2.97, + "learning_rate": 1.016882730080103e-05, + "loss": 0.0759, + "step": 63745 + }, + { + "epoch": 2.97, + "learning_rate": 1.0168043515746241e-05, + "loss": 0.1336, + "step": 63750 + }, + { + "epoch": 2.97, + "learning_rate": 1.0167259730691457e-05, + "loss": 0.2051, + "step": 63755 + }, + { + "epoch": 2.98, + "learning_rate": 1.016647594563667e-05, + "loss": 0.1794, + "step": 63760 + }, + { + "epoch": 2.98, + "learning_rate": 1.0165692160581883e-05, + "loss": 0.2222, + "step": 63765 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164908375527095e-05, + "loss": 0.0448, + "step": 63770 + }, + { + "epoch": 2.98, + "learning_rate": 1.0164124590472311e-05, + "loss": 0.1489, + "step": 63775 + }, + { + "epoch": 2.98, + "learning_rate": 1.0163340805417523e-05, + "loss": 0.0714, + "step": 63780 + }, + { + "epoch": 2.98, + "learning_rate": 1.0162557020362735e-05, + "loss": 0.1082, + "step": 63785 + }, + { + "epoch": 2.98, + "learning_rate": 1.0161773235307951e-05, + "loss": 0.0898, + "step": 63790 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160989450253163e-05, + "loss": 0.0668, + "step": 63795 + }, + { + "epoch": 2.98, + "learning_rate": 1.0160205665198377e-05, + "loss": 0.0864, + "step": 63800 + }, + { + "epoch": 2.98, + "learning_rate": 1.015942188014359e-05, + "loss": 0.1504, + "step": 63805 + }, + { + "epoch": 2.98, + "learning_rate": 1.0158638095088805e-05, + "loss": 0.1931, + "step": 63810 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157854310034017e-05, + "loss": 0.1701, + "step": 63815 + }, + { + "epoch": 2.98, + "learning_rate": 1.0157070524979231e-05, + "loss": 0.0198, + "step": 63820 + }, + { + "epoch": 2.98, + "learning_rate": 1.0156286739924443e-05, + "loss": 0.064, + "step": 63825 + }, + { + "epoch": 2.98, + "learning_rate": 1.0155502954869659e-05, + "loss": 0.0866, + "step": 63830 + }, + { + "epoch": 2.98, + "learning_rate": 1.0154719169814871e-05, + "loss": 0.0517, + "step": 63835 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153935384760085e-05, + "loss": 0.0199, + "step": 63840 + }, + { + "epoch": 2.98, + "learning_rate": 1.0153151599705297e-05, + "loss": 0.1171, + "step": 63845 + }, + { + "epoch": 2.98, + "learning_rate": 1.015236781465051e-05, + "loss": 0.1281, + "step": 63850 + }, + { + "epoch": 2.98, + "learning_rate": 1.0151584029595725e-05, + "loss": 0.1711, + "step": 63855 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150800244540937e-05, + "loss": 0.1402, + "step": 63860 + }, + { + "epoch": 2.98, + "learning_rate": 1.0150016459486151e-05, + "loss": 0.2753, + "step": 63865 + }, + { + "epoch": 2.98, + "learning_rate": 1.0149232674431363e-05, + "loss": 0.0264, + "step": 63870 + }, + { + "epoch": 2.98, + "learning_rate": 1.0148448889376579e-05, + "loss": 0.0742, + "step": 63875 + }, + { + "epoch": 2.98, + "learning_rate": 1.0147665104321791e-05, + "loss": 0.0883, + "step": 63880 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146881319267005e-05, + "loss": 0.059, + "step": 63885 + }, + { + "epoch": 2.98, + "learning_rate": 1.0146097534212219e-05, + "loss": 0.0566, + "step": 63890 + }, + { + "epoch": 2.98, + "learning_rate": 1.0145313749157433e-05, + "loss": 0.2492, + "step": 63895 + }, + { + "epoch": 2.98, + "learning_rate": 1.0144529964102645e-05, + "loss": 0.1694, + "step": 63900 + }, + { + "epoch": 2.98, + "learning_rate": 1.0143746179047859e-05, + "loss": 0.1478, + "step": 63905 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142962393993073e-05, + "loss": 0.2933, + "step": 63910 + }, + { + "epoch": 2.98, + "learning_rate": 1.0142178608938285e-05, + "loss": 0.3151, + "step": 63915 + }, + { + "epoch": 2.98, + "learning_rate": 1.0141394823883499e-05, + "loss": 0.0621, + "step": 63920 + }, + { + "epoch": 2.98, + "learning_rate": 1.0140611038828711e-05, + "loss": 0.0457, + "step": 63925 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139827253773927e-05, + "loss": 0.0642, + "step": 63930 + }, + { + "epoch": 2.98, + "learning_rate": 1.0139043468719139e-05, + "loss": 0.091, + "step": 63935 + }, + { + "epoch": 2.98, + "learning_rate": 1.0138259683664353e-05, + "loss": 0.078, + "step": 63940 + }, + { + "epoch": 2.98, + "learning_rate": 1.0137475898609565e-05, + "loss": 0.0961, + "step": 63945 + }, + { + "epoch": 2.98, + "learning_rate": 1.013669211355478e-05, + "loss": 0.212, + "step": 63950 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135908328499993e-05, + "loss": 0.1122, + "step": 63955 + }, + { + "epoch": 2.98, + "learning_rate": 1.0135124543445207e-05, + "loss": 0.1807, + "step": 63960 + }, + { + "epoch": 2.98, + "learning_rate": 1.0134340758390419e-05, + "loss": 0.216, + "step": 63965 + }, + { + "epoch": 2.98, + "learning_rate": 1.0133556973335635e-05, + "loss": 0.0358, + "step": 63970 + }, + { + "epoch": 2.99, + "learning_rate": 1.0132773188280847e-05, + "loss": 0.291, + "step": 63975 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131989403226059e-05, + "loss": 0.0242, + "step": 63980 + }, + { + "epoch": 2.99, + "learning_rate": 1.0131205618171273e-05, + "loss": 0.1145, + "step": 63985 + }, + { + "epoch": 2.99, + "learning_rate": 1.0130421833116487e-05, + "loss": 0.0473, + "step": 63990 + }, + { + "epoch": 2.99, + "learning_rate": 1.01296380480617e-05, + "loss": 0.1068, + "step": 63995 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128854263006913e-05, + "loss": 0.0515, + "step": 64000 + }, + { + "epoch": 2.99, + "learning_rate": 1.0128070477952127e-05, + "loss": 0.1869, + "step": 64005 + }, + { + "epoch": 2.99, + "learning_rate": 1.012728669289734e-05, + "loss": 0.2932, + "step": 64010 + }, + { + "epoch": 2.99, + "learning_rate": 1.0126502907842555e-05, + "loss": 0.3333, + "step": 64015 + }, + { + "epoch": 2.99, + "learning_rate": 1.0125719122787767e-05, + "loss": 0.0225, + "step": 64020 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124935337732983e-05, + "loss": 0.0358, + "step": 64025 + }, + { + "epoch": 2.99, + "learning_rate": 1.0124151552678195e-05, + "loss": 0.0832, + "step": 64030 + }, + { + "epoch": 2.99, + "learning_rate": 1.0123367767623409e-05, + "loss": 0.0553, + "step": 64035 + }, + { + "epoch": 2.99, + "learning_rate": 1.012258398256862e-05, + "loss": 0.0834, + "step": 64040 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121800197513833e-05, + "loss": 0.1044, + "step": 64045 + }, + { + "epoch": 2.99, + "learning_rate": 1.0121016412459049e-05, + "loss": 0.1887, + "step": 64050 + }, + { + "epoch": 2.99, + "learning_rate": 1.0120232627404261e-05, + "loss": 0.2406, + "step": 64055 + }, + { + "epoch": 2.99, + "learning_rate": 1.0119448842349475e-05, + "loss": 0.1824, + "step": 64060 + }, + { + "epoch": 2.99, + "learning_rate": 1.0118665057294687e-05, + "loss": 0.3448, + "step": 64065 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117881272239903e-05, + "loss": 0.0214, + "step": 64070 + }, + { + "epoch": 2.99, + "learning_rate": 1.0117097487185115e-05, + "loss": 0.0121, + "step": 64075 + }, + { + "epoch": 2.99, + "learning_rate": 1.0116313702130329e-05, + "loss": 0.0553, + "step": 64080 + }, + { + "epoch": 2.99, + "learning_rate": 1.0115529917075541e-05, + "loss": 0.0827, + "step": 64085 + }, + { + "epoch": 2.99, + "learning_rate": 1.0114746132020757e-05, + "loss": 0.0886, + "step": 64090 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113962346965969e-05, + "loss": 0.0533, + "step": 64095 + }, + { + "epoch": 2.99, + "learning_rate": 1.0113178561911183e-05, + "loss": 0.143, + "step": 64100 + }, + { + "epoch": 2.99, + "learning_rate": 1.0112394776856397e-05, + "loss": 0.1127, + "step": 64105 + }, + { + "epoch": 2.99, + "learning_rate": 1.0111610991801609e-05, + "loss": 0.2126, + "step": 64110 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110827206746823e-05, + "loss": 0.2579, + "step": 64115 + }, + { + "epoch": 2.99, + "learning_rate": 1.0110043421692035e-05, + "loss": 0.0479, + "step": 64120 + }, + { + "epoch": 2.99, + "learning_rate": 1.010925963663725e-05, + "loss": 0.0203, + "step": 64125 + }, + { + "epoch": 2.99, + "learning_rate": 1.0108475851582463e-05, + "loss": 0.031, + "step": 64130 + }, + { + "epoch": 2.99, + "learning_rate": 1.0107692066527677e-05, + "loss": 0.1182, + "step": 64135 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106908281472889e-05, + "loss": 0.059, + "step": 64140 + }, + { + "epoch": 2.99, + "learning_rate": 1.0106124496418104e-05, + "loss": 0.1079, + "step": 64145 + }, + { + "epoch": 2.99, + "learning_rate": 1.0105340711363317e-05, + "loss": 0.0826, + "step": 64150 + }, + { + "epoch": 2.99, + "learning_rate": 1.010455692630853e-05, + "loss": 0.1238, + "step": 64155 + }, + { + "epoch": 2.99, + "learning_rate": 1.0103773141253743e-05, + "loss": 0.1699, + "step": 64160 + }, + { + "epoch": 2.99, + "learning_rate": 1.0102989356198958e-05, + "loss": 0.224, + "step": 64165 + }, + { + "epoch": 2.99, + "learning_rate": 1.010220557114417e-05, + "loss": 0.0326, + "step": 64170 + }, + { + "epoch": 2.99, + "learning_rate": 1.0101421786089383e-05, + "loss": 0.061, + "step": 64175 + }, + { + "epoch": 2.99, + "learning_rate": 1.0100638001034597e-05, + "loss": 0.0567, + "step": 64180 + }, + { + "epoch": 2.99, + "learning_rate": 1.0099854215979809e-05, + "loss": 0.0547, + "step": 64185 + }, + { + "epoch": 3.0, + "learning_rate": 1.0099070430925024e-05, + "loss": 0.0667, + "step": 64190 + }, + { + "epoch": 3.0, + "learning_rate": 1.0098286645870237e-05, + "loss": 0.1063, + "step": 64195 + }, + { + "epoch": 3.0, + "learning_rate": 1.009750286081545e-05, + "loss": 0.1263, + "step": 64200 + }, + { + "epoch": 3.0, + "learning_rate": 1.0096719075760664e-05, + "loss": 0.1766, + "step": 64205 + }, + { + "epoch": 3.0, + "learning_rate": 1.0095935290705878e-05, + "loss": 0.2074, + "step": 64210 + }, + { + "epoch": 3.0, + "learning_rate": 1.009515150565109e-05, + "loss": 0.2975, + "step": 64215 + }, + { + "epoch": 3.0, + "learning_rate": 1.0094367720596305e-05, + "loss": 0.0328, + "step": 64220 + }, + { + "epoch": 3.0, + "learning_rate": 1.0093583935541518e-05, + "loss": 0.0301, + "step": 64225 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092800150486732e-05, + "loss": 0.051, + "step": 64230 + }, + { + "epoch": 3.0, + "learning_rate": 1.0092016365431945e-05, + "loss": 0.0794, + "step": 64235 + }, + { + "epoch": 3.0, + "learning_rate": 1.0091232580377157e-05, + "loss": 0.2081, + "step": 64240 + }, + { + "epoch": 3.0, + "learning_rate": 1.0090448795322372e-05, + "loss": 0.0887, + "step": 64245 + }, + { + "epoch": 3.0, + "learning_rate": 1.0089665010267585e-05, + "loss": 0.0939, + "step": 64250 + }, + { + "epoch": 3.0, + "learning_rate": 1.0088881225212798e-05, + "loss": 0.152, + "step": 64255 + }, + { + "epoch": 3.0, + "learning_rate": 1.008809744015801e-05, + "loss": 0.1754, + "step": 64260 + }, + { + "epoch": 3.0, + "learning_rate": 1.0087313655103226e-05, + "loss": 0.2495, + "step": 64265 + }, + { + "epoch": 3.0, + "learning_rate": 1.0086529870048438e-05, + "loss": 0.0218, + "step": 64270 + }, + { + "epoch": 3.0, + "learning_rate": 1.0085746084993652e-05, + "loss": 0.0405, + "step": 64275 + }, + { + "epoch": 3.0, + "learning_rate": 1.0084962299938865e-05, + "loss": 0.0995, + "step": 64280 + }, + { + "epoch": 3.0, + "learning_rate": 1.008417851488408e-05, + "loss": 0.1217, + "step": 64285 + }, + { + "epoch": 3.0, + "learning_rate": 1.0083394729829292e-05, + "loss": 0.1712, + "step": 64290 + }, + { + "epoch": 3.0, + "eval_cer": 0.013233947127380689, + "eval_loss": 0.03575053811073303, + "eval_runtime": 477.3319, + "eval_samples_per_second": 39.909, + "eval_steps_per_second": 4.99, + "eval_wer": 0.11354552183567727, + "step": 64293 + }, + { + "epoch": 3.0, + "learning_rate": 1.0082610944774506e-05, + "loss": 0.1887, + "step": 64295 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081827159719719e-05, + "loss": 0.0727, + "step": 64300 + }, + { + "epoch": 3.0, + "learning_rate": 1.0081043374664932e-05, + "loss": 0.0216, + "step": 64305 + }, + { + "epoch": 3.0, + "learning_rate": 1.0080259589610146e-05, + "loss": 0.0405, + "step": 64310 + }, + { + "epoch": 3.0, + "learning_rate": 1.0079475804555359e-05, + "loss": 0.083, + "step": 64315 + }, + { + "epoch": 3.0, + "learning_rate": 1.0078692019500574e-05, + "loss": 0.0631, + "step": 64320 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077908234445786e-05, + "loss": 0.0888, + "step": 64325 + }, + { + "epoch": 3.0, + "learning_rate": 1.0077124449391e-05, + "loss": 0.2566, + "step": 64330 + }, + { + "epoch": 3.0, + "learning_rate": 1.0076340664336212e-05, + "loss": 0.1636, + "step": 64335 + }, + { + "epoch": 3.0, + "learning_rate": 1.0075556879281428e-05, + "loss": 0.1579, + "step": 64340 + }, + { + "epoch": 3.0, + "learning_rate": 1.007477309422664e-05, + "loss": 0.2837, + "step": 64345 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073989309171854e-05, + "loss": 0.1294, + "step": 64350 + }, + { + "epoch": 3.0, + "learning_rate": 1.0073205524117066e-05, + "loss": 0.038, + "step": 64355 + }, + { + "epoch": 3.0, + "learning_rate": 1.0072421739062282e-05, + "loss": 0.0945, + "step": 64360 + }, + { + "epoch": 3.0, + "learning_rate": 1.0071637954007494e-05, + "loss": 0.0394, + "step": 64365 + }, + { + "epoch": 3.0, + "learning_rate": 1.0070854168952706e-05, + "loss": 0.0792, + "step": 64370 + }, + { + "epoch": 3.0, + "learning_rate": 1.007007038389792e-05, + "loss": 0.114, + "step": 64375 + }, + { + "epoch": 3.0, + "learning_rate": 1.0069286598843133e-05, + "loss": 0.0888, + "step": 64380 + }, + { + "epoch": 3.0, + "learning_rate": 1.0068502813788348e-05, + "loss": 0.1336, + "step": 64385 + }, + { + "epoch": 3.0, + "learning_rate": 1.006771902873356e-05, + "loss": 0.2821, + "step": 64390 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066935243678774e-05, + "loss": 0.2377, + "step": 64395 + }, + { + "epoch": 3.0, + "learning_rate": 1.0066151458623986e-05, + "loss": 0.1294, + "step": 64400 + }, + { + "epoch": 3.01, + "learning_rate": 1.0065367673569202e-05, + "loss": 0.0552, + "step": 64405 + }, + { + "epoch": 3.01, + "learning_rate": 1.0064583888514414e-05, + "loss": 0.0234, + "step": 64410 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063800103459628e-05, + "loss": 0.0932, + "step": 64415 + }, + { + "epoch": 3.01, + "learning_rate": 1.0063016318404842e-05, + "loss": 0.1105, + "step": 64420 + }, + { + "epoch": 3.01, + "learning_rate": 1.0062232533350056e-05, + "loss": 0.0516, + "step": 64425 + }, + { + "epoch": 3.01, + "learning_rate": 1.0061448748295268e-05, + "loss": 0.2342, + "step": 64430 + }, + { + "epoch": 3.01, + "learning_rate": 1.006066496324048e-05, + "loss": 0.1758, + "step": 64435 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059881178185696e-05, + "loss": 0.1461, + "step": 64440 + }, + { + "epoch": 3.01, + "learning_rate": 1.0059097393130908e-05, + "loss": 0.3334, + "step": 64445 + }, + { + "epoch": 3.01, + "learning_rate": 1.0058313608076122e-05, + "loss": 0.0562, + "step": 64450 + }, + { + "epoch": 3.01, + "learning_rate": 1.0057529823021334e-05, + "loss": 0.0572, + "step": 64455 + }, + { + "epoch": 3.01, + "learning_rate": 1.005674603796655e-05, + "loss": 0.0752, + "step": 64460 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055962252911762e-05, + "loss": 0.0533, + "step": 64465 + }, + { + "epoch": 3.01, + "learning_rate": 1.0055178467856976e-05, + "loss": 0.0731, + "step": 64470 + }, + { + "epoch": 3.01, + "learning_rate": 1.0054394682802188e-05, + "loss": 0.0838, + "step": 64475 + }, + { + "epoch": 3.01, + "learning_rate": 1.0053610897747404e-05, + "loss": 0.0864, + "step": 64480 + }, + { + "epoch": 3.01, + "learning_rate": 1.0052827112692616e-05, + "loss": 0.2098, + "step": 64485 + }, + { + "epoch": 3.01, + "learning_rate": 1.005204332763783e-05, + "loss": 0.1538, + "step": 64490 + }, + { + "epoch": 3.01, + "learning_rate": 1.0051259542583042e-05, + "loss": 0.2549, + "step": 64495 + }, + { + "epoch": 3.01, + "learning_rate": 1.0050475757528254e-05, + "loss": 0.1237, + "step": 64500 + }, + { + "epoch": 3.01, + "learning_rate": 1.004969197247347e-05, + "loss": 0.0369, + "step": 64505 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048908187418682e-05, + "loss": 0.0278, + "step": 64510 + }, + { + "epoch": 3.01, + "learning_rate": 1.0048124402363896e-05, + "loss": 0.074, + "step": 64515 + }, + { + "epoch": 3.01, + "learning_rate": 1.004734061730911e-05, + "loss": 0.0517, + "step": 64520 + }, + { + "epoch": 3.01, + "learning_rate": 1.0046556832254324e-05, + "loss": 0.0991, + "step": 64525 + }, + { + "epoch": 3.01, + "learning_rate": 1.0045773047199536e-05, + "loss": 0.1158, + "step": 64530 + }, + { + "epoch": 3.01, + "learning_rate": 1.004498926214475e-05, + "loss": 0.1029, + "step": 64535 + }, + { + "epoch": 3.01, + "learning_rate": 1.0044205477089964e-05, + "loss": 0.1561, + "step": 64540 + }, + { + "epoch": 3.01, + "learning_rate": 1.0043421692035178e-05, + "loss": 0.2435, + "step": 64545 + }, + { + "epoch": 3.01, + "learning_rate": 1.004263790698039e-05, + "loss": 0.0767, + "step": 64550 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041854121925606e-05, + "loss": 0.0818, + "step": 64555 + }, + { + "epoch": 3.01, + "learning_rate": 1.0041070336870818e-05, + "loss": 0.0948, + "step": 64560 + }, + { + "epoch": 3.01, + "learning_rate": 1.004028655181603e-05, + "loss": 0.0388, + "step": 64565 + }, + { + "epoch": 3.01, + "learning_rate": 1.0039502766761244e-05, + "loss": 0.0989, + "step": 64570 + }, + { + "epoch": 3.01, + "learning_rate": 1.0038718981706456e-05, + "loss": 0.1033, + "step": 64575 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037935196651672e-05, + "loss": 0.0571, + "step": 64580 + }, + { + "epoch": 3.01, + "learning_rate": 1.0037151411596884e-05, + "loss": 0.1396, + "step": 64585 + }, + { + "epoch": 3.01, + "learning_rate": 1.0036367626542098e-05, + "loss": 0.1573, + "step": 64590 + }, + { + "epoch": 3.01, + "learning_rate": 1.003558384148731e-05, + "loss": 0.2187, + "step": 64595 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034800056432526e-05, + "loss": 0.0839, + "step": 64600 + }, + { + "epoch": 3.01, + "learning_rate": 1.0034016271377738e-05, + "loss": 0.0647, + "step": 64605 + }, + { + "epoch": 3.01, + "learning_rate": 1.0033232486322952e-05, + "loss": 0.0414, + "step": 64610 + }, + { + "epoch": 3.02, + "learning_rate": 1.0032448701268164e-05, + "loss": 0.0753, + "step": 64615 + }, + { + "epoch": 3.02, + "learning_rate": 1.003166491621338e-05, + "loss": 0.0433, + "step": 64620 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030881131158592e-05, + "loss": 0.0816, + "step": 64625 + }, + { + "epoch": 3.02, + "learning_rate": 1.0030097346103804e-05, + "loss": 0.0984, + "step": 64630 + }, + { + "epoch": 3.02, + "learning_rate": 1.002931356104902e-05, + "loss": 0.1363, + "step": 64635 + }, + { + "epoch": 3.02, + "learning_rate": 1.002868653300519e-05, + "loss": 0.1344, + "step": 64640 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027902747950402e-05, + "loss": 0.2421, + "step": 64645 + }, + { + "epoch": 3.02, + "learning_rate": 1.0027118962895616e-05, + "loss": 0.1472, + "step": 64650 + }, + { + "epoch": 3.02, + "learning_rate": 1.0026335177840828e-05, + "loss": 0.0602, + "step": 64655 + }, + { + "epoch": 3.02, + "learning_rate": 1.0025551392786044e-05, + "loss": 0.0546, + "step": 64660 + }, + { + "epoch": 3.02, + "learning_rate": 1.0024767607731256e-05, + "loss": 0.04, + "step": 64665 + }, + { + "epoch": 3.02, + "learning_rate": 1.002398382267647e-05, + "loss": 0.0398, + "step": 64670 + }, + { + "epoch": 3.02, + "learning_rate": 1.0023200037621682e-05, + "loss": 0.0285, + "step": 64675 + }, + { + "epoch": 3.02, + "learning_rate": 1.0022416252566898e-05, + "loss": 0.134, + "step": 64680 + }, + { + "epoch": 3.02, + "learning_rate": 1.002163246751211e-05, + "loss": 0.0968, + "step": 64685 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020848682457324e-05, + "loss": 0.2418, + "step": 64690 + }, + { + "epoch": 3.02, + "learning_rate": 1.0020064897402536e-05, + "loss": 0.3335, + "step": 64695 + }, + { + "epoch": 3.02, + "learning_rate": 1.001928111234775e-05, + "loss": 0.065, + "step": 64700 + }, + { + "epoch": 3.02, + "learning_rate": 1.0018497327292964e-05, + "loss": 0.0142, + "step": 64705 + }, + { + "epoch": 3.02, + "learning_rate": 1.0017713542238176e-05, + "loss": 0.0253, + "step": 64710 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016929757183392e-05, + "loss": 0.0308, + "step": 64715 + }, + { + "epoch": 3.02, + "learning_rate": 1.0016145972128604e-05, + "loss": 0.0628, + "step": 64720 + }, + { + "epoch": 3.02, + "learning_rate": 1.0015362187073818e-05, + "loss": 0.1409, + "step": 64725 + }, + { + "epoch": 3.02, + "learning_rate": 1.001457840201903e-05, + "loss": 0.0527, + "step": 64730 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013794616964246e-05, + "loss": 0.1638, + "step": 64735 + }, + { + "epoch": 3.02, + "learning_rate": 1.0013010831909458e-05, + "loss": 0.1495, + "step": 64740 + }, + { + "epoch": 3.02, + "learning_rate": 1.0012227046854672e-05, + "loss": 0.3142, + "step": 64745 + }, + { + "epoch": 3.02, + "learning_rate": 1.0011443261799884e-05, + "loss": 0.0679, + "step": 64750 + }, + { + "epoch": 3.02, + "learning_rate": 1.00106594767451e-05, + "loss": 0.0159, + "step": 64755 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009875691690312e-05, + "loss": 0.0669, + "step": 64760 + }, + { + "epoch": 3.02, + "learning_rate": 1.0009091906635524e-05, + "loss": 0.0254, + "step": 64765 + }, + { + "epoch": 3.02, + "learning_rate": 1.0008308121580738e-05, + "loss": 0.0483, + "step": 64770 + }, + { + "epoch": 3.02, + "learning_rate": 1.000752433652595e-05, + "loss": 0.0889, + "step": 64775 + }, + { + "epoch": 3.02, + "learning_rate": 1.0006740551471166e-05, + "loss": 0.0646, + "step": 64780 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005956766416378e-05, + "loss": 0.1802, + "step": 64785 + }, + { + "epoch": 3.02, + "learning_rate": 1.0005172981361592e-05, + "loss": 0.2382, + "step": 64790 + }, + { + "epoch": 3.02, + "learning_rate": 1.0004389196306806e-05, + "loss": 0.3438, + "step": 64795 + }, + { + "epoch": 3.02, + "learning_rate": 1.000360541125202e-05, + "loss": 0.072, + "step": 64800 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002821626197232e-05, + "loss": 0.0219, + "step": 64805 + }, + { + "epoch": 3.02, + "learning_rate": 1.0002037841142446e-05, + "loss": 0.0681, + "step": 64810 + }, + { + "epoch": 3.02, + "learning_rate": 1.000125405608766e-05, + "loss": 0.0652, + "step": 64815 + }, + { + "epoch": 3.02, + "learning_rate": 1.0000470271032874e-05, + "loss": 0.1291, + "step": 64820 + }, + { + "epoch": 3.02, + "learning_rate": 9.999686485978086e-06, + "loss": 0.1223, + "step": 64825 + }, + { + "epoch": 3.03, + "learning_rate": 9.9989027009233e-06, + "loss": 0.1653, + "step": 64830 + }, + { + "epoch": 3.03, + "learning_rate": 9.998118915868514e-06, + "loss": 0.1461, + "step": 64835 + }, + { + "epoch": 3.03, + "learning_rate": 9.997335130813726e-06, + "loss": 0.2772, + "step": 64840 + }, + { + "epoch": 3.03, + "learning_rate": 9.99655134575894e-06, + "loss": 0.2285, + "step": 64845 + }, + { + "epoch": 3.03, + "learning_rate": 9.995767560704154e-06, + "loss": 0.1091, + "step": 64850 + }, + { + "epoch": 3.03, + "learning_rate": 9.994983775649368e-06, + "loss": 0.0319, + "step": 64855 + }, + { + "epoch": 3.03, + "learning_rate": 9.99419999059458e-06, + "loss": 0.044, + "step": 64860 + }, + { + "epoch": 3.03, + "learning_rate": 9.993416205539794e-06, + "loss": 0.08, + "step": 64865 + }, + { + "epoch": 3.03, + "learning_rate": 9.992632420485006e-06, + "loss": 0.1067, + "step": 64870 + }, + { + "epoch": 3.03, + "learning_rate": 9.99184863543022e-06, + "loss": 0.1008, + "step": 64875 + }, + { + "epoch": 3.03, + "learning_rate": 9.991064850375434e-06, + "loss": 0.1137, + "step": 64880 + }, + { + "epoch": 3.03, + "learning_rate": 9.990281065320648e-06, + "loss": 0.0964, + "step": 64885 + }, + { + "epoch": 3.03, + "learning_rate": 9.98949728026586e-06, + "loss": 0.2726, + "step": 64890 + }, + { + "epoch": 3.03, + "learning_rate": 9.988713495211074e-06, + "loss": 0.3439, + "step": 64895 + }, + { + "epoch": 3.03, + "learning_rate": 9.987929710156288e-06, + "loss": 0.0521, + "step": 64900 + }, + { + "epoch": 3.03, + "learning_rate": 9.987145925101502e-06, + "loss": 0.0303, + "step": 64905 + }, + { + "epoch": 3.03, + "learning_rate": 9.986362140046714e-06, + "loss": 0.0604, + "step": 64910 + }, + { + "epoch": 3.03, + "learning_rate": 9.985578354991928e-06, + "loss": 0.0909, + "step": 64915 + }, + { + "epoch": 3.03, + "learning_rate": 9.984794569937142e-06, + "loss": 0.0717, + "step": 64920 + }, + { + "epoch": 3.03, + "learning_rate": 9.984010784882354e-06, + "loss": 0.0903, + "step": 64925 + }, + { + "epoch": 3.03, + "learning_rate": 9.983226999827568e-06, + "loss": 0.0838, + "step": 64930 + }, + { + "epoch": 3.03, + "learning_rate": 9.982443214772782e-06, + "loss": 0.1498, + "step": 64935 + }, + { + "epoch": 3.03, + "learning_rate": 9.981659429717994e-06, + "loss": 0.1015, + "step": 64940 + }, + { + "epoch": 3.03, + "learning_rate": 9.980875644663208e-06, + "loss": 0.2865, + "step": 64945 + }, + { + "epoch": 3.03, + "learning_rate": 9.980091859608422e-06, + "loss": 0.0189, + "step": 64950 + }, + { + "epoch": 3.03, + "learning_rate": 9.979308074553636e-06, + "loss": 0.0376, + "step": 64955 + }, + { + "epoch": 3.03, + "learning_rate": 9.978524289498848e-06, + "loss": 0.0662, + "step": 64960 + }, + { + "epoch": 3.03, + "learning_rate": 9.977740504444062e-06, + "loss": 0.0318, + "step": 64965 + }, + { + "epoch": 3.03, + "learning_rate": 9.976956719389276e-06, + "loss": 0.0951, + "step": 64970 + }, + { + "epoch": 3.03, + "learning_rate": 9.97617293433449e-06, + "loss": 0.0406, + "step": 64975 + }, + { + "epoch": 3.03, + "learning_rate": 9.975389149279704e-06, + "loss": 0.0863, + "step": 64980 + }, + { + "epoch": 3.03, + "learning_rate": 9.974605364224916e-06, + "loss": 0.1428, + "step": 64985 + }, + { + "epoch": 3.03, + "learning_rate": 9.973821579170128e-06, + "loss": 0.2493, + "step": 64990 + }, + { + "epoch": 3.03, + "learning_rate": 9.973037794115342e-06, + "loss": 0.2759, + "step": 64995 + }, + { + "epoch": 3.03, + "learning_rate": 9.972254009060556e-06, + "loss": 0.0916, + "step": 65000 + }, + { + "epoch": 3.03, + "learning_rate": 9.97147022400577e-06, + "loss": 0.029, + "step": 65005 + }, + { + "epoch": 3.03, + "learning_rate": 9.970686438950982e-06, + "loss": 0.0638, + "step": 65010 + }, + { + "epoch": 3.03, + "learning_rate": 9.969902653896196e-06, + "loss": 0.0404, + "step": 65015 + }, + { + "epoch": 3.03, + "learning_rate": 9.96911886884141e-06, + "loss": 0.0599, + "step": 65020 + }, + { + "epoch": 3.03, + "learning_rate": 9.968335083786624e-06, + "loss": 0.1324, + "step": 65025 + }, + { + "epoch": 3.03, + "learning_rate": 9.967551298731838e-06, + "loss": 0.1053, + "step": 65030 + }, + { + "epoch": 3.03, + "learning_rate": 9.96676751367705e-06, + "loss": 0.0871, + "step": 65035 + }, + { + "epoch": 3.03, + "learning_rate": 9.965983728622264e-06, + "loss": 0.3495, + "step": 65040 + }, + { + "epoch": 3.04, + "learning_rate": 9.965199943567478e-06, + "loss": 0.2196, + "step": 65045 + }, + { + "epoch": 3.04, + "learning_rate": 9.964416158512691e-06, + "loss": 0.0536, + "step": 65050 + }, + { + "epoch": 3.04, + "learning_rate": 9.963632373457904e-06, + "loss": 0.0329, + "step": 65055 + }, + { + "epoch": 3.04, + "learning_rate": 9.962848588403118e-06, + "loss": 0.0575, + "step": 65060 + }, + { + "epoch": 3.04, + "learning_rate": 9.96206480334833e-06, + "loss": 0.0783, + "step": 65065 + }, + { + "epoch": 3.04, + "learning_rate": 9.961281018293544e-06, + "loss": 0.0488, + "step": 65070 + }, + { + "epoch": 3.04, + "learning_rate": 9.960497233238758e-06, + "loss": 0.1067, + "step": 65075 + }, + { + "epoch": 3.04, + "learning_rate": 9.959713448183972e-06, + "loss": 0.1122, + "step": 65080 + }, + { + "epoch": 3.04, + "learning_rate": 9.958929663129184e-06, + "loss": 0.1402, + "step": 65085 + }, + { + "epoch": 3.04, + "learning_rate": 9.958145878074398e-06, + "loss": 0.1527, + "step": 65090 + }, + { + "epoch": 3.04, + "learning_rate": 9.957362093019612e-06, + "loss": 0.173, + "step": 65095 + }, + { + "epoch": 3.04, + "learning_rate": 9.956578307964825e-06, + "loss": 0.0723, + "step": 65100 + }, + { + "epoch": 3.04, + "learning_rate": 9.955794522910038e-06, + "loss": 0.0479, + "step": 65105 + }, + { + "epoch": 3.04, + "learning_rate": 9.955010737855252e-06, + "loss": 0.087, + "step": 65110 + }, + { + "epoch": 3.04, + "learning_rate": 9.954226952800465e-06, + "loss": 0.0572, + "step": 65115 + }, + { + "epoch": 3.04, + "learning_rate": 9.953443167745678e-06, + "loss": 0.0548, + "step": 65120 + }, + { + "epoch": 3.04, + "learning_rate": 9.952659382690892e-06, + "loss": 0.0851, + "step": 65125 + }, + { + "epoch": 3.04, + "learning_rate": 9.951875597636105e-06, + "loss": 0.1876, + "step": 65130 + }, + { + "epoch": 3.04, + "learning_rate": 9.951091812581318e-06, + "loss": 0.1328, + "step": 65135 + }, + { + "epoch": 3.04, + "learning_rate": 9.950308027526532e-06, + "loss": 0.1966, + "step": 65140 + }, + { + "epoch": 3.04, + "learning_rate": 9.949524242471746e-06, + "loss": 0.222, + "step": 65145 + }, + { + "epoch": 3.04, + "learning_rate": 9.94874045741696e-06, + "loss": 0.0636, + "step": 65150 + }, + { + "epoch": 3.04, + "learning_rate": 9.947956672362172e-06, + "loss": 0.055, + "step": 65155 + }, + { + "epoch": 3.04, + "learning_rate": 9.947172887307386e-06, + "loss": 0.0413, + "step": 65160 + }, + { + "epoch": 3.04, + "learning_rate": 9.9463891022526e-06, + "loss": 0.0801, + "step": 65165 + }, + { + "epoch": 3.04, + "learning_rate": 9.945605317197813e-06, + "loss": 0.081, + "step": 65170 + }, + { + "epoch": 3.04, + "learning_rate": 9.944821532143026e-06, + "loss": 0.0356, + "step": 65175 + }, + { + "epoch": 3.04, + "learning_rate": 9.94403774708824e-06, + "loss": 0.1325, + "step": 65180 + }, + { + "epoch": 3.04, + "learning_rate": 9.943253962033452e-06, + "loss": 0.1551, + "step": 65185 + }, + { + "epoch": 3.04, + "learning_rate": 9.942470176978666e-06, + "loss": 0.2802, + "step": 65190 + }, + { + "epoch": 3.04, + "learning_rate": 9.94168639192388e-06, + "loss": 0.4242, + "step": 65195 + }, + { + "epoch": 3.04, + "learning_rate": 9.940902606869093e-06, + "loss": 0.0853, + "step": 65200 + }, + { + "epoch": 3.04, + "learning_rate": 9.940118821814306e-06, + "loss": 0.0255, + "step": 65205 + }, + { + "epoch": 3.04, + "learning_rate": 9.93933503675952e-06, + "loss": 0.0622, + "step": 65210 + }, + { + "epoch": 3.04, + "learning_rate": 9.938551251704733e-06, + "loss": 0.0154, + "step": 65215 + }, + { + "epoch": 3.04, + "learning_rate": 9.937767466649947e-06, + "loss": 0.093, + "step": 65220 + }, + { + "epoch": 3.04, + "learning_rate": 9.93698368159516e-06, + "loss": 0.2092, + "step": 65225 + }, + { + "epoch": 3.04, + "learning_rate": 9.936199896540373e-06, + "loss": 0.0847, + "step": 65230 + }, + { + "epoch": 3.04, + "learning_rate": 9.935416111485587e-06, + "loss": 0.1413, + "step": 65235 + }, + { + "epoch": 3.04, + "learning_rate": 9.934632326430801e-06, + "loss": 0.2631, + "step": 65240 + }, + { + "epoch": 3.04, + "learning_rate": 9.933848541376015e-06, + "loss": 0.3179, + "step": 65245 + }, + { + "epoch": 3.04, + "learning_rate": 9.933064756321227e-06, + "loss": 0.0971, + "step": 65250 + }, + { + "epoch": 3.04, + "learning_rate": 9.93228097126644e-06, + "loss": 0.055, + "step": 65255 + }, + { + "epoch": 3.05, + "learning_rate": 9.931497186211653e-06, + "loss": 0.0182, + "step": 65260 + }, + { + "epoch": 3.05, + "learning_rate": 9.930713401156867e-06, + "loss": 0.1053, + "step": 65265 + }, + { + "epoch": 3.05, + "learning_rate": 9.929929616102081e-06, + "loss": 0.0475, + "step": 65270 + }, + { + "epoch": 3.05, + "learning_rate": 9.929145831047294e-06, + "loss": 0.0945, + "step": 65275 + }, + { + "epoch": 3.05, + "learning_rate": 9.928362045992507e-06, + "loss": 0.0834, + "step": 65280 + }, + { + "epoch": 3.05, + "learning_rate": 9.927578260937721e-06, + "loss": 0.1769, + "step": 65285 + }, + { + "epoch": 3.05, + "learning_rate": 9.926794475882935e-06, + "loss": 0.2825, + "step": 65290 + }, + { + "epoch": 3.05, + "learning_rate": 9.926010690828149e-06, + "loss": 0.2869, + "step": 65295 + }, + { + "epoch": 3.05, + "learning_rate": 9.925226905773361e-06, + "loss": 0.1088, + "step": 65300 + }, + { + "epoch": 3.05, + "learning_rate": 9.924443120718575e-06, + "loss": 0.035, + "step": 65305 + }, + { + "epoch": 3.05, + "learning_rate": 9.92365933566379e-06, + "loss": 0.0313, + "step": 65310 + }, + { + "epoch": 3.05, + "learning_rate": 9.922875550609001e-06, + "loss": 0.0684, + "step": 65315 + }, + { + "epoch": 3.05, + "learning_rate": 9.922091765554215e-06, + "loss": 0.0795, + "step": 65320 + }, + { + "epoch": 3.05, + "learning_rate": 9.921307980499427e-06, + "loss": 0.0704, + "step": 65325 + }, + { + "epoch": 3.05, + "learning_rate": 9.920524195444641e-06, + "loss": 0.125, + "step": 65330 + }, + { + "epoch": 3.05, + "learning_rate": 9.919740410389855e-06, + "loss": 0.1401, + "step": 65335 + }, + { + "epoch": 3.05, + "learning_rate": 9.91895662533507e-06, + "loss": 0.2818, + "step": 65340 + }, + { + "epoch": 3.05, + "learning_rate": 9.918172840280283e-06, + "loss": 0.2141, + "step": 65345 + }, + { + "epoch": 3.05, + "learning_rate": 9.917389055225495e-06, + "loss": 0.1145, + "step": 65350 + }, + { + "epoch": 3.05, + "learning_rate": 9.91660527017071e-06, + "loss": 0.0228, + "step": 65355 + }, + { + "epoch": 3.05, + "learning_rate": 9.915821485115923e-06, + "loss": 0.0151, + "step": 65360 + }, + { + "epoch": 3.05, + "learning_rate": 9.915037700061137e-06, + "loss": 0.0571, + "step": 65365 + }, + { + "epoch": 3.05, + "learning_rate": 9.91425391500635e-06, + "loss": 0.0924, + "step": 65370 + }, + { + "epoch": 3.05, + "learning_rate": 9.913470129951563e-06, + "loss": 0.0435, + "step": 65375 + }, + { + "epoch": 3.05, + "learning_rate": 9.912686344896775e-06, + "loss": 0.1203, + "step": 65380 + }, + { + "epoch": 3.05, + "learning_rate": 9.91190255984199e-06, + "loss": 0.1247, + "step": 65385 + }, + { + "epoch": 3.05, + "learning_rate": 9.911118774787203e-06, + "loss": 0.1618, + "step": 65390 + }, + { + "epoch": 3.05, + "learning_rate": 9.910334989732417e-06, + "loss": 0.2363, + "step": 65395 + }, + { + "epoch": 3.05, + "learning_rate": 9.90955120467763e-06, + "loss": 0.0811, + "step": 65400 + }, + { + "epoch": 3.05, + "learning_rate": 9.908767419622843e-06, + "loss": 0.0359, + "step": 65405 + }, + { + "epoch": 3.05, + "learning_rate": 9.907983634568057e-06, + "loss": 0.0697, + "step": 65410 + }, + { + "epoch": 3.05, + "learning_rate": 9.907199849513271e-06, + "loss": 0.0408, + "step": 65415 + }, + { + "epoch": 3.05, + "learning_rate": 9.906416064458483e-06, + "loss": 0.0404, + "step": 65420 + }, + { + "epoch": 3.05, + "learning_rate": 9.905632279403697e-06, + "loss": 0.1271, + "step": 65425 + }, + { + "epoch": 3.05, + "learning_rate": 9.904848494348911e-06, + "loss": 0.149, + "step": 65430 + }, + { + "epoch": 3.05, + "learning_rate": 9.904064709294125e-06, + "loss": 0.1755, + "step": 65435 + }, + { + "epoch": 3.05, + "learning_rate": 9.903280924239337e-06, + "loss": 0.2316, + "step": 65440 + }, + { + "epoch": 3.05, + "learning_rate": 9.902497139184551e-06, + "loss": 0.2651, + "step": 65445 + }, + { + "epoch": 3.05, + "learning_rate": 9.901713354129763e-06, + "loss": 0.0487, + "step": 65450 + }, + { + "epoch": 3.05, + "learning_rate": 9.900929569074977e-06, + "loss": 0.0474, + "step": 65455 + }, + { + "epoch": 3.05, + "learning_rate": 9.900145784020191e-06, + "loss": 0.0345, + "step": 65460 + }, + { + "epoch": 3.05, + "learning_rate": 9.899361998965405e-06, + "loss": 0.0674, + "step": 65465 + }, + { + "epoch": 3.05, + "learning_rate": 9.898578213910617e-06, + "loss": 0.1199, + "step": 65470 + }, + { + "epoch": 3.06, + "learning_rate": 9.897794428855831e-06, + "loss": 0.075, + "step": 65475 + }, + { + "epoch": 3.06, + "learning_rate": 9.897010643801045e-06, + "loss": 0.0844, + "step": 65480 + }, + { + "epoch": 3.06, + "learning_rate": 9.896226858746259e-06, + "loss": 0.1378, + "step": 65485 + }, + { + "epoch": 3.06, + "learning_rate": 9.895443073691471e-06, + "loss": 0.2872, + "step": 65490 + }, + { + "epoch": 3.06, + "learning_rate": 9.894659288636685e-06, + "loss": 0.276, + "step": 65495 + }, + { + "epoch": 3.06, + "learning_rate": 9.893875503581899e-06, + "loss": 0.1098, + "step": 65500 + }, + { + "epoch": 3.06, + "learning_rate": 9.893091718527113e-06, + "loss": 0.0231, + "step": 65505 + }, + { + "epoch": 3.06, + "learning_rate": 9.892307933472325e-06, + "loss": 0.0576, + "step": 65510 + }, + { + "epoch": 3.06, + "learning_rate": 9.891524148417539e-06, + "loss": 0.1021, + "step": 65515 + }, + { + "epoch": 3.06, + "learning_rate": 9.890740363362751e-06, + "loss": 0.1098, + "step": 65520 + }, + { + "epoch": 3.06, + "learning_rate": 9.889956578307965e-06, + "loss": 0.0809, + "step": 65525 + }, + { + "epoch": 3.06, + "learning_rate": 9.889172793253179e-06, + "loss": 0.0706, + "step": 65530 + }, + { + "epoch": 3.06, + "learning_rate": 9.888389008198393e-06, + "loss": 0.1328, + "step": 65535 + }, + { + "epoch": 3.06, + "learning_rate": 9.887605223143605e-06, + "loss": 0.1188, + "step": 65540 + }, + { + "epoch": 3.06, + "learning_rate": 9.886821438088819e-06, + "loss": 0.2656, + "step": 65545 + }, + { + "epoch": 3.06, + "learning_rate": 9.886037653034033e-06, + "loss": 0.1282, + "step": 65550 + }, + { + "epoch": 3.06, + "learning_rate": 9.885253867979247e-06, + "loss": 0.0373, + "step": 65555 + }, + { + "epoch": 3.06, + "learning_rate": 9.88447008292446e-06, + "loss": 0.0479, + "step": 65560 + }, + { + "epoch": 3.06, + "learning_rate": 9.883686297869673e-06, + "loss": 0.0388, + "step": 65565 + }, + { + "epoch": 3.06, + "learning_rate": 9.882902512814887e-06, + "loss": 0.0458, + "step": 65570 + }, + { + "epoch": 3.06, + "learning_rate": 9.882118727760099e-06, + "loss": 0.06, + "step": 65575 + }, + { + "epoch": 3.06, + "learning_rate": 9.881334942705313e-06, + "loss": 0.1196, + "step": 65580 + }, + { + "epoch": 3.06, + "learning_rate": 9.880551157650527e-06, + "loss": 0.2084, + "step": 65585 + }, + { + "epoch": 3.06, + "learning_rate": 9.879767372595739e-06, + "loss": 0.247, + "step": 65590 + }, + { + "epoch": 3.06, + "learning_rate": 9.878983587540953e-06, + "loss": 0.3117, + "step": 65595 + }, + { + "epoch": 3.06, + "learning_rate": 9.878199802486167e-06, + "loss": 0.1258, + "step": 65600 + }, + { + "epoch": 3.06, + "learning_rate": 9.87741601743138e-06, + "loss": 0.0392, + "step": 65605 + }, + { + "epoch": 3.06, + "learning_rate": 9.876632232376595e-06, + "loss": 0.0395, + "step": 65610 + }, + { + "epoch": 3.06, + "learning_rate": 9.875848447321807e-06, + "loss": 0.0822, + "step": 65615 + }, + { + "epoch": 3.06, + "learning_rate": 9.87506466226702e-06, + "loss": 0.1316, + "step": 65620 + }, + { + "epoch": 3.06, + "learning_rate": 9.874280877212235e-06, + "loss": 0.0738, + "step": 65625 + }, + { + "epoch": 3.06, + "learning_rate": 9.873497092157449e-06, + "loss": 0.1253, + "step": 65630 + }, + { + "epoch": 3.06, + "learning_rate": 9.872713307102661e-06, + "loss": 0.1578, + "step": 65635 + }, + { + "epoch": 3.06, + "learning_rate": 9.871929522047875e-06, + "loss": 0.2201, + "step": 65640 + }, + { + "epoch": 3.06, + "learning_rate": 9.871145736993087e-06, + "loss": 0.2527, + "step": 65645 + }, + { + "epoch": 3.06, + "learning_rate": 9.870361951938301e-06, + "loss": 0.0481, + "step": 65650 + }, + { + "epoch": 3.06, + "learning_rate": 9.869578166883515e-06, + "loss": 0.0439, + "step": 65655 + }, + { + "epoch": 3.06, + "learning_rate": 9.868794381828729e-06, + "loss": 0.0479, + "step": 65660 + }, + { + "epoch": 3.06, + "learning_rate": 9.868010596773941e-06, + "loss": 0.0606, + "step": 65665 + }, + { + "epoch": 3.06, + "learning_rate": 9.867226811719155e-06, + "loss": 0.0551, + "step": 65670 + }, + { + "epoch": 3.06, + "learning_rate": 9.866443026664369e-06, + "loss": 0.1018, + "step": 65675 + }, + { + "epoch": 3.06, + "learning_rate": 9.865659241609583e-06, + "loss": 0.0722, + "step": 65680 + }, + { + "epoch": 3.06, + "learning_rate": 9.864875456554795e-06, + "loss": 0.1421, + "step": 65685 + }, + { + "epoch": 3.07, + "learning_rate": 9.864091671500009e-06, + "loss": 0.2154, + "step": 65690 + }, + { + "epoch": 3.07, + "learning_rate": 9.863307886445223e-06, + "loss": 0.3339, + "step": 65695 + }, + { + "epoch": 3.07, + "learning_rate": 9.862524101390437e-06, + "loss": 0.0759, + "step": 65700 + }, + { + "epoch": 3.07, + "learning_rate": 9.861740316335649e-06, + "loss": 0.0287, + "step": 65705 + }, + { + "epoch": 3.07, + "learning_rate": 9.860956531280863e-06, + "loss": 0.096, + "step": 65710 + }, + { + "epoch": 3.07, + "learning_rate": 9.860172746226075e-06, + "loss": 0.0382, + "step": 65715 + }, + { + "epoch": 3.07, + "learning_rate": 9.859388961171289e-06, + "loss": 0.0794, + "step": 65720 + }, + { + "epoch": 3.07, + "learning_rate": 9.858605176116503e-06, + "loss": 0.1148, + "step": 65725 + }, + { + "epoch": 3.07, + "learning_rate": 9.857821391061717e-06, + "loss": 0.1004, + "step": 65730 + }, + { + "epoch": 3.07, + "learning_rate": 9.857037606006929e-06, + "loss": 0.1558, + "step": 65735 + }, + { + "epoch": 3.07, + "learning_rate": 9.856253820952143e-06, + "loss": 0.1553, + "step": 65740 + }, + { + "epoch": 3.07, + "learning_rate": 9.855470035897357e-06, + "loss": 0.3653, + "step": 65745 + }, + { + "epoch": 3.07, + "learning_rate": 9.85468625084257e-06, + "loss": 0.0707, + "step": 65750 + }, + { + "epoch": 3.07, + "learning_rate": 9.853902465787783e-06, + "loss": 0.0303, + "step": 65755 + }, + { + "epoch": 3.07, + "learning_rate": 9.853118680732997e-06, + "loss": 0.0592, + "step": 65760 + }, + { + "epoch": 3.07, + "learning_rate": 9.85233489567821e-06, + "loss": 0.0684, + "step": 65765 + }, + { + "epoch": 3.07, + "learning_rate": 9.851551110623423e-06, + "loss": 0.073, + "step": 65770 + }, + { + "epoch": 3.07, + "learning_rate": 9.850767325568637e-06, + "loss": 0.085, + "step": 65775 + }, + { + "epoch": 3.07, + "learning_rate": 9.84998354051385e-06, + "loss": 0.1083, + "step": 65780 + }, + { + "epoch": 3.07, + "learning_rate": 9.849199755459063e-06, + "loss": 0.1539, + "step": 65785 + }, + { + "epoch": 3.07, + "learning_rate": 9.848415970404277e-06, + "loss": 0.1221, + "step": 65790 + }, + { + "epoch": 3.07, + "learning_rate": 9.84763218534949e-06, + "loss": 0.3577, + "step": 65795 + }, + { + "epoch": 3.07, + "learning_rate": 9.846848400294704e-06, + "loss": 0.12, + "step": 65800 + }, + { + "epoch": 3.07, + "learning_rate": 9.846064615239917e-06, + "loss": 0.022, + "step": 65805 + }, + { + "epoch": 3.07, + "learning_rate": 9.84528083018513e-06, + "loss": 0.082, + "step": 65810 + }, + { + "epoch": 3.07, + "learning_rate": 9.844497045130345e-06, + "loss": 0.0643, + "step": 65815 + }, + { + "epoch": 3.07, + "learning_rate": 9.843713260075558e-06, + "loss": 0.0713, + "step": 65820 + }, + { + "epoch": 3.07, + "learning_rate": 9.842929475020772e-06, + "loss": 0.0629, + "step": 65825 + }, + { + "epoch": 3.07, + "learning_rate": 9.842145689965985e-06, + "loss": 0.1637, + "step": 65830 + }, + { + "epoch": 3.07, + "learning_rate": 9.841361904911197e-06, + "loss": 0.1241, + "step": 65835 + }, + { + "epoch": 3.07, + "learning_rate": 9.84057811985641e-06, + "loss": 0.2507, + "step": 65840 + }, + { + "epoch": 3.07, + "learning_rate": 9.839794334801625e-06, + "loss": 0.4136, + "step": 65845 + }, + { + "epoch": 3.07, + "learning_rate": 9.839010549746838e-06, + "loss": 0.0744, + "step": 65850 + }, + { + "epoch": 3.07, + "learning_rate": 9.83822676469205e-06, + "loss": 0.0148, + "step": 65855 + }, + { + "epoch": 3.07, + "learning_rate": 9.837442979637265e-06, + "loss": 0.0641, + "step": 65860 + }, + { + "epoch": 3.07, + "learning_rate": 9.836659194582478e-06, + "loss": 0.0579, + "step": 65865 + }, + { + "epoch": 3.07, + "learning_rate": 9.835875409527692e-06, + "loss": 0.0924, + "step": 65870 + }, + { + "epoch": 3.07, + "learning_rate": 9.835091624472906e-06, + "loss": 0.0568, + "step": 65875 + }, + { + "epoch": 3.07, + "learning_rate": 9.834307839418119e-06, + "loss": 0.1328, + "step": 65880 + }, + { + "epoch": 3.07, + "learning_rate": 9.833524054363332e-06, + "loss": 0.2094, + "step": 65885 + }, + { + "epoch": 3.07, + "learning_rate": 9.832740269308546e-06, + "loss": 0.1356, + "step": 65890 + }, + { + "epoch": 3.07, + "learning_rate": 9.83195648425376e-06, + "loss": 0.3647, + "step": 65895 + }, + { + "epoch": 3.07, + "learning_rate": 9.831172699198972e-06, + "loss": 0.0792, + "step": 65900 + }, + { + "epoch": 3.08, + "learning_rate": 9.830388914144185e-06, + "loss": 0.0556, + "step": 65905 + }, + { + "epoch": 3.08, + "learning_rate": 9.829605129089399e-06, + "loss": 0.0496, + "step": 65910 + }, + { + "epoch": 3.08, + "learning_rate": 9.828821344034612e-06, + "loss": 0.0758, + "step": 65915 + }, + { + "epoch": 3.08, + "learning_rate": 9.828037558979826e-06, + "loss": 0.0409, + "step": 65920 + }, + { + "epoch": 3.08, + "learning_rate": 9.82725377392504e-06, + "loss": 0.053, + "step": 65925 + }, + { + "epoch": 3.08, + "learning_rate": 9.826469988870252e-06, + "loss": 0.0676, + "step": 65930 + }, + { + "epoch": 3.08, + "learning_rate": 9.825686203815466e-06, + "loss": 0.1414, + "step": 65935 + }, + { + "epoch": 3.08, + "learning_rate": 9.82490241876068e-06, + "loss": 0.233, + "step": 65940 + }, + { + "epoch": 3.08, + "learning_rate": 9.824118633705894e-06, + "loss": 0.2629, + "step": 65945 + }, + { + "epoch": 3.08, + "learning_rate": 9.823334848651106e-06, + "loss": 0.0594, + "step": 65950 + }, + { + "epoch": 3.08, + "learning_rate": 9.82255106359632e-06, + "loss": 0.0329, + "step": 65955 + }, + { + "epoch": 3.08, + "learning_rate": 9.821767278541534e-06, + "loss": 0.0585, + "step": 65960 + }, + { + "epoch": 3.08, + "learning_rate": 9.820983493486746e-06, + "loss": 0.0716, + "step": 65965 + }, + { + "epoch": 3.08, + "learning_rate": 9.82019970843196e-06, + "loss": 0.0227, + "step": 65970 + }, + { + "epoch": 3.08, + "learning_rate": 9.819415923377174e-06, + "loss": 0.1145, + "step": 65975 + }, + { + "epoch": 3.08, + "learning_rate": 9.818632138322386e-06, + "loss": 0.0928, + "step": 65980 + }, + { + "epoch": 3.08, + "learning_rate": 9.8178483532676e-06, + "loss": 0.1671, + "step": 65985 + }, + { + "epoch": 3.08, + "learning_rate": 9.817064568212814e-06, + "loss": 0.3238, + "step": 65990 + }, + { + "epoch": 3.08, + "learning_rate": 9.816280783158028e-06, + "loss": 0.2865, + "step": 65995 + }, + { + "epoch": 3.08, + "learning_rate": 9.81549699810324e-06, + "loss": 0.0648, + "step": 66000 + }, + { + "epoch": 3.08, + "learning_rate": 9.814713213048454e-06, + "loss": 0.0084, + "step": 66005 + }, + { + "epoch": 3.08, + "learning_rate": 9.813929427993668e-06, + "loss": 0.0627, + "step": 66010 + }, + { + "epoch": 3.08, + "learning_rate": 9.813145642938882e-06, + "loss": 0.0375, + "step": 66015 + }, + { + "epoch": 3.08, + "learning_rate": 9.812361857884094e-06, + "loss": 0.0965, + "step": 66020 + }, + { + "epoch": 3.08, + "learning_rate": 9.811578072829308e-06, + "loss": 0.0915, + "step": 66025 + }, + { + "epoch": 3.08, + "learning_rate": 9.81079428777452e-06, + "loss": 0.1729, + "step": 66030 + }, + { + "epoch": 3.08, + "learning_rate": 9.810010502719734e-06, + "loss": 0.0954, + "step": 66035 + }, + { + "epoch": 3.08, + "learning_rate": 9.809226717664948e-06, + "loss": 0.1511, + "step": 66040 + }, + { + "epoch": 3.08, + "learning_rate": 9.808442932610162e-06, + "loss": 0.2462, + "step": 66045 + }, + { + "epoch": 3.08, + "learning_rate": 9.807659147555374e-06, + "loss": 0.0609, + "step": 66050 + }, + { + "epoch": 3.08, + "learning_rate": 9.806875362500588e-06, + "loss": 0.063, + "step": 66055 + }, + { + "epoch": 3.08, + "learning_rate": 9.806091577445802e-06, + "loss": 0.042, + "step": 66060 + }, + { + "epoch": 3.08, + "learning_rate": 9.805307792391016e-06, + "loss": 0.0742, + "step": 66065 + }, + { + "epoch": 3.08, + "learning_rate": 9.804524007336228e-06, + "loss": 0.0461, + "step": 66070 + }, + { + "epoch": 3.08, + "learning_rate": 9.803740222281442e-06, + "loss": 0.1103, + "step": 66075 + }, + { + "epoch": 3.08, + "learning_rate": 9.802956437226656e-06, + "loss": 0.1392, + "step": 66080 + }, + { + "epoch": 3.08, + "learning_rate": 9.80217265217187e-06, + "loss": 0.112, + "step": 66085 + }, + { + "epoch": 3.08, + "learning_rate": 9.801388867117084e-06, + "loss": 0.1597, + "step": 66090 + }, + { + "epoch": 3.08, + "learning_rate": 9.800605082062296e-06, + "loss": 0.3931, + "step": 66095 + }, + { + "epoch": 3.08, + "learning_rate": 9.799821297007508e-06, + "loss": 0.038, + "step": 66100 + }, + { + "epoch": 3.08, + "learning_rate": 9.799037511952722e-06, + "loss": 0.0841, + "step": 66105 + }, + { + "epoch": 3.08, + "learning_rate": 9.798253726897936e-06, + "loss": 0.0283, + "step": 66110 + }, + { + "epoch": 3.09, + "learning_rate": 9.79746994184315e-06, + "loss": 0.0556, + "step": 66115 + }, + { + "epoch": 3.09, + "learning_rate": 9.796686156788362e-06, + "loss": 0.0488, + "step": 66120 + }, + { + "epoch": 3.09, + "learning_rate": 9.795902371733576e-06, + "loss": 0.0714, + "step": 66125 + }, + { + "epoch": 3.09, + "learning_rate": 9.79511858667879e-06, + "loss": 0.0883, + "step": 66130 + }, + { + "epoch": 3.09, + "learning_rate": 9.794334801624004e-06, + "loss": 0.0689, + "step": 66135 + }, + { + "epoch": 3.09, + "learning_rate": 9.793551016569218e-06, + "loss": 0.2007, + "step": 66140 + }, + { + "epoch": 3.09, + "learning_rate": 9.79276723151443e-06, + "loss": 0.3548, + "step": 66145 + }, + { + "epoch": 3.09, + "learning_rate": 9.791983446459644e-06, + "loss": 0.0955, + "step": 66150 + }, + { + "epoch": 3.09, + "learning_rate": 9.791199661404858e-06, + "loss": 0.0395, + "step": 66155 + }, + { + "epoch": 3.09, + "learning_rate": 9.79041587635007e-06, + "loss": 0.0361, + "step": 66160 + }, + { + "epoch": 3.09, + "learning_rate": 9.789632091295284e-06, + "loss": 0.0398, + "step": 66165 + }, + { + "epoch": 3.09, + "learning_rate": 9.788848306240496e-06, + "loss": 0.0547, + "step": 66170 + }, + { + "epoch": 3.09, + "learning_rate": 9.78806452118571e-06, + "loss": 0.0618, + "step": 66175 + }, + { + "epoch": 3.09, + "learning_rate": 9.787280736130924e-06, + "loss": 0.1462, + "step": 66180 + }, + { + "epoch": 3.09, + "learning_rate": 9.786496951076138e-06, + "loss": 0.1491, + "step": 66185 + }, + { + "epoch": 3.09, + "learning_rate": 9.785713166021352e-06, + "loss": 0.177, + "step": 66190 + }, + { + "epoch": 3.09, + "learning_rate": 9.784929380966564e-06, + "loss": 0.2986, + "step": 66195 + }, + { + "epoch": 3.09, + "learning_rate": 9.784145595911778e-06, + "loss": 0.0846, + "step": 66200 + }, + { + "epoch": 3.09, + "learning_rate": 9.783361810856992e-06, + "loss": 0.0278, + "step": 66205 + }, + { + "epoch": 3.09, + "learning_rate": 9.782578025802206e-06, + "loss": 0.0166, + "step": 66210 + }, + { + "epoch": 3.09, + "learning_rate": 9.781794240747418e-06, + "loss": 0.051, + "step": 66215 + }, + { + "epoch": 3.09, + "learning_rate": 9.781010455692632e-06, + "loss": 0.0734, + "step": 66220 + }, + { + "epoch": 3.09, + "learning_rate": 9.780226670637844e-06, + "loss": 0.0638, + "step": 66225 + }, + { + "epoch": 3.09, + "learning_rate": 9.779442885583058e-06, + "loss": 0.0957, + "step": 66230 + }, + { + "epoch": 3.09, + "learning_rate": 9.778659100528272e-06, + "loss": 0.1582, + "step": 66235 + }, + { + "epoch": 3.09, + "learning_rate": 9.777875315473486e-06, + "loss": 0.2031, + "step": 66240 + }, + { + "epoch": 3.09, + "learning_rate": 9.777091530418698e-06, + "loss": 0.3371, + "step": 66245 + }, + { + "epoch": 3.09, + "learning_rate": 9.776307745363912e-06, + "loss": 0.0844, + "step": 66250 + }, + { + "epoch": 3.09, + "learning_rate": 9.775523960309126e-06, + "loss": 0.0135, + "step": 66255 + }, + { + "epoch": 3.09, + "learning_rate": 9.77474017525434e-06, + "loss": 0.0855, + "step": 66260 + }, + { + "epoch": 3.09, + "learning_rate": 9.773956390199552e-06, + "loss": 0.0722, + "step": 66265 + }, + { + "epoch": 3.09, + "learning_rate": 9.773172605144766e-06, + "loss": 0.1067, + "step": 66270 + }, + { + "epoch": 3.09, + "learning_rate": 9.77238882008998e-06, + "loss": 0.118, + "step": 66275 + }, + { + "epoch": 3.09, + "learning_rate": 9.771605035035194e-06, + "loss": 0.193, + "step": 66280 + }, + { + "epoch": 3.09, + "learning_rate": 9.770821249980406e-06, + "loss": 0.172, + "step": 66285 + }, + { + "epoch": 3.09, + "learning_rate": 9.77003746492562e-06, + "loss": 0.2177, + "step": 66290 + }, + { + "epoch": 3.09, + "learning_rate": 9.769253679870832e-06, + "loss": 0.46, + "step": 66295 + }, + { + "epoch": 3.09, + "learning_rate": 9.768469894816046e-06, + "loss": 0.085, + "step": 66300 + }, + { + "epoch": 3.09, + "learning_rate": 9.76768610976126e-06, + "loss": 0.027, + "step": 66305 + }, + { + "epoch": 3.09, + "learning_rate": 9.766902324706474e-06, + "loss": 0.0324, + "step": 66310 + }, + { + "epoch": 3.09, + "learning_rate": 9.766118539651686e-06, + "loss": 0.0424, + "step": 66315 + }, + { + "epoch": 3.09, + "learning_rate": 9.7653347545969e-06, + "loss": 0.1014, + "step": 66320 + }, + { + "epoch": 3.09, + "learning_rate": 9.764550969542114e-06, + "loss": 0.1147, + "step": 66325 + }, + { + "epoch": 3.1, + "learning_rate": 9.763767184487328e-06, + "loss": 0.1188, + "step": 66330 + }, + { + "epoch": 3.1, + "learning_rate": 9.76298339943254e-06, + "loss": 0.2082, + "step": 66335 + }, + { + "epoch": 3.1, + "learning_rate": 9.762199614377754e-06, + "loss": 0.2368, + "step": 66340 + }, + { + "epoch": 3.1, + "learning_rate": 9.761415829322968e-06, + "loss": 0.2593, + "step": 66345 + }, + { + "epoch": 3.1, + "learning_rate": 9.760632044268182e-06, + "loss": 0.0695, + "step": 66350 + }, + { + "epoch": 3.1, + "learning_rate": 9.759848259213394e-06, + "loss": 0.0248, + "step": 66355 + }, + { + "epoch": 3.1, + "learning_rate": 9.759064474158608e-06, + "loss": 0.048, + "step": 66360 + }, + { + "epoch": 3.1, + "learning_rate": 9.75828068910382e-06, + "loss": 0.0906, + "step": 66365 + }, + { + "epoch": 3.1, + "learning_rate": 9.757496904049034e-06, + "loss": 0.1016, + "step": 66370 + }, + { + "epoch": 3.1, + "learning_rate": 9.756713118994248e-06, + "loss": 0.0903, + "step": 66375 + }, + { + "epoch": 3.1, + "learning_rate": 9.755929333939462e-06, + "loss": 0.1557, + "step": 66380 + }, + { + "epoch": 3.1, + "learning_rate": 9.755145548884674e-06, + "loss": 0.2118, + "step": 66385 + }, + { + "epoch": 3.1, + "learning_rate": 9.754361763829888e-06, + "loss": 0.3594, + "step": 66390 + }, + { + "epoch": 3.1, + "learning_rate": 9.753577978775102e-06, + "loss": 0.3155, + "step": 66395 + }, + { + "epoch": 3.1, + "learning_rate": 9.752794193720316e-06, + "loss": 0.0644, + "step": 66400 + }, + { + "epoch": 3.1, + "learning_rate": 9.75201040866553e-06, + "loss": 0.0568, + "step": 66405 + }, + { + "epoch": 3.1, + "learning_rate": 9.751226623610742e-06, + "loss": 0.0713, + "step": 66410 + }, + { + "epoch": 3.1, + "learning_rate": 9.750442838555956e-06, + "loss": 0.0646, + "step": 66415 + }, + { + "epoch": 3.1, + "learning_rate": 9.749659053501168e-06, + "loss": 0.0589, + "step": 66420 + }, + { + "epoch": 3.1, + "learning_rate": 9.748875268446382e-06, + "loss": 0.1199, + "step": 66425 + }, + { + "epoch": 3.1, + "learning_rate": 9.748091483391596e-06, + "loss": 0.0658, + "step": 66430 + }, + { + "epoch": 3.1, + "learning_rate": 9.747307698336808e-06, + "loss": 0.1286, + "step": 66435 + }, + { + "epoch": 3.1, + "learning_rate": 9.746523913282022e-06, + "loss": 0.2039, + "step": 66440 + }, + { + "epoch": 3.1, + "learning_rate": 9.745740128227236e-06, + "loss": 0.3109, + "step": 66445 + }, + { + "epoch": 3.1, + "learning_rate": 9.74495634317245e-06, + "loss": 0.0421, + "step": 66450 + }, + { + "epoch": 3.1, + "learning_rate": 9.744172558117663e-06, + "loss": 0.0413, + "step": 66455 + }, + { + "epoch": 3.1, + "learning_rate": 9.743388773062876e-06, + "loss": 0.0249, + "step": 66460 + }, + { + "epoch": 3.1, + "learning_rate": 9.74260498800809e-06, + "loss": 0.0545, + "step": 66465 + }, + { + "epoch": 3.1, + "learning_rate": 9.741821202953303e-06, + "loss": 0.099, + "step": 66470 + }, + { + "epoch": 3.1, + "learning_rate": 9.741037417898517e-06, + "loss": 0.0948, + "step": 66475 + }, + { + "epoch": 3.1, + "learning_rate": 9.74025363284373e-06, + "loss": 0.0998, + "step": 66480 + }, + { + "epoch": 3.1, + "learning_rate": 9.739469847788942e-06, + "loss": 0.1262, + "step": 66485 + }, + { + "epoch": 3.1, + "learning_rate": 9.738686062734156e-06, + "loss": 0.1912, + "step": 66490 + }, + { + "epoch": 3.1, + "learning_rate": 9.73790227767937e-06, + "loss": 0.2891, + "step": 66495 + }, + { + "epoch": 3.1, + "learning_rate": 9.737118492624584e-06, + "loss": 0.0749, + "step": 66500 + }, + { + "epoch": 3.1, + "learning_rate": 9.736334707569797e-06, + "loss": 0.1176, + "step": 66505 + }, + { + "epoch": 3.1, + "learning_rate": 9.73555092251501e-06, + "loss": 0.0257, + "step": 66510 + }, + { + "epoch": 3.1, + "learning_rate": 9.734767137460224e-06, + "loss": 0.0613, + "step": 66515 + }, + { + "epoch": 3.1, + "learning_rate": 9.733983352405437e-06, + "loss": 0.1069, + "step": 66520 + }, + { + "epoch": 3.1, + "learning_rate": 9.733199567350651e-06, + "loss": 0.0754, + "step": 66525 + }, + { + "epoch": 3.1, + "learning_rate": 9.732415782295864e-06, + "loss": 0.0548, + "step": 66530 + }, + { + "epoch": 3.1, + "learning_rate": 9.731631997241077e-06, + "loss": 0.1278, + "step": 66535 + }, + { + "epoch": 3.1, + "learning_rate": 9.730848212186291e-06, + "loss": 0.1902, + "step": 66540 + }, + { + "epoch": 3.11, + "learning_rate": 9.730064427131505e-06, + "loss": 0.3116, + "step": 66545 + }, + { + "epoch": 3.11, + "learning_rate": 9.729280642076718e-06, + "loss": 0.0847, + "step": 66550 + }, + { + "epoch": 3.11, + "learning_rate": 9.728496857021931e-06, + "loss": 0.0388, + "step": 66555 + }, + { + "epoch": 3.11, + "learning_rate": 9.727713071967144e-06, + "loss": 0.0235, + "step": 66560 + }, + { + "epoch": 3.11, + "learning_rate": 9.726929286912358e-06, + "loss": 0.0432, + "step": 66565 + }, + { + "epoch": 3.11, + "learning_rate": 9.726145501857571e-06, + "loss": 0.0687, + "step": 66570 + }, + { + "epoch": 3.11, + "learning_rate": 9.725361716802785e-06, + "loss": 0.1861, + "step": 66575 + }, + { + "epoch": 3.11, + "learning_rate": 9.724577931747998e-06, + "loss": 0.1056, + "step": 66580 + }, + { + "epoch": 3.11, + "learning_rate": 9.723794146693211e-06, + "loss": 0.1404, + "step": 66585 + }, + { + "epoch": 3.11, + "learning_rate": 9.723010361638425e-06, + "loss": 0.2881, + "step": 66590 + }, + { + "epoch": 3.11, + "learning_rate": 9.72222657658364e-06, + "loss": 0.2303, + "step": 66595 + }, + { + "epoch": 3.11, + "learning_rate": 9.721442791528851e-06, + "loss": 0.0623, + "step": 66600 + }, + { + "epoch": 3.11, + "learning_rate": 9.720659006474065e-06, + "loss": 0.016, + "step": 66605 + }, + { + "epoch": 3.11, + "learning_rate": 9.71987522141928e-06, + "loss": 0.0418, + "step": 66610 + }, + { + "epoch": 3.11, + "learning_rate": 9.719091436364492e-06, + "loss": 0.0622, + "step": 66615 + }, + { + "epoch": 3.11, + "learning_rate": 9.718307651309705e-06, + "loss": 0.163, + "step": 66620 + }, + { + "epoch": 3.11, + "learning_rate": 9.71752386625492e-06, + "loss": 0.1038, + "step": 66625 + }, + { + "epoch": 3.11, + "learning_rate": 9.716740081200132e-06, + "loss": 0.1165, + "step": 66630 + }, + { + "epoch": 3.11, + "learning_rate": 9.715956296145345e-06, + "loss": 0.0962, + "step": 66635 + }, + { + "epoch": 3.11, + "learning_rate": 9.71517251109056e-06, + "loss": 0.164, + "step": 66640 + }, + { + "epoch": 3.11, + "learning_rate": 9.714388726035773e-06, + "loss": 0.4056, + "step": 66645 + }, + { + "epoch": 3.11, + "learning_rate": 9.713604940980985e-06, + "loss": 0.0518, + "step": 66650 + }, + { + "epoch": 3.11, + "learning_rate": 9.7128211559262e-06, + "loss": 0.069, + "step": 66655 + }, + { + "epoch": 3.11, + "learning_rate": 9.712037370871413e-06, + "loss": 0.0428, + "step": 66660 + }, + { + "epoch": 3.11, + "learning_rate": 9.711253585816627e-06, + "loss": 0.1114, + "step": 66665 + }, + { + "epoch": 3.11, + "learning_rate": 9.710469800761841e-06, + "loss": 0.0601, + "step": 66670 + }, + { + "epoch": 3.11, + "learning_rate": 9.709686015707053e-06, + "loss": 0.0506, + "step": 66675 + }, + { + "epoch": 3.11, + "learning_rate": 9.708902230652266e-06, + "loss": 0.1785, + "step": 66680 + }, + { + "epoch": 3.11, + "learning_rate": 9.70811844559748e-06, + "loss": 0.198, + "step": 66685 + }, + { + "epoch": 3.11, + "learning_rate": 9.707334660542693e-06, + "loss": 0.2683, + "step": 66690 + }, + { + "epoch": 3.11, + "learning_rate": 9.706550875487907e-06, + "loss": 0.3954, + "step": 66695 + }, + { + "epoch": 3.11, + "learning_rate": 9.70576709043312e-06, + "loss": 0.081, + "step": 66700 + }, + { + "epoch": 3.11, + "learning_rate": 9.704983305378333e-06, + "loss": 0.0303, + "step": 66705 + }, + { + "epoch": 3.11, + "learning_rate": 9.704199520323547e-06, + "loss": 0.0428, + "step": 66710 + }, + { + "epoch": 3.11, + "learning_rate": 9.703415735268761e-06, + "loss": 0.0387, + "step": 66715 + }, + { + "epoch": 3.11, + "learning_rate": 9.702631950213975e-06, + "loss": 0.0866, + "step": 66720 + }, + { + "epoch": 3.11, + "learning_rate": 9.701848165159187e-06, + "loss": 0.1363, + "step": 66725 + }, + { + "epoch": 3.11, + "learning_rate": 9.701064380104401e-06, + "loss": 0.208, + "step": 66730 + }, + { + "epoch": 3.11, + "learning_rate": 9.700280595049615e-06, + "loss": 0.1501, + "step": 66735 + }, + { + "epoch": 3.11, + "learning_rate": 9.699496809994829e-06, + "loss": 0.1701, + "step": 66740 + }, + { + "epoch": 3.11, + "learning_rate": 9.698713024940041e-06, + "loss": 0.23, + "step": 66745 + }, + { + "epoch": 3.11, + "learning_rate": 9.697929239885253e-06, + "loss": 0.0608, + "step": 66750 + }, + { + "epoch": 3.11, + "learning_rate": 9.697145454830467e-06, + "loss": 0.0249, + "step": 66755 + }, + { + "epoch": 3.12, + "learning_rate": 9.696361669775681e-06, + "loss": 0.0245, + "step": 66760 + }, + { + "epoch": 3.12, + "learning_rate": 9.695577884720895e-06, + "loss": 0.0743, + "step": 66765 + }, + { + "epoch": 3.12, + "learning_rate": 9.694794099666109e-06, + "loss": 0.0734, + "step": 66770 + }, + { + "epoch": 3.12, + "learning_rate": 9.694010314611321e-06, + "loss": 0.0864, + "step": 66775 + }, + { + "epoch": 3.12, + "learning_rate": 9.693226529556535e-06, + "loss": 0.1432, + "step": 66780 + }, + { + "epoch": 3.12, + "learning_rate": 9.692442744501749e-06, + "loss": 0.1472, + "step": 66785 + }, + { + "epoch": 3.12, + "learning_rate": 9.691658959446963e-06, + "loss": 0.0936, + "step": 66790 + }, + { + "epoch": 3.12, + "learning_rate": 9.690875174392175e-06, + "loss": 0.3271, + "step": 66795 + }, + { + "epoch": 3.12, + "learning_rate": 9.690091389337389e-06, + "loss": 0.0738, + "step": 66800 + }, + { + "epoch": 3.12, + "learning_rate": 9.689307604282603e-06, + "loss": 0.0401, + "step": 66805 + }, + { + "epoch": 3.12, + "learning_rate": 9.688523819227815e-06, + "loss": 0.0529, + "step": 66810 + }, + { + "epoch": 3.12, + "learning_rate": 9.687740034173029e-06, + "loss": 0.0409, + "step": 66815 + }, + { + "epoch": 3.12, + "learning_rate": 9.686956249118243e-06, + "loss": 0.0777, + "step": 66820 + }, + { + "epoch": 3.12, + "learning_rate": 9.686172464063455e-06, + "loss": 0.1198, + "step": 66825 + }, + { + "epoch": 3.12, + "learning_rate": 9.685388679008669e-06, + "loss": 0.1342, + "step": 66830 + }, + { + "epoch": 3.12, + "learning_rate": 9.684604893953883e-06, + "loss": 0.0932, + "step": 66835 + }, + { + "epoch": 3.12, + "learning_rate": 9.683821108899097e-06, + "loss": 0.2755, + "step": 66840 + }, + { + "epoch": 3.12, + "learning_rate": 9.68303732384431e-06, + "loss": 0.2539, + "step": 66845 + }, + { + "epoch": 3.12, + "learning_rate": 9.682253538789523e-06, + "loss": 0.059, + "step": 66850 + }, + { + "epoch": 3.12, + "learning_rate": 9.681469753734737e-06, + "loss": 0.0206, + "step": 66855 + }, + { + "epoch": 3.12, + "learning_rate": 9.680685968679951e-06, + "loss": 0.0485, + "step": 66860 + }, + { + "epoch": 3.12, + "learning_rate": 9.679902183625163e-06, + "loss": 0.0562, + "step": 66865 + }, + { + "epoch": 3.12, + "learning_rate": 9.679118398570377e-06, + "loss": 0.0851, + "step": 66870 + }, + { + "epoch": 3.12, + "learning_rate": 9.67833461351559e-06, + "loss": 0.1055, + "step": 66875 + }, + { + "epoch": 3.12, + "learning_rate": 9.677550828460803e-06, + "loss": 0.0656, + "step": 66880 + }, + { + "epoch": 3.12, + "learning_rate": 9.676767043406017e-06, + "loss": 0.1683, + "step": 66885 + }, + { + "epoch": 3.12, + "learning_rate": 9.675983258351231e-06, + "loss": 0.1515, + "step": 66890 + }, + { + "epoch": 3.12, + "learning_rate": 9.675199473296443e-06, + "loss": 0.3496, + "step": 66895 + }, + { + "epoch": 3.12, + "learning_rate": 9.674415688241657e-06, + "loss": 0.0908, + "step": 66900 + }, + { + "epoch": 3.12, + "learning_rate": 9.673631903186871e-06, + "loss": 0.0156, + "step": 66905 + }, + { + "epoch": 3.12, + "learning_rate": 9.672848118132085e-06, + "loss": 0.0492, + "step": 66910 + }, + { + "epoch": 3.12, + "learning_rate": 9.672064333077297e-06, + "loss": 0.0519, + "step": 66915 + }, + { + "epoch": 3.12, + "learning_rate": 9.671280548022511e-06, + "loss": 0.1146, + "step": 66920 + }, + { + "epoch": 3.12, + "learning_rate": 9.670496762967725e-06, + "loss": 0.1169, + "step": 66925 + }, + { + "epoch": 3.12, + "learning_rate": 9.669712977912939e-06, + "loss": 0.0662, + "step": 66930 + }, + { + "epoch": 3.12, + "learning_rate": 9.668929192858153e-06, + "loss": 0.1658, + "step": 66935 + }, + { + "epoch": 3.12, + "learning_rate": 9.668145407803365e-06, + "loss": 0.1625, + "step": 66940 + }, + { + "epoch": 3.12, + "learning_rate": 9.667361622748577e-06, + "loss": 0.1685, + "step": 66945 + }, + { + "epoch": 3.12, + "learning_rate": 9.666577837693791e-06, + "loss": 0.0706, + "step": 66950 + }, + { + "epoch": 3.12, + "learning_rate": 9.665794052639005e-06, + "loss": 0.0915, + "step": 66955 + }, + { + "epoch": 3.12, + "learning_rate": 9.665010267584219e-06, + "loss": 0.0695, + "step": 66960 + }, + { + "epoch": 3.12, + "learning_rate": 9.664226482529431e-06, + "loss": 0.0764, + "step": 66965 + }, + { + "epoch": 3.12, + "learning_rate": 9.663442697474645e-06, + "loss": 0.0657, + "step": 66970 + }, + { + "epoch": 3.13, + "learning_rate": 9.662658912419859e-06, + "loss": 0.0836, + "step": 66975 + }, + { + "epoch": 3.13, + "learning_rate": 9.661875127365073e-06, + "loss": 0.0888, + "step": 66980 + }, + { + "epoch": 3.13, + "learning_rate": 9.661091342310287e-06, + "loss": 0.1487, + "step": 66985 + }, + { + "epoch": 3.13, + "learning_rate": 9.660307557255499e-06, + "loss": 0.3276, + "step": 66990 + }, + { + "epoch": 3.13, + "learning_rate": 9.659523772200713e-06, + "loss": 0.3517, + "step": 66995 + }, + { + "epoch": 3.13, + "learning_rate": 9.658739987145927e-06, + "loss": 0.0516, + "step": 67000 + }, + { + "epoch": 3.13, + "learning_rate": 9.657956202091139e-06, + "loss": 0.0154, + "step": 67005 + }, + { + "epoch": 3.13, + "learning_rate": 9.657172417036353e-06, + "loss": 0.0361, + "step": 67010 + }, + { + "epoch": 3.13, + "learning_rate": 9.656388631981565e-06, + "loss": 0.0564, + "step": 67015 + }, + { + "epoch": 3.13, + "learning_rate": 9.655604846926779e-06, + "loss": 0.0617, + "step": 67020 + }, + { + "epoch": 3.13, + "learning_rate": 9.654821061871993e-06, + "loss": 0.0484, + "step": 67025 + }, + { + "epoch": 3.13, + "learning_rate": 9.654037276817207e-06, + "loss": 0.0938, + "step": 67030 + }, + { + "epoch": 3.13, + "learning_rate": 9.65325349176242e-06, + "loss": 0.104, + "step": 67035 + }, + { + "epoch": 3.13, + "learning_rate": 9.652469706707633e-06, + "loss": 0.2527, + "step": 67040 + }, + { + "epoch": 3.13, + "learning_rate": 9.651685921652847e-06, + "loss": 0.2725, + "step": 67045 + }, + { + "epoch": 3.13, + "learning_rate": 9.65090213659806e-06, + "loss": 0.1013, + "step": 67050 + }, + { + "epoch": 3.13, + "learning_rate": 9.650118351543275e-06, + "loss": 0.0622, + "step": 67055 + }, + { + "epoch": 3.13, + "learning_rate": 9.649334566488487e-06, + "loss": 0.0481, + "step": 67060 + }, + { + "epoch": 3.13, + "learning_rate": 9.6485507814337e-06, + "loss": 0.0547, + "step": 67065 + }, + { + "epoch": 3.13, + "learning_rate": 9.647766996378913e-06, + "loss": 0.1045, + "step": 67070 + }, + { + "epoch": 3.13, + "learning_rate": 9.646983211324127e-06, + "loss": 0.0833, + "step": 67075 + }, + { + "epoch": 3.13, + "learning_rate": 9.64619942626934e-06, + "loss": 0.1074, + "step": 67080 + }, + { + "epoch": 3.13, + "learning_rate": 9.645415641214555e-06, + "loss": 0.1298, + "step": 67085 + }, + { + "epoch": 3.13, + "learning_rate": 9.644631856159767e-06, + "loss": 0.1969, + "step": 67090 + }, + { + "epoch": 3.13, + "learning_rate": 9.64384807110498e-06, + "loss": 0.2585, + "step": 67095 + }, + { + "epoch": 3.13, + "learning_rate": 9.643064286050195e-06, + "loss": 0.0726, + "step": 67100 + }, + { + "epoch": 3.13, + "learning_rate": 9.642280500995409e-06, + "loss": 0.0397, + "step": 67105 + }, + { + "epoch": 3.13, + "learning_rate": 9.64149671594062e-06, + "loss": 0.0865, + "step": 67110 + }, + { + "epoch": 3.13, + "learning_rate": 9.640712930885835e-06, + "loss": 0.0552, + "step": 67115 + }, + { + "epoch": 3.13, + "learning_rate": 9.639929145831049e-06, + "loss": 0.0323, + "step": 67120 + }, + { + "epoch": 3.13, + "learning_rate": 9.639145360776262e-06, + "loss": 0.0693, + "step": 67125 + }, + { + "epoch": 3.13, + "learning_rate": 9.638361575721475e-06, + "loss": 0.0847, + "step": 67130 + }, + { + "epoch": 3.13, + "learning_rate": 9.637577790666689e-06, + "loss": 0.1027, + "step": 67135 + }, + { + "epoch": 3.13, + "learning_rate": 9.6367940056119e-06, + "loss": 0.2035, + "step": 67140 + }, + { + "epoch": 3.13, + "learning_rate": 9.636010220557115e-06, + "loss": 0.225, + "step": 67145 + }, + { + "epoch": 3.13, + "learning_rate": 9.635226435502329e-06, + "loss": 0.1034, + "step": 67150 + }, + { + "epoch": 3.13, + "learning_rate": 9.634442650447543e-06, + "loss": 0.0304, + "step": 67155 + }, + { + "epoch": 3.13, + "learning_rate": 9.633658865392755e-06, + "loss": 0.0912, + "step": 67160 + }, + { + "epoch": 3.13, + "learning_rate": 9.632875080337969e-06, + "loss": 0.0627, + "step": 67165 + }, + { + "epoch": 3.13, + "learning_rate": 9.632091295283183e-06, + "loss": 0.0269, + "step": 67170 + }, + { + "epoch": 3.13, + "learning_rate": 9.631307510228396e-06, + "loss": 0.0673, + "step": 67175 + }, + { + "epoch": 3.13, + "learning_rate": 9.630523725173609e-06, + "loss": 0.1514, + "step": 67180 + }, + { + "epoch": 3.13, + "learning_rate": 9.629739940118823e-06, + "loss": 0.146, + "step": 67185 + }, + { + "epoch": 3.14, + "learning_rate": 9.628956155064036e-06, + "loss": 0.185, + "step": 67190 + }, + { + "epoch": 3.14, + "learning_rate": 9.62817237000925e-06, + "loss": 0.3927, + "step": 67195 + }, + { + "epoch": 3.14, + "learning_rate": 9.627388584954463e-06, + "loss": 0.0939, + "step": 67200 + }, + { + "epoch": 3.14, + "learning_rate": 9.626604799899676e-06, + "loss": 0.034, + "step": 67205 + }, + { + "epoch": 3.14, + "learning_rate": 9.625821014844889e-06, + "loss": 0.0339, + "step": 67210 + }, + { + "epoch": 3.14, + "learning_rate": 9.625037229790103e-06, + "loss": 0.1123, + "step": 67215 + }, + { + "epoch": 3.14, + "learning_rate": 9.624253444735317e-06, + "loss": 0.0462, + "step": 67220 + }, + { + "epoch": 3.14, + "learning_rate": 9.62346965968053e-06, + "loss": 0.1015, + "step": 67225 + }, + { + "epoch": 3.14, + "learning_rate": 9.622685874625743e-06, + "loss": 0.1491, + "step": 67230 + }, + { + "epoch": 3.14, + "learning_rate": 9.621902089570957e-06, + "loss": 0.1363, + "step": 67235 + }, + { + "epoch": 3.14, + "learning_rate": 9.62111830451617e-06, + "loss": 0.2497, + "step": 67240 + }, + { + "epoch": 3.14, + "learning_rate": 9.620334519461384e-06, + "loss": 0.2666, + "step": 67245 + }, + { + "epoch": 3.14, + "learning_rate": 9.619550734406598e-06, + "loss": 0.0863, + "step": 67250 + }, + { + "epoch": 3.14, + "learning_rate": 9.61876694935181e-06, + "loss": 0.0429, + "step": 67255 + }, + { + "epoch": 3.14, + "learning_rate": 9.617983164297024e-06, + "loss": 0.0542, + "step": 67260 + }, + { + "epoch": 3.14, + "learning_rate": 9.617199379242237e-06, + "loss": 0.0543, + "step": 67265 + }, + { + "epoch": 3.14, + "learning_rate": 9.61641559418745e-06, + "loss": 0.0545, + "step": 67270 + }, + { + "epoch": 3.14, + "learning_rate": 9.615631809132664e-06, + "loss": 0.0679, + "step": 67275 + }, + { + "epoch": 3.14, + "learning_rate": 9.614848024077877e-06, + "loss": 0.1376, + "step": 67280 + }, + { + "epoch": 3.14, + "learning_rate": 9.61406423902309e-06, + "loss": 0.1443, + "step": 67285 + }, + { + "epoch": 3.14, + "learning_rate": 9.613280453968304e-06, + "loss": 0.1009, + "step": 67290 + }, + { + "epoch": 3.14, + "learning_rate": 9.612496668913518e-06, + "loss": 0.2522, + "step": 67295 + }, + { + "epoch": 3.14, + "learning_rate": 9.611712883858732e-06, + "loss": 0.0529, + "step": 67300 + }, + { + "epoch": 3.14, + "learning_rate": 9.610929098803944e-06, + "loss": 0.0183, + "step": 67305 + }, + { + "epoch": 3.14, + "learning_rate": 9.610145313749158e-06, + "loss": 0.0619, + "step": 67310 + }, + { + "epoch": 3.14, + "learning_rate": 9.609361528694372e-06, + "loss": 0.0607, + "step": 67315 + }, + { + "epoch": 3.14, + "learning_rate": 9.608577743639586e-06, + "loss": 0.0888, + "step": 67320 + }, + { + "epoch": 3.14, + "learning_rate": 9.607793958584798e-06, + "loss": 0.1036, + "step": 67325 + }, + { + "epoch": 3.14, + "learning_rate": 9.60701017353001e-06, + "loss": 0.117, + "step": 67330 + }, + { + "epoch": 3.14, + "learning_rate": 9.606226388475224e-06, + "loss": 0.1202, + "step": 67335 + }, + { + "epoch": 3.14, + "learning_rate": 9.605442603420438e-06, + "loss": 0.2976, + "step": 67340 + }, + { + "epoch": 3.14, + "learning_rate": 9.604658818365652e-06, + "loss": 0.3352, + "step": 67345 + }, + { + "epoch": 3.14, + "learning_rate": 9.603875033310866e-06, + "loss": 0.0541, + "step": 67350 + }, + { + "epoch": 3.14, + "learning_rate": 9.603091248256078e-06, + "loss": 0.017, + "step": 67355 + }, + { + "epoch": 3.14, + "learning_rate": 9.602307463201292e-06, + "loss": 0.0692, + "step": 67360 + }, + { + "epoch": 3.14, + "learning_rate": 9.601523678146506e-06, + "loss": 0.0356, + "step": 67365 + }, + { + "epoch": 3.14, + "learning_rate": 9.60073989309172e-06, + "loss": 0.0566, + "step": 67370 + }, + { + "epoch": 3.14, + "learning_rate": 9.599956108036932e-06, + "loss": 0.0287, + "step": 67375 + }, + { + "epoch": 3.14, + "learning_rate": 9.599172322982146e-06, + "loss": 0.1013, + "step": 67380 + }, + { + "epoch": 3.14, + "learning_rate": 9.59838853792736e-06, + "loss": 0.1021, + "step": 67385 + }, + { + "epoch": 3.14, + "learning_rate": 9.597604752872574e-06, + "loss": 0.1245, + "step": 67390 + }, + { + "epoch": 3.14, + "learning_rate": 9.596820967817786e-06, + "loss": 0.2174, + "step": 67395 + }, + { + "epoch": 3.14, + "learning_rate": 9.596037182763e-06, + "loss": 0.0651, + "step": 67400 + }, + { + "epoch": 3.15, + "learning_rate": 9.595253397708212e-06, + "loss": 0.0342, + "step": 67405 + }, + { + "epoch": 3.15, + "learning_rate": 9.594469612653426e-06, + "loss": 0.0149, + "step": 67410 + }, + { + "epoch": 3.15, + "learning_rate": 9.59368582759864e-06, + "loss": 0.0436, + "step": 67415 + }, + { + "epoch": 3.15, + "learning_rate": 9.592902042543854e-06, + "loss": 0.0795, + "step": 67420 + }, + { + "epoch": 3.15, + "learning_rate": 9.592118257489066e-06, + "loss": 0.1034, + "step": 67425 + }, + { + "epoch": 3.15, + "learning_rate": 9.59133447243428e-06, + "loss": 0.1312, + "step": 67430 + }, + { + "epoch": 3.15, + "learning_rate": 9.590550687379494e-06, + "loss": 0.1473, + "step": 67435 + }, + { + "epoch": 3.15, + "learning_rate": 9.589766902324708e-06, + "loss": 0.1771, + "step": 67440 + }, + { + "epoch": 3.15, + "learning_rate": 9.58898311726992e-06, + "loss": 0.2701, + "step": 67445 + }, + { + "epoch": 3.15, + "learning_rate": 9.588199332215134e-06, + "loss": 0.0963, + "step": 67450 + }, + { + "epoch": 3.15, + "learning_rate": 9.587415547160348e-06, + "loss": 0.0433, + "step": 67455 + }, + { + "epoch": 3.15, + "learning_rate": 9.58663176210556e-06, + "loss": 0.0833, + "step": 67460 + }, + { + "epoch": 3.15, + "learning_rate": 9.585847977050774e-06, + "loss": 0.0404, + "step": 67465 + }, + { + "epoch": 3.15, + "learning_rate": 9.585064191995988e-06, + "loss": 0.1147, + "step": 67470 + }, + { + "epoch": 3.15, + "learning_rate": 9.5842804069412e-06, + "loss": 0.1279, + "step": 67475 + }, + { + "epoch": 3.15, + "learning_rate": 9.583496621886414e-06, + "loss": 0.0957, + "step": 67480 + }, + { + "epoch": 3.15, + "learning_rate": 9.582712836831628e-06, + "loss": 0.1441, + "step": 67485 + }, + { + "epoch": 3.15, + "learning_rate": 9.581929051776842e-06, + "loss": 0.1781, + "step": 67490 + }, + { + "epoch": 3.15, + "learning_rate": 9.581145266722054e-06, + "loss": 0.3565, + "step": 67495 + }, + { + "epoch": 3.15, + "learning_rate": 9.580361481667268e-06, + "loss": 0.0838, + "step": 67500 + }, + { + "epoch": 3.15, + "learning_rate": 9.579577696612482e-06, + "loss": 0.025, + "step": 67505 + }, + { + "epoch": 3.15, + "learning_rate": 9.578793911557696e-06, + "loss": 0.0205, + "step": 67510 + }, + { + "epoch": 3.15, + "learning_rate": 9.57801012650291e-06, + "loss": 0.0888, + "step": 67515 + }, + { + "epoch": 3.15, + "learning_rate": 9.577226341448122e-06, + "loss": 0.0704, + "step": 67520 + }, + { + "epoch": 3.15, + "learning_rate": 9.576442556393334e-06, + "loss": 0.097, + "step": 67525 + }, + { + "epoch": 3.15, + "learning_rate": 9.575658771338548e-06, + "loss": 0.1136, + "step": 67530 + }, + { + "epoch": 3.15, + "learning_rate": 9.574874986283762e-06, + "loss": 0.1827, + "step": 67535 + }, + { + "epoch": 3.15, + "learning_rate": 9.574091201228976e-06, + "loss": 0.3217, + "step": 67540 + }, + { + "epoch": 3.15, + "learning_rate": 9.573307416174188e-06, + "loss": 0.3586, + "step": 67545 + }, + { + "epoch": 3.15, + "learning_rate": 9.572523631119402e-06, + "loss": 0.094, + "step": 67550 + }, + { + "epoch": 3.15, + "learning_rate": 9.571739846064616e-06, + "loss": 0.0299, + "step": 67555 + }, + { + "epoch": 3.15, + "learning_rate": 9.57095606100983e-06, + "loss": 0.0378, + "step": 67560 + }, + { + "epoch": 3.15, + "learning_rate": 9.570172275955044e-06, + "loss": 0.0593, + "step": 67565 + }, + { + "epoch": 3.15, + "learning_rate": 9.569388490900256e-06, + "loss": 0.0426, + "step": 67570 + }, + { + "epoch": 3.15, + "learning_rate": 9.56860470584547e-06, + "loss": 0.1359, + "step": 67575 + }, + { + "epoch": 3.15, + "learning_rate": 9.567820920790684e-06, + "loss": 0.0562, + "step": 67580 + }, + { + "epoch": 3.15, + "learning_rate": 9.567037135735898e-06, + "loss": 0.1838, + "step": 67585 + }, + { + "epoch": 3.15, + "learning_rate": 9.56625335068111e-06, + "loss": 0.2696, + "step": 67590 + }, + { + "epoch": 3.15, + "learning_rate": 9.565469565626322e-06, + "loss": 0.4682, + "step": 67595 + }, + { + "epoch": 3.15, + "learning_rate": 9.564685780571536e-06, + "loss": 0.0999, + "step": 67600 + }, + { + "epoch": 3.15, + "learning_rate": 9.56390199551675e-06, + "loss": 0.0261, + "step": 67605 + }, + { + "epoch": 3.15, + "learning_rate": 9.563118210461964e-06, + "loss": 0.1327, + "step": 67610 + }, + { + "epoch": 3.16, + "learning_rate": 9.562334425407178e-06, + "loss": 0.0242, + "step": 67615 + }, + { + "epoch": 3.16, + "learning_rate": 9.56155064035239e-06, + "loss": 0.0565, + "step": 67620 + }, + { + "epoch": 3.16, + "learning_rate": 9.560766855297604e-06, + "loss": 0.1068, + "step": 67625 + }, + { + "epoch": 3.16, + "learning_rate": 9.559983070242818e-06, + "loss": 0.0941, + "step": 67630 + }, + { + "epoch": 3.16, + "learning_rate": 9.559199285188032e-06, + "loss": 0.2012, + "step": 67635 + }, + { + "epoch": 3.16, + "learning_rate": 9.558415500133244e-06, + "loss": 0.2336, + "step": 67640 + }, + { + "epoch": 3.16, + "learning_rate": 9.557631715078458e-06, + "loss": 0.2486, + "step": 67645 + }, + { + "epoch": 3.16, + "learning_rate": 9.556847930023672e-06, + "loss": 0.1049, + "step": 67650 + }, + { + "epoch": 3.16, + "learning_rate": 9.556064144968884e-06, + "loss": 0.0125, + "step": 67655 + }, + { + "epoch": 3.16, + "learning_rate": 9.555280359914098e-06, + "loss": 0.0499, + "step": 67660 + }, + { + "epoch": 3.16, + "learning_rate": 9.554496574859312e-06, + "loss": 0.1079, + "step": 67665 + }, + { + "epoch": 3.16, + "learning_rate": 9.553712789804524e-06, + "loss": 0.0323, + "step": 67670 + }, + { + "epoch": 3.16, + "learning_rate": 9.552929004749738e-06, + "loss": 0.0886, + "step": 67675 + }, + { + "epoch": 3.16, + "learning_rate": 9.552145219694952e-06, + "loss": 0.1056, + "step": 67680 + }, + { + "epoch": 3.16, + "learning_rate": 9.551361434640166e-06, + "loss": 0.1175, + "step": 67685 + }, + { + "epoch": 3.16, + "learning_rate": 9.550577649585378e-06, + "loss": 0.3264, + "step": 67690 + }, + { + "epoch": 3.16, + "learning_rate": 9.549793864530592e-06, + "loss": 0.5046, + "step": 67695 + }, + { + "epoch": 3.16, + "learning_rate": 9.549010079475806e-06, + "loss": 0.0509, + "step": 67700 + }, + { + "epoch": 3.16, + "learning_rate": 9.54822629442102e-06, + "loss": 0.0376, + "step": 67705 + }, + { + "epoch": 3.16, + "learning_rate": 9.547442509366232e-06, + "loss": 0.0338, + "step": 67710 + }, + { + "epoch": 3.16, + "learning_rate": 9.546658724311446e-06, + "loss": 0.033, + "step": 67715 + }, + { + "epoch": 3.16, + "learning_rate": 9.545874939256658e-06, + "loss": 0.0897, + "step": 67720 + }, + { + "epoch": 3.16, + "learning_rate": 9.545091154201872e-06, + "loss": 0.1058, + "step": 67725 + }, + { + "epoch": 3.16, + "learning_rate": 9.544307369147086e-06, + "loss": 0.0865, + "step": 67730 + }, + { + "epoch": 3.16, + "learning_rate": 9.5435235840923e-06, + "loss": 0.1294, + "step": 67735 + }, + { + "epoch": 3.16, + "learning_rate": 9.542739799037512e-06, + "loss": 0.1835, + "step": 67740 + }, + { + "epoch": 3.16, + "learning_rate": 9.541956013982726e-06, + "loss": 0.3487, + "step": 67745 + }, + { + "epoch": 3.16, + "learning_rate": 9.54117222892794e-06, + "loss": 0.0457, + "step": 67750 + }, + { + "epoch": 3.16, + "learning_rate": 9.540388443873154e-06, + "loss": 0.0352, + "step": 67755 + }, + { + "epoch": 3.16, + "learning_rate": 9.539604658818366e-06, + "loss": 0.026, + "step": 67760 + }, + { + "epoch": 3.16, + "learning_rate": 9.53882087376358e-06, + "loss": 0.0794, + "step": 67765 + }, + { + "epoch": 3.16, + "learning_rate": 9.538037088708794e-06, + "loss": 0.0649, + "step": 67770 + }, + { + "epoch": 3.16, + "learning_rate": 9.537253303654008e-06, + "loss": 0.0954, + "step": 67775 + }, + { + "epoch": 3.16, + "learning_rate": 9.536469518599221e-06, + "loss": 0.0569, + "step": 67780 + }, + { + "epoch": 3.16, + "learning_rate": 9.535685733544434e-06, + "loss": 0.1698, + "step": 67785 + }, + { + "epoch": 3.16, + "learning_rate": 9.534901948489646e-06, + "loss": 0.2006, + "step": 67790 + }, + { + "epoch": 3.16, + "learning_rate": 9.53411816343486e-06, + "loss": 0.1633, + "step": 67795 + }, + { + "epoch": 3.16, + "learning_rate": 9.533334378380074e-06, + "loss": 0.078, + "step": 67800 + }, + { + "epoch": 3.16, + "learning_rate": 9.532550593325288e-06, + "loss": 0.0221, + "step": 67805 + }, + { + "epoch": 3.16, + "learning_rate": 9.5317668082705e-06, + "loss": 0.1055, + "step": 67810 + }, + { + "epoch": 3.16, + "learning_rate": 9.530983023215714e-06, + "loss": 0.0439, + "step": 67815 + }, + { + "epoch": 3.16, + "learning_rate": 9.530199238160928e-06, + "loss": 0.0632, + "step": 67820 + }, + { + "epoch": 3.16, + "learning_rate": 9.529415453106142e-06, + "loss": 0.1022, + "step": 67825 + }, + { + "epoch": 3.17, + "learning_rate": 9.528631668051355e-06, + "loss": 0.1556, + "step": 67830 + }, + { + "epoch": 3.17, + "learning_rate": 9.527847882996568e-06, + "loss": 0.118, + "step": 67835 + }, + { + "epoch": 3.17, + "learning_rate": 9.527064097941782e-06, + "loss": 0.1185, + "step": 67840 + }, + { + "epoch": 3.17, + "learning_rate": 9.526280312886995e-06, + "loss": 0.2748, + "step": 67845 + }, + { + "epoch": 3.17, + "learning_rate": 9.525496527832208e-06, + "loss": 0.0664, + "step": 67850 + }, + { + "epoch": 3.17, + "learning_rate": 9.524712742777422e-06, + "loss": 0.0549, + "step": 67855 + }, + { + "epoch": 3.17, + "learning_rate": 9.523928957722634e-06, + "loss": 0.0159, + "step": 67860 + }, + { + "epoch": 3.17, + "learning_rate": 9.523145172667848e-06, + "loss": 0.029, + "step": 67865 + }, + { + "epoch": 3.17, + "learning_rate": 9.522361387613062e-06, + "loss": 0.0671, + "step": 67870 + }, + { + "epoch": 3.17, + "learning_rate": 9.521577602558275e-06, + "loss": 0.0725, + "step": 67875 + }, + { + "epoch": 3.17, + "learning_rate": 9.52079381750349e-06, + "loss": 0.2095, + "step": 67880 + }, + { + "epoch": 3.17, + "learning_rate": 9.520010032448702e-06, + "loss": 0.2265, + "step": 67885 + }, + { + "epoch": 3.17, + "learning_rate": 9.519226247393916e-06, + "loss": 0.3112, + "step": 67890 + }, + { + "epoch": 3.17, + "learning_rate": 9.51844246233913e-06, + "loss": 0.2284, + "step": 67895 + }, + { + "epoch": 3.17, + "learning_rate": 9.517658677284343e-06, + "loss": 0.0592, + "step": 67900 + }, + { + "epoch": 3.17, + "learning_rate": 9.516874892229556e-06, + "loss": 0.0495, + "step": 67905 + }, + { + "epoch": 3.17, + "learning_rate": 9.51609110717477e-06, + "loss": 0.02, + "step": 67910 + }, + { + "epoch": 3.17, + "learning_rate": 9.515307322119982e-06, + "loss": 0.0283, + "step": 67915 + }, + { + "epoch": 3.17, + "learning_rate": 9.514523537065196e-06, + "loss": 0.0647, + "step": 67920 + }, + { + "epoch": 3.17, + "learning_rate": 9.51373975201041e-06, + "loss": 0.1077, + "step": 67925 + }, + { + "epoch": 3.17, + "learning_rate": 9.512955966955623e-06, + "loss": 0.1124, + "step": 67930 + }, + { + "epoch": 3.17, + "learning_rate": 9.512172181900836e-06, + "loss": 0.1757, + "step": 67935 + }, + { + "epoch": 3.17, + "learning_rate": 9.51138839684605e-06, + "loss": 0.217, + "step": 67940 + }, + { + "epoch": 3.17, + "learning_rate": 9.510604611791263e-06, + "loss": 0.3992, + "step": 67945 + }, + { + "epoch": 3.17, + "learning_rate": 9.509820826736477e-06, + "loss": 0.0693, + "step": 67950 + }, + { + "epoch": 3.17, + "learning_rate": 9.50903704168169e-06, + "loss": 0.067, + "step": 67955 + }, + { + "epoch": 3.17, + "learning_rate": 9.508253256626903e-06, + "loss": 0.0275, + "step": 67960 + }, + { + "epoch": 3.17, + "learning_rate": 9.507469471572117e-06, + "loss": 0.0541, + "step": 67965 + }, + { + "epoch": 3.17, + "learning_rate": 9.506685686517331e-06, + "loss": 0.055, + "step": 67970 + }, + { + "epoch": 3.17, + "learning_rate": 9.505901901462543e-06, + "loss": 0.0966, + "step": 67975 + }, + { + "epoch": 3.17, + "learning_rate": 9.505118116407757e-06, + "loss": 0.1219, + "step": 67980 + }, + { + "epoch": 3.17, + "learning_rate": 9.50433433135297e-06, + "loss": 0.2099, + "step": 67985 + }, + { + "epoch": 3.17, + "learning_rate": 9.503550546298183e-06, + "loss": 0.1988, + "step": 67990 + }, + { + "epoch": 3.17, + "learning_rate": 9.502766761243397e-06, + "loss": 0.3276, + "step": 67995 + }, + { + "epoch": 3.17, + "learning_rate": 9.501982976188611e-06, + "loss": 0.1253, + "step": 68000 + }, + { + "epoch": 3.17, + "learning_rate": 9.501199191133823e-06, + "loss": 0.0215, + "step": 68005 + }, + { + "epoch": 3.17, + "learning_rate": 9.500415406079037e-06, + "loss": 0.0237, + "step": 68010 + }, + { + "epoch": 3.17, + "learning_rate": 9.499631621024251e-06, + "loss": 0.0624, + "step": 68015 + }, + { + "epoch": 3.17, + "learning_rate": 9.498847835969465e-06, + "loss": 0.0849, + "step": 68020 + }, + { + "epoch": 3.17, + "learning_rate": 9.498064050914677e-06, + "loss": 0.083, + "step": 68025 + }, + { + "epoch": 3.17, + "learning_rate": 9.497280265859891e-06, + "loss": 0.1419, + "step": 68030 + }, + { + "epoch": 3.17, + "learning_rate": 9.496496480805105e-06, + "loss": 0.1342, + "step": 68035 + }, + { + "epoch": 3.17, + "learning_rate": 9.495712695750319e-06, + "loss": 0.1337, + "step": 68040 + }, + { + "epoch": 3.18, + "learning_rate": 9.494928910695531e-06, + "loss": 0.2854, + "step": 68045 + }, + { + "epoch": 3.18, + "learning_rate": 9.494145125640745e-06, + "loss": 0.146, + "step": 68050 + }, + { + "epoch": 3.18, + "learning_rate": 9.493361340585957e-06, + "loss": 0.0851, + "step": 68055 + }, + { + "epoch": 3.18, + "learning_rate": 9.492577555531171e-06, + "loss": 0.0148, + "step": 68060 + }, + { + "epoch": 3.18, + "learning_rate": 9.491793770476385e-06, + "loss": 0.0833, + "step": 68065 + }, + { + "epoch": 3.18, + "learning_rate": 9.4910099854216e-06, + "loss": 0.0505, + "step": 68070 + }, + { + "epoch": 3.18, + "learning_rate": 9.490226200366811e-06, + "loss": 0.103, + "step": 68075 + }, + { + "epoch": 3.18, + "learning_rate": 9.489442415312025e-06, + "loss": 0.1506, + "step": 68080 + }, + { + "epoch": 3.18, + "learning_rate": 9.48865863025724e-06, + "loss": 0.1196, + "step": 68085 + }, + { + "epoch": 3.18, + "learning_rate": 9.487874845202453e-06, + "loss": 0.1659, + "step": 68090 + }, + { + "epoch": 3.18, + "learning_rate": 9.487091060147667e-06, + "loss": 0.3757, + "step": 68095 + }, + { + "epoch": 3.18, + "learning_rate": 9.48630727509288e-06, + "loss": 0.0993, + "step": 68100 + }, + { + "epoch": 3.18, + "learning_rate": 9.485523490038093e-06, + "loss": 0.0322, + "step": 68105 + }, + { + "epoch": 3.18, + "learning_rate": 9.484739704983305e-06, + "loss": 0.047, + "step": 68110 + }, + { + "epoch": 3.18, + "learning_rate": 9.48395591992852e-06, + "loss": 0.081, + "step": 68115 + }, + { + "epoch": 3.18, + "learning_rate": 9.483172134873733e-06, + "loss": 0.0764, + "step": 68120 + }, + { + "epoch": 3.18, + "learning_rate": 9.482388349818945e-06, + "loss": 0.0573, + "step": 68125 + }, + { + "epoch": 3.18, + "learning_rate": 9.48160456476416e-06, + "loss": 0.1731, + "step": 68130 + }, + { + "epoch": 3.18, + "learning_rate": 9.480820779709373e-06, + "loss": 0.1322, + "step": 68135 + }, + { + "epoch": 3.18, + "learning_rate": 9.480036994654587e-06, + "loss": 0.1864, + "step": 68140 + }, + { + "epoch": 3.18, + "learning_rate": 9.479253209599801e-06, + "loss": 0.2936, + "step": 68145 + }, + { + "epoch": 3.18, + "learning_rate": 9.478469424545013e-06, + "loss": 0.0338, + "step": 68150 + }, + { + "epoch": 3.18, + "learning_rate": 9.477685639490227e-06, + "loss": 0.0361, + "step": 68155 + }, + { + "epoch": 3.18, + "learning_rate": 9.476901854435441e-06, + "loss": 0.0377, + "step": 68160 + }, + { + "epoch": 3.18, + "learning_rate": 9.476118069380655e-06, + "loss": 0.018, + "step": 68165 + }, + { + "epoch": 3.18, + "learning_rate": 9.475334284325867e-06, + "loss": 0.0892, + "step": 68170 + }, + { + "epoch": 3.18, + "learning_rate": 9.47455049927108e-06, + "loss": 0.0872, + "step": 68175 + }, + { + "epoch": 3.18, + "learning_rate": 9.473766714216293e-06, + "loss": 0.1519, + "step": 68180 + }, + { + "epoch": 3.18, + "learning_rate": 9.472982929161507e-06, + "loss": 0.2009, + "step": 68185 + }, + { + "epoch": 3.18, + "learning_rate": 9.472199144106721e-06, + "loss": 0.122, + "step": 68190 + }, + { + "epoch": 3.18, + "learning_rate": 9.471415359051935e-06, + "loss": 0.2358, + "step": 68195 + }, + { + "epoch": 3.18, + "learning_rate": 9.470631573997147e-06, + "loss": 0.1171, + "step": 68200 + }, + { + "epoch": 3.18, + "learning_rate": 9.469847788942361e-06, + "loss": 0.0262, + "step": 68205 + }, + { + "epoch": 3.18, + "learning_rate": 9.469064003887575e-06, + "loss": 0.0397, + "step": 68210 + }, + { + "epoch": 3.18, + "learning_rate": 9.468280218832789e-06, + "loss": 0.0614, + "step": 68215 + }, + { + "epoch": 3.18, + "learning_rate": 9.467496433778001e-06, + "loss": 0.0558, + "step": 68220 + }, + { + "epoch": 3.18, + "learning_rate": 9.466712648723215e-06, + "loss": 0.106, + "step": 68225 + }, + { + "epoch": 3.18, + "learning_rate": 9.465928863668429e-06, + "loss": 0.0806, + "step": 68230 + }, + { + "epoch": 3.18, + "learning_rate": 9.465145078613643e-06, + "loss": 0.1555, + "step": 68235 + }, + { + "epoch": 3.18, + "learning_rate": 9.464361293558855e-06, + "loss": 0.1864, + "step": 68240 + }, + { + "epoch": 3.18, + "learning_rate": 9.463577508504069e-06, + "loss": 0.2363, + "step": 68245 + }, + { + "epoch": 3.18, + "learning_rate": 9.462793723449281e-06, + "loss": 0.0648, + "step": 68250 + }, + { + "epoch": 3.18, + "learning_rate": 9.462009938394495e-06, + "loss": 0.0486, + "step": 68255 + }, + { + "epoch": 3.19, + "learning_rate": 9.461226153339709e-06, + "loss": 0.0306, + "step": 68260 + }, + { + "epoch": 3.19, + "learning_rate": 9.460442368284923e-06, + "loss": 0.0714, + "step": 68265 + }, + { + "epoch": 3.19, + "learning_rate": 9.459658583230135e-06, + "loss": 0.05, + "step": 68270 + }, + { + "epoch": 3.19, + "learning_rate": 9.458874798175349e-06, + "loss": 0.0767, + "step": 68275 + }, + { + "epoch": 3.19, + "learning_rate": 9.458091013120563e-06, + "loss": 0.062, + "step": 68280 + }, + { + "epoch": 3.19, + "learning_rate": 9.457307228065777e-06, + "loss": 0.1273, + "step": 68285 + }, + { + "epoch": 3.19, + "learning_rate": 9.456523443010989e-06, + "loss": 0.1011, + "step": 68290 + }, + { + "epoch": 3.19, + "learning_rate": 9.455739657956203e-06, + "loss": 0.2082, + "step": 68295 + }, + { + "epoch": 3.19, + "learning_rate": 9.454955872901417e-06, + "loss": 0.0501, + "step": 68300 + }, + { + "epoch": 3.19, + "learning_rate": 9.454172087846629e-06, + "loss": 0.0578, + "step": 68305 + }, + { + "epoch": 3.19, + "learning_rate": 9.453388302791843e-06, + "loss": 0.0514, + "step": 68310 + }, + { + "epoch": 3.19, + "learning_rate": 9.452604517737057e-06, + "loss": 0.0431, + "step": 68315 + }, + { + "epoch": 3.19, + "learning_rate": 9.451820732682269e-06, + "loss": 0.0945, + "step": 68320 + }, + { + "epoch": 3.19, + "learning_rate": 9.451036947627483e-06, + "loss": 0.1068, + "step": 68325 + }, + { + "epoch": 3.19, + "learning_rate": 9.450253162572697e-06, + "loss": 0.1171, + "step": 68330 + }, + { + "epoch": 3.19, + "learning_rate": 9.44946937751791e-06, + "loss": 0.0724, + "step": 68335 + }, + { + "epoch": 3.19, + "learning_rate": 9.448685592463123e-06, + "loss": 0.2423, + "step": 68340 + }, + { + "epoch": 3.19, + "learning_rate": 9.447901807408337e-06, + "loss": 0.2527, + "step": 68345 + }, + { + "epoch": 3.19, + "learning_rate": 9.44711802235355e-06, + "loss": 0.0918, + "step": 68350 + }, + { + "epoch": 3.19, + "learning_rate": 9.446334237298765e-06, + "loss": 0.0222, + "step": 68355 + }, + { + "epoch": 3.19, + "learning_rate": 9.445550452243979e-06, + "loss": 0.0152, + "step": 68360 + }, + { + "epoch": 3.19, + "learning_rate": 9.44476666718919e-06, + "loss": 0.0738, + "step": 68365 + }, + { + "epoch": 3.19, + "learning_rate": 9.443982882134403e-06, + "loss": 0.0907, + "step": 68370 + }, + { + "epoch": 3.19, + "learning_rate": 9.443199097079617e-06, + "loss": 0.0834, + "step": 68375 + }, + { + "epoch": 3.19, + "learning_rate": 9.44241531202483e-06, + "loss": 0.107, + "step": 68380 + }, + { + "epoch": 3.19, + "learning_rate": 9.441631526970045e-06, + "loss": 0.2131, + "step": 68385 + }, + { + "epoch": 3.19, + "learning_rate": 9.440847741915257e-06, + "loss": 0.1436, + "step": 68390 + }, + { + "epoch": 3.19, + "learning_rate": 9.440063956860471e-06, + "loss": 0.3737, + "step": 68395 + }, + { + "epoch": 3.19, + "learning_rate": 9.439280171805685e-06, + "loss": 0.1027, + "step": 68400 + }, + { + "epoch": 3.19, + "learning_rate": 9.438496386750899e-06, + "loss": 0.0675, + "step": 68405 + }, + { + "epoch": 3.19, + "learning_rate": 9.437712601696113e-06, + "loss": 0.0592, + "step": 68410 + }, + { + "epoch": 3.19, + "learning_rate": 9.436928816641325e-06, + "loss": 0.0521, + "step": 68415 + }, + { + "epoch": 3.19, + "learning_rate": 9.436145031586539e-06, + "loss": 0.0423, + "step": 68420 + }, + { + "epoch": 3.19, + "learning_rate": 9.435361246531753e-06, + "loss": 0.0177, + "step": 68425 + }, + { + "epoch": 3.19, + "learning_rate": 9.434577461476967e-06, + "loss": 0.0523, + "step": 68430 + }, + { + "epoch": 3.19, + "learning_rate": 9.433793676422179e-06, + "loss": 0.1323, + "step": 68435 + }, + { + "epoch": 3.19, + "learning_rate": 9.433009891367391e-06, + "loss": 0.1668, + "step": 68440 + }, + { + "epoch": 3.19, + "learning_rate": 9.432226106312605e-06, + "loss": 0.25, + "step": 68445 + }, + { + "epoch": 3.19, + "learning_rate": 9.431442321257819e-06, + "loss": 0.0571, + "step": 68450 + }, + { + "epoch": 3.19, + "learning_rate": 9.430658536203033e-06, + "loss": 0.0383, + "step": 68455 + }, + { + "epoch": 3.19, + "learning_rate": 9.429874751148247e-06, + "loss": 0.0444, + "step": 68460 + }, + { + "epoch": 3.19, + "learning_rate": 9.429090966093459e-06, + "loss": 0.0427, + "step": 68465 + }, + { + "epoch": 3.19, + "learning_rate": 9.428307181038673e-06, + "loss": 0.0621, + "step": 68470 + }, + { + "epoch": 3.2, + "learning_rate": 9.427523395983887e-06, + "loss": 0.0821, + "step": 68475 + }, + { + "epoch": 3.2, + "learning_rate": 9.4267396109291e-06, + "loss": 0.1072, + "step": 68480 + }, + { + "epoch": 3.2, + "learning_rate": 9.425955825874313e-06, + "loss": 0.1496, + "step": 68485 + }, + { + "epoch": 3.2, + "learning_rate": 9.425172040819527e-06, + "loss": 0.327, + "step": 68490 + }, + { + "epoch": 3.2, + "learning_rate": 9.42438825576474e-06, + "loss": 0.3904, + "step": 68495 + }, + { + "epoch": 3.2, + "learning_rate": 9.423604470709953e-06, + "loss": 0.0764, + "step": 68500 + }, + { + "epoch": 3.2, + "learning_rate": 9.422820685655167e-06, + "loss": 0.0141, + "step": 68505 + }, + { + "epoch": 3.2, + "learning_rate": 9.42203690060038e-06, + "loss": 0.0912, + "step": 68510 + }, + { + "epoch": 3.2, + "learning_rate": 9.421253115545593e-06, + "loss": 0.0373, + "step": 68515 + }, + { + "epoch": 3.2, + "learning_rate": 9.420469330490807e-06, + "loss": 0.1329, + "step": 68520 + }, + { + "epoch": 3.2, + "learning_rate": 9.41968554543602e-06, + "loss": 0.0989, + "step": 68525 + }, + { + "epoch": 3.2, + "learning_rate": 9.418901760381234e-06, + "loss": 0.103, + "step": 68530 + }, + { + "epoch": 3.2, + "learning_rate": 9.418117975326447e-06, + "loss": 0.1543, + "step": 68535 + }, + { + "epoch": 3.2, + "learning_rate": 9.41733419027166e-06, + "loss": 0.1886, + "step": 68540 + }, + { + "epoch": 3.2, + "learning_rate": 9.416550405216874e-06, + "loss": 0.3793, + "step": 68545 + }, + { + "epoch": 3.2, + "learning_rate": 9.415766620162088e-06, + "loss": 0.0654, + "step": 68550 + }, + { + "epoch": 3.2, + "learning_rate": 9.4149828351073e-06, + "loss": 0.0313, + "step": 68555 + }, + { + "epoch": 3.2, + "learning_rate": 9.414199050052515e-06, + "loss": 0.0346, + "step": 68560 + }, + { + "epoch": 3.2, + "learning_rate": 9.413415264997727e-06, + "loss": 0.0393, + "step": 68565 + }, + { + "epoch": 3.2, + "learning_rate": 9.41263147994294e-06, + "loss": 0.0822, + "step": 68570 + }, + { + "epoch": 3.2, + "learning_rate": 9.411847694888155e-06, + "loss": 0.1126, + "step": 68575 + }, + { + "epoch": 3.2, + "learning_rate": 9.411063909833368e-06, + "loss": 0.0963, + "step": 68580 + }, + { + "epoch": 3.2, + "learning_rate": 9.41028012477858e-06, + "loss": 0.1581, + "step": 68585 + }, + { + "epoch": 3.2, + "learning_rate": 9.409496339723795e-06, + "loss": 0.2612, + "step": 68590 + }, + { + "epoch": 3.2, + "learning_rate": 9.408712554669008e-06, + "loss": 0.323, + "step": 68595 + }, + { + "epoch": 3.2, + "learning_rate": 9.407928769614222e-06, + "loss": 0.0175, + "step": 68600 + }, + { + "epoch": 3.2, + "learning_rate": 9.407144984559435e-06, + "loss": 0.0262, + "step": 68605 + }, + { + "epoch": 3.2, + "learning_rate": 9.406361199504648e-06, + "loss": 0.0282, + "step": 68610 + }, + { + "epoch": 3.2, + "learning_rate": 9.405577414449862e-06, + "loss": 0.0527, + "step": 68615 + }, + { + "epoch": 3.2, + "learning_rate": 9.404793629395076e-06, + "loss": 0.0796, + "step": 68620 + }, + { + "epoch": 3.2, + "learning_rate": 9.40400984434029e-06, + "loss": 0.0827, + "step": 68625 + }, + { + "epoch": 3.2, + "learning_rate": 9.403226059285502e-06, + "loss": 0.0572, + "step": 68630 + }, + { + "epoch": 3.2, + "learning_rate": 9.402442274230715e-06, + "loss": 0.1104, + "step": 68635 + }, + { + "epoch": 3.2, + "learning_rate": 9.401658489175929e-06, + "loss": 0.1353, + "step": 68640 + }, + { + "epoch": 3.2, + "learning_rate": 9.400874704121142e-06, + "loss": 0.3382, + "step": 68645 + }, + { + "epoch": 3.2, + "learning_rate": 9.400090919066356e-06, + "loss": 0.0508, + "step": 68650 + }, + { + "epoch": 3.2, + "learning_rate": 9.399307134011569e-06, + "loss": 0.0686, + "step": 68655 + }, + { + "epoch": 3.2, + "learning_rate": 9.398523348956782e-06, + "loss": 0.0725, + "step": 68660 + }, + { + "epoch": 3.2, + "learning_rate": 9.397739563901996e-06, + "loss": 0.1, + "step": 68665 + }, + { + "epoch": 3.2, + "learning_rate": 9.39695577884721e-06, + "loss": 0.0732, + "step": 68670 + }, + { + "epoch": 3.2, + "learning_rate": 9.396171993792424e-06, + "loss": 0.0743, + "step": 68675 + }, + { + "epoch": 3.2, + "learning_rate": 9.395388208737636e-06, + "loss": 0.0961, + "step": 68680 + }, + { + "epoch": 3.2, + "learning_rate": 9.39460442368285e-06, + "loss": 0.1475, + "step": 68685 + }, + { + "epoch": 3.21, + "learning_rate": 9.393820638628064e-06, + "loss": 0.197, + "step": 68690 + }, + { + "epoch": 3.21, + "learning_rate": 9.393036853573276e-06, + "loss": 0.2542, + "step": 68695 + }, + { + "epoch": 3.21, + "learning_rate": 9.39225306851849e-06, + "loss": 0.0426, + "step": 68700 + }, + { + "epoch": 3.21, + "learning_rate": 9.391469283463703e-06, + "loss": 0.0372, + "step": 68705 + }, + { + "epoch": 3.21, + "learning_rate": 9.390685498408916e-06, + "loss": 0.0381, + "step": 68710 + }, + { + "epoch": 3.21, + "learning_rate": 9.38990171335413e-06, + "loss": 0.07, + "step": 68715 + }, + { + "epoch": 3.21, + "learning_rate": 9.389117928299344e-06, + "loss": 0.0335, + "step": 68720 + }, + { + "epoch": 3.21, + "learning_rate": 9.388334143244558e-06, + "loss": 0.0311, + "step": 68725 + }, + { + "epoch": 3.21, + "learning_rate": 9.38755035818977e-06, + "loss": 0.0415, + "step": 68730 + }, + { + "epoch": 3.21, + "learning_rate": 9.386766573134984e-06, + "loss": 0.1207, + "step": 68735 + }, + { + "epoch": 3.21, + "learning_rate": 9.385982788080198e-06, + "loss": 0.2369, + "step": 68740 + }, + { + "epoch": 3.21, + "learning_rate": 9.385199003025412e-06, + "loss": 0.2357, + "step": 68745 + }, + { + "epoch": 3.21, + "learning_rate": 9.384415217970624e-06, + "loss": 0.1444, + "step": 68750 + }, + { + "epoch": 3.21, + "learning_rate": 9.383631432915838e-06, + "loss": 0.0314, + "step": 68755 + }, + { + "epoch": 3.21, + "learning_rate": 9.38284764786105e-06, + "loss": 0.0433, + "step": 68760 + }, + { + "epoch": 3.21, + "learning_rate": 9.382063862806264e-06, + "loss": 0.0297, + "step": 68765 + }, + { + "epoch": 3.21, + "learning_rate": 9.381280077751478e-06, + "loss": 0.0899, + "step": 68770 + }, + { + "epoch": 3.21, + "learning_rate": 9.380496292696692e-06, + "loss": 0.06, + "step": 68775 + }, + { + "epoch": 3.21, + "learning_rate": 9.379712507641904e-06, + "loss": 0.1671, + "step": 68780 + }, + { + "epoch": 3.21, + "learning_rate": 9.378928722587118e-06, + "loss": 0.1742, + "step": 68785 + }, + { + "epoch": 3.21, + "learning_rate": 9.378144937532332e-06, + "loss": 0.243, + "step": 68790 + }, + { + "epoch": 3.21, + "learning_rate": 9.377361152477546e-06, + "loss": 0.2625, + "step": 68795 + }, + { + "epoch": 3.21, + "learning_rate": 9.376577367422758e-06, + "loss": 0.0928, + "step": 68800 + }, + { + "epoch": 3.21, + "learning_rate": 9.375793582367972e-06, + "loss": 0.0624, + "step": 68805 + }, + { + "epoch": 3.21, + "learning_rate": 9.375009797313186e-06, + "loss": 0.0441, + "step": 68810 + }, + { + "epoch": 3.21, + "learning_rate": 9.3742260122584e-06, + "loss": 0.0707, + "step": 68815 + }, + { + "epoch": 3.21, + "learning_rate": 9.373442227203612e-06, + "loss": 0.0557, + "step": 68820 + }, + { + "epoch": 3.21, + "learning_rate": 9.372658442148826e-06, + "loss": 0.0632, + "step": 68825 + }, + { + "epoch": 3.21, + "learning_rate": 9.371874657094038e-06, + "loss": 0.0644, + "step": 68830 + }, + { + "epoch": 3.21, + "learning_rate": 9.371090872039252e-06, + "loss": 0.1814, + "step": 68835 + }, + { + "epoch": 3.21, + "learning_rate": 9.370307086984466e-06, + "loss": 0.1505, + "step": 68840 + }, + { + "epoch": 3.21, + "learning_rate": 9.36952330192968e-06, + "loss": 0.2694, + "step": 68845 + }, + { + "epoch": 3.21, + "learning_rate": 9.368739516874892e-06, + "loss": 0.0535, + "step": 68850 + }, + { + "epoch": 3.21, + "learning_rate": 9.367955731820106e-06, + "loss": 0.0589, + "step": 68855 + }, + { + "epoch": 3.21, + "learning_rate": 9.36717194676532e-06, + "loss": 0.0595, + "step": 68860 + }, + { + "epoch": 3.21, + "learning_rate": 9.366388161710534e-06, + "loss": 0.0274, + "step": 68865 + }, + { + "epoch": 3.21, + "learning_rate": 9.365604376655746e-06, + "loss": 0.1054, + "step": 68870 + }, + { + "epoch": 3.21, + "learning_rate": 9.36482059160096e-06, + "loss": 0.0694, + "step": 68875 + }, + { + "epoch": 3.21, + "learning_rate": 9.364036806546174e-06, + "loss": 0.0681, + "step": 68880 + }, + { + "epoch": 3.21, + "learning_rate": 9.363253021491388e-06, + "loss": 0.0854, + "step": 68885 + }, + { + "epoch": 3.21, + "learning_rate": 9.3624692364366e-06, + "loss": 0.306, + "step": 68890 + }, + { + "epoch": 3.21, + "learning_rate": 9.361685451381814e-06, + "loss": 0.1706, + "step": 68895 + }, + { + "epoch": 3.21, + "learning_rate": 9.360901666327026e-06, + "loss": 0.037, + "step": 68900 + }, + { + "epoch": 3.22, + "learning_rate": 9.36011788127224e-06, + "loss": 0.0279, + "step": 68905 + }, + { + "epoch": 3.22, + "learning_rate": 9.359334096217454e-06, + "loss": 0.0225, + "step": 68910 + }, + { + "epoch": 3.22, + "learning_rate": 9.358550311162668e-06, + "loss": 0.0484, + "step": 68915 + }, + { + "epoch": 3.22, + "learning_rate": 9.35776652610788e-06, + "loss": 0.101, + "step": 68920 + }, + { + "epoch": 3.22, + "learning_rate": 9.356982741053094e-06, + "loss": 0.0671, + "step": 68925 + }, + { + "epoch": 3.22, + "learning_rate": 9.356198955998308e-06, + "loss": 0.1678, + "step": 68930 + }, + { + "epoch": 3.22, + "learning_rate": 9.355415170943522e-06, + "loss": 0.1417, + "step": 68935 + }, + { + "epoch": 3.22, + "learning_rate": 9.354631385888736e-06, + "loss": 0.212, + "step": 68940 + }, + { + "epoch": 3.22, + "learning_rate": 9.353847600833948e-06, + "loss": 0.3602, + "step": 68945 + }, + { + "epoch": 3.22, + "learning_rate": 9.353063815779162e-06, + "loss": 0.0381, + "step": 68950 + }, + { + "epoch": 3.22, + "learning_rate": 9.352280030724374e-06, + "loss": 0.0262, + "step": 68955 + }, + { + "epoch": 3.22, + "learning_rate": 9.351496245669588e-06, + "loss": 0.0538, + "step": 68960 + }, + { + "epoch": 3.22, + "learning_rate": 9.350712460614802e-06, + "loss": 0.0429, + "step": 68965 + }, + { + "epoch": 3.22, + "learning_rate": 9.349928675560014e-06, + "loss": 0.0416, + "step": 68970 + }, + { + "epoch": 3.22, + "learning_rate": 9.349144890505228e-06, + "loss": 0.0407, + "step": 68975 + }, + { + "epoch": 3.22, + "learning_rate": 9.348361105450442e-06, + "loss": 0.129, + "step": 68980 + }, + { + "epoch": 3.22, + "learning_rate": 9.347577320395656e-06, + "loss": 0.0655, + "step": 68985 + }, + { + "epoch": 3.22, + "learning_rate": 9.34679353534087e-06, + "loss": 0.2001, + "step": 68990 + }, + { + "epoch": 3.22, + "learning_rate": 9.346009750286082e-06, + "loss": 0.2085, + "step": 68995 + }, + { + "epoch": 3.22, + "learning_rate": 9.345225965231296e-06, + "loss": 0.0888, + "step": 69000 + }, + { + "epoch": 3.22, + "learning_rate": 9.34444218017651e-06, + "loss": 0.0161, + "step": 69005 + }, + { + "epoch": 3.22, + "learning_rate": 9.343658395121724e-06, + "loss": 0.0768, + "step": 69010 + }, + { + "epoch": 3.22, + "learning_rate": 9.342874610066936e-06, + "loss": 0.0276, + "step": 69015 + }, + { + "epoch": 3.22, + "learning_rate": 9.342090825012148e-06, + "loss": 0.0582, + "step": 69020 + }, + { + "epoch": 3.22, + "learning_rate": 9.341307039957362e-06, + "loss": 0.0803, + "step": 69025 + }, + { + "epoch": 3.22, + "learning_rate": 9.340523254902576e-06, + "loss": 0.0411, + "step": 69030 + }, + { + "epoch": 3.22, + "learning_rate": 9.33973946984779e-06, + "loss": 0.1711, + "step": 69035 + }, + { + "epoch": 3.22, + "learning_rate": 9.338955684793004e-06, + "loss": 0.1804, + "step": 69040 + }, + { + "epoch": 3.22, + "learning_rate": 9.338171899738216e-06, + "loss": 0.231, + "step": 69045 + }, + { + "epoch": 3.22, + "learning_rate": 9.33738811468343e-06, + "loss": 0.0687, + "step": 69050 + }, + { + "epoch": 3.22, + "learning_rate": 9.336604329628644e-06, + "loss": 0.0435, + "step": 69055 + }, + { + "epoch": 3.22, + "learning_rate": 9.335820544573858e-06, + "loss": 0.0353, + "step": 69060 + }, + { + "epoch": 3.22, + "learning_rate": 9.33503675951907e-06, + "loss": 0.0694, + "step": 69065 + }, + { + "epoch": 3.22, + "learning_rate": 9.334252974464284e-06, + "loss": 0.0859, + "step": 69070 + }, + { + "epoch": 3.22, + "learning_rate": 9.333469189409498e-06, + "loss": 0.0612, + "step": 69075 + }, + { + "epoch": 3.22, + "learning_rate": 9.332685404354712e-06, + "loss": 0.1262, + "step": 69080 + }, + { + "epoch": 3.22, + "learning_rate": 9.331901619299924e-06, + "loss": 0.1553, + "step": 69085 + }, + { + "epoch": 3.22, + "learning_rate": 9.331117834245138e-06, + "loss": 0.2087, + "step": 69090 + }, + { + "epoch": 3.22, + "learning_rate": 9.33033404919035e-06, + "loss": 0.1873, + "step": 69095 + }, + { + "epoch": 3.22, + "learning_rate": 9.329550264135564e-06, + "loss": 0.0867, + "step": 69100 + }, + { + "epoch": 3.22, + "learning_rate": 9.328766479080778e-06, + "loss": 0.0105, + "step": 69105 + }, + { + "epoch": 3.22, + "learning_rate": 9.327982694025992e-06, + "loss": 0.0393, + "step": 69110 + }, + { + "epoch": 3.23, + "learning_rate": 9.327198908971204e-06, + "loss": 0.0326, + "step": 69115 + }, + { + "epoch": 3.23, + "learning_rate": 9.326415123916418e-06, + "loss": 0.0376, + "step": 69120 + }, + { + "epoch": 3.23, + "learning_rate": 9.325631338861632e-06, + "loss": 0.0788, + "step": 69125 + }, + { + "epoch": 3.23, + "learning_rate": 9.324847553806846e-06, + "loss": 0.0968, + "step": 69130 + }, + { + "epoch": 3.23, + "learning_rate": 9.324063768752058e-06, + "loss": 0.215, + "step": 69135 + }, + { + "epoch": 3.23, + "learning_rate": 9.323279983697272e-06, + "loss": 0.2422, + "step": 69140 + }, + { + "epoch": 3.23, + "learning_rate": 9.322496198642486e-06, + "loss": 0.4225, + "step": 69145 + }, + { + "epoch": 3.23, + "learning_rate": 9.321712413587698e-06, + "loss": 0.0695, + "step": 69150 + }, + { + "epoch": 3.23, + "learning_rate": 9.320928628532912e-06, + "loss": 0.0703, + "step": 69155 + }, + { + "epoch": 3.23, + "learning_rate": 9.320144843478126e-06, + "loss": 0.1488, + "step": 69160 + }, + { + "epoch": 3.23, + "learning_rate": 9.319361058423338e-06, + "loss": 0.0382, + "step": 69165 + }, + { + "epoch": 3.23, + "learning_rate": 9.318577273368552e-06, + "loss": 0.1603, + "step": 69170 + }, + { + "epoch": 3.23, + "learning_rate": 9.317793488313766e-06, + "loss": 0.1347, + "step": 69175 + }, + { + "epoch": 3.23, + "learning_rate": 9.31700970325898e-06, + "loss": 0.0366, + "step": 69180 + }, + { + "epoch": 3.23, + "learning_rate": 9.316225918204192e-06, + "loss": 0.1063, + "step": 69185 + }, + { + "epoch": 3.23, + "learning_rate": 9.315442133149406e-06, + "loss": 0.2112, + "step": 69190 + }, + { + "epoch": 3.23, + "learning_rate": 9.31465834809462e-06, + "loss": 0.3252, + "step": 69195 + }, + { + "epoch": 3.23, + "learning_rate": 9.313874563039833e-06, + "loss": 0.0515, + "step": 69200 + }, + { + "epoch": 3.23, + "learning_rate": 9.313090777985047e-06, + "loss": 0.0263, + "step": 69205 + }, + { + "epoch": 3.23, + "learning_rate": 9.31230699293026e-06, + "loss": 0.0196, + "step": 69210 + }, + { + "epoch": 3.23, + "learning_rate": 9.311523207875472e-06, + "loss": 0.0484, + "step": 69215 + }, + { + "epoch": 3.23, + "learning_rate": 9.310739422820686e-06, + "loss": 0.0974, + "step": 69220 + }, + { + "epoch": 3.23, + "learning_rate": 9.3099556377659e-06, + "loss": 0.1009, + "step": 69225 + }, + { + "epoch": 3.23, + "learning_rate": 9.309171852711114e-06, + "loss": 0.1064, + "step": 69230 + }, + { + "epoch": 3.23, + "learning_rate": 9.308388067656326e-06, + "loss": 0.1218, + "step": 69235 + }, + { + "epoch": 3.23, + "learning_rate": 9.30760428260154e-06, + "loss": 0.1984, + "step": 69240 + }, + { + "epoch": 3.23, + "learning_rate": 9.306820497546754e-06, + "loss": 0.3309, + "step": 69245 + }, + { + "epoch": 3.23, + "learning_rate": 9.306036712491967e-06, + "loss": 0.0605, + "step": 69250 + }, + { + "epoch": 3.23, + "learning_rate": 9.305252927437181e-06, + "loss": 0.0544, + "step": 69255 + }, + { + "epoch": 3.23, + "learning_rate": 9.304469142382394e-06, + "loss": 0.044, + "step": 69260 + }, + { + "epoch": 3.23, + "learning_rate": 9.303685357327607e-06, + "loss": 0.046, + "step": 69265 + }, + { + "epoch": 3.23, + "learning_rate": 9.302901572272821e-06, + "loss": 0.0805, + "step": 69270 + }, + { + "epoch": 3.23, + "learning_rate": 9.302117787218035e-06, + "loss": 0.1386, + "step": 69275 + }, + { + "epoch": 3.23, + "learning_rate": 9.301334002163247e-06, + "loss": 0.1237, + "step": 69280 + }, + { + "epoch": 3.23, + "learning_rate": 9.30055021710846e-06, + "loss": 0.1503, + "step": 69285 + }, + { + "epoch": 3.23, + "learning_rate": 9.299766432053674e-06, + "loss": 0.1735, + "step": 69290 + }, + { + "epoch": 3.23, + "learning_rate": 9.298982646998888e-06, + "loss": 0.2164, + "step": 69295 + }, + { + "epoch": 3.23, + "learning_rate": 9.298198861944101e-06, + "loss": 0.0882, + "step": 69300 + }, + { + "epoch": 3.23, + "learning_rate": 9.297415076889315e-06, + "loss": 0.0838, + "step": 69305 + }, + { + "epoch": 3.23, + "learning_rate": 9.296631291834528e-06, + "loss": 0.0821, + "step": 69310 + }, + { + "epoch": 3.23, + "learning_rate": 9.295847506779741e-06, + "loss": 0.0388, + "step": 69315 + }, + { + "epoch": 3.23, + "learning_rate": 9.295063721724955e-06, + "loss": 0.1242, + "step": 69320 + }, + { + "epoch": 3.23, + "learning_rate": 9.29427993667017e-06, + "loss": 0.1089, + "step": 69325 + }, + { + "epoch": 3.24, + "learning_rate": 9.293496151615381e-06, + "loss": 0.0842, + "step": 69330 + }, + { + "epoch": 3.24, + "learning_rate": 9.292712366560595e-06, + "loss": 0.1545, + "step": 69335 + }, + { + "epoch": 3.24, + "learning_rate": 9.29192858150581e-06, + "loss": 0.2175, + "step": 69340 + }, + { + "epoch": 3.24, + "learning_rate": 9.291144796451021e-06, + "loss": 0.3745, + "step": 69345 + }, + { + "epoch": 3.24, + "learning_rate": 9.290361011396235e-06, + "loss": 0.093, + "step": 69350 + }, + { + "epoch": 3.24, + "learning_rate": 9.28957722634145e-06, + "loss": 0.0221, + "step": 69355 + }, + { + "epoch": 3.24, + "learning_rate": 9.288793441286662e-06, + "loss": 0.0494, + "step": 69360 + }, + { + "epoch": 3.24, + "learning_rate": 9.288009656231875e-06, + "loss": 0.06, + "step": 69365 + }, + { + "epoch": 3.24, + "learning_rate": 9.28722587117709e-06, + "loss": 0.0468, + "step": 69370 + }, + { + "epoch": 3.24, + "learning_rate": 9.286442086122303e-06, + "loss": 0.0985, + "step": 69375 + }, + { + "epoch": 3.24, + "learning_rate": 9.285658301067515e-06, + "loss": 0.2375, + "step": 69380 + }, + { + "epoch": 3.24, + "learning_rate": 9.28487451601273e-06, + "loss": 0.0803, + "step": 69385 + }, + { + "epoch": 3.24, + "learning_rate": 9.284090730957943e-06, + "loss": 0.3352, + "step": 69390 + }, + { + "epoch": 3.24, + "learning_rate": 9.283306945903157e-06, + "loss": 0.2779, + "step": 69395 + }, + { + "epoch": 3.24, + "learning_rate": 9.28252316084837e-06, + "loss": 0.0764, + "step": 69400 + }, + { + "epoch": 3.24, + "learning_rate": 9.281739375793583e-06, + "loss": 0.0254, + "step": 69405 + }, + { + "epoch": 3.24, + "learning_rate": 9.280955590738795e-06, + "loss": 0.0204, + "step": 69410 + }, + { + "epoch": 3.24, + "learning_rate": 9.28017180568401e-06, + "loss": 0.0758, + "step": 69415 + }, + { + "epoch": 3.24, + "learning_rate": 9.279388020629223e-06, + "loss": 0.0516, + "step": 69420 + }, + { + "epoch": 3.24, + "learning_rate": 9.278604235574437e-06, + "loss": 0.1475, + "step": 69425 + }, + { + "epoch": 3.24, + "learning_rate": 9.27782045051965e-06, + "loss": 0.1257, + "step": 69430 + }, + { + "epoch": 3.24, + "learning_rate": 9.277036665464863e-06, + "loss": 0.1269, + "step": 69435 + }, + { + "epoch": 3.24, + "learning_rate": 9.276252880410077e-06, + "loss": 0.2353, + "step": 69440 + }, + { + "epoch": 3.24, + "learning_rate": 9.275469095355291e-06, + "loss": 0.3649, + "step": 69445 + }, + { + "epoch": 3.24, + "learning_rate": 9.274685310300503e-06, + "loss": 0.0354, + "step": 69450 + }, + { + "epoch": 3.24, + "learning_rate": 9.273901525245717e-06, + "loss": 0.0686, + "step": 69455 + }, + { + "epoch": 3.24, + "learning_rate": 9.273117740190931e-06, + "loss": 0.0279, + "step": 69460 + }, + { + "epoch": 3.24, + "learning_rate": 9.272333955136145e-06, + "loss": 0.0607, + "step": 69465 + }, + { + "epoch": 3.24, + "learning_rate": 9.271550170081359e-06, + "loss": 0.0724, + "step": 69470 + }, + { + "epoch": 3.24, + "learning_rate": 9.270766385026571e-06, + "loss": 0.1336, + "step": 69475 + }, + { + "epoch": 3.24, + "learning_rate": 9.269982599971783e-06, + "loss": 0.1661, + "step": 69480 + }, + { + "epoch": 3.24, + "learning_rate": 9.269198814916997e-06, + "loss": 0.1611, + "step": 69485 + }, + { + "epoch": 3.24, + "learning_rate": 9.268415029862211e-06, + "loss": 0.3469, + "step": 69490 + }, + { + "epoch": 3.24, + "learning_rate": 9.267631244807425e-06, + "loss": 0.3338, + "step": 69495 + }, + { + "epoch": 3.24, + "learning_rate": 9.266847459752637e-06, + "loss": 0.0401, + "step": 69500 + }, + { + "epoch": 3.24, + "learning_rate": 9.266063674697851e-06, + "loss": 0.072, + "step": 69505 + }, + { + "epoch": 3.24, + "learning_rate": 9.265279889643065e-06, + "loss": 0.0282, + "step": 69510 + }, + { + "epoch": 3.24, + "learning_rate": 9.264496104588279e-06, + "loss": 0.0697, + "step": 69515 + }, + { + "epoch": 3.24, + "learning_rate": 9.263712319533493e-06, + "loss": 0.1149, + "step": 69520 + }, + { + "epoch": 3.24, + "learning_rate": 9.262928534478705e-06, + "loss": 0.0651, + "step": 69525 + }, + { + "epoch": 3.24, + "learning_rate": 9.262144749423919e-06, + "loss": 0.0796, + "step": 69530 + }, + { + "epoch": 3.24, + "learning_rate": 9.261360964369133e-06, + "loss": 0.1888, + "step": 69535 + }, + { + "epoch": 3.24, + "learning_rate": 9.260577179314345e-06, + "loss": 0.1374, + "step": 69540 + }, + { + "epoch": 3.25, + "learning_rate": 9.259793394259559e-06, + "loss": 0.2406, + "step": 69545 + }, + { + "epoch": 3.25, + "learning_rate": 9.259009609204771e-06, + "loss": 0.088, + "step": 69550 + }, + { + "epoch": 3.25, + "learning_rate": 9.258225824149985e-06, + "loss": 0.1102, + "step": 69555 + }, + { + "epoch": 3.25, + "learning_rate": 9.257442039095199e-06, + "loss": 0.014, + "step": 69560 + }, + { + "epoch": 3.25, + "learning_rate": 9.256658254040413e-06, + "loss": 0.0231, + "step": 69565 + }, + { + "epoch": 3.25, + "learning_rate": 9.255874468985627e-06, + "loss": 0.0708, + "step": 69570 + }, + { + "epoch": 3.25, + "learning_rate": 9.255090683930839e-06, + "loss": 0.0572, + "step": 69575 + }, + { + "epoch": 3.25, + "learning_rate": 9.254306898876053e-06, + "loss": 0.0775, + "step": 69580 + }, + { + "epoch": 3.25, + "learning_rate": 9.253523113821267e-06, + "loss": 0.1815, + "step": 69585 + }, + { + "epoch": 3.25, + "learning_rate": 9.25273932876648e-06, + "loss": 0.2664, + "step": 69590 + }, + { + "epoch": 3.25, + "learning_rate": 9.251955543711693e-06, + "loss": 0.308, + "step": 69595 + }, + { + "epoch": 3.25, + "learning_rate": 9.251171758656907e-06, + "loss": 0.1139, + "step": 69600 + }, + { + "epoch": 3.25, + "learning_rate": 9.25038797360212e-06, + "loss": 0.0147, + "step": 69605 + }, + { + "epoch": 3.25, + "learning_rate": 9.249604188547333e-06, + "loss": 0.0446, + "step": 69610 + }, + { + "epoch": 3.25, + "learning_rate": 9.248820403492547e-06, + "loss": 0.0582, + "step": 69615 + }, + { + "epoch": 3.25, + "learning_rate": 9.248036618437761e-06, + "loss": 0.0645, + "step": 69620 + }, + { + "epoch": 3.25, + "learning_rate": 9.247252833382973e-06, + "loss": 0.103, + "step": 69625 + }, + { + "epoch": 3.25, + "learning_rate": 9.246469048328187e-06, + "loss": 0.0929, + "step": 69630 + }, + { + "epoch": 3.25, + "learning_rate": 9.245685263273401e-06, + "loss": 0.104, + "step": 69635 + }, + { + "epoch": 3.25, + "learning_rate": 9.244901478218615e-06, + "loss": 0.2659, + "step": 69640 + }, + { + "epoch": 3.25, + "learning_rate": 9.244117693163827e-06, + "loss": 0.2342, + "step": 69645 + }, + { + "epoch": 3.25, + "learning_rate": 9.243333908109041e-06, + "loss": 0.6174, + "step": 69650 + }, + { + "epoch": 3.25, + "learning_rate": 9.242550123054255e-06, + "loss": 0.0114, + "step": 69655 + }, + { + "epoch": 3.25, + "learning_rate": 9.241766337999469e-06, + "loss": 0.0766, + "step": 69660 + }, + { + "epoch": 3.25, + "learning_rate": 9.240982552944681e-06, + "loss": 0.0467, + "step": 69665 + }, + { + "epoch": 3.25, + "learning_rate": 9.240198767889895e-06, + "loss": 0.0568, + "step": 69670 + }, + { + "epoch": 3.25, + "learning_rate": 9.239414982835107e-06, + "loss": 0.1458, + "step": 69675 + }, + { + "epoch": 3.25, + "learning_rate": 9.238631197780321e-06, + "loss": 0.2081, + "step": 69680 + }, + { + "epoch": 3.25, + "learning_rate": 9.237847412725535e-06, + "loss": 0.2514, + "step": 69685 + }, + { + "epoch": 3.25, + "learning_rate": 9.237063627670749e-06, + "loss": 0.1977, + "step": 69690 + }, + { + "epoch": 3.25, + "learning_rate": 9.236279842615961e-06, + "loss": 0.2282, + "step": 69695 + }, + { + "epoch": 3.25, + "learning_rate": 9.235496057561175e-06, + "loss": 0.0591, + "step": 69700 + }, + { + "epoch": 3.25, + "learning_rate": 9.234712272506389e-06, + "loss": 0.0384, + "step": 69705 + }, + { + "epoch": 3.25, + "learning_rate": 9.233928487451603e-06, + "loss": 0.0815, + "step": 69710 + }, + { + "epoch": 3.25, + "learning_rate": 9.233144702396815e-06, + "loss": 0.0774, + "step": 69715 + }, + { + "epoch": 3.25, + "learning_rate": 9.232360917342029e-06, + "loss": 0.0509, + "step": 69720 + }, + { + "epoch": 3.25, + "learning_rate": 9.231577132287243e-06, + "loss": 0.0877, + "step": 69725 + }, + { + "epoch": 3.25, + "learning_rate": 9.230793347232457e-06, + "loss": 0.1008, + "step": 69730 + }, + { + "epoch": 3.25, + "learning_rate": 9.230009562177669e-06, + "loss": 0.126, + "step": 69735 + }, + { + "epoch": 3.25, + "learning_rate": 9.229225777122883e-06, + "loss": 0.1484, + "step": 69740 + }, + { + "epoch": 3.25, + "learning_rate": 9.228441992068095e-06, + "loss": 0.2838, + "step": 69745 + }, + { + "epoch": 3.25, + "learning_rate": 9.227658207013309e-06, + "loss": 0.1356, + "step": 69750 + }, + { + "epoch": 3.25, + "learning_rate": 9.226874421958523e-06, + "loss": 0.0381, + "step": 69755 + }, + { + "epoch": 3.26, + "learning_rate": 9.226090636903737e-06, + "loss": 0.0354, + "step": 69760 + }, + { + "epoch": 3.26, + "learning_rate": 9.225306851848949e-06, + "loss": 0.0649, + "step": 69765 + }, + { + "epoch": 3.26, + "learning_rate": 9.224523066794163e-06, + "loss": 0.0909, + "step": 69770 + }, + { + "epoch": 3.26, + "learning_rate": 9.223739281739377e-06, + "loss": 0.0969, + "step": 69775 + }, + { + "epoch": 3.26, + "learning_rate": 9.22295549668459e-06, + "loss": 0.1227, + "step": 69780 + }, + { + "epoch": 3.26, + "learning_rate": 9.222171711629805e-06, + "loss": 0.1145, + "step": 69785 + }, + { + "epoch": 3.26, + "learning_rate": 9.221387926575017e-06, + "loss": 0.2365, + "step": 69790 + }, + { + "epoch": 3.26, + "learning_rate": 9.22060414152023e-06, + "loss": 0.2366, + "step": 69795 + }, + { + "epoch": 3.26, + "learning_rate": 9.219820356465443e-06, + "loss": 0.1243, + "step": 69800 + }, + { + "epoch": 3.26, + "learning_rate": 9.219036571410657e-06, + "loss": 0.0516, + "step": 69805 + }, + { + "epoch": 3.26, + "learning_rate": 9.21825278635587e-06, + "loss": 0.0249, + "step": 69810 + }, + { + "epoch": 3.26, + "learning_rate": 9.217469001301083e-06, + "loss": 0.0937, + "step": 69815 + }, + { + "epoch": 3.26, + "learning_rate": 9.216685216246297e-06, + "loss": 0.1407, + "step": 69820 + }, + { + "epoch": 3.26, + "learning_rate": 9.21590143119151e-06, + "loss": 0.0835, + "step": 69825 + }, + { + "epoch": 3.26, + "learning_rate": 9.215117646136725e-06, + "loss": 0.104, + "step": 69830 + }, + { + "epoch": 3.26, + "learning_rate": 9.214333861081939e-06, + "loss": 0.2058, + "step": 69835 + }, + { + "epoch": 3.26, + "learning_rate": 9.21355007602715e-06, + "loss": 0.2023, + "step": 69840 + }, + { + "epoch": 3.26, + "learning_rate": 9.212766290972365e-06, + "loss": 0.3072, + "step": 69845 + }, + { + "epoch": 3.26, + "learning_rate": 9.211982505917579e-06, + "loss": 0.0897, + "step": 69850 + }, + { + "epoch": 3.26, + "learning_rate": 9.211198720862792e-06, + "loss": 0.036, + "step": 69855 + }, + { + "epoch": 3.26, + "learning_rate": 9.210414935808005e-06, + "loss": 0.0343, + "step": 69860 + }, + { + "epoch": 3.26, + "learning_rate": 9.209631150753217e-06, + "loss": 0.0693, + "step": 69865 + }, + { + "epoch": 3.26, + "learning_rate": 9.20884736569843e-06, + "loss": 0.0274, + "step": 69870 + }, + { + "epoch": 3.26, + "learning_rate": 9.208063580643645e-06, + "loss": 0.0446, + "step": 69875 + }, + { + "epoch": 3.26, + "learning_rate": 9.207279795588859e-06, + "loss": 0.1388, + "step": 69880 + }, + { + "epoch": 3.26, + "learning_rate": 9.206496010534072e-06, + "loss": 0.1066, + "step": 69885 + }, + { + "epoch": 3.26, + "learning_rate": 9.205712225479285e-06, + "loss": 0.1121, + "step": 69890 + }, + { + "epoch": 3.26, + "learning_rate": 9.204928440424499e-06, + "loss": 0.283, + "step": 69895 + }, + { + "epoch": 3.26, + "learning_rate": 9.204144655369713e-06, + "loss": 0.0839, + "step": 69900 + }, + { + "epoch": 3.26, + "learning_rate": 9.203360870314926e-06, + "loss": 0.053, + "step": 69905 + }, + { + "epoch": 3.26, + "learning_rate": 9.202577085260139e-06, + "loss": 0.0771, + "step": 69910 + }, + { + "epoch": 3.26, + "learning_rate": 9.201793300205353e-06, + "loss": 0.1367, + "step": 69915 + }, + { + "epoch": 3.26, + "learning_rate": 9.201009515150566e-06, + "loss": 0.0701, + "step": 69920 + }, + { + "epoch": 3.26, + "learning_rate": 9.20022573009578e-06, + "loss": 0.0521, + "step": 69925 + }, + { + "epoch": 3.26, + "learning_rate": 9.199441945040993e-06, + "loss": 0.1232, + "step": 69930 + }, + { + "epoch": 3.26, + "learning_rate": 9.198658159986206e-06, + "loss": 0.0843, + "step": 69935 + }, + { + "epoch": 3.26, + "learning_rate": 9.197874374931419e-06, + "loss": 0.2965, + "step": 69940 + }, + { + "epoch": 3.26, + "learning_rate": 9.197090589876633e-06, + "loss": 0.277, + "step": 69945 + }, + { + "epoch": 3.26, + "learning_rate": 9.196306804821846e-06, + "loss": 0.0945, + "step": 69950 + }, + { + "epoch": 3.26, + "learning_rate": 9.19552301976706e-06, + "loss": 0.0235, + "step": 69955 + }, + { + "epoch": 3.26, + "learning_rate": 9.194739234712273e-06, + "loss": 0.0263, + "step": 69960 + }, + { + "epoch": 3.26, + "learning_rate": 9.193955449657487e-06, + "loss": 0.0488, + "step": 69965 + }, + { + "epoch": 3.26, + "learning_rate": 9.1931716646027e-06, + "loss": 0.0679, + "step": 69970 + }, + { + "epoch": 3.27, + "learning_rate": 9.192387879547914e-06, + "loss": 0.1051, + "step": 69975 + }, + { + "epoch": 3.27, + "learning_rate": 9.191604094493127e-06, + "loss": 0.1029, + "step": 69980 + }, + { + "epoch": 3.27, + "learning_rate": 9.19082030943834e-06, + "loss": 0.1088, + "step": 69985 + }, + { + "epoch": 3.27, + "learning_rate": 9.190036524383554e-06, + "loss": 0.1562, + "step": 69990 + }, + { + "epoch": 3.27, + "learning_rate": 9.189252739328767e-06, + "loss": 0.2521, + "step": 69995 + }, + { + "epoch": 3.27, + "learning_rate": 9.18846895427398e-06, + "loss": 0.0542, + "step": 70000 + }, + { + "epoch": 3.27, + "learning_rate": 9.187685169219194e-06, + "loss": 0.0405, + "step": 70005 + }, + { + "epoch": 3.27, + "learning_rate": 9.186901384164407e-06, + "loss": 0.0403, + "step": 70010 + }, + { + "epoch": 3.27, + "learning_rate": 9.18611759910962e-06, + "loss": 0.0638, + "step": 70015 + }, + { + "epoch": 3.27, + "learning_rate": 9.185333814054834e-06, + "loss": 0.1057, + "step": 70020 + }, + { + "epoch": 3.27, + "learning_rate": 9.184550029000048e-06, + "loss": 0.0915, + "step": 70025 + }, + { + "epoch": 3.27, + "learning_rate": 9.18376624394526e-06, + "loss": 0.0756, + "step": 70030 + }, + { + "epoch": 3.27, + "learning_rate": 9.182982458890474e-06, + "loss": 0.1864, + "step": 70035 + }, + { + "epoch": 3.27, + "learning_rate": 9.182198673835688e-06, + "loss": 0.1794, + "step": 70040 + }, + { + "epoch": 3.27, + "learning_rate": 9.181414888780902e-06, + "loss": 0.3143, + "step": 70045 + }, + { + "epoch": 3.27, + "learning_rate": 9.180631103726116e-06, + "loss": 0.0461, + "step": 70050 + }, + { + "epoch": 3.27, + "learning_rate": 9.179847318671328e-06, + "loss": 0.0468, + "step": 70055 + }, + { + "epoch": 3.27, + "learning_rate": 9.17906353361654e-06, + "loss": 0.058, + "step": 70060 + }, + { + "epoch": 3.27, + "learning_rate": 9.178279748561754e-06, + "loss": 0.0569, + "step": 70065 + }, + { + "epoch": 3.27, + "learning_rate": 9.177495963506968e-06, + "loss": 0.0887, + "step": 70070 + }, + { + "epoch": 3.27, + "learning_rate": 9.176712178452182e-06, + "loss": 0.0843, + "step": 70075 + }, + { + "epoch": 3.27, + "learning_rate": 9.175928393397394e-06, + "loss": 0.1165, + "step": 70080 + }, + { + "epoch": 3.27, + "learning_rate": 9.175144608342608e-06, + "loss": 0.1447, + "step": 70085 + }, + { + "epoch": 3.27, + "learning_rate": 9.174360823287822e-06, + "loss": 0.1615, + "step": 70090 + }, + { + "epoch": 3.27, + "learning_rate": 9.173577038233036e-06, + "loss": 0.2179, + "step": 70095 + }, + { + "epoch": 3.27, + "learning_rate": 9.17279325317825e-06, + "loss": 0.0924, + "step": 70100 + }, + { + "epoch": 3.27, + "learning_rate": 9.172009468123462e-06, + "loss": 0.0574, + "step": 70105 + }, + { + "epoch": 3.27, + "learning_rate": 9.171225683068676e-06, + "loss": 0.0505, + "step": 70110 + }, + { + "epoch": 3.27, + "learning_rate": 9.17044189801389e-06, + "loss": 0.0461, + "step": 70115 + }, + { + "epoch": 3.27, + "learning_rate": 9.169658112959104e-06, + "loss": 0.1141, + "step": 70120 + }, + { + "epoch": 3.27, + "learning_rate": 9.168874327904316e-06, + "loss": 0.1524, + "step": 70125 + }, + { + "epoch": 3.27, + "learning_rate": 9.168090542849528e-06, + "loss": 0.0598, + "step": 70130 + }, + { + "epoch": 3.27, + "learning_rate": 9.167306757794742e-06, + "loss": 0.1248, + "step": 70135 + }, + { + "epoch": 3.27, + "learning_rate": 9.166522972739956e-06, + "loss": 0.1644, + "step": 70140 + }, + { + "epoch": 3.27, + "learning_rate": 9.16573918768517e-06, + "loss": 0.1953, + "step": 70145 + }, + { + "epoch": 3.27, + "learning_rate": 9.164955402630384e-06, + "loss": 0.0394, + "step": 70150 + }, + { + "epoch": 3.27, + "learning_rate": 9.164171617575596e-06, + "loss": 0.0543, + "step": 70155 + }, + { + "epoch": 3.27, + "learning_rate": 9.16338783252081e-06, + "loss": 0.0456, + "step": 70160 + }, + { + "epoch": 3.27, + "learning_rate": 9.162604047466024e-06, + "loss": 0.0717, + "step": 70165 + }, + { + "epoch": 3.27, + "learning_rate": 9.161820262411238e-06, + "loss": 0.1553, + "step": 70170 + }, + { + "epoch": 3.27, + "learning_rate": 9.16103647735645e-06, + "loss": 0.0369, + "step": 70175 + }, + { + "epoch": 3.27, + "learning_rate": 9.160252692301664e-06, + "loss": 0.11, + "step": 70180 + }, + { + "epoch": 3.27, + "learning_rate": 9.159468907246878e-06, + "loss": 0.1583, + "step": 70185 + }, + { + "epoch": 3.28, + "learning_rate": 9.15868512219209e-06, + "loss": 0.2984, + "step": 70190 + }, + { + "epoch": 3.28, + "learning_rate": 9.157901337137304e-06, + "loss": 0.2892, + "step": 70195 + }, + { + "epoch": 3.28, + "learning_rate": 9.157117552082518e-06, + "loss": 0.0816, + "step": 70200 + }, + { + "epoch": 3.28, + "learning_rate": 9.15633376702773e-06, + "loss": 0.0477, + "step": 70205 + }, + { + "epoch": 3.28, + "learning_rate": 9.155549981972944e-06, + "loss": 0.0191, + "step": 70210 + }, + { + "epoch": 3.28, + "learning_rate": 9.154766196918158e-06, + "loss": 0.058, + "step": 70215 + }, + { + "epoch": 3.28, + "learning_rate": 9.153982411863372e-06, + "loss": 0.0377, + "step": 70220 + }, + { + "epoch": 3.28, + "learning_rate": 9.153198626808584e-06, + "loss": 0.1391, + "step": 70225 + }, + { + "epoch": 3.28, + "learning_rate": 9.152414841753798e-06, + "loss": 0.1003, + "step": 70230 + }, + { + "epoch": 3.28, + "learning_rate": 9.151631056699012e-06, + "loss": 0.1134, + "step": 70235 + }, + { + "epoch": 3.28, + "learning_rate": 9.150847271644226e-06, + "loss": 0.2391, + "step": 70240 + }, + { + "epoch": 3.28, + "learning_rate": 9.150063486589438e-06, + "loss": 0.1599, + "step": 70245 + }, + { + "epoch": 3.28, + "learning_rate": 9.149279701534652e-06, + "loss": 0.0952, + "step": 70250 + }, + { + "epoch": 3.28, + "learning_rate": 9.148495916479864e-06, + "loss": 0.0636, + "step": 70255 + }, + { + "epoch": 3.28, + "learning_rate": 9.147712131425078e-06, + "loss": 0.0402, + "step": 70260 + }, + { + "epoch": 3.28, + "learning_rate": 9.146928346370292e-06, + "loss": 0.0885, + "step": 70265 + }, + { + "epoch": 3.28, + "learning_rate": 9.146144561315506e-06, + "loss": 0.1678, + "step": 70270 + }, + { + "epoch": 3.28, + "learning_rate": 9.145360776260718e-06, + "loss": 0.0763, + "step": 70275 + }, + { + "epoch": 3.28, + "learning_rate": 9.144576991205932e-06, + "loss": 0.0798, + "step": 70280 + }, + { + "epoch": 3.28, + "learning_rate": 9.143793206151146e-06, + "loss": 0.1155, + "step": 70285 + }, + { + "epoch": 3.28, + "learning_rate": 9.14300942109636e-06, + "loss": 0.2216, + "step": 70290 + }, + { + "epoch": 3.28, + "learning_rate": 9.142225636041572e-06, + "loss": 0.288, + "step": 70295 + }, + { + "epoch": 3.28, + "learning_rate": 9.141441850986786e-06, + "loss": 0.0696, + "step": 70300 + }, + { + "epoch": 3.28, + "learning_rate": 9.140658065932e-06, + "loss": 0.0834, + "step": 70305 + }, + { + "epoch": 3.28, + "learning_rate": 9.139874280877214e-06, + "loss": 0.0411, + "step": 70310 + }, + { + "epoch": 3.28, + "learning_rate": 9.139090495822428e-06, + "loss": 0.0357, + "step": 70315 + }, + { + "epoch": 3.28, + "learning_rate": 9.13830671076764e-06, + "loss": 0.1072, + "step": 70320 + }, + { + "epoch": 3.28, + "learning_rate": 9.137522925712852e-06, + "loss": 0.0559, + "step": 70325 + }, + { + "epoch": 3.28, + "learning_rate": 9.136739140658066e-06, + "loss": 0.1139, + "step": 70330 + }, + { + "epoch": 3.28, + "learning_rate": 9.13595535560328e-06, + "loss": 0.1853, + "step": 70335 + }, + { + "epoch": 3.28, + "learning_rate": 9.135171570548494e-06, + "loss": 0.1746, + "step": 70340 + }, + { + "epoch": 3.28, + "learning_rate": 9.134387785493706e-06, + "loss": 0.2631, + "step": 70345 + }, + { + "epoch": 3.28, + "learning_rate": 9.13360400043892e-06, + "loss": 0.0738, + "step": 70350 + }, + { + "epoch": 3.28, + "learning_rate": 9.132820215384134e-06, + "loss": 0.018, + "step": 70355 + }, + { + "epoch": 3.28, + "learning_rate": 9.132036430329348e-06, + "loss": 0.0448, + "step": 70360 + }, + { + "epoch": 3.28, + "learning_rate": 9.131252645274562e-06, + "loss": 0.063, + "step": 70365 + }, + { + "epoch": 3.28, + "learning_rate": 9.130468860219774e-06, + "loss": 0.0829, + "step": 70370 + }, + { + "epoch": 3.28, + "learning_rate": 9.129685075164988e-06, + "loss": 0.0777, + "step": 70375 + }, + { + "epoch": 3.28, + "learning_rate": 9.128901290110202e-06, + "loss": 0.0711, + "step": 70380 + }, + { + "epoch": 3.28, + "learning_rate": 9.128117505055414e-06, + "loss": 0.1215, + "step": 70385 + }, + { + "epoch": 3.28, + "learning_rate": 9.127333720000628e-06, + "loss": 0.171, + "step": 70390 + }, + { + "epoch": 3.28, + "learning_rate": 9.12654993494584e-06, + "loss": 0.3134, + "step": 70395 + }, + { + "epoch": 3.28, + "learning_rate": 9.125766149891054e-06, + "loss": 0.0421, + "step": 70400 + }, + { + "epoch": 3.29, + "learning_rate": 9.124982364836268e-06, + "loss": 0.0682, + "step": 70405 + }, + { + "epoch": 3.29, + "learning_rate": 9.124198579781482e-06, + "loss": 0.0628, + "step": 70410 + }, + { + "epoch": 3.29, + "learning_rate": 9.123571551737652e-06, + "loss": 0.0632, + "step": 70415 + }, + { + "epoch": 3.29, + "learning_rate": 9.122787766682866e-06, + "loss": 0.0502, + "step": 70420 + }, + { + "epoch": 3.29, + "learning_rate": 9.12200398162808e-06, + "loss": 0.0768, + "step": 70425 + }, + { + "epoch": 3.29, + "learning_rate": 9.121220196573292e-06, + "loss": 0.114, + "step": 70430 + }, + { + "epoch": 3.29, + "learning_rate": 9.120436411518506e-06, + "loss": 0.2299, + "step": 70435 + }, + { + "epoch": 3.29, + "learning_rate": 9.11965262646372e-06, + "loss": 0.1957, + "step": 70440 + }, + { + "epoch": 3.29, + "learning_rate": 9.118868841408934e-06, + "loss": 0.3038, + "step": 70445 + }, + { + "epoch": 3.29, + "learning_rate": 9.118085056354146e-06, + "loss": 0.0454, + "step": 70450 + }, + { + "epoch": 3.29, + "learning_rate": 9.117301271299358e-06, + "loss": 0.0615, + "step": 70455 + }, + { + "epoch": 3.29, + "learning_rate": 9.116517486244572e-06, + "loss": 0.0582, + "step": 70460 + }, + { + "epoch": 3.29, + "learning_rate": 9.115733701189786e-06, + "loss": 0.113, + "step": 70465 + }, + { + "epoch": 3.29, + "learning_rate": 9.114949916135e-06, + "loss": 0.0629, + "step": 70470 + }, + { + "epoch": 3.29, + "learning_rate": 9.114166131080214e-06, + "loss": 0.0821, + "step": 70475 + }, + { + "epoch": 3.29, + "learning_rate": 9.113382346025426e-06, + "loss": 0.1246, + "step": 70480 + }, + { + "epoch": 3.29, + "learning_rate": 9.11259856097064e-06, + "loss": 0.199, + "step": 70485 + }, + { + "epoch": 3.29, + "learning_rate": 9.111814775915854e-06, + "loss": 0.2589, + "step": 70490 + }, + { + "epoch": 3.29, + "learning_rate": 9.111030990861068e-06, + "loss": 0.2966, + "step": 70495 + }, + { + "epoch": 3.29, + "learning_rate": 9.11024720580628e-06, + "loss": 0.0677, + "step": 70500 + }, + { + "epoch": 3.29, + "learning_rate": 9.109463420751494e-06, + "loss": 0.0092, + "step": 70505 + }, + { + "epoch": 3.29, + "learning_rate": 9.108679635696708e-06, + "loss": 0.0471, + "step": 70510 + }, + { + "epoch": 3.29, + "learning_rate": 9.10789585064192e-06, + "loss": 0.0891, + "step": 70515 + }, + { + "epoch": 3.29, + "learning_rate": 9.107112065587134e-06, + "loss": 0.1139, + "step": 70520 + }, + { + "epoch": 3.29, + "learning_rate": 9.106328280532348e-06, + "loss": 0.0479, + "step": 70525 + }, + { + "epoch": 3.29, + "learning_rate": 9.10554449547756e-06, + "loss": 0.0951, + "step": 70530 + }, + { + "epoch": 3.29, + "learning_rate": 9.104760710422774e-06, + "loss": 0.1251, + "step": 70535 + }, + { + "epoch": 3.29, + "learning_rate": 9.103976925367988e-06, + "loss": 0.1632, + "step": 70540 + }, + { + "epoch": 3.29, + "learning_rate": 9.103193140313202e-06, + "loss": 0.3112, + "step": 70545 + }, + { + "epoch": 3.29, + "learning_rate": 9.102409355258414e-06, + "loss": 0.0694, + "step": 70550 + }, + { + "epoch": 3.29, + "learning_rate": 9.101625570203628e-06, + "loss": 0.0419, + "step": 70555 + }, + { + "epoch": 3.29, + "learning_rate": 9.100841785148842e-06, + "loss": 0.0534, + "step": 70560 + }, + { + "epoch": 3.29, + "learning_rate": 9.100058000094056e-06, + "loss": 0.0536, + "step": 70565 + }, + { + "epoch": 3.29, + "learning_rate": 9.099274215039268e-06, + "loss": 0.0873, + "step": 70570 + }, + { + "epoch": 3.29, + "learning_rate": 9.098490429984482e-06, + "loss": 0.1011, + "step": 70575 + }, + { + "epoch": 3.29, + "learning_rate": 9.097706644929694e-06, + "loss": 0.1586, + "step": 70580 + }, + { + "epoch": 3.29, + "learning_rate": 9.096922859874908e-06, + "loss": 0.1024, + "step": 70585 + }, + { + "epoch": 3.29, + "learning_rate": 9.096139074820122e-06, + "loss": 0.1611, + "step": 70590 + }, + { + "epoch": 3.29, + "learning_rate": 9.095355289765336e-06, + "loss": 0.3784, + "step": 70595 + }, + { + "epoch": 3.29, + "learning_rate": 9.094571504710548e-06, + "loss": 0.1113, + "step": 70600 + }, + { + "epoch": 3.29, + "learning_rate": 9.093787719655762e-06, + "loss": 0.0555, + "step": 70605 + }, + { + "epoch": 3.29, + "learning_rate": 9.093003934600976e-06, + "loss": 0.0418, + "step": 70610 + }, + { + "epoch": 3.29, + "learning_rate": 9.09222014954619e-06, + "loss": 0.0386, + "step": 70615 + }, + { + "epoch": 3.3, + "learning_rate": 9.091436364491402e-06, + "loss": 0.076, + "step": 70620 + }, + { + "epoch": 3.3, + "learning_rate": 9.090652579436616e-06, + "loss": 0.0295, + "step": 70625 + }, + { + "epoch": 3.3, + "learning_rate": 9.08986879438183e-06, + "loss": 0.1095, + "step": 70630 + }, + { + "epoch": 3.3, + "learning_rate": 9.089085009327044e-06, + "loss": 0.1026, + "step": 70635 + }, + { + "epoch": 3.3, + "learning_rate": 9.088301224272258e-06, + "loss": 0.2221, + "step": 70640 + }, + { + "epoch": 3.3, + "learning_rate": 9.08751743921747e-06, + "loss": 0.2452, + "step": 70645 + }, + { + "epoch": 3.3, + "learning_rate": 9.086733654162682e-06, + "loss": 0.0779, + "step": 70650 + }, + { + "epoch": 3.3, + "learning_rate": 9.085949869107896e-06, + "loss": 0.0415, + "step": 70655 + }, + { + "epoch": 3.3, + "learning_rate": 9.08516608405311e-06, + "loss": 0.065, + "step": 70660 + }, + { + "epoch": 3.3, + "learning_rate": 9.084382298998324e-06, + "loss": 0.1131, + "step": 70665 + }, + { + "epoch": 3.3, + "learning_rate": 9.083598513943536e-06, + "loss": 0.0668, + "step": 70670 + }, + { + "epoch": 3.3, + "learning_rate": 9.08281472888875e-06, + "loss": 0.0669, + "step": 70675 + }, + { + "epoch": 3.3, + "learning_rate": 9.082030943833964e-06, + "loss": 0.1014, + "step": 70680 + }, + { + "epoch": 3.3, + "learning_rate": 9.081247158779178e-06, + "loss": 0.0679, + "step": 70685 + }, + { + "epoch": 3.3, + "learning_rate": 9.080463373724392e-06, + "loss": 0.1369, + "step": 70690 + }, + { + "epoch": 3.3, + "learning_rate": 9.079679588669604e-06, + "loss": 0.3386, + "step": 70695 + }, + { + "epoch": 3.3, + "learning_rate": 9.078895803614818e-06, + "loss": 0.097, + "step": 70700 + }, + { + "epoch": 3.3, + "learning_rate": 9.078112018560032e-06, + "loss": 0.0637, + "step": 70705 + }, + { + "epoch": 3.3, + "learning_rate": 9.077328233505244e-06, + "loss": 0.0306, + "step": 70710 + }, + { + "epoch": 3.3, + "learning_rate": 9.076544448450458e-06, + "loss": 0.0302, + "step": 70715 + }, + { + "epoch": 3.3, + "learning_rate": 9.07576066339567e-06, + "loss": 0.0902, + "step": 70720 + }, + { + "epoch": 3.3, + "learning_rate": 9.074976878340884e-06, + "loss": 0.1711, + "step": 70725 + }, + { + "epoch": 3.3, + "learning_rate": 9.074193093286098e-06, + "loss": 0.0799, + "step": 70730 + }, + { + "epoch": 3.3, + "learning_rate": 9.073409308231312e-06, + "loss": 0.0986, + "step": 70735 + }, + { + "epoch": 3.3, + "learning_rate": 9.072625523176526e-06, + "loss": 0.2437, + "step": 70740 + }, + { + "epoch": 3.3, + "learning_rate": 9.071841738121738e-06, + "loss": 0.3259, + "step": 70745 + }, + { + "epoch": 3.3, + "learning_rate": 9.071057953066952e-06, + "loss": 0.0754, + "step": 70750 + }, + { + "epoch": 3.3, + "learning_rate": 9.070274168012166e-06, + "loss": 0.0394, + "step": 70755 + }, + { + "epoch": 3.3, + "learning_rate": 9.06949038295738e-06, + "loss": 0.0298, + "step": 70760 + }, + { + "epoch": 3.3, + "learning_rate": 9.068706597902592e-06, + "loss": 0.0491, + "step": 70765 + }, + { + "epoch": 3.3, + "learning_rate": 9.067922812847806e-06, + "loss": 0.0504, + "step": 70770 + }, + { + "epoch": 3.3, + "learning_rate": 9.067139027793018e-06, + "loss": 0.0863, + "step": 70775 + }, + { + "epoch": 3.3, + "learning_rate": 9.066355242738232e-06, + "loss": 0.133, + "step": 70780 + }, + { + "epoch": 3.3, + "learning_rate": 9.065571457683446e-06, + "loss": 0.1856, + "step": 70785 + }, + { + "epoch": 3.3, + "learning_rate": 9.06478767262866e-06, + "loss": 0.33, + "step": 70790 + }, + { + "epoch": 3.3, + "learning_rate": 9.064003887573872e-06, + "loss": 0.293, + "step": 70795 + }, + { + "epoch": 3.3, + "learning_rate": 9.063220102519086e-06, + "loss": 0.0919, + "step": 70800 + }, + { + "epoch": 3.3, + "learning_rate": 9.0624363174643e-06, + "loss": 0.0686, + "step": 70805 + }, + { + "epoch": 3.3, + "learning_rate": 9.061652532409513e-06, + "loss": 0.0482, + "step": 70810 + }, + { + "epoch": 3.3, + "learning_rate": 9.060868747354726e-06, + "loss": 0.0436, + "step": 70815 + }, + { + "epoch": 3.3, + "learning_rate": 9.06008496229994e-06, + "loss": 0.1321, + "step": 70820 + }, + { + "epoch": 3.3, + "learning_rate": 9.059301177245154e-06, + "loss": 0.0897, + "step": 70825 + }, + { + "epoch": 3.31, + "learning_rate": 9.058517392190367e-06, + "loss": 0.1366, + "step": 70830 + }, + { + "epoch": 3.31, + "learning_rate": 9.05773360713558e-06, + "loss": 0.0788, + "step": 70835 + }, + { + "epoch": 3.31, + "learning_rate": 9.056949822080794e-06, + "loss": 0.1021, + "step": 70840 + }, + { + "epoch": 3.31, + "learning_rate": 9.056166037026006e-06, + "loss": 0.4001, + "step": 70845 + }, + { + "epoch": 3.31, + "learning_rate": 9.05538225197122e-06, + "loss": 0.0607, + "step": 70850 + }, + { + "epoch": 3.31, + "learning_rate": 9.054598466916434e-06, + "loss": 0.0377, + "step": 70855 + }, + { + "epoch": 3.31, + "learning_rate": 9.053814681861647e-06, + "loss": 0.0581, + "step": 70860 + }, + { + "epoch": 3.31, + "learning_rate": 9.05303089680686e-06, + "loss": 0.0553, + "step": 70865 + }, + { + "epoch": 3.31, + "learning_rate": 9.052247111752074e-06, + "loss": 0.0653, + "step": 70870 + }, + { + "epoch": 3.31, + "learning_rate": 9.051463326697287e-06, + "loss": 0.17, + "step": 70875 + }, + { + "epoch": 3.31, + "learning_rate": 9.050679541642501e-06, + "loss": 0.1759, + "step": 70880 + }, + { + "epoch": 3.31, + "learning_rate": 9.049895756587714e-06, + "loss": 0.0962, + "step": 70885 + }, + { + "epoch": 3.31, + "learning_rate": 9.049111971532928e-06, + "loss": 0.1679, + "step": 70890 + }, + { + "epoch": 3.31, + "learning_rate": 9.048328186478141e-06, + "loss": 0.3484, + "step": 70895 + }, + { + "epoch": 3.31, + "learning_rate": 9.047544401423355e-06, + "loss": 0.0895, + "step": 70900 + }, + { + "epoch": 3.31, + "learning_rate": 9.046760616368568e-06, + "loss": 0.0192, + "step": 70905 + }, + { + "epoch": 3.31, + "learning_rate": 9.045976831313781e-06, + "loss": 0.0598, + "step": 70910 + }, + { + "epoch": 3.31, + "learning_rate": 9.045193046258994e-06, + "loss": 0.0646, + "step": 70915 + }, + { + "epoch": 3.31, + "learning_rate": 9.044409261204208e-06, + "loss": 0.125, + "step": 70920 + }, + { + "epoch": 3.31, + "learning_rate": 9.043625476149421e-06, + "loss": 0.0955, + "step": 70925 + }, + { + "epoch": 3.31, + "learning_rate": 9.042841691094635e-06, + "loss": 0.0751, + "step": 70930 + }, + { + "epoch": 3.31, + "learning_rate": 9.042057906039848e-06, + "loss": 0.0934, + "step": 70935 + }, + { + "epoch": 3.31, + "learning_rate": 9.041274120985061e-06, + "loss": 0.3588, + "step": 70940 + }, + { + "epoch": 3.31, + "learning_rate": 9.040490335930275e-06, + "loss": 0.4088, + "step": 70945 + }, + { + "epoch": 3.31, + "learning_rate": 9.03970655087549e-06, + "loss": 0.096, + "step": 70950 + }, + { + "epoch": 3.31, + "learning_rate": 9.038922765820703e-06, + "loss": 0.039, + "step": 70955 + }, + { + "epoch": 3.31, + "learning_rate": 9.038138980765915e-06, + "loss": 0.0466, + "step": 70960 + }, + { + "epoch": 3.31, + "learning_rate": 9.03735519571113e-06, + "loss": 0.056, + "step": 70965 + }, + { + "epoch": 3.31, + "learning_rate": 9.036571410656342e-06, + "loss": 0.0959, + "step": 70970 + }, + { + "epoch": 3.31, + "learning_rate": 9.035787625601555e-06, + "loss": 0.0983, + "step": 70975 + }, + { + "epoch": 3.31, + "learning_rate": 9.03500384054677e-06, + "loss": 0.0763, + "step": 70980 + }, + { + "epoch": 3.31, + "learning_rate": 9.034220055491982e-06, + "loss": 0.1379, + "step": 70985 + }, + { + "epoch": 3.31, + "learning_rate": 9.033436270437195e-06, + "loss": 0.1648, + "step": 70990 + }, + { + "epoch": 3.31, + "learning_rate": 9.03265248538241e-06, + "loss": 0.3926, + "step": 70995 + }, + { + "epoch": 3.31, + "learning_rate": 9.031868700327623e-06, + "loss": 0.0747, + "step": 71000 + }, + { + "epoch": 3.31, + "learning_rate": 9.031084915272837e-06, + "loss": 0.0096, + "step": 71005 + }, + { + "epoch": 3.31, + "learning_rate": 9.03030113021805e-06, + "loss": 0.0439, + "step": 71010 + }, + { + "epoch": 3.31, + "learning_rate": 9.029517345163263e-06, + "loss": 0.0545, + "step": 71015 + }, + { + "epoch": 3.31, + "learning_rate": 9.028733560108477e-06, + "loss": 0.0585, + "step": 71020 + }, + { + "epoch": 3.31, + "learning_rate": 9.027949775053691e-06, + "loss": 0.0719, + "step": 71025 + }, + { + "epoch": 3.31, + "learning_rate": 9.027165989998903e-06, + "loss": 0.0609, + "step": 71030 + }, + { + "epoch": 3.31, + "learning_rate": 9.026382204944116e-06, + "loss": 0.1347, + "step": 71035 + }, + { + "epoch": 3.31, + "learning_rate": 9.02559841988933e-06, + "loss": 0.2076, + "step": 71040 + }, + { + "epoch": 3.32, + "learning_rate": 9.024814634834543e-06, + "loss": 0.2844, + "step": 71045 + }, + { + "epoch": 3.32, + "learning_rate": 9.024030849779757e-06, + "loss": 0.0975, + "step": 71050 + }, + { + "epoch": 3.32, + "learning_rate": 9.023247064724971e-06, + "loss": 0.0456, + "step": 71055 + }, + { + "epoch": 3.32, + "learning_rate": 9.022463279670183e-06, + "loss": 0.0962, + "step": 71060 + }, + { + "epoch": 3.32, + "learning_rate": 9.021679494615397e-06, + "loss": 0.1066, + "step": 71065 + }, + { + "epoch": 3.32, + "learning_rate": 9.020895709560611e-06, + "loss": 0.0887, + "step": 71070 + }, + { + "epoch": 3.32, + "learning_rate": 9.020111924505825e-06, + "loss": 0.1813, + "step": 71075 + }, + { + "epoch": 3.32, + "learning_rate": 9.019328139451037e-06, + "loss": 0.1152, + "step": 71080 + }, + { + "epoch": 3.32, + "learning_rate": 9.018544354396251e-06, + "loss": 0.1858, + "step": 71085 + }, + { + "epoch": 3.32, + "learning_rate": 9.017760569341465e-06, + "loss": 0.2239, + "step": 71090 + }, + { + "epoch": 3.32, + "learning_rate": 9.016976784286679e-06, + "loss": 0.3627, + "step": 71095 + }, + { + "epoch": 3.32, + "learning_rate": 9.016192999231891e-06, + "loss": 0.0499, + "step": 71100 + }, + { + "epoch": 3.32, + "learning_rate": 9.015409214177105e-06, + "loss": 0.033, + "step": 71105 + }, + { + "epoch": 3.32, + "learning_rate": 9.014625429122317e-06, + "loss": 0.0734, + "step": 71110 + }, + { + "epoch": 3.32, + "learning_rate": 9.013841644067531e-06, + "loss": 0.0203, + "step": 71115 + }, + { + "epoch": 3.32, + "learning_rate": 9.013057859012745e-06, + "loss": 0.0896, + "step": 71120 + }, + { + "epoch": 3.32, + "learning_rate": 9.012274073957959e-06, + "loss": 0.0496, + "step": 71125 + }, + { + "epoch": 3.32, + "learning_rate": 9.011490288903171e-06, + "loss": 0.1073, + "step": 71130 + }, + { + "epoch": 3.32, + "learning_rate": 9.010706503848385e-06, + "loss": 0.2097, + "step": 71135 + }, + { + "epoch": 3.32, + "learning_rate": 9.009922718793599e-06, + "loss": 0.2172, + "step": 71140 + }, + { + "epoch": 3.32, + "learning_rate": 9.009138933738813e-06, + "loss": 0.3922, + "step": 71145 + }, + { + "epoch": 3.32, + "learning_rate": 9.008355148684025e-06, + "loss": 0.0941, + "step": 71150 + }, + { + "epoch": 3.32, + "learning_rate": 9.007571363629239e-06, + "loss": 0.0167, + "step": 71155 + }, + { + "epoch": 3.32, + "learning_rate": 9.006787578574453e-06, + "loss": 0.042, + "step": 71160 + }, + { + "epoch": 3.32, + "learning_rate": 9.006003793519665e-06, + "loss": 0.0617, + "step": 71165 + }, + { + "epoch": 3.32, + "learning_rate": 9.005220008464879e-06, + "loss": 0.0855, + "step": 71170 + }, + { + "epoch": 3.32, + "learning_rate": 9.004436223410093e-06, + "loss": 0.1239, + "step": 71175 + }, + { + "epoch": 3.32, + "learning_rate": 9.003652438355305e-06, + "loss": 0.0801, + "step": 71180 + }, + { + "epoch": 3.32, + "learning_rate": 9.00286865330052e-06, + "loss": 0.1824, + "step": 71185 + }, + { + "epoch": 3.32, + "learning_rate": 9.002084868245733e-06, + "loss": 0.1555, + "step": 71190 + }, + { + "epoch": 3.32, + "learning_rate": 9.001301083190947e-06, + "loss": 0.3987, + "step": 71195 + }, + { + "epoch": 3.32, + "learning_rate": 9.00051729813616e-06, + "loss": 0.0915, + "step": 71200 + }, + { + "epoch": 3.32, + "learning_rate": 8.999733513081373e-06, + "loss": 0.0749, + "step": 71205 + }, + { + "epoch": 3.32, + "learning_rate": 8.998949728026587e-06, + "loss": 0.0269, + "step": 71210 + }, + { + "epoch": 3.32, + "learning_rate": 8.998165942971801e-06, + "loss": 0.0165, + "step": 71215 + }, + { + "epoch": 3.32, + "learning_rate": 8.997382157917015e-06, + "loss": 0.0827, + "step": 71220 + }, + { + "epoch": 3.32, + "learning_rate": 8.996598372862227e-06, + "loss": 0.0354, + "step": 71225 + }, + { + "epoch": 3.32, + "learning_rate": 8.99581458780744e-06, + "loss": 0.1169, + "step": 71230 + }, + { + "epoch": 3.32, + "learning_rate": 8.995030802752653e-06, + "loss": 0.1483, + "step": 71235 + }, + { + "epoch": 3.32, + "learning_rate": 8.994247017697867e-06, + "loss": 0.1475, + "step": 71240 + }, + { + "epoch": 3.32, + "learning_rate": 8.993463232643081e-06, + "loss": 0.199, + "step": 71245 + }, + { + "epoch": 3.32, + "learning_rate": 8.992679447588293e-06, + "loss": 0.1115, + "step": 71250 + }, + { + "epoch": 3.32, + "learning_rate": 8.991895662533507e-06, + "loss": 0.0245, + "step": 71255 + }, + { + "epoch": 3.33, + "learning_rate": 8.991111877478721e-06, + "loss": 0.04, + "step": 71260 + }, + { + "epoch": 3.33, + "learning_rate": 8.990328092423935e-06, + "loss": 0.0279, + "step": 71265 + }, + { + "epoch": 3.33, + "learning_rate": 8.989544307369149e-06, + "loss": 0.0768, + "step": 71270 + }, + { + "epoch": 3.33, + "learning_rate": 8.988760522314361e-06, + "loss": 0.079, + "step": 71275 + }, + { + "epoch": 3.33, + "learning_rate": 8.987976737259575e-06, + "loss": 0.1345, + "step": 71280 + }, + { + "epoch": 3.33, + "learning_rate": 8.987192952204789e-06, + "loss": 0.1671, + "step": 71285 + }, + { + "epoch": 3.33, + "learning_rate": 8.986409167150003e-06, + "loss": 0.1551, + "step": 71290 + }, + { + "epoch": 3.33, + "learning_rate": 8.985625382095215e-06, + "loss": 0.2852, + "step": 71295 + }, + { + "epoch": 3.33, + "learning_rate": 8.984841597040427e-06, + "loss": 0.0474, + "step": 71300 + }, + { + "epoch": 3.33, + "learning_rate": 8.984057811985641e-06, + "loss": 0.0449, + "step": 71305 + }, + { + "epoch": 3.33, + "learning_rate": 8.983274026930855e-06, + "loss": 0.0172, + "step": 71310 + }, + { + "epoch": 3.33, + "learning_rate": 8.982490241876069e-06, + "loss": 0.0817, + "step": 71315 + }, + { + "epoch": 3.33, + "learning_rate": 8.981706456821283e-06, + "loss": 0.0804, + "step": 71320 + }, + { + "epoch": 3.33, + "learning_rate": 8.980922671766495e-06, + "loss": 0.0379, + "step": 71325 + }, + { + "epoch": 3.33, + "learning_rate": 8.980138886711709e-06, + "loss": 0.0653, + "step": 71330 + }, + { + "epoch": 3.33, + "learning_rate": 8.979355101656923e-06, + "loss": 0.099, + "step": 71335 + }, + { + "epoch": 3.33, + "learning_rate": 8.978571316602137e-06, + "loss": 0.149, + "step": 71340 + }, + { + "epoch": 3.33, + "learning_rate": 8.977787531547349e-06, + "loss": 0.2112, + "step": 71345 + }, + { + "epoch": 3.33, + "learning_rate": 8.977003746492563e-06, + "loss": 0.077, + "step": 71350 + }, + { + "epoch": 3.33, + "learning_rate": 8.976219961437777e-06, + "loss": 0.0558, + "step": 71355 + }, + { + "epoch": 3.33, + "learning_rate": 8.975436176382989e-06, + "loss": 0.0582, + "step": 71360 + }, + { + "epoch": 3.33, + "learning_rate": 8.974652391328203e-06, + "loss": 0.0369, + "step": 71365 + }, + { + "epoch": 3.33, + "learning_rate": 8.973868606273417e-06, + "loss": 0.0759, + "step": 71370 + }, + { + "epoch": 3.33, + "learning_rate": 8.973084821218629e-06, + "loss": 0.0728, + "step": 71375 + }, + { + "epoch": 3.33, + "learning_rate": 8.972301036163843e-06, + "loss": 0.0995, + "step": 71380 + }, + { + "epoch": 3.33, + "learning_rate": 8.971517251109057e-06, + "loss": 0.4829, + "step": 71385 + }, + { + "epoch": 3.33, + "learning_rate": 8.97073346605427e-06, + "loss": 0.1905, + "step": 71390 + }, + { + "epoch": 3.33, + "learning_rate": 8.969949680999483e-06, + "loss": 0.355, + "step": 71395 + }, + { + "epoch": 3.33, + "learning_rate": 8.969165895944697e-06, + "loss": 0.0518, + "step": 71400 + }, + { + "epoch": 3.33, + "learning_rate": 8.96838211088991e-06, + "loss": 0.0564, + "step": 71405 + }, + { + "epoch": 3.33, + "learning_rate": 8.967598325835125e-06, + "loss": 0.0721, + "step": 71410 + }, + { + "epoch": 3.33, + "learning_rate": 8.966814540780337e-06, + "loss": 0.0613, + "step": 71415 + }, + { + "epoch": 3.33, + "learning_rate": 8.96603075572555e-06, + "loss": 0.1354, + "step": 71420 + }, + { + "epoch": 3.33, + "learning_rate": 8.965246970670763e-06, + "loss": 0.1019, + "step": 71425 + }, + { + "epoch": 3.33, + "learning_rate": 8.964463185615977e-06, + "loss": 0.071, + "step": 71430 + }, + { + "epoch": 3.33, + "learning_rate": 8.96367940056119e-06, + "loss": 0.0923, + "step": 71435 + }, + { + "epoch": 3.33, + "learning_rate": 8.962895615506405e-06, + "loss": 0.3224, + "step": 71440 + }, + { + "epoch": 3.33, + "learning_rate": 8.962111830451617e-06, + "loss": 0.3049, + "step": 71445 + }, + { + "epoch": 3.33, + "learning_rate": 8.96132804539683e-06, + "loss": 0.0961, + "step": 71450 + }, + { + "epoch": 3.33, + "learning_rate": 8.960544260342045e-06, + "loss": 0.0378, + "step": 71455 + }, + { + "epoch": 3.33, + "learning_rate": 8.959760475287259e-06, + "loss": 0.0332, + "step": 71460 + }, + { + "epoch": 3.33, + "learning_rate": 8.95897669023247e-06, + "loss": 0.0462, + "step": 71465 + }, + { + "epoch": 3.33, + "learning_rate": 8.958192905177685e-06, + "loss": 0.0546, + "step": 71470 + }, + { + "epoch": 3.34, + "learning_rate": 8.957409120122899e-06, + "loss": 0.0442, + "step": 71475 + }, + { + "epoch": 3.34, + "learning_rate": 8.956625335068112e-06, + "loss": 0.1452, + "step": 71480 + }, + { + "epoch": 3.34, + "learning_rate": 8.955841550013326e-06, + "loss": 0.1316, + "step": 71485 + }, + { + "epoch": 3.34, + "learning_rate": 8.955057764958539e-06, + "loss": 0.1885, + "step": 71490 + }, + { + "epoch": 3.34, + "learning_rate": 8.95427397990375e-06, + "loss": 0.1965, + "step": 71495 + }, + { + "epoch": 3.34, + "learning_rate": 8.953490194848965e-06, + "loss": 0.0727, + "step": 71500 + }, + { + "epoch": 3.34, + "learning_rate": 8.952706409794179e-06, + "loss": 0.0363, + "step": 71505 + }, + { + "epoch": 3.34, + "learning_rate": 8.951922624739393e-06, + "loss": 0.0834, + "step": 71510 + }, + { + "epoch": 3.34, + "learning_rate": 8.951138839684605e-06, + "loss": 0.053, + "step": 71515 + }, + { + "epoch": 3.34, + "learning_rate": 8.950355054629819e-06, + "loss": 0.0988, + "step": 71520 + }, + { + "epoch": 3.34, + "learning_rate": 8.949571269575033e-06, + "loss": 0.1021, + "step": 71525 + }, + { + "epoch": 3.34, + "learning_rate": 8.948787484520246e-06, + "loss": 0.1247, + "step": 71530 + }, + { + "epoch": 3.34, + "learning_rate": 8.94800369946546e-06, + "loss": 0.1089, + "step": 71535 + }, + { + "epoch": 3.34, + "learning_rate": 8.947219914410673e-06, + "loss": 0.213, + "step": 71540 + }, + { + "epoch": 3.34, + "learning_rate": 8.946436129355886e-06, + "loss": 0.3372, + "step": 71545 + }, + { + "epoch": 3.34, + "learning_rate": 8.9456523443011e-06, + "loss": 0.0676, + "step": 71550 + }, + { + "epoch": 3.34, + "learning_rate": 8.944868559246313e-06, + "loss": 0.0195, + "step": 71555 + }, + { + "epoch": 3.34, + "learning_rate": 8.944084774191527e-06, + "loss": 0.0476, + "step": 71560 + }, + { + "epoch": 3.34, + "learning_rate": 8.943300989136739e-06, + "loss": 0.0935, + "step": 71565 + }, + { + "epoch": 3.34, + "learning_rate": 8.942517204081953e-06, + "loss": 0.0661, + "step": 71570 + }, + { + "epoch": 3.34, + "learning_rate": 8.941733419027167e-06, + "loss": 0.1716, + "step": 71575 + }, + { + "epoch": 3.34, + "learning_rate": 8.94094963397238e-06, + "loss": 0.1298, + "step": 71580 + }, + { + "epoch": 3.34, + "learning_rate": 8.940165848917594e-06, + "loss": 0.1413, + "step": 71585 + }, + { + "epoch": 3.34, + "learning_rate": 8.939382063862807e-06, + "loss": 0.2108, + "step": 71590 + }, + { + "epoch": 3.34, + "learning_rate": 8.93859827880802e-06, + "loss": 0.2585, + "step": 71595 + }, + { + "epoch": 3.34, + "learning_rate": 8.937814493753234e-06, + "loss": 0.0759, + "step": 71600 + }, + { + "epoch": 3.34, + "learning_rate": 8.937030708698448e-06, + "loss": 0.0262, + "step": 71605 + }, + { + "epoch": 3.34, + "learning_rate": 8.93624692364366e-06, + "loss": 0.0391, + "step": 71610 + }, + { + "epoch": 3.34, + "learning_rate": 8.935463138588874e-06, + "loss": 0.0442, + "step": 71615 + }, + { + "epoch": 3.34, + "learning_rate": 8.934679353534087e-06, + "loss": 0.1691, + "step": 71620 + }, + { + "epoch": 3.34, + "learning_rate": 8.9338955684793e-06, + "loss": 0.1244, + "step": 71625 + }, + { + "epoch": 3.34, + "learning_rate": 8.933111783424514e-06, + "loss": 0.1147, + "step": 71630 + }, + { + "epoch": 3.34, + "learning_rate": 8.932327998369728e-06, + "loss": 0.1015, + "step": 71635 + }, + { + "epoch": 3.34, + "learning_rate": 8.93154421331494e-06, + "loss": 0.2465, + "step": 71640 + }, + { + "epoch": 3.34, + "learning_rate": 8.930760428260154e-06, + "loss": 0.2367, + "step": 71645 + }, + { + "epoch": 3.34, + "learning_rate": 8.929976643205368e-06, + "loss": 0.0882, + "step": 71650 + }, + { + "epoch": 3.34, + "learning_rate": 8.929192858150582e-06, + "loss": 0.0552, + "step": 71655 + }, + { + "epoch": 3.34, + "learning_rate": 8.928409073095794e-06, + "loss": 0.0628, + "step": 71660 + }, + { + "epoch": 3.34, + "learning_rate": 8.927625288041008e-06, + "loss": 0.0651, + "step": 71665 + }, + { + "epoch": 3.34, + "learning_rate": 8.926841502986222e-06, + "loss": 0.0243, + "step": 71670 + }, + { + "epoch": 3.34, + "learning_rate": 8.926057717931436e-06, + "loss": 0.113, + "step": 71675 + }, + { + "epoch": 3.34, + "learning_rate": 8.925273932876648e-06, + "loss": 0.1623, + "step": 71680 + }, + { + "epoch": 3.34, + "learning_rate": 8.924490147821862e-06, + "loss": 0.1306, + "step": 71685 + }, + { + "epoch": 3.35, + "learning_rate": 8.923706362767075e-06, + "loss": 0.2359, + "step": 71690 + }, + { + "epoch": 3.35, + "learning_rate": 8.922922577712288e-06, + "loss": 0.2171, + "step": 71695 + }, + { + "epoch": 3.35, + "learning_rate": 8.922138792657502e-06, + "loss": 0.1319, + "step": 71700 + }, + { + "epoch": 3.35, + "learning_rate": 8.921355007602716e-06, + "loss": 0.0114, + "step": 71705 + }, + { + "epoch": 3.35, + "learning_rate": 8.920571222547928e-06, + "loss": 0.0387, + "step": 71710 + }, + { + "epoch": 3.35, + "learning_rate": 8.919787437493142e-06, + "loss": 0.0901, + "step": 71715 + }, + { + "epoch": 3.35, + "learning_rate": 8.919003652438356e-06, + "loss": 0.0646, + "step": 71720 + }, + { + "epoch": 3.35, + "learning_rate": 8.91821986738357e-06, + "loss": 0.0286, + "step": 71725 + }, + { + "epoch": 3.35, + "learning_rate": 8.917436082328782e-06, + "loss": 0.1043, + "step": 71730 + }, + { + "epoch": 3.35, + "learning_rate": 8.916652297273996e-06, + "loss": 0.1484, + "step": 71735 + }, + { + "epoch": 3.35, + "learning_rate": 8.91586851221921e-06, + "loss": 0.1106, + "step": 71740 + }, + { + "epoch": 3.35, + "learning_rate": 8.915084727164424e-06, + "loss": 0.2136, + "step": 71745 + }, + { + "epoch": 3.35, + "learning_rate": 8.914300942109636e-06, + "loss": 0.0145, + "step": 71750 + }, + { + "epoch": 3.35, + "learning_rate": 8.91351715705485e-06, + "loss": 0.0284, + "step": 71755 + }, + { + "epoch": 3.35, + "learning_rate": 8.912733372000062e-06, + "loss": 0.0206, + "step": 71760 + }, + { + "epoch": 3.35, + "learning_rate": 8.911949586945276e-06, + "loss": 0.0796, + "step": 71765 + }, + { + "epoch": 3.35, + "learning_rate": 8.91116580189049e-06, + "loss": 0.0947, + "step": 71770 + }, + { + "epoch": 3.35, + "learning_rate": 8.910382016835704e-06, + "loss": 0.0731, + "step": 71775 + }, + { + "epoch": 3.35, + "learning_rate": 8.909598231780916e-06, + "loss": 0.1135, + "step": 71780 + }, + { + "epoch": 3.35, + "learning_rate": 8.90881444672613e-06, + "loss": 0.1274, + "step": 71785 + }, + { + "epoch": 3.35, + "learning_rate": 8.908030661671344e-06, + "loss": 0.2114, + "step": 71790 + }, + { + "epoch": 3.35, + "learning_rate": 8.907246876616558e-06, + "loss": 0.29, + "step": 71795 + }, + { + "epoch": 3.35, + "learning_rate": 8.906463091561772e-06, + "loss": 0.1027, + "step": 71800 + }, + { + "epoch": 3.35, + "learning_rate": 8.905679306506984e-06, + "loss": 0.0313, + "step": 71805 + }, + { + "epoch": 3.35, + "learning_rate": 8.904895521452198e-06, + "loss": 0.057, + "step": 71810 + }, + { + "epoch": 3.35, + "learning_rate": 8.90411173639741e-06, + "loss": 0.039, + "step": 71815 + }, + { + "epoch": 3.35, + "learning_rate": 8.903327951342624e-06, + "loss": 0.0554, + "step": 71820 + }, + { + "epoch": 3.35, + "learning_rate": 8.902544166287838e-06, + "loss": 0.0639, + "step": 71825 + }, + { + "epoch": 3.35, + "learning_rate": 8.90176038123305e-06, + "loss": 0.1176, + "step": 71830 + }, + { + "epoch": 3.35, + "learning_rate": 8.900976596178264e-06, + "loss": 0.0734, + "step": 71835 + }, + { + "epoch": 3.35, + "learning_rate": 8.900192811123478e-06, + "loss": 0.2307, + "step": 71840 + }, + { + "epoch": 3.35, + "learning_rate": 8.899409026068692e-06, + "loss": 0.3026, + "step": 71845 + }, + { + "epoch": 3.35, + "learning_rate": 8.898625241013906e-06, + "loss": 0.0709, + "step": 71850 + }, + { + "epoch": 3.35, + "learning_rate": 8.897841455959118e-06, + "loss": 0.0339, + "step": 71855 + }, + { + "epoch": 3.35, + "learning_rate": 8.897057670904332e-06, + "loss": 0.0831, + "step": 71860 + }, + { + "epoch": 3.35, + "learning_rate": 8.896273885849546e-06, + "loss": 0.0316, + "step": 71865 + }, + { + "epoch": 3.35, + "learning_rate": 8.89549010079476e-06, + "loss": 0.103, + "step": 71870 + }, + { + "epoch": 3.35, + "learning_rate": 8.894706315739972e-06, + "loss": 0.0875, + "step": 71875 + }, + { + "epoch": 3.35, + "learning_rate": 8.893922530685184e-06, + "loss": 0.0532, + "step": 71880 + }, + { + "epoch": 3.35, + "learning_rate": 8.893138745630398e-06, + "loss": 0.1521, + "step": 71885 + }, + { + "epoch": 3.35, + "learning_rate": 8.892354960575612e-06, + "loss": 0.1833, + "step": 71890 + }, + { + "epoch": 3.35, + "learning_rate": 8.891571175520826e-06, + "loss": 0.4676, + "step": 71895 + }, + { + "epoch": 3.35, + "learning_rate": 8.89078739046604e-06, + "loss": 0.0905, + "step": 71900 + }, + { + "epoch": 3.36, + "learning_rate": 8.890003605411252e-06, + "loss": 0.0352, + "step": 71905 + }, + { + "epoch": 3.36, + "learning_rate": 8.889219820356466e-06, + "loss": 0.0343, + "step": 71910 + }, + { + "epoch": 3.36, + "learning_rate": 8.88843603530168e-06, + "loss": 0.0217, + "step": 71915 + }, + { + "epoch": 3.36, + "learning_rate": 8.887652250246894e-06, + "loss": 0.0845, + "step": 71920 + }, + { + "epoch": 3.36, + "learning_rate": 8.886868465192106e-06, + "loss": 0.0914, + "step": 71925 + }, + { + "epoch": 3.36, + "learning_rate": 8.88608468013732e-06, + "loss": 0.0889, + "step": 71930 + }, + { + "epoch": 3.36, + "learning_rate": 8.885300895082534e-06, + "loss": 0.0855, + "step": 71935 + }, + { + "epoch": 3.36, + "learning_rate": 8.884517110027748e-06, + "loss": 0.2037, + "step": 71940 + }, + { + "epoch": 3.36, + "learning_rate": 8.88373332497296e-06, + "loss": 0.3553, + "step": 71945 + }, + { + "epoch": 3.36, + "learning_rate": 8.882949539918174e-06, + "loss": 0.0663, + "step": 71950 + }, + { + "epoch": 3.36, + "learning_rate": 8.882165754863386e-06, + "loss": 0.0432, + "step": 71955 + }, + { + "epoch": 3.36, + "learning_rate": 8.8813819698086e-06, + "loss": 0.0513, + "step": 71960 + }, + { + "epoch": 3.36, + "learning_rate": 8.880598184753814e-06, + "loss": 0.027, + "step": 71965 + }, + { + "epoch": 3.36, + "learning_rate": 8.879814399699028e-06, + "loss": 0.1287, + "step": 71970 + }, + { + "epoch": 3.36, + "learning_rate": 8.87903061464424e-06, + "loss": 0.0541, + "step": 71975 + }, + { + "epoch": 3.36, + "learning_rate": 8.878246829589454e-06, + "loss": 0.0934, + "step": 71980 + }, + { + "epoch": 3.36, + "learning_rate": 8.877463044534668e-06, + "loss": 0.0739, + "step": 71985 + }, + { + "epoch": 3.36, + "learning_rate": 8.876679259479882e-06, + "loss": 0.1659, + "step": 71990 + }, + { + "epoch": 3.36, + "learning_rate": 8.875895474425094e-06, + "loss": 0.2925, + "step": 71995 + }, + { + "epoch": 3.36, + "learning_rate": 8.875111689370308e-06, + "loss": 0.0761, + "step": 72000 + }, + { + "epoch": 3.36, + "learning_rate": 8.874327904315522e-06, + "loss": 0.0252, + "step": 72005 + }, + { + "epoch": 3.36, + "learning_rate": 8.873544119260734e-06, + "loss": 0.0426, + "step": 72010 + }, + { + "epoch": 3.36, + "learning_rate": 8.872760334205948e-06, + "loss": 0.0238, + "step": 72015 + }, + { + "epoch": 3.36, + "learning_rate": 8.871976549151162e-06, + "loss": 0.0345, + "step": 72020 + }, + { + "epoch": 3.36, + "learning_rate": 8.871192764096374e-06, + "loss": 0.0779, + "step": 72025 + }, + { + "epoch": 3.36, + "learning_rate": 8.870408979041588e-06, + "loss": 0.0438, + "step": 72030 + }, + { + "epoch": 3.36, + "learning_rate": 8.869625193986802e-06, + "loss": 0.1802, + "step": 72035 + }, + { + "epoch": 3.36, + "learning_rate": 8.868841408932016e-06, + "loss": 0.1752, + "step": 72040 + }, + { + "epoch": 3.36, + "learning_rate": 8.868057623877228e-06, + "loss": 0.2804, + "step": 72045 + }, + { + "epoch": 3.36, + "learning_rate": 8.867273838822442e-06, + "loss": 0.0499, + "step": 72050 + }, + { + "epoch": 3.36, + "learning_rate": 8.866490053767656e-06, + "loss": 0.0281, + "step": 72055 + }, + { + "epoch": 3.36, + "learning_rate": 8.86570626871287e-06, + "loss": 0.0557, + "step": 72060 + }, + { + "epoch": 3.36, + "learning_rate": 8.864922483658084e-06, + "loss": 0.0986, + "step": 72065 + }, + { + "epoch": 3.36, + "learning_rate": 8.864138698603296e-06, + "loss": 0.0801, + "step": 72070 + }, + { + "epoch": 3.36, + "learning_rate": 8.863354913548508e-06, + "loss": 0.1067, + "step": 72075 + }, + { + "epoch": 3.36, + "learning_rate": 8.862571128493722e-06, + "loss": 0.069, + "step": 72080 + }, + { + "epoch": 3.36, + "learning_rate": 8.861787343438936e-06, + "loss": 0.1912, + "step": 72085 + }, + { + "epoch": 3.36, + "learning_rate": 8.86100355838415e-06, + "loss": 0.2374, + "step": 72090 + }, + { + "epoch": 3.36, + "learning_rate": 8.860219773329362e-06, + "loss": 0.2909, + "step": 72095 + }, + { + "epoch": 3.36, + "learning_rate": 8.859435988274576e-06, + "loss": 0.0656, + "step": 72100 + }, + { + "epoch": 3.36, + "learning_rate": 8.85865220321979e-06, + "loss": 0.0112, + "step": 72105 + }, + { + "epoch": 3.36, + "learning_rate": 8.857868418165004e-06, + "loss": 0.0553, + "step": 72110 + }, + { + "epoch": 3.36, + "learning_rate": 8.857084633110218e-06, + "loss": 0.045, + "step": 72115 + }, + { + "epoch": 3.37, + "learning_rate": 8.85630084805543e-06, + "loss": 0.1336, + "step": 72120 + }, + { + "epoch": 3.37, + "learning_rate": 8.855517063000644e-06, + "loss": 0.1027, + "step": 72125 + }, + { + "epoch": 3.37, + "learning_rate": 8.854733277945858e-06, + "loss": 0.1755, + "step": 72130 + }, + { + "epoch": 3.37, + "learning_rate": 8.853949492891071e-06, + "loss": 0.0975, + "step": 72135 + }, + { + "epoch": 3.37, + "learning_rate": 8.853165707836284e-06, + "loss": 0.219, + "step": 72140 + }, + { + "epoch": 3.37, + "learning_rate": 8.852381922781496e-06, + "loss": 0.2413, + "step": 72145 + }, + { + "epoch": 3.37, + "learning_rate": 8.85159813772671e-06, + "loss": 0.1244, + "step": 72150 + }, + { + "epoch": 3.37, + "learning_rate": 8.850814352671924e-06, + "loss": 0.0359, + "step": 72155 + }, + { + "epoch": 3.37, + "learning_rate": 8.850030567617138e-06, + "loss": 0.0713, + "step": 72160 + }, + { + "epoch": 3.37, + "learning_rate": 8.849246782562352e-06, + "loss": 0.0794, + "step": 72165 + }, + { + "epoch": 3.37, + "learning_rate": 8.848462997507564e-06, + "loss": 0.0946, + "step": 72170 + }, + { + "epoch": 3.37, + "learning_rate": 8.847679212452778e-06, + "loss": 0.0688, + "step": 72175 + }, + { + "epoch": 3.37, + "learning_rate": 8.846895427397992e-06, + "loss": 0.1037, + "step": 72180 + }, + { + "epoch": 3.37, + "learning_rate": 8.846111642343205e-06, + "loss": 0.1237, + "step": 72185 + }, + { + "epoch": 3.37, + "learning_rate": 8.845327857288418e-06, + "loss": 0.1358, + "step": 72190 + }, + { + "epoch": 3.37, + "learning_rate": 8.844544072233632e-06, + "loss": 0.5546, + "step": 72195 + }, + { + "epoch": 3.37, + "learning_rate": 8.843760287178845e-06, + "loss": 0.0636, + "step": 72200 + }, + { + "epoch": 3.37, + "learning_rate": 8.842976502124058e-06, + "loss": 0.0212, + "step": 72205 + }, + { + "epoch": 3.37, + "learning_rate": 8.842192717069272e-06, + "loss": 0.0511, + "step": 72210 + }, + { + "epoch": 3.37, + "learning_rate": 8.841408932014485e-06, + "loss": 0.0671, + "step": 72215 + }, + { + "epoch": 3.37, + "learning_rate": 8.840625146959698e-06, + "loss": 0.1021, + "step": 72220 + }, + { + "epoch": 3.37, + "learning_rate": 8.839841361904912e-06, + "loss": 0.1161, + "step": 72225 + }, + { + "epoch": 3.37, + "learning_rate": 8.839057576850126e-06, + "loss": 0.1356, + "step": 72230 + }, + { + "epoch": 3.37, + "learning_rate": 8.83827379179534e-06, + "loss": 0.2081, + "step": 72235 + }, + { + "epoch": 3.37, + "learning_rate": 8.837490006740552e-06, + "loss": 0.1651, + "step": 72240 + }, + { + "epoch": 3.37, + "learning_rate": 8.836706221685766e-06, + "loss": 0.1851, + "step": 72245 + }, + { + "epoch": 3.37, + "learning_rate": 8.83592243663098e-06, + "loss": 0.0708, + "step": 72250 + }, + { + "epoch": 3.37, + "learning_rate": 8.835138651576193e-06, + "loss": 0.007, + "step": 72255 + }, + { + "epoch": 3.37, + "learning_rate": 8.834354866521406e-06, + "loss": 0.0373, + "step": 72260 + }, + { + "epoch": 3.37, + "learning_rate": 8.83357108146662e-06, + "loss": 0.0591, + "step": 72265 + }, + { + "epoch": 3.37, + "learning_rate": 8.832787296411832e-06, + "loss": 0.0865, + "step": 72270 + }, + { + "epoch": 3.37, + "learning_rate": 8.832003511357046e-06, + "loss": 0.0524, + "step": 72275 + }, + { + "epoch": 3.37, + "learning_rate": 8.83121972630226e-06, + "loss": 0.1104, + "step": 72280 + }, + { + "epoch": 3.37, + "learning_rate": 8.830435941247473e-06, + "loss": 0.1238, + "step": 72285 + }, + { + "epoch": 3.37, + "learning_rate": 8.829652156192686e-06, + "loss": 0.1583, + "step": 72290 + }, + { + "epoch": 3.37, + "learning_rate": 8.8288683711379e-06, + "loss": 0.1681, + "step": 72295 + }, + { + "epoch": 3.37, + "learning_rate": 8.828084586083113e-06, + "loss": 0.1097, + "step": 72300 + }, + { + "epoch": 3.37, + "learning_rate": 8.827300801028327e-06, + "loss": 0.0426, + "step": 72305 + }, + { + "epoch": 3.37, + "learning_rate": 8.82651701597354e-06, + "loss": 0.0368, + "step": 72310 + }, + { + "epoch": 3.37, + "learning_rate": 8.825733230918753e-06, + "loss": 0.0631, + "step": 72315 + }, + { + "epoch": 3.37, + "learning_rate": 8.824949445863967e-06, + "loss": 0.0818, + "step": 72320 + }, + { + "epoch": 3.37, + "learning_rate": 8.824165660809181e-06, + "loss": 0.0878, + "step": 72325 + }, + { + "epoch": 3.38, + "learning_rate": 8.823381875754393e-06, + "loss": 0.1477, + "step": 72330 + }, + { + "epoch": 3.38, + "learning_rate": 8.822598090699607e-06, + "loss": 0.2534, + "step": 72335 + }, + { + "epoch": 3.38, + "learning_rate": 8.82181430564482e-06, + "loss": 0.2975, + "step": 72340 + }, + { + "epoch": 3.38, + "learning_rate": 8.821030520590033e-06, + "loss": 0.2963, + "step": 72345 + }, + { + "epoch": 3.38, + "learning_rate": 8.820246735535247e-06, + "loss": 0.0708, + "step": 72350 + }, + { + "epoch": 3.38, + "learning_rate": 8.819462950480461e-06, + "loss": 0.0599, + "step": 72355 + }, + { + "epoch": 3.38, + "learning_rate": 8.818679165425674e-06, + "loss": 0.0268, + "step": 72360 + }, + { + "epoch": 3.38, + "learning_rate": 8.817895380370887e-06, + "loss": 0.0357, + "step": 72365 + }, + { + "epoch": 3.38, + "learning_rate": 8.817111595316101e-06, + "loss": 0.1084, + "step": 72370 + }, + { + "epoch": 3.38, + "learning_rate": 8.816327810261315e-06, + "loss": 0.1211, + "step": 72375 + }, + { + "epoch": 3.38, + "learning_rate": 8.815544025206529e-06, + "loss": 0.1424, + "step": 72380 + }, + { + "epoch": 3.38, + "learning_rate": 8.814760240151741e-06, + "loss": 0.1438, + "step": 72385 + }, + { + "epoch": 3.38, + "learning_rate": 8.813976455096955e-06, + "loss": 0.2297, + "step": 72390 + }, + { + "epoch": 3.38, + "learning_rate": 8.81319267004217e-06, + "loss": 0.2569, + "step": 72395 + }, + { + "epoch": 3.38, + "learning_rate": 8.812408884987381e-06, + "loss": 0.0635, + "step": 72400 + }, + { + "epoch": 3.38, + "learning_rate": 8.811625099932595e-06, + "loss": 0.0358, + "step": 72405 + }, + { + "epoch": 3.38, + "learning_rate": 8.810841314877807e-06, + "loss": 0.033, + "step": 72410 + }, + { + "epoch": 3.38, + "learning_rate": 8.810057529823021e-06, + "loss": 0.0559, + "step": 72415 + }, + { + "epoch": 3.38, + "learning_rate": 8.809273744768235e-06, + "loss": 0.0961, + "step": 72420 + }, + { + "epoch": 3.38, + "learning_rate": 8.80848995971345e-06, + "loss": 0.133, + "step": 72425 + }, + { + "epoch": 3.38, + "learning_rate": 8.807706174658663e-06, + "loss": 0.1051, + "step": 72430 + }, + { + "epoch": 3.38, + "learning_rate": 8.806922389603875e-06, + "loss": 0.1934, + "step": 72435 + }, + { + "epoch": 3.38, + "learning_rate": 8.80613860454909e-06, + "loss": 0.1838, + "step": 72440 + }, + { + "epoch": 3.38, + "learning_rate": 8.805354819494303e-06, + "loss": 0.2467, + "step": 72445 + }, + { + "epoch": 3.38, + "learning_rate": 8.804571034439517e-06, + "loss": 0.064, + "step": 72450 + }, + { + "epoch": 3.38, + "learning_rate": 8.80378724938473e-06, + "loss": 0.0198, + "step": 72455 + }, + { + "epoch": 3.38, + "learning_rate": 8.803003464329943e-06, + "loss": 0.0427, + "step": 72460 + }, + { + "epoch": 3.38, + "learning_rate": 8.802219679275155e-06, + "loss": 0.05, + "step": 72465 + }, + { + "epoch": 3.38, + "learning_rate": 8.80143589422037e-06, + "loss": 0.1142, + "step": 72470 + }, + { + "epoch": 3.38, + "learning_rate": 8.800652109165583e-06, + "loss": 0.0693, + "step": 72475 + }, + { + "epoch": 3.38, + "learning_rate": 8.799868324110797e-06, + "loss": 0.0562, + "step": 72480 + }, + { + "epoch": 3.38, + "learning_rate": 8.79908453905601e-06, + "loss": 0.1657, + "step": 72485 + }, + { + "epoch": 3.38, + "learning_rate": 8.798300754001223e-06, + "loss": 0.2237, + "step": 72490 + }, + { + "epoch": 3.38, + "learning_rate": 8.797516968946437e-06, + "loss": 0.2782, + "step": 72495 + }, + { + "epoch": 3.38, + "learning_rate": 8.796733183891651e-06, + "loss": 0.037, + "step": 72500 + }, + { + "epoch": 3.38, + "learning_rate": 8.795949398836863e-06, + "loss": 0.0315, + "step": 72505 + }, + { + "epoch": 3.38, + "learning_rate": 8.795165613782077e-06, + "loss": 0.0494, + "step": 72510 + }, + { + "epoch": 3.38, + "learning_rate": 8.794381828727291e-06, + "loss": 0.0724, + "step": 72515 + }, + { + "epoch": 3.38, + "learning_rate": 8.793598043672505e-06, + "loss": 0.1049, + "step": 72520 + }, + { + "epoch": 3.38, + "learning_rate": 8.792814258617717e-06, + "loss": 0.0456, + "step": 72525 + }, + { + "epoch": 3.38, + "learning_rate": 8.792030473562931e-06, + "loss": 0.0621, + "step": 72530 + }, + { + "epoch": 3.38, + "learning_rate": 8.791246688508143e-06, + "loss": 0.0755, + "step": 72535 + }, + { + "epoch": 3.38, + "learning_rate": 8.790462903453357e-06, + "loss": 0.2267, + "step": 72540 + }, + { + "epoch": 3.39, + "learning_rate": 8.789679118398571e-06, + "loss": 0.4156, + "step": 72545 + }, + { + "epoch": 3.39, + "learning_rate": 8.788895333343785e-06, + "loss": 0.0726, + "step": 72550 + }, + { + "epoch": 3.39, + "learning_rate": 8.788111548288997e-06, + "loss": 0.0252, + "step": 72555 + }, + { + "epoch": 3.39, + "learning_rate": 8.787327763234211e-06, + "loss": 0.0365, + "step": 72560 + }, + { + "epoch": 3.39, + "learning_rate": 8.786543978179425e-06, + "loss": 0.0294, + "step": 72565 + }, + { + "epoch": 3.39, + "learning_rate": 8.785760193124639e-06, + "loss": 0.0894, + "step": 72570 + }, + { + "epoch": 3.39, + "learning_rate": 8.784976408069851e-06, + "loss": 0.1237, + "step": 72575 + }, + { + "epoch": 3.39, + "learning_rate": 8.784192623015065e-06, + "loss": 0.1305, + "step": 72580 + }, + { + "epoch": 3.39, + "learning_rate": 8.783408837960279e-06, + "loss": 0.2606, + "step": 72585 + }, + { + "epoch": 3.39, + "learning_rate": 8.782625052905493e-06, + "loss": 0.1614, + "step": 72590 + }, + { + "epoch": 3.39, + "learning_rate": 8.781841267850705e-06, + "loss": 0.4077, + "step": 72595 + }, + { + "epoch": 3.39, + "learning_rate": 8.781057482795919e-06, + "loss": 0.0702, + "step": 72600 + }, + { + "epoch": 3.39, + "learning_rate": 8.780273697741131e-06, + "loss": 0.1083, + "step": 72605 + }, + { + "epoch": 3.39, + "learning_rate": 8.779489912686345e-06, + "loss": 0.0806, + "step": 72610 + }, + { + "epoch": 3.39, + "learning_rate": 8.778706127631559e-06, + "loss": 0.07, + "step": 72615 + }, + { + "epoch": 3.39, + "learning_rate": 8.777922342576773e-06, + "loss": 0.0412, + "step": 72620 + }, + { + "epoch": 3.39, + "learning_rate": 8.777138557521985e-06, + "loss": 0.1552, + "step": 72625 + }, + { + "epoch": 3.39, + "learning_rate": 8.776354772467199e-06, + "loss": 0.1425, + "step": 72630 + }, + { + "epoch": 3.39, + "learning_rate": 8.775570987412413e-06, + "loss": 0.2181, + "step": 72635 + }, + { + "epoch": 3.39, + "learning_rate": 8.774787202357627e-06, + "loss": 0.1684, + "step": 72640 + }, + { + "epoch": 3.39, + "learning_rate": 8.77400341730284e-06, + "loss": 0.2213, + "step": 72645 + }, + { + "epoch": 3.39, + "learning_rate": 8.773219632248053e-06, + "loss": 0.1042, + "step": 72650 + }, + { + "epoch": 3.39, + "learning_rate": 8.772435847193267e-06, + "loss": 0.0489, + "step": 72655 + }, + { + "epoch": 3.39, + "learning_rate": 8.771652062138479e-06, + "loss": 0.0216, + "step": 72660 + }, + { + "epoch": 3.39, + "learning_rate": 8.770868277083693e-06, + "loss": 0.0553, + "step": 72665 + }, + { + "epoch": 3.39, + "learning_rate": 8.770084492028907e-06, + "loss": 0.1275, + "step": 72670 + }, + { + "epoch": 3.39, + "learning_rate": 8.769300706974119e-06, + "loss": 0.1648, + "step": 72675 + }, + { + "epoch": 3.39, + "learning_rate": 8.768516921919333e-06, + "loss": 0.0575, + "step": 72680 + }, + { + "epoch": 3.39, + "learning_rate": 8.767733136864547e-06, + "loss": 0.1371, + "step": 72685 + }, + { + "epoch": 3.39, + "learning_rate": 8.76694935180976e-06, + "loss": 0.21, + "step": 72690 + }, + { + "epoch": 3.39, + "learning_rate": 8.766165566754975e-06, + "loss": 0.3111, + "step": 72695 + }, + { + "epoch": 3.39, + "learning_rate": 8.765381781700187e-06, + "loss": 0.0959, + "step": 72700 + }, + { + "epoch": 3.39, + "learning_rate": 8.7645979966454e-06, + "loss": 0.0205, + "step": 72705 + }, + { + "epoch": 3.39, + "learning_rate": 8.763814211590615e-06, + "loss": 0.0431, + "step": 72710 + }, + { + "epoch": 3.39, + "learning_rate": 8.763030426535829e-06, + "loss": 0.0457, + "step": 72715 + }, + { + "epoch": 3.39, + "learning_rate": 8.762246641481041e-06, + "loss": 0.0813, + "step": 72720 + }, + { + "epoch": 3.39, + "learning_rate": 8.761462856426253e-06, + "loss": 0.057, + "step": 72725 + }, + { + "epoch": 3.39, + "learning_rate": 8.760679071371467e-06, + "loss": 0.1586, + "step": 72730 + }, + { + "epoch": 3.39, + "learning_rate": 8.759895286316681e-06, + "loss": 0.1567, + "step": 72735 + }, + { + "epoch": 3.39, + "learning_rate": 8.759111501261895e-06, + "loss": 0.1828, + "step": 72740 + }, + { + "epoch": 3.39, + "learning_rate": 8.758327716207109e-06, + "loss": 0.3974, + "step": 72745 + }, + { + "epoch": 3.39, + "learning_rate": 8.757543931152321e-06, + "loss": 0.0386, + "step": 72750 + }, + { + "epoch": 3.39, + "learning_rate": 8.756760146097535e-06, + "loss": 0.0502, + "step": 72755 + }, + { + "epoch": 3.4, + "learning_rate": 8.755976361042749e-06, + "loss": 0.0306, + "step": 72760 + }, + { + "epoch": 3.4, + "learning_rate": 8.755192575987963e-06, + "loss": 0.0475, + "step": 72765 + }, + { + "epoch": 3.4, + "learning_rate": 8.754408790933175e-06, + "loss": 0.0952, + "step": 72770 + }, + { + "epoch": 3.4, + "learning_rate": 8.753625005878389e-06, + "loss": 0.1277, + "step": 72775 + }, + { + "epoch": 3.4, + "learning_rate": 8.752841220823603e-06, + "loss": 0.0964, + "step": 72780 + }, + { + "epoch": 3.4, + "learning_rate": 8.752057435768817e-06, + "loss": 0.1072, + "step": 72785 + }, + { + "epoch": 3.4, + "learning_rate": 8.751273650714029e-06, + "loss": 0.2131, + "step": 72790 + }, + { + "epoch": 3.4, + "learning_rate": 8.750489865659243e-06, + "loss": 0.2558, + "step": 72795 + }, + { + "epoch": 3.4, + "learning_rate": 8.749706080604455e-06, + "loss": 0.0444, + "step": 72800 + }, + { + "epoch": 3.4, + "learning_rate": 8.748922295549669e-06, + "loss": 0.057, + "step": 72805 + }, + { + "epoch": 3.4, + "learning_rate": 8.748138510494883e-06, + "loss": 0.0217, + "step": 72810 + }, + { + "epoch": 3.4, + "learning_rate": 8.747354725440097e-06, + "loss": 0.0364, + "step": 72815 + }, + { + "epoch": 3.4, + "learning_rate": 8.746570940385309e-06, + "loss": 0.0689, + "step": 72820 + }, + { + "epoch": 3.4, + "learning_rate": 8.745787155330523e-06, + "loss": 0.1745, + "step": 72825 + }, + { + "epoch": 3.4, + "learning_rate": 8.745003370275737e-06, + "loss": 0.1426, + "step": 72830 + }, + { + "epoch": 3.4, + "learning_rate": 8.74421958522095e-06, + "loss": 0.2692, + "step": 72835 + }, + { + "epoch": 3.4, + "learning_rate": 8.743435800166163e-06, + "loss": 0.2706, + "step": 72840 + }, + { + "epoch": 3.4, + "learning_rate": 8.742652015111377e-06, + "loss": 0.1699, + "step": 72845 + }, + { + "epoch": 3.4, + "learning_rate": 8.74186823005659e-06, + "loss": 0.0315, + "step": 72850 + }, + { + "epoch": 3.4, + "learning_rate": 8.741084445001804e-06, + "loss": 0.0269, + "step": 72855 + }, + { + "epoch": 3.4, + "learning_rate": 8.740300659947017e-06, + "loss": 0.0178, + "step": 72860 + }, + { + "epoch": 3.4, + "learning_rate": 8.73951687489223e-06, + "loss": 0.0391, + "step": 72865 + }, + { + "epoch": 3.4, + "learning_rate": 8.738733089837443e-06, + "loss": 0.0862, + "step": 72870 + }, + { + "epoch": 3.4, + "learning_rate": 8.737949304782657e-06, + "loss": 0.0993, + "step": 72875 + }, + { + "epoch": 3.4, + "learning_rate": 8.73716551972787e-06, + "loss": 0.1759, + "step": 72880 + }, + { + "epoch": 3.4, + "learning_rate": 8.736381734673084e-06, + "loss": 0.1792, + "step": 72885 + }, + { + "epoch": 3.4, + "learning_rate": 8.735597949618297e-06, + "loss": 0.2136, + "step": 72890 + }, + { + "epoch": 3.4, + "learning_rate": 8.73481416456351e-06, + "loss": 0.1858, + "step": 72895 + }, + { + "epoch": 3.4, + "learning_rate": 8.734030379508725e-06, + "loss": 0.0937, + "step": 72900 + }, + { + "epoch": 3.4, + "learning_rate": 8.733246594453938e-06, + "loss": 0.0136, + "step": 72905 + }, + { + "epoch": 3.4, + "learning_rate": 8.73246280939915e-06, + "loss": 0.0673, + "step": 72910 + }, + { + "epoch": 3.4, + "learning_rate": 8.731679024344365e-06, + "loss": 0.0488, + "step": 72915 + }, + { + "epoch": 3.4, + "learning_rate": 8.730895239289578e-06, + "loss": 0.0564, + "step": 72920 + }, + { + "epoch": 3.4, + "learning_rate": 8.73011145423479e-06, + "loss": 0.158, + "step": 72925 + }, + { + "epoch": 3.4, + "learning_rate": 8.729327669180005e-06, + "loss": 0.1729, + "step": 72930 + }, + { + "epoch": 3.4, + "learning_rate": 8.728543884125218e-06, + "loss": 0.1008, + "step": 72935 + }, + { + "epoch": 3.4, + "learning_rate": 8.72776009907043e-06, + "loss": 0.1283, + "step": 72940 + }, + { + "epoch": 3.4, + "learning_rate": 8.726976314015645e-06, + "loss": 0.4005, + "step": 72945 + }, + { + "epoch": 3.4, + "learning_rate": 8.726192528960858e-06, + "loss": 0.0513, + "step": 72950 + }, + { + "epoch": 3.4, + "learning_rate": 8.725408743906072e-06, + "loss": 0.0246, + "step": 72955 + }, + { + "epoch": 3.4, + "learning_rate": 8.724624958851286e-06, + "loss": 0.0681, + "step": 72960 + }, + { + "epoch": 3.4, + "learning_rate": 8.723841173796499e-06, + "loss": 0.0764, + "step": 72965 + }, + { + "epoch": 3.4, + "learning_rate": 8.723057388741712e-06, + "loss": 0.0384, + "step": 72970 + }, + { + "epoch": 3.41, + "learning_rate": 8.722273603686926e-06, + "loss": 0.0427, + "step": 72975 + }, + { + "epoch": 3.41, + "learning_rate": 8.72148981863214e-06, + "loss": 0.1383, + "step": 72980 + }, + { + "epoch": 3.41, + "learning_rate": 8.720706033577352e-06, + "loss": 0.1134, + "step": 72985 + }, + { + "epoch": 3.41, + "learning_rate": 8.719922248522565e-06, + "loss": 0.1788, + "step": 72990 + }, + { + "epoch": 3.41, + "learning_rate": 8.719138463467779e-06, + "loss": 0.1881, + "step": 72995 + }, + { + "epoch": 3.41, + "learning_rate": 8.718354678412992e-06, + "loss": 0.0662, + "step": 73000 + }, + { + "epoch": 3.41, + "learning_rate": 8.717570893358206e-06, + "loss": 0.0656, + "step": 73005 + }, + { + "epoch": 3.41, + "learning_rate": 8.71678710830342e-06, + "loss": 0.091, + "step": 73010 + }, + { + "epoch": 3.41, + "learning_rate": 8.716003323248632e-06, + "loss": 0.0671, + "step": 73015 + }, + { + "epoch": 3.41, + "learning_rate": 8.715219538193846e-06, + "loss": 0.0417, + "step": 73020 + }, + { + "epoch": 3.41, + "learning_rate": 8.71443575313906e-06, + "loss": 0.1163, + "step": 73025 + }, + { + "epoch": 3.41, + "learning_rate": 8.713651968084274e-06, + "loss": 0.0666, + "step": 73030 + }, + { + "epoch": 3.41, + "learning_rate": 8.712868183029486e-06, + "loss": 0.128, + "step": 73035 + }, + { + "epoch": 3.41, + "learning_rate": 8.7120843979747e-06, + "loss": 0.1942, + "step": 73040 + }, + { + "epoch": 3.41, + "learning_rate": 8.711300612919914e-06, + "loss": 0.2439, + "step": 73045 + }, + { + "epoch": 3.41, + "learning_rate": 8.710516827865128e-06, + "loss": 0.067, + "step": 73050 + }, + { + "epoch": 3.41, + "learning_rate": 8.70973304281034e-06, + "loss": 0.0163, + "step": 73055 + }, + { + "epoch": 3.41, + "learning_rate": 8.708949257755554e-06, + "loss": 0.0617, + "step": 73060 + }, + { + "epoch": 3.41, + "learning_rate": 8.708165472700766e-06, + "loss": 0.0927, + "step": 73065 + }, + { + "epoch": 3.41, + "learning_rate": 8.70738168764598e-06, + "loss": 0.0931, + "step": 73070 + }, + { + "epoch": 3.41, + "learning_rate": 8.706597902591194e-06, + "loss": 0.0455, + "step": 73075 + }, + { + "epoch": 3.41, + "learning_rate": 8.705814117536408e-06, + "loss": 0.1762, + "step": 73080 + }, + { + "epoch": 3.41, + "learning_rate": 8.70503033248162e-06, + "loss": 0.1451, + "step": 73085 + }, + { + "epoch": 3.41, + "learning_rate": 8.704246547426834e-06, + "loss": 0.1864, + "step": 73090 + }, + { + "epoch": 3.41, + "learning_rate": 8.703462762372048e-06, + "loss": 0.2506, + "step": 73095 + }, + { + "epoch": 3.41, + "learning_rate": 8.702678977317262e-06, + "loss": 0.136, + "step": 73100 + }, + { + "epoch": 3.41, + "learning_rate": 8.701895192262474e-06, + "loss": 0.0409, + "step": 73105 + }, + { + "epoch": 3.41, + "learning_rate": 8.701111407207688e-06, + "loss": 0.0228, + "step": 73110 + }, + { + "epoch": 3.41, + "learning_rate": 8.700327622152902e-06, + "loss": 0.0804, + "step": 73115 + }, + { + "epoch": 3.41, + "learning_rate": 8.699543837098114e-06, + "loss": 0.0498, + "step": 73120 + }, + { + "epoch": 3.41, + "learning_rate": 8.698916809054285e-06, + "loss": 0.0897, + "step": 73125 + }, + { + "epoch": 3.41, + "learning_rate": 8.698133023999499e-06, + "loss": 0.0444, + "step": 73130 + }, + { + "epoch": 3.41, + "learning_rate": 8.697349238944713e-06, + "loss": 0.1355, + "step": 73135 + }, + { + "epoch": 3.41, + "learning_rate": 8.696565453889927e-06, + "loss": 0.1694, + "step": 73140 + }, + { + "epoch": 3.41, + "learning_rate": 8.695781668835139e-06, + "loss": 0.2544, + "step": 73145 + }, + { + "epoch": 3.41, + "learning_rate": 8.694997883780353e-06, + "loss": 0.0608, + "step": 73150 + }, + { + "epoch": 3.41, + "learning_rate": 8.694214098725567e-06, + "loss": 0.0813, + "step": 73155 + }, + { + "epoch": 3.41, + "learning_rate": 8.69343031367078e-06, + "loss": 0.049, + "step": 73160 + }, + { + "epoch": 3.41, + "learning_rate": 8.692646528615993e-06, + "loss": 0.0249, + "step": 73165 + }, + { + "epoch": 3.41, + "learning_rate": 8.691862743561207e-06, + "loss": 0.0771, + "step": 73170 + }, + { + "epoch": 3.41, + "learning_rate": 8.69107895850642e-06, + "loss": 0.1589, + "step": 73175 + }, + { + "epoch": 3.41, + "learning_rate": 8.690295173451634e-06, + "loss": 0.0805, + "step": 73180 + }, + { + "epoch": 3.41, + "learning_rate": 8.689511388396847e-06, + "loss": 0.1952, + "step": 73185 + }, + { + "epoch": 3.42, + "learning_rate": 8.68872760334206e-06, + "loss": 0.2552, + "step": 73190 + }, + { + "epoch": 3.42, + "learning_rate": 8.687943818287273e-06, + "loss": 0.2266, + "step": 73195 + }, + { + "epoch": 3.42, + "learning_rate": 8.687160033232487e-06, + "loss": 0.0267, + "step": 73200 + }, + { + "epoch": 3.42, + "learning_rate": 8.6863762481777e-06, + "loss": 0.0572, + "step": 73205 + }, + { + "epoch": 3.42, + "learning_rate": 8.685592463122914e-06, + "loss": 0.0195, + "step": 73210 + }, + { + "epoch": 3.42, + "learning_rate": 8.684808678068127e-06, + "loss": 0.1335, + "step": 73215 + }, + { + "epoch": 3.42, + "learning_rate": 8.68402489301334e-06, + "loss": 0.0398, + "step": 73220 + }, + { + "epoch": 3.42, + "learning_rate": 8.683241107958554e-06, + "loss": 0.2702, + "step": 73225 + }, + { + "epoch": 3.42, + "learning_rate": 8.682457322903768e-06, + "loss": 0.1329, + "step": 73230 + }, + { + "epoch": 3.42, + "learning_rate": 8.68167353784898e-06, + "loss": 0.1341, + "step": 73235 + }, + { + "epoch": 3.42, + "learning_rate": 8.680889752794194e-06, + "loss": 0.1694, + "step": 73240 + }, + { + "epoch": 3.42, + "learning_rate": 8.680105967739408e-06, + "loss": 0.3827, + "step": 73245 + }, + { + "epoch": 3.42, + "learning_rate": 8.67932218268462e-06, + "loss": 0.0979, + "step": 73250 + }, + { + "epoch": 3.42, + "learning_rate": 8.678538397629834e-06, + "loss": 0.0562, + "step": 73255 + }, + { + "epoch": 3.42, + "learning_rate": 8.677754612575048e-06, + "loss": 0.0399, + "step": 73260 + }, + { + "epoch": 3.42, + "learning_rate": 8.67697082752026e-06, + "loss": 0.0307, + "step": 73265 + }, + { + "epoch": 3.42, + "learning_rate": 8.676187042465475e-06, + "loss": 0.0571, + "step": 73270 + }, + { + "epoch": 3.42, + "learning_rate": 8.675403257410688e-06, + "loss": 0.0912, + "step": 73275 + }, + { + "epoch": 3.42, + "learning_rate": 8.674619472355902e-06, + "loss": 0.0733, + "step": 73280 + }, + { + "epoch": 3.42, + "learning_rate": 8.673835687301115e-06, + "loss": 0.0705, + "step": 73285 + }, + { + "epoch": 3.42, + "learning_rate": 8.673051902246328e-06, + "loss": 0.1932, + "step": 73290 + }, + { + "epoch": 3.42, + "learning_rate": 8.672268117191542e-06, + "loss": 0.2412, + "step": 73295 + }, + { + "epoch": 3.42, + "learning_rate": 8.671484332136756e-06, + "loss": 0.1248, + "step": 73300 + }, + { + "epoch": 3.42, + "learning_rate": 8.67070054708197e-06, + "loss": 0.0374, + "step": 73305 + }, + { + "epoch": 3.42, + "learning_rate": 8.669916762027182e-06, + "loss": 0.0487, + "step": 73310 + }, + { + "epoch": 3.42, + "learning_rate": 8.669132976972395e-06, + "loss": 0.087, + "step": 73315 + }, + { + "epoch": 3.42, + "learning_rate": 8.668349191917608e-06, + "loss": 0.0897, + "step": 73320 + }, + { + "epoch": 3.42, + "learning_rate": 8.667565406862822e-06, + "loss": 0.092, + "step": 73325 + }, + { + "epoch": 3.42, + "learning_rate": 8.666781621808036e-06, + "loss": 0.1125, + "step": 73330 + }, + { + "epoch": 3.42, + "learning_rate": 8.665997836753249e-06, + "loss": 0.1317, + "step": 73335 + }, + { + "epoch": 3.42, + "learning_rate": 8.665214051698462e-06, + "loss": 0.2335, + "step": 73340 + }, + { + "epoch": 3.42, + "learning_rate": 8.664430266643676e-06, + "loss": 0.2869, + "step": 73345 + }, + { + "epoch": 3.42, + "learning_rate": 8.66364648158889e-06, + "loss": 0.0941, + "step": 73350 + }, + { + "epoch": 3.42, + "learning_rate": 8.662862696534104e-06, + "loss": 0.0136, + "step": 73355 + }, + { + "epoch": 3.42, + "learning_rate": 8.662078911479316e-06, + "loss": 0.065, + "step": 73360 + }, + { + "epoch": 3.42, + "learning_rate": 8.66129512642453e-06, + "loss": 0.0897, + "step": 73365 + }, + { + "epoch": 3.42, + "learning_rate": 8.660511341369744e-06, + "loss": 0.0441, + "step": 73370 + }, + { + "epoch": 3.42, + "learning_rate": 8.659727556314958e-06, + "loss": 0.0534, + "step": 73375 + }, + { + "epoch": 3.42, + "learning_rate": 8.65894377126017e-06, + "loss": 0.147, + "step": 73380 + }, + { + "epoch": 3.42, + "learning_rate": 8.658159986205384e-06, + "loss": 0.0851, + "step": 73385 + }, + { + "epoch": 3.42, + "learning_rate": 8.657376201150596e-06, + "loss": 0.1515, + "step": 73390 + }, + { + "epoch": 3.42, + "learning_rate": 8.65659241609581e-06, + "loss": 0.272, + "step": 73395 + }, + { + "epoch": 3.42, + "learning_rate": 8.655808631041024e-06, + "loss": 0.0938, + "step": 73400 + }, + { + "epoch": 3.43, + "learning_rate": 8.655024845986238e-06, + "loss": 0.0485, + "step": 73405 + }, + { + "epoch": 3.43, + "learning_rate": 8.65424106093145e-06, + "loss": 0.0487, + "step": 73410 + }, + { + "epoch": 3.43, + "learning_rate": 8.653457275876664e-06, + "loss": 0.024, + "step": 73415 + }, + { + "epoch": 3.43, + "learning_rate": 8.652673490821878e-06, + "loss": 0.0974, + "step": 73420 + }, + { + "epoch": 3.43, + "learning_rate": 8.651889705767092e-06, + "loss": 0.111, + "step": 73425 + }, + { + "epoch": 3.43, + "learning_rate": 8.651105920712304e-06, + "loss": 0.088, + "step": 73430 + }, + { + "epoch": 3.43, + "learning_rate": 8.650322135657518e-06, + "loss": 0.1132, + "step": 73435 + }, + { + "epoch": 3.43, + "learning_rate": 8.649538350602732e-06, + "loss": 0.1547, + "step": 73440 + }, + { + "epoch": 3.43, + "learning_rate": 8.648754565547944e-06, + "loss": 0.1941, + "step": 73445 + }, + { + "epoch": 3.43, + "learning_rate": 8.647970780493158e-06, + "loss": 0.0833, + "step": 73450 + }, + { + "epoch": 3.43, + "learning_rate": 8.647186995438372e-06, + "loss": 0.028, + "step": 73455 + }, + { + "epoch": 3.43, + "learning_rate": 8.646403210383584e-06, + "loss": 0.0611, + "step": 73460 + }, + { + "epoch": 3.43, + "learning_rate": 8.645619425328798e-06, + "loss": 0.0603, + "step": 73465 + }, + { + "epoch": 3.43, + "learning_rate": 8.644835640274012e-06, + "loss": 0.0568, + "step": 73470 + }, + { + "epoch": 3.43, + "learning_rate": 8.644051855219226e-06, + "loss": 0.0533, + "step": 73475 + }, + { + "epoch": 3.43, + "learning_rate": 8.643268070164438e-06, + "loss": 0.0709, + "step": 73480 + }, + { + "epoch": 3.43, + "learning_rate": 8.642484285109652e-06, + "loss": 0.0942, + "step": 73485 + }, + { + "epoch": 3.43, + "learning_rate": 8.641700500054866e-06, + "loss": 0.1067, + "step": 73490 + }, + { + "epoch": 3.43, + "learning_rate": 8.64091671500008e-06, + "loss": 0.2943, + "step": 73495 + }, + { + "epoch": 3.43, + "learning_rate": 8.640132929945292e-06, + "loss": 0.0612, + "step": 73500 + }, + { + "epoch": 3.43, + "learning_rate": 8.639349144890506e-06, + "loss": 0.041, + "step": 73505 + }, + { + "epoch": 3.43, + "learning_rate": 8.638565359835718e-06, + "loss": 0.0527, + "step": 73510 + }, + { + "epoch": 3.43, + "learning_rate": 8.637781574780932e-06, + "loss": 0.0718, + "step": 73515 + }, + { + "epoch": 3.43, + "learning_rate": 8.636997789726146e-06, + "loss": 0.1534, + "step": 73520 + }, + { + "epoch": 3.43, + "learning_rate": 8.63621400467136e-06, + "loss": 0.0413, + "step": 73525 + }, + { + "epoch": 3.43, + "learning_rate": 8.635430219616572e-06, + "loss": 0.1293, + "step": 73530 + }, + { + "epoch": 3.43, + "learning_rate": 8.634646434561786e-06, + "loss": 0.0855, + "step": 73535 + }, + { + "epoch": 3.43, + "learning_rate": 8.633862649507e-06, + "loss": 0.2193, + "step": 73540 + }, + { + "epoch": 3.43, + "learning_rate": 8.633078864452214e-06, + "loss": 0.3822, + "step": 73545 + }, + { + "epoch": 3.43, + "learning_rate": 8.632295079397426e-06, + "loss": 0.0456, + "step": 73550 + }, + { + "epoch": 3.43, + "learning_rate": 8.63151129434264e-06, + "loss": 0.0161, + "step": 73555 + }, + { + "epoch": 3.43, + "learning_rate": 8.630727509287854e-06, + "loss": 0.054, + "step": 73560 + }, + { + "epoch": 3.43, + "learning_rate": 8.629943724233068e-06, + "loss": 0.1103, + "step": 73565 + }, + { + "epoch": 3.43, + "learning_rate": 8.629159939178282e-06, + "loss": 0.0462, + "step": 73570 + }, + { + "epoch": 3.43, + "learning_rate": 8.628376154123494e-06, + "loss": 0.1141, + "step": 73575 + }, + { + "epoch": 3.43, + "learning_rate": 8.627592369068706e-06, + "loss": 0.1126, + "step": 73580 + }, + { + "epoch": 3.43, + "learning_rate": 8.62680858401392e-06, + "loss": 0.0803, + "step": 73585 + }, + { + "epoch": 3.43, + "learning_rate": 8.626024798959134e-06, + "loss": 0.1266, + "step": 73590 + }, + { + "epoch": 3.43, + "learning_rate": 8.625241013904348e-06, + "loss": 0.2645, + "step": 73595 + }, + { + "epoch": 3.43, + "learning_rate": 8.62445722884956e-06, + "loss": 0.09, + "step": 73600 + }, + { + "epoch": 3.43, + "learning_rate": 8.623673443794774e-06, + "loss": 0.0246, + "step": 73605 + }, + { + "epoch": 3.43, + "learning_rate": 8.622889658739988e-06, + "loss": 0.0233, + "step": 73610 + }, + { + "epoch": 3.43, + "learning_rate": 8.622105873685202e-06, + "loss": 0.037, + "step": 73615 + }, + { + "epoch": 3.44, + "learning_rate": 8.621322088630416e-06, + "loss": 0.0779, + "step": 73620 + }, + { + "epoch": 3.44, + "learning_rate": 8.620538303575628e-06, + "loss": 0.0583, + "step": 73625 + }, + { + "epoch": 3.44, + "learning_rate": 8.619754518520842e-06, + "loss": 0.1562, + "step": 73630 + }, + { + "epoch": 3.44, + "learning_rate": 8.618970733466056e-06, + "loss": 0.084, + "step": 73635 + }, + { + "epoch": 3.44, + "learning_rate": 8.618186948411268e-06, + "loss": 0.2324, + "step": 73640 + }, + { + "epoch": 3.44, + "learning_rate": 8.617403163356482e-06, + "loss": 0.3043, + "step": 73645 + }, + { + "epoch": 3.44, + "learning_rate": 8.616619378301696e-06, + "loss": 0.1044, + "step": 73650 + }, + { + "epoch": 3.44, + "learning_rate": 8.615835593246908e-06, + "loss": 0.0041, + "step": 73655 + }, + { + "epoch": 3.44, + "learning_rate": 8.615051808192122e-06, + "loss": 0.0606, + "step": 73660 + }, + { + "epoch": 3.44, + "learning_rate": 8.614268023137336e-06, + "loss": 0.0305, + "step": 73665 + }, + { + "epoch": 3.44, + "learning_rate": 8.61348423808255e-06, + "loss": 0.1222, + "step": 73670 + }, + { + "epoch": 3.44, + "learning_rate": 8.612700453027762e-06, + "loss": 0.0978, + "step": 73675 + }, + { + "epoch": 3.44, + "learning_rate": 8.611916667972976e-06, + "loss": 0.105, + "step": 73680 + }, + { + "epoch": 3.44, + "learning_rate": 8.61113288291819e-06, + "loss": 0.0781, + "step": 73685 + }, + { + "epoch": 3.44, + "learning_rate": 8.610349097863404e-06, + "loss": 0.196, + "step": 73690 + }, + { + "epoch": 3.44, + "learning_rate": 8.609565312808616e-06, + "loss": 0.2442, + "step": 73695 + }, + { + "epoch": 3.44, + "learning_rate": 8.60878152775383e-06, + "loss": 0.105, + "step": 73700 + }, + { + "epoch": 3.44, + "learning_rate": 8.607997742699042e-06, + "loss": 0.0184, + "step": 73705 + }, + { + "epoch": 3.44, + "learning_rate": 8.607213957644256e-06, + "loss": 0.0643, + "step": 73710 + }, + { + "epoch": 3.44, + "learning_rate": 8.60643017258947e-06, + "loss": 0.0301, + "step": 73715 + }, + { + "epoch": 3.44, + "learning_rate": 8.605646387534684e-06, + "loss": 0.0908, + "step": 73720 + }, + { + "epoch": 3.44, + "learning_rate": 8.604862602479896e-06, + "loss": 0.057, + "step": 73725 + }, + { + "epoch": 3.44, + "learning_rate": 8.60407881742511e-06, + "loss": 0.0879, + "step": 73730 + }, + { + "epoch": 3.44, + "learning_rate": 8.603295032370324e-06, + "loss": 0.1638, + "step": 73735 + }, + { + "epoch": 3.44, + "learning_rate": 8.602511247315538e-06, + "loss": 0.1459, + "step": 73740 + }, + { + "epoch": 3.44, + "learning_rate": 8.60172746226075e-06, + "loss": 0.3662, + "step": 73745 + }, + { + "epoch": 3.44, + "learning_rate": 8.600943677205964e-06, + "loss": 0.1232, + "step": 73750 + }, + { + "epoch": 3.44, + "learning_rate": 8.600159892151178e-06, + "loss": 0.0439, + "step": 73755 + }, + { + "epoch": 3.44, + "learning_rate": 8.599376107096392e-06, + "loss": 0.0612, + "step": 73760 + }, + { + "epoch": 3.44, + "learning_rate": 8.598592322041604e-06, + "loss": 0.0793, + "step": 73765 + }, + { + "epoch": 3.44, + "learning_rate": 8.597808536986818e-06, + "loss": 0.1031, + "step": 73770 + }, + { + "epoch": 3.44, + "learning_rate": 8.59702475193203e-06, + "loss": 0.1145, + "step": 73775 + }, + { + "epoch": 3.44, + "learning_rate": 8.596240966877244e-06, + "loss": 0.0756, + "step": 73780 + }, + { + "epoch": 3.44, + "learning_rate": 8.595457181822458e-06, + "loss": 0.1199, + "step": 73785 + }, + { + "epoch": 3.44, + "learning_rate": 8.594673396767672e-06, + "loss": 0.1422, + "step": 73790 + }, + { + "epoch": 3.44, + "learning_rate": 8.593889611712884e-06, + "loss": 0.2619, + "step": 73795 + }, + { + "epoch": 3.44, + "learning_rate": 8.593105826658098e-06, + "loss": 0.0832, + "step": 73800 + }, + { + "epoch": 3.44, + "learning_rate": 8.592322041603312e-06, + "loss": 0.0297, + "step": 73805 + }, + { + "epoch": 3.44, + "learning_rate": 8.591538256548526e-06, + "loss": 0.0311, + "step": 73810 + }, + { + "epoch": 3.44, + "learning_rate": 8.590754471493738e-06, + "loss": 0.0356, + "step": 73815 + }, + { + "epoch": 3.44, + "learning_rate": 8.589970686438952e-06, + "loss": 0.1395, + "step": 73820 + }, + { + "epoch": 3.44, + "learning_rate": 8.589186901384166e-06, + "loss": 0.321, + "step": 73825 + }, + { + "epoch": 3.45, + "learning_rate": 8.58840311632938e-06, + "loss": 0.1941, + "step": 73830 + }, + { + "epoch": 3.45, + "learning_rate": 8.587619331274592e-06, + "loss": 0.1362, + "step": 73835 + }, + { + "epoch": 3.45, + "learning_rate": 8.586835546219806e-06, + "loss": 0.1958, + "step": 73840 + }, + { + "epoch": 3.45, + "learning_rate": 8.586051761165018e-06, + "loss": 0.3383, + "step": 73845 + }, + { + "epoch": 3.45, + "learning_rate": 8.585267976110232e-06, + "loss": 0.0636, + "step": 73850 + }, + { + "epoch": 3.45, + "learning_rate": 8.584484191055446e-06, + "loss": 0.041, + "step": 73855 + }, + { + "epoch": 3.45, + "learning_rate": 8.58370040600066e-06, + "loss": 0.0478, + "step": 73860 + }, + { + "epoch": 3.45, + "learning_rate": 8.582916620945872e-06, + "loss": 0.0538, + "step": 73865 + }, + { + "epoch": 3.45, + "learning_rate": 8.582132835891086e-06, + "loss": 0.1034, + "step": 73870 + }, + { + "epoch": 3.45, + "learning_rate": 8.5813490508363e-06, + "loss": 0.1014, + "step": 73875 + }, + { + "epoch": 3.45, + "learning_rate": 8.580565265781513e-06, + "loss": 0.039, + "step": 73880 + }, + { + "epoch": 3.45, + "learning_rate": 8.579781480726727e-06, + "loss": 0.2059, + "step": 73885 + }, + { + "epoch": 3.45, + "learning_rate": 8.57899769567194e-06, + "loss": 0.2326, + "step": 73890 + }, + { + "epoch": 3.45, + "learning_rate": 8.578213910617153e-06, + "loss": 0.2216, + "step": 73895 + }, + { + "epoch": 3.45, + "learning_rate": 8.577430125562366e-06, + "loss": 0.0655, + "step": 73900 + }, + { + "epoch": 3.45, + "learning_rate": 8.57664634050758e-06, + "loss": 0.0306, + "step": 73905 + }, + { + "epoch": 3.45, + "learning_rate": 8.575862555452793e-06, + "loss": 0.041, + "step": 73910 + }, + { + "epoch": 3.45, + "learning_rate": 8.575078770398006e-06, + "loss": 0.0813, + "step": 73915 + }, + { + "epoch": 3.45, + "learning_rate": 8.57429498534322e-06, + "loss": 0.0851, + "step": 73920 + }, + { + "epoch": 3.45, + "learning_rate": 8.573511200288433e-06, + "loss": 0.0716, + "step": 73925 + }, + { + "epoch": 3.45, + "learning_rate": 8.572727415233647e-06, + "loss": 0.1528, + "step": 73930 + }, + { + "epoch": 3.45, + "learning_rate": 8.571943630178861e-06, + "loss": 0.0754, + "step": 73935 + }, + { + "epoch": 3.45, + "learning_rate": 8.571159845124074e-06, + "loss": 0.3233, + "step": 73940 + }, + { + "epoch": 3.45, + "learning_rate": 8.570376060069287e-06, + "loss": 0.2341, + "step": 73945 + }, + { + "epoch": 3.45, + "learning_rate": 8.569592275014501e-06, + "loss": 0.0771, + "step": 73950 + }, + { + "epoch": 3.45, + "learning_rate": 8.568808489959715e-06, + "loss": 0.0268, + "step": 73955 + }, + { + "epoch": 3.45, + "learning_rate": 8.568024704904927e-06, + "loss": 0.0653, + "step": 73960 + }, + { + "epoch": 3.45, + "learning_rate": 8.567240919850141e-06, + "loss": 0.0982, + "step": 73965 + }, + { + "epoch": 3.45, + "learning_rate": 8.566457134795354e-06, + "loss": 0.0533, + "step": 73970 + }, + { + "epoch": 3.45, + "learning_rate": 8.565673349740567e-06, + "loss": 0.0812, + "step": 73975 + }, + { + "epoch": 3.45, + "learning_rate": 8.564889564685781e-06, + "loss": 0.1737, + "step": 73980 + }, + { + "epoch": 3.45, + "learning_rate": 8.564105779630995e-06, + "loss": 0.1171, + "step": 73985 + }, + { + "epoch": 3.45, + "learning_rate": 8.563321994576207e-06, + "loss": 0.2456, + "step": 73990 + }, + { + "epoch": 3.45, + "learning_rate": 8.562538209521421e-06, + "loss": 0.2324, + "step": 73995 + }, + { + "epoch": 3.45, + "learning_rate": 8.561754424466635e-06, + "loss": 0.0594, + "step": 74000 + }, + { + "epoch": 3.45, + "learning_rate": 8.56097063941185e-06, + "loss": 0.0404, + "step": 74005 + }, + { + "epoch": 3.45, + "learning_rate": 8.560186854357061e-06, + "loss": 0.0326, + "step": 74010 + }, + { + "epoch": 3.45, + "learning_rate": 8.559403069302275e-06, + "loss": 0.0492, + "step": 74015 + }, + { + "epoch": 3.45, + "learning_rate": 8.55861928424749e-06, + "loss": 0.0784, + "step": 74020 + }, + { + "epoch": 3.45, + "learning_rate": 8.557835499192703e-06, + "loss": 0.1076, + "step": 74025 + }, + { + "epoch": 3.45, + "learning_rate": 8.557051714137915e-06, + "loss": 0.122, + "step": 74030 + }, + { + "epoch": 3.45, + "learning_rate": 8.55626792908313e-06, + "loss": 0.1381, + "step": 74035 + }, + { + "epoch": 3.45, + "learning_rate": 8.555484144028341e-06, + "loss": 0.1295, + "step": 74040 + }, + { + "epoch": 3.46, + "learning_rate": 8.554700358973555e-06, + "loss": 0.3292, + "step": 74045 + }, + { + "epoch": 3.46, + "learning_rate": 8.55391657391877e-06, + "loss": 0.0633, + "step": 74050 + }, + { + "epoch": 3.46, + "learning_rate": 8.553132788863983e-06, + "loss": 0.039, + "step": 74055 + }, + { + "epoch": 3.46, + "learning_rate": 8.552349003809195e-06, + "loss": 0.031, + "step": 74060 + }, + { + "epoch": 3.46, + "learning_rate": 8.55156521875441e-06, + "loss": 0.1121, + "step": 74065 + }, + { + "epoch": 3.46, + "learning_rate": 8.550781433699623e-06, + "loss": 0.0995, + "step": 74070 + }, + { + "epoch": 3.46, + "learning_rate": 8.549997648644837e-06, + "loss": 0.0764, + "step": 74075 + }, + { + "epoch": 3.46, + "learning_rate": 8.54921386359005e-06, + "loss": 0.1044, + "step": 74080 + }, + { + "epoch": 3.46, + "learning_rate": 8.548430078535263e-06, + "loss": 0.072, + "step": 74085 + }, + { + "epoch": 3.46, + "learning_rate": 8.547646293480477e-06, + "loss": 0.2137, + "step": 74090 + }, + { + "epoch": 3.46, + "learning_rate": 8.54686250842569e-06, + "loss": 0.2968, + "step": 74095 + }, + { + "epoch": 3.46, + "learning_rate": 8.546078723370903e-06, + "loss": 0.0581, + "step": 74100 + }, + { + "epoch": 3.46, + "learning_rate": 8.545294938316117e-06, + "loss": 0.0139, + "step": 74105 + }, + { + "epoch": 3.46, + "learning_rate": 8.54451115326133e-06, + "loss": 0.032, + "step": 74110 + }, + { + "epoch": 3.46, + "learning_rate": 8.543727368206543e-06, + "loss": 0.1237, + "step": 74115 + }, + { + "epoch": 3.46, + "learning_rate": 8.542943583151757e-06, + "loss": 0.0868, + "step": 74120 + }, + { + "epoch": 3.46, + "learning_rate": 8.542159798096971e-06, + "loss": 0.0434, + "step": 74125 + }, + { + "epoch": 3.46, + "learning_rate": 8.541376013042183e-06, + "loss": 0.0577, + "step": 74130 + }, + { + "epoch": 3.46, + "learning_rate": 8.540592227987397e-06, + "loss": 0.1186, + "step": 74135 + }, + { + "epoch": 3.46, + "learning_rate": 8.539808442932611e-06, + "loss": 0.2465, + "step": 74140 + }, + { + "epoch": 3.46, + "learning_rate": 8.539024657877825e-06, + "loss": 0.4115, + "step": 74145 + }, + { + "epoch": 3.46, + "learning_rate": 8.538240872823039e-06, + "loss": 0.0344, + "step": 74150 + }, + { + "epoch": 3.46, + "learning_rate": 8.537457087768251e-06, + "loss": 0.0311, + "step": 74155 + }, + { + "epoch": 3.46, + "learning_rate": 8.536673302713463e-06, + "loss": 0.0506, + "step": 74160 + }, + { + "epoch": 3.46, + "learning_rate": 8.535889517658677e-06, + "loss": 0.1081, + "step": 74165 + }, + { + "epoch": 3.46, + "learning_rate": 8.535105732603891e-06, + "loss": 0.0612, + "step": 74170 + }, + { + "epoch": 3.46, + "learning_rate": 8.534321947549105e-06, + "loss": 0.0644, + "step": 74175 + }, + { + "epoch": 3.46, + "learning_rate": 8.533538162494317e-06, + "loss": 0.0716, + "step": 74180 + }, + { + "epoch": 3.46, + "learning_rate": 8.532754377439531e-06, + "loss": 0.219, + "step": 74185 + }, + { + "epoch": 3.46, + "learning_rate": 8.531970592384745e-06, + "loss": 0.1413, + "step": 74190 + }, + { + "epoch": 3.46, + "learning_rate": 8.531186807329959e-06, + "loss": 0.2697, + "step": 74195 + }, + { + "epoch": 3.46, + "learning_rate": 8.530403022275173e-06, + "loss": 0.1026, + "step": 74200 + }, + { + "epoch": 3.46, + "learning_rate": 8.529619237220385e-06, + "loss": 0.1033, + "step": 74205 + }, + { + "epoch": 3.46, + "learning_rate": 8.528835452165599e-06, + "loss": 0.0327, + "step": 74210 + }, + { + "epoch": 3.46, + "learning_rate": 8.528051667110813e-06, + "loss": 0.0793, + "step": 74215 + }, + { + "epoch": 3.46, + "learning_rate": 8.527267882056027e-06, + "loss": 0.0608, + "step": 74220 + }, + { + "epoch": 3.46, + "learning_rate": 8.526484097001239e-06, + "loss": 0.1283, + "step": 74225 + }, + { + "epoch": 3.46, + "learning_rate": 8.525700311946453e-06, + "loss": 0.1363, + "step": 74230 + }, + { + "epoch": 3.46, + "learning_rate": 8.524916526891665e-06, + "loss": 0.1228, + "step": 74235 + }, + { + "epoch": 3.46, + "learning_rate": 8.524132741836879e-06, + "loss": 0.1047, + "step": 74240 + }, + { + "epoch": 3.46, + "learning_rate": 8.523348956782093e-06, + "loss": 0.3583, + "step": 74245 + }, + { + "epoch": 3.46, + "learning_rate": 8.522565171727307e-06, + "loss": 0.0644, + "step": 74250 + }, + { + "epoch": 3.46, + "learning_rate": 8.521781386672519e-06, + "loss": 0.0303, + "step": 74255 + }, + { + "epoch": 3.47, + "learning_rate": 8.520997601617733e-06, + "loss": 0.0464, + "step": 74260 + }, + { + "epoch": 3.47, + "learning_rate": 8.520213816562947e-06, + "loss": 0.0405, + "step": 74265 + }, + { + "epoch": 3.47, + "learning_rate": 8.51943003150816e-06, + "loss": 0.0766, + "step": 74270 + }, + { + "epoch": 3.47, + "learning_rate": 8.518646246453373e-06, + "loss": 0.0909, + "step": 74275 + }, + { + "epoch": 3.47, + "learning_rate": 8.517862461398587e-06, + "loss": 0.0987, + "step": 74280 + }, + { + "epoch": 3.47, + "learning_rate": 8.5170786763438e-06, + "loss": 0.0941, + "step": 74285 + }, + { + "epoch": 3.47, + "learning_rate": 8.516294891289013e-06, + "loss": 0.1221, + "step": 74290 + }, + { + "epoch": 3.47, + "learning_rate": 8.515511106234227e-06, + "loss": 0.2891, + "step": 74295 + }, + { + "epoch": 3.47, + "learning_rate": 8.51472732117944e-06, + "loss": 0.104, + "step": 74300 + }, + { + "epoch": 3.47, + "learning_rate": 8.513943536124653e-06, + "loss": 0.0496, + "step": 74305 + }, + { + "epoch": 3.47, + "learning_rate": 8.513159751069867e-06, + "loss": 0.0794, + "step": 74310 + }, + { + "epoch": 3.47, + "learning_rate": 8.512375966015081e-06, + "loss": 0.1103, + "step": 74315 + }, + { + "epoch": 3.47, + "learning_rate": 8.511592180960295e-06, + "loss": 0.0626, + "step": 74320 + }, + { + "epoch": 3.47, + "learning_rate": 8.510808395905507e-06, + "loss": 0.1357, + "step": 74325 + }, + { + "epoch": 3.47, + "learning_rate": 8.510024610850721e-06, + "loss": 0.1465, + "step": 74330 + }, + { + "epoch": 3.47, + "learning_rate": 8.509240825795935e-06, + "loss": 0.1323, + "step": 74335 + }, + { + "epoch": 3.47, + "learning_rate": 8.508457040741149e-06, + "loss": 0.2311, + "step": 74340 + }, + { + "epoch": 3.47, + "learning_rate": 8.507673255686361e-06, + "loss": 0.3059, + "step": 74345 + }, + { + "epoch": 3.47, + "learning_rate": 8.506889470631575e-06, + "loss": 0.0809, + "step": 74350 + }, + { + "epoch": 3.47, + "learning_rate": 8.506105685576787e-06, + "loss": 0.0233, + "step": 74355 + }, + { + "epoch": 3.47, + "learning_rate": 8.505321900522001e-06, + "loss": 0.0304, + "step": 74360 + }, + { + "epoch": 3.47, + "learning_rate": 8.504538115467215e-06, + "loss": 0.0697, + "step": 74365 + }, + { + "epoch": 3.47, + "learning_rate": 8.503754330412429e-06, + "loss": 0.1096, + "step": 74370 + }, + { + "epoch": 3.47, + "learning_rate": 8.502970545357641e-06, + "loss": 0.1051, + "step": 74375 + }, + { + "epoch": 3.47, + "learning_rate": 8.502186760302855e-06, + "loss": 0.0537, + "step": 74380 + }, + { + "epoch": 3.47, + "learning_rate": 8.501402975248069e-06, + "loss": 0.1299, + "step": 74385 + }, + { + "epoch": 3.47, + "learning_rate": 8.500619190193283e-06, + "loss": 0.2017, + "step": 74390 + }, + { + "epoch": 3.47, + "learning_rate": 8.499835405138495e-06, + "loss": 0.2505, + "step": 74395 + }, + { + "epoch": 3.47, + "learning_rate": 8.499051620083709e-06, + "loss": 0.0293, + "step": 74400 + }, + { + "epoch": 3.47, + "learning_rate": 8.498267835028923e-06, + "loss": 0.0581, + "step": 74405 + }, + { + "epoch": 3.47, + "learning_rate": 8.497484049974137e-06, + "loss": 0.0503, + "step": 74410 + }, + { + "epoch": 3.47, + "learning_rate": 8.49670026491935e-06, + "loss": 0.0547, + "step": 74415 + }, + { + "epoch": 3.47, + "learning_rate": 8.495916479864563e-06, + "loss": 0.0854, + "step": 74420 + }, + { + "epoch": 3.47, + "learning_rate": 8.495132694809775e-06, + "loss": 0.0962, + "step": 74425 + }, + { + "epoch": 3.47, + "learning_rate": 8.494348909754989e-06, + "loss": 0.0931, + "step": 74430 + }, + { + "epoch": 3.47, + "learning_rate": 8.493565124700203e-06, + "loss": 0.1318, + "step": 74435 + }, + { + "epoch": 3.47, + "learning_rate": 8.492781339645417e-06, + "loss": 0.223, + "step": 74440 + }, + { + "epoch": 3.47, + "learning_rate": 8.491997554590629e-06, + "loss": 0.2705, + "step": 74445 + }, + { + "epoch": 3.47, + "learning_rate": 8.491213769535843e-06, + "loss": 0.0941, + "step": 74450 + }, + { + "epoch": 3.47, + "learning_rate": 8.490429984481057e-06, + "loss": 0.0466, + "step": 74455 + }, + { + "epoch": 3.47, + "learning_rate": 8.48964619942627e-06, + "loss": 0.0315, + "step": 74460 + }, + { + "epoch": 3.47, + "learning_rate": 8.488862414371484e-06, + "loss": 0.044, + "step": 74465 + }, + { + "epoch": 3.47, + "learning_rate": 8.488078629316697e-06, + "loss": 0.1042, + "step": 74470 + }, + { + "epoch": 3.48, + "learning_rate": 8.48729484426191e-06, + "loss": 0.071, + "step": 74475 + }, + { + "epoch": 3.48, + "learning_rate": 8.486511059207125e-06, + "loss": 0.1504, + "step": 74480 + }, + { + "epoch": 3.48, + "learning_rate": 8.485727274152337e-06, + "loss": 0.1746, + "step": 74485 + }, + { + "epoch": 3.48, + "learning_rate": 8.48494348909755e-06, + "loss": 0.295, + "step": 74490 + }, + { + "epoch": 3.48, + "learning_rate": 8.484159704042763e-06, + "loss": 0.2217, + "step": 74495 + }, + { + "epoch": 3.48, + "learning_rate": 8.483375918987977e-06, + "loss": 0.0303, + "step": 74500 + }, + { + "epoch": 3.48, + "learning_rate": 8.48259213393319e-06, + "loss": 0.0239, + "step": 74505 + }, + { + "epoch": 3.48, + "learning_rate": 8.481808348878405e-06, + "loss": 0.0651, + "step": 74510 + }, + { + "epoch": 3.48, + "learning_rate": 8.481024563823618e-06, + "loss": 0.0711, + "step": 74515 + }, + { + "epoch": 3.48, + "learning_rate": 8.48024077876883e-06, + "loss": 0.162, + "step": 74520 + }, + { + "epoch": 3.48, + "learning_rate": 8.479456993714045e-06, + "loss": 0.0623, + "step": 74525 + }, + { + "epoch": 3.48, + "learning_rate": 8.478673208659258e-06, + "loss": 0.0458, + "step": 74530 + }, + { + "epoch": 3.48, + "learning_rate": 8.477889423604472e-06, + "loss": 0.0875, + "step": 74535 + }, + { + "epoch": 3.48, + "learning_rate": 8.477105638549685e-06, + "loss": 0.2352, + "step": 74540 + }, + { + "epoch": 3.48, + "learning_rate": 8.476321853494899e-06, + "loss": 0.2607, + "step": 74545 + }, + { + "epoch": 3.48, + "learning_rate": 8.47553806844011e-06, + "loss": 0.0826, + "step": 74550 + }, + { + "epoch": 3.48, + "learning_rate": 8.474754283385325e-06, + "loss": 0.0826, + "step": 74555 + }, + { + "epoch": 3.48, + "learning_rate": 8.473970498330539e-06, + "loss": 0.0637, + "step": 74560 + }, + { + "epoch": 3.48, + "learning_rate": 8.473186713275752e-06, + "loss": 0.053, + "step": 74565 + }, + { + "epoch": 3.48, + "learning_rate": 8.472402928220965e-06, + "loss": 0.064, + "step": 74570 + }, + { + "epoch": 3.48, + "learning_rate": 8.471619143166179e-06, + "loss": 0.0851, + "step": 74575 + }, + { + "epoch": 3.48, + "learning_rate": 8.470835358111392e-06, + "loss": 0.1457, + "step": 74580 + }, + { + "epoch": 3.48, + "learning_rate": 8.470051573056606e-06, + "loss": 0.1017, + "step": 74585 + }, + { + "epoch": 3.48, + "learning_rate": 8.469267788001819e-06, + "loss": 0.1879, + "step": 74590 + }, + { + "epoch": 3.48, + "learning_rate": 8.468484002947032e-06, + "loss": 0.3116, + "step": 74595 + }, + { + "epoch": 3.48, + "learning_rate": 8.467700217892246e-06, + "loss": 0.1218, + "step": 74600 + }, + { + "epoch": 3.48, + "learning_rate": 8.46691643283746e-06, + "loss": 0.0432, + "step": 74605 + }, + { + "epoch": 3.48, + "learning_rate": 8.466132647782673e-06, + "loss": 0.0217, + "step": 74610 + }, + { + "epoch": 3.48, + "learning_rate": 8.465348862727886e-06, + "loss": 0.0888, + "step": 74615 + }, + { + "epoch": 3.48, + "learning_rate": 8.464565077673099e-06, + "loss": 0.0544, + "step": 74620 + }, + { + "epoch": 3.48, + "learning_rate": 8.463781292618313e-06, + "loss": 0.0501, + "step": 74625 + }, + { + "epoch": 3.48, + "learning_rate": 8.462997507563526e-06, + "loss": 0.0863, + "step": 74630 + }, + { + "epoch": 3.48, + "learning_rate": 8.46221372250874e-06, + "loss": 0.2043, + "step": 74635 + }, + { + "epoch": 3.48, + "learning_rate": 8.461429937453953e-06, + "loss": 0.288, + "step": 74640 + }, + { + "epoch": 3.48, + "learning_rate": 8.460646152399166e-06, + "loss": 0.3324, + "step": 74645 + }, + { + "epoch": 3.48, + "learning_rate": 8.45986236734438e-06, + "loss": 0.0586, + "step": 74650 + }, + { + "epoch": 3.48, + "learning_rate": 8.459078582289594e-06, + "loss": 0.0707, + "step": 74655 + }, + { + "epoch": 3.48, + "learning_rate": 8.458294797234806e-06, + "loss": 0.0335, + "step": 74660 + }, + { + "epoch": 3.48, + "learning_rate": 8.45751101218002e-06, + "loss": 0.0967, + "step": 74665 + }, + { + "epoch": 3.48, + "learning_rate": 8.456727227125234e-06, + "loss": 0.0504, + "step": 74670 + }, + { + "epoch": 3.48, + "learning_rate": 8.455943442070448e-06, + "loss": 0.0912, + "step": 74675 + }, + { + "epoch": 3.48, + "learning_rate": 8.45515965701566e-06, + "loss": 0.1916, + "step": 74680 + }, + { + "epoch": 3.48, + "learning_rate": 8.454375871960874e-06, + "loss": 0.195, + "step": 74685 + }, + { + "epoch": 3.49, + "learning_rate": 8.453592086906087e-06, + "loss": 0.1557, + "step": 74690 + }, + { + "epoch": 3.49, + "learning_rate": 8.4528083018513e-06, + "loss": 0.3294, + "step": 74695 + }, + { + "epoch": 3.49, + "learning_rate": 8.452024516796514e-06, + "loss": 0.1187, + "step": 74700 + }, + { + "epoch": 3.49, + "learning_rate": 8.451240731741728e-06, + "loss": 0.0509, + "step": 74705 + }, + { + "epoch": 3.49, + "learning_rate": 8.45045694668694e-06, + "loss": 0.0239, + "step": 74710 + }, + { + "epoch": 3.49, + "learning_rate": 8.449673161632154e-06, + "loss": 0.0198, + "step": 74715 + }, + { + "epoch": 3.49, + "learning_rate": 8.448889376577368e-06, + "loss": 0.0729, + "step": 74720 + }, + { + "epoch": 3.49, + "learning_rate": 8.448105591522582e-06, + "loss": 0.0967, + "step": 74725 + }, + { + "epoch": 3.49, + "learning_rate": 8.447321806467796e-06, + "loss": 0.1026, + "step": 74730 + }, + { + "epoch": 3.49, + "learning_rate": 8.446538021413008e-06, + "loss": 0.1301, + "step": 74735 + }, + { + "epoch": 3.49, + "learning_rate": 8.445754236358222e-06, + "loss": 0.2965, + "step": 74740 + }, + { + "epoch": 3.49, + "learning_rate": 8.444970451303434e-06, + "loss": 0.3699, + "step": 74745 + }, + { + "epoch": 3.49, + "learning_rate": 8.444186666248648e-06, + "loss": 0.0774, + "step": 74750 + }, + { + "epoch": 3.49, + "learning_rate": 8.443402881193862e-06, + "loss": 0.036, + "step": 74755 + }, + { + "epoch": 3.49, + "learning_rate": 8.442619096139074e-06, + "loss": 0.0738, + "step": 74760 + }, + { + "epoch": 3.49, + "learning_rate": 8.441835311084288e-06, + "loss": 0.146, + "step": 74765 + }, + { + "epoch": 3.49, + "learning_rate": 8.441051526029502e-06, + "loss": 0.0819, + "step": 74770 + }, + { + "epoch": 3.49, + "learning_rate": 8.440267740974716e-06, + "loss": 0.0764, + "step": 74775 + }, + { + "epoch": 3.49, + "learning_rate": 8.43948395591993e-06, + "loss": 0.1826, + "step": 74780 + }, + { + "epoch": 3.49, + "learning_rate": 8.438700170865142e-06, + "loss": 0.1201, + "step": 74785 + }, + { + "epoch": 3.49, + "learning_rate": 8.437916385810356e-06, + "loss": 0.2108, + "step": 74790 + }, + { + "epoch": 3.49, + "learning_rate": 8.43713260075557e-06, + "loss": 0.3365, + "step": 74795 + }, + { + "epoch": 3.49, + "learning_rate": 8.436348815700784e-06, + "loss": 0.0217, + "step": 74800 + }, + { + "epoch": 3.49, + "learning_rate": 8.435565030645996e-06, + "loss": 0.0271, + "step": 74805 + }, + { + "epoch": 3.49, + "learning_rate": 8.434781245591208e-06, + "loss": 0.1088, + "step": 74810 + }, + { + "epoch": 3.49, + "learning_rate": 8.433997460536422e-06, + "loss": 0.0373, + "step": 74815 + }, + { + "epoch": 3.49, + "learning_rate": 8.433213675481636e-06, + "loss": 0.0418, + "step": 74820 + }, + { + "epoch": 3.49, + "learning_rate": 8.43242989042685e-06, + "loss": 0.0449, + "step": 74825 + }, + { + "epoch": 3.49, + "learning_rate": 8.431646105372064e-06, + "loss": 0.1189, + "step": 74830 + }, + { + "epoch": 3.49, + "learning_rate": 8.430862320317276e-06, + "loss": 0.2114, + "step": 74835 + }, + { + "epoch": 3.49, + "learning_rate": 8.43007853526249e-06, + "loss": 0.2039, + "step": 74840 + }, + { + "epoch": 3.49, + "learning_rate": 8.429294750207704e-06, + "loss": 0.2547, + "step": 74845 + }, + { + "epoch": 3.49, + "learning_rate": 8.428510965152918e-06, + "loss": 0.109, + "step": 74850 + }, + { + "epoch": 3.49, + "learning_rate": 8.42772718009813e-06, + "loss": 0.0629, + "step": 74855 + }, + { + "epoch": 3.49, + "learning_rate": 8.426943395043344e-06, + "loss": 0.0208, + "step": 74860 + }, + { + "epoch": 3.49, + "learning_rate": 8.426159609988558e-06, + "loss": 0.058, + "step": 74865 + }, + { + "epoch": 3.49, + "learning_rate": 8.425375824933772e-06, + "loss": 0.0206, + "step": 74870 + }, + { + "epoch": 3.49, + "learning_rate": 8.424592039878984e-06, + "loss": 0.0756, + "step": 74875 + }, + { + "epoch": 3.49, + "learning_rate": 8.423808254824198e-06, + "loss": 0.0402, + "step": 74880 + }, + { + "epoch": 3.49, + "learning_rate": 8.42302446976941e-06, + "loss": 0.1043, + "step": 74885 + }, + { + "epoch": 3.49, + "learning_rate": 8.422240684714624e-06, + "loss": 0.2587, + "step": 74890 + }, + { + "epoch": 3.49, + "learning_rate": 8.421456899659838e-06, + "loss": 0.283, + "step": 74895 + }, + { + "epoch": 3.49, + "learning_rate": 8.420673114605052e-06, + "loss": 0.0697, + "step": 74900 + }, + { + "epoch": 3.5, + "learning_rate": 8.419889329550264e-06, + "loss": 0.0106, + "step": 74905 + }, + { + "epoch": 3.5, + "learning_rate": 8.419105544495478e-06, + "loss": 0.0514, + "step": 74910 + }, + { + "epoch": 3.5, + "learning_rate": 8.418321759440692e-06, + "loss": 0.0274, + "step": 74915 + }, + { + "epoch": 3.5, + "learning_rate": 8.417537974385906e-06, + "loss": 0.1058, + "step": 74920 + }, + { + "epoch": 3.5, + "learning_rate": 8.416754189331118e-06, + "loss": 0.0667, + "step": 74925 + }, + { + "epoch": 3.5, + "learning_rate": 8.415970404276332e-06, + "loss": 0.0528, + "step": 74930 + }, + { + "epoch": 3.5, + "learning_rate": 8.415186619221546e-06, + "loss": 0.1184, + "step": 74935 + }, + { + "epoch": 3.5, + "learning_rate": 8.414402834166758e-06, + "loss": 0.2604, + "step": 74940 + }, + { + "epoch": 3.5, + "learning_rate": 8.413619049111972e-06, + "loss": 0.3226, + "step": 74945 + }, + { + "epoch": 3.5, + "learning_rate": 8.412835264057186e-06, + "loss": 0.0827, + "step": 74950 + }, + { + "epoch": 3.5, + "learning_rate": 8.412051479002398e-06, + "loss": 0.0426, + "step": 74955 + }, + { + "epoch": 3.5, + "learning_rate": 8.411267693947612e-06, + "loss": 0.0343, + "step": 74960 + }, + { + "epoch": 3.5, + "learning_rate": 8.410483908892826e-06, + "loss": 0.1049, + "step": 74965 + }, + { + "epoch": 3.5, + "learning_rate": 8.40970012383804e-06, + "loss": 0.056, + "step": 74970 + }, + { + "epoch": 3.5, + "learning_rate": 8.408916338783252e-06, + "loss": 0.1389, + "step": 74975 + }, + { + "epoch": 3.5, + "learning_rate": 8.408132553728466e-06, + "loss": 0.0572, + "step": 74980 + }, + { + "epoch": 3.5, + "learning_rate": 8.40734876867368e-06, + "loss": 0.115, + "step": 74985 + }, + { + "epoch": 3.5, + "learning_rate": 8.406564983618894e-06, + "loss": 0.2138, + "step": 74990 + }, + { + "epoch": 3.5, + "learning_rate": 8.405781198564108e-06, + "loss": 0.3076, + "step": 74995 + }, + { + "epoch": 3.5, + "learning_rate": 8.40499741350932e-06, + "loss": 0.0509, + "step": 75000 + }, + { + "epoch": 3.5, + "learning_rate": 8.404213628454532e-06, + "loss": 0.0308, + "step": 75005 + }, + { + "epoch": 3.5, + "learning_rate": 8.403429843399746e-06, + "loss": 0.0843, + "step": 75010 + }, + { + "epoch": 3.5, + "learning_rate": 8.40264605834496e-06, + "loss": 0.1278, + "step": 75015 + }, + { + "epoch": 3.5, + "learning_rate": 8.401862273290174e-06, + "loss": 0.0298, + "step": 75020 + }, + { + "epoch": 3.5, + "learning_rate": 8.401078488235386e-06, + "loss": 0.0982, + "step": 75025 + }, + { + "epoch": 3.5, + "learning_rate": 8.4002947031806e-06, + "loss": 0.1339, + "step": 75030 + }, + { + "epoch": 3.5, + "learning_rate": 8.399510918125814e-06, + "loss": 0.1617, + "step": 75035 + }, + { + "epoch": 3.5, + "learning_rate": 8.398727133071028e-06, + "loss": 0.2432, + "step": 75040 + }, + { + "epoch": 3.5, + "learning_rate": 8.397943348016242e-06, + "loss": 0.2742, + "step": 75045 + }, + { + "epoch": 3.5, + "learning_rate": 8.397159562961454e-06, + "loss": 0.0797, + "step": 75050 + }, + { + "epoch": 3.5, + "learning_rate": 8.396375777906668e-06, + "loss": 0.048, + "step": 75055 + }, + { + "epoch": 3.5, + "learning_rate": 8.395591992851882e-06, + "loss": 0.035, + "step": 75060 + }, + { + "epoch": 3.5, + "learning_rate": 8.394808207797096e-06, + "loss": 0.0585, + "step": 75065 + }, + { + "epoch": 3.5, + "learning_rate": 8.394024422742308e-06, + "loss": 0.1197, + "step": 75070 + }, + { + "epoch": 3.5, + "learning_rate": 8.39324063768752e-06, + "loss": 0.0651, + "step": 75075 + }, + { + "epoch": 3.5, + "learning_rate": 8.392456852632734e-06, + "loss": 0.1699, + "step": 75080 + }, + { + "epoch": 3.5, + "learning_rate": 8.391673067577948e-06, + "loss": 0.1379, + "step": 75085 + }, + { + "epoch": 3.5, + "learning_rate": 8.390889282523162e-06, + "loss": 0.2202, + "step": 75090 + }, + { + "epoch": 3.5, + "learning_rate": 8.390105497468376e-06, + "loss": 0.3336, + "step": 75095 + }, + { + "epoch": 3.5, + "learning_rate": 8.389321712413588e-06, + "loss": 0.0507, + "step": 75100 + }, + { + "epoch": 3.5, + "learning_rate": 8.388537927358802e-06, + "loss": 0.0492, + "step": 75105 + }, + { + "epoch": 3.5, + "learning_rate": 8.387754142304016e-06, + "loss": 0.0252, + "step": 75110 + }, + { + "epoch": 3.5, + "learning_rate": 8.38697035724923e-06, + "loss": 0.1295, + "step": 75115 + }, + { + "epoch": 3.51, + "learning_rate": 8.386186572194442e-06, + "loss": 0.1012, + "step": 75120 + }, + { + "epoch": 3.51, + "learning_rate": 8.385402787139656e-06, + "loss": 0.1108, + "step": 75125 + }, + { + "epoch": 3.51, + "learning_rate": 8.38461900208487e-06, + "loss": 0.0637, + "step": 75130 + }, + { + "epoch": 3.51, + "learning_rate": 8.383835217030082e-06, + "loss": 0.1363, + "step": 75135 + }, + { + "epoch": 3.51, + "learning_rate": 8.383051431975296e-06, + "loss": 0.2128, + "step": 75140 + }, + { + "epoch": 3.51, + "learning_rate": 8.38226764692051e-06, + "loss": 0.2319, + "step": 75145 + }, + { + "epoch": 3.51, + "learning_rate": 8.381483861865722e-06, + "loss": 0.0976, + "step": 75150 + }, + { + "epoch": 3.51, + "learning_rate": 8.380700076810936e-06, + "loss": 0.0321, + "step": 75155 + }, + { + "epoch": 3.51, + "learning_rate": 8.37991629175615e-06, + "loss": 0.0293, + "step": 75160 + }, + { + "epoch": 3.51, + "learning_rate": 8.379132506701364e-06, + "loss": 0.0679, + "step": 75165 + }, + { + "epoch": 3.51, + "learning_rate": 8.378348721646576e-06, + "loss": 0.1282, + "step": 75170 + }, + { + "epoch": 3.51, + "learning_rate": 8.37756493659179e-06, + "loss": 0.0671, + "step": 75175 + }, + { + "epoch": 3.51, + "learning_rate": 8.376781151537004e-06, + "loss": 0.0583, + "step": 75180 + }, + { + "epoch": 3.51, + "learning_rate": 8.375997366482217e-06, + "loss": 0.1728, + "step": 75185 + }, + { + "epoch": 3.51, + "learning_rate": 8.37521358142743e-06, + "loss": 0.1375, + "step": 75190 + }, + { + "epoch": 3.51, + "learning_rate": 8.374429796372644e-06, + "loss": 0.1859, + "step": 75195 + }, + { + "epoch": 3.51, + "learning_rate": 8.373646011317856e-06, + "loss": 0.0792, + "step": 75200 + }, + { + "epoch": 3.51, + "learning_rate": 8.37286222626307e-06, + "loss": 0.0548, + "step": 75205 + }, + { + "epoch": 3.51, + "learning_rate": 8.372078441208284e-06, + "loss": 0.0164, + "step": 75210 + }, + { + "epoch": 3.51, + "learning_rate": 8.371294656153498e-06, + "loss": 0.0193, + "step": 75215 + }, + { + "epoch": 3.51, + "learning_rate": 8.37051087109871e-06, + "loss": 0.0914, + "step": 75220 + }, + { + "epoch": 3.51, + "learning_rate": 8.369727086043924e-06, + "loss": 0.1217, + "step": 75225 + }, + { + "epoch": 3.51, + "learning_rate": 8.368943300989138e-06, + "loss": 0.1028, + "step": 75230 + }, + { + "epoch": 3.51, + "learning_rate": 8.368159515934351e-06, + "loss": 0.1744, + "step": 75235 + }, + { + "epoch": 3.51, + "learning_rate": 8.367375730879564e-06, + "loss": 0.2296, + "step": 75240 + }, + { + "epoch": 3.51, + "learning_rate": 8.366591945824778e-06, + "loss": 0.2154, + "step": 75245 + }, + { + "epoch": 3.51, + "learning_rate": 8.365808160769991e-06, + "loss": 0.0799, + "step": 75250 + }, + { + "epoch": 3.51, + "learning_rate": 8.365024375715205e-06, + "loss": 0.0403, + "step": 75255 + }, + { + "epoch": 3.51, + "learning_rate": 8.36424059066042e-06, + "loss": 0.0285, + "step": 75260 + }, + { + "epoch": 3.51, + "learning_rate": 8.363456805605631e-06, + "loss": 0.0687, + "step": 75265 + }, + { + "epoch": 3.51, + "learning_rate": 8.362673020550844e-06, + "loss": 0.1119, + "step": 75270 + }, + { + "epoch": 3.51, + "learning_rate": 8.361889235496058e-06, + "loss": 0.0707, + "step": 75275 + }, + { + "epoch": 3.51, + "learning_rate": 8.361105450441272e-06, + "loss": 0.0419, + "step": 75280 + }, + { + "epoch": 3.51, + "learning_rate": 8.360321665386485e-06, + "loss": 0.1551, + "step": 75285 + }, + { + "epoch": 3.51, + "learning_rate": 8.359537880331698e-06, + "loss": 0.209, + "step": 75290 + }, + { + "epoch": 3.51, + "learning_rate": 8.358754095276912e-06, + "loss": 0.3307, + "step": 75295 + }, + { + "epoch": 3.51, + "learning_rate": 8.357970310222125e-06, + "loss": 0.0807, + "step": 75300 + }, + { + "epoch": 3.51, + "learning_rate": 8.35718652516734e-06, + "loss": 0.0273, + "step": 75305 + }, + { + "epoch": 3.51, + "learning_rate": 8.356402740112553e-06, + "loss": 0.0654, + "step": 75310 + }, + { + "epoch": 3.51, + "learning_rate": 8.355618955057765e-06, + "loss": 0.0535, + "step": 75315 + }, + { + "epoch": 3.51, + "learning_rate": 8.35483517000298e-06, + "loss": 0.0402, + "step": 75320 + }, + { + "epoch": 3.51, + "learning_rate": 8.354051384948193e-06, + "loss": 0.0842, + "step": 75325 + }, + { + "epoch": 3.52, + "learning_rate": 8.353267599893405e-06, + "loss": 0.1353, + "step": 75330 + }, + { + "epoch": 3.52, + "learning_rate": 8.35248381483862e-06, + "loss": 0.1386, + "step": 75335 + }, + { + "epoch": 3.52, + "learning_rate": 8.351700029783832e-06, + "loss": 0.1482, + "step": 75340 + }, + { + "epoch": 3.52, + "learning_rate": 8.350916244729046e-06, + "loss": 0.4411, + "step": 75345 + }, + { + "epoch": 3.52, + "learning_rate": 8.35013245967426e-06, + "loss": 0.0583, + "step": 75350 + }, + { + "epoch": 3.52, + "learning_rate": 8.349348674619473e-06, + "loss": 0.0396, + "step": 75355 + }, + { + "epoch": 3.52, + "learning_rate": 8.348564889564687e-06, + "loss": 0.02, + "step": 75360 + }, + { + "epoch": 3.52, + "learning_rate": 8.3477811045099e-06, + "loss": 0.0512, + "step": 75365 + }, + { + "epoch": 3.52, + "learning_rate": 8.346997319455113e-06, + "loss": 0.0338, + "step": 75370 + }, + { + "epoch": 3.52, + "learning_rate": 8.346213534400327e-06, + "loss": 0.0807, + "step": 75375 + }, + { + "epoch": 3.52, + "learning_rate": 8.345429749345541e-06, + "loss": 0.1244, + "step": 75380 + }, + { + "epoch": 3.52, + "learning_rate": 8.344645964290753e-06, + "loss": 0.1919, + "step": 75385 + }, + { + "epoch": 3.52, + "learning_rate": 8.343862179235967e-06, + "loss": 0.2456, + "step": 75390 + }, + { + "epoch": 3.52, + "learning_rate": 8.34307839418118e-06, + "loss": 0.3295, + "step": 75395 + }, + { + "epoch": 3.52, + "learning_rate": 8.342294609126393e-06, + "loss": 0.0937, + "step": 75400 + }, + { + "epoch": 3.52, + "learning_rate": 8.341510824071607e-06, + "loss": 0.0356, + "step": 75405 + }, + { + "epoch": 3.52, + "learning_rate": 8.340727039016821e-06, + "loss": 0.0717, + "step": 75410 + }, + { + "epoch": 3.52, + "learning_rate": 8.339943253962033e-06, + "loss": 0.0336, + "step": 75415 + }, + { + "epoch": 3.52, + "learning_rate": 8.339159468907247e-06, + "loss": 0.0833, + "step": 75420 + }, + { + "epoch": 3.52, + "learning_rate": 8.338375683852461e-06, + "loss": 0.1159, + "step": 75425 + }, + { + "epoch": 3.52, + "learning_rate": 8.337591898797675e-06, + "loss": 0.0786, + "step": 75430 + }, + { + "epoch": 3.52, + "learning_rate": 8.336808113742887e-06, + "loss": 0.1555, + "step": 75435 + }, + { + "epoch": 3.52, + "learning_rate": 8.336024328688101e-06, + "loss": 0.2435, + "step": 75440 + }, + { + "epoch": 3.52, + "learning_rate": 8.335240543633315e-06, + "loss": 0.2153, + "step": 75445 + }, + { + "epoch": 3.52, + "learning_rate": 8.334456758578529e-06, + "loss": 0.0385, + "step": 75450 + }, + { + "epoch": 3.52, + "learning_rate": 8.333672973523741e-06, + "loss": 0.0182, + "step": 75455 + }, + { + "epoch": 3.52, + "learning_rate": 8.332889188468955e-06, + "loss": 0.0516, + "step": 75460 + }, + { + "epoch": 3.52, + "learning_rate": 8.332105403414167e-06, + "loss": 0.0697, + "step": 75465 + }, + { + "epoch": 3.52, + "learning_rate": 8.331321618359381e-06, + "loss": 0.1061, + "step": 75470 + }, + { + "epoch": 3.52, + "learning_rate": 8.330537833304595e-06, + "loss": 0.1071, + "step": 75475 + }, + { + "epoch": 3.52, + "learning_rate": 8.329754048249809e-06, + "loss": 0.0414, + "step": 75480 + }, + { + "epoch": 3.52, + "learning_rate": 8.328970263195021e-06, + "loss": 0.1574, + "step": 75485 + }, + { + "epoch": 3.52, + "learning_rate": 8.328186478140235e-06, + "loss": 0.191, + "step": 75490 + }, + { + "epoch": 3.52, + "learning_rate": 8.327402693085449e-06, + "loss": 0.1444, + "step": 75495 + }, + { + "epoch": 3.52, + "learning_rate": 8.326618908030663e-06, + "loss": 0.0571, + "step": 75500 + }, + { + "epoch": 3.52, + "learning_rate": 8.325835122975875e-06, + "loss": 0.0229, + "step": 75505 + }, + { + "epoch": 3.52, + "learning_rate": 8.325051337921089e-06, + "loss": 0.063, + "step": 75510 + }, + { + "epoch": 3.52, + "learning_rate": 8.324267552866303e-06, + "loss": 0.0546, + "step": 75515 + }, + { + "epoch": 3.52, + "learning_rate": 8.323483767811517e-06, + "loss": 0.0901, + "step": 75520 + }, + { + "epoch": 3.52, + "learning_rate": 8.32269998275673e-06, + "loss": 0.0649, + "step": 75525 + }, + { + "epoch": 3.52, + "learning_rate": 8.321916197701943e-06, + "loss": 0.0851, + "step": 75530 + }, + { + "epoch": 3.52, + "learning_rate": 8.321132412647155e-06, + "loss": 0.1331, + "step": 75535 + }, + { + "epoch": 3.52, + "learning_rate": 8.32034862759237e-06, + "loss": 0.2535, + "step": 75540 + }, + { + "epoch": 3.53, + "learning_rate": 8.319564842537583e-06, + "loss": 0.2529, + "step": 75545 + }, + { + "epoch": 3.53, + "learning_rate": 8.318781057482797e-06, + "loss": 0.0799, + "step": 75550 + }, + { + "epoch": 3.53, + "learning_rate": 8.31799727242801e-06, + "loss": 0.0059, + "step": 75555 + }, + { + "epoch": 3.53, + "learning_rate": 8.317213487373223e-06, + "loss": 0.0637, + "step": 75560 + }, + { + "epoch": 3.53, + "learning_rate": 8.316429702318437e-06, + "loss": 0.03, + "step": 75565 + }, + { + "epoch": 3.53, + "learning_rate": 8.315645917263651e-06, + "loss": 0.081, + "step": 75570 + }, + { + "epoch": 3.53, + "learning_rate": 8.314862132208865e-06, + "loss": 0.1242, + "step": 75575 + }, + { + "epoch": 3.53, + "learning_rate": 8.314078347154077e-06, + "loss": 0.0443, + "step": 75580 + }, + { + "epoch": 3.53, + "learning_rate": 8.313294562099291e-06, + "loss": 0.1409, + "step": 75585 + }, + { + "epoch": 3.53, + "learning_rate": 8.312510777044503e-06, + "loss": 0.0827, + "step": 75590 + }, + { + "epoch": 3.53, + "learning_rate": 8.311726991989717e-06, + "loss": 0.2518, + "step": 75595 + }, + { + "epoch": 3.53, + "learning_rate": 8.310943206934931e-06, + "loss": 0.0828, + "step": 75600 + }, + { + "epoch": 3.53, + "learning_rate": 8.310159421880143e-06, + "loss": 0.015, + "step": 75605 + }, + { + "epoch": 3.53, + "learning_rate": 8.309375636825357e-06, + "loss": 0.0221, + "step": 75610 + }, + { + "epoch": 3.53, + "learning_rate": 8.308591851770571e-06, + "loss": 0.0275, + "step": 75615 + }, + { + "epoch": 3.53, + "learning_rate": 8.307808066715785e-06, + "loss": 0.0394, + "step": 75620 + }, + { + "epoch": 3.53, + "learning_rate": 8.307024281660999e-06, + "loss": 0.0226, + "step": 75625 + }, + { + "epoch": 3.53, + "learning_rate": 8.306240496606211e-06, + "loss": 0.0955, + "step": 75630 + }, + { + "epoch": 3.53, + "learning_rate": 8.305456711551425e-06, + "loss": 0.1029, + "step": 75635 + }, + { + "epoch": 3.53, + "learning_rate": 8.304672926496639e-06, + "loss": 0.2034, + "step": 75640 + }, + { + "epoch": 3.53, + "learning_rate": 8.303889141441853e-06, + "loss": 0.3854, + "step": 75645 + }, + { + "epoch": 3.53, + "learning_rate": 8.303105356387065e-06, + "loss": 0.0793, + "step": 75650 + }, + { + "epoch": 3.53, + "learning_rate": 8.302321571332277e-06, + "loss": 0.0189, + "step": 75655 + }, + { + "epoch": 3.53, + "learning_rate": 8.301537786277491e-06, + "loss": 0.0376, + "step": 75660 + }, + { + "epoch": 3.53, + "learning_rate": 8.300754001222705e-06, + "loss": 0.0251, + "step": 75665 + }, + { + "epoch": 3.53, + "learning_rate": 8.299970216167919e-06, + "loss": 0.068, + "step": 75670 + }, + { + "epoch": 3.53, + "learning_rate": 8.299186431113133e-06, + "loss": 0.1966, + "step": 75675 + }, + { + "epoch": 3.53, + "learning_rate": 8.298402646058345e-06, + "loss": 0.0765, + "step": 75680 + }, + { + "epoch": 3.53, + "learning_rate": 8.297618861003559e-06, + "loss": 0.0766, + "step": 75685 + }, + { + "epoch": 3.53, + "learning_rate": 8.296835075948773e-06, + "loss": 0.2139, + "step": 75690 + }, + { + "epoch": 3.53, + "learning_rate": 8.296051290893987e-06, + "loss": 0.2011, + "step": 75695 + }, + { + "epoch": 3.53, + "learning_rate": 8.295267505839199e-06, + "loss": 0.0308, + "step": 75700 + }, + { + "epoch": 3.53, + "learning_rate": 8.294483720784413e-06, + "loss": 0.0239, + "step": 75705 + }, + { + "epoch": 3.53, + "learning_rate": 8.293699935729627e-06, + "loss": 0.0442, + "step": 75710 + }, + { + "epoch": 3.53, + "learning_rate": 8.29291615067484e-06, + "loss": 0.1048, + "step": 75715 + }, + { + "epoch": 3.53, + "learning_rate": 8.292132365620053e-06, + "loss": 0.1053, + "step": 75720 + }, + { + "epoch": 3.53, + "learning_rate": 8.291348580565267e-06, + "loss": 0.0838, + "step": 75725 + }, + { + "epoch": 3.53, + "learning_rate": 8.290564795510479e-06, + "loss": 0.1122, + "step": 75730 + }, + { + "epoch": 3.53, + "learning_rate": 8.289781010455693e-06, + "loss": 0.0573, + "step": 75735 + }, + { + "epoch": 3.53, + "learning_rate": 8.288997225400907e-06, + "loss": 0.286, + "step": 75740 + }, + { + "epoch": 3.53, + "learning_rate": 8.28821344034612e-06, + "loss": 0.3212, + "step": 75745 + }, + { + "epoch": 3.53, + "learning_rate": 8.287429655291333e-06, + "loss": 0.0789, + "step": 75750 + }, + { + "epoch": 3.53, + "learning_rate": 8.286645870236547e-06, + "loss": 0.0362, + "step": 75755 + }, + { + "epoch": 3.54, + "learning_rate": 8.28586208518176e-06, + "loss": 0.0528, + "step": 75760 + }, + { + "epoch": 3.54, + "learning_rate": 8.285078300126975e-06, + "loss": 0.0774, + "step": 75765 + }, + { + "epoch": 3.54, + "learning_rate": 8.284294515072187e-06, + "loss": 0.037, + "step": 75770 + }, + { + "epoch": 3.54, + "learning_rate": 8.2835107300174e-06, + "loss": 0.0995, + "step": 75775 + }, + { + "epoch": 3.54, + "learning_rate": 8.282726944962615e-06, + "loss": 0.0897, + "step": 75780 + }, + { + "epoch": 3.54, + "learning_rate": 8.281943159907827e-06, + "loss": 0.0795, + "step": 75785 + }, + { + "epoch": 3.54, + "learning_rate": 8.28115937485304e-06, + "loss": 0.1816, + "step": 75790 + }, + { + "epoch": 3.54, + "learning_rate": 8.280375589798255e-06, + "loss": 0.2481, + "step": 75795 + }, + { + "epoch": 3.54, + "learning_rate": 8.279591804743467e-06, + "loss": 0.0503, + "step": 75800 + }, + { + "epoch": 3.54, + "learning_rate": 8.27880801968868e-06, + "loss": 0.0122, + "step": 75805 + }, + { + "epoch": 3.54, + "learning_rate": 8.278024234633895e-06, + "loss": 0.0234, + "step": 75810 + }, + { + "epoch": 3.54, + "learning_rate": 8.277240449579109e-06, + "loss": 0.0445, + "step": 75815 + }, + { + "epoch": 3.54, + "learning_rate": 8.27645666452432e-06, + "loss": 0.059, + "step": 75820 + }, + { + "epoch": 3.54, + "learning_rate": 8.275672879469535e-06, + "loss": 0.0987, + "step": 75825 + }, + { + "epoch": 3.54, + "learning_rate": 8.274889094414749e-06, + "loss": 0.1198, + "step": 75830 + }, + { + "epoch": 3.54, + "learning_rate": 8.274105309359963e-06, + "loss": 0.1624, + "step": 75835 + }, + { + "epoch": 3.54, + "learning_rate": 8.273321524305176e-06, + "loss": 0.1389, + "step": 75840 + }, + { + "epoch": 3.54, + "learning_rate": 8.272537739250389e-06, + "loss": 0.2726, + "step": 75845 + }, + { + "epoch": 3.54, + "learning_rate": 8.271753954195601e-06, + "loss": 0.0476, + "step": 75850 + }, + { + "epoch": 3.54, + "learning_rate": 8.270970169140815e-06, + "loss": 0.0124, + "step": 75855 + }, + { + "epoch": 3.54, + "learning_rate": 8.270186384086029e-06, + "loss": 0.0568, + "step": 75860 + }, + { + "epoch": 3.54, + "learning_rate": 8.269402599031243e-06, + "loss": 0.0667, + "step": 75865 + }, + { + "epoch": 3.54, + "learning_rate": 8.268618813976455e-06, + "loss": 0.1202, + "step": 75870 + }, + { + "epoch": 3.54, + "learning_rate": 8.267835028921669e-06, + "loss": 0.0241, + "step": 75875 + }, + { + "epoch": 3.54, + "learning_rate": 8.267051243866883e-06, + "loss": 0.1229, + "step": 75880 + }, + { + "epoch": 3.54, + "learning_rate": 8.266267458812097e-06, + "loss": 0.0868, + "step": 75885 + }, + { + "epoch": 3.54, + "learning_rate": 8.26548367375731e-06, + "loss": 0.201, + "step": 75890 + }, + { + "epoch": 3.54, + "learning_rate": 8.264699888702523e-06, + "loss": 0.2908, + "step": 75895 + }, + { + "epoch": 3.54, + "learning_rate": 8.263916103647737e-06, + "loss": 0.0415, + "step": 75900 + }, + { + "epoch": 3.54, + "learning_rate": 8.26313231859295e-06, + "loss": 0.0291, + "step": 75905 + }, + { + "epoch": 3.54, + "learning_rate": 8.262348533538164e-06, + "loss": 0.012, + "step": 75910 + }, + { + "epoch": 3.54, + "learning_rate": 8.261564748483377e-06, + "loss": 0.0537, + "step": 75915 + }, + { + "epoch": 3.54, + "learning_rate": 8.260780963428589e-06, + "loss": 0.078, + "step": 75920 + }, + { + "epoch": 3.54, + "learning_rate": 8.259997178373803e-06, + "loss": 0.162, + "step": 75925 + }, + { + "epoch": 3.54, + "learning_rate": 8.259213393319017e-06, + "loss": 0.1439, + "step": 75930 + }, + { + "epoch": 3.54, + "learning_rate": 8.25842960826423e-06, + "loss": 0.09, + "step": 75935 + }, + { + "epoch": 3.54, + "learning_rate": 8.257645823209444e-06, + "loss": 0.1553, + "step": 75940 + }, + { + "epoch": 3.54, + "learning_rate": 8.256862038154657e-06, + "loss": 0.2545, + "step": 75945 + }, + { + "epoch": 3.54, + "learning_rate": 8.25607825309987e-06, + "loss": 0.0797, + "step": 75950 + }, + { + "epoch": 3.54, + "learning_rate": 8.255294468045084e-06, + "loss": 0.0807, + "step": 75955 + }, + { + "epoch": 3.54, + "learning_rate": 8.254510682990298e-06, + "loss": 0.0415, + "step": 75960 + }, + { + "epoch": 3.54, + "learning_rate": 8.25372689793551e-06, + "loss": 0.0477, + "step": 75965 + }, + { + "epoch": 3.54, + "learning_rate": 8.252943112880724e-06, + "loss": 0.1314, + "step": 75970 + }, + { + "epoch": 3.55, + "learning_rate": 8.252159327825938e-06, + "loss": 0.0979, + "step": 75975 + }, + { + "epoch": 3.55, + "learning_rate": 8.25137554277115e-06, + "loss": 0.0582, + "step": 75980 + }, + { + "epoch": 3.55, + "learning_rate": 8.250591757716364e-06, + "loss": 0.0899, + "step": 75985 + }, + { + "epoch": 3.55, + "learning_rate": 8.249807972661578e-06, + "loss": 0.2765, + "step": 75990 + }, + { + "epoch": 3.55, + "learning_rate": 8.24902418760679e-06, + "loss": 0.2902, + "step": 75995 + }, + { + "epoch": 3.55, + "learning_rate": 8.248240402552004e-06, + "loss": 0.1004, + "step": 76000 + }, + { + "epoch": 3.55, + "learning_rate": 8.247456617497218e-06, + "loss": 0.0277, + "step": 76005 + }, + { + "epoch": 3.55, + "learning_rate": 8.246672832442432e-06, + "loss": 0.0526, + "step": 76010 + }, + { + "epoch": 3.55, + "learning_rate": 8.245889047387645e-06, + "loss": 0.0395, + "step": 76015 + }, + { + "epoch": 3.55, + "learning_rate": 8.245105262332858e-06, + "loss": 0.0749, + "step": 76020 + }, + { + "epoch": 3.55, + "learning_rate": 8.244321477278072e-06, + "loss": 0.0923, + "step": 76025 + }, + { + "epoch": 3.55, + "learning_rate": 8.243537692223286e-06, + "loss": 0.0788, + "step": 76030 + }, + { + "epoch": 3.55, + "learning_rate": 8.242753907168498e-06, + "loss": 0.1192, + "step": 76035 + }, + { + "epoch": 3.55, + "learning_rate": 8.241970122113712e-06, + "loss": 0.1008, + "step": 76040 + }, + { + "epoch": 3.55, + "learning_rate": 8.241186337058925e-06, + "loss": 0.4365, + "step": 76045 + }, + { + "epoch": 3.55, + "learning_rate": 8.240402552004138e-06, + "loss": 0.0791, + "step": 76050 + }, + { + "epoch": 3.55, + "learning_rate": 8.239618766949352e-06, + "loss": 0.0438, + "step": 76055 + }, + { + "epoch": 3.55, + "learning_rate": 8.238834981894566e-06, + "loss": 0.0265, + "step": 76060 + }, + { + "epoch": 3.55, + "learning_rate": 8.238051196839778e-06, + "loss": 0.0122, + "step": 76065 + }, + { + "epoch": 3.55, + "learning_rate": 8.237267411784992e-06, + "loss": 0.0748, + "step": 76070 + }, + { + "epoch": 3.55, + "learning_rate": 8.236483626730206e-06, + "loss": 0.1352, + "step": 76075 + }, + { + "epoch": 3.55, + "learning_rate": 8.23569984167542e-06, + "loss": 0.1491, + "step": 76080 + }, + { + "epoch": 3.55, + "learning_rate": 8.234916056620632e-06, + "loss": 0.1648, + "step": 76085 + }, + { + "epoch": 3.55, + "learning_rate": 8.234132271565846e-06, + "loss": 0.1639, + "step": 76090 + }, + { + "epoch": 3.55, + "learning_rate": 8.23334848651106e-06, + "loss": 0.1581, + "step": 76095 + }, + { + "epoch": 3.55, + "learning_rate": 8.232564701456274e-06, + "loss": 0.1422, + "step": 76100 + }, + { + "epoch": 3.55, + "learning_rate": 8.231780916401488e-06, + "loss": 0.0529, + "step": 76105 + }, + { + "epoch": 3.55, + "learning_rate": 8.2309971313467e-06, + "loss": 0.0227, + "step": 76110 + }, + { + "epoch": 3.55, + "learning_rate": 8.230213346291912e-06, + "loss": 0.0177, + "step": 76115 + }, + { + "epoch": 3.55, + "learning_rate": 8.229429561237126e-06, + "loss": 0.0846, + "step": 76120 + }, + { + "epoch": 3.55, + "learning_rate": 8.22864577618234e-06, + "loss": 0.0638, + "step": 76125 + }, + { + "epoch": 3.55, + "learning_rate": 8.227861991127554e-06, + "loss": 0.1441, + "step": 76130 + }, + { + "epoch": 3.55, + "learning_rate": 8.227078206072766e-06, + "loss": 0.1508, + "step": 76135 + }, + { + "epoch": 3.55, + "learning_rate": 8.22629442101798e-06, + "loss": 0.2952, + "step": 76140 + }, + { + "epoch": 3.55, + "learning_rate": 8.225510635963194e-06, + "loss": 0.268, + "step": 76145 + }, + { + "epoch": 3.55, + "learning_rate": 8.224726850908408e-06, + "loss": 0.0742, + "step": 76150 + }, + { + "epoch": 3.55, + "learning_rate": 8.223943065853622e-06, + "loss": 0.034, + "step": 76155 + }, + { + "epoch": 3.55, + "learning_rate": 8.223159280798834e-06, + "loss": 0.0313, + "step": 76160 + }, + { + "epoch": 3.55, + "learning_rate": 8.222375495744048e-06, + "loss": 0.052, + "step": 76165 + }, + { + "epoch": 3.55, + "learning_rate": 8.221591710689262e-06, + "loss": 0.0843, + "step": 76170 + }, + { + "epoch": 3.55, + "learning_rate": 8.220807925634474e-06, + "loss": 0.0998, + "step": 76175 + }, + { + "epoch": 3.55, + "learning_rate": 8.220024140579688e-06, + "loss": 0.1013, + "step": 76180 + }, + { + "epoch": 3.55, + "learning_rate": 8.2192403555249e-06, + "loss": 0.1085, + "step": 76185 + }, + { + "epoch": 3.56, + "learning_rate": 8.218456570470114e-06, + "loss": 0.1796, + "step": 76190 + }, + { + "epoch": 3.56, + "learning_rate": 8.217672785415328e-06, + "loss": 0.293, + "step": 76195 + }, + { + "epoch": 3.56, + "learning_rate": 8.216889000360542e-06, + "loss": 0.0851, + "step": 76200 + }, + { + "epoch": 3.56, + "learning_rate": 8.216105215305756e-06, + "loss": 0.0458, + "step": 76205 + }, + { + "epoch": 3.56, + "learning_rate": 8.215321430250968e-06, + "loss": 0.043, + "step": 76210 + }, + { + "epoch": 3.56, + "learning_rate": 8.214537645196182e-06, + "loss": 0.0795, + "step": 76215 + }, + { + "epoch": 3.56, + "learning_rate": 8.213753860141396e-06, + "loss": 0.0633, + "step": 76220 + }, + { + "epoch": 3.56, + "learning_rate": 8.21297007508661e-06, + "loss": 0.0637, + "step": 76225 + }, + { + "epoch": 3.56, + "learning_rate": 8.212186290031822e-06, + "loss": 0.1009, + "step": 76230 + }, + { + "epoch": 3.56, + "learning_rate": 8.211402504977036e-06, + "loss": 0.1289, + "step": 76235 + }, + { + "epoch": 3.56, + "learning_rate": 8.210618719922248e-06, + "loss": 0.1908, + "step": 76240 + }, + { + "epoch": 3.56, + "learning_rate": 8.209834934867462e-06, + "loss": 0.2687, + "step": 76245 + }, + { + "epoch": 3.56, + "learning_rate": 8.209051149812676e-06, + "loss": 0.092, + "step": 76250 + }, + { + "epoch": 3.56, + "learning_rate": 8.20826736475789e-06, + "loss": 0.057, + "step": 76255 + }, + { + "epoch": 3.56, + "learning_rate": 8.207483579703102e-06, + "loss": 0.0251, + "step": 76260 + }, + { + "epoch": 3.56, + "learning_rate": 8.206699794648316e-06, + "loss": 0.0866, + "step": 76265 + }, + { + "epoch": 3.56, + "learning_rate": 8.20591600959353e-06, + "loss": 0.1353, + "step": 76270 + }, + { + "epoch": 3.56, + "learning_rate": 8.205132224538744e-06, + "loss": 0.0878, + "step": 76275 + }, + { + "epoch": 3.56, + "learning_rate": 8.204348439483956e-06, + "loss": 0.0853, + "step": 76280 + }, + { + "epoch": 3.56, + "learning_rate": 8.20356465442917e-06, + "loss": 0.0852, + "step": 76285 + }, + { + "epoch": 3.56, + "learning_rate": 8.202780869374384e-06, + "loss": 0.1853, + "step": 76290 + }, + { + "epoch": 3.56, + "learning_rate": 8.201997084319598e-06, + "loss": 0.2395, + "step": 76295 + }, + { + "epoch": 3.56, + "learning_rate": 8.20121329926481e-06, + "loss": 0.0643, + "step": 76300 + }, + { + "epoch": 3.56, + "learning_rate": 8.200429514210024e-06, + "loss": 0.044, + "step": 76305 + }, + { + "epoch": 3.56, + "learning_rate": 8.199645729155236e-06, + "loss": 0.0486, + "step": 76310 + }, + { + "epoch": 3.56, + "learning_rate": 8.19886194410045e-06, + "loss": 0.0564, + "step": 76315 + }, + { + "epoch": 3.56, + "learning_rate": 8.198078159045664e-06, + "loss": 0.0806, + "step": 76320 + }, + { + "epoch": 3.56, + "learning_rate": 8.197294373990878e-06, + "loss": 0.0805, + "step": 76325 + }, + { + "epoch": 3.56, + "learning_rate": 8.19651058893609e-06, + "loss": 0.1667, + "step": 76330 + }, + { + "epoch": 3.56, + "learning_rate": 8.195726803881304e-06, + "loss": 0.2686, + "step": 76335 + }, + { + "epoch": 3.56, + "learning_rate": 8.194943018826518e-06, + "loss": 0.2024, + "step": 76340 + }, + { + "epoch": 3.56, + "learning_rate": 8.194159233771732e-06, + "loss": 0.2692, + "step": 76345 + }, + { + "epoch": 3.56, + "learning_rate": 8.193375448716944e-06, + "loss": 0.0956, + "step": 76350 + }, + { + "epoch": 3.56, + "learning_rate": 8.192591663662158e-06, + "loss": 0.0181, + "step": 76355 + }, + { + "epoch": 3.56, + "learning_rate": 8.191807878607372e-06, + "loss": 0.0212, + "step": 76360 + }, + { + "epoch": 3.56, + "learning_rate": 8.191024093552586e-06, + "loss": 0.1716, + "step": 76365 + }, + { + "epoch": 3.56, + "learning_rate": 8.190240308497798e-06, + "loss": 0.1032, + "step": 76370 + }, + { + "epoch": 3.56, + "learning_rate": 8.189456523443012e-06, + "loss": 0.0507, + "step": 76375 + }, + { + "epoch": 3.56, + "learning_rate": 8.188672738388224e-06, + "loss": 0.1214, + "step": 76380 + }, + { + "epoch": 3.56, + "learning_rate": 8.187888953333438e-06, + "loss": 0.1668, + "step": 76385 + }, + { + "epoch": 3.56, + "learning_rate": 8.187105168278652e-06, + "loss": 0.2541, + "step": 76390 + }, + { + "epoch": 3.56, + "learning_rate": 8.186321383223866e-06, + "loss": 0.2692, + "step": 76395 + }, + { + "epoch": 3.56, + "learning_rate": 8.185537598169078e-06, + "loss": 0.0889, + "step": 76400 + }, + { + "epoch": 3.57, + "learning_rate": 8.184753813114292e-06, + "loss": 0.0115, + "step": 76405 + }, + { + "epoch": 3.57, + "learning_rate": 8.183970028059506e-06, + "loss": 0.0236, + "step": 76410 + }, + { + "epoch": 3.57, + "learning_rate": 8.18318624300472e-06, + "loss": 0.022, + "step": 76415 + }, + { + "epoch": 3.57, + "learning_rate": 8.182402457949934e-06, + "loss": 0.1446, + "step": 76420 + }, + { + "epoch": 3.57, + "learning_rate": 8.181618672895146e-06, + "loss": 0.0501, + "step": 76425 + }, + { + "epoch": 3.57, + "learning_rate": 8.18083488784036e-06, + "loss": 0.1078, + "step": 76430 + }, + { + "epoch": 3.57, + "learning_rate": 8.180051102785572e-06, + "loss": 0.1363, + "step": 76435 + }, + { + "epoch": 3.57, + "learning_rate": 8.179267317730786e-06, + "loss": 0.1027, + "step": 76440 + }, + { + "epoch": 3.57, + "learning_rate": 8.178483532676e-06, + "loss": 0.3394, + "step": 76445 + }, + { + "epoch": 3.57, + "learning_rate": 8.177699747621212e-06, + "loss": 0.0626, + "step": 76450 + }, + { + "epoch": 3.57, + "learning_rate": 8.176915962566426e-06, + "loss": 0.0337, + "step": 76455 + }, + { + "epoch": 3.57, + "learning_rate": 8.17613217751164e-06, + "loss": 0.0466, + "step": 76460 + }, + { + "epoch": 3.57, + "learning_rate": 8.175348392456854e-06, + "loss": 0.0782, + "step": 76465 + }, + { + "epoch": 3.57, + "learning_rate": 8.174564607402068e-06, + "loss": 0.0538, + "step": 76470 + }, + { + "epoch": 3.57, + "learning_rate": 8.17378082234728e-06, + "loss": 0.2283, + "step": 76475 + }, + { + "epoch": 3.57, + "learning_rate": 8.172997037292494e-06, + "loss": 0.1222, + "step": 76480 + }, + { + "epoch": 3.57, + "learning_rate": 8.172213252237708e-06, + "loss": 0.1388, + "step": 76485 + }, + { + "epoch": 3.57, + "learning_rate": 8.171429467182922e-06, + "loss": 0.2528, + "step": 76490 + }, + { + "epoch": 3.57, + "learning_rate": 8.170645682128134e-06, + "loss": 0.2953, + "step": 76495 + }, + { + "epoch": 3.57, + "learning_rate": 8.169861897073346e-06, + "loss": 0.1195, + "step": 76500 + }, + { + "epoch": 3.57, + "learning_rate": 8.16907811201856e-06, + "loss": 0.029, + "step": 76505 + }, + { + "epoch": 3.57, + "learning_rate": 8.168294326963774e-06, + "loss": 0.0202, + "step": 76510 + }, + { + "epoch": 3.57, + "learning_rate": 8.167510541908988e-06, + "loss": 0.0609, + "step": 76515 + }, + { + "epoch": 3.57, + "learning_rate": 8.166726756854202e-06, + "loss": 0.0825, + "step": 76520 + }, + { + "epoch": 3.57, + "learning_rate": 8.165942971799414e-06, + "loss": 0.0671, + "step": 76525 + }, + { + "epoch": 3.57, + "learning_rate": 8.165159186744628e-06, + "loss": 0.1079, + "step": 76530 + }, + { + "epoch": 3.57, + "learning_rate": 8.164375401689842e-06, + "loss": 0.1552, + "step": 76535 + }, + { + "epoch": 3.57, + "learning_rate": 8.163591616635055e-06, + "loss": 0.2753, + "step": 76540 + }, + { + "epoch": 3.57, + "learning_rate": 8.162807831580268e-06, + "loss": 0.2377, + "step": 76545 + }, + { + "epoch": 3.57, + "learning_rate": 8.162024046525482e-06, + "loss": 0.0422, + "step": 76550 + }, + { + "epoch": 3.57, + "learning_rate": 8.161240261470696e-06, + "loss": 0.0695, + "step": 76555 + }, + { + "epoch": 3.57, + "learning_rate": 8.16045647641591e-06, + "loss": 0.0457, + "step": 76560 + }, + { + "epoch": 3.57, + "learning_rate": 8.159672691361122e-06, + "loss": 0.0343, + "step": 76565 + }, + { + "epoch": 3.57, + "learning_rate": 8.158888906306336e-06, + "loss": 0.0607, + "step": 76570 + }, + { + "epoch": 3.57, + "learning_rate": 8.158105121251548e-06, + "loss": 0.0968, + "step": 76575 + }, + { + "epoch": 3.57, + "learning_rate": 8.157321336196762e-06, + "loss": 0.0807, + "step": 76580 + }, + { + "epoch": 3.57, + "learning_rate": 8.156537551141976e-06, + "loss": 0.1224, + "step": 76585 + }, + { + "epoch": 3.57, + "learning_rate": 8.15575376608719e-06, + "loss": 0.0768, + "step": 76590 + }, + { + "epoch": 3.57, + "learning_rate": 8.154969981032402e-06, + "loss": 0.3868, + "step": 76595 + }, + { + "epoch": 3.57, + "learning_rate": 8.154186195977616e-06, + "loss": 0.0574, + "step": 76600 + }, + { + "epoch": 3.57, + "learning_rate": 8.15340241092283e-06, + "loss": 0.0198, + "step": 76605 + }, + { + "epoch": 3.57, + "learning_rate": 8.152618625868043e-06, + "loss": 0.0272, + "step": 76610 + }, + { + "epoch": 3.57, + "learning_rate": 8.151834840813256e-06, + "loss": 0.1063, + "step": 76615 + }, + { + "epoch": 3.58, + "learning_rate": 8.15105105575847e-06, + "loss": 0.0647, + "step": 76620 + }, + { + "epoch": 3.58, + "learning_rate": 8.150267270703683e-06, + "loss": 0.0933, + "step": 76625 + }, + { + "epoch": 3.58, + "learning_rate": 8.149483485648896e-06, + "loss": 0.1446, + "step": 76630 + }, + { + "epoch": 3.58, + "learning_rate": 8.14869970059411e-06, + "loss": 0.1358, + "step": 76635 + }, + { + "epoch": 3.58, + "learning_rate": 8.147915915539323e-06, + "loss": 0.2393, + "step": 76640 + }, + { + "epoch": 3.58, + "learning_rate": 8.147132130484536e-06, + "loss": 0.3171, + "step": 76645 + }, + { + "epoch": 3.58, + "learning_rate": 8.14634834542975e-06, + "loss": 0.1299, + "step": 76650 + }, + { + "epoch": 3.58, + "learning_rate": 8.145564560374963e-06, + "loss": 0.0429, + "step": 76655 + }, + { + "epoch": 3.58, + "learning_rate": 8.144780775320177e-06, + "loss": 0.0264, + "step": 76660 + }, + { + "epoch": 3.58, + "learning_rate": 8.14399699026539e-06, + "loss": 0.0446, + "step": 76665 + }, + { + "epoch": 3.58, + "learning_rate": 8.143213205210603e-06, + "loss": 0.1154, + "step": 76670 + }, + { + "epoch": 3.58, + "learning_rate": 8.142429420155817e-06, + "loss": 0.105, + "step": 76675 + }, + { + "epoch": 3.58, + "learning_rate": 8.141645635101031e-06, + "loss": 0.0613, + "step": 76680 + }, + { + "epoch": 3.58, + "learning_rate": 8.140861850046245e-06, + "loss": 0.2053, + "step": 76685 + }, + { + "epoch": 3.58, + "learning_rate": 8.140078064991457e-06, + "loss": 0.1957, + "step": 76690 + }, + { + "epoch": 3.58, + "learning_rate": 8.13929427993667e-06, + "loss": 0.2316, + "step": 76695 + }, + { + "epoch": 3.58, + "learning_rate": 8.138510494881884e-06, + "loss": 0.0859, + "step": 76700 + }, + { + "epoch": 3.58, + "learning_rate": 8.137726709827097e-06, + "loss": 0.0217, + "step": 76705 + }, + { + "epoch": 3.58, + "learning_rate": 8.136942924772311e-06, + "loss": 0.0319, + "step": 76710 + }, + { + "epoch": 3.58, + "learning_rate": 8.136159139717524e-06, + "loss": 0.0411, + "step": 76715 + }, + { + "epoch": 3.58, + "learning_rate": 8.135375354662737e-06, + "loss": 0.0563, + "step": 76720 + }, + { + "epoch": 3.58, + "learning_rate": 8.134591569607951e-06, + "loss": 0.1346, + "step": 76725 + }, + { + "epoch": 3.58, + "learning_rate": 8.133807784553165e-06, + "loss": 0.1397, + "step": 76730 + }, + { + "epoch": 3.58, + "learning_rate": 8.13302399949838e-06, + "loss": 0.0917, + "step": 76735 + }, + { + "epoch": 3.58, + "learning_rate": 8.13239697145455e-06, + "loss": 0.2303, + "step": 76740 + }, + { + "epoch": 3.58, + "learning_rate": 8.131613186399764e-06, + "loss": 0.6665, + "step": 76745 + }, + { + "epoch": 3.58, + "learning_rate": 8.130829401344976e-06, + "loss": 0.0425, + "step": 76750 + }, + { + "epoch": 3.58, + "learning_rate": 8.13004561629019e-06, + "loss": 0.0298, + "step": 76755 + }, + { + "epoch": 3.58, + "learning_rate": 8.129261831235402e-06, + "loss": 0.0255, + "step": 76760 + }, + { + "epoch": 3.58, + "learning_rate": 8.128478046180616e-06, + "loss": 0.0327, + "step": 76765 + }, + { + "epoch": 3.58, + "learning_rate": 8.12769426112583e-06, + "loss": 0.075, + "step": 76770 + }, + { + "epoch": 3.58, + "learning_rate": 8.126910476071042e-06, + "loss": 0.0938, + "step": 76775 + }, + { + "epoch": 3.58, + "learning_rate": 8.126126691016256e-06, + "loss": 0.1333, + "step": 76780 + }, + { + "epoch": 3.58, + "learning_rate": 8.12534290596147e-06, + "loss": 0.1166, + "step": 76785 + }, + { + "epoch": 3.58, + "learning_rate": 8.124559120906684e-06, + "loss": 0.1987, + "step": 76790 + }, + { + "epoch": 3.58, + "learning_rate": 8.123775335851897e-06, + "loss": 0.3264, + "step": 76795 + }, + { + "epoch": 3.58, + "learning_rate": 8.12299155079711e-06, + "loss": 0.096, + "step": 76800 + }, + { + "epoch": 3.58, + "learning_rate": 8.122207765742324e-06, + "loss": 0.034, + "step": 76805 + }, + { + "epoch": 3.58, + "learning_rate": 8.121423980687538e-06, + "loss": 0.0516, + "step": 76810 + }, + { + "epoch": 3.58, + "learning_rate": 8.120640195632751e-06, + "loss": 0.0528, + "step": 76815 + }, + { + "epoch": 3.58, + "learning_rate": 8.119856410577964e-06, + "loss": 0.0884, + "step": 76820 + }, + { + "epoch": 3.58, + "learning_rate": 8.119072625523176e-06, + "loss": 0.0855, + "step": 76825 + }, + { + "epoch": 3.58, + "learning_rate": 8.11828884046839e-06, + "loss": 0.0948, + "step": 76830 + }, + { + "epoch": 3.59, + "learning_rate": 8.117505055413604e-06, + "loss": 0.0889, + "step": 76835 + }, + { + "epoch": 3.59, + "learning_rate": 8.116721270358818e-06, + "loss": 0.2022, + "step": 76840 + }, + { + "epoch": 3.59, + "learning_rate": 8.115937485304031e-06, + "loss": 0.2287, + "step": 76845 + }, + { + "epoch": 3.59, + "learning_rate": 8.115153700249244e-06, + "loss": 0.0573, + "step": 76850 + }, + { + "epoch": 3.59, + "learning_rate": 8.114369915194458e-06, + "loss": 0.0238, + "step": 76855 + }, + { + "epoch": 3.59, + "learning_rate": 8.113586130139671e-06, + "loss": 0.0125, + "step": 76860 + }, + { + "epoch": 3.59, + "learning_rate": 8.112802345084885e-06, + "loss": 0.1019, + "step": 76865 + }, + { + "epoch": 3.59, + "learning_rate": 8.112018560030098e-06, + "loss": 0.1157, + "step": 76870 + }, + { + "epoch": 3.59, + "learning_rate": 8.111234774975312e-06, + "loss": 0.0912, + "step": 76875 + }, + { + "epoch": 3.59, + "learning_rate": 8.110450989920525e-06, + "loss": 0.1185, + "step": 76880 + }, + { + "epoch": 3.59, + "learning_rate": 8.10966720486574e-06, + "loss": 0.1433, + "step": 76885 + }, + { + "epoch": 3.59, + "learning_rate": 8.108883419810952e-06, + "loss": 0.2892, + "step": 76890 + }, + { + "epoch": 3.59, + "learning_rate": 8.108099634756165e-06, + "loss": 0.375, + "step": 76895 + }, + { + "epoch": 3.59, + "learning_rate": 8.107315849701378e-06, + "loss": 0.0706, + "step": 76900 + }, + { + "epoch": 3.59, + "learning_rate": 8.106532064646592e-06, + "loss": 0.022, + "step": 76905 + }, + { + "epoch": 3.59, + "learning_rate": 8.105748279591805e-06, + "loss": 0.0182, + "step": 76910 + }, + { + "epoch": 3.59, + "learning_rate": 8.10496449453702e-06, + "loss": 0.0763, + "step": 76915 + }, + { + "epoch": 3.59, + "learning_rate": 8.104180709482232e-06, + "loss": 0.0429, + "step": 76920 + }, + { + "epoch": 3.59, + "learning_rate": 8.103396924427445e-06, + "loss": 0.0621, + "step": 76925 + }, + { + "epoch": 3.59, + "learning_rate": 8.10261313937266e-06, + "loss": 0.0587, + "step": 76930 + }, + { + "epoch": 3.59, + "learning_rate": 8.101829354317873e-06, + "loss": 0.1526, + "step": 76935 + }, + { + "epoch": 3.59, + "learning_rate": 8.101045569263086e-06, + "loss": 0.101, + "step": 76940 + }, + { + "epoch": 3.59, + "learning_rate": 8.1002617842083e-06, + "loss": 0.213, + "step": 76945 + }, + { + "epoch": 3.59, + "learning_rate": 8.099477999153513e-06, + "loss": 0.0705, + "step": 76950 + }, + { + "epoch": 3.59, + "learning_rate": 8.098694214098726e-06, + "loss": 0.0205, + "step": 76955 + }, + { + "epoch": 3.59, + "learning_rate": 8.09791042904394e-06, + "loss": 0.0226, + "step": 76960 + }, + { + "epoch": 3.59, + "learning_rate": 8.097126643989153e-06, + "loss": 0.0564, + "step": 76965 + }, + { + "epoch": 3.59, + "learning_rate": 8.096342858934366e-06, + "loss": 0.0664, + "step": 76970 + }, + { + "epoch": 3.59, + "learning_rate": 8.09555907387958e-06, + "loss": 0.0594, + "step": 76975 + }, + { + "epoch": 3.59, + "learning_rate": 8.094775288824793e-06, + "loss": 0.1133, + "step": 76980 + }, + { + "epoch": 3.59, + "learning_rate": 8.093991503770007e-06, + "loss": 0.1443, + "step": 76985 + }, + { + "epoch": 3.59, + "learning_rate": 8.09320771871522e-06, + "loss": 0.2564, + "step": 76990 + }, + { + "epoch": 3.59, + "learning_rate": 8.092423933660433e-06, + "loss": 0.3879, + "step": 76995 + }, + { + "epoch": 3.59, + "learning_rate": 8.091640148605647e-06, + "loss": 0.0718, + "step": 77000 + }, + { + "epoch": 3.59, + "learning_rate": 8.090856363550861e-06, + "loss": 0.0181, + "step": 77005 + }, + { + "epoch": 3.59, + "learning_rate": 8.090072578496075e-06, + "loss": 0.0427, + "step": 77010 + }, + { + "epoch": 3.59, + "learning_rate": 8.089288793441287e-06, + "loss": 0.0605, + "step": 77015 + }, + { + "epoch": 3.59, + "learning_rate": 8.0885050083865e-06, + "loss": 0.0279, + "step": 77020 + }, + { + "epoch": 3.59, + "learning_rate": 8.087721223331713e-06, + "loss": 0.2084, + "step": 77025 + }, + { + "epoch": 3.59, + "learning_rate": 8.086937438276927e-06, + "loss": 0.097, + "step": 77030 + }, + { + "epoch": 3.59, + "learning_rate": 8.086153653222141e-06, + "loss": 0.1676, + "step": 77035 + }, + { + "epoch": 3.59, + "learning_rate": 8.085369868167353e-06, + "loss": 0.1195, + "step": 77040 + }, + { + "epoch": 3.6, + "learning_rate": 8.084586083112567e-06, + "loss": 0.1522, + "step": 77045 + }, + { + "epoch": 3.6, + "learning_rate": 8.083802298057781e-06, + "loss": 0.0322, + "step": 77050 + }, + { + "epoch": 3.6, + "learning_rate": 8.083018513002995e-06, + "loss": 0.035, + "step": 77055 + }, + { + "epoch": 3.6, + "learning_rate": 8.082234727948209e-06, + "loss": 0.047, + "step": 77060 + }, + { + "epoch": 3.6, + "learning_rate": 8.081450942893421e-06, + "loss": 0.0983, + "step": 77065 + }, + { + "epoch": 3.6, + "learning_rate": 8.080667157838635e-06, + "loss": 0.1496, + "step": 77070 + }, + { + "epoch": 3.6, + "learning_rate": 8.079883372783849e-06, + "loss": 0.1533, + "step": 77075 + }, + { + "epoch": 3.6, + "learning_rate": 8.079099587729063e-06, + "loss": 0.1387, + "step": 77080 + }, + { + "epoch": 3.6, + "learning_rate": 8.078315802674275e-06, + "loss": 0.0755, + "step": 77085 + }, + { + "epoch": 3.6, + "learning_rate": 8.077532017619487e-06, + "loss": 0.215, + "step": 77090 + }, + { + "epoch": 3.6, + "learning_rate": 8.076748232564701e-06, + "loss": 0.172, + "step": 77095 + }, + { + "epoch": 3.6, + "learning_rate": 8.075964447509915e-06, + "loss": 0.0512, + "step": 77100 + }, + { + "epoch": 3.6, + "learning_rate": 8.07518066245513e-06, + "loss": 0.0308, + "step": 77105 + }, + { + "epoch": 3.6, + "learning_rate": 8.074396877400343e-06, + "loss": 0.0165, + "step": 77110 + }, + { + "epoch": 3.6, + "learning_rate": 8.073613092345555e-06, + "loss": 0.0949, + "step": 77115 + }, + { + "epoch": 3.6, + "learning_rate": 8.07282930729077e-06, + "loss": 0.0572, + "step": 77120 + }, + { + "epoch": 3.6, + "learning_rate": 8.072045522235983e-06, + "loss": 0.0503, + "step": 77125 + }, + { + "epoch": 3.6, + "learning_rate": 8.071261737181197e-06, + "loss": 0.0687, + "step": 77130 + }, + { + "epoch": 3.6, + "learning_rate": 8.07047795212641e-06, + "loss": 0.1899, + "step": 77135 + }, + { + "epoch": 3.6, + "learning_rate": 8.069694167071623e-06, + "loss": 0.1388, + "step": 77140 + }, + { + "epoch": 3.6, + "learning_rate": 8.068910382016837e-06, + "loss": 0.2585, + "step": 77145 + }, + { + "epoch": 3.6, + "learning_rate": 8.06812659696205e-06, + "loss": 0.0928, + "step": 77150 + }, + { + "epoch": 3.6, + "learning_rate": 8.067342811907263e-06, + "loss": 0.082, + "step": 77155 + }, + { + "epoch": 3.6, + "learning_rate": 8.066559026852477e-06, + "loss": 0.0616, + "step": 77160 + }, + { + "epoch": 3.6, + "learning_rate": 8.06577524179769e-06, + "loss": 0.1023, + "step": 77165 + }, + { + "epoch": 3.6, + "learning_rate": 8.064991456742903e-06, + "loss": 0.0539, + "step": 77170 + }, + { + "epoch": 3.6, + "learning_rate": 8.064207671688117e-06, + "loss": 0.0532, + "step": 77175 + }, + { + "epoch": 3.6, + "learning_rate": 8.063423886633331e-06, + "loss": 0.1331, + "step": 77180 + }, + { + "epoch": 3.6, + "learning_rate": 8.062640101578543e-06, + "loss": 0.1599, + "step": 77185 + }, + { + "epoch": 3.6, + "learning_rate": 8.061856316523757e-06, + "loss": 0.2474, + "step": 77190 + }, + { + "epoch": 3.6, + "learning_rate": 8.061072531468971e-06, + "loss": 0.2996, + "step": 77195 + }, + { + "epoch": 3.6, + "learning_rate": 8.060288746414185e-06, + "loss": 0.0673, + "step": 77200 + }, + { + "epoch": 3.6, + "learning_rate": 8.059504961359397e-06, + "loss": 0.0557, + "step": 77205 + }, + { + "epoch": 3.6, + "learning_rate": 8.058721176304611e-06, + "loss": 0.0642, + "step": 77210 + }, + { + "epoch": 3.6, + "learning_rate": 8.057937391249823e-06, + "loss": 0.0275, + "step": 77215 + }, + { + "epoch": 3.6, + "learning_rate": 8.057153606195037e-06, + "loss": 0.0654, + "step": 77220 + }, + { + "epoch": 3.6, + "learning_rate": 8.056369821140251e-06, + "loss": 0.0903, + "step": 77225 + }, + { + "epoch": 3.6, + "learning_rate": 8.055586036085465e-06, + "loss": 0.0431, + "step": 77230 + }, + { + "epoch": 3.6, + "learning_rate": 8.054802251030677e-06, + "loss": 0.0962, + "step": 77235 + }, + { + "epoch": 3.6, + "learning_rate": 8.054018465975891e-06, + "loss": 0.237, + "step": 77240 + }, + { + "epoch": 3.6, + "learning_rate": 8.053234680921105e-06, + "loss": 0.3863, + "step": 77245 + }, + { + "epoch": 3.6, + "learning_rate": 8.052450895866319e-06, + "loss": 0.0608, + "step": 77250 + }, + { + "epoch": 3.6, + "learning_rate": 8.051667110811531e-06, + "loss": 0.0215, + "step": 77255 + }, + { + "epoch": 3.61, + "learning_rate": 8.050883325756745e-06, + "loss": 0.088, + "step": 77260 + }, + { + "epoch": 3.61, + "learning_rate": 8.050099540701959e-06, + "loss": 0.0515, + "step": 77265 + }, + { + "epoch": 3.61, + "learning_rate": 8.049315755647173e-06, + "loss": 0.0528, + "step": 77270 + }, + { + "epoch": 3.61, + "learning_rate": 8.048531970592387e-06, + "loss": 0.0872, + "step": 77275 + }, + { + "epoch": 3.61, + "learning_rate": 8.047748185537599e-06, + "loss": 0.1265, + "step": 77280 + }, + { + "epoch": 3.61, + "learning_rate": 8.046964400482811e-06, + "loss": 0.1019, + "step": 77285 + }, + { + "epoch": 3.61, + "learning_rate": 8.046180615428025e-06, + "loss": 0.1935, + "step": 77290 + }, + { + "epoch": 3.61, + "learning_rate": 8.045396830373239e-06, + "loss": 0.331, + "step": 77295 + }, + { + "epoch": 3.61, + "learning_rate": 8.044613045318453e-06, + "loss": 0.0256, + "step": 77300 + }, + { + "epoch": 3.61, + "learning_rate": 8.043829260263665e-06, + "loss": 0.06, + "step": 77305 + }, + { + "epoch": 3.61, + "learning_rate": 8.043045475208879e-06, + "loss": 0.0504, + "step": 77310 + }, + { + "epoch": 3.61, + "learning_rate": 8.042261690154093e-06, + "loss": 0.0804, + "step": 77315 + }, + { + "epoch": 3.61, + "learning_rate": 8.041477905099307e-06, + "loss": 0.0677, + "step": 77320 + }, + { + "epoch": 3.61, + "learning_rate": 8.04069412004452e-06, + "loss": 0.088, + "step": 77325 + }, + { + "epoch": 3.61, + "learning_rate": 8.039910334989733e-06, + "loss": 0.095, + "step": 77330 + }, + { + "epoch": 3.61, + "learning_rate": 8.039126549934947e-06, + "loss": 0.0983, + "step": 77335 + }, + { + "epoch": 3.61, + "learning_rate": 8.03834276488016e-06, + "loss": 0.204, + "step": 77340 + }, + { + "epoch": 3.61, + "learning_rate": 8.037558979825373e-06, + "loss": 0.3072, + "step": 77345 + }, + { + "epoch": 3.61, + "learning_rate": 8.036775194770587e-06, + "loss": 0.0787, + "step": 77350 + }, + { + "epoch": 3.61, + "learning_rate": 8.035991409715799e-06, + "loss": 0.0133, + "step": 77355 + }, + { + "epoch": 3.61, + "learning_rate": 8.035207624661013e-06, + "loss": 0.0537, + "step": 77360 + }, + { + "epoch": 3.61, + "learning_rate": 8.034423839606227e-06, + "loss": 0.0439, + "step": 77365 + }, + { + "epoch": 3.61, + "learning_rate": 8.03364005455144e-06, + "loss": 0.098, + "step": 77370 + }, + { + "epoch": 3.61, + "learning_rate": 8.032856269496655e-06, + "loss": 0.0948, + "step": 77375 + }, + { + "epoch": 3.61, + "learning_rate": 8.032072484441867e-06, + "loss": 0.08, + "step": 77380 + }, + { + "epoch": 3.61, + "learning_rate": 8.03128869938708e-06, + "loss": 0.0979, + "step": 77385 + }, + { + "epoch": 3.61, + "learning_rate": 8.030504914332295e-06, + "loss": 0.2572, + "step": 77390 + }, + { + "epoch": 3.61, + "learning_rate": 8.029721129277509e-06, + "loss": 0.3724, + "step": 77395 + }, + { + "epoch": 3.61, + "learning_rate": 8.02893734422272e-06, + "loss": 0.0675, + "step": 77400 + }, + { + "epoch": 3.61, + "learning_rate": 8.028153559167935e-06, + "loss": 0.0416, + "step": 77405 + }, + { + "epoch": 3.61, + "learning_rate": 8.027369774113147e-06, + "loss": 0.046, + "step": 77410 + }, + { + "epoch": 3.61, + "learning_rate": 8.02658598905836e-06, + "loss": 0.1032, + "step": 77415 + }, + { + "epoch": 3.61, + "learning_rate": 8.025802204003575e-06, + "loss": 0.0493, + "step": 77420 + }, + { + "epoch": 3.61, + "learning_rate": 8.025018418948789e-06, + "loss": 0.0605, + "step": 77425 + }, + { + "epoch": 3.61, + "learning_rate": 8.024234633894001e-06, + "loss": 0.1291, + "step": 77430 + }, + { + "epoch": 3.61, + "learning_rate": 8.023450848839215e-06, + "loss": 0.1378, + "step": 77435 + }, + { + "epoch": 3.61, + "learning_rate": 8.022667063784429e-06, + "loss": 0.1254, + "step": 77440 + }, + { + "epoch": 3.61, + "learning_rate": 8.021883278729643e-06, + "loss": 0.3442, + "step": 77445 + }, + { + "epoch": 3.61, + "learning_rate": 8.021099493674855e-06, + "loss": 0.0681, + "step": 77450 + }, + { + "epoch": 3.61, + "learning_rate": 8.020315708620069e-06, + "loss": 0.0381, + "step": 77455 + }, + { + "epoch": 3.61, + "learning_rate": 8.019531923565283e-06, + "loss": 0.0804, + "step": 77460 + }, + { + "epoch": 3.61, + "learning_rate": 8.018748138510496e-06, + "loss": 0.0281, + "step": 77465 + }, + { + "epoch": 3.61, + "learning_rate": 8.017964353455709e-06, + "loss": 0.0935, + "step": 77470 + }, + { + "epoch": 3.62, + "learning_rate": 8.017180568400923e-06, + "loss": 0.1158, + "step": 77475 + }, + { + "epoch": 3.62, + "learning_rate": 8.016396783346135e-06, + "loss": 0.1317, + "step": 77480 + }, + { + "epoch": 3.62, + "learning_rate": 8.015612998291349e-06, + "loss": 0.1924, + "step": 77485 + }, + { + "epoch": 3.62, + "learning_rate": 8.014829213236563e-06, + "loss": 0.2521, + "step": 77490 + }, + { + "epoch": 3.62, + "learning_rate": 8.014045428181777e-06, + "loss": 0.2315, + "step": 77495 + }, + { + "epoch": 3.62, + "learning_rate": 8.013261643126989e-06, + "loss": 0.0565, + "step": 77500 + }, + { + "epoch": 3.62, + "learning_rate": 8.012477858072203e-06, + "loss": 0.0091, + "step": 77505 + }, + { + "epoch": 3.62, + "learning_rate": 8.011694073017417e-06, + "loss": 0.0347, + "step": 77510 + }, + { + "epoch": 3.62, + "learning_rate": 8.01091028796263e-06, + "loss": 0.0646, + "step": 77515 + }, + { + "epoch": 3.62, + "learning_rate": 8.010126502907843e-06, + "loss": 0.0254, + "step": 77520 + }, + { + "epoch": 3.62, + "learning_rate": 8.009342717853057e-06, + "loss": 0.0536, + "step": 77525 + }, + { + "epoch": 3.62, + "learning_rate": 8.00855893279827e-06, + "loss": 0.1028, + "step": 77530 + }, + { + "epoch": 3.62, + "learning_rate": 8.007775147743484e-06, + "loss": 0.0755, + "step": 77535 + }, + { + "epoch": 3.62, + "learning_rate": 8.006991362688697e-06, + "loss": 0.19, + "step": 77540 + }, + { + "epoch": 3.62, + "learning_rate": 8.00620757763391e-06, + "loss": 0.4053, + "step": 77545 + }, + { + "epoch": 3.62, + "learning_rate": 8.005423792579123e-06, + "loss": 0.0813, + "step": 77550 + }, + { + "epoch": 3.62, + "learning_rate": 8.004640007524337e-06, + "loss": 0.0142, + "step": 77555 + }, + { + "epoch": 3.62, + "learning_rate": 8.00385622246955e-06, + "loss": 0.0355, + "step": 77560 + }, + { + "epoch": 3.62, + "learning_rate": 8.003072437414764e-06, + "loss": 0.0556, + "step": 77565 + }, + { + "epoch": 3.62, + "learning_rate": 8.002288652359977e-06, + "loss": 0.0372, + "step": 77570 + }, + { + "epoch": 3.62, + "learning_rate": 8.00150486730519e-06, + "loss": 0.054, + "step": 77575 + }, + { + "epoch": 3.62, + "learning_rate": 8.000721082250404e-06, + "loss": 0.0955, + "step": 77580 + }, + { + "epoch": 3.62, + "learning_rate": 7.999937297195618e-06, + "loss": 0.1179, + "step": 77585 + }, + { + "epoch": 3.62, + "learning_rate": 7.999153512140832e-06, + "loss": 0.1637, + "step": 77590 + }, + { + "epoch": 3.62, + "learning_rate": 7.998369727086044e-06, + "loss": 0.2502, + "step": 77595 + }, + { + "epoch": 3.62, + "learning_rate": 7.997585942031258e-06, + "loss": 0.0943, + "step": 77600 + }, + { + "epoch": 3.62, + "learning_rate": 7.99680215697647e-06, + "loss": 0.0234, + "step": 77605 + }, + { + "epoch": 3.62, + "learning_rate": 7.996018371921685e-06, + "loss": 0.0662, + "step": 77610 + }, + { + "epoch": 3.62, + "learning_rate": 7.995234586866898e-06, + "loss": 0.0476, + "step": 77615 + }, + { + "epoch": 3.62, + "learning_rate": 7.99445080181211e-06, + "loss": 0.0984, + "step": 77620 + }, + { + "epoch": 3.62, + "learning_rate": 7.993667016757325e-06, + "loss": 0.0983, + "step": 77625 + }, + { + "epoch": 3.62, + "learning_rate": 7.992883231702538e-06, + "loss": 0.0697, + "step": 77630 + }, + { + "epoch": 3.62, + "learning_rate": 7.992099446647752e-06, + "loss": 0.0726, + "step": 77635 + }, + { + "epoch": 3.62, + "learning_rate": 7.991315661592966e-06, + "loss": 0.1864, + "step": 77640 + }, + { + "epoch": 3.62, + "learning_rate": 7.990531876538178e-06, + "loss": 0.2873, + "step": 77645 + }, + { + "epoch": 3.62, + "learning_rate": 7.989748091483392e-06, + "loss": 0.0721, + "step": 77650 + }, + { + "epoch": 3.62, + "learning_rate": 7.988964306428606e-06, + "loss": 0.0206, + "step": 77655 + }, + { + "epoch": 3.62, + "learning_rate": 7.98818052137382e-06, + "loss": 0.0385, + "step": 77660 + }, + { + "epoch": 3.62, + "learning_rate": 7.987396736319032e-06, + "loss": 0.0535, + "step": 77665 + }, + { + "epoch": 3.62, + "learning_rate": 7.986612951264245e-06, + "loss": 0.0577, + "step": 77670 + }, + { + "epoch": 3.62, + "learning_rate": 7.985829166209459e-06, + "loss": 0.0842, + "step": 77675 + }, + { + "epoch": 3.62, + "learning_rate": 7.985045381154672e-06, + "loss": 0.0724, + "step": 77680 + }, + { + "epoch": 3.62, + "learning_rate": 7.984261596099886e-06, + "loss": 0.0749, + "step": 77685 + }, + { + "epoch": 3.63, + "learning_rate": 7.9834778110451e-06, + "loss": 0.1389, + "step": 77690 + }, + { + "epoch": 3.63, + "learning_rate": 7.982694025990312e-06, + "loss": 0.2554, + "step": 77695 + }, + { + "epoch": 3.63, + "learning_rate": 7.981910240935526e-06, + "loss": 0.0241, + "step": 77700 + }, + { + "epoch": 3.63, + "learning_rate": 7.98112645588074e-06, + "loss": 0.0397, + "step": 77705 + }, + { + "epoch": 3.63, + "learning_rate": 7.980342670825954e-06, + "loss": 0.0711, + "step": 77710 + }, + { + "epoch": 3.63, + "learning_rate": 7.979558885771166e-06, + "loss": 0.0933, + "step": 77715 + }, + { + "epoch": 3.63, + "learning_rate": 7.97877510071638e-06, + "loss": 0.0517, + "step": 77720 + }, + { + "epoch": 3.63, + "learning_rate": 7.977991315661594e-06, + "loss": 0.1226, + "step": 77725 + }, + { + "epoch": 3.63, + "learning_rate": 7.977207530606808e-06, + "loss": 0.1274, + "step": 77730 + }, + { + "epoch": 3.63, + "learning_rate": 7.97642374555202e-06, + "loss": 0.1621, + "step": 77735 + }, + { + "epoch": 3.63, + "learning_rate": 7.975639960497234e-06, + "loss": 0.279, + "step": 77740 + }, + { + "epoch": 3.63, + "learning_rate": 7.974856175442446e-06, + "loss": 0.2862, + "step": 77745 + }, + { + "epoch": 3.63, + "learning_rate": 7.97407239038766e-06, + "loss": 0.0266, + "step": 77750 + }, + { + "epoch": 3.63, + "learning_rate": 7.973288605332874e-06, + "loss": 0.0083, + "step": 77755 + }, + { + "epoch": 3.63, + "learning_rate": 7.972504820278088e-06, + "loss": 0.0477, + "step": 77760 + }, + { + "epoch": 3.63, + "learning_rate": 7.9717210352233e-06, + "loss": 0.0244, + "step": 77765 + }, + { + "epoch": 3.63, + "learning_rate": 7.970937250168514e-06, + "loss": 0.0371, + "step": 77770 + }, + { + "epoch": 3.63, + "learning_rate": 7.970153465113728e-06, + "loss": 0.1224, + "step": 77775 + }, + { + "epoch": 3.63, + "learning_rate": 7.969369680058942e-06, + "loss": 0.1255, + "step": 77780 + }, + { + "epoch": 3.63, + "learning_rate": 7.968585895004154e-06, + "loss": 0.1138, + "step": 77785 + }, + { + "epoch": 3.63, + "learning_rate": 7.967802109949368e-06, + "loss": 0.174, + "step": 77790 + }, + { + "epoch": 3.63, + "learning_rate": 7.967018324894582e-06, + "loss": 0.2551, + "step": 77795 + }, + { + "epoch": 3.63, + "learning_rate": 7.966234539839794e-06, + "loss": 0.0698, + "step": 77800 + }, + { + "epoch": 3.63, + "learning_rate": 7.965450754785008e-06, + "loss": 0.0357, + "step": 77805 + }, + { + "epoch": 3.63, + "learning_rate": 7.964666969730222e-06, + "loss": 0.0263, + "step": 77810 + }, + { + "epoch": 3.63, + "learning_rate": 7.963883184675434e-06, + "loss": 0.0402, + "step": 77815 + }, + { + "epoch": 3.63, + "learning_rate": 7.963099399620648e-06, + "loss": 0.0769, + "step": 77820 + }, + { + "epoch": 3.63, + "learning_rate": 7.962315614565862e-06, + "loss": 0.0841, + "step": 77825 + }, + { + "epoch": 3.63, + "learning_rate": 7.961531829511076e-06, + "loss": 0.1091, + "step": 77830 + }, + { + "epoch": 3.63, + "learning_rate": 7.960748044456288e-06, + "loss": 0.0706, + "step": 77835 + }, + { + "epoch": 3.63, + "learning_rate": 7.959964259401502e-06, + "loss": 0.1679, + "step": 77840 + }, + { + "epoch": 3.63, + "learning_rate": 7.959180474346716e-06, + "loss": 0.2807, + "step": 77845 + }, + { + "epoch": 3.63, + "learning_rate": 7.95839668929193e-06, + "loss": 0.0608, + "step": 77850 + }, + { + "epoch": 3.63, + "learning_rate": 7.957612904237144e-06, + "loss": 0.0379, + "step": 77855 + }, + { + "epoch": 3.63, + "learning_rate": 7.956829119182356e-06, + "loss": 0.012, + "step": 77860 + }, + { + "epoch": 3.63, + "learning_rate": 7.956045334127568e-06, + "loss": 0.1044, + "step": 77865 + }, + { + "epoch": 3.63, + "learning_rate": 7.955261549072782e-06, + "loss": 0.0755, + "step": 77870 + }, + { + "epoch": 3.63, + "learning_rate": 7.954477764017996e-06, + "loss": 0.1034, + "step": 77875 + }, + { + "epoch": 3.63, + "learning_rate": 7.95369397896321e-06, + "loss": 0.078, + "step": 77880 + }, + { + "epoch": 3.63, + "learning_rate": 7.952910193908422e-06, + "loss": 0.1178, + "step": 77885 + }, + { + "epoch": 3.63, + "learning_rate": 7.952126408853636e-06, + "loss": 0.2689, + "step": 77890 + }, + { + "epoch": 3.63, + "learning_rate": 7.95134262379885e-06, + "loss": 0.297, + "step": 77895 + }, + { + "epoch": 3.63, + "learning_rate": 7.950558838744064e-06, + "loss": 0.1267, + "step": 77900 + }, + { + "epoch": 3.64, + "learning_rate": 7.949775053689278e-06, + "loss": 0.0355, + "step": 77905 + }, + { + "epoch": 3.64, + "learning_rate": 7.94899126863449e-06, + "loss": 0.0595, + "step": 77910 + }, + { + "epoch": 3.64, + "learning_rate": 7.948207483579704e-06, + "loss": 0.0467, + "step": 77915 + }, + { + "epoch": 3.64, + "learning_rate": 7.947423698524918e-06, + "loss": 0.0638, + "step": 77920 + }, + { + "epoch": 3.64, + "learning_rate": 7.946639913470132e-06, + "loss": 0.0661, + "step": 77925 + }, + { + "epoch": 3.64, + "learning_rate": 7.945856128415344e-06, + "loss": 0.0676, + "step": 77930 + }, + { + "epoch": 3.64, + "learning_rate": 7.945072343360556e-06, + "loss": 0.123, + "step": 77935 + }, + { + "epoch": 3.64, + "learning_rate": 7.94428855830577e-06, + "loss": 0.2575, + "step": 77940 + }, + { + "epoch": 3.64, + "learning_rate": 7.943504773250984e-06, + "loss": 0.2391, + "step": 77945 + }, + { + "epoch": 3.64, + "learning_rate": 7.942720988196198e-06, + "loss": 0.0485, + "step": 77950 + }, + { + "epoch": 3.64, + "learning_rate": 7.941937203141412e-06, + "loss": 0.0493, + "step": 77955 + }, + { + "epoch": 3.64, + "learning_rate": 7.941153418086624e-06, + "loss": 0.0492, + "step": 77960 + }, + { + "epoch": 3.64, + "learning_rate": 7.940369633031838e-06, + "loss": 0.0334, + "step": 77965 + }, + { + "epoch": 3.64, + "learning_rate": 7.939585847977052e-06, + "loss": 0.0832, + "step": 77970 + }, + { + "epoch": 3.64, + "learning_rate": 7.938802062922266e-06, + "loss": 0.0248, + "step": 77975 + }, + { + "epoch": 3.64, + "learning_rate": 7.938018277867478e-06, + "loss": 0.0332, + "step": 77980 + }, + { + "epoch": 3.64, + "learning_rate": 7.937234492812692e-06, + "loss": 0.1669, + "step": 77985 + }, + { + "epoch": 3.64, + "learning_rate": 7.936450707757906e-06, + "loss": 0.2448, + "step": 77990 + }, + { + "epoch": 3.64, + "learning_rate": 7.935666922703118e-06, + "loss": 0.3489, + "step": 77995 + }, + { + "epoch": 3.64, + "learning_rate": 7.934883137648332e-06, + "loss": 0.0499, + "step": 78000 + }, + { + "epoch": 3.64, + "learning_rate": 7.934099352593546e-06, + "loss": 0.0775, + "step": 78005 + }, + { + "epoch": 3.64, + "learning_rate": 7.933315567538758e-06, + "loss": 0.0262, + "step": 78010 + }, + { + "epoch": 3.64, + "learning_rate": 7.932531782483972e-06, + "loss": 0.0921, + "step": 78015 + }, + { + "epoch": 3.64, + "learning_rate": 7.931747997429186e-06, + "loss": 0.0541, + "step": 78020 + }, + { + "epoch": 3.64, + "learning_rate": 7.9309642123744e-06, + "loss": 0.1125, + "step": 78025 + }, + { + "epoch": 3.64, + "learning_rate": 7.930180427319612e-06, + "loss": 0.083, + "step": 78030 + }, + { + "epoch": 3.64, + "learning_rate": 7.929396642264826e-06, + "loss": 0.0961, + "step": 78035 + }, + { + "epoch": 3.64, + "learning_rate": 7.92861285721004e-06, + "loss": 0.1987, + "step": 78040 + }, + { + "epoch": 3.64, + "learning_rate": 7.927829072155254e-06, + "loss": 0.3407, + "step": 78045 + }, + { + "epoch": 3.64, + "learning_rate": 7.927045287100466e-06, + "loss": 0.0674, + "step": 78050 + }, + { + "epoch": 3.64, + "learning_rate": 7.92626150204568e-06, + "loss": 0.0582, + "step": 78055 + }, + { + "epoch": 3.64, + "learning_rate": 7.925477716990892e-06, + "loss": 0.0877, + "step": 78060 + }, + { + "epoch": 3.64, + "learning_rate": 7.924693931936106e-06, + "loss": 0.6231, + "step": 78065 + }, + { + "epoch": 3.64, + "learning_rate": 7.92391014688132e-06, + "loss": 0.0963, + "step": 78070 + }, + { + "epoch": 3.64, + "learning_rate": 7.923126361826534e-06, + "loss": 0.0848, + "step": 78075 + }, + { + "epoch": 3.64, + "learning_rate": 7.922342576771746e-06, + "loss": 0.119, + "step": 78080 + }, + { + "epoch": 3.64, + "learning_rate": 7.92155879171696e-06, + "loss": 0.1368, + "step": 78085 + }, + { + "epoch": 3.64, + "learning_rate": 7.920775006662174e-06, + "loss": 0.299, + "step": 78090 + }, + { + "epoch": 3.64, + "learning_rate": 7.919991221607388e-06, + "loss": 0.2911, + "step": 78095 + }, + { + "epoch": 3.64, + "learning_rate": 7.9192074365526e-06, + "loss": 0.0824, + "step": 78100 + }, + { + "epoch": 3.64, + "learning_rate": 7.918423651497814e-06, + "loss": 0.0767, + "step": 78105 + }, + { + "epoch": 3.64, + "learning_rate": 7.917639866443028e-06, + "loss": 0.0962, + "step": 78110 + }, + { + "epoch": 3.64, + "learning_rate": 7.916856081388242e-06, + "loss": 0.0316, + "step": 78115 + }, + { + "epoch": 3.65, + "learning_rate": 7.916072296333455e-06, + "loss": 0.1537, + "step": 78120 + }, + { + "epoch": 3.65, + "learning_rate": 7.915288511278668e-06, + "loss": 0.1014, + "step": 78125 + }, + { + "epoch": 3.65, + "learning_rate": 7.91450472622388e-06, + "loss": 0.0757, + "step": 78130 + }, + { + "epoch": 3.65, + "learning_rate": 7.913720941169094e-06, + "loss": 0.1169, + "step": 78135 + }, + { + "epoch": 3.65, + "learning_rate": 7.912937156114308e-06, + "loss": 0.1633, + "step": 78140 + }, + { + "epoch": 3.65, + "learning_rate": 7.912153371059522e-06, + "loss": 0.301, + "step": 78145 + }, + { + "epoch": 3.65, + "learning_rate": 7.911369586004734e-06, + "loss": 0.07, + "step": 78150 + }, + { + "epoch": 3.65, + "learning_rate": 7.910585800949948e-06, + "loss": 0.0381, + "step": 78155 + }, + { + "epoch": 3.65, + "learning_rate": 7.909802015895162e-06, + "loss": 0.0312, + "step": 78160 + }, + { + "epoch": 3.65, + "learning_rate": 7.909018230840376e-06, + "loss": 0.0264, + "step": 78165 + }, + { + "epoch": 3.65, + "learning_rate": 7.90823444578559e-06, + "loss": 0.0543, + "step": 78170 + }, + { + "epoch": 3.65, + "learning_rate": 7.907450660730802e-06, + "loss": 0.0919, + "step": 78175 + }, + { + "epoch": 3.65, + "learning_rate": 7.906666875676016e-06, + "loss": 0.1097, + "step": 78180 + }, + { + "epoch": 3.65, + "learning_rate": 7.90588309062123e-06, + "loss": 0.0907, + "step": 78185 + }, + { + "epoch": 3.65, + "learning_rate": 7.905099305566442e-06, + "loss": 0.1474, + "step": 78190 + }, + { + "epoch": 3.65, + "learning_rate": 7.904315520511656e-06, + "loss": 0.2071, + "step": 78195 + }, + { + "epoch": 3.65, + "learning_rate": 7.903531735456868e-06, + "loss": 0.0569, + "step": 78200 + }, + { + "epoch": 3.65, + "learning_rate": 7.902747950402082e-06, + "loss": 0.0304, + "step": 78205 + }, + { + "epoch": 3.65, + "learning_rate": 7.901964165347296e-06, + "loss": 0.0876, + "step": 78210 + }, + { + "epoch": 3.65, + "learning_rate": 7.90118038029251e-06, + "loss": 0.0544, + "step": 78215 + }, + { + "epoch": 3.65, + "learning_rate": 7.900396595237723e-06, + "loss": 0.0574, + "step": 78220 + }, + { + "epoch": 3.65, + "learning_rate": 7.899612810182936e-06, + "loss": 0.0921, + "step": 78225 + }, + { + "epoch": 3.65, + "learning_rate": 7.89882902512815e-06, + "loss": 0.1187, + "step": 78230 + }, + { + "epoch": 3.65, + "learning_rate": 7.898045240073363e-06, + "loss": 0.1302, + "step": 78235 + }, + { + "epoch": 3.65, + "learning_rate": 7.897261455018577e-06, + "loss": 0.1524, + "step": 78240 + }, + { + "epoch": 3.65, + "learning_rate": 7.89647766996379e-06, + "loss": 0.2349, + "step": 78245 + }, + { + "epoch": 3.65, + "learning_rate": 7.895693884909003e-06, + "loss": 0.0869, + "step": 78250 + }, + { + "epoch": 3.65, + "learning_rate": 7.894910099854216e-06, + "loss": 0.0222, + "step": 78255 + }, + { + "epoch": 3.65, + "learning_rate": 7.89412631479943e-06, + "loss": 0.0833, + "step": 78260 + }, + { + "epoch": 3.65, + "learning_rate": 7.893342529744643e-06, + "loss": 0.0476, + "step": 78265 + }, + { + "epoch": 3.65, + "learning_rate": 7.892558744689857e-06, + "loss": 0.063, + "step": 78270 + }, + { + "epoch": 3.65, + "learning_rate": 7.89177495963507e-06, + "loss": 0.085, + "step": 78275 + }, + { + "epoch": 3.65, + "learning_rate": 7.890991174580284e-06, + "loss": 0.1516, + "step": 78280 + }, + { + "epoch": 3.65, + "learning_rate": 7.890207389525497e-06, + "loss": 0.0996, + "step": 78285 + }, + { + "epoch": 3.65, + "learning_rate": 7.889423604470711e-06, + "loss": 0.178, + "step": 78290 + }, + { + "epoch": 3.65, + "learning_rate": 7.888639819415924e-06, + "loss": 0.2889, + "step": 78295 + }, + { + "epoch": 3.65, + "learning_rate": 7.887856034361137e-06, + "loss": 0.0732, + "step": 78300 + }, + { + "epoch": 3.65, + "learning_rate": 7.887072249306351e-06, + "loss": 0.0353, + "step": 78305 + }, + { + "epoch": 3.65, + "learning_rate": 7.886288464251565e-06, + "loss": 0.0454, + "step": 78310 + }, + { + "epoch": 3.65, + "learning_rate": 7.885504679196777e-06, + "loss": 0.0422, + "step": 78315 + }, + { + "epoch": 3.65, + "learning_rate": 7.884720894141991e-06, + "loss": 0.073, + "step": 78320 + }, + { + "epoch": 3.65, + "learning_rate": 7.883937109087204e-06, + "loss": 0.0793, + "step": 78325 + }, + { + "epoch": 3.65, + "learning_rate": 7.883153324032417e-06, + "loss": 0.0979, + "step": 78330 + }, + { + "epoch": 3.66, + "learning_rate": 7.882369538977631e-06, + "loss": 0.1539, + "step": 78335 + }, + { + "epoch": 3.66, + "learning_rate": 7.881585753922845e-06, + "loss": 0.2239, + "step": 78340 + }, + { + "epoch": 3.66, + "learning_rate": 7.880801968868058e-06, + "loss": 0.2689, + "step": 78345 + }, + { + "epoch": 3.66, + "learning_rate": 7.880018183813271e-06, + "loss": 0.0597, + "step": 78350 + }, + { + "epoch": 3.66, + "learning_rate": 7.879234398758485e-06, + "loss": 0.0656, + "step": 78355 + }, + { + "epoch": 3.66, + "learning_rate": 7.8784506137037e-06, + "loss": 0.0343, + "step": 78360 + }, + { + "epoch": 3.66, + "learning_rate": 7.877666828648911e-06, + "loss": 0.0812, + "step": 78365 + }, + { + "epoch": 3.66, + "learning_rate": 7.876883043594125e-06, + "loss": 0.0899, + "step": 78370 + }, + { + "epoch": 3.66, + "learning_rate": 7.87609925853934e-06, + "loss": 0.0821, + "step": 78375 + }, + { + "epoch": 3.66, + "learning_rate": 7.875315473484553e-06, + "loss": 0.0868, + "step": 78380 + }, + { + "epoch": 3.66, + "learning_rate": 7.874531688429765e-06, + "loss": 0.0812, + "step": 78385 + }, + { + "epoch": 3.66, + "learning_rate": 7.87374790337498e-06, + "loss": 0.1704, + "step": 78390 + }, + { + "epoch": 3.66, + "learning_rate": 7.872964118320191e-06, + "loss": 0.4426, + "step": 78395 + }, + { + "epoch": 3.66, + "learning_rate": 7.872180333265405e-06, + "loss": 0.0825, + "step": 78400 + }, + { + "epoch": 3.66, + "learning_rate": 7.87139654821062e-06, + "loss": 0.0304, + "step": 78405 + }, + { + "epoch": 3.66, + "learning_rate": 7.870612763155833e-06, + "loss": 0.0108, + "step": 78410 + }, + { + "epoch": 3.66, + "learning_rate": 7.869828978101045e-06, + "loss": 0.061, + "step": 78415 + }, + { + "epoch": 3.66, + "learning_rate": 7.86904519304626e-06, + "loss": 0.044, + "step": 78420 + }, + { + "epoch": 3.66, + "learning_rate": 7.868261407991473e-06, + "loss": 0.1055, + "step": 78425 + }, + { + "epoch": 3.66, + "learning_rate": 7.867477622936687e-06, + "loss": 0.0752, + "step": 78430 + }, + { + "epoch": 3.66, + "learning_rate": 7.866693837881901e-06, + "loss": 0.1773, + "step": 78435 + }, + { + "epoch": 3.66, + "learning_rate": 7.865910052827113e-06, + "loss": 0.1699, + "step": 78440 + }, + { + "epoch": 3.66, + "learning_rate": 7.865126267772327e-06, + "loss": 0.1484, + "step": 78445 + }, + { + "epoch": 3.66, + "learning_rate": 7.86434248271754e-06, + "loss": 0.0886, + "step": 78450 + }, + { + "epoch": 3.66, + "learning_rate": 7.863558697662753e-06, + "loss": 0.028, + "step": 78455 + }, + { + "epoch": 3.66, + "learning_rate": 7.862774912607967e-06, + "loss": 0.0327, + "step": 78460 + }, + { + "epoch": 3.66, + "learning_rate": 7.86199112755318e-06, + "loss": 0.0557, + "step": 78465 + }, + { + "epoch": 3.66, + "learning_rate": 7.861207342498393e-06, + "loss": 0.068, + "step": 78470 + }, + { + "epoch": 3.66, + "learning_rate": 7.860423557443607e-06, + "loss": 0.0406, + "step": 78475 + }, + { + "epoch": 3.66, + "learning_rate": 7.859639772388821e-06, + "loss": 0.1129, + "step": 78480 + }, + { + "epoch": 3.66, + "learning_rate": 7.858855987334035e-06, + "loss": 0.0918, + "step": 78485 + }, + { + "epoch": 3.66, + "learning_rate": 7.858072202279247e-06, + "loss": 0.1735, + "step": 78490 + }, + { + "epoch": 3.66, + "learning_rate": 7.857288417224461e-06, + "loss": 0.2278, + "step": 78495 + }, + { + "epoch": 3.66, + "learning_rate": 7.856504632169675e-06, + "loss": 0.063, + "step": 78500 + }, + { + "epoch": 3.66, + "learning_rate": 7.855720847114889e-06, + "loss": 0.0642, + "step": 78505 + }, + { + "epoch": 3.66, + "learning_rate": 7.854937062060101e-06, + "loss": 0.029, + "step": 78510 + }, + { + "epoch": 3.66, + "learning_rate": 7.854153277005313e-06, + "loss": 0.0382, + "step": 78515 + }, + { + "epoch": 3.66, + "learning_rate": 7.853369491950527e-06, + "loss": 0.0576, + "step": 78520 + }, + { + "epoch": 3.66, + "learning_rate": 7.852585706895741e-06, + "loss": 0.096, + "step": 78525 + }, + { + "epoch": 3.66, + "learning_rate": 7.851801921840955e-06, + "loss": 0.0674, + "step": 78530 + }, + { + "epoch": 3.66, + "learning_rate": 7.851018136786169e-06, + "loss": 0.2253, + "step": 78535 + }, + { + "epoch": 3.66, + "learning_rate": 7.850234351731381e-06, + "loss": 0.1518, + "step": 78540 + }, + { + "epoch": 3.67, + "learning_rate": 7.849450566676595e-06, + "loss": 0.386, + "step": 78545 + }, + { + "epoch": 3.67, + "learning_rate": 7.848666781621809e-06, + "loss": 0.0507, + "step": 78550 + }, + { + "epoch": 3.67, + "learning_rate": 7.847882996567023e-06, + "loss": 0.0119, + "step": 78555 + }, + { + "epoch": 3.67, + "learning_rate": 7.847099211512235e-06, + "loss": 0.0605, + "step": 78560 + }, + { + "epoch": 3.67, + "learning_rate": 7.846315426457449e-06, + "loss": 0.0267, + "step": 78565 + }, + { + "epoch": 3.67, + "learning_rate": 7.845531641402663e-06, + "loss": 0.054, + "step": 78570 + }, + { + "epoch": 3.67, + "learning_rate": 7.844747856347877e-06, + "loss": 0.1297, + "step": 78575 + }, + { + "epoch": 3.67, + "learning_rate": 7.843964071293089e-06, + "loss": 0.1203, + "step": 78580 + }, + { + "epoch": 3.67, + "learning_rate": 7.843180286238303e-06, + "loss": 0.0647, + "step": 78585 + }, + { + "epoch": 3.67, + "learning_rate": 7.842396501183515e-06, + "loss": 0.1628, + "step": 78590 + }, + { + "epoch": 3.67, + "learning_rate": 7.841612716128729e-06, + "loss": 0.3329, + "step": 78595 + }, + { + "epoch": 3.67, + "learning_rate": 7.840828931073943e-06, + "loss": 0.05, + "step": 78600 + }, + { + "epoch": 3.67, + "learning_rate": 7.840045146019157e-06, + "loss": 0.0131, + "step": 78605 + }, + { + "epoch": 3.67, + "learning_rate": 7.839261360964369e-06, + "loss": 0.0388, + "step": 78610 + }, + { + "epoch": 3.67, + "learning_rate": 7.838477575909583e-06, + "loss": 0.0757, + "step": 78615 + }, + { + "epoch": 3.67, + "learning_rate": 7.837693790854797e-06, + "loss": 0.0546, + "step": 78620 + }, + { + "epoch": 3.67, + "learning_rate": 7.83691000580001e-06, + "loss": 0.1291, + "step": 78625 + }, + { + "epoch": 3.67, + "learning_rate": 7.836126220745223e-06, + "loss": 0.0914, + "step": 78630 + }, + { + "epoch": 3.67, + "learning_rate": 7.835342435690437e-06, + "loss": 0.2011, + "step": 78635 + }, + { + "epoch": 3.67, + "learning_rate": 7.834558650635651e-06, + "loss": 0.1913, + "step": 78640 + }, + { + "epoch": 3.67, + "learning_rate": 7.833774865580863e-06, + "loss": 0.3214, + "step": 78645 + }, + { + "epoch": 3.67, + "learning_rate": 7.832991080526077e-06, + "loss": 0.0318, + "step": 78650 + }, + { + "epoch": 3.67, + "learning_rate": 7.832207295471291e-06, + "loss": 0.0228, + "step": 78655 + }, + { + "epoch": 3.67, + "learning_rate": 7.831423510416503e-06, + "loss": 0.0552, + "step": 78660 + }, + { + "epoch": 3.67, + "learning_rate": 7.830639725361717e-06, + "loss": 0.0665, + "step": 78665 + }, + { + "epoch": 3.67, + "learning_rate": 7.829855940306931e-06, + "loss": 0.0686, + "step": 78670 + }, + { + "epoch": 3.67, + "learning_rate": 7.829072155252145e-06, + "loss": 0.0723, + "step": 78675 + }, + { + "epoch": 3.67, + "learning_rate": 7.828288370197357e-06, + "loss": 0.1752, + "step": 78680 + }, + { + "epoch": 3.67, + "learning_rate": 7.827504585142571e-06, + "loss": 0.1152, + "step": 78685 + }, + { + "epoch": 3.67, + "learning_rate": 7.826720800087785e-06, + "loss": 0.1724, + "step": 78690 + }, + { + "epoch": 3.67, + "learning_rate": 7.825937015032999e-06, + "loss": 0.1554, + "step": 78695 + }, + { + "epoch": 3.67, + "learning_rate": 7.825153229978213e-06, + "loss": 0.054, + "step": 78700 + }, + { + "epoch": 3.67, + "learning_rate": 7.824369444923425e-06, + "loss": 0.0047, + "step": 78705 + }, + { + "epoch": 3.67, + "learning_rate": 7.823585659868637e-06, + "loss": 0.0628, + "step": 78710 + }, + { + "epoch": 3.67, + "learning_rate": 7.822801874813851e-06, + "loss": 0.1317, + "step": 78715 + }, + { + "epoch": 3.67, + "learning_rate": 7.822018089759065e-06, + "loss": 0.0651, + "step": 78720 + }, + { + "epoch": 3.67, + "learning_rate": 7.821234304704279e-06, + "loss": 0.0526, + "step": 78725 + }, + { + "epoch": 3.67, + "learning_rate": 7.820450519649491e-06, + "loss": 0.0481, + "step": 78730 + }, + { + "epoch": 3.67, + "learning_rate": 7.819666734594705e-06, + "loss": 0.1255, + "step": 78735 + }, + { + "epoch": 3.67, + "learning_rate": 7.818882949539919e-06, + "loss": 0.2002, + "step": 78740 + }, + { + "epoch": 3.67, + "learning_rate": 7.818099164485133e-06, + "loss": 0.2313, + "step": 78745 + }, + { + "epoch": 3.67, + "learning_rate": 7.817315379430347e-06, + "loss": 0.1013, + "step": 78750 + }, + { + "epoch": 3.67, + "learning_rate": 7.816531594375559e-06, + "loss": 0.0301, + "step": 78755 + }, + { + "epoch": 3.68, + "learning_rate": 7.815747809320773e-06, + "loss": 0.0135, + "step": 78760 + }, + { + "epoch": 3.68, + "learning_rate": 7.814964024265987e-06, + "loss": 0.0525, + "step": 78765 + }, + { + "epoch": 3.68, + "learning_rate": 7.8141802392112e-06, + "loss": 0.1247, + "step": 78770 + }, + { + "epoch": 3.68, + "learning_rate": 7.813396454156413e-06, + "loss": 0.0658, + "step": 78775 + }, + { + "epoch": 3.68, + "learning_rate": 7.812612669101625e-06, + "loss": 0.1077, + "step": 78780 + }, + { + "epoch": 3.68, + "learning_rate": 7.811828884046839e-06, + "loss": 0.1567, + "step": 78785 + }, + { + "epoch": 3.68, + "learning_rate": 7.811045098992053e-06, + "loss": 0.1781, + "step": 78790 + }, + { + "epoch": 3.68, + "learning_rate": 7.810261313937267e-06, + "loss": 0.2068, + "step": 78795 + }, + { + "epoch": 3.68, + "learning_rate": 7.80947752888248e-06, + "loss": 0.1577, + "step": 78800 + }, + { + "epoch": 3.68, + "learning_rate": 7.808693743827693e-06, + "loss": 0.0462, + "step": 78805 + }, + { + "epoch": 3.68, + "learning_rate": 7.807909958772907e-06, + "loss": 0.0606, + "step": 78810 + }, + { + "epoch": 3.68, + "learning_rate": 7.80712617371812e-06, + "loss": 0.0329, + "step": 78815 + }, + { + "epoch": 3.68, + "learning_rate": 7.806342388663335e-06, + "loss": 0.0622, + "step": 78820 + }, + { + "epoch": 3.68, + "learning_rate": 7.805558603608547e-06, + "loss": 0.2249, + "step": 78825 + }, + { + "epoch": 3.68, + "learning_rate": 7.80477481855376e-06, + "loss": 0.1718, + "step": 78830 + }, + { + "epoch": 3.68, + "learning_rate": 7.803991033498975e-06, + "loss": 0.1659, + "step": 78835 + }, + { + "epoch": 3.68, + "learning_rate": 7.803207248444187e-06, + "loss": 0.1572, + "step": 78840 + }, + { + "epoch": 3.68, + "learning_rate": 7.8024234633894e-06, + "loss": 0.3135, + "step": 78845 + }, + { + "epoch": 3.68, + "learning_rate": 7.801639678334615e-06, + "loss": 0.0707, + "step": 78850 + }, + { + "epoch": 3.68, + "learning_rate": 7.800855893279827e-06, + "loss": 0.0092, + "step": 78855 + }, + { + "epoch": 3.68, + "learning_rate": 7.80007210822504e-06, + "loss": 0.061, + "step": 78860 + }, + { + "epoch": 3.68, + "learning_rate": 7.799288323170255e-06, + "loss": 0.0245, + "step": 78865 + }, + { + "epoch": 3.68, + "learning_rate": 7.798504538115468e-06, + "loss": 0.0909, + "step": 78870 + }, + { + "epoch": 3.68, + "learning_rate": 7.79772075306068e-06, + "loss": 0.1077, + "step": 78875 + }, + { + "epoch": 3.68, + "learning_rate": 7.796936968005895e-06, + "loss": 0.0799, + "step": 78880 + }, + { + "epoch": 3.68, + "learning_rate": 7.796153182951109e-06, + "loss": 0.1157, + "step": 78885 + }, + { + "epoch": 3.68, + "learning_rate": 7.795369397896322e-06, + "loss": 0.2334, + "step": 78890 + }, + { + "epoch": 3.68, + "learning_rate": 7.794585612841535e-06, + "loss": 0.4019, + "step": 78895 + }, + { + "epoch": 3.68, + "learning_rate": 7.793801827786749e-06, + "loss": 0.0946, + "step": 78900 + }, + { + "epoch": 3.68, + "learning_rate": 7.79301804273196e-06, + "loss": 0.0261, + "step": 78905 + }, + { + "epoch": 3.68, + "learning_rate": 7.792234257677175e-06, + "loss": 0.0345, + "step": 78910 + }, + { + "epoch": 3.68, + "learning_rate": 7.791450472622389e-06, + "loss": 0.0157, + "step": 78915 + }, + { + "epoch": 3.68, + "learning_rate": 7.790666687567602e-06, + "loss": 0.0677, + "step": 78920 + }, + { + "epoch": 3.68, + "learning_rate": 7.789882902512815e-06, + "loss": 0.1063, + "step": 78925 + }, + { + "epoch": 3.68, + "learning_rate": 7.789099117458029e-06, + "loss": 0.0972, + "step": 78930 + }, + { + "epoch": 3.68, + "learning_rate": 7.788315332403242e-06, + "loss": 0.1355, + "step": 78935 + }, + { + "epoch": 3.68, + "learning_rate": 7.787531547348456e-06, + "loss": 0.2036, + "step": 78940 + }, + { + "epoch": 3.68, + "learning_rate": 7.786747762293669e-06, + "loss": 0.296, + "step": 78945 + }, + { + "epoch": 3.68, + "learning_rate": 7.785963977238883e-06, + "loss": 0.0542, + "step": 78950 + }, + { + "epoch": 3.68, + "learning_rate": 7.785180192184096e-06, + "loss": 0.0348, + "step": 78955 + }, + { + "epoch": 3.68, + "learning_rate": 7.78439640712931e-06, + "loss": 0.0292, + "step": 78960 + }, + { + "epoch": 3.68, + "learning_rate": 7.783612622074524e-06, + "loss": 0.115, + "step": 78965 + }, + { + "epoch": 3.68, + "learning_rate": 7.782828837019736e-06, + "loss": 0.1179, + "step": 78970 + }, + { + "epoch": 3.69, + "learning_rate": 7.782045051964949e-06, + "loss": 0.0641, + "step": 78975 + }, + { + "epoch": 3.69, + "learning_rate": 7.781261266910163e-06, + "loss": 0.0712, + "step": 78980 + }, + { + "epoch": 3.69, + "learning_rate": 7.780477481855376e-06, + "loss": 0.0613, + "step": 78985 + }, + { + "epoch": 3.69, + "learning_rate": 7.77969369680059e-06, + "loss": 0.2245, + "step": 78990 + }, + { + "epoch": 3.69, + "learning_rate": 7.778909911745803e-06, + "loss": 0.1215, + "step": 78995 + }, + { + "epoch": 3.69, + "learning_rate": 7.778126126691016e-06, + "loss": 0.0407, + "step": 79000 + }, + { + "epoch": 3.69, + "learning_rate": 7.77734234163623e-06, + "loss": 0.0457, + "step": 79005 + }, + { + "epoch": 3.69, + "learning_rate": 7.776558556581444e-06, + "loss": 0.0425, + "step": 79010 + }, + { + "epoch": 3.69, + "learning_rate": 7.775774771526658e-06, + "loss": 0.0623, + "step": 79015 + }, + { + "epoch": 3.69, + "learning_rate": 7.77499098647187e-06, + "loss": 0.0741, + "step": 79020 + }, + { + "epoch": 3.69, + "learning_rate": 7.774207201417084e-06, + "loss": 0.0935, + "step": 79025 + }, + { + "epoch": 3.69, + "learning_rate": 7.773423416362298e-06, + "loss": 0.0852, + "step": 79030 + }, + { + "epoch": 3.69, + "learning_rate": 7.77263963130751e-06, + "loss": 0.1269, + "step": 79035 + }, + { + "epoch": 3.69, + "learning_rate": 7.771855846252724e-06, + "loss": 0.2653, + "step": 79040 + }, + { + "epoch": 3.69, + "learning_rate": 7.771072061197937e-06, + "loss": 0.3134, + "step": 79045 + }, + { + "epoch": 3.69, + "learning_rate": 7.77028827614315e-06, + "loss": 0.0349, + "step": 79050 + }, + { + "epoch": 3.69, + "learning_rate": 7.769504491088364e-06, + "loss": 0.0252, + "step": 79055 + }, + { + "epoch": 3.69, + "learning_rate": 7.768720706033578e-06, + "loss": 0.0486, + "step": 79060 + }, + { + "epoch": 3.69, + "learning_rate": 7.767936920978792e-06, + "loss": 0.0675, + "step": 79065 + }, + { + "epoch": 3.69, + "learning_rate": 7.767153135924004e-06, + "loss": 0.0798, + "step": 79070 + }, + { + "epoch": 3.69, + "learning_rate": 7.766369350869218e-06, + "loss": 0.1307, + "step": 79075 + }, + { + "epoch": 3.69, + "learning_rate": 7.765585565814432e-06, + "loss": 0.0834, + "step": 79080 + }, + { + "epoch": 3.69, + "learning_rate": 7.764801780759646e-06, + "loss": 0.1156, + "step": 79085 + }, + { + "epoch": 3.69, + "learning_rate": 7.764017995704858e-06, + "loss": 0.3002, + "step": 79090 + }, + { + "epoch": 3.69, + "learning_rate": 7.763234210650072e-06, + "loss": 0.1833, + "step": 79095 + }, + { + "epoch": 3.69, + "learning_rate": 7.762450425595284e-06, + "loss": 0.0513, + "step": 79100 + }, + { + "epoch": 3.69, + "learning_rate": 7.761666640540498e-06, + "loss": 0.0273, + "step": 79105 + }, + { + "epoch": 3.69, + "learning_rate": 7.760882855485712e-06, + "loss": 0.0704, + "step": 79110 + }, + { + "epoch": 3.69, + "learning_rate": 7.760099070430926e-06, + "loss": 0.0579, + "step": 79115 + }, + { + "epoch": 3.69, + "learning_rate": 7.759315285376138e-06, + "loss": 0.1112, + "step": 79120 + }, + { + "epoch": 3.69, + "learning_rate": 7.758531500321352e-06, + "loss": 0.067, + "step": 79125 + }, + { + "epoch": 3.69, + "learning_rate": 7.757747715266566e-06, + "loss": 0.1091, + "step": 79130 + }, + { + "epoch": 3.69, + "learning_rate": 7.75696393021178e-06, + "loss": 0.1161, + "step": 79135 + }, + { + "epoch": 3.69, + "learning_rate": 7.756180145156992e-06, + "loss": 0.2371, + "step": 79140 + }, + { + "epoch": 3.69, + "learning_rate": 7.755396360102206e-06, + "loss": 0.277, + "step": 79145 + }, + { + "epoch": 3.69, + "learning_rate": 7.75461257504742e-06, + "loss": 0.0751, + "step": 79150 + }, + { + "epoch": 3.69, + "learning_rate": 7.75398554700359e-06, + "loss": 0.0851, + "step": 79155 + }, + { + "epoch": 3.69, + "learning_rate": 7.753201761948804e-06, + "loss": 0.0308, + "step": 79160 + }, + { + "epoch": 3.69, + "learning_rate": 7.752417976894018e-06, + "loss": 0.0851, + "step": 79165 + }, + { + "epoch": 3.69, + "learning_rate": 7.75163419183923e-06, + "loss": 0.0434, + "step": 79170 + }, + { + "epoch": 3.69, + "learning_rate": 7.750850406784444e-06, + "loss": 0.0541, + "step": 79175 + }, + { + "epoch": 3.69, + "learning_rate": 7.750066621729657e-06, + "loss": 0.1718, + "step": 79180 + }, + { + "epoch": 3.69, + "learning_rate": 7.74928283667487e-06, + "loss": 0.0957, + "step": 79185 + }, + { + "epoch": 3.7, + "learning_rate": 7.748499051620085e-06, + "loss": 0.2069, + "step": 79190 + }, + { + "epoch": 3.7, + "learning_rate": 7.747715266565298e-06, + "loss": 0.3096, + "step": 79195 + }, + { + "epoch": 3.7, + "learning_rate": 7.74693148151051e-06, + "loss": 0.0314, + "step": 79200 + }, + { + "epoch": 3.7, + "learning_rate": 7.746147696455725e-06, + "loss": 0.026, + "step": 79205 + }, + { + "epoch": 3.7, + "learning_rate": 7.745363911400938e-06, + "loss": 0.0116, + "step": 79210 + }, + { + "epoch": 3.7, + "learning_rate": 7.744580126346152e-06, + "loss": 0.0652, + "step": 79215 + }, + { + "epoch": 3.7, + "learning_rate": 7.743796341291365e-06, + "loss": 0.0941, + "step": 79220 + }, + { + "epoch": 3.7, + "learning_rate": 7.743012556236578e-06, + "loss": 0.054, + "step": 79225 + }, + { + "epoch": 3.7, + "learning_rate": 7.742228771181792e-06, + "loss": 0.0919, + "step": 79230 + }, + { + "epoch": 3.7, + "learning_rate": 7.741444986127005e-06, + "loss": 0.0863, + "step": 79235 + }, + { + "epoch": 3.7, + "learning_rate": 7.740661201072218e-06, + "loss": 0.2058, + "step": 79240 + }, + { + "epoch": 3.7, + "learning_rate": 7.739877416017432e-06, + "loss": 0.3746, + "step": 79245 + }, + { + "epoch": 3.7, + "learning_rate": 7.739093630962645e-06, + "loss": 0.0631, + "step": 79250 + }, + { + "epoch": 3.7, + "learning_rate": 7.738309845907859e-06, + "loss": 0.0152, + "step": 79255 + }, + { + "epoch": 3.7, + "learning_rate": 7.737526060853072e-06, + "loss": 0.0425, + "step": 79260 + }, + { + "epoch": 3.7, + "learning_rate": 7.736742275798286e-06, + "loss": 0.104, + "step": 79265 + }, + { + "epoch": 3.7, + "learning_rate": 7.735958490743499e-06, + "loss": 0.0209, + "step": 79270 + }, + { + "epoch": 3.7, + "learning_rate": 7.735174705688712e-06, + "loss": 0.0863, + "step": 79275 + }, + { + "epoch": 3.7, + "learning_rate": 7.734390920633926e-06, + "loss": 0.1091, + "step": 79280 + }, + { + "epoch": 3.7, + "learning_rate": 7.73360713557914e-06, + "loss": 0.1356, + "step": 79285 + }, + { + "epoch": 3.7, + "learning_rate": 7.732823350524352e-06, + "loss": 0.1522, + "step": 79290 + }, + { + "epoch": 3.7, + "learning_rate": 7.732039565469566e-06, + "loss": 0.1584, + "step": 79295 + }, + { + "epoch": 3.7, + "learning_rate": 7.731255780414779e-06, + "loss": 0.0795, + "step": 79300 + }, + { + "epoch": 3.7, + "learning_rate": 7.730471995359992e-06, + "loss": 0.0596, + "step": 79305 + }, + { + "epoch": 3.7, + "learning_rate": 7.729688210305206e-06, + "loss": 0.0502, + "step": 79310 + }, + { + "epoch": 3.7, + "learning_rate": 7.72890442525042e-06, + "loss": 0.0671, + "step": 79315 + }, + { + "epoch": 3.7, + "learning_rate": 7.728120640195633e-06, + "loss": 0.0851, + "step": 79320 + }, + { + "epoch": 3.7, + "learning_rate": 7.727336855140846e-06, + "loss": 0.0644, + "step": 79325 + }, + { + "epoch": 3.7, + "learning_rate": 7.72655307008606e-06, + "loss": 0.104, + "step": 79330 + }, + { + "epoch": 3.7, + "learning_rate": 7.725769285031274e-06, + "loss": 0.1543, + "step": 79335 + }, + { + "epoch": 3.7, + "learning_rate": 7.724985499976486e-06, + "loss": 0.1573, + "step": 79340 + }, + { + "epoch": 3.7, + "learning_rate": 7.7242017149217e-06, + "loss": 0.2184, + "step": 79345 + }, + { + "epoch": 3.7, + "learning_rate": 7.723417929866914e-06, + "loss": 0.0369, + "step": 79350 + }, + { + "epoch": 3.7, + "learning_rate": 7.722634144812128e-06, + "loss": 0.0109, + "step": 79355 + }, + { + "epoch": 3.7, + "learning_rate": 7.721850359757342e-06, + "loss": 0.0269, + "step": 79360 + }, + { + "epoch": 3.7, + "learning_rate": 7.721066574702554e-06, + "loss": 0.0179, + "step": 79365 + }, + { + "epoch": 3.7, + "learning_rate": 7.720282789647766e-06, + "loss": 0.0539, + "step": 79370 + }, + { + "epoch": 3.7, + "learning_rate": 7.71949900459298e-06, + "loss": 0.0521, + "step": 79375 + }, + { + "epoch": 3.7, + "learning_rate": 7.718715219538194e-06, + "loss": 0.0838, + "step": 79380 + }, + { + "epoch": 3.7, + "learning_rate": 7.717931434483408e-06, + "loss": 0.0885, + "step": 79385 + }, + { + "epoch": 3.7, + "learning_rate": 7.717147649428622e-06, + "loss": 0.2194, + "step": 79390 + }, + { + "epoch": 3.7, + "learning_rate": 7.716363864373834e-06, + "loss": 0.2308, + "step": 79395 + }, + { + "epoch": 3.7, + "learning_rate": 7.715580079319048e-06, + "loss": 0.0599, + "step": 79400 + }, + { + "epoch": 3.71, + "learning_rate": 7.714796294264262e-06, + "loss": 0.0291, + "step": 79405 + }, + { + "epoch": 3.71, + "learning_rate": 7.714012509209476e-06, + "loss": 0.0124, + "step": 79410 + }, + { + "epoch": 3.71, + "learning_rate": 7.713228724154688e-06, + "loss": 0.1041, + "step": 79415 + }, + { + "epoch": 3.71, + "learning_rate": 7.712444939099902e-06, + "loss": 0.0448, + "step": 79420 + }, + { + "epoch": 3.71, + "learning_rate": 7.711661154045116e-06, + "loss": 0.0642, + "step": 79425 + }, + { + "epoch": 3.71, + "learning_rate": 7.710877368990328e-06, + "loss": 0.0951, + "step": 79430 + }, + { + "epoch": 3.71, + "learning_rate": 7.710093583935542e-06, + "loss": 0.1728, + "step": 79435 + }, + { + "epoch": 3.71, + "learning_rate": 7.709309798880756e-06, + "loss": 0.1487, + "step": 79440 + }, + { + "epoch": 3.71, + "learning_rate": 7.708526013825968e-06, + "loss": 0.237, + "step": 79445 + }, + { + "epoch": 3.71, + "learning_rate": 7.707742228771182e-06, + "loss": 0.0672, + "step": 79450 + }, + { + "epoch": 3.71, + "learning_rate": 7.706958443716396e-06, + "loss": 0.0275, + "step": 79455 + }, + { + "epoch": 3.71, + "learning_rate": 7.70617465866161e-06, + "loss": 0.0369, + "step": 79460 + }, + { + "epoch": 3.71, + "learning_rate": 7.705390873606822e-06, + "loss": 0.0482, + "step": 79465 + }, + { + "epoch": 3.71, + "learning_rate": 7.704607088552036e-06, + "loss": 0.0857, + "step": 79470 + }, + { + "epoch": 3.71, + "learning_rate": 7.70382330349725e-06, + "loss": 0.0598, + "step": 79475 + }, + { + "epoch": 3.71, + "learning_rate": 7.703039518442464e-06, + "loss": 0.1201, + "step": 79480 + }, + { + "epoch": 3.71, + "learning_rate": 7.702255733387676e-06, + "loss": 0.1677, + "step": 79485 + }, + { + "epoch": 3.71, + "learning_rate": 7.70147194833289e-06, + "loss": 0.2025, + "step": 79490 + }, + { + "epoch": 3.71, + "learning_rate": 7.700688163278102e-06, + "loss": 0.3051, + "step": 79495 + }, + { + "epoch": 3.71, + "learning_rate": 7.699904378223316e-06, + "loss": 0.1057, + "step": 79500 + }, + { + "epoch": 3.71, + "learning_rate": 7.69912059316853e-06, + "loss": 0.0513, + "step": 79505 + }, + { + "epoch": 3.71, + "learning_rate": 7.698336808113744e-06, + "loss": 0.0324, + "step": 79510 + }, + { + "epoch": 3.71, + "learning_rate": 7.697553023058956e-06, + "loss": 0.031, + "step": 79515 + }, + { + "epoch": 3.71, + "learning_rate": 7.69676923800417e-06, + "loss": 0.0548, + "step": 79520 + }, + { + "epoch": 3.71, + "learning_rate": 7.695985452949384e-06, + "loss": 0.0992, + "step": 79525 + }, + { + "epoch": 3.71, + "learning_rate": 7.695201667894598e-06, + "loss": 0.1201, + "step": 79530 + }, + { + "epoch": 3.71, + "learning_rate": 7.69441788283981e-06, + "loss": 0.0911, + "step": 79535 + }, + { + "epoch": 3.71, + "learning_rate": 7.693634097785024e-06, + "loss": 0.3037, + "step": 79540 + }, + { + "epoch": 3.71, + "learning_rate": 7.692850312730238e-06, + "loss": 0.4432, + "step": 79545 + }, + { + "epoch": 3.71, + "learning_rate": 7.692066527675452e-06, + "loss": 0.0432, + "step": 79550 + }, + { + "epoch": 3.71, + "learning_rate": 7.691282742620664e-06, + "loss": 0.0619, + "step": 79555 + }, + { + "epoch": 3.71, + "learning_rate": 7.690498957565878e-06, + "loss": 0.0823, + "step": 79560 + }, + { + "epoch": 3.71, + "learning_rate": 7.68971517251109e-06, + "loss": 0.0433, + "step": 79565 + }, + { + "epoch": 3.71, + "learning_rate": 7.688931387456304e-06, + "loss": 0.0585, + "step": 79570 + }, + { + "epoch": 3.71, + "learning_rate": 7.688147602401518e-06, + "loss": 0.1646, + "step": 79575 + }, + { + "epoch": 3.71, + "learning_rate": 7.687363817346732e-06, + "loss": 0.1671, + "step": 79580 + }, + { + "epoch": 3.71, + "learning_rate": 7.686580032291944e-06, + "loss": 0.0532, + "step": 79585 + }, + { + "epoch": 3.71, + "learning_rate": 7.685796247237158e-06, + "loss": 0.1626, + "step": 79590 + }, + { + "epoch": 3.71, + "learning_rate": 7.685012462182372e-06, + "loss": 0.3362, + "step": 79595 + }, + { + "epoch": 3.71, + "learning_rate": 7.684228677127586e-06, + "loss": 0.0745, + "step": 79600 + }, + { + "epoch": 3.71, + "learning_rate": 7.683444892072798e-06, + "loss": 0.0608, + "step": 79605 + }, + { + "epoch": 3.71, + "learning_rate": 7.682661107018012e-06, + "loss": 0.0552, + "step": 79610 + }, + { + "epoch": 3.71, + "learning_rate": 7.681877321963226e-06, + "loss": 0.0213, + "step": 79615 + }, + { + "epoch": 3.72, + "learning_rate": 7.68109353690844e-06, + "loss": 0.0431, + "step": 79620 + }, + { + "epoch": 3.72, + "learning_rate": 7.680309751853652e-06, + "loss": 0.084, + "step": 79625 + }, + { + "epoch": 3.72, + "learning_rate": 7.679525966798866e-06, + "loss": 0.0523, + "step": 79630 + }, + { + "epoch": 3.72, + "learning_rate": 7.678898938755036e-06, + "loss": 0.1399, + "step": 79635 + }, + { + "epoch": 3.72, + "learning_rate": 7.67811515370025e-06, + "loss": 0.1198, + "step": 79640 + }, + { + "epoch": 3.72, + "learning_rate": 7.677331368645462e-06, + "loss": 0.1457, + "step": 79645 + }, + { + "epoch": 3.72, + "learning_rate": 7.676547583590676e-06, + "loss": 0.0347, + "step": 79650 + }, + { + "epoch": 3.72, + "learning_rate": 7.67576379853589e-06, + "loss": 0.0446, + "step": 79655 + }, + { + "epoch": 3.72, + "learning_rate": 7.674980013481104e-06, + "loss": 0.0179, + "step": 79660 + }, + { + "epoch": 3.72, + "learning_rate": 7.674196228426316e-06, + "loss": 0.058, + "step": 79665 + }, + { + "epoch": 3.72, + "learning_rate": 7.67341244337153e-06, + "loss": 0.041, + "step": 79670 + }, + { + "epoch": 3.72, + "learning_rate": 7.672628658316744e-06, + "loss": 0.112, + "step": 79675 + }, + { + "epoch": 3.72, + "learning_rate": 7.671844873261958e-06, + "loss": 0.1677, + "step": 79680 + }, + { + "epoch": 3.72, + "learning_rate": 7.671061088207172e-06, + "loss": 0.148, + "step": 79685 + }, + { + "epoch": 3.72, + "learning_rate": 7.670277303152384e-06, + "loss": 0.2323, + "step": 79690 + }, + { + "epoch": 3.72, + "learning_rate": 7.669493518097596e-06, + "loss": 0.1864, + "step": 79695 + }, + { + "epoch": 3.72, + "learning_rate": 7.66870973304281e-06, + "loss": 0.046, + "step": 79700 + }, + { + "epoch": 3.72, + "learning_rate": 7.667925947988024e-06, + "loss": 0.0314, + "step": 79705 + }, + { + "epoch": 3.72, + "learning_rate": 7.667142162933238e-06, + "loss": 0.0366, + "step": 79710 + }, + { + "epoch": 3.72, + "learning_rate": 7.66635837787845e-06, + "loss": 0.0915, + "step": 79715 + }, + { + "epoch": 3.72, + "learning_rate": 7.665574592823664e-06, + "loss": 0.074, + "step": 79720 + }, + { + "epoch": 3.72, + "learning_rate": 7.664790807768878e-06, + "loss": 0.1124, + "step": 79725 + }, + { + "epoch": 3.72, + "learning_rate": 7.664007022714092e-06, + "loss": 0.0639, + "step": 79730 + }, + { + "epoch": 3.72, + "learning_rate": 7.663223237659306e-06, + "loss": 0.134, + "step": 79735 + }, + { + "epoch": 3.72, + "learning_rate": 7.662439452604518e-06, + "loss": 0.0796, + "step": 79740 + }, + { + "epoch": 3.72, + "learning_rate": 7.661655667549732e-06, + "loss": 0.2107, + "step": 79745 + }, + { + "epoch": 3.72, + "learning_rate": 7.660871882494946e-06, + "loss": 0.035, + "step": 79750 + }, + { + "epoch": 3.72, + "learning_rate": 7.660088097440158e-06, + "loss": 0.0502, + "step": 79755 + }, + { + "epoch": 3.72, + "learning_rate": 7.659304312385372e-06, + "loss": 0.0185, + "step": 79760 + }, + { + "epoch": 3.72, + "learning_rate": 7.658520527330584e-06, + "loss": 0.0351, + "step": 79765 + }, + { + "epoch": 3.72, + "learning_rate": 7.657736742275798e-06, + "loss": 0.1205, + "step": 79770 + }, + { + "epoch": 3.72, + "learning_rate": 7.656952957221012e-06, + "loss": 0.0339, + "step": 79775 + }, + { + "epoch": 3.72, + "learning_rate": 7.656169172166226e-06, + "loss": 0.1625, + "step": 79780 + }, + { + "epoch": 3.72, + "learning_rate": 7.65538538711144e-06, + "loss": 0.2515, + "step": 79785 + }, + { + "epoch": 3.72, + "learning_rate": 7.654601602056652e-06, + "loss": 0.1826, + "step": 79790 + }, + { + "epoch": 3.72, + "learning_rate": 7.653817817001866e-06, + "loss": 0.2473, + "step": 79795 + }, + { + "epoch": 3.72, + "learning_rate": 7.65303403194708e-06, + "loss": 0.108, + "step": 79800 + }, + { + "epoch": 3.72, + "learning_rate": 7.652250246892294e-06, + "loss": 0.029, + "step": 79805 + }, + { + "epoch": 3.72, + "learning_rate": 7.651466461837506e-06, + "loss": 0.0212, + "step": 79810 + }, + { + "epoch": 3.72, + "learning_rate": 7.65068267678272e-06, + "loss": 0.0281, + "step": 79815 + }, + { + "epoch": 3.72, + "learning_rate": 7.649898891727932e-06, + "loss": 0.1148, + "step": 79820 + }, + { + "epoch": 3.72, + "learning_rate": 7.649115106673146e-06, + "loss": 0.1028, + "step": 79825 + }, + { + "epoch": 3.72, + "learning_rate": 7.64833132161836e-06, + "loss": 0.1096, + "step": 79830 + }, + { + "epoch": 3.73, + "learning_rate": 7.647547536563574e-06, + "loss": 0.1648, + "step": 79835 + }, + { + "epoch": 3.73, + "learning_rate": 7.646763751508786e-06, + "loss": 0.2439, + "step": 79840 + }, + { + "epoch": 3.73, + "learning_rate": 7.645979966454e-06, + "loss": 0.3196, + "step": 79845 + }, + { + "epoch": 3.73, + "learning_rate": 7.645196181399214e-06, + "loss": 0.0447, + "step": 79850 + }, + { + "epoch": 3.73, + "learning_rate": 7.644412396344428e-06, + "loss": 0.0287, + "step": 79855 + }, + { + "epoch": 3.73, + "learning_rate": 7.64362861128964e-06, + "loss": 0.0343, + "step": 79860 + }, + { + "epoch": 3.73, + "learning_rate": 7.642844826234854e-06, + "loss": 0.0918, + "step": 79865 + }, + { + "epoch": 3.73, + "learning_rate": 7.642061041180068e-06, + "loss": 0.0603, + "step": 79870 + }, + { + "epoch": 3.73, + "learning_rate": 7.641277256125282e-06, + "loss": 0.0978, + "step": 79875 + }, + { + "epoch": 3.73, + "learning_rate": 7.640493471070494e-06, + "loss": 0.1099, + "step": 79880 + }, + { + "epoch": 3.73, + "learning_rate": 7.639709686015708e-06, + "loss": 0.0707, + "step": 79885 + }, + { + "epoch": 3.73, + "learning_rate": 7.63892590096092e-06, + "loss": 0.1441, + "step": 79890 + }, + { + "epoch": 3.73, + "learning_rate": 7.638142115906134e-06, + "loss": 0.2386, + "step": 79895 + }, + { + "epoch": 3.73, + "learning_rate": 7.637358330851348e-06, + "loss": 0.0736, + "step": 79900 + }, + { + "epoch": 3.73, + "learning_rate": 7.636574545796562e-06, + "loss": 0.0412, + "step": 79905 + }, + { + "epoch": 3.73, + "learning_rate": 7.635790760741774e-06, + "loss": 0.0549, + "step": 79910 + }, + { + "epoch": 3.73, + "learning_rate": 7.635006975686988e-06, + "loss": 0.0487, + "step": 79915 + }, + { + "epoch": 3.73, + "learning_rate": 7.634223190632202e-06, + "loss": 0.071, + "step": 79920 + }, + { + "epoch": 3.73, + "learning_rate": 7.633439405577416e-06, + "loss": 0.0594, + "step": 79925 + }, + { + "epoch": 3.73, + "learning_rate": 7.632655620522628e-06, + "loss": 0.0753, + "step": 79930 + }, + { + "epoch": 3.73, + "learning_rate": 7.631871835467842e-06, + "loss": 0.1202, + "step": 79935 + }, + { + "epoch": 3.73, + "learning_rate": 7.631088050413056e-06, + "loss": 0.1864, + "step": 79940 + }, + { + "epoch": 3.73, + "learning_rate": 7.63030426535827e-06, + "loss": 0.2801, + "step": 79945 + }, + { + "epoch": 3.73, + "learning_rate": 7.629520480303482e-06, + "loss": 0.0676, + "step": 79950 + }, + { + "epoch": 3.73, + "learning_rate": 7.628736695248695e-06, + "loss": 0.0707, + "step": 79955 + }, + { + "epoch": 3.73, + "learning_rate": 7.627952910193909e-06, + "loss": 0.0497, + "step": 79960 + }, + { + "epoch": 3.73, + "learning_rate": 7.627169125139122e-06, + "loss": 0.0591, + "step": 79965 + }, + { + "epoch": 3.73, + "learning_rate": 7.626385340084336e-06, + "loss": 0.0237, + "step": 79970 + }, + { + "epoch": 3.73, + "learning_rate": 7.625601555029549e-06, + "loss": 0.0949, + "step": 79975 + }, + { + "epoch": 3.73, + "learning_rate": 7.624817769974763e-06, + "loss": 0.0499, + "step": 79980 + }, + { + "epoch": 3.73, + "learning_rate": 7.624033984919977e-06, + "loss": 0.1492, + "step": 79985 + }, + { + "epoch": 3.73, + "learning_rate": 7.62325019986519e-06, + "loss": 0.1709, + "step": 79990 + }, + { + "epoch": 3.73, + "learning_rate": 7.622466414810404e-06, + "loss": 0.3034, + "step": 79995 + }, + { + "epoch": 3.73, + "learning_rate": 7.621682629755617e-06, + "loss": 0.1068, + "step": 80000 + }, + { + "epoch": 3.73, + "learning_rate": 7.620898844700831e-06, + "loss": 0.0279, + "step": 80005 + }, + { + "epoch": 3.73, + "learning_rate": 7.620115059646044e-06, + "loss": 0.08, + "step": 80010 + }, + { + "epoch": 3.73, + "learning_rate": 7.619331274591256e-06, + "loss": 0.0887, + "step": 80015 + }, + { + "epoch": 3.73, + "learning_rate": 7.61854748953647e-06, + "loss": 0.0659, + "step": 80020 + }, + { + "epoch": 3.73, + "learning_rate": 7.617763704481683e-06, + "loss": 0.1461, + "step": 80025 + }, + { + "epoch": 3.73, + "learning_rate": 7.616979919426897e-06, + "loss": 0.1, + "step": 80030 + }, + { + "epoch": 3.73, + "learning_rate": 7.616196134372111e-06, + "loss": 0.1256, + "step": 80035 + }, + { + "epoch": 3.73, + "learning_rate": 7.615412349317324e-06, + "loss": 0.1121, + "step": 80040 + }, + { + "epoch": 3.74, + "learning_rate": 7.614628564262538e-06, + "loss": 0.3639, + "step": 80045 + }, + { + "epoch": 3.74, + "learning_rate": 7.613844779207751e-06, + "loss": 0.0781, + "step": 80050 + }, + { + "epoch": 3.74, + "learning_rate": 7.613060994152965e-06, + "loss": 0.0196, + "step": 80055 + }, + { + "epoch": 3.74, + "learning_rate": 7.612277209098178e-06, + "loss": 0.1044, + "step": 80060 + }, + { + "epoch": 3.74, + "learning_rate": 7.6114934240433916e-06, + "loss": 0.0287, + "step": 80065 + }, + { + "epoch": 3.74, + "learning_rate": 7.610709638988605e-06, + "loss": 0.0434, + "step": 80070 + }, + { + "epoch": 3.74, + "learning_rate": 7.6099258539338185e-06, + "loss": 0.0981, + "step": 80075 + }, + { + "epoch": 3.74, + "learning_rate": 7.609142068879031e-06, + "loss": 0.0543, + "step": 80080 + }, + { + "epoch": 3.74, + "learning_rate": 7.608358283824245e-06, + "loss": 0.0924, + "step": 80085 + }, + { + "epoch": 3.74, + "learning_rate": 7.607574498769458e-06, + "loss": 0.2712, + "step": 80090 + }, + { + "epoch": 3.74, + "learning_rate": 7.606790713714672e-06, + "loss": 0.2002, + "step": 80095 + }, + { + "epoch": 3.74, + "learning_rate": 7.606006928659885e-06, + "loss": 0.0818, + "step": 80100 + }, + { + "epoch": 3.74, + "learning_rate": 7.6052231436050986e-06, + "loss": 0.0633, + "step": 80105 + }, + { + "epoch": 3.74, + "learning_rate": 7.604439358550312e-06, + "loss": 0.0325, + "step": 80110 + }, + { + "epoch": 3.74, + "learning_rate": 7.6036555734955255e-06, + "loss": 0.1279, + "step": 80115 + }, + { + "epoch": 3.74, + "learning_rate": 7.602871788440739e-06, + "loss": 0.0537, + "step": 80120 + }, + { + "epoch": 3.74, + "learning_rate": 7.6020880033859525e-06, + "loss": 0.193, + "step": 80125 + }, + { + "epoch": 3.74, + "learning_rate": 7.6013042183311655e-06, + "loss": 0.0717, + "step": 80130 + }, + { + "epoch": 3.74, + "learning_rate": 7.6005204332763795e-06, + "loss": 0.1594, + "step": 80135 + }, + { + "epoch": 3.74, + "learning_rate": 7.5997366482215925e-06, + "loss": 0.1187, + "step": 80140 + }, + { + "epoch": 3.74, + "learning_rate": 7.5989528631668056e-06, + "loss": 0.3229, + "step": 80145 + }, + { + "epoch": 3.74, + "learning_rate": 7.598169078112019e-06, + "loss": 0.0789, + "step": 80150 + }, + { + "epoch": 3.74, + "learning_rate": 7.5973852930572325e-06, + "loss": 0.0952, + "step": 80155 + }, + { + "epoch": 3.74, + "learning_rate": 7.596601508002446e-06, + "loss": 0.0218, + "step": 80160 + }, + { + "epoch": 3.74, + "learning_rate": 7.5958177229476595e-06, + "loss": 0.0401, + "step": 80165 + }, + { + "epoch": 3.74, + "learning_rate": 7.5950339378928726e-06, + "loss": 0.0259, + "step": 80170 + }, + { + "epoch": 3.74, + "learning_rate": 7.5942501528380865e-06, + "loss": 0.1339, + "step": 80175 + }, + { + "epoch": 3.74, + "learning_rate": 7.5934663677832995e-06, + "loss": 0.0555, + "step": 80180 + }, + { + "epoch": 3.74, + "learning_rate": 7.5926825827285134e-06, + "loss": 0.1528, + "step": 80185 + }, + { + "epoch": 3.74, + "learning_rate": 7.5918987976737265e-06, + "loss": 0.2276, + "step": 80190 + }, + { + "epoch": 3.74, + "learning_rate": 7.59111501261894e-06, + "loss": 0.5037, + "step": 80195 + }, + { + "epoch": 3.74, + "learning_rate": 7.5903312275641535e-06, + "loss": 0.0713, + "step": 80200 + }, + { + "epoch": 3.74, + "learning_rate": 7.589547442509367e-06, + "loss": 0.0174, + "step": 80205 + }, + { + "epoch": 3.74, + "learning_rate": 7.5887636574545796e-06, + "loss": 0.016, + "step": 80210 + }, + { + "epoch": 3.74, + "learning_rate": 7.5879798723997935e-06, + "loss": 0.0537, + "step": 80215 + }, + { + "epoch": 3.74, + "learning_rate": 7.5871960873450065e-06, + "loss": 0.1113, + "step": 80220 + }, + { + "epoch": 3.74, + "learning_rate": 7.5864123022902204e-06, + "loss": 0.1015, + "step": 80225 + }, + { + "epoch": 3.74, + "learning_rate": 7.5856285172354335e-06, + "loss": 0.1157, + "step": 80230 + }, + { + "epoch": 3.74, + "learning_rate": 7.584844732180647e-06, + "loss": 0.097, + "step": 80235 + }, + { + "epoch": 3.74, + "learning_rate": 7.5840609471258605e-06, + "loss": 0.1815, + "step": 80240 + }, + { + "epoch": 3.74, + "learning_rate": 7.583277162071074e-06, + "loss": 0.2547, + "step": 80245 + }, + { + "epoch": 3.74, + "learning_rate": 7.5824933770162874e-06, + "loss": 0.064, + "step": 80250 + }, + { + "epoch": 3.74, + "learning_rate": 7.581709591961501e-06, + "loss": 0.0617, + "step": 80255 + }, + { + "epoch": 3.75, + "learning_rate": 7.580925806906715e-06, + "loss": 0.0732, + "step": 80260 + }, + { + "epoch": 3.75, + "learning_rate": 7.580142021851928e-06, + "loss": 0.0817, + "step": 80265 + }, + { + "epoch": 3.75, + "learning_rate": 7.579358236797142e-06, + "loss": 0.0828, + "step": 80270 + }, + { + "epoch": 3.75, + "learning_rate": 7.578574451742354e-06, + "loss": 0.1249, + "step": 80275 + }, + { + "epoch": 3.75, + "learning_rate": 7.5777906666875675e-06, + "loss": 0.085, + "step": 80280 + }, + { + "epoch": 3.75, + "learning_rate": 7.577006881632781e-06, + "loss": 0.2085, + "step": 80285 + }, + { + "epoch": 3.75, + "learning_rate": 7.5762230965779944e-06, + "loss": 0.1444, + "step": 80290 + }, + { + "epoch": 3.75, + "learning_rate": 7.575439311523208e-06, + "loss": 0.2968, + "step": 80295 + }, + { + "epoch": 3.75, + "learning_rate": 7.574655526468422e-06, + "loss": 0.0624, + "step": 80300 + }, + { + "epoch": 3.75, + "learning_rate": 7.573871741413635e-06, + "loss": 0.0325, + "step": 80305 + }, + { + "epoch": 3.75, + "learning_rate": 7.573087956358849e-06, + "loss": 0.0343, + "step": 80310 + }, + { + "epoch": 3.75, + "learning_rate": 7.572304171304062e-06, + "loss": 0.0628, + "step": 80315 + }, + { + "epoch": 3.75, + "learning_rate": 7.571520386249276e-06, + "loss": 0.0368, + "step": 80320 + }, + { + "epoch": 3.75, + "learning_rate": 7.570736601194489e-06, + "loss": 0.1064, + "step": 80325 + }, + { + "epoch": 3.75, + "learning_rate": 7.569952816139703e-06, + "loss": 0.1008, + "step": 80330 + }, + { + "epoch": 3.75, + "learning_rate": 7.569169031084916e-06, + "loss": 0.0986, + "step": 80335 + }, + { + "epoch": 3.75, + "learning_rate": 7.568385246030128e-06, + "loss": 0.1832, + "step": 80340 + }, + { + "epoch": 3.75, + "learning_rate": 7.567601460975342e-06, + "loss": 0.2612, + "step": 80345 + }, + { + "epoch": 3.75, + "learning_rate": 7.566817675920556e-06, + "loss": 0.0729, + "step": 80350 + }, + { + "epoch": 3.75, + "learning_rate": 7.566033890865769e-06, + "loss": 0.0367, + "step": 80355 + }, + { + "epoch": 3.75, + "learning_rate": 7.565250105810983e-06, + "loss": 0.0083, + "step": 80360 + }, + { + "epoch": 3.75, + "learning_rate": 7.564466320756196e-06, + "loss": 0.1255, + "step": 80365 + }, + { + "epoch": 3.75, + "learning_rate": 7.56368253570141e-06, + "loss": 0.0915, + "step": 80370 + }, + { + "epoch": 3.75, + "learning_rate": 7.562898750646623e-06, + "loss": 0.0961, + "step": 80375 + }, + { + "epoch": 3.75, + "learning_rate": 7.562114965591837e-06, + "loss": 0.0742, + "step": 80380 + }, + { + "epoch": 3.75, + "learning_rate": 7.56133118053705e-06, + "loss": 0.0985, + "step": 80385 + }, + { + "epoch": 3.75, + "learning_rate": 7.560547395482264e-06, + "loss": 0.2192, + "step": 80390 + }, + { + "epoch": 3.75, + "learning_rate": 7.559763610427477e-06, + "loss": 0.1856, + "step": 80395 + }, + { + "epoch": 3.75, + "learning_rate": 7.558979825372691e-06, + "loss": 0.0524, + "step": 80400 + }, + { + "epoch": 3.75, + "learning_rate": 7.558196040317903e-06, + "loss": 0.068, + "step": 80405 + }, + { + "epoch": 3.75, + "learning_rate": 7.557412255263117e-06, + "loss": 0.0281, + "step": 80410 + }, + { + "epoch": 3.75, + "learning_rate": 7.55662847020833e-06, + "loss": 0.0676, + "step": 80415 + }, + { + "epoch": 3.75, + "learning_rate": 7.555844685153544e-06, + "loss": 0.0235, + "step": 80420 + }, + { + "epoch": 3.75, + "learning_rate": 7.555060900098757e-06, + "loss": 0.0761, + "step": 80425 + }, + { + "epoch": 3.75, + "learning_rate": 7.554277115043971e-06, + "loss": 0.145, + "step": 80430 + }, + { + "epoch": 3.75, + "learning_rate": 7.553493329989184e-06, + "loss": 0.1257, + "step": 80435 + }, + { + "epoch": 3.75, + "learning_rate": 7.552709544934398e-06, + "loss": 0.2336, + "step": 80440 + }, + { + "epoch": 3.75, + "learning_rate": 7.551925759879611e-06, + "loss": 0.146, + "step": 80445 + }, + { + "epoch": 3.75, + "learning_rate": 7.551141974824825e-06, + "loss": 0.071, + "step": 80450 + }, + { + "epoch": 3.75, + "learning_rate": 7.550358189770038e-06, + "loss": 0.0779, + "step": 80455 + }, + { + "epoch": 3.75, + "learning_rate": 7.549574404715252e-06, + "loss": 0.0214, + "step": 80460 + }, + { + "epoch": 3.75, + "learning_rate": 7.548790619660465e-06, + "loss": 0.0261, + "step": 80465 + }, + { + "epoch": 3.75, + "learning_rate": 7.548006834605678e-06, + "loss": 0.0636, + "step": 80470 + }, + { + "epoch": 3.76, + "learning_rate": 7.547223049550891e-06, + "loss": 0.1001, + "step": 80475 + }, + { + "epoch": 3.76, + "learning_rate": 7.546439264496105e-06, + "loss": 0.0961, + "step": 80480 + }, + { + "epoch": 3.76, + "learning_rate": 7.545655479441318e-06, + "loss": 0.1074, + "step": 80485 + }, + { + "epoch": 3.76, + "learning_rate": 7.544871694386532e-06, + "loss": 0.1157, + "step": 80490 + }, + { + "epoch": 3.76, + "learning_rate": 7.544087909331745e-06, + "loss": 0.4315, + "step": 80495 + }, + { + "epoch": 3.76, + "learning_rate": 7.543304124276959e-06, + "loss": 0.0414, + "step": 80500 + }, + { + "epoch": 3.76, + "learning_rate": 7.542520339222172e-06, + "loss": 0.0545, + "step": 80505 + }, + { + "epoch": 3.76, + "learning_rate": 7.541736554167386e-06, + "loss": 0.0484, + "step": 80510 + }, + { + "epoch": 3.76, + "learning_rate": 7.540952769112599e-06, + "loss": 0.0749, + "step": 80515 + }, + { + "epoch": 3.76, + "learning_rate": 7.540168984057813e-06, + "loss": 0.0789, + "step": 80520 + }, + { + "epoch": 3.76, + "learning_rate": 7.539385199003027e-06, + "loss": 0.0878, + "step": 80525 + }, + { + "epoch": 3.76, + "learning_rate": 7.53860141394824e-06, + "loss": 0.0794, + "step": 80530 + }, + { + "epoch": 3.76, + "learning_rate": 7.537817628893452e-06, + "loss": 0.1222, + "step": 80535 + }, + { + "epoch": 3.76, + "learning_rate": 7.537033843838666e-06, + "loss": 0.1389, + "step": 80540 + }, + { + "epoch": 3.76, + "learning_rate": 7.536250058783879e-06, + "loss": 0.1715, + "step": 80545 + }, + { + "epoch": 3.76, + "learning_rate": 7.535466273729093e-06, + "loss": 0.0799, + "step": 80550 + }, + { + "epoch": 3.76, + "learning_rate": 7.534682488674306e-06, + "loss": 0.0222, + "step": 80555 + }, + { + "epoch": 3.76, + "learning_rate": 7.53389870361952e-06, + "loss": 0.0624, + "step": 80560 + }, + { + "epoch": 3.76, + "learning_rate": 7.533114918564734e-06, + "loss": 0.1226, + "step": 80565 + }, + { + "epoch": 3.76, + "learning_rate": 7.532331133509947e-06, + "loss": 0.1108, + "step": 80570 + }, + { + "epoch": 3.76, + "learning_rate": 7.531547348455161e-06, + "loss": 0.101, + "step": 80575 + }, + { + "epoch": 3.76, + "learning_rate": 7.530763563400374e-06, + "loss": 0.0392, + "step": 80580 + }, + { + "epoch": 3.76, + "learning_rate": 7.529979778345588e-06, + "loss": 0.1091, + "step": 80585 + }, + { + "epoch": 3.76, + "learning_rate": 7.529195993290801e-06, + "loss": 0.1041, + "step": 80590 + }, + { + "epoch": 3.76, + "learning_rate": 7.528412208236015e-06, + "loss": 0.2562, + "step": 80595 + }, + { + "epoch": 3.76, + "learning_rate": 7.527628423181227e-06, + "loss": 0.033, + "step": 80600 + }, + { + "epoch": 3.76, + "learning_rate": 7.52684463812644e-06, + "loss": 0.0161, + "step": 80605 + }, + { + "epoch": 3.76, + "learning_rate": 7.526060853071654e-06, + "loss": 0.0184, + "step": 80610 + }, + { + "epoch": 3.76, + "learning_rate": 7.525277068016868e-06, + "loss": 0.034, + "step": 80615 + }, + { + "epoch": 3.76, + "learning_rate": 7.524493282962081e-06, + "loss": 0.0559, + "step": 80620 + }, + { + "epoch": 3.76, + "learning_rate": 7.523709497907295e-06, + "loss": 0.0886, + "step": 80625 + }, + { + "epoch": 3.76, + "learning_rate": 7.522925712852508e-06, + "loss": 0.1356, + "step": 80630 + }, + { + "epoch": 3.76, + "learning_rate": 7.522141927797722e-06, + "loss": 0.2386, + "step": 80635 + }, + { + "epoch": 3.76, + "learning_rate": 7.521358142742935e-06, + "loss": 0.1457, + "step": 80640 + }, + { + "epoch": 3.76, + "learning_rate": 7.520574357688149e-06, + "loss": 0.2578, + "step": 80645 + }, + { + "epoch": 3.76, + "learning_rate": 7.519790572633362e-06, + "loss": 0.0684, + "step": 80650 + }, + { + "epoch": 3.76, + "learning_rate": 7.519006787578576e-06, + "loss": 0.0222, + "step": 80655 + }, + { + "epoch": 3.76, + "learning_rate": 7.518223002523789e-06, + "loss": 0.0702, + "step": 80660 + }, + { + "epoch": 3.76, + "learning_rate": 7.517439217469002e-06, + "loss": 0.0117, + "step": 80665 + }, + { + "epoch": 3.76, + "learning_rate": 7.516655432414215e-06, + "loss": 0.0342, + "step": 80670 + }, + { + "epoch": 3.76, + "learning_rate": 7.515871647359429e-06, + "loss": 0.1044, + "step": 80675 + }, + { + "epoch": 3.76, + "learning_rate": 7.515087862304642e-06, + "loss": 0.1218, + "step": 80680 + }, + { + "epoch": 3.76, + "learning_rate": 7.514304077249856e-06, + "loss": 0.1304, + "step": 80685 + }, + { + "epoch": 3.77, + "learning_rate": 7.513520292195069e-06, + "loss": 0.2197, + "step": 80690 + }, + { + "epoch": 3.77, + "learning_rate": 7.512736507140283e-06, + "loss": 0.3307, + "step": 80695 + }, + { + "epoch": 3.77, + "learning_rate": 7.511952722085496e-06, + "loss": 0.07, + "step": 80700 + }, + { + "epoch": 3.77, + "learning_rate": 7.51116893703071e-06, + "loss": 0.0504, + "step": 80705 + }, + { + "epoch": 3.77, + "learning_rate": 7.510385151975923e-06, + "loss": 0.0316, + "step": 80710 + }, + { + "epoch": 3.77, + "learning_rate": 7.509601366921137e-06, + "loss": 0.0594, + "step": 80715 + }, + { + "epoch": 3.77, + "learning_rate": 7.50881758186635e-06, + "loss": 0.1011, + "step": 80720 + }, + { + "epoch": 3.77, + "learning_rate": 7.508033796811564e-06, + "loss": 0.0862, + "step": 80725 + }, + { + "epoch": 3.77, + "learning_rate": 7.507250011756776e-06, + "loss": 0.1308, + "step": 80730 + }, + { + "epoch": 3.77, + "learning_rate": 7.50646622670199e-06, + "loss": 0.1348, + "step": 80735 + }, + { + "epoch": 3.77, + "learning_rate": 7.505682441647203e-06, + "loss": 0.2836, + "step": 80740 + }, + { + "epoch": 3.77, + "learning_rate": 7.504898656592417e-06, + "loss": 0.3387, + "step": 80745 + }, + { + "epoch": 3.77, + "learning_rate": 7.50411487153763e-06, + "loss": 0.1073, + "step": 80750 + }, + { + "epoch": 3.77, + "learning_rate": 7.503331086482844e-06, + "loss": 0.016, + "step": 80755 + }, + { + "epoch": 3.77, + "learning_rate": 7.502547301428057e-06, + "loss": 0.0336, + "step": 80760 + }, + { + "epoch": 3.77, + "learning_rate": 7.501763516373271e-06, + "loss": 0.0689, + "step": 80765 + }, + { + "epoch": 3.77, + "learning_rate": 7.500979731318484e-06, + "loss": 0.0737, + "step": 80770 + }, + { + "epoch": 3.77, + "learning_rate": 7.5001959462636976e-06, + "loss": 0.0906, + "step": 80775 + }, + { + "epoch": 3.77, + "learning_rate": 7.499412161208911e-06, + "loss": 0.1477, + "step": 80780 + }, + { + "epoch": 3.77, + "learning_rate": 7.4986283761541245e-06, + "loss": 0.1053, + "step": 80785 + }, + { + "epoch": 3.77, + "learning_rate": 7.4978445910993384e-06, + "loss": 0.1451, + "step": 80790 + }, + { + "epoch": 3.77, + "learning_rate": 7.497060806044551e-06, + "loss": 0.2624, + "step": 80795 + }, + { + "epoch": 3.77, + "learning_rate": 7.496277020989764e-06, + "loss": 0.0496, + "step": 80800 + }, + { + "epoch": 3.77, + "learning_rate": 7.495493235934978e-06, + "loss": 0.0683, + "step": 80805 + }, + { + "epoch": 3.77, + "learning_rate": 7.494709450880191e-06, + "loss": 0.0146, + "step": 80810 + }, + { + "epoch": 3.77, + "learning_rate": 7.4939256658254046e-06, + "loss": 0.0139, + "step": 80815 + }, + { + "epoch": 3.77, + "learning_rate": 7.493141880770618e-06, + "loss": 0.0735, + "step": 80820 + }, + { + "epoch": 3.77, + "learning_rate": 7.4923580957158315e-06, + "loss": 0.0898, + "step": 80825 + }, + { + "epoch": 3.77, + "learning_rate": 7.491574310661045e-06, + "loss": 0.0639, + "step": 80830 + }, + { + "epoch": 3.77, + "learning_rate": 7.4907905256062585e-06, + "loss": 0.1063, + "step": 80835 + }, + { + "epoch": 3.77, + "learning_rate": 7.490006740551472e-06, + "loss": 0.1522, + "step": 80840 + }, + { + "epoch": 3.77, + "learning_rate": 7.4892229554966855e-06, + "loss": 0.1718, + "step": 80845 + }, + { + "epoch": 3.77, + "learning_rate": 7.488439170441899e-06, + "loss": 0.1038, + "step": 80850 + }, + { + "epoch": 3.77, + "learning_rate": 7.4876553853871124e-06, + "loss": 0.0347, + "step": 80855 + }, + { + "epoch": 3.77, + "learning_rate": 7.486871600332325e-06, + "loss": 0.0486, + "step": 80860 + }, + { + "epoch": 3.77, + "learning_rate": 7.4860878152775385e-06, + "loss": 0.0466, + "step": 80865 + }, + { + "epoch": 3.77, + "learning_rate": 7.485304030222752e-06, + "loss": 0.1096, + "step": 80870 + }, + { + "epoch": 3.77, + "learning_rate": 7.4845202451679655e-06, + "loss": 0.1127, + "step": 80875 + }, + { + "epoch": 3.77, + "learning_rate": 7.483736460113179e-06, + "loss": 0.108, + "step": 80880 + }, + { + "epoch": 3.77, + "learning_rate": 7.4829526750583925e-06, + "loss": 0.1396, + "step": 80885 + }, + { + "epoch": 3.77, + "learning_rate": 7.482168890003606e-06, + "loss": 0.1624, + "step": 80890 + }, + { + "epoch": 3.77, + "learning_rate": 7.4813851049488194e-06, + "loss": 0.2717, + "step": 80895 + }, + { + "epoch": 3.77, + "learning_rate": 7.480601319894033e-06, + "loss": 0.0359, + "step": 80900 + }, + { + "epoch": 3.78, + "learning_rate": 7.479817534839246e-06, + "loss": 0.0532, + "step": 80905 + }, + { + "epoch": 3.78, + "learning_rate": 7.47903374978446e-06, + "loss": 0.0593, + "step": 80910 + }, + { + "epoch": 3.78, + "learning_rate": 7.478249964729673e-06, + "loss": 0.0459, + "step": 80915 + }, + { + "epoch": 3.78, + "learning_rate": 7.477466179674887e-06, + "loss": 0.0694, + "step": 80920 + }, + { + "epoch": 3.78, + "learning_rate": 7.4766823946200995e-06, + "loss": 0.0415, + "step": 80925 + }, + { + "epoch": 3.78, + "learning_rate": 7.475898609565313e-06, + "loss": 0.106, + "step": 80930 + }, + { + "epoch": 3.78, + "learning_rate": 7.4751148245105265e-06, + "loss": 0.1636, + "step": 80935 + }, + { + "epoch": 3.78, + "learning_rate": 7.47433103945574e-06, + "loss": 0.1601, + "step": 80940 + }, + { + "epoch": 3.78, + "learning_rate": 7.473547254400953e-06, + "loss": 0.277, + "step": 80945 + }, + { + "epoch": 3.78, + "learning_rate": 7.472763469346167e-06, + "loss": 0.065, + "step": 80950 + }, + { + "epoch": 3.78, + "learning_rate": 7.47197968429138e-06, + "loss": 0.0045, + "step": 80955 + }, + { + "epoch": 3.78, + "learning_rate": 7.471195899236594e-06, + "loss": 0.0261, + "step": 80960 + }, + { + "epoch": 3.78, + "learning_rate": 7.470412114181807e-06, + "loss": 0.0298, + "step": 80965 + }, + { + "epoch": 3.78, + "learning_rate": 7.469628329127021e-06, + "loss": 0.0836, + "step": 80970 + }, + { + "epoch": 3.78, + "learning_rate": 7.468844544072234e-06, + "loss": 0.0585, + "step": 80975 + }, + { + "epoch": 3.78, + "learning_rate": 7.468060759017448e-06, + "loss": 0.0885, + "step": 80980 + }, + { + "epoch": 3.78, + "learning_rate": 7.467276973962661e-06, + "loss": 0.1012, + "step": 80985 + }, + { + "epoch": 3.78, + "learning_rate": 7.466493188907874e-06, + "loss": 0.1778, + "step": 80990 + }, + { + "epoch": 3.78, + "learning_rate": 7.465709403853087e-06, + "loss": 0.1979, + "step": 80995 + }, + { + "epoch": 3.78, + "learning_rate": 7.464925618798301e-06, + "loss": 0.0336, + "step": 81000 + }, + { + "epoch": 3.78, + "learning_rate": 7.464141833743514e-06, + "loss": 0.0493, + "step": 81005 + }, + { + "epoch": 3.78, + "learning_rate": 7.463358048688728e-06, + "loss": 0.0297, + "step": 81010 + }, + { + "epoch": 3.78, + "learning_rate": 7.462574263633941e-06, + "loss": 0.017, + "step": 81015 + }, + { + "epoch": 3.78, + "learning_rate": 7.461790478579155e-06, + "loss": 0.0602, + "step": 81020 + }, + { + "epoch": 3.78, + "learning_rate": 7.461006693524368e-06, + "loss": 0.0804, + "step": 81025 + }, + { + "epoch": 3.78, + "learning_rate": 7.460222908469582e-06, + "loss": 0.0775, + "step": 81030 + }, + { + "epoch": 3.78, + "learning_rate": 7.459439123414795e-06, + "loss": 0.2335, + "step": 81035 + }, + { + "epoch": 3.78, + "learning_rate": 7.458655338360009e-06, + "loss": 0.1755, + "step": 81040 + }, + { + "epoch": 3.78, + "learning_rate": 7.457871553305222e-06, + "loss": 0.2894, + "step": 81045 + }, + { + "epoch": 3.78, + "learning_rate": 7.457087768250436e-06, + "loss": 0.0208, + "step": 81050 + }, + { + "epoch": 3.78, + "learning_rate": 7.456303983195648e-06, + "loss": 0.0958, + "step": 81055 + }, + { + "epoch": 3.78, + "learning_rate": 7.455520198140862e-06, + "loss": 0.0727, + "step": 81060 + }, + { + "epoch": 3.78, + "learning_rate": 7.454736413086075e-06, + "loss": 0.0566, + "step": 81065 + }, + { + "epoch": 3.78, + "learning_rate": 7.453952628031289e-06, + "loss": 0.0311, + "step": 81070 + }, + { + "epoch": 3.78, + "learning_rate": 7.453168842976502e-06, + "loss": 0.1151, + "step": 81075 + }, + { + "epoch": 3.78, + "learning_rate": 7.452385057921716e-06, + "loss": 0.1571, + "step": 81080 + }, + { + "epoch": 3.78, + "learning_rate": 7.451601272866929e-06, + "loss": 0.1032, + "step": 81085 + }, + { + "epoch": 3.78, + "learning_rate": 7.450817487812143e-06, + "loss": 0.1239, + "step": 81090 + }, + { + "epoch": 3.78, + "learning_rate": 7.450033702757356e-06, + "loss": 0.3272, + "step": 81095 + }, + { + "epoch": 3.78, + "learning_rate": 7.44924991770257e-06, + "loss": 0.088, + "step": 81100 + }, + { + "epoch": 3.78, + "learning_rate": 7.448466132647784e-06, + "loss": 0.0075, + "step": 81105 + }, + { + "epoch": 3.78, + "learning_rate": 7.447682347592997e-06, + "loss": 0.0353, + "step": 81110 + }, + { + "epoch": 3.78, + "learning_rate": 7.446898562538211e-06, + "loss": 0.0448, + "step": 81115 + }, + { + "epoch": 3.79, + "learning_rate": 7.446114777483423e-06, + "loss": 0.0518, + "step": 81120 + }, + { + "epoch": 3.79, + "learning_rate": 7.445330992428636e-06, + "loss": 0.1159, + "step": 81125 + }, + { + "epoch": 3.79, + "learning_rate": 7.44454720737385e-06, + "loss": 0.0917, + "step": 81130 + }, + { + "epoch": 3.79, + "learning_rate": 7.443763422319063e-06, + "loss": 0.1232, + "step": 81135 + }, + { + "epoch": 3.79, + "learning_rate": 7.442979637264277e-06, + "loss": 0.2608, + "step": 81140 + }, + { + "epoch": 3.79, + "learning_rate": 7.442195852209491e-06, + "loss": 0.2487, + "step": 81145 + }, + { + "epoch": 3.79, + "learning_rate": 7.441412067154704e-06, + "loss": 0.0297, + "step": 81150 + }, + { + "epoch": 3.79, + "learning_rate": 7.440628282099918e-06, + "loss": 0.0077, + "step": 81155 + }, + { + "epoch": 3.79, + "learning_rate": 7.439844497045131e-06, + "loss": 0.0524, + "step": 81160 + }, + { + "epoch": 3.79, + "learning_rate": 7.439060711990345e-06, + "loss": 0.0355, + "step": 81165 + }, + { + "epoch": 3.79, + "learning_rate": 7.438276926935558e-06, + "loss": 0.0954, + "step": 81170 + }, + { + "epoch": 3.79, + "learning_rate": 7.437493141880772e-06, + "loss": 0.1215, + "step": 81175 + }, + { + "epoch": 3.79, + "learning_rate": 7.436709356825985e-06, + "loss": 0.1101, + "step": 81180 + }, + { + "epoch": 3.79, + "learning_rate": 7.435925571771197e-06, + "loss": 0.1134, + "step": 81185 + }, + { + "epoch": 3.79, + "learning_rate": 7.435141786716411e-06, + "loss": 0.1016, + "step": 81190 + }, + { + "epoch": 3.79, + "learning_rate": 7.434358001661625e-06, + "loss": 0.3215, + "step": 81195 + }, + { + "epoch": 3.79, + "learning_rate": 7.433574216606838e-06, + "loss": 0.0755, + "step": 81200 + }, + { + "epoch": 3.79, + "learning_rate": 7.432790431552052e-06, + "loss": 0.0231, + "step": 81205 + }, + { + "epoch": 3.79, + "learning_rate": 7.432006646497265e-06, + "loss": 0.0457, + "step": 81210 + }, + { + "epoch": 3.79, + "learning_rate": 7.431222861442479e-06, + "loss": 0.0341, + "step": 81215 + }, + { + "epoch": 3.79, + "learning_rate": 7.430439076387692e-06, + "loss": 0.1064, + "step": 81220 + }, + { + "epoch": 3.79, + "learning_rate": 7.429655291332906e-06, + "loss": 0.105, + "step": 81225 + }, + { + "epoch": 3.79, + "learning_rate": 7.428871506278119e-06, + "loss": 0.0941, + "step": 81230 + }, + { + "epoch": 3.79, + "learning_rate": 7.428087721223333e-06, + "loss": 0.0624, + "step": 81235 + }, + { + "epoch": 3.79, + "learning_rate": 7.427303936168546e-06, + "loss": 0.2318, + "step": 81240 + }, + { + "epoch": 3.79, + "learning_rate": 7.42652015111376e-06, + "loss": 0.2825, + "step": 81245 + }, + { + "epoch": 3.79, + "learning_rate": 7.425736366058972e-06, + "loss": 0.0709, + "step": 81250 + }, + { + "epoch": 3.79, + "learning_rate": 7.424952581004186e-06, + "loss": 0.0251, + "step": 81255 + }, + { + "epoch": 3.79, + "learning_rate": 7.424168795949399e-06, + "loss": 0.0932, + "step": 81260 + }, + { + "epoch": 3.79, + "learning_rate": 7.423385010894613e-06, + "loss": 0.0567, + "step": 81265 + }, + { + "epoch": 3.79, + "learning_rate": 7.422601225839826e-06, + "loss": 0.0956, + "step": 81270 + }, + { + "epoch": 3.79, + "learning_rate": 7.42181744078504e-06, + "loss": 0.0827, + "step": 81275 + }, + { + "epoch": 3.79, + "learning_rate": 7.421033655730253e-06, + "loss": 0.1244, + "step": 81280 + }, + { + "epoch": 3.79, + "learning_rate": 7.420249870675467e-06, + "loss": 0.049, + "step": 81285 + }, + { + "epoch": 3.79, + "learning_rate": 7.41946608562068e-06, + "loss": 0.1748, + "step": 81290 + }, + { + "epoch": 3.79, + "learning_rate": 7.418682300565894e-06, + "loss": 0.2296, + "step": 81295 + }, + { + "epoch": 3.79, + "learning_rate": 7.417898515511107e-06, + "loss": 0.0165, + "step": 81300 + }, + { + "epoch": 3.79, + "learning_rate": 7.417114730456321e-06, + "loss": 0.0275, + "step": 81305 + }, + { + "epoch": 3.79, + "learning_rate": 7.416330945401534e-06, + "loss": 0.0393, + "step": 81310 + }, + { + "epoch": 3.79, + "learning_rate": 7.415547160346747e-06, + "loss": 0.0505, + "step": 81315 + }, + { + "epoch": 3.79, + "learning_rate": 7.41476337529196e-06, + "loss": 0.148, + "step": 81320 + }, + { + "epoch": 3.79, + "learning_rate": 7.413979590237174e-06, + "loss": 0.0627, + "step": 81325 + }, + { + "epoch": 3.79, + "learning_rate": 7.413195805182387e-06, + "loss": 0.111, + "step": 81330 + }, + { + "epoch": 3.8, + "learning_rate": 7.412412020127601e-06, + "loss": 0.1179, + "step": 81335 + }, + { + "epoch": 3.8, + "learning_rate": 7.411628235072814e-06, + "loss": 0.2385, + "step": 81340 + }, + { + "epoch": 3.8, + "learning_rate": 7.410844450018028e-06, + "loss": 0.2367, + "step": 81345 + }, + { + "epoch": 3.8, + "learning_rate": 7.410060664963241e-06, + "loss": 0.0705, + "step": 81350 + }, + { + "epoch": 3.8, + "learning_rate": 7.409276879908455e-06, + "loss": 0.0118, + "step": 81355 + }, + { + "epoch": 3.8, + "learning_rate": 7.408493094853668e-06, + "loss": 0.0494, + "step": 81360 + }, + { + "epoch": 3.8, + "learning_rate": 7.407709309798882e-06, + "loss": 0.0659, + "step": 81365 + }, + { + "epoch": 3.8, + "learning_rate": 7.406925524744096e-06, + "loss": 0.0815, + "step": 81370 + }, + { + "epoch": 3.8, + "learning_rate": 7.406141739689309e-06, + "loss": 0.1722, + "step": 81375 + }, + { + "epoch": 3.8, + "learning_rate": 7.405357954634521e-06, + "loss": 0.0848, + "step": 81380 + }, + { + "epoch": 3.8, + "learning_rate": 7.404574169579735e-06, + "loss": 0.1096, + "step": 81385 + }, + { + "epoch": 3.8, + "learning_rate": 7.403790384524948e-06, + "loss": 0.2585, + "step": 81390 + }, + { + "epoch": 3.8, + "learning_rate": 7.403006599470162e-06, + "loss": 0.3918, + "step": 81395 + }, + { + "epoch": 3.8, + "learning_rate": 7.402222814415375e-06, + "loss": 0.0702, + "step": 81400 + }, + { + "epoch": 3.8, + "learning_rate": 7.401439029360589e-06, + "loss": 0.016, + "step": 81405 + }, + { + "epoch": 3.8, + "learning_rate": 7.400655244305802e-06, + "loss": 0.0741, + "step": 81410 + }, + { + "epoch": 3.8, + "learning_rate": 7.399871459251016e-06, + "loss": 0.0389, + "step": 81415 + }, + { + "epoch": 3.8, + "learning_rate": 7.3990876741962296e-06, + "loss": 0.0683, + "step": 81420 + }, + { + "epoch": 3.8, + "learning_rate": 7.398303889141443e-06, + "loss": 0.1226, + "step": 81425 + }, + { + "epoch": 3.8, + "learning_rate": 7.3975201040866565e-06, + "loss": 0.1473, + "step": 81430 + }, + { + "epoch": 3.8, + "learning_rate": 7.39673631903187e-06, + "loss": 0.1458, + "step": 81435 + }, + { + "epoch": 3.8, + "learning_rate": 7.3959525339770835e-06, + "loss": 0.1623, + "step": 81440 + }, + { + "epoch": 3.8, + "learning_rate": 7.395168748922296e-06, + "loss": 0.2272, + "step": 81445 + }, + { + "epoch": 3.8, + "learning_rate": 7.394384963867509e-06, + "loss": 0.0278, + "step": 81450 + }, + { + "epoch": 3.8, + "learning_rate": 7.393601178812723e-06, + "loss": 0.026, + "step": 81455 + }, + { + "epoch": 3.8, + "learning_rate": 7.392817393757937e-06, + "loss": 0.0562, + "step": 81460 + }, + { + "epoch": 3.8, + "learning_rate": 7.39203360870315e-06, + "loss": 0.0508, + "step": 81465 + }, + { + "epoch": 3.8, + "learning_rate": 7.3912498236483635e-06, + "loss": 0.067, + "step": 81470 + }, + { + "epoch": 3.8, + "learning_rate": 7.390466038593577e-06, + "loss": 0.1139, + "step": 81475 + }, + { + "epoch": 3.8, + "learning_rate": 7.3896822535387905e-06, + "loss": 0.0491, + "step": 81480 + }, + { + "epoch": 3.8, + "learning_rate": 7.3888984684840036e-06, + "loss": 0.1286, + "step": 81485 + }, + { + "epoch": 3.8, + "learning_rate": 7.3881146834292175e-06, + "loss": 0.2884, + "step": 81490 + }, + { + "epoch": 3.8, + "learning_rate": 7.3873308983744305e-06, + "loss": 0.1804, + "step": 81495 + }, + { + "epoch": 3.8, + "learning_rate": 7.3865471133196444e-06, + "loss": 0.0247, + "step": 81500 + }, + { + "epoch": 3.8, + "learning_rate": 7.3857633282648575e-06, + "loss": 0.0351, + "step": 81505 + }, + { + "epoch": 3.8, + "learning_rate": 7.3849795432100706e-06, + "loss": 0.0495, + "step": 81510 + }, + { + "epoch": 3.8, + "learning_rate": 7.384195758155284e-06, + "loss": 0.0383, + "step": 81515 + }, + { + "epoch": 3.8, + "learning_rate": 7.3834119731004975e-06, + "loss": 0.0481, + "step": 81520 + }, + { + "epoch": 3.8, + "learning_rate": 7.382628188045711e-06, + "loss": 0.096, + "step": 81525 + }, + { + "epoch": 3.8, + "learning_rate": 7.3818444029909245e-06, + "loss": 0.0954, + "step": 81530 + }, + { + "epoch": 3.8, + "learning_rate": 7.3810606179361375e-06, + "loss": 0.1689, + "step": 81535 + }, + { + "epoch": 3.8, + "learning_rate": 7.3802768328813515e-06, + "loss": 0.1517, + "step": 81540 + }, + { + "epoch": 3.81, + "learning_rate": 7.3794930478265645e-06, + "loss": 0.2761, + "step": 81545 + }, + { + "epoch": 3.81, + "learning_rate": 7.378709262771778e-06, + "loss": 0.0598, + "step": 81550 + }, + { + "epoch": 3.81, + "learning_rate": 7.3779254777169915e-06, + "loss": 0.035, + "step": 81555 + }, + { + "epoch": 3.81, + "learning_rate": 7.377141692662205e-06, + "loss": 0.0392, + "step": 81560 + }, + { + "epoch": 3.81, + "learning_rate": 7.3763579076074184e-06, + "loss": 0.0373, + "step": 81565 + }, + { + "epoch": 3.81, + "learning_rate": 7.375574122552632e-06, + "loss": 0.0863, + "step": 81570 + }, + { + "epoch": 3.81, + "learning_rate": 7.3747903374978446e-06, + "loss": 0.0891, + "step": 81575 + }, + { + "epoch": 3.81, + "learning_rate": 7.3740065524430585e-06, + "loss": 0.22, + "step": 81580 + }, + { + "epoch": 3.81, + "learning_rate": 7.3732227673882715e-06, + "loss": 0.1706, + "step": 81585 + }, + { + "epoch": 3.81, + "learning_rate": 7.3724389823334854e-06, + "loss": 0.1848, + "step": 81590 + }, + { + "epoch": 3.81, + "learning_rate": 7.3716551972786985e-06, + "loss": 0.2883, + "step": 81595 + }, + { + "epoch": 3.81, + "learning_rate": 7.370871412223912e-06, + "loss": 0.0452, + "step": 81600 + }, + { + "epoch": 3.81, + "learning_rate": 7.3700876271691255e-06, + "loss": 0.0269, + "step": 81605 + }, + { + "epoch": 3.81, + "learning_rate": 7.369303842114339e-06, + "loss": 0.0488, + "step": 81610 + }, + { + "epoch": 3.81, + "learning_rate": 7.368520057059552e-06, + "loss": 0.1098, + "step": 81615 + }, + { + "epoch": 3.81, + "learning_rate": 7.367736272004766e-06, + "loss": 0.044, + "step": 81620 + }, + { + "epoch": 3.81, + "learning_rate": 7.366952486949979e-06, + "loss": 0.0823, + "step": 81625 + }, + { + "epoch": 3.81, + "learning_rate": 7.366168701895193e-06, + "loss": 0.102, + "step": 81630 + }, + { + "epoch": 3.81, + "learning_rate": 7.365384916840407e-06, + "loss": 0.0871, + "step": 81635 + }, + { + "epoch": 3.81, + "learning_rate": 7.364601131785619e-06, + "loss": 0.1703, + "step": 81640 + }, + { + "epoch": 3.81, + "learning_rate": 7.3638173467308325e-06, + "loss": 0.2861, + "step": 81645 + }, + { + "epoch": 3.81, + "learning_rate": 7.363033561676046e-06, + "loss": 0.0518, + "step": 81650 + }, + { + "epoch": 3.81, + "learning_rate": 7.3622497766212594e-06, + "loss": 0.0568, + "step": 81655 + }, + { + "epoch": 3.81, + "learning_rate": 7.361465991566473e-06, + "loss": 0.0474, + "step": 81660 + }, + { + "epoch": 3.81, + "learning_rate": 7.360682206511686e-06, + "loss": 0.0214, + "step": 81665 + }, + { + "epoch": 3.81, + "learning_rate": 7.3598984214569e-06, + "loss": 0.0658, + "step": 81670 + }, + { + "epoch": 3.81, + "learning_rate": 7.359114636402113e-06, + "loss": 0.0758, + "step": 81675 + }, + { + "epoch": 3.81, + "learning_rate": 7.358330851347327e-06, + "loss": 0.0836, + "step": 81680 + }, + { + "epoch": 3.81, + "learning_rate": 7.357547066292541e-06, + "loss": 0.0765, + "step": 81685 + }, + { + "epoch": 3.81, + "learning_rate": 7.356763281237754e-06, + "loss": 0.1627, + "step": 81690 + }, + { + "epoch": 3.81, + "learning_rate": 7.355979496182968e-06, + "loss": 0.3071, + "step": 81695 + }, + { + "epoch": 3.81, + "learning_rate": 7.355195711128181e-06, + "loss": 0.1123, + "step": 81700 + }, + { + "epoch": 3.81, + "learning_rate": 7.354411926073393e-06, + "loss": 0.0162, + "step": 81705 + }, + { + "epoch": 3.81, + "learning_rate": 7.353628141018607e-06, + "loss": 0.0456, + "step": 81710 + }, + { + "epoch": 3.81, + "learning_rate": 7.35284435596382e-06, + "loss": 0.052, + "step": 81715 + }, + { + "epoch": 3.81, + "learning_rate": 7.352060570909034e-06, + "loss": 0.0999, + "step": 81720 + }, + { + "epoch": 3.81, + "learning_rate": 7.351276785854248e-06, + "loss": 0.1386, + "step": 81725 + }, + { + "epoch": 3.81, + "learning_rate": 7.350493000799461e-06, + "loss": 0.0898, + "step": 81730 + }, + { + "epoch": 3.81, + "learning_rate": 7.349709215744675e-06, + "loss": 0.1664, + "step": 81735 + }, + { + "epoch": 3.81, + "learning_rate": 7.348925430689888e-06, + "loss": 0.2276, + "step": 81740 + }, + { + "epoch": 3.81, + "learning_rate": 7.348141645635102e-06, + "loss": 0.2442, + "step": 81745 + }, + { + "epoch": 3.81, + "learning_rate": 7.347357860580315e-06, + "loss": 0.0724, + "step": 81750 + }, + { + "epoch": 3.81, + "learning_rate": 7.346574075525529e-06, + "loss": 0.0132, + "step": 81755 + }, + { + "epoch": 3.82, + "learning_rate": 7.345790290470742e-06, + "loss": 0.0106, + "step": 81760 + }, + { + "epoch": 3.82, + "learning_rate": 7.345006505415956e-06, + "loss": 0.0188, + "step": 81765 + }, + { + "epoch": 3.82, + "learning_rate": 7.344222720361168e-06, + "loss": 0.0648, + "step": 81770 + }, + { + "epoch": 3.82, + "learning_rate": 7.343438935306382e-06, + "loss": 0.1224, + "step": 81775 + }, + { + "epoch": 3.82, + "learning_rate": 7.342655150251595e-06, + "loss": 0.0454, + "step": 81780 + }, + { + "epoch": 3.82, + "learning_rate": 7.341871365196809e-06, + "loss": 0.1007, + "step": 81785 + }, + { + "epoch": 3.82, + "learning_rate": 7.341087580142022e-06, + "loss": 0.227, + "step": 81790 + }, + { + "epoch": 3.82, + "learning_rate": 7.340303795087236e-06, + "loss": 0.206, + "step": 81795 + }, + { + "epoch": 3.82, + "learning_rate": 7.339520010032449e-06, + "loss": 0.0636, + "step": 81800 + }, + { + "epoch": 3.82, + "learning_rate": 7.338736224977663e-06, + "loss": 0.0383, + "step": 81805 + }, + { + "epoch": 3.82, + "learning_rate": 7.337952439922876e-06, + "loss": 0.0408, + "step": 81810 + }, + { + "epoch": 3.82, + "learning_rate": 7.33716865486809e-06, + "loss": 0.0514, + "step": 81815 + }, + { + "epoch": 3.82, + "learning_rate": 7.336384869813303e-06, + "loss": 0.0681, + "step": 81820 + }, + { + "epoch": 3.82, + "learning_rate": 7.335601084758517e-06, + "loss": 0.0524, + "step": 81825 + }, + { + "epoch": 3.82, + "learning_rate": 7.33481729970373e-06, + "loss": 0.0948, + "step": 81830 + }, + { + "epoch": 3.82, + "learning_rate": 7.334033514648943e-06, + "loss": 0.1006, + "step": 81835 + }, + { + "epoch": 3.82, + "learning_rate": 7.333249729594156e-06, + "loss": 0.1285, + "step": 81840 + }, + { + "epoch": 3.82, + "learning_rate": 7.33246594453937e-06, + "loss": 0.2243, + "step": 81845 + }, + { + "epoch": 3.82, + "learning_rate": 7.331682159484583e-06, + "loss": 0.0631, + "step": 81850 + }, + { + "epoch": 3.82, + "learning_rate": 7.330898374429797e-06, + "loss": 0.0126, + "step": 81855 + }, + { + "epoch": 3.82, + "learning_rate": 7.33011458937501e-06, + "loss": 0.033, + "step": 81860 + }, + { + "epoch": 3.82, + "learning_rate": 7.329330804320224e-06, + "loss": 0.0864, + "step": 81865 + }, + { + "epoch": 3.82, + "learning_rate": 7.328547019265437e-06, + "loss": 0.0507, + "step": 81870 + }, + { + "epoch": 3.82, + "learning_rate": 7.327763234210651e-06, + "loss": 0.0965, + "step": 81875 + }, + { + "epoch": 3.82, + "learning_rate": 7.326979449155864e-06, + "loss": 0.1274, + "step": 81880 + }, + { + "epoch": 3.82, + "learning_rate": 7.326195664101078e-06, + "loss": 0.1476, + "step": 81885 + }, + { + "epoch": 3.82, + "learning_rate": 7.325411879046291e-06, + "loss": 0.239, + "step": 81890 + }, + { + "epoch": 3.82, + "learning_rate": 7.324628093991505e-06, + "loss": 0.2824, + "step": 81895 + }, + { + "epoch": 3.82, + "learning_rate": 7.323844308936717e-06, + "loss": 0.046, + "step": 81900 + }, + { + "epoch": 3.82, + "learning_rate": 7.323060523881931e-06, + "loss": 0.0408, + "step": 81905 + }, + { + "epoch": 3.82, + "learning_rate": 7.322276738827144e-06, + "loss": 0.0289, + "step": 81910 + }, + { + "epoch": 3.82, + "learning_rate": 7.321492953772358e-06, + "loss": 0.0431, + "step": 81915 + }, + { + "epoch": 3.82, + "learning_rate": 7.320709168717571e-06, + "loss": 0.025, + "step": 81920 + }, + { + "epoch": 3.82, + "learning_rate": 7.319925383662785e-06, + "loss": 0.0459, + "step": 81925 + }, + { + "epoch": 3.82, + "learning_rate": 7.319141598607998e-06, + "loss": 0.1007, + "step": 81930 + }, + { + "epoch": 3.82, + "learning_rate": 7.318357813553212e-06, + "loss": 0.1685, + "step": 81935 + }, + { + "epoch": 3.82, + "learning_rate": 7.317574028498425e-06, + "loss": 0.2215, + "step": 81940 + }, + { + "epoch": 3.82, + "learning_rate": 7.316790243443639e-06, + "loss": 0.2574, + "step": 81945 + }, + { + "epoch": 3.82, + "learning_rate": 7.316006458388853e-06, + "loss": 0.0614, + "step": 81950 + }, + { + "epoch": 3.82, + "learning_rate": 7.315222673334066e-06, + "loss": 0.0751, + "step": 81955 + }, + { + "epoch": 3.82, + "learning_rate": 7.31443888827928e-06, + "loss": 0.0696, + "step": 81960 + }, + { + "epoch": 3.82, + "learning_rate": 7.313655103224492e-06, + "loss": 0.0804, + "step": 81965 + }, + { + "epoch": 3.82, + "learning_rate": 7.312871318169705e-06, + "loss": 0.0579, + "step": 81970 + }, + { + "epoch": 3.83, + "learning_rate": 7.312087533114919e-06, + "loss": 0.064, + "step": 81975 + }, + { + "epoch": 3.83, + "learning_rate": 7.311303748060132e-06, + "loss": 0.0764, + "step": 81980 + }, + { + "epoch": 3.83, + "learning_rate": 7.310519963005346e-06, + "loss": 0.1372, + "step": 81985 + }, + { + "epoch": 3.83, + "learning_rate": 7.309736177950559e-06, + "loss": 0.2091, + "step": 81990 + }, + { + "epoch": 3.83, + "learning_rate": 7.308952392895773e-06, + "loss": 0.2188, + "step": 81995 + }, + { + "epoch": 3.83, + "learning_rate": 7.308168607840987e-06, + "loss": 0.0708, + "step": 82000 + }, + { + "epoch": 3.83, + "learning_rate": 7.3073848227862e-06, + "loss": 0.0232, + "step": 82005 + }, + { + "epoch": 3.83, + "learning_rate": 7.306601037731414e-06, + "loss": 0.0316, + "step": 82010 + }, + { + "epoch": 3.83, + "learning_rate": 7.305817252676627e-06, + "loss": 0.0794, + "step": 82015 + }, + { + "epoch": 3.83, + "learning_rate": 7.305033467621841e-06, + "loss": 0.0471, + "step": 82020 + }, + { + "epoch": 3.83, + "learning_rate": 7.304249682567054e-06, + "loss": 0.1317, + "step": 82025 + }, + { + "epoch": 3.83, + "learning_rate": 7.303465897512266e-06, + "loss": 0.1332, + "step": 82030 + }, + { + "epoch": 3.83, + "learning_rate": 7.30268211245748e-06, + "loss": 0.0663, + "step": 82035 + }, + { + "epoch": 3.83, + "learning_rate": 7.301898327402694e-06, + "loss": 0.1334, + "step": 82040 + }, + { + "epoch": 3.83, + "learning_rate": 7.301114542347907e-06, + "loss": 0.2608, + "step": 82045 + }, + { + "epoch": 3.83, + "learning_rate": 7.300330757293121e-06, + "loss": 0.1023, + "step": 82050 + }, + { + "epoch": 3.83, + "learning_rate": 7.299546972238334e-06, + "loss": 0.042, + "step": 82055 + }, + { + "epoch": 3.83, + "learning_rate": 7.298763187183548e-06, + "loss": 0.0138, + "step": 82060 + }, + { + "epoch": 3.83, + "learning_rate": 7.297979402128761e-06, + "loss": 0.0702, + "step": 82065 + }, + { + "epoch": 3.83, + "learning_rate": 7.297195617073975e-06, + "loss": 0.1097, + "step": 82070 + }, + { + "epoch": 3.83, + "learning_rate": 7.296411832019188e-06, + "loss": 0.0231, + "step": 82075 + }, + { + "epoch": 3.83, + "learning_rate": 7.295628046964402e-06, + "loss": 0.0528, + "step": 82080 + }, + { + "epoch": 3.83, + "learning_rate": 7.294844261909615e-06, + "loss": 0.0923, + "step": 82085 + }, + { + "epoch": 3.83, + "learning_rate": 7.2940604768548286e-06, + "loss": 0.153, + "step": 82090 + }, + { + "epoch": 3.83, + "learning_rate": 7.293276691800041e-06, + "loss": 0.2262, + "step": 82095 + }, + { + "epoch": 3.83, + "learning_rate": 7.292492906745255e-06, + "loss": 0.0514, + "step": 82100 + }, + { + "epoch": 3.83, + "learning_rate": 7.291709121690468e-06, + "loss": 0.0909, + "step": 82105 + }, + { + "epoch": 3.83, + "learning_rate": 7.290925336635682e-06, + "loss": 0.0377, + "step": 82110 + }, + { + "epoch": 3.83, + "learning_rate": 7.290141551580895e-06, + "loss": 0.0834, + "step": 82115 + }, + { + "epoch": 3.83, + "learning_rate": 7.289357766526109e-06, + "loss": 0.0565, + "step": 82120 + }, + { + "epoch": 3.83, + "learning_rate": 7.288573981471322e-06, + "loss": 0.1259, + "step": 82125 + }, + { + "epoch": 3.83, + "learning_rate": 7.287790196416536e-06, + "loss": 0.1021, + "step": 82130 + }, + { + "epoch": 3.83, + "learning_rate": 7.287006411361749e-06, + "loss": 0.1543, + "step": 82135 + }, + { + "epoch": 3.83, + "learning_rate": 7.2862226263069625e-06, + "loss": 0.1117, + "step": 82140 + }, + { + "epoch": 3.83, + "learning_rate": 7.285438841252176e-06, + "loss": 0.2498, + "step": 82145 + }, + { + "epoch": 3.83, + "learning_rate": 7.2846550561973895e-06, + "loss": 0.074, + "step": 82150 + }, + { + "epoch": 3.83, + "learning_rate": 7.2838712711426026e-06, + "loss": 0.014, + "step": 82155 + }, + { + "epoch": 3.83, + "learning_rate": 7.283087486087816e-06, + "loss": 0.0593, + "step": 82160 + }, + { + "epoch": 3.83, + "learning_rate": 7.282303701033029e-06, + "loss": 0.0556, + "step": 82165 + }, + { + "epoch": 3.83, + "learning_rate": 7.281519915978243e-06, + "loss": 0.1292, + "step": 82170 + }, + { + "epoch": 3.83, + "learning_rate": 7.280736130923456e-06, + "loss": 0.1866, + "step": 82175 + }, + { + "epoch": 3.83, + "learning_rate": 7.2799523458686696e-06, + "loss": 0.1015, + "step": 82180 + }, + { + "epoch": 3.83, + "learning_rate": 7.279168560813883e-06, + "loss": 0.1122, + "step": 82185 + }, + { + "epoch": 3.84, + "learning_rate": 7.2783847757590965e-06, + "loss": 0.2874, + "step": 82190 + }, + { + "epoch": 3.84, + "learning_rate": 7.27760099070431e-06, + "loss": 0.2783, + "step": 82195 + }, + { + "epoch": 3.84, + "learning_rate": 7.2768172056495235e-06, + "loss": 0.0762, + "step": 82200 + }, + { + "epoch": 3.84, + "learning_rate": 7.2760334205947365e-06, + "loss": 0.0206, + "step": 82205 + }, + { + "epoch": 3.84, + "learning_rate": 7.2752496355399504e-06, + "loss": 0.0353, + "step": 82210 + }, + { + "epoch": 3.84, + "learning_rate": 7.274465850485164e-06, + "loss": 0.0914, + "step": 82215 + }, + { + "epoch": 3.84, + "learning_rate": 7.273682065430377e-06, + "loss": 0.0593, + "step": 82220 + }, + { + "epoch": 3.84, + "learning_rate": 7.27289828037559e-06, + "loss": 0.0548, + "step": 82225 + }, + { + "epoch": 3.84, + "learning_rate": 7.2721144953208035e-06, + "loss": 0.1621, + "step": 82230 + }, + { + "epoch": 3.84, + "learning_rate": 7.271330710266017e-06, + "loss": 0.1095, + "step": 82235 + }, + { + "epoch": 3.84, + "learning_rate": 7.2705469252112305e-06, + "loss": 0.1579, + "step": 82240 + }, + { + "epoch": 3.84, + "learning_rate": 7.2697631401564436e-06, + "loss": 0.3142, + "step": 82245 + }, + { + "epoch": 3.84, + "learning_rate": 7.2689793551016575e-06, + "loss": 0.0763, + "step": 82250 + }, + { + "epoch": 3.84, + "learning_rate": 7.2681955700468705e-06, + "loss": 0.0297, + "step": 82255 + }, + { + "epoch": 3.84, + "learning_rate": 7.2674117849920844e-06, + "loss": 0.051, + "step": 82260 + }, + { + "epoch": 3.84, + "learning_rate": 7.266627999937298e-06, + "loss": 0.0488, + "step": 82265 + }, + { + "epoch": 3.84, + "learning_rate": 7.265844214882511e-06, + "loss": 0.069, + "step": 82270 + }, + { + "epoch": 3.84, + "learning_rate": 7.265060429827725e-06, + "loss": 0.0763, + "step": 82275 + }, + { + "epoch": 3.84, + "learning_rate": 7.264276644772938e-06, + "loss": 0.0423, + "step": 82280 + }, + { + "epoch": 3.84, + "learning_rate": 7.263492859718152e-06, + "loss": 0.0885, + "step": 82285 + }, + { + "epoch": 3.84, + "learning_rate": 7.2627090746633645e-06, + "loss": 0.1617, + "step": 82290 + }, + { + "epoch": 3.84, + "learning_rate": 7.2619252896085775e-06, + "loss": 0.3108, + "step": 82295 + }, + { + "epoch": 3.84, + "learning_rate": 7.2611415045537914e-06, + "loss": 0.0586, + "step": 82300 + }, + { + "epoch": 3.84, + "learning_rate": 7.260357719499005e-06, + "loss": 0.0536, + "step": 82305 + }, + { + "epoch": 3.84, + "learning_rate": 7.259573934444218e-06, + "loss": 0.0501, + "step": 82310 + }, + { + "epoch": 3.84, + "learning_rate": 7.258790149389432e-06, + "loss": 0.0268, + "step": 82315 + }, + { + "epoch": 3.84, + "learning_rate": 7.258006364334645e-06, + "loss": 0.1857, + "step": 82320 + }, + { + "epoch": 3.84, + "learning_rate": 7.257222579279859e-06, + "loss": 0.0637, + "step": 82325 + }, + { + "epoch": 3.84, + "learning_rate": 7.256438794225072e-06, + "loss": 0.1362, + "step": 82330 + }, + { + "epoch": 3.84, + "learning_rate": 7.255655009170286e-06, + "loss": 0.1791, + "step": 82335 + }, + { + "epoch": 3.84, + "learning_rate": 7.254871224115499e-06, + "loss": 0.1739, + "step": 82340 + }, + { + "epoch": 3.84, + "learning_rate": 7.254087439060713e-06, + "loss": 0.222, + "step": 82345 + }, + { + "epoch": 3.84, + "learning_rate": 7.253303654005926e-06, + "loss": 0.0556, + "step": 82350 + }, + { + "epoch": 3.84, + "learning_rate": 7.252519868951139e-06, + "loss": 0.0175, + "step": 82355 + }, + { + "epoch": 3.84, + "learning_rate": 7.251736083896352e-06, + "loss": 0.0208, + "step": 82360 + }, + { + "epoch": 3.84, + "learning_rate": 7.250952298841566e-06, + "loss": 0.0106, + "step": 82365 + }, + { + "epoch": 3.84, + "learning_rate": 7.250168513786779e-06, + "loss": 0.0689, + "step": 82370 + }, + { + "epoch": 3.84, + "learning_rate": 7.249384728731993e-06, + "loss": 0.0633, + "step": 82375 + }, + { + "epoch": 3.84, + "learning_rate": 7.248600943677206e-06, + "loss": 0.0498, + "step": 82380 + }, + { + "epoch": 3.84, + "learning_rate": 7.24781715862242e-06, + "loss": 0.1595, + "step": 82385 + }, + { + "epoch": 3.84, + "learning_rate": 7.247033373567633e-06, + "loss": 0.1389, + "step": 82390 + }, + { + "epoch": 3.84, + "learning_rate": 7.246249588512847e-06, + "loss": 0.2956, + "step": 82395 + }, + { + "epoch": 3.84, + "learning_rate": 7.24546580345806e-06, + "loss": 0.0601, + "step": 82400 + }, + { + "epoch": 3.85, + "learning_rate": 7.244682018403274e-06, + "loss": 0.0084, + "step": 82405 + }, + { + "epoch": 3.85, + "learning_rate": 7.243898233348487e-06, + "loss": 0.0508, + "step": 82410 + }, + { + "epoch": 3.85, + "learning_rate": 7.243114448293701e-06, + "loss": 0.0388, + "step": 82415 + }, + { + "epoch": 3.85, + "learning_rate": 7.242330663238913e-06, + "loss": 0.0569, + "step": 82420 + }, + { + "epoch": 3.85, + "learning_rate": 7.241546878184127e-06, + "loss": 0.0943, + "step": 82425 + }, + { + "epoch": 3.85, + "learning_rate": 7.24076309312934e-06, + "loss": 0.0948, + "step": 82430 + }, + { + "epoch": 3.85, + "learning_rate": 7.239979308074554e-06, + "loss": 0.1843, + "step": 82435 + }, + { + "epoch": 3.85, + "learning_rate": 7.239195523019767e-06, + "loss": 0.2833, + "step": 82440 + }, + { + "epoch": 3.85, + "learning_rate": 7.238411737964981e-06, + "loss": 0.1999, + "step": 82445 + }, + { + "epoch": 3.85, + "learning_rate": 7.237627952910194e-06, + "loss": 0.056, + "step": 82450 + }, + { + "epoch": 3.85, + "learning_rate": 7.236844167855408e-06, + "loss": 0.009, + "step": 82455 + }, + { + "epoch": 3.85, + "learning_rate": 7.236060382800621e-06, + "loss": 0.0356, + "step": 82460 + }, + { + "epoch": 3.85, + "learning_rate": 7.235276597745835e-06, + "loss": 0.0509, + "step": 82465 + }, + { + "epoch": 3.85, + "learning_rate": 7.234492812691048e-06, + "loss": 0.0332, + "step": 82470 + }, + { + "epoch": 3.85, + "learning_rate": 7.233709027636262e-06, + "loss": 0.0879, + "step": 82475 + }, + { + "epoch": 3.85, + "learning_rate": 7.232925242581476e-06, + "loss": 0.1112, + "step": 82480 + }, + { + "epoch": 3.85, + "learning_rate": 7.232141457526688e-06, + "loss": 0.1709, + "step": 82485 + }, + { + "epoch": 3.85, + "learning_rate": 7.231357672471901e-06, + "loss": 0.1559, + "step": 82490 + }, + { + "epoch": 3.85, + "learning_rate": 7.230573887417115e-06, + "loss": 0.2309, + "step": 82495 + }, + { + "epoch": 3.85, + "learning_rate": 7.229790102362328e-06, + "loss": 0.0397, + "step": 82500 + }, + { + "epoch": 3.85, + "learning_rate": 7.229006317307542e-06, + "loss": 0.0336, + "step": 82505 + }, + { + "epoch": 3.85, + "learning_rate": 7.228222532252755e-06, + "loss": 0.0623, + "step": 82510 + }, + { + "epoch": 3.85, + "learning_rate": 7.227438747197969e-06, + "loss": 0.0456, + "step": 82515 + }, + { + "epoch": 3.85, + "learning_rate": 7.226654962143182e-06, + "loss": 0.0266, + "step": 82520 + }, + { + "epoch": 3.85, + "learning_rate": 7.225871177088396e-06, + "loss": 0.067, + "step": 82525 + }, + { + "epoch": 3.85, + "learning_rate": 7.22508739203361e-06, + "loss": 0.1142, + "step": 82530 + }, + { + "epoch": 3.85, + "learning_rate": 7.224303606978823e-06, + "loss": 0.1408, + "step": 82535 + }, + { + "epoch": 3.85, + "learning_rate": 7.223519821924037e-06, + "loss": 0.1929, + "step": 82540 + }, + { + "epoch": 3.85, + "learning_rate": 7.22273603686925e-06, + "loss": 0.2066, + "step": 82545 + }, + { + "epoch": 3.85, + "learning_rate": 7.221952251814462e-06, + "loss": 0.0644, + "step": 82550 + }, + { + "epoch": 3.85, + "learning_rate": 7.221168466759676e-06, + "loss": 0.0071, + "step": 82555 + }, + { + "epoch": 3.85, + "learning_rate": 7.220384681704889e-06, + "loss": 0.0073, + "step": 82560 + }, + { + "epoch": 3.85, + "learning_rate": 7.219600896650103e-06, + "loss": 0.0488, + "step": 82565 + }, + { + "epoch": 3.85, + "learning_rate": 7.218817111595316e-06, + "loss": 0.03, + "step": 82570 + }, + { + "epoch": 3.85, + "learning_rate": 7.21803332654053e-06, + "loss": 0.0536, + "step": 82575 + }, + { + "epoch": 3.85, + "learning_rate": 7.217249541485744e-06, + "loss": 0.0703, + "step": 82580 + }, + { + "epoch": 3.85, + "learning_rate": 7.216465756430957e-06, + "loss": 0.1186, + "step": 82585 + }, + { + "epoch": 3.85, + "learning_rate": 7.215681971376171e-06, + "loss": 0.1826, + "step": 82590 + }, + { + "epoch": 3.85, + "learning_rate": 7.214898186321384e-06, + "loss": 0.2697, + "step": 82595 + }, + { + "epoch": 3.85, + "learning_rate": 7.214114401266598e-06, + "loss": 0.095, + "step": 82600 + }, + { + "epoch": 3.85, + "learning_rate": 7.213330616211811e-06, + "loss": 0.0544, + "step": 82605 + }, + { + "epoch": 3.85, + "learning_rate": 7.212546831157025e-06, + "loss": 0.0774, + "step": 82610 + }, + { + "epoch": 3.85, + "learning_rate": 7.211763046102237e-06, + "loss": 0.0261, + "step": 82615 + }, + { + "epoch": 3.86, + "learning_rate": 7.210979261047451e-06, + "loss": 0.0929, + "step": 82620 + }, + { + "epoch": 3.86, + "learning_rate": 7.210195475992664e-06, + "loss": 0.0937, + "step": 82625 + }, + { + "epoch": 3.86, + "learning_rate": 7.209411690937878e-06, + "loss": 0.1644, + "step": 82630 + }, + { + "epoch": 3.86, + "learning_rate": 7.208627905883091e-06, + "loss": 0.2539, + "step": 82635 + }, + { + "epoch": 3.86, + "learning_rate": 7.207844120828305e-06, + "loss": 0.2675, + "step": 82640 + }, + { + "epoch": 3.86, + "learning_rate": 7.207060335773518e-06, + "loss": 0.4391, + "step": 82645 + }, + { + "epoch": 3.86, + "learning_rate": 7.206276550718732e-06, + "loss": 0.1219, + "step": 82650 + }, + { + "epoch": 3.86, + "learning_rate": 7.205492765663945e-06, + "loss": 0.034, + "step": 82655 + }, + { + "epoch": 3.86, + "learning_rate": 7.204708980609159e-06, + "loss": 0.0426, + "step": 82660 + }, + { + "epoch": 3.86, + "learning_rate": 7.203925195554372e-06, + "loss": 0.0084, + "step": 82665 + }, + { + "epoch": 3.86, + "learning_rate": 7.203141410499586e-06, + "loss": 0.0198, + "step": 82670 + }, + { + "epoch": 3.86, + "learning_rate": 7.202357625444799e-06, + "loss": 0.1924, + "step": 82675 + }, + { + "epoch": 3.86, + "learning_rate": 7.201573840390012e-06, + "loss": 0.1191, + "step": 82680 + }, + { + "epoch": 3.86, + "learning_rate": 7.200790055335225e-06, + "loss": 0.1241, + "step": 82685 + }, + { + "epoch": 3.86, + "learning_rate": 7.200006270280439e-06, + "loss": 0.1177, + "step": 82690 + }, + { + "epoch": 3.86, + "learning_rate": 7.199222485225652e-06, + "loss": 0.1445, + "step": 82695 + }, + { + "epoch": 3.86, + "learning_rate": 7.198438700170866e-06, + "loss": 0.1037, + "step": 82700 + }, + { + "epoch": 3.86, + "learning_rate": 7.197654915116079e-06, + "loss": 0.0354, + "step": 82705 + }, + { + "epoch": 3.86, + "learning_rate": 7.196871130061293e-06, + "loss": 0.0342, + "step": 82710 + }, + { + "epoch": 3.86, + "learning_rate": 7.196087345006506e-06, + "loss": 0.0374, + "step": 82715 + }, + { + "epoch": 3.86, + "learning_rate": 7.19530355995172e-06, + "loss": 0.0835, + "step": 82720 + }, + { + "epoch": 3.86, + "learning_rate": 7.194519774896933e-06, + "loss": 0.065, + "step": 82725 + }, + { + "epoch": 3.86, + "learning_rate": 7.193735989842147e-06, + "loss": 0.1511, + "step": 82730 + }, + { + "epoch": 3.86, + "learning_rate": 7.19295220478736e-06, + "loss": 0.0832, + "step": 82735 + }, + { + "epoch": 3.86, + "learning_rate": 7.192168419732574e-06, + "loss": 0.3202, + "step": 82740 + }, + { + "epoch": 3.86, + "learning_rate": 7.191384634677786e-06, + "loss": 0.1847, + "step": 82745 + }, + { + "epoch": 3.86, + "learning_rate": 7.190600849623e-06, + "loss": 0.0422, + "step": 82750 + }, + { + "epoch": 3.86, + "learning_rate": 7.189817064568213e-06, + "loss": 0.0422, + "step": 82755 + }, + { + "epoch": 3.86, + "learning_rate": 7.189033279513427e-06, + "loss": 0.0557, + "step": 82760 + }, + { + "epoch": 3.86, + "learning_rate": 7.18824949445864e-06, + "loss": 0.0645, + "step": 82765 + }, + { + "epoch": 3.86, + "learning_rate": 7.187465709403854e-06, + "loss": 0.0985, + "step": 82770 + }, + { + "epoch": 3.86, + "learning_rate": 7.186681924349067e-06, + "loss": 0.1161, + "step": 82775 + }, + { + "epoch": 3.86, + "learning_rate": 7.185898139294281e-06, + "loss": 0.0945, + "step": 82780 + }, + { + "epoch": 3.86, + "learning_rate": 7.185114354239494e-06, + "loss": 0.1213, + "step": 82785 + }, + { + "epoch": 3.86, + "learning_rate": 7.184330569184708e-06, + "loss": 0.1853, + "step": 82790 + }, + { + "epoch": 3.86, + "learning_rate": 7.1835467841299215e-06, + "loss": 0.1965, + "step": 82795 + }, + { + "epoch": 3.86, + "learning_rate": 7.182762999075135e-06, + "loss": 0.0679, + "step": 82800 + }, + { + "epoch": 3.86, + "learning_rate": 7.1819792140203485e-06, + "loss": 0.0218, + "step": 82805 + }, + { + "epoch": 3.86, + "learning_rate": 7.181195428965561e-06, + "loss": 0.0262, + "step": 82810 + }, + { + "epoch": 3.86, + "learning_rate": 7.180411643910774e-06, + "loss": 0.0487, + "step": 82815 + }, + { + "epoch": 3.86, + "learning_rate": 7.179627858855988e-06, + "loss": 0.0454, + "step": 82820 + }, + { + "epoch": 3.86, + "learning_rate": 7.178844073801201e-06, + "loss": 0.064, + "step": 82825 + }, + { + "epoch": 3.86, + "learning_rate": 7.178060288746415e-06, + "loss": 0.0518, + "step": 82830 + }, + { + "epoch": 3.87, + "learning_rate": 7.177276503691628e-06, + "loss": 0.0888, + "step": 82835 + }, + { + "epoch": 3.87, + "learning_rate": 7.176492718636842e-06, + "loss": 0.2183, + "step": 82840 + }, + { + "epoch": 3.87, + "learning_rate": 7.1757089335820555e-06, + "loss": 0.3927, + "step": 82845 + }, + { + "epoch": 3.87, + "learning_rate": 7.1749251485272686e-06, + "loss": 0.0595, + "step": 82850 + }, + { + "epoch": 3.87, + "learning_rate": 7.1741413634724825e-06, + "loss": 0.0189, + "step": 82855 + }, + { + "epoch": 3.87, + "learning_rate": 7.1733575784176955e-06, + "loss": 0.0272, + "step": 82860 + }, + { + "epoch": 3.87, + "learning_rate": 7.1725737933629094e-06, + "loss": 0.0464, + "step": 82865 + }, + { + "epoch": 3.87, + "learning_rate": 7.1717900083081225e-06, + "loss": 0.0883, + "step": 82870 + }, + { + "epoch": 3.87, + "learning_rate": 7.171006223253335e-06, + "loss": 0.0915, + "step": 82875 + }, + { + "epoch": 3.87, + "learning_rate": 7.170222438198549e-06, + "loss": 0.0331, + "step": 82880 + }, + { + "epoch": 3.87, + "learning_rate": 7.1694386531437625e-06, + "loss": 0.111, + "step": 82885 + }, + { + "epoch": 3.87, + "learning_rate": 7.1686548680889756e-06, + "loss": 0.1855, + "step": 82890 + }, + { + "epoch": 3.87, + "learning_rate": 7.1678710830341895e-06, + "loss": 0.1827, + "step": 82895 + }, + { + "epoch": 3.87, + "learning_rate": 7.1670872979794025e-06, + "loss": 0.0682, + "step": 82900 + }, + { + "epoch": 3.87, + "learning_rate": 7.1663035129246164e-06, + "loss": 0.009, + "step": 82905 + }, + { + "epoch": 3.87, + "learning_rate": 7.1655197278698295e-06, + "loss": 0.0417, + "step": 82910 + }, + { + "epoch": 3.87, + "learning_rate": 7.164735942815043e-06, + "loss": 0.129, + "step": 82915 + }, + { + "epoch": 3.87, + "learning_rate": 7.1639521577602565e-06, + "loss": 0.0611, + "step": 82920 + }, + { + "epoch": 3.87, + "learning_rate": 7.16316837270547e-06, + "loss": 0.0629, + "step": 82925 + }, + { + "epoch": 3.87, + "learning_rate": 7.1623845876506834e-06, + "loss": 0.1248, + "step": 82930 + }, + { + "epoch": 3.87, + "learning_rate": 7.161600802595897e-06, + "loss": 0.1549, + "step": 82935 + }, + { + "epoch": 3.87, + "learning_rate": 7.1608170175411095e-06, + "loss": 0.2218, + "step": 82940 + }, + { + "epoch": 3.87, + "learning_rate": 7.1600332324863234e-06, + "loss": 0.433, + "step": 82945 + }, + { + "epoch": 3.87, + "learning_rate": 7.1592494474315365e-06, + "loss": 0.0611, + "step": 82950 + }, + { + "epoch": 3.87, + "learning_rate": 7.15846566237675e-06, + "loss": 0.0288, + "step": 82955 + }, + { + "epoch": 3.87, + "learning_rate": 7.1576818773219635e-06, + "loss": 0.048, + "step": 82960 + }, + { + "epoch": 3.87, + "learning_rate": 7.156898092267177e-06, + "loss": 0.0867, + "step": 82965 + }, + { + "epoch": 3.87, + "learning_rate": 7.1561143072123904e-06, + "loss": 0.0412, + "step": 82970 + }, + { + "epoch": 3.87, + "learning_rate": 7.155330522157604e-06, + "loss": 0.0969, + "step": 82975 + }, + { + "epoch": 3.87, + "learning_rate": 7.154546737102817e-06, + "loss": 0.1484, + "step": 82980 + }, + { + "epoch": 3.87, + "learning_rate": 7.153762952048031e-06, + "loss": 0.1506, + "step": 82985 + }, + { + "epoch": 3.87, + "learning_rate": 7.152979166993244e-06, + "loss": 0.2028, + "step": 82990 + }, + { + "epoch": 3.87, + "learning_rate": 7.152195381938458e-06, + "loss": 0.204, + "step": 82995 + }, + { + "epoch": 3.87, + "learning_rate": 7.151411596883671e-06, + "loss": 0.0698, + "step": 83000 + }, + { + "epoch": 3.87, + "learning_rate": 7.150627811828884e-06, + "loss": 0.0254, + "step": 83005 + }, + { + "epoch": 3.87, + "learning_rate": 7.1498440267740974e-06, + "loss": 0.0321, + "step": 83010 + }, + { + "epoch": 3.87, + "learning_rate": 7.149060241719311e-06, + "loss": 0.0639, + "step": 83015 + }, + { + "epoch": 3.87, + "learning_rate": 7.148276456664524e-06, + "loss": 0.0328, + "step": 83020 + }, + { + "epoch": 3.87, + "learning_rate": 7.147492671609738e-06, + "loss": 0.0518, + "step": 83025 + }, + { + "epoch": 3.87, + "learning_rate": 7.146708886554951e-06, + "loss": 0.108, + "step": 83030 + }, + { + "epoch": 3.87, + "learning_rate": 7.145925101500165e-06, + "loss": 0.1571, + "step": 83035 + }, + { + "epoch": 3.87, + "learning_rate": 7.145141316445378e-06, + "loss": 0.1334, + "step": 83040 + }, + { + "epoch": 3.87, + "learning_rate": 7.144357531390592e-06, + "loss": 0.3672, + "step": 83045 + }, + { + "epoch": 3.88, + "learning_rate": 7.143573746335805e-06, + "loss": 0.0319, + "step": 83050 + }, + { + "epoch": 3.88, + "learning_rate": 7.142789961281019e-06, + "loss": 0.0214, + "step": 83055 + }, + { + "epoch": 3.88, + "learning_rate": 7.142006176226233e-06, + "loss": 0.0112, + "step": 83060 + }, + { + "epoch": 3.88, + "learning_rate": 7.141222391171446e-06, + "loss": 0.0584, + "step": 83065 + }, + { + "epoch": 3.88, + "learning_rate": 7.140438606116658e-06, + "loss": 0.0678, + "step": 83070 + }, + { + "epoch": 3.88, + "learning_rate": 7.139654821061872e-06, + "loss": 0.0804, + "step": 83075 + }, + { + "epoch": 3.88, + "learning_rate": 7.138871036007085e-06, + "loss": 0.0552, + "step": 83080 + }, + { + "epoch": 3.88, + "learning_rate": 7.138087250952299e-06, + "loss": 0.0976, + "step": 83085 + }, + { + "epoch": 3.88, + "learning_rate": 7.137303465897512e-06, + "loss": 0.2082, + "step": 83090 + }, + { + "epoch": 3.88, + "learning_rate": 7.136519680842726e-06, + "loss": 0.2262, + "step": 83095 + }, + { + "epoch": 3.88, + "learning_rate": 7.135735895787939e-06, + "loss": 0.0472, + "step": 83100 + }, + { + "epoch": 3.88, + "learning_rate": 7.134952110733153e-06, + "loss": 0.0212, + "step": 83105 + }, + { + "epoch": 3.88, + "learning_rate": 7.134168325678367e-06, + "loss": 0.0167, + "step": 83110 + }, + { + "epoch": 3.88, + "learning_rate": 7.13338454062358e-06, + "loss": 0.0431, + "step": 83115 + }, + { + "epoch": 3.88, + "learning_rate": 7.132600755568794e-06, + "loss": 0.0403, + "step": 83120 + }, + { + "epoch": 3.88, + "learning_rate": 7.131816970514007e-06, + "loss": 0.0445, + "step": 83125 + }, + { + "epoch": 3.88, + "learning_rate": 7.131033185459221e-06, + "loss": 0.0775, + "step": 83130 + }, + { + "epoch": 3.88, + "learning_rate": 7.130249400404433e-06, + "loss": 0.1693, + "step": 83135 + }, + { + "epoch": 3.88, + "learning_rate": 7.129465615349646e-06, + "loss": 0.1404, + "step": 83140 + }, + { + "epoch": 3.88, + "learning_rate": 7.12868183029486e-06, + "loss": 0.1983, + "step": 83145 + }, + { + "epoch": 3.88, + "learning_rate": 7.127898045240073e-06, + "loss": 0.0576, + "step": 83150 + }, + { + "epoch": 3.88, + "learning_rate": 7.127114260185287e-06, + "loss": 0.0225, + "step": 83155 + }, + { + "epoch": 3.88, + "learning_rate": 7.126330475130501e-06, + "loss": 0.042, + "step": 83160 + }, + { + "epoch": 3.88, + "learning_rate": 7.125546690075714e-06, + "loss": 0.047, + "step": 83165 + }, + { + "epoch": 3.88, + "learning_rate": 7.124762905020928e-06, + "loss": 0.0822, + "step": 83170 + }, + { + "epoch": 3.88, + "learning_rate": 7.123979119966141e-06, + "loss": 0.0317, + "step": 83175 + }, + { + "epoch": 3.88, + "learning_rate": 7.123195334911355e-06, + "loss": 0.178, + "step": 83180 + }, + { + "epoch": 3.88, + "learning_rate": 7.122411549856568e-06, + "loss": 0.0838, + "step": 83185 + }, + { + "epoch": 3.88, + "learning_rate": 7.121627764801782e-06, + "loss": 0.1366, + "step": 83190 + }, + { + "epoch": 3.88, + "learning_rate": 7.120843979746995e-06, + "loss": 0.22, + "step": 83195 + }, + { + "epoch": 3.88, + "learning_rate": 7.120060194692208e-06, + "loss": 0.0103, + "step": 83200 + }, + { + "epoch": 3.88, + "learning_rate": 7.119276409637421e-06, + "loss": 0.0207, + "step": 83205 + }, + { + "epoch": 3.88, + "learning_rate": 7.118492624582635e-06, + "loss": 0.0341, + "step": 83210 + }, + { + "epoch": 3.88, + "learning_rate": 7.117708839527848e-06, + "loss": 0.0363, + "step": 83215 + }, + { + "epoch": 3.88, + "learning_rate": 7.116925054473062e-06, + "loss": 0.0304, + "step": 83220 + }, + { + "epoch": 3.88, + "learning_rate": 7.116141269418275e-06, + "loss": 0.1067, + "step": 83225 + }, + { + "epoch": 3.88, + "learning_rate": 7.115357484363489e-06, + "loss": 0.0815, + "step": 83230 + }, + { + "epoch": 3.88, + "learning_rate": 7.114573699308702e-06, + "loss": 0.1673, + "step": 83235 + }, + { + "epoch": 3.88, + "learning_rate": 7.113789914253916e-06, + "loss": 0.2259, + "step": 83240 + }, + { + "epoch": 3.88, + "learning_rate": 7.113006129199129e-06, + "loss": 0.2584, + "step": 83245 + }, + { + "epoch": 3.88, + "learning_rate": 7.112222344144343e-06, + "loss": 0.1109, + "step": 83250 + }, + { + "epoch": 3.88, + "learning_rate": 7.111438559089556e-06, + "loss": 0.1151, + "step": 83255 + }, + { + "epoch": 3.89, + "learning_rate": 7.11065477403477e-06, + "loss": 0.0398, + "step": 83260 + }, + { + "epoch": 3.89, + "learning_rate": 7.109870988979982e-06, + "loss": 0.0395, + "step": 83265 + }, + { + "epoch": 3.89, + "learning_rate": 7.109087203925196e-06, + "loss": 0.0706, + "step": 83270 + }, + { + "epoch": 3.89, + "learning_rate": 7.108303418870409e-06, + "loss": 0.0666, + "step": 83275 + }, + { + "epoch": 3.89, + "learning_rate": 7.107519633815623e-06, + "loss": 0.0801, + "step": 83280 + }, + { + "epoch": 3.89, + "learning_rate": 7.106735848760836e-06, + "loss": 0.119, + "step": 83285 + }, + { + "epoch": 3.89, + "learning_rate": 7.10595206370605e-06, + "loss": 0.2241, + "step": 83290 + }, + { + "epoch": 3.89, + "learning_rate": 7.105168278651263e-06, + "loss": 0.1682, + "step": 83295 + }, + { + "epoch": 3.89, + "learning_rate": 7.104384493596477e-06, + "loss": 0.0431, + "step": 83300 + }, + { + "epoch": 3.89, + "learning_rate": 7.10360070854169e-06, + "loss": 0.011, + "step": 83305 + }, + { + "epoch": 3.89, + "learning_rate": 7.102816923486904e-06, + "loss": 0.0472, + "step": 83310 + }, + { + "epoch": 3.89, + "learning_rate": 7.102033138432117e-06, + "loss": 0.0674, + "step": 83315 + }, + { + "epoch": 3.89, + "learning_rate": 7.101249353377331e-06, + "loss": 0.0227, + "step": 83320 + }, + { + "epoch": 3.89, + "learning_rate": 7.100465568322545e-06, + "loss": 0.0615, + "step": 83325 + }, + { + "epoch": 3.89, + "learning_rate": 7.099838540278714e-06, + "loss": 0.1183, + "step": 83330 + }, + { + "epoch": 3.89, + "learning_rate": 7.099054755223927e-06, + "loss": 0.0832, + "step": 83335 + }, + { + "epoch": 3.89, + "learning_rate": 7.098270970169141e-06, + "loss": 0.1191, + "step": 83340 + }, + { + "epoch": 3.89, + "learning_rate": 7.097487185114354e-06, + "loss": 0.3515, + "step": 83345 + }, + { + "epoch": 3.89, + "learning_rate": 7.096703400059568e-06, + "loss": 0.106, + "step": 83350 + }, + { + "epoch": 3.89, + "learning_rate": 7.095919615004781e-06, + "loss": 0.0136, + "step": 83355 + }, + { + "epoch": 3.89, + "learning_rate": 7.095135829949995e-06, + "loss": 0.0589, + "step": 83360 + }, + { + "epoch": 3.89, + "learning_rate": 7.094352044895208e-06, + "loss": 0.0601, + "step": 83365 + }, + { + "epoch": 3.89, + "learning_rate": 7.093568259840422e-06, + "loss": 0.0845, + "step": 83370 + }, + { + "epoch": 3.89, + "learning_rate": 7.092784474785635e-06, + "loss": 0.1721, + "step": 83375 + }, + { + "epoch": 3.89, + "learning_rate": 7.092000689730849e-06, + "loss": 0.0775, + "step": 83380 + }, + { + "epoch": 3.89, + "learning_rate": 7.091216904676062e-06, + "loss": 0.1601, + "step": 83385 + }, + { + "epoch": 3.89, + "learning_rate": 7.090433119621276e-06, + "loss": 0.1606, + "step": 83390 + }, + { + "epoch": 3.89, + "learning_rate": 7.089649334566488e-06, + "loss": 0.2723, + "step": 83395 + }, + { + "epoch": 3.89, + "learning_rate": 7.088865549511702e-06, + "loss": 0.0425, + "step": 83400 + }, + { + "epoch": 3.89, + "learning_rate": 7.088081764456915e-06, + "loss": 0.0291, + "step": 83405 + }, + { + "epoch": 3.89, + "learning_rate": 7.087297979402129e-06, + "loss": 0.038, + "step": 83410 + }, + { + "epoch": 3.89, + "learning_rate": 7.086514194347342e-06, + "loss": 0.0745, + "step": 83415 + }, + { + "epoch": 3.89, + "learning_rate": 7.085730409292556e-06, + "loss": 0.0439, + "step": 83420 + }, + { + "epoch": 3.89, + "learning_rate": 7.084946624237769e-06, + "loss": 0.0769, + "step": 83425 + }, + { + "epoch": 3.89, + "learning_rate": 7.084162839182983e-06, + "loss": 0.0507, + "step": 83430 + }, + { + "epoch": 3.89, + "learning_rate": 7.083379054128197e-06, + "loss": 0.1185, + "step": 83435 + }, + { + "epoch": 3.89, + "learning_rate": 7.08259526907341e-06, + "loss": 0.2094, + "step": 83440 + }, + { + "epoch": 3.89, + "learning_rate": 7.081811484018624e-06, + "loss": 0.3072, + "step": 83445 + }, + { + "epoch": 3.89, + "learning_rate": 7.081027698963837e-06, + "loss": 0.044, + "step": 83450 + }, + { + "epoch": 3.89, + "learning_rate": 7.080243913909051e-06, + "loss": 0.0651, + "step": 83455 + }, + { + "epoch": 3.89, + "learning_rate": 7.079460128854263e-06, + "loss": 0.0351, + "step": 83460 + }, + { + "epoch": 3.89, + "learning_rate": 7.078676343799476e-06, + "loss": 0.023, + "step": 83465 + }, + { + "epoch": 3.89, + "learning_rate": 7.07789255874469e-06, + "loss": 0.1239, + "step": 83470 + }, + { + "epoch": 3.9, + "learning_rate": 7.077108773689903e-06, + "loss": 0.1016, + "step": 83475 + }, + { + "epoch": 3.9, + "learning_rate": 7.076324988635117e-06, + "loss": 0.093, + "step": 83480 + }, + { + "epoch": 3.9, + "learning_rate": 7.075541203580331e-06, + "loss": 0.1165, + "step": 83485 + }, + { + "epoch": 3.9, + "learning_rate": 7.074757418525544e-06, + "loss": 0.165, + "step": 83490 + }, + { + "epoch": 3.9, + "learning_rate": 7.073973633470758e-06, + "loss": 0.2434, + "step": 83495 + }, + { + "epoch": 3.9, + "learning_rate": 7.073189848415971e-06, + "loss": 0.0324, + "step": 83500 + }, + { + "epoch": 3.9, + "learning_rate": 7.072406063361185e-06, + "loss": 0.0379, + "step": 83505 + }, + { + "epoch": 3.9, + "learning_rate": 7.071622278306398e-06, + "loss": 0.0729, + "step": 83510 + }, + { + "epoch": 3.9, + "learning_rate": 7.070838493251612e-06, + "loss": 0.0829, + "step": 83515 + }, + { + "epoch": 3.9, + "learning_rate": 7.070054708196825e-06, + "loss": 0.046, + "step": 83520 + }, + { + "epoch": 3.9, + "learning_rate": 7.069270923142037e-06, + "loss": 0.0305, + "step": 83525 + }, + { + "epoch": 3.9, + "learning_rate": 7.068487138087251e-06, + "loss": 0.0839, + "step": 83530 + }, + { + "epoch": 3.9, + "learning_rate": 7.067703353032465e-06, + "loss": 0.1183, + "step": 83535 + }, + { + "epoch": 3.9, + "learning_rate": 7.066919567977678e-06, + "loss": 0.1723, + "step": 83540 + }, + { + "epoch": 3.9, + "learning_rate": 7.066135782922892e-06, + "loss": 0.1977, + "step": 83545 + }, + { + "epoch": 3.9, + "learning_rate": 7.065351997868105e-06, + "loss": 0.0894, + "step": 83550 + }, + { + "epoch": 3.9, + "learning_rate": 7.064568212813319e-06, + "loss": 0.0183, + "step": 83555 + }, + { + "epoch": 3.9, + "learning_rate": 7.063784427758532e-06, + "loss": 0.0516, + "step": 83560 + }, + { + "epoch": 3.9, + "learning_rate": 7.063000642703746e-06, + "loss": 0.0505, + "step": 83565 + }, + { + "epoch": 3.9, + "learning_rate": 7.062216857648959e-06, + "loss": 0.0342, + "step": 83570 + }, + { + "epoch": 3.9, + "learning_rate": 7.061433072594173e-06, + "loss": 0.0858, + "step": 83575 + }, + { + "epoch": 3.9, + "learning_rate": 7.060649287539386e-06, + "loss": 0.0855, + "step": 83580 + }, + { + "epoch": 3.9, + "learning_rate": 7.0598655024846e-06, + "loss": 0.1751, + "step": 83585 + }, + { + "epoch": 3.9, + "learning_rate": 7.059081717429812e-06, + "loss": 0.1959, + "step": 83590 + }, + { + "epoch": 3.9, + "learning_rate": 7.058297932375026e-06, + "loss": 0.2493, + "step": 83595 + }, + { + "epoch": 3.9, + "learning_rate": 7.057514147320239e-06, + "loss": 0.0743, + "step": 83600 + }, + { + "epoch": 3.9, + "learning_rate": 7.056730362265453e-06, + "loss": 0.0131, + "step": 83605 + }, + { + "epoch": 3.9, + "learning_rate": 7.055946577210666e-06, + "loss": 0.029, + "step": 83610 + }, + { + "epoch": 3.9, + "learning_rate": 7.05516279215588e-06, + "loss": 0.0278, + "step": 83615 + }, + { + "epoch": 3.9, + "learning_rate": 7.054379007101093e-06, + "loss": 0.0613, + "step": 83620 + }, + { + "epoch": 3.9, + "learning_rate": 7.053595222046307e-06, + "loss": 0.0682, + "step": 83625 + }, + { + "epoch": 3.9, + "learning_rate": 7.05281143699152e-06, + "loss": 0.0363, + "step": 83630 + }, + { + "epoch": 3.9, + "learning_rate": 7.052027651936734e-06, + "loss": 0.1171, + "step": 83635 + }, + { + "epoch": 3.9, + "learning_rate": 7.051243866881947e-06, + "loss": 0.1654, + "step": 83640 + }, + { + "epoch": 3.9, + "learning_rate": 7.050460081827161e-06, + "loss": 0.2631, + "step": 83645 + }, + { + "epoch": 3.9, + "learning_rate": 7.049676296772374e-06, + "loss": 0.0565, + "step": 83650 + }, + { + "epoch": 3.9, + "learning_rate": 7.048892511717587e-06, + "loss": 0.0405, + "step": 83655 + }, + { + "epoch": 3.9, + "learning_rate": 7.0481087266628e-06, + "loss": 0.0411, + "step": 83660 + }, + { + "epoch": 3.9, + "learning_rate": 7.047324941608014e-06, + "loss": 0.0245, + "step": 83665 + }, + { + "epoch": 3.9, + "learning_rate": 7.046541156553227e-06, + "loss": 0.075, + "step": 83670 + }, + { + "epoch": 3.9, + "learning_rate": 7.045757371498441e-06, + "loss": 0.1692, + "step": 83675 + }, + { + "epoch": 3.9, + "learning_rate": 7.044973586443654e-06, + "loss": 0.1396, + "step": 83680 + }, + { + "epoch": 3.9, + "learning_rate": 7.044189801388868e-06, + "loss": 0.136, + "step": 83685 + }, + { + "epoch": 3.91, + "learning_rate": 7.043406016334081e-06, + "loss": 0.2351, + "step": 83690 + }, + { + "epoch": 3.91, + "learning_rate": 7.042622231279295e-06, + "loss": 0.3361, + "step": 83695 + }, + { + "epoch": 3.91, + "learning_rate": 7.041838446224508e-06, + "loss": 0.1006, + "step": 83700 + }, + { + "epoch": 3.91, + "learning_rate": 7.041054661169722e-06, + "loss": 0.03, + "step": 83705 + }, + { + "epoch": 3.91, + "learning_rate": 7.0402708761149356e-06, + "loss": 0.0043, + "step": 83710 + }, + { + "epoch": 3.91, + "learning_rate": 7.039487091060149e-06, + "loss": 0.0544, + "step": 83715 + }, + { + "epoch": 3.91, + "learning_rate": 7.038703306005361e-06, + "loss": 0.0858, + "step": 83720 + }, + { + "epoch": 3.91, + "learning_rate": 7.037919520950575e-06, + "loss": 0.1035, + "step": 83725 + }, + { + "epoch": 3.91, + "learning_rate": 7.037135735895788e-06, + "loss": 0.1786, + "step": 83730 + }, + { + "epoch": 3.91, + "learning_rate": 7.036351950841002e-06, + "loss": 0.1902, + "step": 83735 + }, + { + "epoch": 3.91, + "learning_rate": 7.035568165786215e-06, + "loss": 0.1655, + "step": 83740 + }, + { + "epoch": 3.91, + "learning_rate": 7.034784380731429e-06, + "loss": 0.3697, + "step": 83745 + }, + { + "epoch": 3.91, + "learning_rate": 7.034000595676643e-06, + "loss": 0.0693, + "step": 83750 + }, + { + "epoch": 3.91, + "learning_rate": 7.033216810621856e-06, + "loss": 0.0175, + "step": 83755 + }, + { + "epoch": 3.91, + "learning_rate": 7.0324330255670695e-06, + "loss": 0.0227, + "step": 83760 + }, + { + "epoch": 3.91, + "learning_rate": 7.031649240512283e-06, + "loss": 0.0299, + "step": 83765 + }, + { + "epoch": 3.91, + "learning_rate": 7.0308654554574965e-06, + "loss": 0.0819, + "step": 83770 + }, + { + "epoch": 3.91, + "learning_rate": 7.0300816704027096e-06, + "loss": 0.0887, + "step": 83775 + }, + { + "epoch": 3.91, + "learning_rate": 7.0292978853479235e-06, + "loss": 0.0337, + "step": 83780 + }, + { + "epoch": 3.91, + "learning_rate": 7.028514100293136e-06, + "loss": 0.1754, + "step": 83785 + }, + { + "epoch": 3.91, + "learning_rate": 7.027730315238349e-06, + "loss": 0.1529, + "step": 83790 + }, + { + "epoch": 3.91, + "learning_rate": 7.026946530183563e-06, + "loss": 0.2586, + "step": 83795 + }, + { + "epoch": 3.91, + "learning_rate": 7.0261627451287766e-06, + "loss": 0.0753, + "step": 83800 + }, + { + "epoch": 3.91, + "learning_rate": 7.02537896007399e-06, + "loss": 0.0732, + "step": 83805 + }, + { + "epoch": 3.91, + "learning_rate": 7.0245951750192035e-06, + "loss": 0.0517, + "step": 83810 + }, + { + "epoch": 3.91, + "learning_rate": 7.023811389964417e-06, + "loss": 0.0656, + "step": 83815 + }, + { + "epoch": 3.91, + "learning_rate": 7.0230276049096305e-06, + "loss": 0.0995, + "step": 83820 + }, + { + "epoch": 3.91, + "learning_rate": 7.0222438198548435e-06, + "loss": 0.0459, + "step": 83825 + }, + { + "epoch": 3.91, + "learning_rate": 7.0214600348000575e-06, + "loss": 0.0869, + "step": 83830 + }, + { + "epoch": 3.91, + "learning_rate": 7.0206762497452705e-06, + "loss": 0.0683, + "step": 83835 + }, + { + "epoch": 3.91, + "learning_rate": 7.019892464690484e-06, + "loss": 0.1981, + "step": 83840 + }, + { + "epoch": 3.91, + "learning_rate": 7.0191086796356975e-06, + "loss": 0.3106, + "step": 83845 + }, + { + "epoch": 3.91, + "learning_rate": 7.0183248945809105e-06, + "loss": 0.0214, + "step": 83850 + }, + { + "epoch": 3.91, + "learning_rate": 7.017541109526124e-06, + "loss": 0.0107, + "step": 83855 + }, + { + "epoch": 3.91, + "learning_rate": 7.0167573244713375e-06, + "loss": 0.0483, + "step": 83860 + }, + { + "epoch": 3.91, + "learning_rate": 7.0159735394165506e-06, + "loss": 0.0288, + "step": 83865 + }, + { + "epoch": 3.91, + "learning_rate": 7.0151897543617645e-06, + "loss": 0.0898, + "step": 83870 + }, + { + "epoch": 3.91, + "learning_rate": 7.0144059693069775e-06, + "loss": 0.1365, + "step": 83875 + }, + { + "epoch": 3.91, + "learning_rate": 7.0136221842521914e-06, + "loss": 0.0615, + "step": 83880 + }, + { + "epoch": 3.91, + "learning_rate": 7.0128383991974045e-06, + "loss": 0.1307, + "step": 83885 + }, + { + "epoch": 3.91, + "learning_rate": 7.012054614142618e-06, + "loss": 0.1897, + "step": 83890 + }, + { + "epoch": 3.91, + "learning_rate": 7.0112708290878315e-06, + "loss": 0.2455, + "step": 83895 + }, + { + "epoch": 3.91, + "learning_rate": 7.010487044033045e-06, + "loss": 0.0779, + "step": 83900 + }, + { + "epoch": 3.92, + "learning_rate": 7.009703258978258e-06, + "loss": 0.0304, + "step": 83905 + }, + { + "epoch": 3.92, + "learning_rate": 7.008919473923472e-06, + "loss": 0.0261, + "step": 83910 + }, + { + "epoch": 3.92, + "learning_rate": 7.0081356888686845e-06, + "loss": 0.0529, + "step": 83915 + }, + { + "epoch": 3.92, + "learning_rate": 7.0073519038138984e-06, + "loss": 0.0457, + "step": 83920 + }, + { + "epoch": 3.92, + "learning_rate": 7.0065681187591115e-06, + "loss": 0.1164, + "step": 83925 + }, + { + "epoch": 3.92, + "learning_rate": 7.005784333704325e-06, + "loss": 0.0913, + "step": 83930 + }, + { + "epoch": 3.92, + "learning_rate": 7.0050005486495385e-06, + "loss": 0.1972, + "step": 83935 + }, + { + "epoch": 3.92, + "learning_rate": 7.004216763594752e-06, + "loss": 0.137, + "step": 83940 + }, + { + "epoch": 3.92, + "learning_rate": 7.0034329785399654e-06, + "loss": 0.2245, + "step": 83945 + }, + { + "epoch": 3.92, + "learning_rate": 7.002649193485179e-06, + "loss": 0.0774, + "step": 83950 + }, + { + "epoch": 3.92, + "learning_rate": 7.001865408430392e-06, + "loss": 0.0612, + "step": 83955 + }, + { + "epoch": 3.92, + "learning_rate": 7.001081623375606e-06, + "loss": 0.0355, + "step": 83960 + }, + { + "epoch": 3.92, + "learning_rate": 7.000297838320819e-06, + "loss": 0.0663, + "step": 83965 + }, + { + "epoch": 3.92, + "learning_rate": 6.999514053266033e-06, + "loss": 0.0779, + "step": 83970 + }, + { + "epoch": 3.92, + "learning_rate": 6.998730268211247e-06, + "loss": 0.1159, + "step": 83975 + }, + { + "epoch": 3.92, + "learning_rate": 6.997946483156459e-06, + "loss": 0.1134, + "step": 83980 + }, + { + "epoch": 3.92, + "learning_rate": 6.9971626981016724e-06, + "loss": 0.0894, + "step": 83985 + }, + { + "epoch": 3.92, + "learning_rate": 6.996378913046886e-06, + "loss": 0.1549, + "step": 83990 + }, + { + "epoch": 3.92, + "learning_rate": 6.995595127992099e-06, + "loss": 0.266, + "step": 83995 + }, + { + "epoch": 3.92, + "learning_rate": 6.994811342937313e-06, + "loss": 0.064, + "step": 84000 + }, + { + "epoch": 3.92, + "learning_rate": 6.994027557882526e-06, + "loss": 0.0812, + "step": 84005 + }, + { + "epoch": 3.92, + "learning_rate": 6.99324377282774e-06, + "loss": 0.0724, + "step": 84010 + }, + { + "epoch": 3.92, + "learning_rate": 6.992459987772954e-06, + "loss": 0.056, + "step": 84015 + }, + { + "epoch": 3.92, + "learning_rate": 6.991676202718167e-06, + "loss": 0.0807, + "step": 84020 + }, + { + "epoch": 3.92, + "learning_rate": 6.990892417663381e-06, + "loss": 0.0549, + "step": 84025 + }, + { + "epoch": 3.92, + "learning_rate": 6.990108632608594e-06, + "loss": 0.0929, + "step": 84030 + }, + { + "epoch": 3.92, + "learning_rate": 6.989324847553808e-06, + "loss": 0.1392, + "step": 84035 + }, + { + "epoch": 3.92, + "learning_rate": 6.988541062499021e-06, + "loss": 0.2502, + "step": 84040 + }, + { + "epoch": 3.92, + "learning_rate": 6.987757277444233e-06, + "loss": 0.3448, + "step": 84045 + }, + { + "epoch": 3.92, + "learning_rate": 6.986973492389447e-06, + "loss": 0.0538, + "step": 84050 + }, + { + "epoch": 3.92, + "learning_rate": 6.98618970733466e-06, + "loss": 0.0152, + "step": 84055 + }, + { + "epoch": 3.92, + "learning_rate": 6.985405922279874e-06, + "loss": 0.074, + "step": 84060 + }, + { + "epoch": 3.92, + "learning_rate": 6.984622137225088e-06, + "loss": 0.0497, + "step": 84065 + }, + { + "epoch": 3.92, + "learning_rate": 6.983838352170301e-06, + "loss": 0.0891, + "step": 84070 + }, + { + "epoch": 3.92, + "learning_rate": 6.983054567115515e-06, + "loss": 0.0899, + "step": 84075 + }, + { + "epoch": 3.92, + "learning_rate": 6.982270782060728e-06, + "loss": 0.0808, + "step": 84080 + }, + { + "epoch": 3.92, + "learning_rate": 6.981486997005942e-06, + "loss": 0.1104, + "step": 84085 + }, + { + "epoch": 3.92, + "learning_rate": 6.980703211951155e-06, + "loss": 0.2043, + "step": 84090 + }, + { + "epoch": 3.92, + "learning_rate": 6.979919426896369e-06, + "loss": 0.2488, + "step": 84095 + }, + { + "epoch": 3.92, + "learning_rate": 6.979135641841582e-06, + "loss": 0.0789, + "step": 84100 + }, + { + "epoch": 3.92, + "learning_rate": 6.978351856786796e-06, + "loss": 0.0489, + "step": 84105 + }, + { + "epoch": 3.92, + "learning_rate": 6.977568071732008e-06, + "loss": 0.107, + "step": 84110 + }, + { + "epoch": 3.92, + "learning_rate": 6.976784286677222e-06, + "loss": 0.0696, + "step": 84115 + }, + { + "epoch": 3.93, + "learning_rate": 6.976000501622435e-06, + "loss": 0.1148, + "step": 84120 + }, + { + "epoch": 3.93, + "learning_rate": 6.975216716567649e-06, + "loss": 0.1428, + "step": 84125 + }, + { + "epoch": 3.93, + "learning_rate": 6.974432931512862e-06, + "loss": 0.12, + "step": 84130 + }, + { + "epoch": 3.93, + "learning_rate": 6.973649146458076e-06, + "loss": 0.2055, + "step": 84135 + }, + { + "epoch": 3.93, + "learning_rate": 6.972865361403289e-06, + "loss": 0.2817, + "step": 84140 + }, + { + "epoch": 3.93, + "learning_rate": 6.972081576348503e-06, + "loss": 0.2038, + "step": 84145 + }, + { + "epoch": 3.93, + "learning_rate": 6.971297791293716e-06, + "loss": 0.1615, + "step": 84150 + }, + { + "epoch": 3.93, + "learning_rate": 6.97051400623893e-06, + "loss": 0.0239, + "step": 84155 + }, + { + "epoch": 3.93, + "learning_rate": 6.969730221184143e-06, + "loss": 0.0394, + "step": 84160 + }, + { + "epoch": 3.93, + "learning_rate": 6.968946436129357e-06, + "loss": 0.0815, + "step": 84165 + }, + { + "epoch": 3.93, + "learning_rate": 6.96816265107457e-06, + "loss": 0.0547, + "step": 84170 + }, + { + "epoch": 3.93, + "learning_rate": 6.967378866019783e-06, + "loss": 0.0423, + "step": 84175 + }, + { + "epoch": 3.93, + "learning_rate": 6.966595080964996e-06, + "loss": 0.1092, + "step": 84180 + }, + { + "epoch": 3.93, + "learning_rate": 6.96581129591021e-06, + "loss": 0.0865, + "step": 84185 + }, + { + "epoch": 3.93, + "learning_rate": 6.965027510855423e-06, + "loss": 0.0807, + "step": 84190 + }, + { + "epoch": 3.93, + "learning_rate": 6.964243725800637e-06, + "loss": 0.2152, + "step": 84195 + }, + { + "epoch": 3.93, + "learning_rate": 6.96345994074585e-06, + "loss": 0.0474, + "step": 84200 + }, + { + "epoch": 3.93, + "learning_rate": 6.962676155691064e-06, + "loss": 0.0093, + "step": 84205 + }, + { + "epoch": 3.93, + "learning_rate": 6.961892370636277e-06, + "loss": 0.0628, + "step": 84210 + }, + { + "epoch": 3.93, + "learning_rate": 6.961108585581491e-06, + "loss": 0.0878, + "step": 84215 + }, + { + "epoch": 3.93, + "learning_rate": 6.960324800526704e-06, + "loss": 0.0281, + "step": 84220 + }, + { + "epoch": 3.93, + "learning_rate": 6.959541015471918e-06, + "loss": 0.1112, + "step": 84225 + }, + { + "epoch": 3.93, + "learning_rate": 6.958757230417131e-06, + "loss": 0.1499, + "step": 84230 + }, + { + "epoch": 3.93, + "learning_rate": 6.957973445362345e-06, + "loss": 0.0709, + "step": 84235 + }, + { + "epoch": 3.93, + "learning_rate": 6.957189660307559e-06, + "loss": 0.0987, + "step": 84240 + }, + { + "epoch": 3.93, + "learning_rate": 6.956405875252771e-06, + "loss": 0.3597, + "step": 84245 + }, + { + "epoch": 3.93, + "learning_rate": 6.955622090197984e-06, + "loss": 0.0683, + "step": 84250 + }, + { + "epoch": 3.93, + "learning_rate": 6.954838305143198e-06, + "loss": 0.043, + "step": 84255 + }, + { + "epoch": 3.93, + "learning_rate": 6.954054520088411e-06, + "loss": 0.0472, + "step": 84260 + }, + { + "epoch": 3.93, + "learning_rate": 6.953270735033625e-06, + "loss": 0.0598, + "step": 84265 + }, + { + "epoch": 3.93, + "learning_rate": 6.952486949978838e-06, + "loss": 0.0462, + "step": 84270 + }, + { + "epoch": 3.93, + "learning_rate": 6.951703164924052e-06, + "loss": 0.0809, + "step": 84275 + }, + { + "epoch": 3.93, + "learning_rate": 6.950919379869265e-06, + "loss": 0.0949, + "step": 84280 + }, + { + "epoch": 3.93, + "learning_rate": 6.950135594814479e-06, + "loss": 0.2274, + "step": 84285 + }, + { + "epoch": 3.93, + "learning_rate": 6.949351809759693e-06, + "loss": 0.1614, + "step": 84290 + }, + { + "epoch": 3.93, + "learning_rate": 6.948568024704906e-06, + "loss": 0.1468, + "step": 84295 + }, + { + "epoch": 3.93, + "learning_rate": 6.94778423965012e-06, + "loss": 0.0695, + "step": 84300 + }, + { + "epoch": 3.93, + "learning_rate": 6.947000454595333e-06, + "loss": 0.04, + "step": 84305 + }, + { + "epoch": 3.93, + "learning_rate": 6.946216669540545e-06, + "loss": 0.0326, + "step": 84310 + }, + { + "epoch": 3.93, + "learning_rate": 6.945432884485759e-06, + "loss": 0.4171, + "step": 84315 + }, + { + "epoch": 3.93, + "learning_rate": 6.944649099430972e-06, + "loss": 0.0765, + "step": 84320 + }, + { + "epoch": 3.93, + "learning_rate": 6.943865314376186e-06, + "loss": 0.0823, + "step": 84325 + }, + { + "epoch": 3.93, + "learning_rate": 6.9430815293214e-06, + "loss": 0.0574, + "step": 84330 + }, + { + "epoch": 3.94, + "learning_rate": 6.942297744266613e-06, + "loss": 0.1587, + "step": 84335 + }, + { + "epoch": 3.94, + "learning_rate": 6.941513959211827e-06, + "loss": 0.2067, + "step": 84340 + }, + { + "epoch": 3.94, + "learning_rate": 6.94073017415704e-06, + "loss": 0.3755, + "step": 84345 + }, + { + "epoch": 3.94, + "learning_rate": 6.939946389102254e-06, + "loss": 0.0658, + "step": 84350 + }, + { + "epoch": 3.94, + "learning_rate": 6.939162604047467e-06, + "loss": 0.0162, + "step": 84355 + }, + { + "epoch": 3.94, + "learning_rate": 6.938378818992681e-06, + "loss": 0.0154, + "step": 84360 + }, + { + "epoch": 3.94, + "learning_rate": 6.937595033937894e-06, + "loss": 0.0647, + "step": 84365 + }, + { + "epoch": 3.94, + "learning_rate": 6.936811248883108e-06, + "loss": 0.0973, + "step": 84370 + }, + { + "epoch": 3.94, + "learning_rate": 6.93602746382832e-06, + "loss": 0.073, + "step": 84375 + }, + { + "epoch": 3.94, + "learning_rate": 6.935243678773534e-06, + "loss": 0.2244, + "step": 84380 + }, + { + "epoch": 3.94, + "learning_rate": 6.934459893718747e-06, + "loss": 0.0851, + "step": 84385 + }, + { + "epoch": 3.94, + "learning_rate": 6.933676108663961e-06, + "loss": 0.2985, + "step": 84390 + }, + { + "epoch": 3.94, + "learning_rate": 6.932892323609174e-06, + "loss": 0.2953, + "step": 84395 + }, + { + "epoch": 3.94, + "learning_rate": 6.932108538554388e-06, + "loss": 0.0488, + "step": 84400 + }, + { + "epoch": 3.94, + "learning_rate": 6.931324753499601e-06, + "loss": 0.0575, + "step": 84405 + }, + { + "epoch": 3.94, + "learning_rate": 6.930540968444815e-06, + "loss": 0.0392, + "step": 84410 + }, + { + "epoch": 3.94, + "learning_rate": 6.929757183390028e-06, + "loss": 0.0602, + "step": 84415 + }, + { + "epoch": 3.94, + "learning_rate": 6.928973398335242e-06, + "loss": 0.1123, + "step": 84420 + }, + { + "epoch": 3.94, + "learning_rate": 6.928189613280455e-06, + "loss": 0.0457, + "step": 84425 + }, + { + "epoch": 3.94, + "learning_rate": 6.9274058282256685e-06, + "loss": 0.0504, + "step": 84430 + }, + { + "epoch": 3.94, + "learning_rate": 6.926622043170882e-06, + "loss": 0.104, + "step": 84435 + }, + { + "epoch": 3.94, + "learning_rate": 6.925838258116095e-06, + "loss": 0.1921, + "step": 84440 + }, + { + "epoch": 3.94, + "learning_rate": 6.925054473061308e-06, + "loss": 0.4111, + "step": 84445 + }, + { + "epoch": 3.94, + "learning_rate": 6.924270688006522e-06, + "loss": 0.0485, + "step": 84450 + }, + { + "epoch": 3.94, + "learning_rate": 6.923486902951735e-06, + "loss": 0.0229, + "step": 84455 + }, + { + "epoch": 3.94, + "learning_rate": 6.922703117896949e-06, + "loss": 0.0297, + "step": 84460 + }, + { + "epoch": 3.94, + "learning_rate": 6.921919332842162e-06, + "loss": 0.0345, + "step": 84465 + }, + { + "epoch": 3.94, + "learning_rate": 6.9211355477873756e-06, + "loss": 0.1021, + "step": 84470 + }, + { + "epoch": 3.94, + "learning_rate": 6.920351762732589e-06, + "loss": 0.1215, + "step": 84475 + }, + { + "epoch": 3.94, + "learning_rate": 6.9195679776778025e-06, + "loss": 0.1225, + "step": 84480 + }, + { + "epoch": 3.94, + "learning_rate": 6.918784192623016e-06, + "loss": 0.1005, + "step": 84485 + }, + { + "epoch": 3.94, + "learning_rate": 6.9180004075682295e-06, + "loss": 0.2317, + "step": 84490 + }, + { + "epoch": 3.94, + "learning_rate": 6.9172166225134425e-06, + "loss": 0.3476, + "step": 84495 + }, + { + "epoch": 3.94, + "learning_rate": 6.9164328374586565e-06, + "loss": 0.052, + "step": 84500 + }, + { + "epoch": 3.94, + "learning_rate": 6.915649052403869e-06, + "loss": 0.0212, + "step": 84505 + }, + { + "epoch": 3.94, + "learning_rate": 6.9148652673490826e-06, + "loss": 0.0163, + "step": 84510 + }, + { + "epoch": 3.94, + "learning_rate": 6.914081482294296e-06, + "loss": 0.0298, + "step": 84515 + }, + { + "epoch": 3.94, + "learning_rate": 6.9132976972395095e-06, + "loss": 0.0641, + "step": 84520 + }, + { + "epoch": 3.94, + "learning_rate": 6.912513912184723e-06, + "loss": 0.0839, + "step": 84525 + }, + { + "epoch": 3.94, + "learning_rate": 6.9117301271299365e-06, + "loss": 0.0815, + "step": 84530 + }, + { + "epoch": 3.94, + "learning_rate": 6.9109463420751496e-06, + "loss": 0.1028, + "step": 84535 + }, + { + "epoch": 3.94, + "learning_rate": 6.9101625570203635e-06, + "loss": 0.2123, + "step": 84540 + }, + { + "epoch": 3.94, + "learning_rate": 6.9093787719655765e-06, + "loss": 0.2505, + "step": 84545 + }, + { + "epoch": 3.95, + "learning_rate": 6.9085949869107904e-06, + "loss": 0.0627, + "step": 84550 + }, + { + "epoch": 3.95, + "learning_rate": 6.907811201856004e-06, + "loss": 0.0441, + "step": 84555 + }, + { + "epoch": 3.95, + "learning_rate": 6.907027416801217e-06, + "loss": 0.0371, + "step": 84560 + }, + { + "epoch": 3.95, + "learning_rate": 6.906243631746431e-06, + "loss": 0.0606, + "step": 84565 + }, + { + "epoch": 3.95, + "learning_rate": 6.9054598466916435e-06, + "loss": 0.0634, + "step": 84570 + }, + { + "epoch": 3.95, + "learning_rate": 6.9046760616368566e-06, + "loss": 0.0775, + "step": 84575 + }, + { + "epoch": 3.95, + "learning_rate": 6.9038922765820705e-06, + "loss": 0.067, + "step": 84580 + }, + { + "epoch": 3.95, + "learning_rate": 6.9031084915272835e-06, + "loss": 0.1218, + "step": 84585 + }, + { + "epoch": 3.95, + "learning_rate": 6.9023247064724974e-06, + "loss": 0.2002, + "step": 84590 + }, + { + "epoch": 3.95, + "learning_rate": 6.901540921417711e-06, + "loss": 0.3358, + "step": 84595 + }, + { + "epoch": 3.95, + "learning_rate": 6.900757136362924e-06, + "loss": 0.1052, + "step": 84600 + }, + { + "epoch": 3.95, + "learning_rate": 6.899973351308138e-06, + "loss": 0.0229, + "step": 84605 + }, + { + "epoch": 3.95, + "learning_rate": 6.899189566253351e-06, + "loss": 0.0396, + "step": 84610 + }, + { + "epoch": 3.95, + "learning_rate": 6.898405781198565e-06, + "loss": 0.0759, + "step": 84615 + }, + { + "epoch": 3.95, + "learning_rate": 6.897621996143778e-06, + "loss": 0.0415, + "step": 84620 + }, + { + "epoch": 3.95, + "learning_rate": 6.896838211088992e-06, + "loss": 0.0713, + "step": 84625 + }, + { + "epoch": 3.95, + "learning_rate": 6.896054426034205e-06, + "loss": 0.0515, + "step": 84630 + }, + { + "epoch": 3.95, + "learning_rate": 6.8952706409794175e-06, + "loss": 0.1614, + "step": 84635 + }, + { + "epoch": 3.95, + "learning_rate": 6.894486855924631e-06, + "loss": 0.1235, + "step": 84640 + }, + { + "epoch": 3.95, + "learning_rate": 6.893703070869845e-06, + "loss": 0.2719, + "step": 84645 + }, + { + "epoch": 3.95, + "learning_rate": 6.892919285815058e-06, + "loss": 0.0453, + "step": 84650 + }, + { + "epoch": 3.95, + "learning_rate": 6.892135500760272e-06, + "loss": 0.0359, + "step": 84655 + }, + { + "epoch": 3.95, + "learning_rate": 6.891351715705485e-06, + "loss": 0.0588, + "step": 84660 + }, + { + "epoch": 3.95, + "learning_rate": 6.890567930650699e-06, + "loss": 0.0396, + "step": 84665 + }, + { + "epoch": 3.95, + "learning_rate": 6.889784145595912e-06, + "loss": 0.0536, + "step": 84670 + }, + { + "epoch": 3.95, + "learning_rate": 6.889000360541126e-06, + "loss": 0.0496, + "step": 84675 + }, + { + "epoch": 3.95, + "learning_rate": 6.888216575486339e-06, + "loss": 0.1221, + "step": 84680 + }, + { + "epoch": 3.95, + "learning_rate": 6.887432790431553e-06, + "loss": 0.1342, + "step": 84685 + }, + { + "epoch": 3.95, + "learning_rate": 6.886649005376766e-06, + "loss": 0.2477, + "step": 84690 + }, + { + "epoch": 3.95, + "learning_rate": 6.88586522032198e-06, + "loss": 0.2642, + "step": 84695 + }, + { + "epoch": 3.95, + "learning_rate": 6.885081435267192e-06, + "loss": 0.0678, + "step": 84700 + }, + { + "epoch": 3.95, + "learning_rate": 6.884297650212406e-06, + "loss": 0.0121, + "step": 84705 + }, + { + "epoch": 3.95, + "learning_rate": 6.883513865157619e-06, + "loss": 0.0479, + "step": 84710 + }, + { + "epoch": 3.95, + "learning_rate": 6.882730080102833e-06, + "loss": 0.0348, + "step": 84715 + }, + { + "epoch": 3.95, + "learning_rate": 6.881946295048046e-06, + "loss": 0.0813, + "step": 84720 + }, + { + "epoch": 3.95, + "learning_rate": 6.88116250999326e-06, + "loss": 0.0826, + "step": 84725 + }, + { + "epoch": 3.95, + "learning_rate": 6.880378724938473e-06, + "loss": 0.0925, + "step": 84730 + }, + { + "epoch": 3.95, + "learning_rate": 6.879594939883687e-06, + "loss": 0.1139, + "step": 84735 + }, + { + "epoch": 3.95, + "learning_rate": 6.8788111548289e-06, + "loss": 0.1557, + "step": 84740 + }, + { + "epoch": 3.95, + "learning_rate": 6.878027369774114e-06, + "loss": 0.2379, + "step": 84745 + }, + { + "epoch": 3.95, + "learning_rate": 6.877243584719327e-06, + "loss": 0.1039, + "step": 84750 + }, + { + "epoch": 3.95, + "learning_rate": 6.876459799664541e-06, + "loss": 0.0495, + "step": 84755 + }, + { + "epoch": 3.96, + "learning_rate": 6.875676014609754e-06, + "loss": 0.0733, + "step": 84760 + }, + { + "epoch": 3.96, + "learning_rate": 6.874892229554967e-06, + "loss": 0.1012, + "step": 84765 + }, + { + "epoch": 3.96, + "learning_rate": 6.87410844450018e-06, + "loss": 0.1164, + "step": 84770 + }, + { + "epoch": 3.96, + "learning_rate": 6.873324659445394e-06, + "loss": 0.098, + "step": 84775 + }, + { + "epoch": 3.96, + "learning_rate": 6.872540874390607e-06, + "loss": 0.1502, + "step": 84780 + }, + { + "epoch": 3.96, + "learning_rate": 6.871757089335821e-06, + "loss": 0.159, + "step": 84785 + }, + { + "epoch": 3.96, + "learning_rate": 6.870973304281034e-06, + "loss": 0.1656, + "step": 84790 + }, + { + "epoch": 3.96, + "learning_rate": 6.870189519226248e-06, + "loss": 0.2007, + "step": 84795 + }, + { + "epoch": 3.96, + "learning_rate": 6.869405734171461e-06, + "loss": 0.0573, + "step": 84800 + }, + { + "epoch": 3.96, + "learning_rate": 6.868621949116675e-06, + "loss": 0.0321, + "step": 84805 + }, + { + "epoch": 3.96, + "learning_rate": 6.867838164061888e-06, + "loss": 0.0407, + "step": 84810 + }, + { + "epoch": 3.96, + "learning_rate": 6.867054379007102e-06, + "loss": 0.0898, + "step": 84815 + }, + { + "epoch": 3.96, + "learning_rate": 6.866270593952316e-06, + "loss": 0.0688, + "step": 84820 + }, + { + "epoch": 3.96, + "learning_rate": 6.865486808897529e-06, + "loss": 0.0849, + "step": 84825 + }, + { + "epoch": 3.96, + "learning_rate": 6.864703023842741e-06, + "loss": 0.055, + "step": 84830 + }, + { + "epoch": 3.96, + "learning_rate": 6.863919238787955e-06, + "loss": 0.1568, + "step": 84835 + }, + { + "epoch": 3.96, + "learning_rate": 6.863135453733168e-06, + "loss": 0.1795, + "step": 84840 + }, + { + "epoch": 3.96, + "learning_rate": 6.862351668678382e-06, + "loss": 0.2184, + "step": 84845 + }, + { + "epoch": 3.96, + "learning_rate": 6.861567883623595e-06, + "loss": 0.0367, + "step": 84850 + }, + { + "epoch": 3.96, + "learning_rate": 6.860784098568809e-06, + "loss": 0.0136, + "step": 84855 + }, + { + "epoch": 3.96, + "learning_rate": 6.860000313514022e-06, + "loss": 0.05, + "step": 84860 + }, + { + "epoch": 3.96, + "learning_rate": 6.859216528459236e-06, + "loss": 0.0288, + "step": 84865 + }, + { + "epoch": 3.96, + "learning_rate": 6.85843274340445e-06, + "loss": 0.0863, + "step": 84870 + }, + { + "epoch": 3.96, + "learning_rate": 6.857648958349663e-06, + "loss": 0.0304, + "step": 84875 + }, + { + "epoch": 3.96, + "learning_rate": 6.856865173294877e-06, + "loss": 0.1366, + "step": 84880 + }, + { + "epoch": 3.96, + "learning_rate": 6.85608138824009e-06, + "loss": 0.1891, + "step": 84885 + }, + { + "epoch": 3.96, + "learning_rate": 6.855297603185304e-06, + "loss": 0.2073, + "step": 84890 + }, + { + "epoch": 3.96, + "learning_rate": 6.854513818130516e-06, + "loss": 0.3622, + "step": 84895 + }, + { + "epoch": 3.96, + "learning_rate": 6.853730033075729e-06, + "loss": 0.0635, + "step": 84900 + }, + { + "epoch": 3.96, + "learning_rate": 6.852946248020943e-06, + "loss": 0.005, + "step": 84905 + }, + { + "epoch": 3.96, + "learning_rate": 6.852162462966157e-06, + "loss": 0.0065, + "step": 84910 + }, + { + "epoch": 3.96, + "learning_rate": 6.85137867791137e-06, + "loss": 0.0524, + "step": 84915 + }, + { + "epoch": 3.96, + "learning_rate": 6.850594892856584e-06, + "loss": 0.0971, + "step": 84920 + }, + { + "epoch": 3.96, + "learning_rate": 6.849811107801797e-06, + "loss": 0.0614, + "step": 84925 + }, + { + "epoch": 3.96, + "learning_rate": 6.849027322747011e-06, + "loss": 0.1487, + "step": 84930 + }, + { + "epoch": 3.96, + "learning_rate": 6.848243537692224e-06, + "loss": 0.146, + "step": 84935 + }, + { + "epoch": 3.96, + "learning_rate": 6.847459752637438e-06, + "loss": 0.2943, + "step": 84940 + }, + { + "epoch": 3.96, + "learning_rate": 6.846675967582651e-06, + "loss": 0.3352, + "step": 84945 + }, + { + "epoch": 3.96, + "learning_rate": 6.845892182527865e-06, + "loss": 0.034, + "step": 84950 + }, + { + "epoch": 3.96, + "learning_rate": 6.845108397473078e-06, + "loss": 0.0656, + "step": 84955 + }, + { + "epoch": 3.96, + "learning_rate": 6.844324612418291e-06, + "loss": 0.0307, + "step": 84960 + }, + { + "epoch": 3.96, + "learning_rate": 6.843540827363504e-06, + "loss": 0.0868, + "step": 84965 + }, + { + "epoch": 3.96, + "learning_rate": 6.842757042308718e-06, + "loss": 0.0561, + "step": 84970 + }, + { + "epoch": 3.97, + "learning_rate": 6.841973257253931e-06, + "loss": 0.1256, + "step": 84975 + }, + { + "epoch": 3.97, + "learning_rate": 6.841189472199145e-06, + "loss": 0.0754, + "step": 84980 + }, + { + "epoch": 3.97, + "learning_rate": 6.840405687144358e-06, + "loss": 0.1401, + "step": 84985 + }, + { + "epoch": 3.97, + "learning_rate": 6.839621902089572e-06, + "loss": 0.135, + "step": 84990 + }, + { + "epoch": 3.97, + "learning_rate": 6.838838117034785e-06, + "loss": 0.2946, + "step": 84995 + }, + { + "epoch": 3.97, + "learning_rate": 6.838054331979999e-06, + "loss": 0.0858, + "step": 85000 + }, + { + "epoch": 3.97, + "learning_rate": 6.837270546925212e-06, + "loss": 0.0219, + "step": 85005 + }, + { + "epoch": 3.97, + "learning_rate": 6.836486761870426e-06, + "loss": 0.026, + "step": 85010 + }, + { + "epoch": 3.97, + "learning_rate": 6.835702976815639e-06, + "loss": 0.0867, + "step": 85015 + }, + { + "epoch": 3.97, + "learning_rate": 6.834919191760853e-06, + "loss": 0.045, + "step": 85020 + }, + { + "epoch": 3.97, + "learning_rate": 6.834135406706065e-06, + "loss": 0.0908, + "step": 85025 + }, + { + "epoch": 3.97, + "learning_rate": 6.833351621651279e-06, + "loss": 0.2008, + "step": 85030 + }, + { + "epoch": 3.97, + "learning_rate": 6.832567836596492e-06, + "loss": 0.1749, + "step": 85035 + }, + { + "epoch": 3.97, + "learning_rate": 6.831784051541706e-06, + "loss": 0.1238, + "step": 85040 + }, + { + "epoch": 3.97, + "learning_rate": 6.831000266486919e-06, + "loss": 0.284, + "step": 85045 + }, + { + "epoch": 3.97, + "learning_rate": 6.830216481432133e-06, + "loss": 0.1071, + "step": 85050 + }, + { + "epoch": 3.97, + "learning_rate": 6.829432696377346e-06, + "loss": 0.0221, + "step": 85055 + }, + { + "epoch": 3.97, + "learning_rate": 6.82864891132256e-06, + "loss": 0.0341, + "step": 85060 + }, + { + "epoch": 3.97, + "learning_rate": 6.827865126267773e-06, + "loss": 0.0256, + "step": 85065 + }, + { + "epoch": 3.97, + "learning_rate": 6.827081341212987e-06, + "loss": 0.0422, + "step": 85070 + }, + { + "epoch": 3.97, + "learning_rate": 6.8262975561582e-06, + "loss": 0.0906, + "step": 85075 + }, + { + "epoch": 3.97, + "learning_rate": 6.825513771103414e-06, + "loss": 0.1339, + "step": 85080 + }, + { + "epoch": 3.97, + "learning_rate": 6.8247299860486275e-06, + "loss": 0.1046, + "step": 85085 + }, + { + "epoch": 3.97, + "learning_rate": 6.82394620099384e-06, + "loss": 0.1263, + "step": 85090 + }, + { + "epoch": 3.97, + "learning_rate": 6.823162415939053e-06, + "loss": 0.2744, + "step": 85095 + }, + { + "epoch": 3.97, + "learning_rate": 6.822378630884267e-06, + "loss": 0.0645, + "step": 85100 + }, + { + "epoch": 3.97, + "learning_rate": 6.82159484582948e-06, + "loss": 0.016, + "step": 85105 + }, + { + "epoch": 3.97, + "learning_rate": 6.820811060774694e-06, + "loss": 0.0759, + "step": 85110 + }, + { + "epoch": 3.97, + "learning_rate": 6.820027275719907e-06, + "loss": 0.0534, + "step": 85115 + }, + { + "epoch": 3.97, + "learning_rate": 6.819243490665121e-06, + "loss": 0.0791, + "step": 85120 + }, + { + "epoch": 3.97, + "learning_rate": 6.818459705610334e-06, + "loss": 0.1052, + "step": 85125 + }, + { + "epoch": 3.97, + "learning_rate": 6.817675920555548e-06, + "loss": 0.0868, + "step": 85130 + }, + { + "epoch": 3.97, + "learning_rate": 6.8168921355007615e-06, + "loss": 0.0865, + "step": 85135 + }, + { + "epoch": 3.97, + "learning_rate": 6.8161083504459746e-06, + "loss": 0.2504, + "step": 85140 + }, + { + "epoch": 3.97, + "learning_rate": 6.8153245653911885e-06, + "loss": 0.2567, + "step": 85145 + }, + { + "epoch": 3.97, + "learning_rate": 6.8145407803364015e-06, + "loss": 0.0648, + "step": 85150 + }, + { + "epoch": 3.97, + "learning_rate": 6.813756995281614e-06, + "loss": 0.0277, + "step": 85155 + }, + { + "epoch": 3.97, + "learning_rate": 6.812973210226828e-06, + "loss": 0.0593, + "step": 85160 + }, + { + "epoch": 3.97, + "learning_rate": 6.812189425172041e-06, + "loss": 0.1007, + "step": 85165 + }, + { + "epoch": 3.97, + "learning_rate": 6.811405640117255e-06, + "loss": 0.1356, + "step": 85170 + }, + { + "epoch": 3.97, + "learning_rate": 6.8106218550624685e-06, + "loss": 0.0813, + "step": 85175 + }, + { + "epoch": 3.97, + "learning_rate": 6.8098380700076816e-06, + "loss": 0.1681, + "step": 85180 + }, + { + "epoch": 3.97, + "learning_rate": 6.8090542849528955e-06, + "loss": 0.1017, + "step": 85185 + }, + { + "epoch": 3.98, + "learning_rate": 6.8082704998981085e-06, + "loss": 0.1854, + "step": 85190 + }, + { + "epoch": 3.98, + "learning_rate": 6.8074867148433224e-06, + "loss": 0.3727, + "step": 85195 + }, + { + "epoch": 3.98, + "learning_rate": 6.8067029297885355e-06, + "loss": 0.0719, + "step": 85200 + }, + { + "epoch": 3.98, + "learning_rate": 6.805919144733749e-06, + "loss": 0.0192, + "step": 85205 + }, + { + "epoch": 3.98, + "learning_rate": 6.8051353596789625e-06, + "loss": 0.0716, + "step": 85210 + }, + { + "epoch": 3.98, + "learning_rate": 6.804351574624176e-06, + "loss": 0.0414, + "step": 85215 + }, + { + "epoch": 3.98, + "learning_rate": 6.803567789569389e-06, + "loss": 0.0351, + "step": 85220 + }, + { + "epoch": 3.98, + "learning_rate": 6.8027840045146025e-06, + "loss": 0.075, + "step": 85225 + }, + { + "epoch": 3.98, + "learning_rate": 6.8020002194598155e-06, + "loss": 0.2622, + "step": 85230 + }, + { + "epoch": 3.98, + "learning_rate": 6.8012164344050294e-06, + "loss": 0.0557, + "step": 85235 + }, + { + "epoch": 3.98, + "learning_rate": 6.8004326493502425e-06, + "loss": 0.1608, + "step": 85240 + }, + { + "epoch": 3.98, + "learning_rate": 6.799648864295456e-06, + "loss": 0.1829, + "step": 85245 + }, + { + "epoch": 3.98, + "learning_rate": 6.7988650792406695e-06, + "loss": 0.1448, + "step": 85250 + }, + { + "epoch": 3.98, + "learning_rate": 6.798081294185883e-06, + "loss": 0.0546, + "step": 85255 + }, + { + "epoch": 3.98, + "learning_rate": 6.7972975091310964e-06, + "loss": 0.032, + "step": 85260 + }, + { + "epoch": 3.98, + "learning_rate": 6.79651372407631e-06, + "loss": 0.0511, + "step": 85265 + }, + { + "epoch": 3.98, + "learning_rate": 6.795729939021523e-06, + "loss": 0.0677, + "step": 85270 + }, + { + "epoch": 3.98, + "learning_rate": 6.794946153966737e-06, + "loss": 0.0947, + "step": 85275 + }, + { + "epoch": 3.98, + "learning_rate": 6.79416236891195e-06, + "loss": 0.1098, + "step": 85280 + }, + { + "epoch": 3.98, + "learning_rate": 6.7933785838571634e-06, + "loss": 0.1172, + "step": 85285 + }, + { + "epoch": 3.98, + "learning_rate": 6.7925947988023765e-06, + "loss": 0.1582, + "step": 85290 + }, + { + "epoch": 3.98, + "learning_rate": 6.79181101374759e-06, + "loss": 0.2443, + "step": 85295 + }, + { + "epoch": 3.98, + "learning_rate": 6.7910272286928034e-06, + "loss": 0.0805, + "step": 85300 + }, + { + "epoch": 3.98, + "learning_rate": 6.790243443638017e-06, + "loss": 0.0288, + "step": 85305 + }, + { + "epoch": 3.98, + "learning_rate": 6.78945965858323e-06, + "loss": 0.083, + "step": 85310 + }, + { + "epoch": 3.98, + "learning_rate": 6.788675873528444e-06, + "loss": 0.0465, + "step": 85315 + }, + { + "epoch": 3.98, + "learning_rate": 6.787892088473657e-06, + "loss": 0.048, + "step": 85320 + }, + { + "epoch": 3.98, + "learning_rate": 6.787108303418871e-06, + "loss": 0.0728, + "step": 85325 + }, + { + "epoch": 3.98, + "learning_rate": 6.786324518364084e-06, + "loss": 0.1453, + "step": 85330 + }, + { + "epoch": 3.98, + "learning_rate": 6.785540733309298e-06, + "loss": 0.2035, + "step": 85335 + }, + { + "epoch": 3.98, + "learning_rate": 6.784756948254511e-06, + "loss": 0.2733, + "step": 85340 + }, + { + "epoch": 3.98, + "learning_rate": 6.783973163199725e-06, + "loss": 0.1481, + "step": 85345 + }, + { + "epoch": 3.98, + "learning_rate": 6.783189378144937e-06, + "loss": 0.0662, + "step": 85350 + }, + { + "epoch": 3.98, + "learning_rate": 6.782405593090151e-06, + "loss": 0.0241, + "step": 85355 + }, + { + "epoch": 3.98, + "learning_rate": 6.781621808035364e-06, + "loss": 0.0078, + "step": 85360 + }, + { + "epoch": 3.98, + "learning_rate": 6.780838022980578e-06, + "loss": 0.0335, + "step": 85365 + }, + { + "epoch": 3.98, + "learning_rate": 6.780054237925791e-06, + "loss": 0.0504, + "step": 85370 + }, + { + "epoch": 3.98, + "learning_rate": 6.779270452871005e-06, + "loss": 0.0439, + "step": 85375 + }, + { + "epoch": 3.98, + "learning_rate": 6.778486667816218e-06, + "loss": 0.0861, + "step": 85380 + }, + { + "epoch": 3.98, + "learning_rate": 6.777702882761432e-06, + "loss": 0.1036, + "step": 85385 + }, + { + "epoch": 3.98, + "learning_rate": 6.776919097706645e-06, + "loss": 0.0982, + "step": 85390 + }, + { + "epoch": 3.98, + "learning_rate": 6.776135312651859e-06, + "loss": 0.4021, + "step": 85395 + }, + { + "epoch": 3.98, + "learning_rate": 6.775351527597073e-06, + "loss": 0.0563, + "step": 85400 + }, + { + "epoch": 3.99, + "learning_rate": 6.774567742542286e-06, + "loss": 0.0224, + "step": 85405 + }, + { + "epoch": 3.99, + "learning_rate": 6.7737839574875e-06, + "loss": 0.0658, + "step": 85410 + }, + { + "epoch": 3.99, + "learning_rate": 6.773000172432712e-06, + "loss": 0.0374, + "step": 85415 + }, + { + "epoch": 3.99, + "learning_rate": 6.772216387377925e-06, + "loss": 0.0689, + "step": 85420 + }, + { + "epoch": 3.99, + "learning_rate": 6.771432602323139e-06, + "loss": 0.0196, + "step": 85425 + }, + { + "epoch": 3.99, + "learning_rate": 6.770648817268352e-06, + "loss": 0.0726, + "step": 85430 + }, + { + "epoch": 3.99, + "learning_rate": 6.769865032213566e-06, + "loss": 0.1081, + "step": 85435 + }, + { + "epoch": 3.99, + "learning_rate": 6.769081247158779e-06, + "loss": 0.23, + "step": 85440 + }, + { + "epoch": 3.99, + "learning_rate": 6.768297462103993e-06, + "loss": 0.1952, + "step": 85445 + }, + { + "epoch": 3.99, + "learning_rate": 6.767513677049207e-06, + "loss": 0.0612, + "step": 85450 + }, + { + "epoch": 3.99, + "learning_rate": 6.76672989199442e-06, + "loss": 0.0818, + "step": 85455 + }, + { + "epoch": 3.99, + "learning_rate": 6.765946106939634e-06, + "loss": 0.0122, + "step": 85460 + }, + { + "epoch": 3.99, + "learning_rate": 6.765162321884847e-06, + "loss": 0.0459, + "step": 85465 + }, + { + "epoch": 3.99, + "learning_rate": 6.764378536830061e-06, + "loss": 0.0668, + "step": 85470 + }, + { + "epoch": 3.99, + "learning_rate": 6.763594751775274e-06, + "loss": 0.0418, + "step": 85475 + }, + { + "epoch": 3.99, + "learning_rate": 6.762810966720486e-06, + "loss": 0.0965, + "step": 85480 + }, + { + "epoch": 3.99, + "learning_rate": 6.7620271816657e-06, + "loss": 0.1879, + "step": 85485 + }, + { + "epoch": 3.99, + "learning_rate": 6.761243396610914e-06, + "loss": 0.2135, + "step": 85490 + }, + { + "epoch": 3.99, + "learning_rate": 6.760459611556127e-06, + "loss": 0.1978, + "step": 85495 + }, + { + "epoch": 3.99, + "learning_rate": 6.759675826501341e-06, + "loss": 0.111, + "step": 85500 + }, + { + "epoch": 3.99, + "learning_rate": 6.758892041446554e-06, + "loss": 0.0223, + "step": 85505 + }, + { + "epoch": 3.99, + "learning_rate": 6.758108256391768e-06, + "loss": 0.0205, + "step": 85510 + }, + { + "epoch": 3.99, + "learning_rate": 6.757324471336981e-06, + "loss": 0.0967, + "step": 85515 + }, + { + "epoch": 3.99, + "learning_rate": 6.756540686282195e-06, + "loss": 0.0362, + "step": 85520 + }, + { + "epoch": 3.99, + "learning_rate": 6.755756901227408e-06, + "loss": 0.1377, + "step": 85525 + }, + { + "epoch": 3.99, + "learning_rate": 6.754973116172622e-06, + "loss": 0.0516, + "step": 85530 + }, + { + "epoch": 3.99, + "learning_rate": 6.754189331117835e-06, + "loss": 0.1818, + "step": 85535 + }, + { + "epoch": 3.99, + "learning_rate": 6.753405546063049e-06, + "loss": 0.1526, + "step": 85540 + }, + { + "epoch": 3.99, + "learning_rate": 6.752621761008261e-06, + "loss": 0.3196, + "step": 85545 + }, + { + "epoch": 3.99, + "learning_rate": 6.751837975953475e-06, + "loss": 0.0523, + "step": 85550 + }, + { + "epoch": 3.99, + "learning_rate": 6.751054190898688e-06, + "loss": 0.0336, + "step": 85555 + }, + { + "epoch": 3.99, + "learning_rate": 6.750270405843902e-06, + "loss": 0.0413, + "step": 85560 + }, + { + "epoch": 3.99, + "learning_rate": 6.749486620789115e-06, + "loss": 0.1544, + "step": 85565 + }, + { + "epoch": 3.99, + "learning_rate": 6.748702835734329e-06, + "loss": 0.0657, + "step": 85570 + }, + { + "epoch": 3.99, + "learning_rate": 6.747919050679542e-06, + "loss": 0.0705, + "step": 85575 + }, + { + "epoch": 3.99, + "learning_rate": 6.747135265624756e-06, + "loss": 0.1632, + "step": 85580 + }, + { + "epoch": 3.99, + "learning_rate": 6.746351480569969e-06, + "loss": 0.1428, + "step": 85585 + }, + { + "epoch": 3.99, + "learning_rate": 6.745567695515183e-06, + "loss": 0.1539, + "step": 85590 + }, + { + "epoch": 3.99, + "learning_rate": 6.744783910460396e-06, + "loss": 0.2851, + "step": 85595 + }, + { + "epoch": 3.99, + "learning_rate": 6.74400012540561e-06, + "loss": 0.1251, + "step": 85600 + }, + { + "epoch": 3.99, + "learning_rate": 6.743216340350823e-06, + "loss": 0.0252, + "step": 85605 + }, + { + "epoch": 3.99, + "learning_rate": 6.742432555296036e-06, + "loss": 0.0494, + "step": 85610 + }, + { + "epoch": 3.99, + "learning_rate": 6.741648770241249e-06, + "loss": 0.0426, + "step": 85615 + }, + { + "epoch": 4.0, + "learning_rate": 6.740864985186463e-06, + "loss": 0.1323, + "step": 85620 + }, + { + "epoch": 4.0, + "learning_rate": 6.740081200131676e-06, + "loss": 0.1494, + "step": 85625 + }, + { + "epoch": 4.0, + "learning_rate": 6.73929741507689e-06, + "loss": 0.1759, + "step": 85630 + }, + { + "epoch": 4.0, + "learning_rate": 6.738513630022103e-06, + "loss": 0.1965, + "step": 85635 + }, + { + "epoch": 4.0, + "learning_rate": 6.737886601978274e-06, + "loss": 0.2621, + "step": 85640 + }, + { + "epoch": 4.0, + "learning_rate": 6.737102816923487e-06, + "loss": 0.3035, + "step": 85645 + }, + { + "epoch": 4.0, + "learning_rate": 6.736319031868701e-06, + "loss": 0.0709, + "step": 85650 + }, + { + "epoch": 4.0, + "learning_rate": 6.735535246813914e-06, + "loss": 0.0263, + "step": 85655 + }, + { + "epoch": 4.0, + "learning_rate": 6.734751461759128e-06, + "loss": 0.0484, + "step": 85660 + }, + { + "epoch": 4.0, + "learning_rate": 6.733967676704341e-06, + "loss": 0.0793, + "step": 85665 + }, + { + "epoch": 4.0, + "learning_rate": 6.733183891649555e-06, + "loss": 0.0728, + "step": 85670 + }, + { + "epoch": 4.0, + "learning_rate": 6.732400106594767e-06, + "loss": 0.0888, + "step": 85675 + }, + { + "epoch": 4.0, + "learning_rate": 6.731616321539981e-06, + "loss": 0.0878, + "step": 85680 + }, + { + "epoch": 4.0, + "learning_rate": 6.730832536485194e-06, + "loss": 0.2161, + "step": 85685 + }, + { + "epoch": 4.0, + "learning_rate": 6.730048751430408e-06, + "loss": 0.1678, + "step": 85690 + }, + { + "epoch": 4.0, + "learning_rate": 6.729264966375621e-06, + "loss": 0.2515, + "step": 85695 + }, + { + "epoch": 4.0, + "learning_rate": 6.728481181320835e-06, + "loss": 0.0519, + "step": 85700 + }, + { + "epoch": 4.0, + "learning_rate": 6.727697396266048e-06, + "loss": 0.035, + "step": 85705 + }, + { + "epoch": 4.0, + "learning_rate": 6.726913611211262e-06, + "loss": 0.0771, + "step": 85710 + }, + { + "epoch": 4.0, + "learning_rate": 6.726129826156475e-06, + "loss": 0.0916, + "step": 85715 + }, + { + "epoch": 4.0, + "learning_rate": 6.725346041101689e-06, + "loss": 0.163, + "step": 85720 + }, + { + "epoch": 4.0, + "eval_cer": 0.01181715224228095, + "eval_loss": 0.08370912075042725, + "eval_runtime": 455.3512, + "eval_samples_per_second": 41.836, + "eval_steps_per_second": 5.231, + "eval_wer": 0.10025906735751296, + "step": 85724 + } + ], + "max_steps": 128586, + "num_train_epochs": 6, + "total_flos": 1.028911335995475e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-85724/training_args.bin b/checkpoint-85724/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cc7cb27194c4763ad57ba9f820c49b1d0a2bcf --- /dev/null +++ b/checkpoint-85724/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a655ca2fa82ac80a7162e5149caad102a189b97deb1fba1f94f21e15657a07 +size 3055 diff --git a/pytorch_model.bin b/pytorch_model.bin index cefe44052a57697173eb54797a9c5ea3254b5479..dd1c66ac35d6cbed02c4733ac54a2af2daa159b9 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d99669a317a14e78185e29fd25de6d26e7c9f26656b08e0603047f2218cea722 +oid sha256:abc8dd19eb7009f20d4447482fcddd4c2b4d655e4137b8cc06205b09e62140ae size 377656855 diff --git a/runs/Apr30_09-11-30_4687ea2a1995/1651311629.182532/events.out.tfevents.1651311629.4687ea2a1995.99.1 b/runs/Apr30_09-11-30_4687ea2a1995/1651311629.182532/events.out.tfevents.1651311629.4687ea2a1995.99.1 new file mode 100644 index 0000000000000000000000000000000000000000..45d28887a135d4d298d49d609addfcbe98343210 --- /dev/null +++ b/runs/Apr30_09-11-30_4687ea2a1995/1651311629.182532/events.out.tfevents.1651311629.4687ea2a1995.99.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986c580fabc83f80575c6c3e5baf65700b7e34a4d4444190791915a50bc93b98 +size 4878 diff --git a/runs/Apr30_09-11-30_4687ea2a1995/events.out.tfevents.1651311629.4687ea2a1995.99.0 b/runs/Apr30_09-11-30_4687ea2a1995/events.out.tfevents.1651311629.4687ea2a1995.99.0 new file mode 100644 index 0000000000000000000000000000000000000000..e72ded76c5152b5d24a87bd32e97cb6aaaa7f19b --- /dev/null +++ b/runs/Apr30_09-11-30_4687ea2a1995/events.out.tfevents.1651311629.4687ea2a1995.99.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc255762788b687c2714c5614c8b49285fee13c2fb81355d1cd64a537941ebb +size 4112208